{ "best_global_step": 22134, "best_metric": 60.10979334635811, "best_model_checkpoint": "whisper-tiny-bfloat16-sada/checkpoints/checkpoint-22134", "epoch": 1.9995934683921675, "eval_steps": 3689, "global_step": 29512, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.775526797208483e-05, "grad_norm": 132.70848083496094, "learning_rate": 0.0, "loss": 4.4561, "step": 1 }, { "epoch": 0.00013551053594416967, "grad_norm": 133.01596069335938, "learning_rate": 1.3333333333333334e-07, "loss": 4.8642, "step": 2 }, { "epoch": 0.0002032658039162545, "grad_norm": 106.03809356689453, "learning_rate": 2.6666666666666667e-07, "loss": 4.3778, "step": 3 }, { "epoch": 0.00027102107188833934, "grad_norm": 88.75433349609375, "learning_rate": 4.0000000000000003e-07, "loss": 4.0385, "step": 4 }, { "epoch": 0.00033877633986042414, "grad_norm": 101.19609832763672, "learning_rate": 5.333333333333333e-07, "loss": 3.9092, "step": 5 }, { "epoch": 0.000406531607832509, "grad_norm": 67.02723693847656, "learning_rate": 6.666666666666667e-07, "loss": 3.7556, "step": 6 }, { "epoch": 0.0004742868758045938, "grad_norm": 67.52012634277344, "learning_rate": 8.000000000000001e-07, "loss": 3.7119, "step": 7 }, { "epoch": 0.0005420421437766787, "grad_norm": 83.27348327636719, "learning_rate": 9.333333333333334e-07, "loss": 3.6125, "step": 8 }, { "epoch": 0.0006097974117487635, "grad_norm": 84.20647430419922, "learning_rate": 1.0666666666666667e-06, "loss": 4.1354, "step": 9 }, { "epoch": 0.0006775526797208483, "grad_norm": 110.12113952636719, "learning_rate": 1.2000000000000002e-06, "loss": 3.9594, "step": 10 }, { "epoch": 0.0007453079476929331, "grad_norm": 84.61614990234375, "learning_rate": 1.3333333333333334e-06, "loss": 4.1415, "step": 11 }, { "epoch": 0.000813063215665018, "grad_norm": 119.0740966796875, "learning_rate": 1.4666666666666667e-06, "loss": 4.0689, "step": 12 }, { "epoch": 0.0008808184836371028, "grad_norm": 122.41352081298828, "learning_rate": 1.6000000000000001e-06, "loss": 4.0834, "step": 13 }, { "epoch": 0.0009485737516091876, "grad_norm": 99.06126403808594, "learning_rate": 1.7333333333333334e-06, "loss": 4.4224, "step": 14 }, { "epoch": 0.0010163290195812724, "grad_norm": 113.89531707763672, "learning_rate": 1.8666666666666669e-06, "loss": 4.0188, "step": 15 }, { "epoch": 0.0010840842875533573, "grad_norm": 70.24198150634766, "learning_rate": 2.0000000000000003e-06, "loss": 3.3951, "step": 16 }, { "epoch": 0.001151839555525442, "grad_norm": 67.99130249023438, "learning_rate": 2.1333333333333334e-06, "loss": 3.7385, "step": 17 }, { "epoch": 0.001219594823497527, "grad_norm": 54.554779052734375, "learning_rate": 2.266666666666667e-06, "loss": 2.9009, "step": 18 }, { "epoch": 0.0012873500914696117, "grad_norm": 47.79149627685547, "learning_rate": 2.4000000000000003e-06, "loss": 3.0572, "step": 19 }, { "epoch": 0.0013551053594416966, "grad_norm": 50.05776596069336, "learning_rate": 2.5333333333333334e-06, "loss": 2.8678, "step": 20 }, { "epoch": 0.0014228606274137815, "grad_norm": 55.41701889038086, "learning_rate": 2.666666666666667e-06, "loss": 3.3981, "step": 21 }, { "epoch": 0.0014906158953858662, "grad_norm": 57.1912727355957, "learning_rate": 2.8000000000000003e-06, "loss": 3.3247, "step": 22 }, { "epoch": 0.001558371163357951, "grad_norm": 39.77119445800781, "learning_rate": 2.9333333333333333e-06, "loss": 3.1466, "step": 23 }, { "epoch": 0.001626126431330036, "grad_norm": 37.258392333984375, "learning_rate": 3.066666666666667e-06, "loss": 3.0137, "step": 24 }, { "epoch": 0.0016938816993021207, "grad_norm": 41.15428924560547, "learning_rate": 3.2000000000000003e-06, "loss": 2.9908, "step": 25 }, { "epoch": 0.0017616369672742056, "grad_norm": 44.59917449951172, "learning_rate": 3.3333333333333333e-06, "loss": 3.6947, "step": 26 }, { "epoch": 0.0018293922352462903, "grad_norm": 32.7044792175293, "learning_rate": 3.466666666666667e-06, "loss": 2.9421, "step": 27 }, { "epoch": 0.0018971475032183752, "grad_norm": 28.445444107055664, "learning_rate": 3.6e-06, "loss": 2.9849, "step": 28 }, { "epoch": 0.00196490277119046, "grad_norm": 30.879247665405273, "learning_rate": 3.7333333333333337e-06, "loss": 2.9007, "step": 29 }, { "epoch": 0.002032658039162545, "grad_norm": 22.723188400268555, "learning_rate": 3.866666666666667e-06, "loss": 2.3549, "step": 30 }, { "epoch": 0.0021004133071346296, "grad_norm": 23.59457778930664, "learning_rate": 4.000000000000001e-06, "loss": 2.8564, "step": 31 }, { "epoch": 0.0021681685751067147, "grad_norm": 23.313579559326172, "learning_rate": 4.133333333333333e-06, "loss": 2.453, "step": 32 }, { "epoch": 0.0022359238430787994, "grad_norm": 23.877405166625977, "learning_rate": 4.266666666666667e-06, "loss": 2.2042, "step": 33 }, { "epoch": 0.002303679111050884, "grad_norm": 29.79376792907715, "learning_rate": 4.4e-06, "loss": 2.6679, "step": 34 }, { "epoch": 0.002371434379022969, "grad_norm": 21.018878936767578, "learning_rate": 4.533333333333334e-06, "loss": 2.3983, "step": 35 }, { "epoch": 0.002439189646995054, "grad_norm": 30.22182846069336, "learning_rate": 4.666666666666667e-06, "loss": 2.8668, "step": 36 }, { "epoch": 0.0025069449149671386, "grad_norm": 24.619905471801758, "learning_rate": 4.800000000000001e-06, "loss": 2.5986, "step": 37 }, { "epoch": 0.0025747001829392233, "grad_norm": 25.470375061035156, "learning_rate": 4.933333333333333e-06, "loss": 2.4936, "step": 38 }, { "epoch": 0.0026424554509113084, "grad_norm": 19.872745513916016, "learning_rate": 5.066666666666667e-06, "loss": 2.1633, "step": 39 }, { "epoch": 0.002710210718883393, "grad_norm": 24.880964279174805, "learning_rate": 5.2e-06, "loss": 2.6798, "step": 40 }, { "epoch": 0.002777965986855478, "grad_norm": 22.790157318115234, "learning_rate": 5.333333333333334e-06, "loss": 2.487, "step": 41 }, { "epoch": 0.002845721254827563, "grad_norm": 21.404136657714844, "learning_rate": 5.466666666666667e-06, "loss": 2.3383, "step": 42 }, { "epoch": 0.0029134765227996477, "grad_norm": 22.80590057373047, "learning_rate": 5.600000000000001e-06, "loss": 2.6347, "step": 43 }, { "epoch": 0.0029812317907717324, "grad_norm": 20.625160217285156, "learning_rate": 5.733333333333333e-06, "loss": 2.066, "step": 44 }, { "epoch": 0.0030489870587438175, "grad_norm": 21.700828552246094, "learning_rate": 5.866666666666667e-06, "loss": 2.188, "step": 45 }, { "epoch": 0.003116742326715902, "grad_norm": 18.8945255279541, "learning_rate": 6e-06, "loss": 2.1531, "step": 46 }, { "epoch": 0.003184497594687987, "grad_norm": 20.238525390625, "learning_rate": 6.133333333333334e-06, "loss": 1.9306, "step": 47 }, { "epoch": 0.003252252862660072, "grad_norm": 23.74344825744629, "learning_rate": 6.266666666666666e-06, "loss": 2.3206, "step": 48 }, { "epoch": 0.0033200081306321567, "grad_norm": 22.429964065551758, "learning_rate": 6.4000000000000006e-06, "loss": 2.05, "step": 49 }, { "epoch": 0.0033877633986042414, "grad_norm": 22.078792572021484, "learning_rate": 6.533333333333333e-06, "loss": 2.1155, "step": 50 }, { "epoch": 0.003455518666576326, "grad_norm": 19.691747665405273, "learning_rate": 6.666666666666667e-06, "loss": 1.8719, "step": 51 }, { "epoch": 0.0035232739345484113, "grad_norm": 27.412866592407227, "learning_rate": 6.800000000000001e-06, "loss": 2.0943, "step": 52 }, { "epoch": 0.003591029202520496, "grad_norm": 21.631580352783203, "learning_rate": 6.933333333333334e-06, "loss": 2.246, "step": 53 }, { "epoch": 0.0036587844704925807, "grad_norm": 16.729095458984375, "learning_rate": 7.066666666666667e-06, "loss": 1.8101, "step": 54 }, { "epoch": 0.003726539738464666, "grad_norm": 18.381364822387695, "learning_rate": 7.2e-06, "loss": 1.8668, "step": 55 }, { "epoch": 0.0037942950064367505, "grad_norm": 19.661123275756836, "learning_rate": 7.333333333333334e-06, "loss": 2.1912, "step": 56 }, { "epoch": 0.003862050274408835, "grad_norm": 13.772311210632324, "learning_rate": 7.4666666666666675e-06, "loss": 1.7408, "step": 57 }, { "epoch": 0.00392980554238092, "grad_norm": 20.559425354003906, "learning_rate": 7.6e-06, "loss": 1.9717, "step": 58 }, { "epoch": 0.003997560810353005, "grad_norm": 17.385791778564453, "learning_rate": 7.733333333333334e-06, "loss": 1.7681, "step": 59 }, { "epoch": 0.00406531607832509, "grad_norm": 18.455888748168945, "learning_rate": 7.866666666666667e-06, "loss": 1.8822, "step": 60 }, { "epoch": 0.004133071346297174, "grad_norm": 17.53643798828125, "learning_rate": 8.000000000000001e-06, "loss": 1.8158, "step": 61 }, { "epoch": 0.004200826614269259, "grad_norm": 15.640396118164062, "learning_rate": 8.133333333333332e-06, "loss": 1.8786, "step": 62 }, { "epoch": 0.004268581882241345, "grad_norm": 67.14771270751953, "learning_rate": 8.266666666666667e-06, "loss": 1.845, "step": 63 }, { "epoch": 0.004336337150213429, "grad_norm": 16.778207778930664, "learning_rate": 8.400000000000001e-06, "loss": 1.7286, "step": 64 }, { "epoch": 0.004404092418185514, "grad_norm": 20.485218048095703, "learning_rate": 8.533333333333334e-06, "loss": 1.882, "step": 65 }, { "epoch": 0.004471847686157599, "grad_norm": 15.77828598022461, "learning_rate": 8.666666666666668e-06, "loss": 1.7788, "step": 66 }, { "epoch": 0.0045396029541296835, "grad_norm": 20.225608825683594, "learning_rate": 8.8e-06, "loss": 1.9842, "step": 67 }, { "epoch": 0.004607358222101768, "grad_norm": 16.230987548828125, "learning_rate": 8.933333333333333e-06, "loss": 1.4581, "step": 68 }, { "epoch": 0.004675113490073853, "grad_norm": 13.831804275512695, "learning_rate": 9.066666666666667e-06, "loss": 1.6173, "step": 69 }, { "epoch": 0.004742868758045938, "grad_norm": 16.4691104888916, "learning_rate": 9.2e-06, "loss": 1.6055, "step": 70 }, { "epoch": 0.004810624026018023, "grad_norm": 17.788105010986328, "learning_rate": 9.333333333333334e-06, "loss": 1.7996, "step": 71 }, { "epoch": 0.004878379293990108, "grad_norm": 20.043182373046875, "learning_rate": 9.466666666666667e-06, "loss": 1.7224, "step": 72 }, { "epoch": 0.0049461345619621925, "grad_norm": 14.918837547302246, "learning_rate": 9.600000000000001e-06, "loss": 1.4448, "step": 73 }, { "epoch": 0.005013889829934277, "grad_norm": 15.70170783996582, "learning_rate": 9.733333333333334e-06, "loss": 1.7469, "step": 74 }, { "epoch": 0.005081645097906362, "grad_norm": 19.128456115722656, "learning_rate": 9.866666666666667e-06, "loss": 1.9505, "step": 75 }, { "epoch": 0.005149400365878447, "grad_norm": 34.82925796508789, "learning_rate": 1e-05, "loss": 1.6543, "step": 76 }, { "epoch": 0.005217155633850532, "grad_norm": 19.61214256286621, "learning_rate": 1.0133333333333333e-05, "loss": 1.9051, "step": 77 }, { "epoch": 0.005284910901822617, "grad_norm": 20.190847396850586, "learning_rate": 1.0266666666666668e-05, "loss": 1.6016, "step": 78 }, { "epoch": 0.005352666169794702, "grad_norm": 15.873608589172363, "learning_rate": 1.04e-05, "loss": 1.8286, "step": 79 }, { "epoch": 0.005420421437766786, "grad_norm": 17.513385772705078, "learning_rate": 1.0533333333333335e-05, "loss": 1.7122, "step": 80 }, { "epoch": 0.005488176705738871, "grad_norm": 13.794294357299805, "learning_rate": 1.0666666666666667e-05, "loss": 1.5369, "step": 81 }, { "epoch": 0.005555931973710956, "grad_norm": 16.763822555541992, "learning_rate": 1.08e-05, "loss": 1.688, "step": 82 }, { "epoch": 0.005623687241683041, "grad_norm": 15.182608604431152, "learning_rate": 1.0933333333333334e-05, "loss": 1.9076, "step": 83 }, { "epoch": 0.005691442509655126, "grad_norm": 19.11581802368164, "learning_rate": 1.1066666666666667e-05, "loss": 1.6034, "step": 84 }, { "epoch": 0.005759197777627211, "grad_norm": 18.303098678588867, "learning_rate": 1.1200000000000001e-05, "loss": 1.8248, "step": 85 }, { "epoch": 0.005826953045599295, "grad_norm": 17.912731170654297, "learning_rate": 1.1333333333333334e-05, "loss": 1.737, "step": 86 }, { "epoch": 0.00589470831357138, "grad_norm": 18.4353084564209, "learning_rate": 1.1466666666666666e-05, "loss": 1.7141, "step": 87 }, { "epoch": 0.005962463581543465, "grad_norm": 17.006074905395508, "learning_rate": 1.16e-05, "loss": 1.6568, "step": 88 }, { "epoch": 0.0060302188495155494, "grad_norm": 15.084410667419434, "learning_rate": 1.1733333333333333e-05, "loss": 1.3369, "step": 89 }, { "epoch": 0.006097974117487635, "grad_norm": 14.984724044799805, "learning_rate": 1.1866666666666668e-05, "loss": 1.777, "step": 90 }, { "epoch": 0.00616572938545972, "grad_norm": 19.447574615478516, "learning_rate": 1.2e-05, "loss": 1.4225, "step": 91 }, { "epoch": 0.006233484653431804, "grad_norm": 15.212474822998047, "learning_rate": 1.2133333333333335e-05, "loss": 1.4896, "step": 92 }, { "epoch": 0.006301239921403889, "grad_norm": 17.97924041748047, "learning_rate": 1.2266666666666667e-05, "loss": 1.5947, "step": 93 }, { "epoch": 0.006368995189375974, "grad_norm": 13.695365905761719, "learning_rate": 1.24e-05, "loss": 1.471, "step": 94 }, { "epoch": 0.0064367504573480585, "grad_norm": 15.574562072753906, "learning_rate": 1.2533333333333332e-05, "loss": 1.5803, "step": 95 }, { "epoch": 0.006504505725320144, "grad_norm": 16.71872329711914, "learning_rate": 1.2666666666666668e-05, "loss": 1.7408, "step": 96 }, { "epoch": 0.006572260993292229, "grad_norm": 14.232036590576172, "learning_rate": 1.2800000000000001e-05, "loss": 1.4414, "step": 97 }, { "epoch": 0.0066400162612643135, "grad_norm": 14.430774688720703, "learning_rate": 1.2933333333333334e-05, "loss": 1.712, "step": 98 }, { "epoch": 0.006707771529236398, "grad_norm": 15.887603759765625, "learning_rate": 1.3066666666666666e-05, "loss": 1.6097, "step": 99 }, { "epoch": 0.006775526797208483, "grad_norm": 21.155643463134766, "learning_rate": 1.32e-05, "loss": 2.0581, "step": 100 }, { "epoch": 0.0068432820651805676, "grad_norm": 17.30326271057129, "learning_rate": 1.3333333333333333e-05, "loss": 1.2823, "step": 101 }, { "epoch": 0.006911037333152652, "grad_norm": 18.184707641601562, "learning_rate": 1.3466666666666666e-05, "loss": 1.5165, "step": 102 }, { "epoch": 0.006978792601124738, "grad_norm": 23.46563148498535, "learning_rate": 1.3600000000000002e-05, "loss": 1.406, "step": 103 }, { "epoch": 0.0070465478690968225, "grad_norm": 14.586087226867676, "learning_rate": 1.3733333333333335e-05, "loss": 1.8416, "step": 104 }, { "epoch": 0.007114303137068907, "grad_norm": 14.55811595916748, "learning_rate": 1.3866666666666667e-05, "loss": 1.5466, "step": 105 }, { "epoch": 0.007182058405040992, "grad_norm": 20.244586944580078, "learning_rate": 1.4000000000000001e-05, "loss": 1.755, "step": 106 }, { "epoch": 0.007249813673013077, "grad_norm": 11.932172775268555, "learning_rate": 1.4133333333333334e-05, "loss": 1.4543, "step": 107 }, { "epoch": 0.007317568940985161, "grad_norm": 17.406888961791992, "learning_rate": 1.4266666666666667e-05, "loss": 1.4436, "step": 108 }, { "epoch": 0.007385324208957246, "grad_norm": 14.591705322265625, "learning_rate": 1.44e-05, "loss": 1.4088, "step": 109 }, { "epoch": 0.007453079476929332, "grad_norm": 18.028337478637695, "learning_rate": 1.4533333333333335e-05, "loss": 1.6675, "step": 110 }, { "epoch": 0.007520834744901416, "grad_norm": 16.37979507446289, "learning_rate": 1.4666666666666668e-05, "loss": 1.6646, "step": 111 }, { "epoch": 0.007588590012873501, "grad_norm": 19.028308868408203, "learning_rate": 1.48e-05, "loss": 1.701, "step": 112 }, { "epoch": 0.007656345280845586, "grad_norm": 17.28181266784668, "learning_rate": 1.4933333333333335e-05, "loss": 1.3984, "step": 113 }, { "epoch": 0.00772410054881767, "grad_norm": 16.78407859802246, "learning_rate": 1.5066666666666668e-05, "loss": 1.6851, "step": 114 }, { "epoch": 0.007791855816789755, "grad_norm": 15.634642601013184, "learning_rate": 1.52e-05, "loss": 1.6484, "step": 115 }, { "epoch": 0.00785961108476184, "grad_norm": 13.879626274108887, "learning_rate": 1.5333333333333334e-05, "loss": 1.4609, "step": 116 }, { "epoch": 0.007927366352733925, "grad_norm": 16.247541427612305, "learning_rate": 1.546666666666667e-05, "loss": 1.5219, "step": 117 }, { "epoch": 0.00799512162070601, "grad_norm": 20.57497215270996, "learning_rate": 1.56e-05, "loss": 1.6038, "step": 118 }, { "epoch": 0.008062876888678095, "grad_norm": 16.32591438293457, "learning_rate": 1.5733333333333334e-05, "loss": 1.5315, "step": 119 }, { "epoch": 0.00813063215665018, "grad_norm": 18.236255645751953, "learning_rate": 1.586666666666667e-05, "loss": 1.6589, "step": 120 }, { "epoch": 0.008198387424622264, "grad_norm": 20.850723266601562, "learning_rate": 1.6000000000000003e-05, "loss": 1.5556, "step": 121 }, { "epoch": 0.008266142692594349, "grad_norm": 17.931947708129883, "learning_rate": 1.6133333333333334e-05, "loss": 1.8209, "step": 122 }, { "epoch": 0.008333897960566434, "grad_norm": 20.531381607055664, "learning_rate": 1.6266666666666665e-05, "loss": 1.6421, "step": 123 }, { "epoch": 0.008401653228538518, "grad_norm": 15.6729736328125, "learning_rate": 1.6400000000000002e-05, "loss": 1.5652, "step": 124 }, { "epoch": 0.008469408496510603, "grad_norm": 18.139949798583984, "learning_rate": 1.6533333333333333e-05, "loss": 1.5578, "step": 125 }, { "epoch": 0.00853716376448269, "grad_norm": 15.77291202545166, "learning_rate": 1.6666666666666667e-05, "loss": 1.6091, "step": 126 }, { "epoch": 0.008604919032454774, "grad_norm": 18.00165557861328, "learning_rate": 1.6800000000000002e-05, "loss": 1.4725, "step": 127 }, { "epoch": 0.008672674300426859, "grad_norm": 17.33936309814453, "learning_rate": 1.6933333333333333e-05, "loss": 1.4272, "step": 128 }, { "epoch": 0.008740429568398943, "grad_norm": 16.75677490234375, "learning_rate": 1.7066666666666667e-05, "loss": 1.602, "step": 129 }, { "epoch": 0.008808184836371028, "grad_norm": 17.761140823364258, "learning_rate": 1.7199999999999998e-05, "loss": 1.8523, "step": 130 }, { "epoch": 0.008875940104343113, "grad_norm": 15.888497352600098, "learning_rate": 1.7333333333333336e-05, "loss": 1.5594, "step": 131 }, { "epoch": 0.008943695372315198, "grad_norm": 19.62826156616211, "learning_rate": 1.7466666666666667e-05, "loss": 1.6909, "step": 132 }, { "epoch": 0.009011450640287282, "grad_norm": 15.62592887878418, "learning_rate": 1.76e-05, "loss": 1.5873, "step": 133 }, { "epoch": 0.009079205908259367, "grad_norm": 17.64893913269043, "learning_rate": 1.7733333333333335e-05, "loss": 1.5681, "step": 134 }, { "epoch": 0.009146961176231452, "grad_norm": 15.14854907989502, "learning_rate": 1.7866666666666666e-05, "loss": 1.3324, "step": 135 }, { "epoch": 0.009214716444203536, "grad_norm": 18.115419387817383, "learning_rate": 1.8e-05, "loss": 1.5512, "step": 136 }, { "epoch": 0.009282471712175621, "grad_norm": 18.2511043548584, "learning_rate": 1.8133333333333335e-05, "loss": 1.6405, "step": 137 }, { "epoch": 0.009350226980147706, "grad_norm": 13.296855926513672, "learning_rate": 1.826666666666667e-05, "loss": 1.2269, "step": 138 }, { "epoch": 0.009417982248119792, "grad_norm": 17.17320442199707, "learning_rate": 1.84e-05, "loss": 1.4031, "step": 139 }, { "epoch": 0.009485737516091877, "grad_norm": 15.399650573730469, "learning_rate": 1.8533333333333334e-05, "loss": 1.4793, "step": 140 }, { "epoch": 0.009553492784063962, "grad_norm": 15.0385160446167, "learning_rate": 1.866666666666667e-05, "loss": 1.4129, "step": 141 }, { "epoch": 0.009621248052036046, "grad_norm": 14.586984634399414, "learning_rate": 1.88e-05, "loss": 1.1959, "step": 142 }, { "epoch": 0.009689003320008131, "grad_norm": 14.939247131347656, "learning_rate": 1.8933333333333334e-05, "loss": 1.6728, "step": 143 }, { "epoch": 0.009756758587980216, "grad_norm": 18.1126651763916, "learning_rate": 1.9066666666666668e-05, "loss": 1.5035, "step": 144 }, { "epoch": 0.0098245138559523, "grad_norm": 16.0823974609375, "learning_rate": 1.9200000000000003e-05, "loss": 1.544, "step": 145 }, { "epoch": 0.009892269123924385, "grad_norm": 17.45062828063965, "learning_rate": 1.9333333333333333e-05, "loss": 1.4593, "step": 146 }, { "epoch": 0.00996002439189647, "grad_norm": 21.030731201171875, "learning_rate": 1.9466666666666668e-05, "loss": 1.6544, "step": 147 }, { "epoch": 0.010027779659868554, "grad_norm": 18.31300163269043, "learning_rate": 1.9600000000000002e-05, "loss": 1.7682, "step": 148 }, { "epoch": 0.01009553492784064, "grad_norm": 13.541064262390137, "learning_rate": 1.9733333333333333e-05, "loss": 1.333, "step": 149 }, { "epoch": 0.010163290195812724, "grad_norm": 13.538237571716309, "learning_rate": 1.9866666666666667e-05, "loss": 1.3629, "step": 150 }, { "epoch": 0.010231045463784809, "grad_norm": 18.393657684326172, "learning_rate": 2e-05, "loss": 1.4467, "step": 151 }, { "epoch": 0.010298800731756893, "grad_norm": 19.18985939025879, "learning_rate": 2.0133333333333336e-05, "loss": 1.612, "step": 152 }, { "epoch": 0.01036655599972898, "grad_norm": 14.230488777160645, "learning_rate": 2.0266666666666667e-05, "loss": 1.4047, "step": 153 }, { "epoch": 0.010434311267701064, "grad_norm": 13.807863235473633, "learning_rate": 2.04e-05, "loss": 1.4977, "step": 154 }, { "epoch": 0.010502066535673149, "grad_norm": 15.149568557739258, "learning_rate": 2.0533333333333336e-05, "loss": 1.3856, "step": 155 }, { "epoch": 0.010569821803645234, "grad_norm": 16.095308303833008, "learning_rate": 2.0666666666666666e-05, "loss": 1.3687, "step": 156 }, { "epoch": 0.010637577071617318, "grad_norm": 14.864961624145508, "learning_rate": 2.08e-05, "loss": 1.5707, "step": 157 }, { "epoch": 0.010705332339589403, "grad_norm": 15.9810152053833, "learning_rate": 2.0933333333333335e-05, "loss": 1.6841, "step": 158 }, { "epoch": 0.010773087607561488, "grad_norm": 16.706584930419922, "learning_rate": 2.106666666666667e-05, "loss": 1.5017, "step": 159 }, { "epoch": 0.010840842875533573, "grad_norm": 18.726655960083008, "learning_rate": 2.12e-05, "loss": 1.7177, "step": 160 }, { "epoch": 0.010908598143505657, "grad_norm": 17.22323989868164, "learning_rate": 2.1333333333333335e-05, "loss": 1.5452, "step": 161 }, { "epoch": 0.010976353411477742, "grad_norm": 14.823019027709961, "learning_rate": 2.146666666666667e-05, "loss": 1.2856, "step": 162 }, { "epoch": 0.011044108679449827, "grad_norm": 14.756085395812988, "learning_rate": 2.16e-05, "loss": 1.4942, "step": 163 }, { "epoch": 0.011111863947421911, "grad_norm": 18.523683547973633, "learning_rate": 2.1733333333333334e-05, "loss": 1.4832, "step": 164 }, { "epoch": 0.011179619215393996, "grad_norm": 16.75520896911621, "learning_rate": 2.186666666666667e-05, "loss": 1.1847, "step": 165 }, { "epoch": 0.011247374483366082, "grad_norm": 17.045665740966797, "learning_rate": 2.2000000000000003e-05, "loss": 1.6358, "step": 166 }, { "epoch": 0.011315129751338167, "grad_norm": 17.235095977783203, "learning_rate": 2.2133333333333334e-05, "loss": 1.6984, "step": 167 }, { "epoch": 0.011382885019310252, "grad_norm": 18.120485305786133, "learning_rate": 2.2266666666666668e-05, "loss": 1.5544, "step": 168 }, { "epoch": 0.011450640287282337, "grad_norm": 15.252264976501465, "learning_rate": 2.2400000000000002e-05, "loss": 1.4386, "step": 169 }, { "epoch": 0.011518395555254421, "grad_norm": 13.91727066040039, "learning_rate": 2.2533333333333333e-05, "loss": 1.2083, "step": 170 }, { "epoch": 0.011586150823226506, "grad_norm": 16.50945472717285, "learning_rate": 2.2666666666666668e-05, "loss": 1.5878, "step": 171 }, { "epoch": 0.01165390609119859, "grad_norm": 12.260165214538574, "learning_rate": 2.2800000000000002e-05, "loss": 1.4173, "step": 172 }, { "epoch": 0.011721661359170675, "grad_norm": 17.12419891357422, "learning_rate": 2.2933333333333333e-05, "loss": 1.3117, "step": 173 }, { "epoch": 0.01178941662714276, "grad_norm": 17.508407592773438, "learning_rate": 2.3066666666666667e-05, "loss": 1.4294, "step": 174 }, { "epoch": 0.011857171895114845, "grad_norm": 17.78769302368164, "learning_rate": 2.32e-05, "loss": 1.8437, "step": 175 }, { "epoch": 0.01192492716308693, "grad_norm": 13.79132080078125, "learning_rate": 2.3333333333333336e-05, "loss": 1.3969, "step": 176 }, { "epoch": 0.011992682431059014, "grad_norm": 16.519269943237305, "learning_rate": 2.3466666666666667e-05, "loss": 1.5833, "step": 177 }, { "epoch": 0.012060437699031099, "grad_norm": 15.920942306518555, "learning_rate": 2.36e-05, "loss": 1.4783, "step": 178 }, { "epoch": 0.012128192967003185, "grad_norm": 17.00782585144043, "learning_rate": 2.3733333333333335e-05, "loss": 1.4038, "step": 179 }, { "epoch": 0.01219594823497527, "grad_norm": 18.844655990600586, "learning_rate": 2.3866666666666666e-05, "loss": 1.653, "step": 180 }, { "epoch": 0.012263703502947355, "grad_norm": 18.05002784729004, "learning_rate": 2.4e-05, "loss": 1.4137, "step": 181 }, { "epoch": 0.01233145877091944, "grad_norm": 14.200697898864746, "learning_rate": 2.4133333333333335e-05, "loss": 1.3067, "step": 182 }, { "epoch": 0.012399214038891524, "grad_norm": 16.28152084350586, "learning_rate": 2.426666666666667e-05, "loss": 1.3425, "step": 183 }, { "epoch": 0.012466969306863609, "grad_norm": 18.691898345947266, "learning_rate": 2.44e-05, "loss": 1.5995, "step": 184 }, { "epoch": 0.012534724574835693, "grad_norm": 16.62773895263672, "learning_rate": 2.4533333333333334e-05, "loss": 1.5722, "step": 185 }, { "epoch": 0.012602479842807778, "grad_norm": 19.650226593017578, "learning_rate": 2.466666666666667e-05, "loss": 1.5445, "step": 186 }, { "epoch": 0.012670235110779863, "grad_norm": 14.582402229309082, "learning_rate": 2.48e-05, "loss": 1.4822, "step": 187 }, { "epoch": 0.012737990378751948, "grad_norm": 16.5295352935791, "learning_rate": 2.4933333333333334e-05, "loss": 1.5981, "step": 188 }, { "epoch": 0.012805745646724032, "grad_norm": 17.096031188964844, "learning_rate": 2.5066666666666665e-05, "loss": 1.2527, "step": 189 }, { "epoch": 0.012873500914696117, "grad_norm": 15.424555778503418, "learning_rate": 2.5200000000000003e-05, "loss": 1.5284, "step": 190 }, { "epoch": 0.012941256182668202, "grad_norm": 14.505122184753418, "learning_rate": 2.5333333333333337e-05, "loss": 1.5726, "step": 191 }, { "epoch": 0.013009011450640288, "grad_norm": 13.430007934570312, "learning_rate": 2.5466666666666668e-05, "loss": 1.5905, "step": 192 }, { "epoch": 0.013076766718612373, "grad_norm": 12.291808128356934, "learning_rate": 2.5600000000000002e-05, "loss": 1.3122, "step": 193 }, { "epoch": 0.013144521986584458, "grad_norm": 16.053747177124023, "learning_rate": 2.5733333333333337e-05, "loss": 1.3164, "step": 194 }, { "epoch": 0.013212277254556542, "grad_norm": 17.762910842895508, "learning_rate": 2.5866666666666667e-05, "loss": 1.4727, "step": 195 }, { "epoch": 0.013280032522528627, "grad_norm": 15.695332527160645, "learning_rate": 2.6000000000000002e-05, "loss": 1.5584, "step": 196 }, { "epoch": 0.013347787790500712, "grad_norm": 14.789410591125488, "learning_rate": 2.6133333333333333e-05, "loss": 1.3253, "step": 197 }, { "epoch": 0.013415543058472796, "grad_norm": 15.248231887817383, "learning_rate": 2.6266666666666667e-05, "loss": 1.3087, "step": 198 }, { "epoch": 0.013483298326444881, "grad_norm": 18.894878387451172, "learning_rate": 2.64e-05, "loss": 1.2755, "step": 199 }, { "epoch": 0.013551053594416966, "grad_norm": 16.611616134643555, "learning_rate": 2.6533333333333332e-05, "loss": 1.4218, "step": 200 }, { "epoch": 0.01361880886238905, "grad_norm": 17.056982040405273, "learning_rate": 2.6666666666666667e-05, "loss": 1.4376, "step": 201 }, { "epoch": 0.013686564130361135, "grad_norm": 15.91457748413086, "learning_rate": 2.6800000000000004e-05, "loss": 1.4836, "step": 202 }, { "epoch": 0.01375431939833322, "grad_norm": 16.371660232543945, "learning_rate": 2.6933333333333332e-05, "loss": 1.7374, "step": 203 }, { "epoch": 0.013822074666305304, "grad_norm": 14.45882511138916, "learning_rate": 2.706666666666667e-05, "loss": 1.3324, "step": 204 }, { "epoch": 0.013889829934277391, "grad_norm": 18.779190063476562, "learning_rate": 2.7200000000000004e-05, "loss": 1.4845, "step": 205 }, { "epoch": 0.013957585202249476, "grad_norm": 19.7078914642334, "learning_rate": 2.733333333333333e-05, "loss": 1.6681, "step": 206 }, { "epoch": 0.01402534047022156, "grad_norm": 20.498266220092773, "learning_rate": 2.746666666666667e-05, "loss": 1.559, "step": 207 }, { "epoch": 0.014093095738193645, "grad_norm": 20.2978515625, "learning_rate": 2.7600000000000003e-05, "loss": 1.5821, "step": 208 }, { "epoch": 0.01416085100616573, "grad_norm": 18.85460090637207, "learning_rate": 2.7733333333333334e-05, "loss": 1.3, "step": 209 }, { "epoch": 0.014228606274137814, "grad_norm": 13.912948608398438, "learning_rate": 2.786666666666667e-05, "loss": 1.4833, "step": 210 }, { "epoch": 0.014296361542109899, "grad_norm": 19.084806442260742, "learning_rate": 2.8000000000000003e-05, "loss": 1.3379, "step": 211 }, { "epoch": 0.014364116810081984, "grad_norm": 15.123217582702637, "learning_rate": 2.8133333333333334e-05, "loss": 1.354, "step": 212 }, { "epoch": 0.014431872078054069, "grad_norm": 14.127299308776855, "learning_rate": 2.8266666666666668e-05, "loss": 1.2341, "step": 213 }, { "epoch": 0.014499627346026153, "grad_norm": 15.609139442443848, "learning_rate": 2.84e-05, "loss": 1.3474, "step": 214 }, { "epoch": 0.014567382613998238, "grad_norm": 20.588394165039062, "learning_rate": 2.8533333333333333e-05, "loss": 1.4881, "step": 215 }, { "epoch": 0.014635137881970323, "grad_norm": 12.27170467376709, "learning_rate": 2.8666666666666668e-05, "loss": 1.2255, "step": 216 }, { "epoch": 0.014702893149942407, "grad_norm": 18.14453125, "learning_rate": 2.88e-05, "loss": 1.6328, "step": 217 }, { "epoch": 0.014770648417914492, "grad_norm": 17.722230911254883, "learning_rate": 2.8933333333333333e-05, "loss": 2.0172, "step": 218 }, { "epoch": 0.014838403685886578, "grad_norm": 14.454687118530273, "learning_rate": 2.906666666666667e-05, "loss": 1.2936, "step": 219 }, { "epoch": 0.014906158953858663, "grad_norm": 16.369625091552734, "learning_rate": 2.9199999999999998e-05, "loss": 1.6289, "step": 220 }, { "epoch": 0.014973914221830748, "grad_norm": 17.64832305908203, "learning_rate": 2.9333333333333336e-05, "loss": 1.3135, "step": 221 }, { "epoch": 0.015041669489802833, "grad_norm": 14.401778221130371, "learning_rate": 2.946666666666667e-05, "loss": 1.2614, "step": 222 }, { "epoch": 0.015109424757774917, "grad_norm": 17.145814895629883, "learning_rate": 2.96e-05, "loss": 1.3473, "step": 223 }, { "epoch": 0.015177180025747002, "grad_norm": 17.273954391479492, "learning_rate": 2.9733333333333336e-05, "loss": 1.3747, "step": 224 }, { "epoch": 0.015244935293719087, "grad_norm": 17.190818786621094, "learning_rate": 2.986666666666667e-05, "loss": 1.5452, "step": 225 }, { "epoch": 0.015312690561691171, "grad_norm": 19.573585510253906, "learning_rate": 3e-05, "loss": 1.4942, "step": 226 }, { "epoch": 0.015380445829663256, "grad_norm": 15.349540710449219, "learning_rate": 3.0133333333333335e-05, "loss": 1.4277, "step": 227 }, { "epoch": 0.01544820109763534, "grad_norm": 19.786211013793945, "learning_rate": 3.0266666666666666e-05, "loss": 1.5014, "step": 228 }, { "epoch": 0.015515956365607425, "grad_norm": 17.51229476928711, "learning_rate": 3.04e-05, "loss": 1.695, "step": 229 }, { "epoch": 0.01558371163357951, "grad_norm": 19.894756317138672, "learning_rate": 3.0533333333333335e-05, "loss": 1.4994, "step": 230 }, { "epoch": 0.015651466901551597, "grad_norm": 13.575235366821289, "learning_rate": 3.066666666666667e-05, "loss": 1.4037, "step": 231 }, { "epoch": 0.01571922216952368, "grad_norm": 18.516822814941406, "learning_rate": 3.08e-05, "loss": 1.5494, "step": 232 }, { "epoch": 0.015786977437495766, "grad_norm": 17.343368530273438, "learning_rate": 3.093333333333334e-05, "loss": 1.1708, "step": 233 }, { "epoch": 0.01585473270546785, "grad_norm": 18.78635597229004, "learning_rate": 3.1066666666666665e-05, "loss": 1.6543, "step": 234 }, { "epoch": 0.015922487973439935, "grad_norm": 16.219989776611328, "learning_rate": 3.12e-05, "loss": 1.381, "step": 235 }, { "epoch": 0.01599024324141202, "grad_norm": 15.987724304199219, "learning_rate": 3.1333333333333334e-05, "loss": 1.3981, "step": 236 }, { "epoch": 0.016057998509384105, "grad_norm": 17.251537322998047, "learning_rate": 3.146666666666667e-05, "loss": 1.8181, "step": 237 }, { "epoch": 0.01612575377735619, "grad_norm": 15.804817199707031, "learning_rate": 3.16e-05, "loss": 1.4755, "step": 238 }, { "epoch": 0.016193509045328274, "grad_norm": 16.508943557739258, "learning_rate": 3.173333333333334e-05, "loss": 1.2167, "step": 239 }, { "epoch": 0.01626126431330036, "grad_norm": 17.382783889770508, "learning_rate": 3.1866666666666664e-05, "loss": 1.6186, "step": 240 }, { "epoch": 0.016329019581272444, "grad_norm": 17.938161849975586, "learning_rate": 3.2000000000000005e-05, "loss": 1.3685, "step": 241 }, { "epoch": 0.016396774849244528, "grad_norm": 21.103981018066406, "learning_rate": 3.213333333333334e-05, "loss": 1.6275, "step": 242 }, { "epoch": 0.016464530117216613, "grad_norm": 15.145224571228027, "learning_rate": 3.226666666666667e-05, "loss": 1.2058, "step": 243 }, { "epoch": 0.016532285385188698, "grad_norm": 15.368849754333496, "learning_rate": 3.24e-05, "loss": 1.3436, "step": 244 }, { "epoch": 0.016600040653160782, "grad_norm": 16.95122718811035, "learning_rate": 3.253333333333333e-05, "loss": 1.4346, "step": 245 }, { "epoch": 0.016667795921132867, "grad_norm": 14.612030029296875, "learning_rate": 3.266666666666667e-05, "loss": 1.4759, "step": 246 }, { "epoch": 0.016735551189104952, "grad_norm": 17.6989688873291, "learning_rate": 3.2800000000000004e-05, "loss": 1.3612, "step": 247 }, { "epoch": 0.016803306457077036, "grad_norm": 19.985427856445312, "learning_rate": 3.293333333333333e-05, "loss": 1.2718, "step": 248 }, { "epoch": 0.01687106172504912, "grad_norm": 12.162117958068848, "learning_rate": 3.3066666666666666e-05, "loss": 1.2133, "step": 249 }, { "epoch": 0.016938816993021206, "grad_norm": 19.053850173950195, "learning_rate": 3.32e-05, "loss": 1.1345, "step": 250 }, { "epoch": 0.01700657226099329, "grad_norm": 15.729182243347168, "learning_rate": 3.3333333333333335e-05, "loss": 1.3079, "step": 251 }, { "epoch": 0.01707432752896538, "grad_norm": 18.984880447387695, "learning_rate": 3.346666666666667e-05, "loss": 1.5124, "step": 252 }, { "epoch": 0.017142082796937463, "grad_norm": 14.444266319274902, "learning_rate": 3.3600000000000004e-05, "loss": 1.4279, "step": 253 }, { "epoch": 0.017209838064909548, "grad_norm": 17.174089431762695, "learning_rate": 3.373333333333333e-05, "loss": 1.1192, "step": 254 }, { "epoch": 0.017277593332881633, "grad_norm": 15.885781288146973, "learning_rate": 3.3866666666666665e-05, "loss": 1.3918, "step": 255 }, { "epoch": 0.017345348600853717, "grad_norm": 14.934944152832031, "learning_rate": 3.4000000000000007e-05, "loss": 1.3348, "step": 256 }, { "epoch": 0.017413103868825802, "grad_norm": 17.977643966674805, "learning_rate": 3.4133333333333334e-05, "loss": 1.392, "step": 257 }, { "epoch": 0.017480859136797887, "grad_norm": 17.527130126953125, "learning_rate": 3.426666666666667e-05, "loss": 1.2939, "step": 258 }, { "epoch": 0.01754861440476997, "grad_norm": 17.823453903198242, "learning_rate": 3.4399999999999996e-05, "loss": 1.3118, "step": 259 }, { "epoch": 0.017616369672742056, "grad_norm": 15.746111869812012, "learning_rate": 3.453333333333334e-05, "loss": 1.3865, "step": 260 }, { "epoch": 0.01768412494071414, "grad_norm": 18.798080444335938, "learning_rate": 3.466666666666667e-05, "loss": 1.399, "step": 261 }, { "epoch": 0.017751880208686226, "grad_norm": 15.139904975891113, "learning_rate": 3.48e-05, "loss": 1.5248, "step": 262 }, { "epoch": 0.01781963547665831, "grad_norm": 16.150434494018555, "learning_rate": 3.493333333333333e-05, "loss": 1.5033, "step": 263 }, { "epoch": 0.017887390744630395, "grad_norm": 19.125490188598633, "learning_rate": 3.506666666666667e-05, "loss": 1.4695, "step": 264 }, { "epoch": 0.01795514601260248, "grad_norm": 15.708895683288574, "learning_rate": 3.52e-05, "loss": 1.6189, "step": 265 }, { "epoch": 0.018022901280574564, "grad_norm": 14.185934066772461, "learning_rate": 3.5333333333333336e-05, "loss": 1.3536, "step": 266 }, { "epoch": 0.01809065654854665, "grad_norm": 15.22371768951416, "learning_rate": 3.546666666666667e-05, "loss": 1.4652, "step": 267 }, { "epoch": 0.018158411816518734, "grad_norm": 15.854277610778809, "learning_rate": 3.56e-05, "loss": 1.7211, "step": 268 }, { "epoch": 0.01822616708449082, "grad_norm": 18.41203498840332, "learning_rate": 3.573333333333333e-05, "loss": 1.54, "step": 269 }, { "epoch": 0.018293922352462903, "grad_norm": 15.323198318481445, "learning_rate": 3.586666666666667e-05, "loss": 1.3582, "step": 270 }, { "epoch": 0.018361677620434988, "grad_norm": 15.056265830993652, "learning_rate": 3.6e-05, "loss": 1.2959, "step": 271 }, { "epoch": 0.018429432888407073, "grad_norm": 18.912555694580078, "learning_rate": 3.6133333333333335e-05, "loss": 1.5964, "step": 272 }, { "epoch": 0.018497188156379157, "grad_norm": 15.093083381652832, "learning_rate": 3.626666666666667e-05, "loss": 1.3403, "step": 273 }, { "epoch": 0.018564943424351242, "grad_norm": 17.38453483581543, "learning_rate": 3.6400000000000004e-05, "loss": 1.5082, "step": 274 }, { "epoch": 0.018632698692323327, "grad_norm": 16.53204345703125, "learning_rate": 3.653333333333334e-05, "loss": 1.2373, "step": 275 }, { "epoch": 0.01870045396029541, "grad_norm": 13.423516273498535, "learning_rate": 3.6666666666666666e-05, "loss": 1.3111, "step": 276 }, { "epoch": 0.018768209228267496, "grad_norm": 14.92001724243164, "learning_rate": 3.68e-05, "loss": 1.5767, "step": 277 }, { "epoch": 0.018835964496239584, "grad_norm": 15.229578971862793, "learning_rate": 3.6933333333333334e-05, "loss": 1.19, "step": 278 }, { "epoch": 0.01890371976421167, "grad_norm": 17.159273147583008, "learning_rate": 3.706666666666667e-05, "loss": 1.335, "step": 279 }, { "epoch": 0.018971475032183754, "grad_norm": 13.7230806350708, "learning_rate": 3.72e-05, "loss": 1.2587, "step": 280 }, { "epoch": 0.01903923030015584, "grad_norm": 15.375404357910156, "learning_rate": 3.733333333333334e-05, "loss": 1.2744, "step": 281 }, { "epoch": 0.019106985568127923, "grad_norm": 21.054534912109375, "learning_rate": 3.7466666666666665e-05, "loss": 1.5581, "step": 282 }, { "epoch": 0.019174740836100008, "grad_norm": 17.954307556152344, "learning_rate": 3.76e-05, "loss": 1.6617, "step": 283 }, { "epoch": 0.019242496104072093, "grad_norm": 17.954139709472656, "learning_rate": 3.773333333333334e-05, "loss": 1.7486, "step": 284 }, { "epoch": 0.019310251372044177, "grad_norm": 16.210704803466797, "learning_rate": 3.786666666666667e-05, "loss": 1.661, "step": 285 }, { "epoch": 0.019378006640016262, "grad_norm": 18.14916229248047, "learning_rate": 3.8e-05, "loss": 1.8154, "step": 286 }, { "epoch": 0.019445761907988347, "grad_norm": 16.372896194458008, "learning_rate": 3.8133333333333336e-05, "loss": 1.5464, "step": 287 }, { "epoch": 0.01951351717596043, "grad_norm": 16.4403076171875, "learning_rate": 3.8266666666666664e-05, "loss": 1.0815, "step": 288 }, { "epoch": 0.019581272443932516, "grad_norm": 16.612815856933594, "learning_rate": 3.8400000000000005e-05, "loss": 1.4338, "step": 289 }, { "epoch": 0.0196490277119046, "grad_norm": 17.0092716217041, "learning_rate": 3.853333333333334e-05, "loss": 1.5257, "step": 290 }, { "epoch": 0.019716782979876685, "grad_norm": 13.920248031616211, "learning_rate": 3.866666666666667e-05, "loss": 1.4595, "step": 291 }, { "epoch": 0.01978453824784877, "grad_norm": 12.699677467346191, "learning_rate": 3.88e-05, "loss": 1.2602, "step": 292 }, { "epoch": 0.019852293515820855, "grad_norm": 17.31343650817871, "learning_rate": 3.8933333333333336e-05, "loss": 1.2934, "step": 293 }, { "epoch": 0.01992004878379294, "grad_norm": 16.7819881439209, "learning_rate": 3.906666666666667e-05, "loss": 1.5385, "step": 294 }, { "epoch": 0.019987804051765024, "grad_norm": 14.175074577331543, "learning_rate": 3.9200000000000004e-05, "loss": 1.1644, "step": 295 }, { "epoch": 0.02005555931973711, "grad_norm": 22.863780975341797, "learning_rate": 3.933333333333333e-05, "loss": 1.5932, "step": 296 }, { "epoch": 0.020123314587709194, "grad_norm": 15.278692245483398, "learning_rate": 3.9466666666666666e-05, "loss": 1.3997, "step": 297 }, { "epoch": 0.02019106985568128, "grad_norm": 15.878965377807617, "learning_rate": 3.960000000000001e-05, "loss": 1.5992, "step": 298 }, { "epoch": 0.020258825123653363, "grad_norm": 17.444494247436523, "learning_rate": 3.9733333333333335e-05, "loss": 1.5224, "step": 299 }, { "epoch": 0.020326580391625448, "grad_norm": 15.011860847473145, "learning_rate": 3.986666666666667e-05, "loss": 1.2609, "step": 300 }, { "epoch": 0.020394335659597532, "grad_norm": 17.057064056396484, "learning_rate": 4e-05, "loss": 1.2724, "step": 301 }, { "epoch": 0.020462090927569617, "grad_norm": 14.946621894836426, "learning_rate": 4.013333333333333e-05, "loss": 1.273, "step": 302 }, { "epoch": 0.020529846195541702, "grad_norm": 15.072589874267578, "learning_rate": 4.026666666666667e-05, "loss": 1.2029, "step": 303 }, { "epoch": 0.020597601463513786, "grad_norm": 14.304384231567383, "learning_rate": 4.0400000000000006e-05, "loss": 1.3601, "step": 304 }, { "epoch": 0.020665356731485875, "grad_norm": 16.18627166748047, "learning_rate": 4.0533333333333334e-05, "loss": 1.5844, "step": 305 }, { "epoch": 0.02073311199945796, "grad_norm": 14.144999504089355, "learning_rate": 4.066666666666667e-05, "loss": 1.1638, "step": 306 }, { "epoch": 0.020800867267430044, "grad_norm": 13.793155670166016, "learning_rate": 4.08e-05, "loss": 1.2101, "step": 307 }, { "epoch": 0.02086862253540213, "grad_norm": 19.344085693359375, "learning_rate": 4.093333333333334e-05, "loss": 1.5899, "step": 308 }, { "epoch": 0.020936377803374213, "grad_norm": 13.670002937316895, "learning_rate": 4.106666666666667e-05, "loss": 1.4271, "step": 309 }, { "epoch": 0.021004133071346298, "grad_norm": 17.72041893005371, "learning_rate": 4.12e-05, "loss": 1.5276, "step": 310 }, { "epoch": 0.021071888339318383, "grad_norm": 14.103938102722168, "learning_rate": 4.133333333333333e-05, "loss": 1.2355, "step": 311 }, { "epoch": 0.021139643607290468, "grad_norm": 16.439529418945312, "learning_rate": 4.146666666666667e-05, "loss": 1.3393, "step": 312 }, { "epoch": 0.021207398875262552, "grad_norm": 13.821382522583008, "learning_rate": 4.16e-05, "loss": 1.275, "step": 313 }, { "epoch": 0.021275154143234637, "grad_norm": 19.987564086914062, "learning_rate": 4.1733333333333336e-05, "loss": 1.415, "step": 314 }, { "epoch": 0.02134290941120672, "grad_norm": 12.660404205322266, "learning_rate": 4.186666666666667e-05, "loss": 1.2378, "step": 315 }, { "epoch": 0.021410664679178806, "grad_norm": 14.009933471679688, "learning_rate": 4.2e-05, "loss": 1.1615, "step": 316 }, { "epoch": 0.02147841994715089, "grad_norm": 16.11932945251465, "learning_rate": 4.213333333333334e-05, "loss": 1.0653, "step": 317 }, { "epoch": 0.021546175215122976, "grad_norm": 15.943892478942871, "learning_rate": 4.226666666666667e-05, "loss": 1.6208, "step": 318 }, { "epoch": 0.02161393048309506, "grad_norm": 16.01548194885254, "learning_rate": 4.24e-05, "loss": 1.4052, "step": 319 }, { "epoch": 0.021681685751067145, "grad_norm": 15.703156471252441, "learning_rate": 4.2533333333333335e-05, "loss": 1.3736, "step": 320 }, { "epoch": 0.02174944101903923, "grad_norm": 20.367021560668945, "learning_rate": 4.266666666666667e-05, "loss": 1.5758, "step": 321 }, { "epoch": 0.021817196287011315, "grad_norm": 15.678637504577637, "learning_rate": 4.2800000000000004e-05, "loss": 1.4458, "step": 322 }, { "epoch": 0.0218849515549834, "grad_norm": 15.707669258117676, "learning_rate": 4.293333333333334e-05, "loss": 1.3065, "step": 323 }, { "epoch": 0.021952706822955484, "grad_norm": 16.07583999633789, "learning_rate": 4.3066666666666665e-05, "loss": 1.5875, "step": 324 }, { "epoch": 0.02202046209092757, "grad_norm": 15.248065948486328, "learning_rate": 4.32e-05, "loss": 1.5972, "step": 325 }, { "epoch": 0.022088217358899653, "grad_norm": 17.77379608154297, "learning_rate": 4.3333333333333334e-05, "loss": 1.3385, "step": 326 }, { "epoch": 0.022155972626871738, "grad_norm": 24.381973266601562, "learning_rate": 4.346666666666667e-05, "loss": 1.6428, "step": 327 }, { "epoch": 0.022223727894843823, "grad_norm": 17.3684024810791, "learning_rate": 4.36e-05, "loss": 1.3634, "step": 328 }, { "epoch": 0.022291483162815907, "grad_norm": 20.45623207092285, "learning_rate": 4.373333333333334e-05, "loss": 1.3947, "step": 329 }, { "epoch": 0.022359238430787992, "grad_norm": 17.434114456176758, "learning_rate": 4.3866666666666665e-05, "loss": 1.4269, "step": 330 }, { "epoch": 0.02242699369876008, "grad_norm": 13.574700355529785, "learning_rate": 4.4000000000000006e-05, "loss": 1.2807, "step": 331 }, { "epoch": 0.022494748966732165, "grad_norm": 16.404966354370117, "learning_rate": 4.413333333333334e-05, "loss": 1.4549, "step": 332 }, { "epoch": 0.02256250423470425, "grad_norm": 14.14947509765625, "learning_rate": 4.426666666666667e-05, "loss": 1.4187, "step": 333 }, { "epoch": 0.022630259502676334, "grad_norm": 17.632492065429688, "learning_rate": 4.44e-05, "loss": 1.3515, "step": 334 }, { "epoch": 0.02269801477064842, "grad_norm": 18.149261474609375, "learning_rate": 4.4533333333333336e-05, "loss": 1.4875, "step": 335 }, { "epoch": 0.022765770038620504, "grad_norm": 19.864049911499023, "learning_rate": 4.466666666666667e-05, "loss": 1.5622, "step": 336 }, { "epoch": 0.02283352530659259, "grad_norm": 16.7431583404541, "learning_rate": 4.4800000000000005e-05, "loss": 1.4344, "step": 337 }, { "epoch": 0.022901280574564673, "grad_norm": 16.53998565673828, "learning_rate": 4.493333333333333e-05, "loss": 1.1745, "step": 338 }, { "epoch": 0.022969035842536758, "grad_norm": 17.786945343017578, "learning_rate": 4.5066666666666667e-05, "loss": 1.4335, "step": 339 }, { "epoch": 0.023036791110508843, "grad_norm": 17.730606079101562, "learning_rate": 4.52e-05, "loss": 1.341, "step": 340 }, { "epoch": 0.023104546378480927, "grad_norm": 15.536660194396973, "learning_rate": 4.5333333333333335e-05, "loss": 1.4935, "step": 341 }, { "epoch": 0.023172301646453012, "grad_norm": 22.405893325805664, "learning_rate": 4.546666666666667e-05, "loss": 1.5538, "step": 342 }, { "epoch": 0.023240056914425097, "grad_norm": 15.963828086853027, "learning_rate": 4.5600000000000004e-05, "loss": 1.373, "step": 343 }, { "epoch": 0.02330781218239718, "grad_norm": 13.041223526000977, "learning_rate": 4.573333333333333e-05, "loss": 1.346, "step": 344 }, { "epoch": 0.023375567450369266, "grad_norm": 14.552989959716797, "learning_rate": 4.5866666666666666e-05, "loss": 1.2723, "step": 345 }, { "epoch": 0.02344332271834135, "grad_norm": 16.034404754638672, "learning_rate": 4.600000000000001e-05, "loss": 1.3671, "step": 346 }, { "epoch": 0.023511077986313435, "grad_norm": 17.5505428314209, "learning_rate": 4.6133333333333334e-05, "loss": 1.4553, "step": 347 }, { "epoch": 0.02357883325428552, "grad_norm": 14.029112815856934, "learning_rate": 4.626666666666667e-05, "loss": 1.3639, "step": 348 }, { "epoch": 0.023646588522257605, "grad_norm": 14.633806228637695, "learning_rate": 4.64e-05, "loss": 1.61, "step": 349 }, { "epoch": 0.02371434379022969, "grad_norm": 15.407472610473633, "learning_rate": 4.653333333333334e-05, "loss": 1.2449, "step": 350 }, { "epoch": 0.023782099058201774, "grad_norm": 13.801481246948242, "learning_rate": 4.666666666666667e-05, "loss": 1.3105, "step": 351 }, { "epoch": 0.02384985432617386, "grad_norm": 17.675159454345703, "learning_rate": 4.6800000000000006e-05, "loss": 1.3142, "step": 352 }, { "epoch": 0.023917609594145944, "grad_norm": 15.383625984191895, "learning_rate": 4.6933333333333333e-05, "loss": 1.2976, "step": 353 }, { "epoch": 0.02398536486211803, "grad_norm": 18.087081909179688, "learning_rate": 4.706666666666667e-05, "loss": 1.3391, "step": 354 }, { "epoch": 0.024053120130090113, "grad_norm": 15.790519714355469, "learning_rate": 4.72e-05, "loss": 1.7103, "step": 355 }, { "epoch": 0.024120875398062198, "grad_norm": 15.931246757507324, "learning_rate": 4.7333333333333336e-05, "loss": 1.4792, "step": 356 }, { "epoch": 0.024188630666034282, "grad_norm": 18.39167022705078, "learning_rate": 4.746666666666667e-05, "loss": 1.5085, "step": 357 }, { "epoch": 0.02425638593400637, "grad_norm": 13.518149375915527, "learning_rate": 4.76e-05, "loss": 1.2711, "step": 358 }, { "epoch": 0.024324141201978455, "grad_norm": 18.02762794494629, "learning_rate": 4.773333333333333e-05, "loss": 1.4113, "step": 359 }, { "epoch": 0.02439189646995054, "grad_norm": 15.260608673095703, "learning_rate": 4.7866666666666674e-05, "loss": 1.3286, "step": 360 }, { "epoch": 0.024459651737922625, "grad_norm": 16.26275062561035, "learning_rate": 4.8e-05, "loss": 1.4436, "step": 361 }, { "epoch": 0.02452740700589471, "grad_norm": 14.213051795959473, "learning_rate": 4.8133333333333336e-05, "loss": 1.1339, "step": 362 }, { "epoch": 0.024595162273866794, "grad_norm": 16.478811264038086, "learning_rate": 4.826666666666667e-05, "loss": 1.5534, "step": 363 }, { "epoch": 0.02466291754183888, "grad_norm": 13.24108600616455, "learning_rate": 4.8400000000000004e-05, "loss": 1.4767, "step": 364 }, { "epoch": 0.024730672809810963, "grad_norm": 15.167949676513672, "learning_rate": 4.853333333333334e-05, "loss": 1.1592, "step": 365 }, { "epoch": 0.024798428077783048, "grad_norm": 18.150367736816406, "learning_rate": 4.866666666666667e-05, "loss": 1.4172, "step": 366 }, { "epoch": 0.024866183345755133, "grad_norm": 13.250176429748535, "learning_rate": 4.88e-05, "loss": 1.5022, "step": 367 }, { "epoch": 0.024933938613727218, "grad_norm": 14.780989646911621, "learning_rate": 4.8933333333333335e-05, "loss": 1.3243, "step": 368 }, { "epoch": 0.025001693881699302, "grad_norm": 13.34748649597168, "learning_rate": 4.906666666666667e-05, "loss": 1.285, "step": 369 }, { "epoch": 0.025069449149671387, "grad_norm": 14.677448272705078, "learning_rate": 4.92e-05, "loss": 1.1982, "step": 370 }, { "epoch": 0.02513720441764347, "grad_norm": 18.211145401000977, "learning_rate": 4.933333333333334e-05, "loss": 1.4018, "step": 371 }, { "epoch": 0.025204959685615556, "grad_norm": 17.68402099609375, "learning_rate": 4.9466666666666665e-05, "loss": 1.6153, "step": 372 }, { "epoch": 0.02527271495358764, "grad_norm": 14.44299030303955, "learning_rate": 4.96e-05, "loss": 1.4092, "step": 373 }, { "epoch": 0.025340470221559726, "grad_norm": 15.467754364013672, "learning_rate": 4.973333333333334e-05, "loss": 1.5647, "step": 374 }, { "epoch": 0.02540822548953181, "grad_norm": 14.037840843200684, "learning_rate": 4.986666666666667e-05, "loss": 1.2106, "step": 375 }, { "epoch": 0.025475980757503895, "grad_norm": 15.72461223602295, "learning_rate": 5e-05, "loss": 1.1832, "step": 376 }, { "epoch": 0.02554373602547598, "grad_norm": 15.03637981414795, "learning_rate": 5.013333333333333e-05, "loss": 1.5803, "step": 377 }, { "epoch": 0.025611491293448065, "grad_norm": 15.995429992675781, "learning_rate": 5.026666666666667e-05, "loss": 1.3837, "step": 378 }, { "epoch": 0.02567924656142015, "grad_norm": 17.96516227722168, "learning_rate": 5.0400000000000005e-05, "loss": 1.5114, "step": 379 }, { "epoch": 0.025747001829392234, "grad_norm": 17.960783004760742, "learning_rate": 5.053333333333333e-05, "loss": 1.4012, "step": 380 }, { "epoch": 0.02581475709736432, "grad_norm": 14.399024963378906, "learning_rate": 5.0666666666666674e-05, "loss": 1.2906, "step": 381 }, { "epoch": 0.025882512365336403, "grad_norm": 14.268051147460938, "learning_rate": 5.08e-05, "loss": 1.2354, "step": 382 }, { "epoch": 0.025950267633308488, "grad_norm": 16.070646286010742, "learning_rate": 5.0933333333333336e-05, "loss": 1.272, "step": 383 }, { "epoch": 0.026018022901280576, "grad_norm": 18.698619842529297, "learning_rate": 5.106666666666668e-05, "loss": 1.352, "step": 384 }, { "epoch": 0.02608577816925266, "grad_norm": 13.932543754577637, "learning_rate": 5.1200000000000004e-05, "loss": 1.3249, "step": 385 }, { "epoch": 0.026153533437224746, "grad_norm": 14.76308822631836, "learning_rate": 5.133333333333333e-05, "loss": 1.1807, "step": 386 }, { "epoch": 0.02622128870519683, "grad_norm": 17.602182388305664, "learning_rate": 5.146666666666667e-05, "loss": 1.4017, "step": 387 }, { "epoch": 0.026289043973168915, "grad_norm": 13.589346885681152, "learning_rate": 5.16e-05, "loss": 1.4205, "step": 388 }, { "epoch": 0.026356799241141, "grad_norm": 16.37788200378418, "learning_rate": 5.1733333333333335e-05, "loss": 1.4255, "step": 389 }, { "epoch": 0.026424554509113084, "grad_norm": 16.24977684020996, "learning_rate": 5.1866666666666676e-05, "loss": 1.2526, "step": 390 }, { "epoch": 0.02649230977708517, "grad_norm": 15.02576732635498, "learning_rate": 5.2000000000000004e-05, "loss": 1.2147, "step": 391 }, { "epoch": 0.026560065045057254, "grad_norm": 16.754850387573242, "learning_rate": 5.213333333333333e-05, "loss": 1.5725, "step": 392 }, { "epoch": 0.02662782031302934, "grad_norm": 17.120729446411133, "learning_rate": 5.2266666666666665e-05, "loss": 1.351, "step": 393 }, { "epoch": 0.026695575581001423, "grad_norm": 18.83492660522461, "learning_rate": 5.2400000000000007e-05, "loss": 1.3044, "step": 394 }, { "epoch": 0.026763330848973508, "grad_norm": 18.66061019897461, "learning_rate": 5.2533333333333334e-05, "loss": 1.4889, "step": 395 }, { "epoch": 0.026831086116945593, "grad_norm": 18.493236541748047, "learning_rate": 5.266666666666666e-05, "loss": 1.5015, "step": 396 }, { "epoch": 0.026898841384917677, "grad_norm": 19.523067474365234, "learning_rate": 5.28e-05, "loss": 1.5179, "step": 397 }, { "epoch": 0.026966596652889762, "grad_norm": 18.207366943359375, "learning_rate": 5.293333333333334e-05, "loss": 1.374, "step": 398 }, { "epoch": 0.027034351920861847, "grad_norm": 13.021437644958496, "learning_rate": 5.3066666666666665e-05, "loss": 1.0259, "step": 399 }, { "epoch": 0.02710210718883393, "grad_norm": 14.108929634094238, "learning_rate": 5.3200000000000006e-05, "loss": 1.3914, "step": 400 }, { "epoch": 0.027169862456806016, "grad_norm": 15.087890625, "learning_rate": 5.333333333333333e-05, "loss": 1.3143, "step": 401 }, { "epoch": 0.0272376177247781, "grad_norm": 16.389965057373047, "learning_rate": 5.346666666666667e-05, "loss": 1.5286, "step": 402 }, { "epoch": 0.027305372992750186, "grad_norm": 17.855867385864258, "learning_rate": 5.360000000000001e-05, "loss": 1.4221, "step": 403 }, { "epoch": 0.02737312826072227, "grad_norm": 14.016682624816895, "learning_rate": 5.3733333333333336e-05, "loss": 1.0922, "step": 404 }, { "epoch": 0.027440883528694355, "grad_norm": 16.08883285522461, "learning_rate": 5.3866666666666664e-05, "loss": 1.4932, "step": 405 }, { "epoch": 0.02750863879666644, "grad_norm": 14.883580207824707, "learning_rate": 5.4000000000000005e-05, "loss": 1.0806, "step": 406 }, { "epoch": 0.027576394064638524, "grad_norm": 13.443008422851562, "learning_rate": 5.413333333333334e-05, "loss": 1.2653, "step": 407 }, { "epoch": 0.02764414933261061, "grad_norm": 13.391244888305664, "learning_rate": 5.4266666666666667e-05, "loss": 1.2788, "step": 408 }, { "epoch": 0.027711904600582694, "grad_norm": 13.449023246765137, "learning_rate": 5.440000000000001e-05, "loss": 1.133, "step": 409 }, { "epoch": 0.027779659868554782, "grad_norm": 13.777191162109375, "learning_rate": 5.4533333333333335e-05, "loss": 1.1509, "step": 410 }, { "epoch": 0.027847415136526867, "grad_norm": 19.04452896118164, "learning_rate": 5.466666666666666e-05, "loss": 1.3306, "step": 411 }, { "epoch": 0.02791517040449895, "grad_norm": 16.583112716674805, "learning_rate": 5.4800000000000004e-05, "loss": 1.3018, "step": 412 }, { "epoch": 0.027982925672471036, "grad_norm": 16.8561954498291, "learning_rate": 5.493333333333334e-05, "loss": 1.257, "step": 413 }, { "epoch": 0.02805068094044312, "grad_norm": 15.321064949035645, "learning_rate": 5.5066666666666666e-05, "loss": 1.2498, "step": 414 }, { "epoch": 0.028118436208415205, "grad_norm": 15.825779914855957, "learning_rate": 5.520000000000001e-05, "loss": 1.2177, "step": 415 }, { "epoch": 0.02818619147638729, "grad_norm": 15.665361404418945, "learning_rate": 5.5333333333333334e-05, "loss": 1.244, "step": 416 }, { "epoch": 0.028253946744359375, "grad_norm": 15.516046524047852, "learning_rate": 5.546666666666667e-05, "loss": 1.4957, "step": 417 }, { "epoch": 0.02832170201233146, "grad_norm": 19.148746490478516, "learning_rate": 5.560000000000001e-05, "loss": 1.4346, "step": 418 }, { "epoch": 0.028389457280303544, "grad_norm": 15.215538024902344, "learning_rate": 5.573333333333334e-05, "loss": 1.081, "step": 419 }, { "epoch": 0.02845721254827563, "grad_norm": 15.612996101379395, "learning_rate": 5.5866666666666665e-05, "loss": 1.1671, "step": 420 }, { "epoch": 0.028524967816247714, "grad_norm": 13.754039764404297, "learning_rate": 5.6000000000000006e-05, "loss": 1.2679, "step": 421 }, { "epoch": 0.028592723084219798, "grad_norm": 16.350305557250977, "learning_rate": 5.613333333333334e-05, "loss": 1.3034, "step": 422 }, { "epoch": 0.028660478352191883, "grad_norm": 16.560344696044922, "learning_rate": 5.626666666666667e-05, "loss": 1.3633, "step": 423 }, { "epoch": 0.028728233620163968, "grad_norm": 20.391889572143555, "learning_rate": 5.6399999999999995e-05, "loss": 1.3533, "step": 424 }, { "epoch": 0.028795988888136052, "grad_norm": 16.13326072692871, "learning_rate": 5.6533333333333336e-05, "loss": 1.3678, "step": 425 }, { "epoch": 0.028863744156108137, "grad_norm": 21.544612884521484, "learning_rate": 5.666666666666667e-05, "loss": 1.6275, "step": 426 }, { "epoch": 0.028931499424080222, "grad_norm": 17.097408294677734, "learning_rate": 5.68e-05, "loss": 1.6119, "step": 427 }, { "epoch": 0.028999254692052306, "grad_norm": 15.699347496032715, "learning_rate": 5.693333333333334e-05, "loss": 1.2644, "step": 428 }, { "epoch": 0.02906700996002439, "grad_norm": 16.643613815307617, "learning_rate": 5.706666666666667e-05, "loss": 1.3497, "step": 429 }, { "epoch": 0.029134765227996476, "grad_norm": 20.941123962402344, "learning_rate": 5.72e-05, "loss": 1.4232, "step": 430 }, { "epoch": 0.02920252049596856, "grad_norm": 17.549013137817383, "learning_rate": 5.7333333333333336e-05, "loss": 1.3429, "step": 431 }, { "epoch": 0.029270275763940645, "grad_norm": 19.702617645263672, "learning_rate": 5.746666666666667e-05, "loss": 1.7924, "step": 432 }, { "epoch": 0.02933803103191273, "grad_norm": 16.396209716796875, "learning_rate": 5.76e-05, "loss": 1.3296, "step": 433 }, { "epoch": 0.029405786299884815, "grad_norm": 15.823278427124023, "learning_rate": 5.773333333333334e-05, "loss": 1.4228, "step": 434 }, { "epoch": 0.0294735415678569, "grad_norm": 19.61952018737793, "learning_rate": 5.7866666666666666e-05, "loss": 1.3522, "step": 435 }, { "epoch": 0.029541296835828984, "grad_norm": 14.721433639526367, "learning_rate": 5.8e-05, "loss": 1.284, "step": 436 }, { "epoch": 0.029609052103801072, "grad_norm": 16.594276428222656, "learning_rate": 5.813333333333334e-05, "loss": 1.4636, "step": 437 }, { "epoch": 0.029676807371773157, "grad_norm": 15.98005199432373, "learning_rate": 5.826666666666667e-05, "loss": 1.2825, "step": 438 }, { "epoch": 0.02974456263974524, "grad_norm": 15.501729965209961, "learning_rate": 5.8399999999999997e-05, "loss": 1.3736, "step": 439 }, { "epoch": 0.029812317907717326, "grad_norm": 18.077552795410156, "learning_rate": 5.853333333333334e-05, "loss": 1.4115, "step": 440 }, { "epoch": 0.02988007317568941, "grad_norm": 16.016721725463867, "learning_rate": 5.866666666666667e-05, "loss": 1.3177, "step": 441 }, { "epoch": 0.029947828443661496, "grad_norm": 16.39783477783203, "learning_rate": 5.88e-05, "loss": 1.4465, "step": 442 }, { "epoch": 0.03001558371163358, "grad_norm": 13.970220565795898, "learning_rate": 5.893333333333334e-05, "loss": 1.2956, "step": 443 }, { "epoch": 0.030083338979605665, "grad_norm": 16.5622615814209, "learning_rate": 5.906666666666667e-05, "loss": 1.2144, "step": 444 }, { "epoch": 0.03015109424757775, "grad_norm": 18.191911697387695, "learning_rate": 5.92e-05, "loss": 1.623, "step": 445 }, { "epoch": 0.030218849515549834, "grad_norm": 15.483613967895508, "learning_rate": 5.9333333333333343e-05, "loss": 1.3232, "step": 446 }, { "epoch": 0.03028660478352192, "grad_norm": 15.633200645446777, "learning_rate": 5.946666666666667e-05, "loss": 1.544, "step": 447 }, { "epoch": 0.030354360051494004, "grad_norm": 16.025054931640625, "learning_rate": 5.96e-05, "loss": 1.4837, "step": 448 }, { "epoch": 0.03042211531946609, "grad_norm": 15.954922676086426, "learning_rate": 5.973333333333334e-05, "loss": 1.341, "step": 449 }, { "epoch": 0.030489870587438173, "grad_norm": 14.852401733398438, "learning_rate": 5.9866666666666674e-05, "loss": 1.2346, "step": 450 }, { "epoch": 0.030557625855410258, "grad_norm": 14.887676239013672, "learning_rate": 6e-05, "loss": 1.2789, "step": 451 }, { "epoch": 0.030625381123382343, "grad_norm": 17.25469970703125, "learning_rate": 6.013333333333334e-05, "loss": 1.4446, "step": 452 }, { "epoch": 0.030693136391354427, "grad_norm": 19.356597900390625, "learning_rate": 6.026666666666667e-05, "loss": 1.3765, "step": 453 }, { "epoch": 0.030760891659326512, "grad_norm": 14.16335391998291, "learning_rate": 6.04e-05, "loss": 1.4213, "step": 454 }, { "epoch": 0.030828646927298597, "grad_norm": 15.812528610229492, "learning_rate": 6.053333333333333e-05, "loss": 1.4028, "step": 455 }, { "epoch": 0.03089640219527068, "grad_norm": 15.10727596282959, "learning_rate": 6.066666666666667e-05, "loss": 1.3841, "step": 456 }, { "epoch": 0.030964157463242766, "grad_norm": 19.91744613647461, "learning_rate": 6.08e-05, "loss": 1.4706, "step": 457 }, { "epoch": 0.03103191273121485, "grad_norm": 14.597813606262207, "learning_rate": 6.093333333333333e-05, "loss": 1.2428, "step": 458 }, { "epoch": 0.031099667999186936, "grad_norm": 14.952363014221191, "learning_rate": 6.106666666666667e-05, "loss": 1.3431, "step": 459 }, { "epoch": 0.03116742326715902, "grad_norm": 16.519468307495117, "learning_rate": 6.12e-05, "loss": 1.5399, "step": 460 }, { "epoch": 0.031235178535131105, "grad_norm": 14.55786418914795, "learning_rate": 6.133333333333334e-05, "loss": 1.3844, "step": 461 }, { "epoch": 0.03130293380310319, "grad_norm": 12.52665901184082, "learning_rate": 6.146666666666668e-05, "loss": 1.2085, "step": 462 }, { "epoch": 0.03137068907107528, "grad_norm": 17.549148559570312, "learning_rate": 6.16e-05, "loss": 1.588, "step": 463 }, { "epoch": 0.03143844433904736, "grad_norm": 14.761232376098633, "learning_rate": 6.173333333333333e-05, "loss": 1.1685, "step": 464 }, { "epoch": 0.03150619960701945, "grad_norm": 13.1014404296875, "learning_rate": 6.186666666666668e-05, "loss": 1.1718, "step": 465 }, { "epoch": 0.03157395487499153, "grad_norm": 15.998059272766113, "learning_rate": 6.2e-05, "loss": 1.3177, "step": 466 }, { "epoch": 0.03164171014296362, "grad_norm": 16.402875900268555, "learning_rate": 6.213333333333333e-05, "loss": 1.4821, "step": 467 }, { "epoch": 0.0317094654109357, "grad_norm": 12.871467590332031, "learning_rate": 6.226666666666667e-05, "loss": 1.0176, "step": 468 }, { "epoch": 0.031777220678907786, "grad_norm": 15.602563858032227, "learning_rate": 6.24e-05, "loss": 1.3418, "step": 469 }, { "epoch": 0.03184497594687987, "grad_norm": 13.5369234085083, "learning_rate": 6.253333333333333e-05, "loss": 1.4051, "step": 470 }, { "epoch": 0.031912731214851955, "grad_norm": 16.841650009155273, "learning_rate": 6.266666666666667e-05, "loss": 1.5438, "step": 471 }, { "epoch": 0.03198048648282404, "grad_norm": 14.581981658935547, "learning_rate": 6.280000000000001e-05, "loss": 1.2767, "step": 472 }, { "epoch": 0.032048241750796125, "grad_norm": 14.084460258483887, "learning_rate": 6.293333333333334e-05, "loss": 1.2224, "step": 473 }, { "epoch": 0.03211599701876821, "grad_norm": 15.54316234588623, "learning_rate": 6.306666666666668e-05, "loss": 1.6084, "step": 474 }, { "epoch": 0.032183752286740294, "grad_norm": 15.498743057250977, "learning_rate": 6.32e-05, "loss": 1.3937, "step": 475 }, { "epoch": 0.03225150755471238, "grad_norm": 17.728227615356445, "learning_rate": 6.333333333333333e-05, "loss": 1.2996, "step": 476 }, { "epoch": 0.032319262822684464, "grad_norm": 13.956122398376465, "learning_rate": 6.346666666666667e-05, "loss": 1.4472, "step": 477 }, { "epoch": 0.03238701809065655, "grad_norm": 18.578060150146484, "learning_rate": 6.36e-05, "loss": 1.4659, "step": 478 }, { "epoch": 0.03245477335862863, "grad_norm": 14.055442810058594, "learning_rate": 6.373333333333333e-05, "loss": 1.1979, "step": 479 }, { "epoch": 0.03252252862660072, "grad_norm": 15.902195930480957, "learning_rate": 6.386666666666667e-05, "loss": 1.3367, "step": 480 }, { "epoch": 0.0325902838945728, "grad_norm": 14.212138175964355, "learning_rate": 6.400000000000001e-05, "loss": 1.2716, "step": 481 }, { "epoch": 0.03265803916254489, "grad_norm": 17.711475372314453, "learning_rate": 6.413333333333334e-05, "loss": 1.3016, "step": 482 }, { "epoch": 0.03272579443051697, "grad_norm": 13.068260192871094, "learning_rate": 6.426666666666668e-05, "loss": 1.1346, "step": 483 }, { "epoch": 0.032793549698489056, "grad_norm": 17.422321319580078, "learning_rate": 6.440000000000001e-05, "loss": 1.0931, "step": 484 }, { "epoch": 0.03286130496646114, "grad_norm": 14.98038101196289, "learning_rate": 6.453333333333333e-05, "loss": 1.22, "step": 485 }, { "epoch": 0.032929060234433226, "grad_norm": 13.300479888916016, "learning_rate": 6.466666666666666e-05, "loss": 1.2647, "step": 486 }, { "epoch": 0.03299681550240531, "grad_norm": 14.611360549926758, "learning_rate": 6.48e-05, "loss": 1.2478, "step": 487 }, { "epoch": 0.033064570770377395, "grad_norm": 19.244020462036133, "learning_rate": 6.493333333333333e-05, "loss": 1.2126, "step": 488 }, { "epoch": 0.03313232603834948, "grad_norm": 16.285396575927734, "learning_rate": 6.506666666666666e-05, "loss": 1.1694, "step": 489 }, { "epoch": 0.033200081306321565, "grad_norm": 19.613059997558594, "learning_rate": 6.52e-05, "loss": 1.2118, "step": 490 }, { "epoch": 0.03326783657429365, "grad_norm": 19.19630241394043, "learning_rate": 6.533333333333334e-05, "loss": 1.3093, "step": 491 }, { "epoch": 0.033335591842265734, "grad_norm": 12.653651237487793, "learning_rate": 6.546666666666667e-05, "loss": 1.1545, "step": 492 }, { "epoch": 0.03340334711023782, "grad_norm": 13.434443473815918, "learning_rate": 6.560000000000001e-05, "loss": 1.2543, "step": 493 }, { "epoch": 0.033471102378209903, "grad_norm": 18.893598556518555, "learning_rate": 6.573333333333334e-05, "loss": 1.3632, "step": 494 }, { "epoch": 0.03353885764618199, "grad_norm": 18.339479446411133, "learning_rate": 6.586666666666666e-05, "loss": 1.4369, "step": 495 }, { "epoch": 0.03360661291415407, "grad_norm": 12.303078651428223, "learning_rate": 6.6e-05, "loss": 1.4032, "step": 496 }, { "epoch": 0.03367436818212616, "grad_norm": 17.041015625, "learning_rate": 6.613333333333333e-05, "loss": 1.2395, "step": 497 }, { "epoch": 0.03374212345009824, "grad_norm": 14.035640716552734, "learning_rate": 6.626666666666666e-05, "loss": 1.3541, "step": 498 }, { "epoch": 0.03380987871807033, "grad_norm": 16.93412208557129, "learning_rate": 6.64e-05, "loss": 1.3503, "step": 499 }, { "epoch": 0.03387763398604241, "grad_norm": 16.02039909362793, "learning_rate": 6.653333333333334e-05, "loss": 1.3226, "step": 500 }, { "epoch": 0.033945389254014496, "grad_norm": 16.061542510986328, "learning_rate": 6.666666666666667e-05, "loss": 1.433, "step": 501 }, { "epoch": 0.03401314452198658, "grad_norm": 14.810320854187012, "learning_rate": 6.680000000000001e-05, "loss": 1.3715, "step": 502 }, { "epoch": 0.03408089978995867, "grad_norm": 15.132527351379395, "learning_rate": 6.693333333333334e-05, "loss": 1.0683, "step": 503 }, { "epoch": 0.03414865505793076, "grad_norm": 13.768165588378906, "learning_rate": 6.706666666666667e-05, "loss": 1.234, "step": 504 }, { "epoch": 0.03421641032590284, "grad_norm": 16.227920532226562, "learning_rate": 6.720000000000001e-05, "loss": 1.4181, "step": 505 }, { "epoch": 0.03428416559387493, "grad_norm": 14.205772399902344, "learning_rate": 6.733333333333333e-05, "loss": 1.4342, "step": 506 }, { "epoch": 0.03435192086184701, "grad_norm": 17.451099395751953, "learning_rate": 6.746666666666666e-05, "loss": 1.3338, "step": 507 }, { "epoch": 0.034419676129819096, "grad_norm": 13.7041015625, "learning_rate": 6.76e-05, "loss": 1.3972, "step": 508 }, { "epoch": 0.03448743139779118, "grad_norm": 13.206351280212402, "learning_rate": 6.773333333333333e-05, "loss": 1.4233, "step": 509 }, { "epoch": 0.034555186665763266, "grad_norm": 11.867975234985352, "learning_rate": 6.786666666666667e-05, "loss": 1.111, "step": 510 }, { "epoch": 0.03462294193373535, "grad_norm": 19.726648330688477, "learning_rate": 6.800000000000001e-05, "loss": 1.2918, "step": 511 }, { "epoch": 0.034690697201707435, "grad_norm": 18.914016723632812, "learning_rate": 6.813333333333334e-05, "loss": 1.5998, "step": 512 }, { "epoch": 0.03475845246967952, "grad_norm": 18.065937042236328, "learning_rate": 6.826666666666667e-05, "loss": 1.4952, "step": 513 }, { "epoch": 0.034826207737651604, "grad_norm": 16.42376136779785, "learning_rate": 6.840000000000001e-05, "loss": 1.2807, "step": 514 }, { "epoch": 0.03489396300562369, "grad_norm": 15.537372589111328, "learning_rate": 6.853333333333334e-05, "loss": 1.3524, "step": 515 }, { "epoch": 0.034961718273595774, "grad_norm": 18.395238876342773, "learning_rate": 6.866666666666666e-05, "loss": 1.3022, "step": 516 }, { "epoch": 0.03502947354156786, "grad_norm": 15.80531120300293, "learning_rate": 6.879999999999999e-05, "loss": 1.4201, "step": 517 }, { "epoch": 0.03509722880953994, "grad_norm": 15.361504554748535, "learning_rate": 6.893333333333333e-05, "loss": 1.4351, "step": 518 }, { "epoch": 0.03516498407751203, "grad_norm": 12.501819610595703, "learning_rate": 6.906666666666667e-05, "loss": 1.1894, "step": 519 }, { "epoch": 0.03523273934548411, "grad_norm": 14.53589916229248, "learning_rate": 6.92e-05, "loss": 1.3435, "step": 520 }, { "epoch": 0.0353004946134562, "grad_norm": 15.057633399963379, "learning_rate": 6.933333333333334e-05, "loss": 1.5467, "step": 521 }, { "epoch": 0.03536824988142828, "grad_norm": 16.27166175842285, "learning_rate": 6.946666666666667e-05, "loss": 1.5171, "step": 522 }, { "epoch": 0.03543600514940037, "grad_norm": 18.050413131713867, "learning_rate": 6.96e-05, "loss": 1.2246, "step": 523 }, { "epoch": 0.03550376041737245, "grad_norm": 15.316112518310547, "learning_rate": 6.973333333333334e-05, "loss": 1.2551, "step": 524 }, { "epoch": 0.035571515685344536, "grad_norm": 15.416853904724121, "learning_rate": 6.986666666666667e-05, "loss": 1.1895, "step": 525 }, { "epoch": 0.03563927095331662, "grad_norm": 15.019004821777344, "learning_rate": 7e-05, "loss": 1.5957, "step": 526 }, { "epoch": 0.035707026221288705, "grad_norm": 15.33927059173584, "learning_rate": 7.013333333333333e-05, "loss": 1.3536, "step": 527 }, { "epoch": 0.03577478148926079, "grad_norm": 16.403501510620117, "learning_rate": 7.026666666666668e-05, "loss": 1.6104, "step": 528 }, { "epoch": 0.035842536757232875, "grad_norm": 13.962714195251465, "learning_rate": 7.04e-05, "loss": 1.2122, "step": 529 }, { "epoch": 0.03591029202520496, "grad_norm": 14.530926704406738, "learning_rate": 7.053333333333334e-05, "loss": 1.2879, "step": 530 }, { "epoch": 0.035978047293177044, "grad_norm": 13.99305248260498, "learning_rate": 7.066666666666667e-05, "loss": 1.2063, "step": 531 }, { "epoch": 0.03604580256114913, "grad_norm": 17.54342269897461, "learning_rate": 7.08e-05, "loss": 1.2823, "step": 532 }, { "epoch": 0.036113557829121214, "grad_norm": 11.097670555114746, "learning_rate": 7.093333333333334e-05, "loss": 1.1658, "step": 533 }, { "epoch": 0.0361813130970933, "grad_norm": 19.069889068603516, "learning_rate": 7.106666666666667e-05, "loss": 1.3368, "step": 534 }, { "epoch": 0.03624906836506538, "grad_norm": 12.675487518310547, "learning_rate": 7.12e-05, "loss": 1.1586, "step": 535 }, { "epoch": 0.03631682363303747, "grad_norm": 16.971637725830078, "learning_rate": 7.133333333333334e-05, "loss": 1.2458, "step": 536 }, { "epoch": 0.03638457890100955, "grad_norm": 14.513422012329102, "learning_rate": 7.146666666666666e-05, "loss": 1.236, "step": 537 }, { "epoch": 0.03645233416898164, "grad_norm": 14.271836280822754, "learning_rate": 7.16e-05, "loss": 1.0951, "step": 538 }, { "epoch": 0.03652008943695372, "grad_norm": 14.286022186279297, "learning_rate": 7.173333333333335e-05, "loss": 1.2411, "step": 539 }, { "epoch": 0.03658784470492581, "grad_norm": 16.81059455871582, "learning_rate": 7.186666666666667e-05, "loss": 1.2453, "step": 540 }, { "epoch": 0.03665559997289789, "grad_norm": 16.818300247192383, "learning_rate": 7.2e-05, "loss": 1.2302, "step": 541 }, { "epoch": 0.036723355240869976, "grad_norm": 14.839925765991211, "learning_rate": 7.213333333333334e-05, "loss": 1.3445, "step": 542 }, { "epoch": 0.03679111050884206, "grad_norm": 14.917344093322754, "learning_rate": 7.226666666666667e-05, "loss": 1.3658, "step": 543 }, { "epoch": 0.036858865776814145, "grad_norm": 15.310625076293945, "learning_rate": 7.24e-05, "loss": 1.2565, "step": 544 }, { "epoch": 0.03692662104478623, "grad_norm": 16.7796630859375, "learning_rate": 7.253333333333334e-05, "loss": 1.265, "step": 545 }, { "epoch": 0.036994376312758315, "grad_norm": 19.4224796295166, "learning_rate": 7.266666666666667e-05, "loss": 1.6152, "step": 546 }, { "epoch": 0.0370621315807304, "grad_norm": 15.8001127243042, "learning_rate": 7.280000000000001e-05, "loss": 1.4313, "step": 547 }, { "epoch": 0.037129886848702484, "grad_norm": 13.597784042358398, "learning_rate": 7.293333333333334e-05, "loss": 1.3812, "step": 548 }, { "epoch": 0.03719764211667457, "grad_norm": 13.139778137207031, "learning_rate": 7.306666666666668e-05, "loss": 1.1888, "step": 549 }, { "epoch": 0.037265397384646654, "grad_norm": 17.083406448364258, "learning_rate": 7.32e-05, "loss": 1.1668, "step": 550 }, { "epoch": 0.03733315265261874, "grad_norm": 14.46076774597168, "learning_rate": 7.333333333333333e-05, "loss": 1.2394, "step": 551 }, { "epoch": 0.03740090792059082, "grad_norm": 16.217782974243164, "learning_rate": 7.346666666666667e-05, "loss": 1.3068, "step": 552 }, { "epoch": 0.03746866318856291, "grad_norm": 16.06308937072754, "learning_rate": 7.36e-05, "loss": 0.9678, "step": 553 }, { "epoch": 0.03753641845653499, "grad_norm": 14.652907371520996, "learning_rate": 7.373333333333333e-05, "loss": 1.5375, "step": 554 }, { "epoch": 0.03760417372450708, "grad_norm": 13.870722770690918, "learning_rate": 7.386666666666667e-05, "loss": 1.29, "step": 555 }, { "epoch": 0.03767192899247917, "grad_norm": 14.649571418762207, "learning_rate": 7.4e-05, "loss": 1.3588, "step": 556 }, { "epoch": 0.03773968426045125, "grad_norm": 13.698415756225586, "learning_rate": 7.413333333333334e-05, "loss": 1.0801, "step": 557 }, { "epoch": 0.03780743952842334, "grad_norm": 19.412424087524414, "learning_rate": 7.426666666666668e-05, "loss": 1.4474, "step": 558 }, { "epoch": 0.03787519479639542, "grad_norm": 18.58599853515625, "learning_rate": 7.44e-05, "loss": 1.3294, "step": 559 }, { "epoch": 0.03794295006436751, "grad_norm": 15.283289909362793, "learning_rate": 7.453333333333333e-05, "loss": 1.1554, "step": 560 }, { "epoch": 0.03801070533233959, "grad_norm": 16.289731979370117, "learning_rate": 7.466666666666667e-05, "loss": 1.222, "step": 561 }, { "epoch": 0.03807846060031168, "grad_norm": 25.105520248413086, "learning_rate": 7.48e-05, "loss": 1.2213, "step": 562 }, { "epoch": 0.03814621586828376, "grad_norm": 15.428693771362305, "learning_rate": 7.493333333333333e-05, "loss": 1.2165, "step": 563 }, { "epoch": 0.038213971136255846, "grad_norm": 18.79145622253418, "learning_rate": 7.506666666666667e-05, "loss": 1.4799, "step": 564 }, { "epoch": 0.03828172640422793, "grad_norm": 12.498862266540527, "learning_rate": 7.52e-05, "loss": 1.4348, "step": 565 }, { "epoch": 0.038349481672200016, "grad_norm": 19.302845001220703, "learning_rate": 7.533333333333334e-05, "loss": 1.2656, "step": 566 }, { "epoch": 0.0384172369401721, "grad_norm": 16.28862953186035, "learning_rate": 7.546666666666668e-05, "loss": 1.4779, "step": 567 }, { "epoch": 0.038484992208144185, "grad_norm": 13.972156524658203, "learning_rate": 7.560000000000001e-05, "loss": 1.4775, "step": 568 }, { "epoch": 0.03855274747611627, "grad_norm": 15.443683624267578, "learning_rate": 7.573333333333334e-05, "loss": 1.2671, "step": 569 }, { "epoch": 0.038620502744088354, "grad_norm": 15.203113555908203, "learning_rate": 7.586666666666668e-05, "loss": 1.4209, "step": 570 }, { "epoch": 0.03868825801206044, "grad_norm": 18.516462326049805, "learning_rate": 7.6e-05, "loss": 1.3315, "step": 571 }, { "epoch": 0.038756013280032524, "grad_norm": 13.403026580810547, "learning_rate": 7.613333333333333e-05, "loss": 1.364, "step": 572 }, { "epoch": 0.03882376854800461, "grad_norm": 12.339615821838379, "learning_rate": 7.626666666666667e-05, "loss": 1.2107, "step": 573 }, { "epoch": 0.03889152381597669, "grad_norm": 15.647716522216797, "learning_rate": 7.64e-05, "loss": 1.3165, "step": 574 }, { "epoch": 0.03895927908394878, "grad_norm": 15.854050636291504, "learning_rate": 7.653333333333333e-05, "loss": 1.517, "step": 575 }, { "epoch": 0.03902703435192086, "grad_norm": 11.765929222106934, "learning_rate": 7.666666666666667e-05, "loss": 1.2676, "step": 576 }, { "epoch": 0.03909478961989295, "grad_norm": 12.515352249145508, "learning_rate": 7.680000000000001e-05, "loss": 1.4472, "step": 577 }, { "epoch": 0.03916254488786503, "grad_norm": 11.72417163848877, "learning_rate": 7.693333333333334e-05, "loss": 1.1874, "step": 578 }, { "epoch": 0.03923030015583712, "grad_norm": 15.03148365020752, "learning_rate": 7.706666666666668e-05, "loss": 1.6381, "step": 579 }, { "epoch": 0.0392980554238092, "grad_norm": 14.0188570022583, "learning_rate": 7.72e-05, "loss": 1.4351, "step": 580 }, { "epoch": 0.039365810691781286, "grad_norm": 12.343233108520508, "learning_rate": 7.733333333333333e-05, "loss": 1.2293, "step": 581 }, { "epoch": 0.03943356595975337, "grad_norm": 15.358945846557617, "learning_rate": 7.746666666666666e-05, "loss": 1.307, "step": 582 }, { "epoch": 0.039501321227725456, "grad_norm": 14.131333351135254, "learning_rate": 7.76e-05, "loss": 1.0432, "step": 583 }, { "epoch": 0.03956907649569754, "grad_norm": 16.46926498413086, "learning_rate": 7.773333333333333e-05, "loss": 1.4126, "step": 584 }, { "epoch": 0.039636831763669625, "grad_norm": 15.209906578063965, "learning_rate": 7.786666666666667e-05, "loss": 1.3242, "step": 585 }, { "epoch": 0.03970458703164171, "grad_norm": 15.657282829284668, "learning_rate": 7.800000000000001e-05, "loss": 1.4967, "step": 586 }, { "epoch": 0.039772342299613794, "grad_norm": 11.466882705688477, "learning_rate": 7.813333333333334e-05, "loss": 1.3003, "step": 587 }, { "epoch": 0.03984009756758588, "grad_norm": 19.529300689697266, "learning_rate": 7.826666666666667e-05, "loss": 1.3835, "step": 588 }, { "epoch": 0.039907852835557964, "grad_norm": 17.22064971923828, "learning_rate": 7.840000000000001e-05, "loss": 1.2655, "step": 589 }, { "epoch": 0.03997560810353005, "grad_norm": 14.788103103637695, "learning_rate": 7.853333333333334e-05, "loss": 1.1981, "step": 590 }, { "epoch": 0.04004336337150213, "grad_norm": 13.705521583557129, "learning_rate": 7.866666666666666e-05, "loss": 1.2064, "step": 591 }, { "epoch": 0.04011111863947422, "grad_norm": 14.901930809020996, "learning_rate": 7.88e-05, "loss": 1.2632, "step": 592 }, { "epoch": 0.0401788739074463, "grad_norm": 18.520828247070312, "learning_rate": 7.893333333333333e-05, "loss": 1.4316, "step": 593 }, { "epoch": 0.04024662917541839, "grad_norm": 15.101353645324707, "learning_rate": 7.906666666666667e-05, "loss": 1.3935, "step": 594 }, { "epoch": 0.04031438444339047, "grad_norm": 13.961483001708984, "learning_rate": 7.920000000000001e-05, "loss": 1.1477, "step": 595 }, { "epoch": 0.04038213971136256, "grad_norm": 16.015554428100586, "learning_rate": 7.933333333333334e-05, "loss": 1.5029, "step": 596 }, { "epoch": 0.04044989497933464, "grad_norm": 15.009637832641602, "learning_rate": 7.946666666666667e-05, "loss": 1.3715, "step": 597 }, { "epoch": 0.040517650247306726, "grad_norm": 16.295202255249023, "learning_rate": 7.960000000000001e-05, "loss": 1.5129, "step": 598 }, { "epoch": 0.04058540551527881, "grad_norm": 88.3338623046875, "learning_rate": 7.973333333333334e-05, "loss": 1.2395, "step": 599 }, { "epoch": 0.040653160783250895, "grad_norm": 16.769424438476562, "learning_rate": 7.986666666666667e-05, "loss": 1.4258, "step": 600 }, { "epoch": 0.04072091605122298, "grad_norm": 16.044578552246094, "learning_rate": 8e-05, "loss": 1.365, "step": 601 }, { "epoch": 0.040788671319195065, "grad_norm": 15.282588958740234, "learning_rate": 8.013333333333333e-05, "loss": 1.3212, "step": 602 }, { "epoch": 0.04085642658716715, "grad_norm": 15.275490760803223, "learning_rate": 8.026666666666666e-05, "loss": 1.6048, "step": 603 }, { "epoch": 0.040924181855139234, "grad_norm": 12.952759742736816, "learning_rate": 8.04e-05, "loss": 1.1982, "step": 604 }, { "epoch": 0.04099193712311132, "grad_norm": 16.79343032836914, "learning_rate": 8.053333333333334e-05, "loss": 1.281, "step": 605 }, { "epoch": 0.041059692391083404, "grad_norm": 13.70421314239502, "learning_rate": 8.066666666666667e-05, "loss": 1.2664, "step": 606 }, { "epoch": 0.04112744765905549, "grad_norm": 14.420117378234863, "learning_rate": 8.080000000000001e-05, "loss": 1.2758, "step": 607 }, { "epoch": 0.04119520292702757, "grad_norm": 16.421335220336914, "learning_rate": 8.093333333333334e-05, "loss": 1.3105, "step": 608 }, { "epoch": 0.041262958194999665, "grad_norm": 16.96218490600586, "learning_rate": 8.106666666666667e-05, "loss": 1.4665, "step": 609 }, { "epoch": 0.04133071346297175, "grad_norm": 13.73725700378418, "learning_rate": 8.120000000000001e-05, "loss": 1.2109, "step": 610 }, { "epoch": 0.041398468730943834, "grad_norm": 14.718022346496582, "learning_rate": 8.133333333333334e-05, "loss": 1.2051, "step": 611 }, { "epoch": 0.04146622399891592, "grad_norm": 14.119061470031738, "learning_rate": 8.146666666666666e-05, "loss": 1.0731, "step": 612 }, { "epoch": 0.041533979266888, "grad_norm": 17.90053367614746, "learning_rate": 8.16e-05, "loss": 1.3804, "step": 613 }, { "epoch": 0.04160173453486009, "grad_norm": 12.727055549621582, "learning_rate": 8.173333333333335e-05, "loss": 1.0926, "step": 614 }, { "epoch": 0.04166948980283217, "grad_norm": 14.953054428100586, "learning_rate": 8.186666666666667e-05, "loss": 1.1974, "step": 615 }, { "epoch": 0.04173724507080426, "grad_norm": 16.05322265625, "learning_rate": 8.2e-05, "loss": 1.2526, "step": 616 }, { "epoch": 0.04180500033877634, "grad_norm": 15.028168678283691, "learning_rate": 8.213333333333334e-05, "loss": 1.3593, "step": 617 }, { "epoch": 0.04187275560674843, "grad_norm": 13.127458572387695, "learning_rate": 8.226666666666667e-05, "loss": 1.3981, "step": 618 }, { "epoch": 0.04194051087472051, "grad_norm": 17.323017120361328, "learning_rate": 8.24e-05, "loss": 1.4622, "step": 619 }, { "epoch": 0.042008266142692596, "grad_norm": 16.106731414794922, "learning_rate": 8.253333333333334e-05, "loss": 1.257, "step": 620 }, { "epoch": 0.04207602141066468, "grad_norm": 13.039103507995605, "learning_rate": 8.266666666666667e-05, "loss": 1.2905, "step": 621 }, { "epoch": 0.042143776678636766, "grad_norm": 13.661933898925781, "learning_rate": 8.28e-05, "loss": 1.2779, "step": 622 }, { "epoch": 0.04221153194660885, "grad_norm": 17.325756072998047, "learning_rate": 8.293333333333333e-05, "loss": 1.3934, "step": 623 }, { "epoch": 0.042279287214580935, "grad_norm": 13.898777961730957, "learning_rate": 8.306666666666668e-05, "loss": 1.3735, "step": 624 }, { "epoch": 0.04234704248255302, "grad_norm": 16.787601470947266, "learning_rate": 8.32e-05, "loss": 1.5228, "step": 625 }, { "epoch": 0.042414797750525104, "grad_norm": 13.54299259185791, "learning_rate": 8.333333333333334e-05, "loss": 1.0538, "step": 626 }, { "epoch": 0.04248255301849719, "grad_norm": 14.418194770812988, "learning_rate": 8.346666666666667e-05, "loss": 1.2932, "step": 627 }, { "epoch": 0.042550308286469274, "grad_norm": 13.39255142211914, "learning_rate": 8.36e-05, "loss": 1.1989, "step": 628 }, { "epoch": 0.04261806355444136, "grad_norm": 15.445747375488281, "learning_rate": 8.373333333333334e-05, "loss": 1.3557, "step": 629 }, { "epoch": 0.04268581882241344, "grad_norm": 13.414338111877441, "learning_rate": 8.386666666666667e-05, "loss": 1.1729, "step": 630 }, { "epoch": 0.04275357409038553, "grad_norm": 16.649137496948242, "learning_rate": 8.4e-05, "loss": 1.4212, "step": 631 }, { "epoch": 0.04282132935835761, "grad_norm": 17.583528518676758, "learning_rate": 8.413333333333334e-05, "loss": 1.4838, "step": 632 }, { "epoch": 0.0428890846263297, "grad_norm": 19.32307243347168, "learning_rate": 8.426666666666668e-05, "loss": 1.5958, "step": 633 }, { "epoch": 0.04295683989430178, "grad_norm": 12.703327178955078, "learning_rate": 8.44e-05, "loss": 1.2752, "step": 634 }, { "epoch": 0.04302459516227387, "grad_norm": 15.72768497467041, "learning_rate": 8.453333333333335e-05, "loss": 1.4059, "step": 635 }, { "epoch": 0.04309235043024595, "grad_norm": 13.532344818115234, "learning_rate": 8.466666666666667e-05, "loss": 1.2252, "step": 636 }, { "epoch": 0.043160105698218036, "grad_norm": 13.305481910705566, "learning_rate": 8.48e-05, "loss": 1.5084, "step": 637 }, { "epoch": 0.04322786096619012, "grad_norm": 11.986043930053711, "learning_rate": 8.493333333333334e-05, "loss": 1.2006, "step": 638 }, { "epoch": 0.043295616234162206, "grad_norm": 13.28003978729248, "learning_rate": 8.506666666666667e-05, "loss": 1.2442, "step": 639 }, { "epoch": 0.04336337150213429, "grad_norm": 15.835545539855957, "learning_rate": 8.52e-05, "loss": 1.2466, "step": 640 }, { "epoch": 0.043431126770106375, "grad_norm": 14.887584686279297, "learning_rate": 8.533333333333334e-05, "loss": 1.3828, "step": 641 }, { "epoch": 0.04349888203807846, "grad_norm": 14.740251541137695, "learning_rate": 8.546666666666667e-05, "loss": 1.4847, "step": 642 }, { "epoch": 0.043566637306050544, "grad_norm": 14.109213829040527, "learning_rate": 8.560000000000001e-05, "loss": 1.2251, "step": 643 }, { "epoch": 0.04363439257402263, "grad_norm": 17.33670425415039, "learning_rate": 8.573333333333333e-05, "loss": 1.3817, "step": 644 }, { "epoch": 0.043702147841994714, "grad_norm": 16.92241096496582, "learning_rate": 8.586666666666668e-05, "loss": 1.458, "step": 645 }, { "epoch": 0.0437699031099668, "grad_norm": 15.369187355041504, "learning_rate": 8.6e-05, "loss": 1.28, "step": 646 }, { "epoch": 0.04383765837793888, "grad_norm": 18.748065948486328, "learning_rate": 8.613333333333333e-05, "loss": 1.6275, "step": 647 }, { "epoch": 0.04390541364591097, "grad_norm": 15.650605201721191, "learning_rate": 8.626666666666667e-05, "loss": 1.3742, "step": 648 }, { "epoch": 0.04397316891388305, "grad_norm": 16.170730590820312, "learning_rate": 8.64e-05, "loss": 1.174, "step": 649 }, { "epoch": 0.04404092418185514, "grad_norm": 12.61952018737793, "learning_rate": 8.653333333333333e-05, "loss": 1.2178, "step": 650 }, { "epoch": 0.04410867944982722, "grad_norm": 18.86512565612793, "learning_rate": 8.666666666666667e-05, "loss": 1.4993, "step": 651 }, { "epoch": 0.04417643471779931, "grad_norm": 16.412899017333984, "learning_rate": 8.680000000000001e-05, "loss": 1.2663, "step": 652 }, { "epoch": 0.04424418998577139, "grad_norm": 14.11953353881836, "learning_rate": 8.693333333333334e-05, "loss": 1.3772, "step": 653 }, { "epoch": 0.044311945253743476, "grad_norm": 17.232633590698242, "learning_rate": 8.706666666666668e-05, "loss": 1.3252, "step": 654 }, { "epoch": 0.04437970052171556, "grad_norm": 13.833864212036133, "learning_rate": 8.72e-05, "loss": 1.279, "step": 655 }, { "epoch": 0.044447455789687645, "grad_norm": 11.546829223632812, "learning_rate": 8.733333333333333e-05, "loss": 1.382, "step": 656 }, { "epoch": 0.04451521105765973, "grad_norm": 13.163644790649414, "learning_rate": 8.746666666666667e-05, "loss": 1.2253, "step": 657 }, { "epoch": 0.044582966325631815, "grad_norm": 17.037311553955078, "learning_rate": 8.76e-05, "loss": 1.2727, "step": 658 }, { "epoch": 0.0446507215936039, "grad_norm": 14.610177040100098, "learning_rate": 8.773333333333333e-05, "loss": 1.3082, "step": 659 }, { "epoch": 0.044718476861575984, "grad_norm": 16.305557250976562, "learning_rate": 8.786666666666667e-05, "loss": 1.3948, "step": 660 }, { "epoch": 0.04478623212954807, "grad_norm": 13.207799911499023, "learning_rate": 8.800000000000001e-05, "loss": 1.2764, "step": 661 }, { "epoch": 0.04485398739752016, "grad_norm": 11.451075553894043, "learning_rate": 8.813333333333334e-05, "loss": 1.0831, "step": 662 }, { "epoch": 0.044921742665492245, "grad_norm": 13.555370330810547, "learning_rate": 8.826666666666668e-05, "loss": 1.4996, "step": 663 }, { "epoch": 0.04498949793346433, "grad_norm": 13.544769287109375, "learning_rate": 8.840000000000001e-05, "loss": 1.2277, "step": 664 }, { "epoch": 0.045057253201436415, "grad_norm": 18.05879783630371, "learning_rate": 8.853333333333333e-05, "loss": 1.3964, "step": 665 }, { "epoch": 0.0451250084694085, "grad_norm": 17.309839248657227, "learning_rate": 8.866666666666668e-05, "loss": 1.6233, "step": 666 }, { "epoch": 0.045192763737380584, "grad_norm": 12.732510566711426, "learning_rate": 8.88e-05, "loss": 1.3237, "step": 667 }, { "epoch": 0.04526051900535267, "grad_norm": 13.541101455688477, "learning_rate": 8.893333333333333e-05, "loss": 1.2523, "step": 668 }, { "epoch": 0.04532827427332475, "grad_norm": 17.54905891418457, "learning_rate": 8.906666666666667e-05, "loss": 1.5824, "step": 669 }, { "epoch": 0.04539602954129684, "grad_norm": 12.52578353881836, "learning_rate": 8.92e-05, "loss": 1.2966, "step": 670 }, { "epoch": 0.04546378480926892, "grad_norm": 13.279097557067871, "learning_rate": 8.933333333333334e-05, "loss": 1.4898, "step": 671 }, { "epoch": 0.04553154007724101, "grad_norm": 15.892850875854492, "learning_rate": 8.946666666666668e-05, "loss": 1.3757, "step": 672 }, { "epoch": 0.04559929534521309, "grad_norm": 14.108098983764648, "learning_rate": 8.960000000000001e-05, "loss": 1.1758, "step": 673 }, { "epoch": 0.04566705061318518, "grad_norm": 17.15204429626465, "learning_rate": 8.973333333333334e-05, "loss": 1.3876, "step": 674 }, { "epoch": 0.04573480588115726, "grad_norm": 14.453113555908203, "learning_rate": 8.986666666666666e-05, "loss": 1.2152, "step": 675 }, { "epoch": 0.045802561149129346, "grad_norm": 17.9672794342041, "learning_rate": 9e-05, "loss": 1.2962, "step": 676 }, { "epoch": 0.04587031641710143, "grad_norm": 19.810890197753906, "learning_rate": 9.013333333333333e-05, "loss": 1.3374, "step": 677 }, { "epoch": 0.045938071685073516, "grad_norm": 16.13353729248047, "learning_rate": 9.026666666666666e-05, "loss": 1.4125, "step": 678 }, { "epoch": 0.0460058269530456, "grad_norm": 15.257608413696289, "learning_rate": 9.04e-05, "loss": 1.1931, "step": 679 }, { "epoch": 0.046073582221017685, "grad_norm": 16.88699722290039, "learning_rate": 9.053333333333334e-05, "loss": 1.362, "step": 680 }, { "epoch": 0.04614133748898977, "grad_norm": 15.46777057647705, "learning_rate": 9.066666666666667e-05, "loss": 1.3522, "step": 681 }, { "epoch": 0.046209092756961855, "grad_norm": 13.584056854248047, "learning_rate": 9.080000000000001e-05, "loss": 1.1998, "step": 682 }, { "epoch": 0.04627684802493394, "grad_norm": 14.226449966430664, "learning_rate": 9.093333333333334e-05, "loss": 1.3312, "step": 683 }, { "epoch": 0.046344603292906024, "grad_norm": 15.157097816467285, "learning_rate": 9.106666666666667e-05, "loss": 1.3422, "step": 684 }, { "epoch": 0.04641235856087811, "grad_norm": 14.748275756835938, "learning_rate": 9.120000000000001e-05, "loss": 1.2902, "step": 685 }, { "epoch": 0.04648011382885019, "grad_norm": 13.730619430541992, "learning_rate": 9.133333333333334e-05, "loss": 1.3034, "step": 686 }, { "epoch": 0.04654786909682228, "grad_norm": 12.804062843322754, "learning_rate": 9.146666666666666e-05, "loss": 1.1484, "step": 687 }, { "epoch": 0.04661562436479436, "grad_norm": 16.587923049926758, "learning_rate": 9.16e-05, "loss": 1.4875, "step": 688 }, { "epoch": 0.04668337963276645, "grad_norm": 12.228131294250488, "learning_rate": 9.173333333333333e-05, "loss": 0.917, "step": 689 }, { "epoch": 0.04675113490073853, "grad_norm": 18.680187225341797, "learning_rate": 9.186666666666667e-05, "loss": 1.497, "step": 690 }, { "epoch": 0.04681889016871062, "grad_norm": 14.80630111694336, "learning_rate": 9.200000000000001e-05, "loss": 1.5623, "step": 691 }, { "epoch": 0.0468866454366827, "grad_norm": 13.754642486572266, "learning_rate": 9.213333333333334e-05, "loss": 1.3921, "step": 692 }, { "epoch": 0.046954400704654786, "grad_norm": 14.264236450195312, "learning_rate": 9.226666666666667e-05, "loss": 1.3615, "step": 693 }, { "epoch": 0.04702215597262687, "grad_norm": 15.883113861083984, "learning_rate": 9.240000000000001e-05, "loss": 1.4528, "step": 694 }, { "epoch": 0.047089911240598956, "grad_norm": 13.528610229492188, "learning_rate": 9.253333333333334e-05, "loss": 1.0995, "step": 695 }, { "epoch": 0.04715766650857104, "grad_norm": 15.945343971252441, "learning_rate": 9.266666666666666e-05, "loss": 1.272, "step": 696 }, { "epoch": 0.047225421776543125, "grad_norm": 14.378050804138184, "learning_rate": 9.28e-05, "loss": 1.3545, "step": 697 }, { "epoch": 0.04729317704451521, "grad_norm": 13.457077980041504, "learning_rate": 9.293333333333333e-05, "loss": 1.1751, "step": 698 }, { "epoch": 0.047360932312487294, "grad_norm": 19.285078048706055, "learning_rate": 9.306666666666667e-05, "loss": 1.4383, "step": 699 }, { "epoch": 0.04742868758045938, "grad_norm": 16.683856964111328, "learning_rate": 9.320000000000002e-05, "loss": 1.2499, "step": 700 }, { "epoch": 0.047496442848431464, "grad_norm": 13.818337440490723, "learning_rate": 9.333333333333334e-05, "loss": 1.2325, "step": 701 }, { "epoch": 0.04756419811640355, "grad_norm": 12.51142406463623, "learning_rate": 9.346666666666667e-05, "loss": 1.3317, "step": 702 }, { "epoch": 0.04763195338437563, "grad_norm": 14.697171211242676, "learning_rate": 9.360000000000001e-05, "loss": 1.6055, "step": 703 }, { "epoch": 0.04769970865234772, "grad_norm": 16.942562103271484, "learning_rate": 9.373333333333334e-05, "loss": 1.4279, "step": 704 }, { "epoch": 0.0477674639203198, "grad_norm": 16.739248275756836, "learning_rate": 9.386666666666667e-05, "loss": 1.2285, "step": 705 }, { "epoch": 0.04783521918829189, "grad_norm": 14.479548454284668, "learning_rate": 9.4e-05, "loss": 1.1984, "step": 706 }, { "epoch": 0.04790297445626397, "grad_norm": 14.384824752807617, "learning_rate": 9.413333333333334e-05, "loss": 1.2128, "step": 707 }, { "epoch": 0.04797072972423606, "grad_norm": 15.41884994506836, "learning_rate": 9.426666666666666e-05, "loss": 1.1627, "step": 708 }, { "epoch": 0.04803848499220814, "grad_norm": 13.267902374267578, "learning_rate": 9.44e-05, "loss": 1.2916, "step": 709 }, { "epoch": 0.048106240260180226, "grad_norm": 14.909900665283203, "learning_rate": 9.453333333333335e-05, "loss": 1.1579, "step": 710 }, { "epoch": 0.04817399552815231, "grad_norm": 12.639839172363281, "learning_rate": 9.466666666666667e-05, "loss": 1.2053, "step": 711 }, { "epoch": 0.048241750796124395, "grad_norm": 14.957659721374512, "learning_rate": 9.48e-05, "loss": 1.4556, "step": 712 }, { "epoch": 0.04830950606409648, "grad_norm": 14.008201599121094, "learning_rate": 9.493333333333334e-05, "loss": 1.1575, "step": 713 }, { "epoch": 0.048377261332068565, "grad_norm": 13.857501983642578, "learning_rate": 9.506666666666667e-05, "loss": 1.4121, "step": 714 }, { "epoch": 0.048445016600040657, "grad_norm": 13.867570877075195, "learning_rate": 9.52e-05, "loss": 1.4908, "step": 715 }, { "epoch": 0.04851277186801274, "grad_norm": 14.3545503616333, "learning_rate": 9.533333333333334e-05, "loss": 1.2939, "step": 716 }, { "epoch": 0.048580527135984826, "grad_norm": 15.173371315002441, "learning_rate": 9.546666666666667e-05, "loss": 1.3912, "step": 717 }, { "epoch": 0.04864828240395691, "grad_norm": 13.799453735351562, "learning_rate": 9.56e-05, "loss": 1.4219, "step": 718 }, { "epoch": 0.048716037671928995, "grad_norm": 18.264965057373047, "learning_rate": 9.573333333333335e-05, "loss": 1.4967, "step": 719 }, { "epoch": 0.04878379293990108, "grad_norm": 13.809319496154785, "learning_rate": 9.586666666666667e-05, "loss": 1.3527, "step": 720 }, { "epoch": 0.048851548207873165, "grad_norm": 13.931517601013184, "learning_rate": 9.6e-05, "loss": 1.4018, "step": 721 }, { "epoch": 0.04891930347584525, "grad_norm": 11.74387264251709, "learning_rate": 9.613333333333334e-05, "loss": 1.1981, "step": 722 }, { "epoch": 0.048987058743817334, "grad_norm": 14.960589408874512, "learning_rate": 9.626666666666667e-05, "loss": 1.3373, "step": 723 }, { "epoch": 0.04905481401178942, "grad_norm": 14.170279502868652, "learning_rate": 9.64e-05, "loss": 1.2702, "step": 724 }, { "epoch": 0.049122569279761504, "grad_norm": 15.532767295837402, "learning_rate": 9.653333333333334e-05, "loss": 1.1329, "step": 725 }, { "epoch": 0.04919032454773359, "grad_norm": 12.141284942626953, "learning_rate": 9.666666666666667e-05, "loss": 1.2694, "step": 726 }, { "epoch": 0.04925807981570567, "grad_norm": 12.379233360290527, "learning_rate": 9.680000000000001e-05, "loss": 1.1545, "step": 727 }, { "epoch": 0.04932583508367776, "grad_norm": 14.45605182647705, "learning_rate": 9.693333333333335e-05, "loss": 1.4151, "step": 728 }, { "epoch": 0.04939359035164984, "grad_norm": 15.656912803649902, "learning_rate": 9.706666666666668e-05, "loss": 1.3782, "step": 729 }, { "epoch": 0.04946134561962193, "grad_norm": 13.992748260498047, "learning_rate": 9.72e-05, "loss": 1.2731, "step": 730 }, { "epoch": 0.04952910088759401, "grad_norm": 16.253517150878906, "learning_rate": 9.733333333333335e-05, "loss": 1.4503, "step": 731 }, { "epoch": 0.049596856155566096, "grad_norm": 14.88161563873291, "learning_rate": 9.746666666666667e-05, "loss": 1.3909, "step": 732 }, { "epoch": 0.04966461142353818, "grad_norm": 14.60545539855957, "learning_rate": 9.76e-05, "loss": 1.1803, "step": 733 }, { "epoch": 0.049732366691510266, "grad_norm": 14.589803695678711, "learning_rate": 9.773333333333334e-05, "loss": 1.3844, "step": 734 }, { "epoch": 0.04980012195948235, "grad_norm": 14.781076431274414, "learning_rate": 9.786666666666667e-05, "loss": 1.2706, "step": 735 }, { "epoch": 0.049867877227454435, "grad_norm": 13.717072486877441, "learning_rate": 9.8e-05, "loss": 1.2311, "step": 736 }, { "epoch": 0.04993563249542652, "grad_norm": 17.14280891418457, "learning_rate": 9.813333333333334e-05, "loss": 1.3423, "step": 737 }, { "epoch": 0.050003387763398605, "grad_norm": 14.521373748779297, "learning_rate": 9.826666666666668e-05, "loss": 1.3118, "step": 738 }, { "epoch": 0.05007114303137069, "grad_norm": 14.79515266418457, "learning_rate": 9.84e-05, "loss": 1.3622, "step": 739 }, { "epoch": 0.050138898299342774, "grad_norm": 13.63962459564209, "learning_rate": 9.853333333333333e-05, "loss": 1.3856, "step": 740 }, { "epoch": 0.05020665356731486, "grad_norm": 13.788541793823242, "learning_rate": 9.866666666666668e-05, "loss": 1.3439, "step": 741 }, { "epoch": 0.05027440883528694, "grad_norm": 14.015862464904785, "learning_rate": 9.88e-05, "loss": 1.3263, "step": 742 }, { "epoch": 0.05034216410325903, "grad_norm": 15.407167434692383, "learning_rate": 9.893333333333333e-05, "loss": 1.5115, "step": 743 }, { "epoch": 0.05040991937123111, "grad_norm": 14.542003631591797, "learning_rate": 9.906666666666667e-05, "loss": 1.3934, "step": 744 }, { "epoch": 0.0504776746392032, "grad_norm": 16.733686447143555, "learning_rate": 9.92e-05, "loss": 1.2098, "step": 745 }, { "epoch": 0.05054542990717528, "grad_norm": 14.979568481445312, "learning_rate": 9.933333333333334e-05, "loss": 1.3234, "step": 746 }, { "epoch": 0.05061318517514737, "grad_norm": 13.699801445007324, "learning_rate": 9.946666666666668e-05, "loss": 1.3173, "step": 747 }, { "epoch": 0.05068094044311945, "grad_norm": 13.262088775634766, "learning_rate": 9.960000000000001e-05, "loss": 1.3459, "step": 748 }, { "epoch": 0.050748695711091536, "grad_norm": 14.694448471069336, "learning_rate": 9.973333333333334e-05, "loss": 1.3639, "step": 749 }, { "epoch": 0.05081645097906362, "grad_norm": 13.999008178710938, "learning_rate": 9.986666666666668e-05, "loss": 1.4275, "step": 750 }, { "epoch": 0.050884206247035706, "grad_norm": 11.588037490844727, "learning_rate": 0.0001, "loss": 1.2866, "step": 751 }, { "epoch": 0.05095196151500779, "grad_norm": 16.174110412597656, "learning_rate": 9.999863098090219e-05, "loss": 1.5338, "step": 752 }, { "epoch": 0.051019716782979875, "grad_norm": 17.8669376373291, "learning_rate": 9.999726196180437e-05, "loss": 1.3247, "step": 753 }, { "epoch": 0.05108747205095196, "grad_norm": 13.849075317382812, "learning_rate": 9.999589294270656e-05, "loss": 1.1817, "step": 754 }, { "epoch": 0.051155227318924044, "grad_norm": 14.888331413269043, "learning_rate": 9.999452392360874e-05, "loss": 1.4019, "step": 755 }, { "epoch": 0.05122298258689613, "grad_norm": 18.68206024169922, "learning_rate": 9.999315490451092e-05, "loss": 1.7115, "step": 756 }, { "epoch": 0.051290737854868214, "grad_norm": 14.762079238891602, "learning_rate": 9.99917858854131e-05, "loss": 1.2553, "step": 757 }, { "epoch": 0.0513584931228403, "grad_norm": 14.649972915649414, "learning_rate": 9.99904168663153e-05, "loss": 1.0807, "step": 758 }, { "epoch": 0.05142624839081238, "grad_norm": 13.172977447509766, "learning_rate": 9.998904784721747e-05, "loss": 1.4966, "step": 759 }, { "epoch": 0.05149400365878447, "grad_norm": 14.144796371459961, "learning_rate": 9.998767882811965e-05, "loss": 1.234, "step": 760 }, { "epoch": 0.05156175892675655, "grad_norm": 17.565507888793945, "learning_rate": 9.998630980902184e-05, "loss": 1.4061, "step": 761 }, { "epoch": 0.05162951419472864, "grad_norm": 17.205589294433594, "learning_rate": 9.998494078992402e-05, "loss": 1.6626, "step": 762 }, { "epoch": 0.05169726946270072, "grad_norm": 16.14542007446289, "learning_rate": 9.998357177082621e-05, "loss": 1.3145, "step": 763 }, { "epoch": 0.05176502473067281, "grad_norm": 14.692976951599121, "learning_rate": 9.998220275172839e-05, "loss": 1.2085, "step": 764 }, { "epoch": 0.05183277999864489, "grad_norm": 12.572774887084961, "learning_rate": 9.998083373263057e-05, "loss": 1.3743, "step": 765 }, { "epoch": 0.051900535266616976, "grad_norm": 15.904753684997559, "learning_rate": 9.997946471353275e-05, "loss": 1.3863, "step": 766 }, { "epoch": 0.05196829053458906, "grad_norm": 14.471494674682617, "learning_rate": 9.997809569443493e-05, "loss": 1.3615, "step": 767 }, { "epoch": 0.05203604580256115, "grad_norm": 13.82172966003418, "learning_rate": 9.997672667533712e-05, "loss": 1.2104, "step": 768 }, { "epoch": 0.05210380107053324, "grad_norm": 14.648073196411133, "learning_rate": 9.99753576562393e-05, "loss": 1.1166, "step": 769 }, { "epoch": 0.05217155633850532, "grad_norm": 16.776750564575195, "learning_rate": 9.997398863714149e-05, "loss": 1.4242, "step": 770 }, { "epoch": 0.05223931160647741, "grad_norm": 13.999717712402344, "learning_rate": 9.997261961804367e-05, "loss": 1.2493, "step": 771 }, { "epoch": 0.05230706687444949, "grad_norm": 13.238443374633789, "learning_rate": 9.997125059894586e-05, "loss": 1.0469, "step": 772 }, { "epoch": 0.052374822142421576, "grad_norm": 12.79283332824707, "learning_rate": 9.996988157984804e-05, "loss": 1.1975, "step": 773 }, { "epoch": 0.05244257741039366, "grad_norm": 15.577791213989258, "learning_rate": 9.996851256075022e-05, "loss": 1.4972, "step": 774 }, { "epoch": 0.052510332678365745, "grad_norm": 16.215747833251953, "learning_rate": 9.996714354165241e-05, "loss": 1.4285, "step": 775 }, { "epoch": 0.05257808794633783, "grad_norm": 14.68825626373291, "learning_rate": 9.99657745225546e-05, "loss": 1.2028, "step": 776 }, { "epoch": 0.052645843214309915, "grad_norm": 11.535130500793457, "learning_rate": 9.996440550345677e-05, "loss": 1.2466, "step": 777 }, { "epoch": 0.052713598482282, "grad_norm": 14.865918159484863, "learning_rate": 9.996303648435897e-05, "loss": 1.3903, "step": 778 }, { "epoch": 0.052781353750254084, "grad_norm": 13.626388549804688, "learning_rate": 9.996166746526115e-05, "loss": 1.5446, "step": 779 }, { "epoch": 0.05284910901822617, "grad_norm": 12.957473754882812, "learning_rate": 9.996029844616333e-05, "loss": 1.1641, "step": 780 }, { "epoch": 0.052916864286198254, "grad_norm": 14.928487777709961, "learning_rate": 9.995892942706552e-05, "loss": 1.0929, "step": 781 }, { "epoch": 0.05298461955417034, "grad_norm": 13.592952728271484, "learning_rate": 9.99575604079677e-05, "loss": 1.4684, "step": 782 }, { "epoch": 0.05305237482214242, "grad_norm": 13.836453437805176, "learning_rate": 9.995619138886988e-05, "loss": 1.0128, "step": 783 }, { "epoch": 0.05312013009011451, "grad_norm": 16.654394149780273, "learning_rate": 9.995482236977206e-05, "loss": 1.2464, "step": 784 }, { "epoch": 0.05318788535808659, "grad_norm": 14.4434232711792, "learning_rate": 9.995345335067424e-05, "loss": 1.3649, "step": 785 }, { "epoch": 0.05325564062605868, "grad_norm": 14.15592098236084, "learning_rate": 9.995208433157644e-05, "loss": 1.4426, "step": 786 }, { "epoch": 0.05332339589403076, "grad_norm": 12.97901439666748, "learning_rate": 9.995071531247862e-05, "loss": 1.227, "step": 787 }, { "epoch": 0.053391151162002846, "grad_norm": 13.167902946472168, "learning_rate": 9.99493462933808e-05, "loss": 1.2432, "step": 788 }, { "epoch": 0.05345890642997493, "grad_norm": 15.92297077178955, "learning_rate": 9.994797727428298e-05, "loss": 1.3345, "step": 789 }, { "epoch": 0.053526661697947016, "grad_norm": 13.30277156829834, "learning_rate": 9.994660825518517e-05, "loss": 1.2361, "step": 790 }, { "epoch": 0.0535944169659191, "grad_norm": 15.98779582977295, "learning_rate": 9.994523923608735e-05, "loss": 1.1202, "step": 791 }, { "epoch": 0.053662172233891185, "grad_norm": 16.414382934570312, "learning_rate": 9.994387021698953e-05, "loss": 1.448, "step": 792 }, { "epoch": 0.05372992750186327, "grad_norm": 14.491677284240723, "learning_rate": 9.994250119789171e-05, "loss": 1.0862, "step": 793 }, { "epoch": 0.053797682769835355, "grad_norm": 13.155410766601562, "learning_rate": 9.99411321787939e-05, "loss": 1.312, "step": 794 }, { "epoch": 0.05386543803780744, "grad_norm": 16.11139488220215, "learning_rate": 9.993976315969609e-05, "loss": 1.2043, "step": 795 }, { "epoch": 0.053933193305779524, "grad_norm": 14.259698867797852, "learning_rate": 9.993839414059827e-05, "loss": 1.2256, "step": 796 }, { "epoch": 0.05400094857375161, "grad_norm": 15.751099586486816, "learning_rate": 9.993702512150045e-05, "loss": 1.0732, "step": 797 }, { "epoch": 0.05406870384172369, "grad_norm": 12.766170501708984, "learning_rate": 9.993565610240263e-05, "loss": 1.0362, "step": 798 }, { "epoch": 0.05413645910969578, "grad_norm": 14.61483097076416, "learning_rate": 9.993428708330481e-05, "loss": 1.3107, "step": 799 }, { "epoch": 0.05420421437766786, "grad_norm": 14.571990013122559, "learning_rate": 9.9932918064207e-05, "loss": 1.3616, "step": 800 }, { "epoch": 0.05427196964563995, "grad_norm": 14.69124984741211, "learning_rate": 9.993154904510918e-05, "loss": 1.4273, "step": 801 }, { "epoch": 0.05433972491361203, "grad_norm": 13.834383010864258, "learning_rate": 9.993018002601136e-05, "loss": 1.2886, "step": 802 }, { "epoch": 0.05440748018158412, "grad_norm": 14.178943634033203, "learning_rate": 9.992881100691355e-05, "loss": 1.44, "step": 803 }, { "epoch": 0.0544752354495562, "grad_norm": 13.185267448425293, "learning_rate": 9.992744198781574e-05, "loss": 1.5609, "step": 804 }, { "epoch": 0.054542990717528286, "grad_norm": 15.211031913757324, "learning_rate": 9.992607296871792e-05, "loss": 1.8873, "step": 805 }, { "epoch": 0.05461074598550037, "grad_norm": 11.855447769165039, "learning_rate": 9.99247039496201e-05, "loss": 1.4063, "step": 806 }, { "epoch": 0.054678501253472456, "grad_norm": 14.352787017822266, "learning_rate": 9.992333493052228e-05, "loss": 1.56, "step": 807 }, { "epoch": 0.05474625652144454, "grad_norm": 14.339908599853516, "learning_rate": 9.992196591142446e-05, "loss": 1.1985, "step": 808 }, { "epoch": 0.054814011789416625, "grad_norm": 12.696648597717285, "learning_rate": 9.992059689232665e-05, "loss": 1.1807, "step": 809 }, { "epoch": 0.05488176705738871, "grad_norm": 14.083525657653809, "learning_rate": 9.991922787322883e-05, "loss": 1.4012, "step": 810 }, { "epoch": 0.054949522325360795, "grad_norm": 14.298514366149902, "learning_rate": 9.991785885413101e-05, "loss": 1.4047, "step": 811 }, { "epoch": 0.05501727759333288, "grad_norm": 13.391324996948242, "learning_rate": 9.99164898350332e-05, "loss": 1.2292, "step": 812 }, { "epoch": 0.055085032861304964, "grad_norm": 14.0011625289917, "learning_rate": 9.991512081593539e-05, "loss": 1.3194, "step": 813 }, { "epoch": 0.05515278812927705, "grad_norm": 14.726574897766113, "learning_rate": 9.991375179683757e-05, "loss": 1.3245, "step": 814 }, { "epoch": 0.05522054339724913, "grad_norm": 14.615242004394531, "learning_rate": 9.991238277773975e-05, "loss": 1.3704, "step": 815 }, { "epoch": 0.05528829866522122, "grad_norm": 11.06546688079834, "learning_rate": 9.991101375864193e-05, "loss": 1.0841, "step": 816 }, { "epoch": 0.0553560539331933, "grad_norm": 13.768633842468262, "learning_rate": 9.990964473954411e-05, "loss": 1.2632, "step": 817 }, { "epoch": 0.05542380920116539, "grad_norm": 15.054973602294922, "learning_rate": 9.99082757204463e-05, "loss": 1.2931, "step": 818 }, { "epoch": 0.05549156446913747, "grad_norm": 15.27096176147461, "learning_rate": 9.990690670134848e-05, "loss": 1.1789, "step": 819 }, { "epoch": 0.055559319737109564, "grad_norm": 13.228281021118164, "learning_rate": 9.990553768225067e-05, "loss": 1.4458, "step": 820 }, { "epoch": 0.05562707500508165, "grad_norm": 16.016782760620117, "learning_rate": 9.990416866315286e-05, "loss": 1.5121, "step": 821 }, { "epoch": 0.05569483027305373, "grad_norm": 14.15912914276123, "learning_rate": 9.990279964405504e-05, "loss": 1.3702, "step": 822 }, { "epoch": 0.05576258554102582, "grad_norm": 14.61017894744873, "learning_rate": 9.990143062495722e-05, "loss": 1.2774, "step": 823 }, { "epoch": 0.0558303408089979, "grad_norm": 13.241140365600586, "learning_rate": 9.990006160585941e-05, "loss": 1.3857, "step": 824 }, { "epoch": 0.05589809607696999, "grad_norm": 11.815064430236816, "learning_rate": 9.98986925867616e-05, "loss": 1.1053, "step": 825 }, { "epoch": 0.05596585134494207, "grad_norm": 13.179222106933594, "learning_rate": 9.989732356766377e-05, "loss": 1.4686, "step": 826 }, { "epoch": 0.05603360661291416, "grad_norm": 11.888179779052734, "learning_rate": 9.989595454856597e-05, "loss": 1.3191, "step": 827 }, { "epoch": 0.05610136188088624, "grad_norm": 15.43813419342041, "learning_rate": 9.989458552946815e-05, "loss": 0.9732, "step": 828 }, { "epoch": 0.056169117148858326, "grad_norm": 20.0246524810791, "learning_rate": 9.989321651037033e-05, "loss": 1.5398, "step": 829 }, { "epoch": 0.05623687241683041, "grad_norm": 14.247052192687988, "learning_rate": 9.989184749127251e-05, "loss": 1.5093, "step": 830 }, { "epoch": 0.056304627684802495, "grad_norm": 15.63775634765625, "learning_rate": 9.989047847217469e-05, "loss": 1.3488, "step": 831 }, { "epoch": 0.05637238295277458, "grad_norm": 10.582650184631348, "learning_rate": 9.988910945307688e-05, "loss": 1.3888, "step": 832 }, { "epoch": 0.056440138220746665, "grad_norm": 12.344864845275879, "learning_rate": 9.988774043397906e-05, "loss": 1.2876, "step": 833 }, { "epoch": 0.05650789348871875, "grad_norm": 13.95814323425293, "learning_rate": 9.988637141488124e-05, "loss": 1.2648, "step": 834 }, { "epoch": 0.056575648756690834, "grad_norm": 9.740105628967285, "learning_rate": 9.988500239578342e-05, "loss": 0.9777, "step": 835 }, { "epoch": 0.05664340402466292, "grad_norm": 16.16588020324707, "learning_rate": 9.988363337668562e-05, "loss": 1.5859, "step": 836 }, { "epoch": 0.056711159292635004, "grad_norm": 14.394731521606445, "learning_rate": 9.98822643575878e-05, "loss": 1.3395, "step": 837 }, { "epoch": 0.05677891456060709, "grad_norm": 11.256571769714355, "learning_rate": 9.988089533848998e-05, "loss": 1.0528, "step": 838 }, { "epoch": 0.05684666982857917, "grad_norm": 17.595510482788086, "learning_rate": 9.987952631939216e-05, "loss": 1.2678, "step": 839 }, { "epoch": 0.05691442509655126, "grad_norm": 14.132645606994629, "learning_rate": 9.987815730029434e-05, "loss": 1.4129, "step": 840 }, { "epoch": 0.05698218036452334, "grad_norm": 14.438119888305664, "learning_rate": 9.987678828119653e-05, "loss": 1.4141, "step": 841 }, { "epoch": 0.05704993563249543, "grad_norm": 15.983094215393066, "learning_rate": 9.987541926209871e-05, "loss": 1.1959, "step": 842 }, { "epoch": 0.05711769090046751, "grad_norm": 17.300403594970703, "learning_rate": 9.98740502430009e-05, "loss": 1.1613, "step": 843 }, { "epoch": 0.057185446168439596, "grad_norm": 15.150660514831543, "learning_rate": 9.987268122390307e-05, "loss": 1.2253, "step": 844 }, { "epoch": 0.05725320143641168, "grad_norm": 14.234586715698242, "learning_rate": 9.987131220480525e-05, "loss": 1.2517, "step": 845 }, { "epoch": 0.057320956704383766, "grad_norm": 18.31337547302246, "learning_rate": 9.986994318570745e-05, "loss": 1.7506, "step": 846 }, { "epoch": 0.05738871197235585, "grad_norm": 14.818669319152832, "learning_rate": 9.986857416660963e-05, "loss": 1.358, "step": 847 }, { "epoch": 0.057456467240327935, "grad_norm": 14.640913009643555, "learning_rate": 9.986720514751181e-05, "loss": 1.4481, "step": 848 }, { "epoch": 0.05752422250830002, "grad_norm": 12.277986526489258, "learning_rate": 9.986583612841399e-05, "loss": 1.3428, "step": 849 }, { "epoch": 0.057591977776272105, "grad_norm": 13.817851066589355, "learning_rate": 9.986446710931618e-05, "loss": 1.3514, "step": 850 }, { "epoch": 0.05765973304424419, "grad_norm": 13.706515312194824, "learning_rate": 9.986309809021836e-05, "loss": 1.4295, "step": 851 }, { "epoch": 0.057727488312216274, "grad_norm": 16.649917602539062, "learning_rate": 9.986172907112054e-05, "loss": 1.3895, "step": 852 }, { "epoch": 0.05779524358018836, "grad_norm": 13.659167289733887, "learning_rate": 9.986036005202272e-05, "loss": 1.1545, "step": 853 }, { "epoch": 0.057862998848160443, "grad_norm": 11.264912605285645, "learning_rate": 9.98589910329249e-05, "loss": 1.0766, "step": 854 }, { "epoch": 0.05793075411613253, "grad_norm": 12.930856704711914, "learning_rate": 9.98576220138271e-05, "loss": 1.1665, "step": 855 }, { "epoch": 0.05799850938410461, "grad_norm": 15.999971389770508, "learning_rate": 9.985625299472928e-05, "loss": 1.4595, "step": 856 }, { "epoch": 0.0580662646520767, "grad_norm": 14.566671371459961, "learning_rate": 9.985488397563146e-05, "loss": 1.283, "step": 857 }, { "epoch": 0.05813401992004878, "grad_norm": 16.106964111328125, "learning_rate": 9.985351495653364e-05, "loss": 1.2842, "step": 858 }, { "epoch": 0.05820177518802087, "grad_norm": 15.47492790222168, "learning_rate": 9.985214593743583e-05, "loss": 1.3949, "step": 859 }, { "epoch": 0.05826953045599295, "grad_norm": 10.692886352539062, "learning_rate": 9.985077691833801e-05, "loss": 1.2896, "step": 860 }, { "epoch": 0.058337285723965036, "grad_norm": 14.13198184967041, "learning_rate": 9.98494078992402e-05, "loss": 1.1566, "step": 861 }, { "epoch": 0.05840504099193712, "grad_norm": 14.455452919006348, "learning_rate": 9.984803888014237e-05, "loss": 1.1949, "step": 862 }, { "epoch": 0.058472796259909206, "grad_norm": 15.020733833312988, "learning_rate": 9.984666986104456e-05, "loss": 1.5202, "step": 863 }, { "epoch": 0.05854055152788129, "grad_norm": 21.089344024658203, "learning_rate": 9.984530084194675e-05, "loss": 1.5083, "step": 864 }, { "epoch": 0.058608306795853375, "grad_norm": 14.414257049560547, "learning_rate": 9.984393182284893e-05, "loss": 1.3977, "step": 865 }, { "epoch": 0.05867606206382546, "grad_norm": 15.667798042297363, "learning_rate": 9.984256280375111e-05, "loss": 1.4044, "step": 866 }, { "epoch": 0.058743817331797545, "grad_norm": 12.503005981445312, "learning_rate": 9.98411937846533e-05, "loss": 1.3369, "step": 867 }, { "epoch": 0.05881157259976963, "grad_norm": 13.27022933959961, "learning_rate": 9.983982476555548e-05, "loss": 1.4134, "step": 868 }, { "epoch": 0.058879327867741714, "grad_norm": 16.2034969329834, "learning_rate": 9.983845574645766e-05, "loss": 1.1196, "step": 869 }, { "epoch": 0.0589470831357138, "grad_norm": 9.868896484375, "learning_rate": 9.983708672735986e-05, "loss": 1.0248, "step": 870 }, { "epoch": 0.05901483840368588, "grad_norm": 15.588685989379883, "learning_rate": 9.983571770826204e-05, "loss": 1.5865, "step": 871 }, { "epoch": 0.05908259367165797, "grad_norm": 14.688246726989746, "learning_rate": 9.983434868916422e-05, "loss": 1.2959, "step": 872 }, { "epoch": 0.05915034893963006, "grad_norm": 14.252961158752441, "learning_rate": 9.983297967006641e-05, "loss": 1.3536, "step": 873 }, { "epoch": 0.059218104207602144, "grad_norm": 11.79800033569336, "learning_rate": 9.98316106509686e-05, "loss": 1.331, "step": 874 }, { "epoch": 0.05928585947557423, "grad_norm": 11.900074005126953, "learning_rate": 9.983024163187077e-05, "loss": 1.2241, "step": 875 }, { "epoch": 0.059353614743546314, "grad_norm": 10.696773529052734, "learning_rate": 9.982887261277295e-05, "loss": 1.4041, "step": 876 }, { "epoch": 0.0594213700115184, "grad_norm": 13.532305717468262, "learning_rate": 9.982750359367513e-05, "loss": 1.3621, "step": 877 }, { "epoch": 0.05948912527949048, "grad_norm": 14.107857704162598, "learning_rate": 9.982613457457733e-05, "loss": 1.6327, "step": 878 }, { "epoch": 0.05955688054746257, "grad_norm": 11.584097862243652, "learning_rate": 9.982476555547951e-05, "loss": 0.9606, "step": 879 }, { "epoch": 0.05962463581543465, "grad_norm": 14.240161895751953, "learning_rate": 9.982339653638169e-05, "loss": 1.263, "step": 880 }, { "epoch": 0.05969239108340674, "grad_norm": 14.461871147155762, "learning_rate": 9.982202751728387e-05, "loss": 1.4201, "step": 881 }, { "epoch": 0.05976014635137882, "grad_norm": 14.072705268859863, "learning_rate": 9.982065849818606e-05, "loss": 1.5977, "step": 882 }, { "epoch": 0.05982790161935091, "grad_norm": 14.928994178771973, "learning_rate": 9.981928947908824e-05, "loss": 1.3346, "step": 883 }, { "epoch": 0.05989565688732299, "grad_norm": 14.898951530456543, "learning_rate": 9.981792045999042e-05, "loss": 1.312, "step": 884 }, { "epoch": 0.059963412155295076, "grad_norm": 13.089646339416504, "learning_rate": 9.98165514408926e-05, "loss": 1.0833, "step": 885 }, { "epoch": 0.06003116742326716, "grad_norm": 15.768043518066406, "learning_rate": 9.981518242179478e-05, "loss": 1.1246, "step": 886 }, { "epoch": 0.060098922691239245, "grad_norm": 11.8709135055542, "learning_rate": 9.981381340269698e-05, "loss": 1.1822, "step": 887 }, { "epoch": 0.06016667795921133, "grad_norm": 15.698454856872559, "learning_rate": 9.981244438359916e-05, "loss": 1.3733, "step": 888 }, { "epoch": 0.060234433227183415, "grad_norm": 14.827208518981934, "learning_rate": 9.981107536450134e-05, "loss": 1.3121, "step": 889 }, { "epoch": 0.0603021884951555, "grad_norm": 12.522045135498047, "learning_rate": 9.980970634540352e-05, "loss": 1.5584, "step": 890 }, { "epoch": 0.060369943763127584, "grad_norm": 14.417738914489746, "learning_rate": 9.980833732630571e-05, "loss": 1.2389, "step": 891 }, { "epoch": 0.06043769903109967, "grad_norm": 14.761930465698242, "learning_rate": 9.98069683072079e-05, "loss": 1.5007, "step": 892 }, { "epoch": 0.060505454299071754, "grad_norm": 15.882668495178223, "learning_rate": 9.980559928811007e-05, "loss": 1.2651, "step": 893 }, { "epoch": 0.06057320956704384, "grad_norm": 13.605412483215332, "learning_rate": 9.980423026901225e-05, "loss": 1.5186, "step": 894 }, { "epoch": 0.06064096483501592, "grad_norm": 10.654335021972656, "learning_rate": 9.980286124991443e-05, "loss": 1.1177, "step": 895 }, { "epoch": 0.06070872010298801, "grad_norm": 12.37457275390625, "learning_rate": 9.980149223081663e-05, "loss": 1.2362, "step": 896 }, { "epoch": 0.06077647537096009, "grad_norm": 12.591222763061523, "learning_rate": 9.980012321171881e-05, "loss": 1.2509, "step": 897 }, { "epoch": 0.06084423063893218, "grad_norm": 14.337310791015625, "learning_rate": 9.979875419262099e-05, "loss": 1.2987, "step": 898 }, { "epoch": 0.06091198590690426, "grad_norm": 15.496018409729004, "learning_rate": 9.979738517352317e-05, "loss": 1.5307, "step": 899 }, { "epoch": 0.06097974117487635, "grad_norm": 13.730890274047852, "learning_rate": 9.979601615442535e-05, "loss": 1.2975, "step": 900 }, { "epoch": 0.06104749644284843, "grad_norm": 12.314823150634766, "learning_rate": 9.979464713532754e-05, "loss": 1.1916, "step": 901 }, { "epoch": 0.061115251710820516, "grad_norm": 13.761808395385742, "learning_rate": 9.979327811622972e-05, "loss": 1.3037, "step": 902 }, { "epoch": 0.0611830069787926, "grad_norm": 13.308722496032715, "learning_rate": 9.97919090971319e-05, "loss": 1.0598, "step": 903 }, { "epoch": 0.061250762246764685, "grad_norm": 13.121098518371582, "learning_rate": 9.979054007803408e-05, "loss": 1.3378, "step": 904 }, { "epoch": 0.06131851751473677, "grad_norm": 16.975666046142578, "learning_rate": 9.978917105893628e-05, "loss": 1.2826, "step": 905 }, { "epoch": 0.061386272782708855, "grad_norm": 14.529984474182129, "learning_rate": 9.978780203983846e-05, "loss": 1.3467, "step": 906 }, { "epoch": 0.06145402805068094, "grad_norm": 11.081110000610352, "learning_rate": 9.978643302074064e-05, "loss": 1.1906, "step": 907 }, { "epoch": 0.061521783318653024, "grad_norm": 12.871200561523438, "learning_rate": 9.978506400164282e-05, "loss": 1.1057, "step": 908 }, { "epoch": 0.06158953858662511, "grad_norm": 13.982168197631836, "learning_rate": 9.9783694982545e-05, "loss": 1.3824, "step": 909 }, { "epoch": 0.061657293854597194, "grad_norm": 13.076074600219727, "learning_rate": 9.97823259634472e-05, "loss": 1.2985, "step": 910 }, { "epoch": 0.06172504912256928, "grad_norm": 11.015650749206543, "learning_rate": 9.978095694434937e-05, "loss": 1.2406, "step": 911 }, { "epoch": 0.06179280439054136, "grad_norm": 13.6082763671875, "learning_rate": 9.977958792525155e-05, "loss": 1.3873, "step": 912 }, { "epoch": 0.06186055965851345, "grad_norm": 15.930809020996094, "learning_rate": 9.977821890615375e-05, "loss": 1.3542, "step": 913 }, { "epoch": 0.06192831492648553, "grad_norm": 10.710271835327148, "learning_rate": 9.977684988705593e-05, "loss": 1.1571, "step": 914 }, { "epoch": 0.06199607019445762, "grad_norm": 11.110217094421387, "learning_rate": 9.977548086795811e-05, "loss": 1.3255, "step": 915 }, { "epoch": 0.0620638254624297, "grad_norm": 11.451903343200684, "learning_rate": 9.97741118488603e-05, "loss": 1.3681, "step": 916 }, { "epoch": 0.062131580730401786, "grad_norm": 10.884252548217773, "learning_rate": 9.977274282976248e-05, "loss": 1.1988, "step": 917 }, { "epoch": 0.06219933599837387, "grad_norm": 11.031237602233887, "learning_rate": 9.977137381066466e-05, "loss": 1.26, "step": 918 }, { "epoch": 0.062267091266345956, "grad_norm": 11.585648536682129, "learning_rate": 9.977000479156686e-05, "loss": 1.3099, "step": 919 }, { "epoch": 0.06233484653431804, "grad_norm": 10.867992401123047, "learning_rate": 9.976863577246904e-05, "loss": 1.2624, "step": 920 }, { "epoch": 0.062402601802290125, "grad_norm": 14.552916526794434, "learning_rate": 9.976726675337122e-05, "loss": 1.2244, "step": 921 }, { "epoch": 0.06247035707026221, "grad_norm": 12.101760864257812, "learning_rate": 9.97658977342734e-05, "loss": 1.1925, "step": 922 }, { "epoch": 0.0625381123382343, "grad_norm": 14.113842010498047, "learning_rate": 9.976452871517559e-05, "loss": 1.3395, "step": 923 }, { "epoch": 0.06260586760620639, "grad_norm": 17.214614868164062, "learning_rate": 9.976315969607777e-05, "loss": 1.3943, "step": 924 }, { "epoch": 0.06267362287417846, "grad_norm": 13.43308162689209, "learning_rate": 9.976179067697995e-05, "loss": 1.1596, "step": 925 }, { "epoch": 0.06274137814215056, "grad_norm": 13.806952476501465, "learning_rate": 9.976042165788213e-05, "loss": 1.0354, "step": 926 }, { "epoch": 0.06280913341012263, "grad_norm": 15.638693809509277, "learning_rate": 9.975905263878431e-05, "loss": 1.3807, "step": 927 }, { "epoch": 0.06287688867809473, "grad_norm": 14.337742805480957, "learning_rate": 9.975768361968651e-05, "loss": 1.537, "step": 928 }, { "epoch": 0.0629446439460668, "grad_norm": 14.540297508239746, "learning_rate": 9.975631460058869e-05, "loss": 1.2522, "step": 929 }, { "epoch": 0.0630123992140389, "grad_norm": 15.991955757141113, "learning_rate": 9.975494558149087e-05, "loss": 1.3855, "step": 930 }, { "epoch": 0.06308015448201097, "grad_norm": 13.957479476928711, "learning_rate": 9.975357656239305e-05, "loss": 1.43, "step": 931 }, { "epoch": 0.06314790974998306, "grad_norm": 16.805377960205078, "learning_rate": 9.975220754329523e-05, "loss": 1.383, "step": 932 }, { "epoch": 0.06321566501795514, "grad_norm": 12.41854476928711, "learning_rate": 9.975083852419742e-05, "loss": 1.068, "step": 933 }, { "epoch": 0.06328342028592723, "grad_norm": 15.929006576538086, "learning_rate": 9.97494695050996e-05, "loss": 1.212, "step": 934 }, { "epoch": 0.06335117555389931, "grad_norm": 13.205544471740723, "learning_rate": 9.974810048600178e-05, "loss": 1.3682, "step": 935 }, { "epoch": 0.0634189308218714, "grad_norm": 12.105626106262207, "learning_rate": 9.974673146690396e-05, "loss": 1.3821, "step": 936 }, { "epoch": 0.06348668608984348, "grad_norm": 13.776711463928223, "learning_rate": 9.974536244780616e-05, "loss": 1.1073, "step": 937 }, { "epoch": 0.06355444135781557, "grad_norm": 12.227380752563477, "learning_rate": 9.974399342870834e-05, "loss": 1.2026, "step": 938 }, { "epoch": 0.06362219662578765, "grad_norm": 12.723440170288086, "learning_rate": 9.974262440961052e-05, "loss": 1.1325, "step": 939 }, { "epoch": 0.06368995189375974, "grad_norm": 13.943262100219727, "learning_rate": 9.97412553905127e-05, "loss": 1.1878, "step": 940 }, { "epoch": 0.06375770716173182, "grad_norm": 12.644627571105957, "learning_rate": 9.973988637141488e-05, "loss": 1.3097, "step": 941 }, { "epoch": 0.06382546242970391, "grad_norm": 12.108241081237793, "learning_rate": 9.973851735231707e-05, "loss": 1.1686, "step": 942 }, { "epoch": 0.06389321769767599, "grad_norm": 14.375092506408691, "learning_rate": 9.973714833321925e-05, "loss": 1.2721, "step": 943 }, { "epoch": 0.06396097296564808, "grad_norm": 13.439800262451172, "learning_rate": 9.973577931412143e-05, "loss": 1.3898, "step": 944 }, { "epoch": 0.06402872823362016, "grad_norm": 13.717879295349121, "learning_rate": 9.973441029502361e-05, "loss": 1.3823, "step": 945 }, { "epoch": 0.06409648350159225, "grad_norm": 12.745361328125, "learning_rate": 9.973304127592581e-05, "loss": 1.2396, "step": 946 }, { "epoch": 0.06416423876956433, "grad_norm": 11.784343719482422, "learning_rate": 9.973167225682799e-05, "loss": 1.214, "step": 947 }, { "epoch": 0.06423199403753642, "grad_norm": 14.205467224121094, "learning_rate": 9.973030323773017e-05, "loss": 1.2803, "step": 948 }, { "epoch": 0.0642997493055085, "grad_norm": 13.257532119750977, "learning_rate": 9.972893421863235e-05, "loss": 1.378, "step": 949 }, { "epoch": 0.06436750457348059, "grad_norm": 15.153338432312012, "learning_rate": 9.972756519953453e-05, "loss": 1.2854, "step": 950 }, { "epoch": 0.06443525984145267, "grad_norm": 16.765771865844727, "learning_rate": 9.972619618043672e-05, "loss": 1.3016, "step": 951 }, { "epoch": 0.06450301510942476, "grad_norm": 14.636106491088867, "learning_rate": 9.97248271613389e-05, "loss": 1.3803, "step": 952 }, { "epoch": 0.06457077037739685, "grad_norm": 13.87410831451416, "learning_rate": 9.972345814224108e-05, "loss": 1.4126, "step": 953 }, { "epoch": 0.06463852564536893, "grad_norm": 14.328899383544922, "learning_rate": 9.972208912314326e-05, "loss": 1.061, "step": 954 }, { "epoch": 0.06470628091334102, "grad_norm": 12.485203742980957, "learning_rate": 9.972072010404544e-05, "loss": 1.0985, "step": 955 }, { "epoch": 0.0647740361813131, "grad_norm": 13.77907943725586, "learning_rate": 9.971935108494764e-05, "loss": 1.3534, "step": 956 }, { "epoch": 0.06484179144928519, "grad_norm": 10.579590797424316, "learning_rate": 9.971798206584982e-05, "loss": 1.166, "step": 957 }, { "epoch": 0.06490954671725727, "grad_norm": 14.690185546875, "learning_rate": 9.9716613046752e-05, "loss": 1.3666, "step": 958 }, { "epoch": 0.06497730198522936, "grad_norm": 12.904786109924316, "learning_rate": 9.97152440276542e-05, "loss": 1.14, "step": 959 }, { "epoch": 0.06504505725320144, "grad_norm": 12.126219749450684, "learning_rate": 9.971387500855637e-05, "loss": 1.4157, "step": 960 }, { "epoch": 0.06511281252117353, "grad_norm": 13.747931480407715, "learning_rate": 9.971250598945855e-05, "loss": 1.4557, "step": 961 }, { "epoch": 0.0651805677891456, "grad_norm": 13.232327461242676, "learning_rate": 9.971113697036075e-05, "loss": 1.3471, "step": 962 }, { "epoch": 0.0652483230571177, "grad_norm": 14.886791229248047, "learning_rate": 9.970976795126293e-05, "loss": 1.1866, "step": 963 }, { "epoch": 0.06531607832508977, "grad_norm": 11.747659683227539, "learning_rate": 9.970839893216511e-05, "loss": 1.0443, "step": 964 }, { "epoch": 0.06538383359306187, "grad_norm": 11.181273460388184, "learning_rate": 9.97070299130673e-05, "loss": 1.1391, "step": 965 }, { "epoch": 0.06545158886103394, "grad_norm": 11.9672269821167, "learning_rate": 9.970566089396948e-05, "loss": 1.2847, "step": 966 }, { "epoch": 0.06551934412900604, "grad_norm": 15.825364112854004, "learning_rate": 9.970429187487166e-05, "loss": 1.3301, "step": 967 }, { "epoch": 0.06558709939697811, "grad_norm": 12.26963996887207, "learning_rate": 9.970292285577384e-05, "loss": 1.2524, "step": 968 }, { "epoch": 0.0656548546649502, "grad_norm": 11.440977096557617, "learning_rate": 9.970155383667604e-05, "loss": 1.229, "step": 969 }, { "epoch": 0.06572260993292228, "grad_norm": 10.704546928405762, "learning_rate": 9.970018481757822e-05, "loss": 0.9936, "step": 970 }, { "epoch": 0.06579036520089437, "grad_norm": 13.20880126953125, "learning_rate": 9.96988157984804e-05, "loss": 1.2353, "step": 971 }, { "epoch": 0.06585812046886645, "grad_norm": 13.101622581481934, "learning_rate": 9.969744677938258e-05, "loss": 1.2559, "step": 972 }, { "epoch": 0.06592587573683854, "grad_norm": 11.725826263427734, "learning_rate": 9.969607776028476e-05, "loss": 1.2051, "step": 973 }, { "epoch": 0.06599363100481062, "grad_norm": 11.890633583068848, "learning_rate": 9.969470874118695e-05, "loss": 1.1607, "step": 974 }, { "epoch": 0.06606138627278271, "grad_norm": 11.066970825195312, "learning_rate": 9.969333972208913e-05, "loss": 1.1031, "step": 975 }, { "epoch": 0.06612914154075479, "grad_norm": 12.26187515258789, "learning_rate": 9.969197070299131e-05, "loss": 1.309, "step": 976 }, { "epoch": 0.06619689680872688, "grad_norm": 13.490363121032715, "learning_rate": 9.96906016838935e-05, "loss": 1.3062, "step": 977 }, { "epoch": 0.06626465207669896, "grad_norm": 12.306289672851562, "learning_rate": 9.968923266479567e-05, "loss": 0.9784, "step": 978 }, { "epoch": 0.06633240734467105, "grad_norm": 10.699983596801758, "learning_rate": 9.968786364569787e-05, "loss": 1.0547, "step": 979 }, { "epoch": 0.06640016261264313, "grad_norm": 12.298179626464844, "learning_rate": 9.968649462660005e-05, "loss": 1.2496, "step": 980 }, { "epoch": 0.06646791788061522, "grad_norm": 15.239167213439941, "learning_rate": 9.968512560750223e-05, "loss": 1.2545, "step": 981 }, { "epoch": 0.0665356731485873, "grad_norm": 11.61802864074707, "learning_rate": 9.968375658840441e-05, "loss": 1.1668, "step": 982 }, { "epoch": 0.06660342841655939, "grad_norm": 12.804032325744629, "learning_rate": 9.96823875693066e-05, "loss": 1.3203, "step": 983 }, { "epoch": 0.06667118368453147, "grad_norm": 14.511723518371582, "learning_rate": 9.968101855020878e-05, "loss": 1.7519, "step": 984 }, { "epoch": 0.06673893895250356, "grad_norm": 12.442008018493652, "learning_rate": 9.967964953111096e-05, "loss": 1.3758, "step": 985 }, { "epoch": 0.06680669422047564, "grad_norm": 14.486754417419434, "learning_rate": 9.967828051201314e-05, "loss": 1.1924, "step": 986 }, { "epoch": 0.06687444948844773, "grad_norm": 13.529693603515625, "learning_rate": 9.967691149291532e-05, "loss": 1.2698, "step": 987 }, { "epoch": 0.06694220475641981, "grad_norm": 12.980225563049316, "learning_rate": 9.967554247381752e-05, "loss": 1.1896, "step": 988 }, { "epoch": 0.0670099600243919, "grad_norm": 15.495257377624512, "learning_rate": 9.96741734547197e-05, "loss": 1.286, "step": 989 }, { "epoch": 0.06707771529236398, "grad_norm": 12.67573070526123, "learning_rate": 9.967280443562188e-05, "loss": 1.1751, "step": 990 }, { "epoch": 0.06714547056033607, "grad_norm": 11.141845703125, "learning_rate": 9.967143541652406e-05, "loss": 1.5109, "step": 991 }, { "epoch": 0.06721322582830815, "grad_norm": 11.975769996643066, "learning_rate": 9.967006639742625e-05, "loss": 1.1579, "step": 992 }, { "epoch": 0.06728098109628024, "grad_norm": 13.872209548950195, "learning_rate": 9.966869737832843e-05, "loss": 1.1852, "step": 993 }, { "epoch": 0.06734873636425232, "grad_norm": 11.52573299407959, "learning_rate": 9.966732835923061e-05, "loss": 1.2319, "step": 994 }, { "epoch": 0.0674164916322244, "grad_norm": 12.90494155883789, "learning_rate": 9.96659593401328e-05, "loss": 1.2415, "step": 995 }, { "epoch": 0.06748424690019648, "grad_norm": 14.210317611694336, "learning_rate": 9.966459032103497e-05, "loss": 1.2901, "step": 996 }, { "epoch": 0.06755200216816858, "grad_norm": 12.600135803222656, "learning_rate": 9.966322130193717e-05, "loss": 1.4516, "step": 997 }, { "epoch": 0.06761975743614065, "grad_norm": 14.462118148803711, "learning_rate": 9.966185228283935e-05, "loss": 1.2205, "step": 998 }, { "epoch": 0.06768751270411275, "grad_norm": 12.870843887329102, "learning_rate": 9.966048326374153e-05, "loss": 1.179, "step": 999 }, { "epoch": 0.06775526797208482, "grad_norm": 16.4424991607666, "learning_rate": 9.965911424464371e-05, "loss": 1.5714, "step": 1000 }, { "epoch": 0.06782302324005691, "grad_norm": 12.902230262756348, "learning_rate": 9.96577452255459e-05, "loss": 1.2803, "step": 1001 }, { "epoch": 0.06789077850802899, "grad_norm": 11.469466209411621, "learning_rate": 9.965637620644808e-05, "loss": 1.0771, "step": 1002 }, { "epoch": 0.06795853377600108, "grad_norm": 13.96650505065918, "learning_rate": 9.965500718735026e-05, "loss": 1.2427, "step": 1003 }, { "epoch": 0.06802628904397316, "grad_norm": 11.55516242980957, "learning_rate": 9.965363816825244e-05, "loss": 1.0396, "step": 1004 }, { "epoch": 0.06809404431194525, "grad_norm": 13.34827709197998, "learning_rate": 9.965226914915462e-05, "loss": 1.181, "step": 1005 }, { "epoch": 0.06816179957991735, "grad_norm": 11.243910789489746, "learning_rate": 9.965090013005682e-05, "loss": 1.2875, "step": 1006 }, { "epoch": 0.06822955484788942, "grad_norm": 14.152894020080566, "learning_rate": 9.9649531110959e-05, "loss": 1.3125, "step": 1007 }, { "epoch": 0.06829731011586151, "grad_norm": 13.010010719299316, "learning_rate": 9.964816209186118e-05, "loss": 1.3213, "step": 1008 }, { "epoch": 0.06836506538383359, "grad_norm": 15.990034103393555, "learning_rate": 9.964679307276337e-05, "loss": 1.1878, "step": 1009 }, { "epoch": 0.06843282065180568, "grad_norm": 12.943589210510254, "learning_rate": 9.964542405366555e-05, "loss": 1.2443, "step": 1010 }, { "epoch": 0.06850057591977776, "grad_norm": 12.108896255493164, "learning_rate": 9.964405503456775e-05, "loss": 1.1719, "step": 1011 }, { "epoch": 0.06856833118774985, "grad_norm": 12.097951889038086, "learning_rate": 9.964268601546993e-05, "loss": 1.0653, "step": 1012 }, { "epoch": 0.06863608645572193, "grad_norm": 14.222228050231934, "learning_rate": 9.964131699637211e-05, "loss": 1.3379, "step": 1013 }, { "epoch": 0.06870384172369402, "grad_norm": 12.636894226074219, "learning_rate": 9.963994797727429e-05, "loss": 1.5559, "step": 1014 }, { "epoch": 0.0687715969916661, "grad_norm": 15.458481788635254, "learning_rate": 9.963857895817648e-05, "loss": 1.2542, "step": 1015 }, { "epoch": 0.06883935225963819, "grad_norm": 11.246847152709961, "learning_rate": 9.963720993907866e-05, "loss": 1.1944, "step": 1016 }, { "epoch": 0.06890710752761027, "grad_norm": 11.699065208435059, "learning_rate": 9.963584091998084e-05, "loss": 1.0739, "step": 1017 }, { "epoch": 0.06897486279558236, "grad_norm": 12.259678840637207, "learning_rate": 9.963447190088302e-05, "loss": 1.1365, "step": 1018 }, { "epoch": 0.06904261806355444, "grad_norm": 13.594696998596191, "learning_rate": 9.96331028817852e-05, "loss": 1.1418, "step": 1019 }, { "epoch": 0.06911037333152653, "grad_norm": 12.90888786315918, "learning_rate": 9.96317338626874e-05, "loss": 1.1987, "step": 1020 }, { "epoch": 0.06917812859949861, "grad_norm": 13.04245662689209, "learning_rate": 9.963036484358958e-05, "loss": 1.4288, "step": 1021 }, { "epoch": 0.0692458838674707, "grad_norm": 12.706077575683594, "learning_rate": 9.962899582449176e-05, "loss": 1.222, "step": 1022 }, { "epoch": 0.06931363913544278, "grad_norm": 14.205679893493652, "learning_rate": 9.962762680539394e-05, "loss": 1.3305, "step": 1023 }, { "epoch": 0.06938139440341487, "grad_norm": 17.09891128540039, "learning_rate": 9.962625778629613e-05, "loss": 1.47, "step": 1024 }, { "epoch": 0.06944914967138695, "grad_norm": 14.603500366210938, "learning_rate": 9.962488876719831e-05, "loss": 1.3887, "step": 1025 }, { "epoch": 0.06951690493935904, "grad_norm": 10.820066452026367, "learning_rate": 9.962351974810049e-05, "loss": 1.1543, "step": 1026 }, { "epoch": 0.06958466020733112, "grad_norm": 10.99889850616455, "learning_rate": 9.962215072900267e-05, "loss": 1.2908, "step": 1027 }, { "epoch": 0.06965241547530321, "grad_norm": 13.470711708068848, "learning_rate": 9.962078170990485e-05, "loss": 1.1422, "step": 1028 }, { "epoch": 0.06972017074327529, "grad_norm": 11.086441040039062, "learning_rate": 9.961941269080705e-05, "loss": 0.9835, "step": 1029 }, { "epoch": 0.06978792601124738, "grad_norm": 16.768535614013672, "learning_rate": 9.961804367170923e-05, "loss": 1.404, "step": 1030 }, { "epoch": 0.06985568127921946, "grad_norm": 15.851200103759766, "learning_rate": 9.961667465261141e-05, "loss": 1.4874, "step": 1031 }, { "epoch": 0.06992343654719155, "grad_norm": 11.995482444763184, "learning_rate": 9.961530563351359e-05, "loss": 1.1497, "step": 1032 }, { "epoch": 0.06999119181516363, "grad_norm": 13.591619491577148, "learning_rate": 9.961393661441577e-05, "loss": 1.4773, "step": 1033 }, { "epoch": 0.07005894708313572, "grad_norm": 16.878938674926758, "learning_rate": 9.961256759531796e-05, "loss": 1.2522, "step": 1034 }, { "epoch": 0.0701267023511078, "grad_norm": 11.901616096496582, "learning_rate": 9.961119857622014e-05, "loss": 1.2164, "step": 1035 }, { "epoch": 0.07019445761907989, "grad_norm": 14.935117721557617, "learning_rate": 9.960982955712232e-05, "loss": 1.1251, "step": 1036 }, { "epoch": 0.07026221288705196, "grad_norm": 12.380253791809082, "learning_rate": 9.96084605380245e-05, "loss": 1.3965, "step": 1037 }, { "epoch": 0.07032996815502406, "grad_norm": 11.645035743713379, "learning_rate": 9.96070915189267e-05, "loss": 1.266, "step": 1038 }, { "epoch": 0.07039772342299613, "grad_norm": 14.525420188903809, "learning_rate": 9.960572249982888e-05, "loss": 1.3991, "step": 1039 }, { "epoch": 0.07046547869096823, "grad_norm": 14.775094985961914, "learning_rate": 9.960435348073106e-05, "loss": 1.5958, "step": 1040 }, { "epoch": 0.0705332339589403, "grad_norm": 10.2192964553833, "learning_rate": 9.960298446163324e-05, "loss": 1.1793, "step": 1041 }, { "epoch": 0.0706009892269124, "grad_norm": 13.074480056762695, "learning_rate": 9.960161544253542e-05, "loss": 1.4243, "step": 1042 }, { "epoch": 0.07066874449488447, "grad_norm": 12.679484367370605, "learning_rate": 9.960024642343761e-05, "loss": 1.3398, "step": 1043 }, { "epoch": 0.07073649976285656, "grad_norm": 9.061332702636719, "learning_rate": 9.95988774043398e-05, "loss": 1.0036, "step": 1044 }, { "epoch": 0.07080425503082864, "grad_norm": 13.423661231994629, "learning_rate": 9.959750838524197e-05, "loss": 1.2767, "step": 1045 }, { "epoch": 0.07087201029880073, "grad_norm": 13.955148696899414, "learning_rate": 9.959613936614415e-05, "loss": 1.311, "step": 1046 }, { "epoch": 0.07093976556677281, "grad_norm": 12.746015548706055, "learning_rate": 9.959477034704635e-05, "loss": 1.42, "step": 1047 }, { "epoch": 0.0710075208347449, "grad_norm": 11.409982681274414, "learning_rate": 9.959340132794853e-05, "loss": 1.3051, "step": 1048 }, { "epoch": 0.07107527610271698, "grad_norm": 11.801681518554688, "learning_rate": 9.959203230885071e-05, "loss": 1.2315, "step": 1049 }, { "epoch": 0.07114303137068907, "grad_norm": 13.041158676147461, "learning_rate": 9.959066328975289e-05, "loss": 1.3487, "step": 1050 }, { "epoch": 0.07121078663866115, "grad_norm": 13.474900245666504, "learning_rate": 9.958929427065507e-05, "loss": 1.4542, "step": 1051 }, { "epoch": 0.07127854190663324, "grad_norm": 12.335237503051758, "learning_rate": 9.958792525155726e-05, "loss": 1.1841, "step": 1052 }, { "epoch": 0.07134629717460532, "grad_norm": 14.909475326538086, "learning_rate": 9.958655623245944e-05, "loss": 1.272, "step": 1053 }, { "epoch": 0.07141405244257741, "grad_norm": 13.449742317199707, "learning_rate": 9.958518721336162e-05, "loss": 1.4881, "step": 1054 }, { "epoch": 0.07148180771054949, "grad_norm": 12.2557954788208, "learning_rate": 9.958381819426382e-05, "loss": 1.2059, "step": 1055 }, { "epoch": 0.07154956297852158, "grad_norm": 13.71298885345459, "learning_rate": 9.9582449175166e-05, "loss": 1.2627, "step": 1056 }, { "epoch": 0.07161731824649366, "grad_norm": 15.4293212890625, "learning_rate": 9.958108015606818e-05, "loss": 1.4663, "step": 1057 }, { "epoch": 0.07168507351446575, "grad_norm": 13.665759086608887, "learning_rate": 9.957971113697037e-05, "loss": 1.4634, "step": 1058 }, { "epoch": 0.07175282878243784, "grad_norm": 13.064310073852539, "learning_rate": 9.957834211787255e-05, "loss": 1.2686, "step": 1059 }, { "epoch": 0.07182058405040992, "grad_norm": 15.21746826171875, "learning_rate": 9.957697309877473e-05, "loss": 1.4861, "step": 1060 }, { "epoch": 0.07188833931838201, "grad_norm": 12.499883651733398, "learning_rate": 9.957560407967693e-05, "loss": 1.5275, "step": 1061 }, { "epoch": 0.07195609458635409, "grad_norm": 9.355907440185547, "learning_rate": 9.957423506057911e-05, "loss": 1.1509, "step": 1062 }, { "epoch": 0.07202384985432618, "grad_norm": 10.428252220153809, "learning_rate": 9.957286604148129e-05, "loss": 1.0874, "step": 1063 }, { "epoch": 0.07209160512229826, "grad_norm": 11.351346015930176, "learning_rate": 9.957149702238347e-05, "loss": 1.0887, "step": 1064 }, { "epoch": 0.07215936039027035, "grad_norm": 15.271830558776855, "learning_rate": 9.957012800328565e-05, "loss": 1.2466, "step": 1065 }, { "epoch": 0.07222711565824243, "grad_norm": 11.172418594360352, "learning_rate": 9.956875898418784e-05, "loss": 1.0958, "step": 1066 }, { "epoch": 0.07229487092621452, "grad_norm": 14.221702575683594, "learning_rate": 9.956738996509002e-05, "loss": 1.2288, "step": 1067 }, { "epoch": 0.0723626261941866, "grad_norm": 12.167356491088867, "learning_rate": 9.95660209459922e-05, "loss": 1.1159, "step": 1068 }, { "epoch": 0.07243038146215869, "grad_norm": 12.607329368591309, "learning_rate": 9.956465192689438e-05, "loss": 1.2475, "step": 1069 }, { "epoch": 0.07249813673013077, "grad_norm": 10.59451675415039, "learning_rate": 9.956328290779658e-05, "loss": 1.1493, "step": 1070 }, { "epoch": 0.07256589199810286, "grad_norm": 11.190742492675781, "learning_rate": 9.956191388869876e-05, "loss": 1.4549, "step": 1071 }, { "epoch": 0.07263364726607494, "grad_norm": 11.225564956665039, "learning_rate": 9.956054486960094e-05, "loss": 1.0741, "step": 1072 }, { "epoch": 0.07270140253404703, "grad_norm": 13.648139953613281, "learning_rate": 9.955917585050312e-05, "loss": 1.2942, "step": 1073 }, { "epoch": 0.0727691578020191, "grad_norm": 14.606941223144531, "learning_rate": 9.95578068314053e-05, "loss": 1.253, "step": 1074 }, { "epoch": 0.0728369130699912, "grad_norm": 11.043729782104492, "learning_rate": 9.955643781230749e-05, "loss": 1.2279, "step": 1075 }, { "epoch": 0.07290466833796327, "grad_norm": 12.463634490966797, "learning_rate": 9.955506879320967e-05, "loss": 1.4399, "step": 1076 }, { "epoch": 0.07297242360593537, "grad_norm": 13.74101734161377, "learning_rate": 9.955369977411185e-05, "loss": 1.4323, "step": 1077 }, { "epoch": 0.07304017887390744, "grad_norm": 10.1694974899292, "learning_rate": 9.955233075501403e-05, "loss": 1.0113, "step": 1078 }, { "epoch": 0.07310793414187954, "grad_norm": 14.407991409301758, "learning_rate": 9.955096173591623e-05, "loss": 1.7102, "step": 1079 }, { "epoch": 0.07317568940985161, "grad_norm": 13.84760570526123, "learning_rate": 9.954959271681841e-05, "loss": 1.4491, "step": 1080 }, { "epoch": 0.0732434446778237, "grad_norm": 12.220841407775879, "learning_rate": 9.954822369772059e-05, "loss": 1.1826, "step": 1081 }, { "epoch": 0.07331119994579578, "grad_norm": 11.380377769470215, "learning_rate": 9.954685467862277e-05, "loss": 1.3537, "step": 1082 }, { "epoch": 0.07337895521376787, "grad_norm": 12.916484832763672, "learning_rate": 9.954548565952495e-05, "loss": 1.3406, "step": 1083 }, { "epoch": 0.07344671048173995, "grad_norm": 14.107590675354004, "learning_rate": 9.954411664042714e-05, "loss": 1.0531, "step": 1084 }, { "epoch": 0.07351446574971204, "grad_norm": 13.498798370361328, "learning_rate": 9.954274762132932e-05, "loss": 1.1546, "step": 1085 }, { "epoch": 0.07358222101768412, "grad_norm": 13.155747413635254, "learning_rate": 9.95413786022315e-05, "loss": 1.212, "step": 1086 }, { "epoch": 0.07364997628565621, "grad_norm": 14.135687828063965, "learning_rate": 9.954000958313368e-05, "loss": 1.2001, "step": 1087 }, { "epoch": 0.07371773155362829, "grad_norm": 13.123790740966797, "learning_rate": 9.953864056403586e-05, "loss": 1.2735, "step": 1088 }, { "epoch": 0.07378548682160038, "grad_norm": 14.591660499572754, "learning_rate": 9.953727154493806e-05, "loss": 1.2977, "step": 1089 }, { "epoch": 0.07385324208957246, "grad_norm": 13.30614948272705, "learning_rate": 9.953590252584024e-05, "loss": 1.2445, "step": 1090 }, { "epoch": 0.07392099735754455, "grad_norm": 14.542524337768555, "learning_rate": 9.953453350674242e-05, "loss": 1.2327, "step": 1091 }, { "epoch": 0.07398875262551663, "grad_norm": 12.926522254943848, "learning_rate": 9.95331644876446e-05, "loss": 1.2139, "step": 1092 }, { "epoch": 0.07405650789348872, "grad_norm": 11.622479438781738, "learning_rate": 9.953179546854679e-05, "loss": 1.2887, "step": 1093 }, { "epoch": 0.0741242631614608, "grad_norm": 14.87485122680664, "learning_rate": 9.953042644944897e-05, "loss": 1.3132, "step": 1094 }, { "epoch": 0.07419201842943289, "grad_norm": 13.096991539001465, "learning_rate": 9.952905743035115e-05, "loss": 1.0783, "step": 1095 }, { "epoch": 0.07425977369740497, "grad_norm": 14.676551818847656, "learning_rate": 9.952768841125333e-05, "loss": 1.3171, "step": 1096 }, { "epoch": 0.07432752896537706, "grad_norm": 10.399755477905273, "learning_rate": 9.952631939215551e-05, "loss": 1.0747, "step": 1097 }, { "epoch": 0.07439528423334914, "grad_norm": 11.052745819091797, "learning_rate": 9.952495037305771e-05, "loss": 1.1653, "step": 1098 }, { "epoch": 0.07446303950132123, "grad_norm": 13.014352798461914, "learning_rate": 9.952358135395989e-05, "loss": 1.3727, "step": 1099 }, { "epoch": 0.07453079476929331, "grad_norm": 13.202262878417969, "learning_rate": 9.952221233486207e-05, "loss": 1.5063, "step": 1100 }, { "epoch": 0.0745985500372654, "grad_norm": 14.272111892700195, "learning_rate": 9.952084331576426e-05, "loss": 1.0183, "step": 1101 }, { "epoch": 0.07466630530523748, "grad_norm": 14.23975658416748, "learning_rate": 9.951947429666644e-05, "loss": 1.3518, "step": 1102 }, { "epoch": 0.07473406057320957, "grad_norm": 12.293742179870605, "learning_rate": 9.951810527756862e-05, "loss": 1.4021, "step": 1103 }, { "epoch": 0.07480181584118165, "grad_norm": 16.026020050048828, "learning_rate": 9.951673625847082e-05, "loss": 1.3412, "step": 1104 }, { "epoch": 0.07486957110915374, "grad_norm": 11.120819091796875, "learning_rate": 9.9515367239373e-05, "loss": 1.331, "step": 1105 }, { "epoch": 0.07493732637712582, "grad_norm": 12.363526344299316, "learning_rate": 9.951399822027518e-05, "loss": 1.4284, "step": 1106 }, { "epoch": 0.0750050816450979, "grad_norm": 14.377492904663086, "learning_rate": 9.951262920117737e-05, "loss": 1.3686, "step": 1107 }, { "epoch": 0.07507283691306998, "grad_norm": 11.366288185119629, "learning_rate": 9.951126018207955e-05, "loss": 1.2329, "step": 1108 }, { "epoch": 0.07514059218104208, "grad_norm": 10.26131820678711, "learning_rate": 9.950989116298173e-05, "loss": 1.1091, "step": 1109 }, { "epoch": 0.07520834744901415, "grad_norm": 12.64631462097168, "learning_rate": 9.950852214388391e-05, "loss": 1.1231, "step": 1110 }, { "epoch": 0.07527610271698625, "grad_norm": 13.042781829833984, "learning_rate": 9.95071531247861e-05, "loss": 1.2338, "step": 1111 }, { "epoch": 0.07534385798495834, "grad_norm": 11.577115058898926, "learning_rate": 9.950578410568829e-05, "loss": 1.2143, "step": 1112 }, { "epoch": 0.07541161325293042, "grad_norm": 13.640811920166016, "learning_rate": 9.950441508659047e-05, "loss": 1.224, "step": 1113 }, { "epoch": 0.0754793685209025, "grad_norm": 10.271018981933594, "learning_rate": 9.950304606749265e-05, "loss": 1.0459, "step": 1114 }, { "epoch": 0.07554712378887458, "grad_norm": 12.053836822509766, "learning_rate": 9.950167704839483e-05, "loss": 1.2137, "step": 1115 }, { "epoch": 0.07561487905684668, "grad_norm": 10.022509574890137, "learning_rate": 9.950030802929702e-05, "loss": 1.092, "step": 1116 }, { "epoch": 0.07568263432481875, "grad_norm": 12.28339672088623, "learning_rate": 9.94989390101992e-05, "loss": 1.1616, "step": 1117 }, { "epoch": 0.07575038959279085, "grad_norm": 14.182686805725098, "learning_rate": 9.949756999110138e-05, "loss": 1.5135, "step": 1118 }, { "epoch": 0.07581814486076292, "grad_norm": 10.668661117553711, "learning_rate": 9.949620097200356e-05, "loss": 1.2412, "step": 1119 }, { "epoch": 0.07588590012873501, "grad_norm": 14.443583488464355, "learning_rate": 9.949483195290574e-05, "loss": 1.207, "step": 1120 }, { "epoch": 0.07595365539670709, "grad_norm": 12.418794631958008, "learning_rate": 9.949346293380794e-05, "loss": 1.1907, "step": 1121 }, { "epoch": 0.07602141066467918, "grad_norm": 12.429618835449219, "learning_rate": 9.949209391471012e-05, "loss": 1.4403, "step": 1122 }, { "epoch": 0.07608916593265126, "grad_norm": 9.524622917175293, "learning_rate": 9.94907248956123e-05, "loss": 1.139, "step": 1123 }, { "epoch": 0.07615692120062335, "grad_norm": 10.974812507629395, "learning_rate": 9.948935587651448e-05, "loss": 1.4055, "step": 1124 }, { "epoch": 0.07622467646859543, "grad_norm": 16.152681350708008, "learning_rate": 9.948798685741667e-05, "loss": 1.0874, "step": 1125 }, { "epoch": 0.07629243173656752, "grad_norm": 12.023541450500488, "learning_rate": 9.948661783831885e-05, "loss": 1.2378, "step": 1126 }, { "epoch": 0.0763601870045396, "grad_norm": 11.597234725952148, "learning_rate": 9.948524881922103e-05, "loss": 1.0104, "step": 1127 }, { "epoch": 0.07642794227251169, "grad_norm": 11.374302864074707, "learning_rate": 9.948387980012321e-05, "loss": 1.1099, "step": 1128 }, { "epoch": 0.07649569754048377, "grad_norm": 14.398423194885254, "learning_rate": 9.94825107810254e-05, "loss": 1.5533, "step": 1129 }, { "epoch": 0.07656345280845586, "grad_norm": 13.1026611328125, "learning_rate": 9.948114176192759e-05, "loss": 1.3948, "step": 1130 }, { "epoch": 0.07663120807642794, "grad_norm": 12.012560844421387, "learning_rate": 9.947977274282977e-05, "loss": 1.2156, "step": 1131 }, { "epoch": 0.07669896334440003, "grad_norm": 12.900229454040527, "learning_rate": 9.947840372373195e-05, "loss": 1.2658, "step": 1132 }, { "epoch": 0.07676671861237211, "grad_norm": 15.250492095947266, "learning_rate": 9.947703470463413e-05, "loss": 1.6102, "step": 1133 }, { "epoch": 0.0768344738803442, "grad_norm": 15.08134937286377, "learning_rate": 9.947566568553632e-05, "loss": 1.1947, "step": 1134 }, { "epoch": 0.07690222914831628, "grad_norm": 13.353601455688477, "learning_rate": 9.94742966664385e-05, "loss": 1.3211, "step": 1135 }, { "epoch": 0.07696998441628837, "grad_norm": 11.310175895690918, "learning_rate": 9.947292764734068e-05, "loss": 1.2223, "step": 1136 }, { "epoch": 0.07703773968426045, "grad_norm": 11.800848960876465, "learning_rate": 9.947155862824286e-05, "loss": 1.2131, "step": 1137 }, { "epoch": 0.07710549495223254, "grad_norm": 11.748014450073242, "learning_rate": 9.947018960914504e-05, "loss": 1.0734, "step": 1138 }, { "epoch": 0.07717325022020462, "grad_norm": 12.282258033752441, "learning_rate": 9.946882059004724e-05, "loss": 1.3257, "step": 1139 }, { "epoch": 0.07724100548817671, "grad_norm": 11.93818473815918, "learning_rate": 9.946745157094942e-05, "loss": 1.1452, "step": 1140 }, { "epoch": 0.07730876075614879, "grad_norm": 13.397029876708984, "learning_rate": 9.94660825518516e-05, "loss": 1.2625, "step": 1141 }, { "epoch": 0.07737651602412088, "grad_norm": 12.135769844055176, "learning_rate": 9.946471353275378e-05, "loss": 1.3624, "step": 1142 }, { "epoch": 0.07744427129209296, "grad_norm": 11.304028511047363, "learning_rate": 9.946334451365596e-05, "loss": 1.3019, "step": 1143 }, { "epoch": 0.07751202656006505, "grad_norm": 10.978137016296387, "learning_rate": 9.946197549455815e-05, "loss": 1.3137, "step": 1144 }, { "epoch": 0.07757978182803713, "grad_norm": 10.997323989868164, "learning_rate": 9.946060647546033e-05, "loss": 1.3573, "step": 1145 }, { "epoch": 0.07764753709600922, "grad_norm": 11.883647918701172, "learning_rate": 9.945923745636251e-05, "loss": 1.2587, "step": 1146 }, { "epoch": 0.0777152923639813, "grad_norm": 10.70753288269043, "learning_rate": 9.945786843726471e-05, "loss": 1.0866, "step": 1147 }, { "epoch": 0.07778304763195339, "grad_norm": 13.318743705749512, "learning_rate": 9.945649941816689e-05, "loss": 1.3356, "step": 1148 }, { "epoch": 0.07785080289992546, "grad_norm": 10.656171798706055, "learning_rate": 9.945513039906907e-05, "loss": 1.2155, "step": 1149 }, { "epoch": 0.07791855816789756, "grad_norm": 14.73982048034668, "learning_rate": 9.945376137997126e-05, "loss": 1.2276, "step": 1150 }, { "epoch": 0.07798631343586963, "grad_norm": 12.461714744567871, "learning_rate": 9.945239236087344e-05, "loss": 1.2999, "step": 1151 }, { "epoch": 0.07805406870384173, "grad_norm": 12.277376174926758, "learning_rate": 9.945102334177562e-05, "loss": 1.1131, "step": 1152 }, { "epoch": 0.0781218239718138, "grad_norm": 12.726540565490723, "learning_rate": 9.944965432267782e-05, "loss": 1.4181, "step": 1153 }, { "epoch": 0.0781895792397859, "grad_norm": 11.086180686950684, "learning_rate": 9.944828530358e-05, "loss": 1.3852, "step": 1154 }, { "epoch": 0.07825733450775797, "grad_norm": 12.80537223815918, "learning_rate": 9.944691628448218e-05, "loss": 1.1101, "step": 1155 }, { "epoch": 0.07832508977573006, "grad_norm": 11.960269927978516, "learning_rate": 9.944554726538436e-05, "loss": 1.2186, "step": 1156 }, { "epoch": 0.07839284504370214, "grad_norm": 11.732439041137695, "learning_rate": 9.944417824628655e-05, "loss": 0.9854, "step": 1157 }, { "epoch": 0.07846060031167423, "grad_norm": 12.578715324401855, "learning_rate": 9.944280922718873e-05, "loss": 1.0792, "step": 1158 }, { "epoch": 0.07852835557964631, "grad_norm": 13.779712677001953, "learning_rate": 9.944144020809091e-05, "loss": 1.2468, "step": 1159 }, { "epoch": 0.0785961108476184, "grad_norm": 13.95693588256836, "learning_rate": 9.944007118899309e-05, "loss": 1.234, "step": 1160 }, { "epoch": 0.07866386611559048, "grad_norm": 12.056897163391113, "learning_rate": 9.943870216989527e-05, "loss": 1.3034, "step": 1161 }, { "epoch": 0.07873162138356257, "grad_norm": 11.633442878723145, "learning_rate": 9.943733315079747e-05, "loss": 1.2744, "step": 1162 }, { "epoch": 0.07879937665153465, "grad_norm": 14.062381744384766, "learning_rate": 9.943596413169965e-05, "loss": 1.2535, "step": 1163 }, { "epoch": 0.07886713191950674, "grad_norm": 11.60498332977295, "learning_rate": 9.943459511260183e-05, "loss": 1.1578, "step": 1164 }, { "epoch": 0.07893488718747883, "grad_norm": 9.667806625366211, "learning_rate": 9.943322609350401e-05, "loss": 1.0804, "step": 1165 }, { "epoch": 0.07900264245545091, "grad_norm": 12.30827808380127, "learning_rate": 9.943185707440619e-05, "loss": 1.4167, "step": 1166 }, { "epoch": 0.079070397723423, "grad_norm": 10.196819305419922, "learning_rate": 9.943048805530838e-05, "loss": 1.0547, "step": 1167 }, { "epoch": 0.07913815299139508, "grad_norm": 10.029928207397461, "learning_rate": 9.942911903621056e-05, "loss": 1.1798, "step": 1168 }, { "epoch": 0.07920590825936717, "grad_norm": 11.782978057861328, "learning_rate": 9.942775001711274e-05, "loss": 1.1454, "step": 1169 }, { "epoch": 0.07927366352733925, "grad_norm": 12.25143814086914, "learning_rate": 9.942638099801492e-05, "loss": 1.2854, "step": 1170 }, { "epoch": 0.07934141879531134, "grad_norm": 8.47904109954834, "learning_rate": 9.942501197891712e-05, "loss": 0.9381, "step": 1171 }, { "epoch": 0.07940917406328342, "grad_norm": 13.698802947998047, "learning_rate": 9.94236429598193e-05, "loss": 1.6261, "step": 1172 }, { "epoch": 0.07947692933125551, "grad_norm": 10.885397911071777, "learning_rate": 9.942227394072148e-05, "loss": 1.0645, "step": 1173 }, { "epoch": 0.07954468459922759, "grad_norm": 13.274818420410156, "learning_rate": 9.942090492162366e-05, "loss": 1.3411, "step": 1174 }, { "epoch": 0.07961243986719968, "grad_norm": 14.061238288879395, "learning_rate": 9.941953590252584e-05, "loss": 1.1916, "step": 1175 }, { "epoch": 0.07968019513517176, "grad_norm": 10.074264526367188, "learning_rate": 9.941816688342803e-05, "loss": 1.092, "step": 1176 }, { "epoch": 0.07974795040314385, "grad_norm": 14.741287231445312, "learning_rate": 9.941679786433021e-05, "loss": 1.3774, "step": 1177 }, { "epoch": 0.07981570567111593, "grad_norm": 11.308422088623047, "learning_rate": 9.941542884523239e-05, "loss": 0.9298, "step": 1178 }, { "epoch": 0.07988346093908802, "grad_norm": 14.375280380249023, "learning_rate": 9.941405982613457e-05, "loss": 1.4525, "step": 1179 }, { "epoch": 0.0799512162070601, "grad_norm": 11.724523544311523, "learning_rate": 9.941269080703677e-05, "loss": 1.3616, "step": 1180 }, { "epoch": 0.08001897147503219, "grad_norm": 12.578176498413086, "learning_rate": 9.941132178793895e-05, "loss": 1.1513, "step": 1181 }, { "epoch": 0.08008672674300427, "grad_norm": 12.100804328918457, "learning_rate": 9.940995276884113e-05, "loss": 1.0656, "step": 1182 }, { "epoch": 0.08015448201097636, "grad_norm": 11.680248260498047, "learning_rate": 9.940858374974331e-05, "loss": 1.2358, "step": 1183 }, { "epoch": 0.08022223727894844, "grad_norm": 10.066198348999023, "learning_rate": 9.940721473064549e-05, "loss": 0.9219, "step": 1184 }, { "epoch": 0.08028999254692053, "grad_norm": 10.813334465026855, "learning_rate": 9.940584571154768e-05, "loss": 1.1456, "step": 1185 }, { "epoch": 0.0803577478148926, "grad_norm": 14.004862785339355, "learning_rate": 9.940447669244986e-05, "loss": 1.2375, "step": 1186 }, { "epoch": 0.0804255030828647, "grad_norm": 11.868766784667969, "learning_rate": 9.940310767335204e-05, "loss": 1.3027, "step": 1187 }, { "epoch": 0.08049325835083677, "grad_norm": 12.48153018951416, "learning_rate": 9.940173865425422e-05, "loss": 1.1192, "step": 1188 }, { "epoch": 0.08056101361880887, "grad_norm": 12.340612411499023, "learning_rate": 9.940036963515642e-05, "loss": 1.246, "step": 1189 }, { "epoch": 0.08062876888678094, "grad_norm": 12.205392837524414, "learning_rate": 9.93990006160586e-05, "loss": 1.1233, "step": 1190 }, { "epoch": 0.08069652415475304, "grad_norm": 12.69509220123291, "learning_rate": 9.939763159696078e-05, "loss": 1.2202, "step": 1191 }, { "epoch": 0.08076427942272511, "grad_norm": 12.40784740447998, "learning_rate": 9.939626257786296e-05, "loss": 1.1345, "step": 1192 }, { "epoch": 0.0808320346906972, "grad_norm": 11.678507804870605, "learning_rate": 9.939489355876515e-05, "loss": 1.4513, "step": 1193 }, { "epoch": 0.08089978995866928, "grad_norm": 11.649873733520508, "learning_rate": 9.939352453966733e-05, "loss": 1.3827, "step": 1194 }, { "epoch": 0.08096754522664137, "grad_norm": 12.378853797912598, "learning_rate": 9.939215552056951e-05, "loss": 1.3632, "step": 1195 }, { "epoch": 0.08103530049461345, "grad_norm": 11.023188591003418, "learning_rate": 9.93907865014717e-05, "loss": 1.3892, "step": 1196 }, { "epoch": 0.08110305576258554, "grad_norm": 13.111897468566895, "learning_rate": 9.938941748237389e-05, "loss": 1.1973, "step": 1197 }, { "epoch": 0.08117081103055762, "grad_norm": 10.171613693237305, "learning_rate": 9.938804846327607e-05, "loss": 1.3862, "step": 1198 }, { "epoch": 0.08123856629852971, "grad_norm": 13.327658653259277, "learning_rate": 9.938667944417826e-05, "loss": 1.3173, "step": 1199 }, { "epoch": 0.08130632156650179, "grad_norm": 11.715154647827148, "learning_rate": 9.938531042508044e-05, "loss": 1.0839, "step": 1200 }, { "epoch": 0.08137407683447388, "grad_norm": 11.521212577819824, "learning_rate": 9.938394140598262e-05, "loss": 1.0285, "step": 1201 }, { "epoch": 0.08144183210244596, "grad_norm": 12.024236679077148, "learning_rate": 9.93825723868848e-05, "loss": 1.3025, "step": 1202 }, { "epoch": 0.08150958737041805, "grad_norm": 10.245376586914062, "learning_rate": 9.9381203367787e-05, "loss": 1.1658, "step": 1203 }, { "epoch": 0.08157734263839013, "grad_norm": 10.731759071350098, "learning_rate": 9.937983434868918e-05, "loss": 1.0565, "step": 1204 }, { "epoch": 0.08164509790636222, "grad_norm": 16.714153289794922, "learning_rate": 9.937846532959136e-05, "loss": 1.2761, "step": 1205 }, { "epoch": 0.0817128531743343, "grad_norm": 11.172699928283691, "learning_rate": 9.937709631049354e-05, "loss": 1.1887, "step": 1206 }, { "epoch": 0.08178060844230639, "grad_norm": 11.384743690490723, "learning_rate": 9.937572729139572e-05, "loss": 0.9546, "step": 1207 }, { "epoch": 0.08184836371027847, "grad_norm": 11.163822174072266, "learning_rate": 9.937435827229791e-05, "loss": 1.1498, "step": 1208 }, { "epoch": 0.08191611897825056, "grad_norm": 10.657593727111816, "learning_rate": 9.937298925320009e-05, "loss": 0.9506, "step": 1209 }, { "epoch": 0.08198387424622264, "grad_norm": 10.6862211227417, "learning_rate": 9.937162023410227e-05, "loss": 1.2308, "step": 1210 }, { "epoch": 0.08205162951419473, "grad_norm": 10.649473190307617, "learning_rate": 9.937025121500445e-05, "loss": 1.2679, "step": 1211 }, { "epoch": 0.08211938478216681, "grad_norm": 9.298782348632812, "learning_rate": 9.936888219590665e-05, "loss": 1.1327, "step": 1212 }, { "epoch": 0.0821871400501389, "grad_norm": 11.448348045349121, "learning_rate": 9.936751317680883e-05, "loss": 1.2796, "step": 1213 }, { "epoch": 0.08225489531811098, "grad_norm": 12.141517639160156, "learning_rate": 9.936614415771101e-05, "loss": 1.1128, "step": 1214 }, { "epoch": 0.08232265058608307, "grad_norm": 11.799830436706543, "learning_rate": 9.936477513861319e-05, "loss": 1.0743, "step": 1215 }, { "epoch": 0.08239040585405515, "grad_norm": 11.952958106994629, "learning_rate": 9.936340611951537e-05, "loss": 1.1659, "step": 1216 }, { "epoch": 0.08245816112202724, "grad_norm": 11.870144844055176, "learning_rate": 9.936203710041756e-05, "loss": 1.1195, "step": 1217 }, { "epoch": 0.08252591638999933, "grad_norm": 11.970368385314941, "learning_rate": 9.936066808131974e-05, "loss": 1.2139, "step": 1218 }, { "epoch": 0.08259367165797141, "grad_norm": 11.211687088012695, "learning_rate": 9.935929906222192e-05, "loss": 1.2107, "step": 1219 }, { "epoch": 0.0826614269259435, "grad_norm": 12.987563133239746, "learning_rate": 9.93579300431241e-05, "loss": 1.2804, "step": 1220 }, { "epoch": 0.08272918219391558, "grad_norm": 12.337888717651367, "learning_rate": 9.935656102402628e-05, "loss": 0.8713, "step": 1221 }, { "epoch": 0.08279693746188767, "grad_norm": 11.717150688171387, "learning_rate": 9.935519200492848e-05, "loss": 1.2684, "step": 1222 }, { "epoch": 0.08286469272985975, "grad_norm": 12.658769607543945, "learning_rate": 9.935382298583066e-05, "loss": 1.0231, "step": 1223 }, { "epoch": 0.08293244799783184, "grad_norm": 10.526476860046387, "learning_rate": 9.935245396673284e-05, "loss": 1.0605, "step": 1224 }, { "epoch": 0.08300020326580392, "grad_norm": 10.575004577636719, "learning_rate": 9.935108494763502e-05, "loss": 1.2169, "step": 1225 }, { "epoch": 0.083067958533776, "grad_norm": 10.499407768249512, "learning_rate": 9.934971592853721e-05, "loss": 1.3064, "step": 1226 }, { "epoch": 0.08313571380174808, "grad_norm": 12.25387191772461, "learning_rate": 9.934834690943939e-05, "loss": 1.1554, "step": 1227 }, { "epoch": 0.08320346906972018, "grad_norm": 12.173775672912598, "learning_rate": 9.934697789034157e-05, "loss": 1.0917, "step": 1228 }, { "epoch": 0.08327122433769225, "grad_norm": 12.424721717834473, "learning_rate": 9.934560887124375e-05, "loss": 1.3679, "step": 1229 }, { "epoch": 0.08333897960566435, "grad_norm": 11.291987419128418, "learning_rate": 9.934423985214593e-05, "loss": 1.1439, "step": 1230 }, { "epoch": 0.08340673487363642, "grad_norm": 10.908637046813965, "learning_rate": 9.934287083304813e-05, "loss": 1.0487, "step": 1231 }, { "epoch": 0.08347449014160851, "grad_norm": 14.767544746398926, "learning_rate": 9.934150181395031e-05, "loss": 1.2951, "step": 1232 }, { "epoch": 0.08354224540958059, "grad_norm": 11.959871292114258, "learning_rate": 9.934013279485249e-05, "loss": 1.1244, "step": 1233 }, { "epoch": 0.08361000067755268, "grad_norm": 11.19450569152832, "learning_rate": 9.933876377575467e-05, "loss": 1.2802, "step": 1234 }, { "epoch": 0.08367775594552476, "grad_norm": 10.71377182006836, "learning_rate": 9.933739475665686e-05, "loss": 1.0376, "step": 1235 }, { "epoch": 0.08374551121349685, "grad_norm": 12.174454689025879, "learning_rate": 9.933602573755904e-05, "loss": 1.5779, "step": 1236 }, { "epoch": 0.08381326648146893, "grad_norm": 9.863836288452148, "learning_rate": 9.933465671846122e-05, "loss": 1.0443, "step": 1237 }, { "epoch": 0.08388102174944102, "grad_norm": 11.964838027954102, "learning_rate": 9.93332876993634e-05, "loss": 1.0613, "step": 1238 }, { "epoch": 0.0839487770174131, "grad_norm": 11.49203109741211, "learning_rate": 9.93319186802656e-05, "loss": 1.162, "step": 1239 }, { "epoch": 0.08401653228538519, "grad_norm": 12.548815727233887, "learning_rate": 9.933054966116778e-05, "loss": 1.0652, "step": 1240 }, { "epoch": 0.08408428755335727, "grad_norm": 13.37637996673584, "learning_rate": 9.932918064206996e-05, "loss": 1.2281, "step": 1241 }, { "epoch": 0.08415204282132936, "grad_norm": 12.59211254119873, "learning_rate": 9.932781162297215e-05, "loss": 1.0624, "step": 1242 }, { "epoch": 0.08421979808930144, "grad_norm": 13.386221885681152, "learning_rate": 9.932644260387433e-05, "loss": 1.4393, "step": 1243 }, { "epoch": 0.08428755335727353, "grad_norm": 12.647525787353516, "learning_rate": 9.932507358477651e-05, "loss": 1.2228, "step": 1244 }, { "epoch": 0.08435530862524561, "grad_norm": 12.039474487304688, "learning_rate": 9.93237045656787e-05, "loss": 1.1354, "step": 1245 }, { "epoch": 0.0844230638932177, "grad_norm": 11.373556137084961, "learning_rate": 9.932233554658089e-05, "loss": 1.2418, "step": 1246 }, { "epoch": 0.08449081916118978, "grad_norm": 10.944781303405762, "learning_rate": 9.932096652748307e-05, "loss": 1.1515, "step": 1247 }, { "epoch": 0.08455857442916187, "grad_norm": 12.174854278564453, "learning_rate": 9.931959750838525e-05, "loss": 1.2063, "step": 1248 }, { "epoch": 0.08462632969713395, "grad_norm": 8.846879005432129, "learning_rate": 9.931822848928744e-05, "loss": 0.9639, "step": 1249 }, { "epoch": 0.08469408496510604, "grad_norm": 13.793547630310059, "learning_rate": 9.931685947018962e-05, "loss": 1.1236, "step": 1250 }, { "epoch": 0.08476184023307812, "grad_norm": 14.486831665039062, "learning_rate": 9.93154904510918e-05, "loss": 1.2047, "step": 1251 }, { "epoch": 0.08482959550105021, "grad_norm": 13.262588500976562, "learning_rate": 9.931412143199398e-05, "loss": 1.4091, "step": 1252 }, { "epoch": 0.08489735076902229, "grad_norm": 13.289068222045898, "learning_rate": 9.931275241289616e-05, "loss": 1.5065, "step": 1253 }, { "epoch": 0.08496510603699438, "grad_norm": 10.22205638885498, "learning_rate": 9.931138339379836e-05, "loss": 1.1116, "step": 1254 }, { "epoch": 0.08503286130496646, "grad_norm": 13.141668319702148, "learning_rate": 9.931001437470054e-05, "loss": 1.4006, "step": 1255 }, { "epoch": 0.08510061657293855, "grad_norm": 11.817032814025879, "learning_rate": 9.930864535560272e-05, "loss": 1.2062, "step": 1256 }, { "epoch": 0.08516837184091063, "grad_norm": 10.814498901367188, "learning_rate": 9.93072763365049e-05, "loss": 1.138, "step": 1257 }, { "epoch": 0.08523612710888272, "grad_norm": 12.598155975341797, "learning_rate": 9.930590731740709e-05, "loss": 1.1959, "step": 1258 }, { "epoch": 0.0853038823768548, "grad_norm": 11.909974098205566, "learning_rate": 9.930453829830927e-05, "loss": 1.2891, "step": 1259 }, { "epoch": 0.08537163764482689, "grad_norm": 12.663064956665039, "learning_rate": 9.930316927921145e-05, "loss": 1.3017, "step": 1260 }, { "epoch": 0.08543939291279896, "grad_norm": 10.810627937316895, "learning_rate": 9.930180026011363e-05, "loss": 1.16, "step": 1261 }, { "epoch": 0.08550714818077106, "grad_norm": 9.182926177978516, "learning_rate": 9.930043124101581e-05, "loss": 1.2625, "step": 1262 }, { "epoch": 0.08557490344874313, "grad_norm": 12.73978042602539, "learning_rate": 9.9299062221918e-05, "loss": 0.9904, "step": 1263 }, { "epoch": 0.08564265871671523, "grad_norm": 10.346587181091309, "learning_rate": 9.929769320282019e-05, "loss": 1.0167, "step": 1264 }, { "epoch": 0.0857104139846873, "grad_norm": 12.145682334899902, "learning_rate": 9.929632418372237e-05, "loss": 1.4282, "step": 1265 }, { "epoch": 0.0857781692526594, "grad_norm": 11.515445709228516, "learning_rate": 9.929495516462455e-05, "loss": 1.0527, "step": 1266 }, { "epoch": 0.08584592452063147, "grad_norm": 10.979050636291504, "learning_rate": 9.929358614552674e-05, "loss": 1.2888, "step": 1267 }, { "epoch": 0.08591367978860356, "grad_norm": 10.248215675354004, "learning_rate": 9.929221712642892e-05, "loss": 1.4316, "step": 1268 }, { "epoch": 0.08598143505657564, "grad_norm": 13.743851661682129, "learning_rate": 9.92908481073311e-05, "loss": 1.5047, "step": 1269 }, { "epoch": 0.08604919032454773, "grad_norm": 9.919225692749023, "learning_rate": 9.928947908823328e-05, "loss": 0.9602, "step": 1270 }, { "epoch": 0.08611694559251983, "grad_norm": 9.795915603637695, "learning_rate": 9.928811006913546e-05, "loss": 1.3736, "step": 1271 }, { "epoch": 0.0861847008604919, "grad_norm": 13.108200073242188, "learning_rate": 9.928674105003766e-05, "loss": 1.2591, "step": 1272 }, { "epoch": 0.086252456128464, "grad_norm": 13.108073234558105, "learning_rate": 9.928537203093984e-05, "loss": 1.102, "step": 1273 }, { "epoch": 0.08632021139643607, "grad_norm": 15.177817344665527, "learning_rate": 9.928400301184202e-05, "loss": 1.3164, "step": 1274 }, { "epoch": 0.08638796666440816, "grad_norm": 13.048440933227539, "learning_rate": 9.92826339927442e-05, "loss": 1.3029, "step": 1275 }, { "epoch": 0.08645572193238024, "grad_norm": 10.982895851135254, "learning_rate": 9.928126497364638e-05, "loss": 1.1829, "step": 1276 }, { "epoch": 0.08652347720035233, "grad_norm": 10.901629447937012, "learning_rate": 9.927989595454857e-05, "loss": 1.2116, "step": 1277 }, { "epoch": 0.08659123246832441, "grad_norm": 12.924722671508789, "learning_rate": 9.927852693545075e-05, "loss": 1.6292, "step": 1278 }, { "epoch": 0.0866589877362965, "grad_norm": 12.572770118713379, "learning_rate": 9.927715791635293e-05, "loss": 1.1012, "step": 1279 }, { "epoch": 0.08672674300426858, "grad_norm": 12.803020477294922, "learning_rate": 9.927578889725511e-05, "loss": 1.276, "step": 1280 }, { "epoch": 0.08679449827224067, "grad_norm": 10.92810344696045, "learning_rate": 9.92744198781573e-05, "loss": 1.1876, "step": 1281 }, { "epoch": 0.08686225354021275, "grad_norm": 12.253180503845215, "learning_rate": 9.927305085905949e-05, "loss": 1.1807, "step": 1282 }, { "epoch": 0.08693000880818484, "grad_norm": 11.163126945495605, "learning_rate": 9.927168183996167e-05, "loss": 1.1671, "step": 1283 }, { "epoch": 0.08699776407615692, "grad_norm": 10.726607322692871, "learning_rate": 9.927031282086385e-05, "loss": 1.185, "step": 1284 }, { "epoch": 0.08706551934412901, "grad_norm": 13.265491485595703, "learning_rate": 9.926894380176603e-05, "loss": 1.2916, "step": 1285 }, { "epoch": 0.08713327461210109, "grad_norm": 14.559592247009277, "learning_rate": 9.926757478266822e-05, "loss": 1.288, "step": 1286 }, { "epoch": 0.08720102988007318, "grad_norm": 14.816813468933105, "learning_rate": 9.92662057635704e-05, "loss": 1.4397, "step": 1287 }, { "epoch": 0.08726878514804526, "grad_norm": 11.811420440673828, "learning_rate": 9.926483674447258e-05, "loss": 1.348, "step": 1288 }, { "epoch": 0.08733654041601735, "grad_norm": 10.920133590698242, "learning_rate": 9.926346772537478e-05, "loss": 1.4169, "step": 1289 }, { "epoch": 0.08740429568398943, "grad_norm": 11.690089225769043, "learning_rate": 9.926209870627696e-05, "loss": 1.1199, "step": 1290 }, { "epoch": 0.08747205095196152, "grad_norm": 9.411031723022461, "learning_rate": 9.926072968717914e-05, "loss": 1.031, "step": 1291 }, { "epoch": 0.0875398062199336, "grad_norm": 12.174457550048828, "learning_rate": 9.925936066808133e-05, "loss": 1.0622, "step": 1292 }, { "epoch": 0.08760756148790569, "grad_norm": 10.346089363098145, "learning_rate": 9.925799164898351e-05, "loss": 1.2777, "step": 1293 }, { "epoch": 0.08767531675587777, "grad_norm": 12.534863471984863, "learning_rate": 9.925662262988569e-05, "loss": 1.0093, "step": 1294 }, { "epoch": 0.08774307202384986, "grad_norm": 12.050302505493164, "learning_rate": 9.925525361078789e-05, "loss": 1.0591, "step": 1295 }, { "epoch": 0.08781082729182194, "grad_norm": 11.556166648864746, "learning_rate": 9.925388459169007e-05, "loss": 1.1452, "step": 1296 }, { "epoch": 0.08787858255979403, "grad_norm": 9.693270683288574, "learning_rate": 9.925251557259225e-05, "loss": 1.0332, "step": 1297 }, { "epoch": 0.0879463378277661, "grad_norm": 12.646526336669922, "learning_rate": 9.925114655349443e-05, "loss": 1.1894, "step": 1298 }, { "epoch": 0.0880140930957382, "grad_norm": 10.676809310913086, "learning_rate": 9.924977753439661e-05, "loss": 1.1727, "step": 1299 }, { "epoch": 0.08808184836371027, "grad_norm": 11.182327270507812, "learning_rate": 9.92484085152988e-05, "loss": 1.0145, "step": 1300 }, { "epoch": 0.08814960363168237, "grad_norm": 11.55026626586914, "learning_rate": 9.924703949620098e-05, "loss": 1.2187, "step": 1301 }, { "epoch": 0.08821735889965444, "grad_norm": 11.502679824829102, "learning_rate": 9.924567047710316e-05, "loss": 1.3116, "step": 1302 }, { "epoch": 0.08828511416762654, "grad_norm": 9.676247596740723, "learning_rate": 9.924430145800534e-05, "loss": 1.2869, "step": 1303 }, { "epoch": 0.08835286943559861, "grad_norm": 15.123950004577637, "learning_rate": 9.924293243890754e-05, "loss": 1.0225, "step": 1304 }, { "epoch": 0.0884206247035707, "grad_norm": 14.030994415283203, "learning_rate": 9.924156341980972e-05, "loss": 0.9829, "step": 1305 }, { "epoch": 0.08848837997154278, "grad_norm": 10.00402545928955, "learning_rate": 9.92401944007119e-05, "loss": 1.0946, "step": 1306 }, { "epoch": 0.08855613523951487, "grad_norm": 9.077853202819824, "learning_rate": 9.923882538161408e-05, "loss": 0.852, "step": 1307 }, { "epoch": 0.08862389050748695, "grad_norm": 12.777885437011719, "learning_rate": 9.923745636251626e-05, "loss": 1.0513, "step": 1308 }, { "epoch": 0.08869164577545904, "grad_norm": 10.686469078063965, "learning_rate": 9.923608734341845e-05, "loss": 1.0873, "step": 1309 }, { "epoch": 0.08875940104343112, "grad_norm": 11.51689338684082, "learning_rate": 9.923471832432063e-05, "loss": 1.0037, "step": 1310 }, { "epoch": 0.08882715631140321, "grad_norm": 13.259784698486328, "learning_rate": 9.923334930522281e-05, "loss": 1.2004, "step": 1311 }, { "epoch": 0.08889491157937529, "grad_norm": 10.63463306427002, "learning_rate": 9.923198028612499e-05, "loss": 1.2067, "step": 1312 }, { "epoch": 0.08896266684734738, "grad_norm": 10.838210105895996, "learning_rate": 9.923061126702719e-05, "loss": 1.2939, "step": 1313 }, { "epoch": 0.08903042211531946, "grad_norm": 12.058418273925781, "learning_rate": 9.922924224792937e-05, "loss": 1.3091, "step": 1314 }, { "epoch": 0.08909817738329155, "grad_norm": 10.619451522827148, "learning_rate": 9.922787322883155e-05, "loss": 1.1635, "step": 1315 }, { "epoch": 0.08916593265126363, "grad_norm": 9.37607192993164, "learning_rate": 9.922650420973373e-05, "loss": 1.2206, "step": 1316 }, { "epoch": 0.08923368791923572, "grad_norm": 12.119776725769043, "learning_rate": 9.922513519063591e-05, "loss": 1.2159, "step": 1317 }, { "epoch": 0.0893014431872078, "grad_norm": 14.013461112976074, "learning_rate": 9.92237661715381e-05, "loss": 1.1445, "step": 1318 }, { "epoch": 0.08936919845517989, "grad_norm": 11.560707092285156, "learning_rate": 9.922239715244028e-05, "loss": 1.3617, "step": 1319 }, { "epoch": 0.08943695372315197, "grad_norm": 11.817791938781738, "learning_rate": 9.922102813334246e-05, "loss": 1.1931, "step": 1320 }, { "epoch": 0.08950470899112406, "grad_norm": 11.544127464294434, "learning_rate": 9.921965911424464e-05, "loss": 1.0766, "step": 1321 }, { "epoch": 0.08957246425909614, "grad_norm": 10.67740249633789, "learning_rate": 9.921829009514684e-05, "loss": 1.2103, "step": 1322 }, { "epoch": 0.08964021952706823, "grad_norm": 11.774645805358887, "learning_rate": 9.921692107604902e-05, "loss": 1.1921, "step": 1323 }, { "epoch": 0.08970797479504032, "grad_norm": 12.594759941101074, "learning_rate": 9.92155520569512e-05, "loss": 0.906, "step": 1324 }, { "epoch": 0.0897757300630124, "grad_norm": 10.988224983215332, "learning_rate": 9.921418303785338e-05, "loss": 1.4195, "step": 1325 }, { "epoch": 0.08984348533098449, "grad_norm": 12.22718620300293, "learning_rate": 9.921281401875556e-05, "loss": 1.2461, "step": 1326 }, { "epoch": 0.08991124059895657, "grad_norm": 10.964727401733398, "learning_rate": 9.921144499965775e-05, "loss": 1.1254, "step": 1327 }, { "epoch": 0.08997899586692866, "grad_norm": 11.327523231506348, "learning_rate": 9.921007598055993e-05, "loss": 1.145, "step": 1328 }, { "epoch": 0.09004675113490074, "grad_norm": 9.870691299438477, "learning_rate": 9.920870696146211e-05, "loss": 1.2408, "step": 1329 }, { "epoch": 0.09011450640287283, "grad_norm": 11.02373218536377, "learning_rate": 9.920733794236429e-05, "loss": 1.1084, "step": 1330 }, { "epoch": 0.09018226167084491, "grad_norm": 12.300410270690918, "learning_rate": 9.920596892326647e-05, "loss": 1.1542, "step": 1331 }, { "epoch": 0.090250016938817, "grad_norm": 9.832919120788574, "learning_rate": 9.920459990416867e-05, "loss": 1.1195, "step": 1332 }, { "epoch": 0.09031777220678908, "grad_norm": 10.432522773742676, "learning_rate": 9.920323088507085e-05, "loss": 1.1825, "step": 1333 }, { "epoch": 0.09038552747476117, "grad_norm": 11.878792762756348, "learning_rate": 9.920186186597303e-05, "loss": 0.9647, "step": 1334 }, { "epoch": 0.09045328274273325, "grad_norm": 11.866320610046387, "learning_rate": 9.920049284687522e-05, "loss": 1.3598, "step": 1335 }, { "epoch": 0.09052103801070534, "grad_norm": 14.11543083190918, "learning_rate": 9.91991238277774e-05, "loss": 1.4172, "step": 1336 }, { "epoch": 0.09058879327867742, "grad_norm": 13.841622352600098, "learning_rate": 9.919775480867958e-05, "loss": 1.4119, "step": 1337 }, { "epoch": 0.0906565485466495, "grad_norm": 11.077167510986328, "learning_rate": 9.919638578958178e-05, "loss": 0.984, "step": 1338 }, { "epoch": 0.09072430381462158, "grad_norm": 10.904266357421875, "learning_rate": 9.919501677048396e-05, "loss": 1.0439, "step": 1339 }, { "epoch": 0.09079205908259368, "grad_norm": 11.623948097229004, "learning_rate": 9.919364775138614e-05, "loss": 1.0318, "step": 1340 }, { "epoch": 0.09085981435056575, "grad_norm": 10.893725395202637, "learning_rate": 9.919227873228833e-05, "loss": 1.2626, "step": 1341 }, { "epoch": 0.09092756961853785, "grad_norm": 10.064491271972656, "learning_rate": 9.919090971319051e-05, "loss": 1.1483, "step": 1342 }, { "epoch": 0.09099532488650992, "grad_norm": 9.854101181030273, "learning_rate": 9.918954069409269e-05, "loss": 1.1595, "step": 1343 }, { "epoch": 0.09106308015448202, "grad_norm": 11.682498931884766, "learning_rate": 9.918817167499487e-05, "loss": 1.2887, "step": 1344 }, { "epoch": 0.09113083542245409, "grad_norm": 10.484097480773926, "learning_rate": 9.918680265589707e-05, "loss": 1.2, "step": 1345 }, { "epoch": 0.09119859069042618, "grad_norm": 12.332358360290527, "learning_rate": 9.918543363679925e-05, "loss": 1.262, "step": 1346 }, { "epoch": 0.09126634595839826, "grad_norm": 13.706925392150879, "learning_rate": 9.918406461770143e-05, "loss": 1.2151, "step": 1347 }, { "epoch": 0.09133410122637035, "grad_norm": 13.918478965759277, "learning_rate": 9.91826955986036e-05, "loss": 1.4653, "step": 1348 }, { "epoch": 0.09140185649434243, "grad_norm": 11.970015525817871, "learning_rate": 9.918132657950579e-05, "loss": 1.2452, "step": 1349 }, { "epoch": 0.09146961176231452, "grad_norm": 9.698074340820312, "learning_rate": 9.917995756040798e-05, "loss": 0.9142, "step": 1350 }, { "epoch": 0.0915373670302866, "grad_norm": 9.225728988647461, "learning_rate": 9.917858854131016e-05, "loss": 1.0293, "step": 1351 }, { "epoch": 0.09160512229825869, "grad_norm": 10.77661418914795, "learning_rate": 9.917721952221234e-05, "loss": 1.1865, "step": 1352 }, { "epoch": 0.09167287756623077, "grad_norm": 10.45409870147705, "learning_rate": 9.917585050311452e-05, "loss": 1.1553, "step": 1353 }, { "epoch": 0.09174063283420286, "grad_norm": 10.44918441772461, "learning_rate": 9.91744814840167e-05, "loss": 1.1621, "step": 1354 }, { "epoch": 0.09180838810217494, "grad_norm": 14.769590377807617, "learning_rate": 9.91731124649189e-05, "loss": 1.3502, "step": 1355 }, { "epoch": 0.09187614337014703, "grad_norm": 12.733844757080078, "learning_rate": 9.917174344582108e-05, "loss": 1.1973, "step": 1356 }, { "epoch": 0.09194389863811911, "grad_norm": 13.365818977355957, "learning_rate": 9.917037442672326e-05, "loss": 1.5412, "step": 1357 }, { "epoch": 0.0920116539060912, "grad_norm": 11.163050651550293, "learning_rate": 9.916900540762544e-05, "loss": 1.3142, "step": 1358 }, { "epoch": 0.09207940917406328, "grad_norm": 11.420190811157227, "learning_rate": 9.916763638852763e-05, "loss": 1.2257, "step": 1359 }, { "epoch": 0.09214716444203537, "grad_norm": 9.66398811340332, "learning_rate": 9.916626736942981e-05, "loss": 1.0584, "step": 1360 }, { "epoch": 0.09221491971000745, "grad_norm": 13.643363952636719, "learning_rate": 9.916489835033199e-05, "loss": 1.3783, "step": 1361 }, { "epoch": 0.09228267497797954, "grad_norm": 11.658889770507812, "learning_rate": 9.916352933123417e-05, "loss": 1.2126, "step": 1362 }, { "epoch": 0.09235043024595162, "grad_norm": 11.2728271484375, "learning_rate": 9.916216031213635e-05, "loss": 1.2117, "step": 1363 }, { "epoch": 0.09241818551392371, "grad_norm": 13.576864242553711, "learning_rate": 9.916079129303855e-05, "loss": 1.2922, "step": 1364 }, { "epoch": 0.09248594078189579, "grad_norm": 10.950700759887695, "learning_rate": 9.915942227394073e-05, "loss": 1.1394, "step": 1365 }, { "epoch": 0.09255369604986788, "grad_norm": 11.638351440429688, "learning_rate": 9.91580532548429e-05, "loss": 1.3927, "step": 1366 }, { "epoch": 0.09262145131783996, "grad_norm": 12.355545043945312, "learning_rate": 9.915668423574509e-05, "loss": 1.1861, "step": 1367 }, { "epoch": 0.09268920658581205, "grad_norm": 11.543237686157227, "learning_rate": 9.915531521664728e-05, "loss": 0.9868, "step": 1368 }, { "epoch": 0.09275696185378413, "grad_norm": 11.684252738952637, "learning_rate": 9.915394619754946e-05, "loss": 1.3332, "step": 1369 }, { "epoch": 0.09282471712175622, "grad_norm": 10.775650024414062, "learning_rate": 9.915257717845164e-05, "loss": 1.1164, "step": 1370 }, { "epoch": 0.0928924723897283, "grad_norm": 11.649751663208008, "learning_rate": 9.915120815935382e-05, "loss": 1.0369, "step": 1371 }, { "epoch": 0.09296022765770039, "grad_norm": 9.741403579711914, "learning_rate": 9.9149839140256e-05, "loss": 1.0509, "step": 1372 }, { "epoch": 0.09302798292567246, "grad_norm": 13.804118156433105, "learning_rate": 9.91484701211582e-05, "loss": 1.4318, "step": 1373 }, { "epoch": 0.09309573819364456, "grad_norm": 10.939459800720215, "learning_rate": 9.914710110206038e-05, "loss": 1.218, "step": 1374 }, { "epoch": 0.09316349346161663, "grad_norm": 14.076252937316895, "learning_rate": 9.914573208296256e-05, "loss": 1.4699, "step": 1375 }, { "epoch": 0.09323124872958873, "grad_norm": 12.90072250366211, "learning_rate": 9.914436306386474e-05, "loss": 1.3418, "step": 1376 }, { "epoch": 0.09329900399756082, "grad_norm": 12.7711820602417, "learning_rate": 9.914299404476693e-05, "loss": 0.9866, "step": 1377 }, { "epoch": 0.0933667592655329, "grad_norm": 9.205671310424805, "learning_rate": 9.914162502566911e-05, "loss": 1.1345, "step": 1378 }, { "epoch": 0.09343451453350499, "grad_norm": 11.027194023132324, "learning_rate": 9.914025600657129e-05, "loss": 1.2637, "step": 1379 }, { "epoch": 0.09350226980147706, "grad_norm": 12.861044883728027, "learning_rate": 9.913888698747347e-05, "loss": 1.2348, "step": 1380 }, { "epoch": 0.09357002506944916, "grad_norm": 11.266969680786133, "learning_rate": 9.913751796837567e-05, "loss": 1.0773, "step": 1381 }, { "epoch": 0.09363778033742123, "grad_norm": 13.137110710144043, "learning_rate": 9.913614894927785e-05, "loss": 1.0537, "step": 1382 }, { "epoch": 0.09370553560539333, "grad_norm": 11.343362808227539, "learning_rate": 9.913477993018003e-05, "loss": 1.335, "step": 1383 }, { "epoch": 0.0937732908733654, "grad_norm": 11.472663879394531, "learning_rate": 9.913341091108222e-05, "loss": 1.1362, "step": 1384 }, { "epoch": 0.0938410461413375, "grad_norm": 8.441573143005371, "learning_rate": 9.91320418919844e-05, "loss": 1.1646, "step": 1385 }, { "epoch": 0.09390880140930957, "grad_norm": 12.570130348205566, "learning_rate": 9.913067287288658e-05, "loss": 1.3535, "step": 1386 }, { "epoch": 0.09397655667728166, "grad_norm": 11.671664237976074, "learning_rate": 9.912930385378877e-05, "loss": 1.1707, "step": 1387 }, { "epoch": 0.09404431194525374, "grad_norm": 12.638328552246094, "learning_rate": 9.912793483469096e-05, "loss": 1.057, "step": 1388 }, { "epoch": 0.09411206721322583, "grad_norm": 10.506028175354004, "learning_rate": 9.912656581559314e-05, "loss": 0.9279, "step": 1389 }, { "epoch": 0.09417982248119791, "grad_norm": 11.536858558654785, "learning_rate": 9.912519679649532e-05, "loss": 1.3351, "step": 1390 }, { "epoch": 0.09424757774917, "grad_norm": 12.692436218261719, "learning_rate": 9.912382777739751e-05, "loss": 1.164, "step": 1391 }, { "epoch": 0.09431533301714208, "grad_norm": 12.088066101074219, "learning_rate": 9.912245875829969e-05, "loss": 1.2971, "step": 1392 }, { "epoch": 0.09438308828511417, "grad_norm": 12.133123397827148, "learning_rate": 9.912108973920187e-05, "loss": 1.2742, "step": 1393 }, { "epoch": 0.09445084355308625, "grad_norm": 10.168001174926758, "learning_rate": 9.911972072010405e-05, "loss": 1.0964, "step": 1394 }, { "epoch": 0.09451859882105834, "grad_norm": 10.561311721801758, "learning_rate": 9.911835170100623e-05, "loss": 1.1828, "step": 1395 }, { "epoch": 0.09458635408903042, "grad_norm": 11.497330665588379, "learning_rate": 9.911698268190843e-05, "loss": 1.4699, "step": 1396 }, { "epoch": 0.09465410935700251, "grad_norm": 12.190573692321777, "learning_rate": 9.91156136628106e-05, "loss": 1.1601, "step": 1397 }, { "epoch": 0.09472186462497459, "grad_norm": 10.633028030395508, "learning_rate": 9.911424464371279e-05, "loss": 1.333, "step": 1398 }, { "epoch": 0.09478961989294668, "grad_norm": 12.262279510498047, "learning_rate": 9.911287562461497e-05, "loss": 1.2214, "step": 1399 }, { "epoch": 0.09485737516091876, "grad_norm": 11.506840705871582, "learning_rate": 9.911150660551716e-05, "loss": 1.1172, "step": 1400 }, { "epoch": 0.09492513042889085, "grad_norm": 11.453936576843262, "learning_rate": 9.911013758641934e-05, "loss": 1.2205, "step": 1401 }, { "epoch": 0.09499288569686293, "grad_norm": 9.980772972106934, "learning_rate": 9.910876856732152e-05, "loss": 1.1004, "step": 1402 }, { "epoch": 0.09506064096483502, "grad_norm": 11.775416374206543, "learning_rate": 9.91073995482237e-05, "loss": 1.1587, "step": 1403 }, { "epoch": 0.0951283962328071, "grad_norm": 8.840147972106934, "learning_rate": 9.910603052912588e-05, "loss": 0.8702, "step": 1404 }, { "epoch": 0.09519615150077919, "grad_norm": 10.938506126403809, "learning_rate": 9.910466151002808e-05, "loss": 1.085, "step": 1405 }, { "epoch": 0.09526390676875127, "grad_norm": 11.733402252197266, "learning_rate": 9.910329249093026e-05, "loss": 1.1202, "step": 1406 }, { "epoch": 0.09533166203672336, "grad_norm": 11.616521835327148, "learning_rate": 9.910192347183244e-05, "loss": 1.3483, "step": 1407 }, { "epoch": 0.09539941730469544, "grad_norm": 12.477338790893555, "learning_rate": 9.910055445273462e-05, "loss": 1.4798, "step": 1408 }, { "epoch": 0.09546717257266753, "grad_norm": 11.233193397521973, "learning_rate": 9.90991854336368e-05, "loss": 1.0304, "step": 1409 }, { "epoch": 0.0955349278406396, "grad_norm": 12.586124420166016, "learning_rate": 9.909781641453899e-05, "loss": 1.2216, "step": 1410 }, { "epoch": 0.0956026831086117, "grad_norm": 12.974738121032715, "learning_rate": 9.909644739544117e-05, "loss": 1.1495, "step": 1411 }, { "epoch": 0.09567043837658377, "grad_norm": 9.613628387451172, "learning_rate": 9.909507837634335e-05, "loss": 0.8326, "step": 1412 }, { "epoch": 0.09573819364455587, "grad_norm": 10.644312858581543, "learning_rate": 9.909370935724553e-05, "loss": 1.0182, "step": 1413 }, { "epoch": 0.09580594891252794, "grad_norm": 11.155874252319336, "learning_rate": 9.909234033814773e-05, "loss": 1.2887, "step": 1414 }, { "epoch": 0.09587370418050004, "grad_norm": 12.068909645080566, "learning_rate": 9.90909713190499e-05, "loss": 1.1697, "step": 1415 }, { "epoch": 0.09594145944847211, "grad_norm": 10.66831111907959, "learning_rate": 9.908960229995209e-05, "loss": 1.1275, "step": 1416 }, { "epoch": 0.0960092147164442, "grad_norm": 11.80036449432373, "learning_rate": 9.908823328085427e-05, "loss": 1.38, "step": 1417 }, { "epoch": 0.09607696998441628, "grad_norm": 11.677534103393555, "learning_rate": 9.908686426175645e-05, "loss": 1.075, "step": 1418 }, { "epoch": 0.09614472525238837, "grad_norm": 10.54027271270752, "learning_rate": 9.908549524265864e-05, "loss": 0.9617, "step": 1419 }, { "epoch": 0.09621248052036045, "grad_norm": 9.70718002319336, "learning_rate": 9.908412622356082e-05, "loss": 1.0395, "step": 1420 }, { "epoch": 0.09628023578833254, "grad_norm": 10.439559936523438, "learning_rate": 9.9082757204463e-05, "loss": 1.4112, "step": 1421 }, { "epoch": 0.09634799105630462, "grad_norm": 9.328675270080566, "learning_rate": 9.908138818536518e-05, "loss": 1.0481, "step": 1422 }, { "epoch": 0.09641574632427671, "grad_norm": 12.834508895874023, "learning_rate": 9.908001916626738e-05, "loss": 1.0863, "step": 1423 }, { "epoch": 0.09648350159224879, "grad_norm": 11.885201454162598, "learning_rate": 9.907865014716956e-05, "loss": 1.3509, "step": 1424 }, { "epoch": 0.09655125686022088, "grad_norm": 11.299174308776855, "learning_rate": 9.907728112807174e-05, "loss": 1.1328, "step": 1425 }, { "epoch": 0.09661901212819296, "grad_norm": 13.024226188659668, "learning_rate": 9.907591210897392e-05, "loss": 1.1495, "step": 1426 }, { "epoch": 0.09668676739616505, "grad_norm": 13.418682098388672, "learning_rate": 9.907454308987611e-05, "loss": 1.3827, "step": 1427 }, { "epoch": 0.09675452266413713, "grad_norm": 11.28375244140625, "learning_rate": 9.907317407077829e-05, "loss": 1.3658, "step": 1428 }, { "epoch": 0.09682227793210922, "grad_norm": 9.711199760437012, "learning_rate": 9.907180505168047e-05, "loss": 0.8743, "step": 1429 }, { "epoch": 0.09689003320008131, "grad_norm": 12.292948722839355, "learning_rate": 9.907043603258267e-05, "loss": 1.203, "step": 1430 }, { "epoch": 0.09695778846805339, "grad_norm": 13.195072174072266, "learning_rate": 9.906906701348485e-05, "loss": 1.0403, "step": 1431 }, { "epoch": 0.09702554373602548, "grad_norm": 11.45721435546875, "learning_rate": 9.906769799438703e-05, "loss": 1.2075, "step": 1432 }, { "epoch": 0.09709329900399756, "grad_norm": 10.477989196777344, "learning_rate": 9.906632897528922e-05, "loss": 1.1646, "step": 1433 }, { "epoch": 0.09716105427196965, "grad_norm": 12.572269439697266, "learning_rate": 9.90649599561914e-05, "loss": 1.3944, "step": 1434 }, { "epoch": 0.09722880953994173, "grad_norm": 9.37205982208252, "learning_rate": 9.906359093709358e-05, "loss": 0.9902, "step": 1435 }, { "epoch": 0.09729656480791382, "grad_norm": 11.590779304504395, "learning_rate": 9.906222191799576e-05, "loss": 1.2658, "step": 1436 }, { "epoch": 0.0973643200758859, "grad_norm": 10.35207748413086, "learning_rate": 9.906085289889795e-05, "loss": 0.7905, "step": 1437 }, { "epoch": 0.09743207534385799, "grad_norm": 9.993937492370605, "learning_rate": 9.905948387980013e-05, "loss": 1.1522, "step": 1438 }, { "epoch": 0.09749983061183007, "grad_norm": 9.865569114685059, "learning_rate": 9.905811486070232e-05, "loss": 1.2536, "step": 1439 }, { "epoch": 0.09756758587980216, "grad_norm": 12.836588859558105, "learning_rate": 9.90567458416045e-05, "loss": 1.2216, "step": 1440 }, { "epoch": 0.09763534114777424, "grad_norm": 10.062298774719238, "learning_rate": 9.905537682250668e-05, "loss": 1.1576, "step": 1441 }, { "epoch": 0.09770309641574633, "grad_norm": 10.897071838378906, "learning_rate": 9.905400780340887e-05, "loss": 1.1315, "step": 1442 }, { "epoch": 0.09777085168371841, "grad_norm": 10.366122245788574, "learning_rate": 9.905263878431105e-05, "loss": 1.274, "step": 1443 }, { "epoch": 0.0978386069516905, "grad_norm": 11.632966995239258, "learning_rate": 9.905126976521323e-05, "loss": 1.1427, "step": 1444 }, { "epoch": 0.09790636221966258, "grad_norm": 10.537737846374512, "learning_rate": 9.904990074611541e-05, "loss": 1.0913, "step": 1445 }, { "epoch": 0.09797411748763467, "grad_norm": 9.52363109588623, "learning_rate": 9.90485317270176e-05, "loss": 0.8677, "step": 1446 }, { "epoch": 0.09804187275560675, "grad_norm": 11.511491775512695, "learning_rate": 9.904716270791979e-05, "loss": 1.0381, "step": 1447 }, { "epoch": 0.09810962802357884, "grad_norm": 12.085793495178223, "learning_rate": 9.904579368882197e-05, "loss": 1.1415, "step": 1448 }, { "epoch": 0.09817738329155092, "grad_norm": 8.665430068969727, "learning_rate": 9.904442466972415e-05, "loss": 1.0367, "step": 1449 }, { "epoch": 0.09824513855952301, "grad_norm": 10.900618553161621, "learning_rate": 9.904305565062633e-05, "loss": 0.9835, "step": 1450 }, { "epoch": 0.09831289382749508, "grad_norm": 10.3113431930542, "learning_rate": 9.904168663152852e-05, "loss": 0.924, "step": 1451 }, { "epoch": 0.09838064909546718, "grad_norm": 10.001591682434082, "learning_rate": 9.90403176124307e-05, "loss": 1.0835, "step": 1452 }, { "epoch": 0.09844840436343925, "grad_norm": 11.333273887634277, "learning_rate": 9.903894859333288e-05, "loss": 1.072, "step": 1453 }, { "epoch": 0.09851615963141135, "grad_norm": 10.107904434204102, "learning_rate": 9.903757957423506e-05, "loss": 1.0848, "step": 1454 }, { "epoch": 0.09858391489938342, "grad_norm": 12.578730583190918, "learning_rate": 9.903621055513725e-05, "loss": 1.2735, "step": 1455 }, { "epoch": 0.09865167016735552, "grad_norm": 10.453478813171387, "learning_rate": 9.903484153603944e-05, "loss": 1.3141, "step": 1456 }, { "epoch": 0.09871942543532759, "grad_norm": 10.383566856384277, "learning_rate": 9.903347251694162e-05, "loss": 1.0992, "step": 1457 }, { "epoch": 0.09878718070329968, "grad_norm": 9.612902641296387, "learning_rate": 9.90321034978438e-05, "loss": 1.3103, "step": 1458 }, { "epoch": 0.09885493597127176, "grad_norm": 12.111359596252441, "learning_rate": 9.903073447874598e-05, "loss": 1.3102, "step": 1459 }, { "epoch": 0.09892269123924385, "grad_norm": 9.987195014953613, "learning_rate": 9.902936545964817e-05, "loss": 1.1961, "step": 1460 }, { "epoch": 0.09899044650721593, "grad_norm": 10.900408744812012, "learning_rate": 9.902799644055035e-05, "loss": 1.0208, "step": 1461 }, { "epoch": 0.09905820177518802, "grad_norm": 9.94915771484375, "learning_rate": 9.902662742145253e-05, "loss": 1.3347, "step": 1462 }, { "epoch": 0.0991259570431601, "grad_norm": 13.393661499023438, "learning_rate": 9.902525840235471e-05, "loss": 1.2903, "step": 1463 }, { "epoch": 0.09919371231113219, "grad_norm": 10.122967720031738, "learning_rate": 9.902388938325689e-05, "loss": 1.0229, "step": 1464 }, { "epoch": 0.09926146757910427, "grad_norm": 10.775031089782715, "learning_rate": 9.902252036415909e-05, "loss": 1.1669, "step": 1465 }, { "epoch": 0.09932922284707636, "grad_norm": 9.733497619628906, "learning_rate": 9.902115134506127e-05, "loss": 1.0903, "step": 1466 }, { "epoch": 0.09939697811504844, "grad_norm": 9.230277061462402, "learning_rate": 9.901978232596345e-05, "loss": 1.1738, "step": 1467 }, { "epoch": 0.09946473338302053, "grad_norm": 10.822884559631348, "learning_rate": 9.901841330686563e-05, "loss": 1.3392, "step": 1468 }, { "epoch": 0.09953248865099261, "grad_norm": 10.64195442199707, "learning_rate": 9.901704428776782e-05, "loss": 1.0823, "step": 1469 }, { "epoch": 0.0996002439189647, "grad_norm": 13.73645305633545, "learning_rate": 9.901567526867e-05, "loss": 1.1128, "step": 1470 }, { "epoch": 0.09966799918693678, "grad_norm": 11.361958503723145, "learning_rate": 9.901430624957218e-05, "loss": 1.083, "step": 1471 }, { "epoch": 0.09973575445490887, "grad_norm": 10.839045524597168, "learning_rate": 9.901293723047436e-05, "loss": 1.0288, "step": 1472 }, { "epoch": 0.09980350972288095, "grad_norm": 10.41995906829834, "learning_rate": 9.901156821137656e-05, "loss": 1.0631, "step": 1473 }, { "epoch": 0.09987126499085304, "grad_norm": 11.87709903717041, "learning_rate": 9.901019919227874e-05, "loss": 1.0715, "step": 1474 }, { "epoch": 0.09993902025882512, "grad_norm": 10.46670913696289, "learning_rate": 9.900883017318092e-05, "loss": 1.1684, "step": 1475 }, { "epoch": 0.10000677552679721, "grad_norm": 12.163457870483398, "learning_rate": 9.900746115408311e-05, "loss": 1.4416, "step": 1476 }, { "epoch": 0.10007453079476929, "grad_norm": 13.417581558227539, "learning_rate": 9.900609213498529e-05, "loss": 1.1876, "step": 1477 }, { "epoch": 0.10014228606274138, "grad_norm": 11.35722541809082, "learning_rate": 9.900472311588747e-05, "loss": 1.1389, "step": 1478 }, { "epoch": 0.10021004133071346, "grad_norm": 10.042820930480957, "learning_rate": 9.900335409678966e-05, "loss": 1.2156, "step": 1479 }, { "epoch": 0.10027779659868555, "grad_norm": 10.823782920837402, "learning_rate": 9.900198507769184e-05, "loss": 1.1915, "step": 1480 }, { "epoch": 0.10034555186665763, "grad_norm": 13.6808443069458, "learning_rate": 9.900061605859403e-05, "loss": 1.1724, "step": 1481 }, { "epoch": 0.10041330713462972, "grad_norm": 11.16846752166748, "learning_rate": 9.89992470394962e-05, "loss": 1.2093, "step": 1482 }, { "epoch": 0.10048106240260181, "grad_norm": 10.391450881958008, "learning_rate": 9.89978780203984e-05, "loss": 1.142, "step": 1483 }, { "epoch": 0.10054881767057389, "grad_norm": 9.324288368225098, "learning_rate": 9.899650900130058e-05, "loss": 1.0579, "step": 1484 }, { "epoch": 0.10061657293854598, "grad_norm": 12.601625442504883, "learning_rate": 9.899513998220276e-05, "loss": 1.2643, "step": 1485 }, { "epoch": 0.10068432820651806, "grad_norm": 14.270779609680176, "learning_rate": 9.899377096310494e-05, "loss": 1.2903, "step": 1486 }, { "epoch": 0.10075208347449015, "grad_norm": 11.521232604980469, "learning_rate": 9.899240194400712e-05, "loss": 1.2385, "step": 1487 }, { "epoch": 0.10081983874246223, "grad_norm": 10.76693344116211, "learning_rate": 9.899103292490931e-05, "loss": 1.1745, "step": 1488 }, { "epoch": 0.10088759401043432, "grad_norm": 9.091184616088867, "learning_rate": 9.89896639058115e-05, "loss": 0.9571, "step": 1489 }, { "epoch": 0.1009553492784064, "grad_norm": 11.930106163024902, "learning_rate": 9.898829488671368e-05, "loss": 1.1226, "step": 1490 }, { "epoch": 0.10102310454637849, "grad_norm": 10.90937614440918, "learning_rate": 9.898692586761586e-05, "loss": 1.0776, "step": 1491 }, { "epoch": 0.10109085981435056, "grad_norm": 10.618545532226562, "learning_rate": 9.898555684851805e-05, "loss": 1.1251, "step": 1492 }, { "epoch": 0.10115861508232266, "grad_norm": 10.228861808776855, "learning_rate": 9.898418782942023e-05, "loss": 1.1987, "step": 1493 }, { "epoch": 0.10122637035029473, "grad_norm": 8.807862281799316, "learning_rate": 9.898281881032241e-05, "loss": 0.9339, "step": 1494 }, { "epoch": 0.10129412561826683, "grad_norm": 11.24593448638916, "learning_rate": 9.898144979122459e-05, "loss": 1.1495, "step": 1495 }, { "epoch": 0.1013618808862389, "grad_norm": 11.192438125610352, "learning_rate": 9.898008077212677e-05, "loss": 1.1361, "step": 1496 }, { "epoch": 0.101429636154211, "grad_norm": 10.440075874328613, "learning_rate": 9.897871175302896e-05, "loss": 1.2273, "step": 1497 }, { "epoch": 0.10149739142218307, "grad_norm": 11.103675842285156, "learning_rate": 9.897734273393115e-05, "loss": 1.1308, "step": 1498 }, { "epoch": 0.10156514669015516, "grad_norm": 9.78297233581543, "learning_rate": 9.897597371483333e-05, "loss": 1.0907, "step": 1499 }, { "epoch": 0.10163290195812724, "grad_norm": 10.98086166381836, "learning_rate": 9.89746046957355e-05, "loss": 0.9743, "step": 1500 }, { "epoch": 0.10170065722609933, "grad_norm": 9.268783569335938, "learning_rate": 9.89732356766377e-05, "loss": 0.8917, "step": 1501 }, { "epoch": 0.10176841249407141, "grad_norm": 12.674605369567871, "learning_rate": 9.897186665753988e-05, "loss": 1.2247, "step": 1502 }, { "epoch": 0.1018361677620435, "grad_norm": 10.987565040588379, "learning_rate": 9.897049763844206e-05, "loss": 1.0688, "step": 1503 }, { "epoch": 0.10190392303001558, "grad_norm": 16.014053344726562, "learning_rate": 9.896912861934424e-05, "loss": 0.8334, "step": 1504 }, { "epoch": 0.10197167829798767, "grad_norm": 11.119991302490234, "learning_rate": 9.896775960024642e-05, "loss": 1.3176, "step": 1505 }, { "epoch": 0.10203943356595975, "grad_norm": 13.23279094696045, "learning_rate": 9.896639058114861e-05, "loss": 1.1625, "step": 1506 }, { "epoch": 0.10210718883393184, "grad_norm": 9.3678560256958, "learning_rate": 9.89650215620508e-05, "loss": 1.2774, "step": 1507 }, { "epoch": 0.10217494410190392, "grad_norm": 10.829100608825684, "learning_rate": 9.896365254295298e-05, "loss": 1.2423, "step": 1508 }, { "epoch": 0.10224269936987601, "grad_norm": 12.12694263458252, "learning_rate": 9.896228352385516e-05, "loss": 1.2276, "step": 1509 }, { "epoch": 0.10231045463784809, "grad_norm": 11.626548767089844, "learning_rate": 9.896091450475735e-05, "loss": 1.0871, "step": 1510 }, { "epoch": 0.10237820990582018, "grad_norm": 11.388608932495117, "learning_rate": 9.895954548565953e-05, "loss": 1.5582, "step": 1511 }, { "epoch": 0.10244596517379226, "grad_norm": 9.463730812072754, "learning_rate": 9.895817646656171e-05, "loss": 1.1066, "step": 1512 }, { "epoch": 0.10251372044176435, "grad_norm": 10.291573524475098, "learning_rate": 9.895680744746389e-05, "loss": 1.4026, "step": 1513 }, { "epoch": 0.10258147570973643, "grad_norm": 9.778963088989258, "learning_rate": 9.895543842836607e-05, "loss": 1.1109, "step": 1514 }, { "epoch": 0.10264923097770852, "grad_norm": 9.685966491699219, "learning_rate": 9.895406940926827e-05, "loss": 1.1633, "step": 1515 }, { "epoch": 0.1027169862456806, "grad_norm": 10.76310920715332, "learning_rate": 9.895270039017045e-05, "loss": 1.0813, "step": 1516 }, { "epoch": 0.10278474151365269, "grad_norm": 9.795347213745117, "learning_rate": 9.895133137107263e-05, "loss": 1.2079, "step": 1517 }, { "epoch": 0.10285249678162477, "grad_norm": 9.980990409851074, "learning_rate": 9.89499623519748e-05, "loss": 1.0701, "step": 1518 }, { "epoch": 0.10292025204959686, "grad_norm": 9.682209014892578, "learning_rate": 9.894859333287699e-05, "loss": 1.4133, "step": 1519 }, { "epoch": 0.10298800731756894, "grad_norm": 10.632065773010254, "learning_rate": 9.894722431377918e-05, "loss": 1.0631, "step": 1520 }, { "epoch": 0.10305576258554103, "grad_norm": 10.099474906921387, "learning_rate": 9.894585529468136e-05, "loss": 1.2015, "step": 1521 }, { "epoch": 0.1031235178535131, "grad_norm": 8.289199829101562, "learning_rate": 9.894448627558354e-05, "loss": 1.3009, "step": 1522 }, { "epoch": 0.1031912731214852, "grad_norm": 9.403796195983887, "learning_rate": 9.894311725648573e-05, "loss": 1.113, "step": 1523 }, { "epoch": 0.10325902838945727, "grad_norm": 12.6613130569458, "learning_rate": 9.894174823738792e-05, "loss": 0.9394, "step": 1524 }, { "epoch": 0.10332678365742937, "grad_norm": 9.85255241394043, "learning_rate": 9.894037921829011e-05, "loss": 1.1007, "step": 1525 }, { "epoch": 0.10339453892540144, "grad_norm": 11.918173789978027, "learning_rate": 9.893901019919229e-05, "loss": 1.1817, "step": 1526 }, { "epoch": 0.10346229419337354, "grad_norm": 9.994447708129883, "learning_rate": 9.893764118009447e-05, "loss": 1.2522, "step": 1527 }, { "epoch": 0.10353004946134561, "grad_norm": 9.879289627075195, "learning_rate": 9.893627216099665e-05, "loss": 1.152, "step": 1528 }, { "epoch": 0.1035978047293177, "grad_norm": 10.103482246398926, "learning_rate": 9.893490314189884e-05, "loss": 1.2227, "step": 1529 }, { "epoch": 0.10366555999728978, "grad_norm": 11.173476219177246, "learning_rate": 9.893353412280102e-05, "loss": 1.2188, "step": 1530 }, { "epoch": 0.10373331526526187, "grad_norm": 11.540877342224121, "learning_rate": 9.89321651037032e-05, "loss": 1.1669, "step": 1531 }, { "epoch": 0.10380107053323395, "grad_norm": 10.706154823303223, "learning_rate": 9.893079608460539e-05, "loss": 1.2771, "step": 1532 }, { "epoch": 0.10386882580120604, "grad_norm": 11.781739234924316, "learning_rate": 9.892942706550758e-05, "loss": 1.0501, "step": 1533 }, { "epoch": 0.10393658106917812, "grad_norm": 10.482099533081055, "learning_rate": 9.892805804640976e-05, "loss": 1.2327, "step": 1534 }, { "epoch": 0.10400433633715021, "grad_norm": 12.450867652893066, "learning_rate": 9.892668902731194e-05, "loss": 1.3575, "step": 1535 }, { "epoch": 0.1040720916051223, "grad_norm": 10.236811637878418, "learning_rate": 9.892532000821412e-05, "loss": 1.2559, "step": 1536 }, { "epoch": 0.10413984687309438, "grad_norm": 9.311124801635742, "learning_rate": 9.89239509891163e-05, "loss": 1.1143, "step": 1537 }, { "epoch": 0.10420760214106647, "grad_norm": 9.182706832885742, "learning_rate": 9.89225819700185e-05, "loss": 0.8875, "step": 1538 }, { "epoch": 0.10427535740903855, "grad_norm": 12.762700080871582, "learning_rate": 9.892121295092067e-05, "loss": 1.0552, "step": 1539 }, { "epoch": 0.10434311267701064, "grad_norm": 12.222203254699707, "learning_rate": 9.891984393182285e-05, "loss": 1.3185, "step": 1540 }, { "epoch": 0.10441086794498272, "grad_norm": 11.45807933807373, "learning_rate": 9.891847491272504e-05, "loss": 1.2952, "step": 1541 }, { "epoch": 0.10447862321295481, "grad_norm": 10.445068359375, "learning_rate": 9.891710589362722e-05, "loss": 1.1637, "step": 1542 }, { "epoch": 0.10454637848092689, "grad_norm": 11.758063316345215, "learning_rate": 9.891573687452941e-05, "loss": 1.3202, "step": 1543 }, { "epoch": 0.10461413374889898, "grad_norm": 10.176533699035645, "learning_rate": 9.891436785543159e-05, "loss": 1.1762, "step": 1544 }, { "epoch": 0.10468188901687106, "grad_norm": 10.133155822753906, "learning_rate": 9.891299883633377e-05, "loss": 1.2226, "step": 1545 }, { "epoch": 0.10474964428484315, "grad_norm": 9.883895874023438, "learning_rate": 9.891162981723595e-05, "loss": 0.9687, "step": 1546 }, { "epoch": 0.10481739955281523, "grad_norm": 13.175050735473633, "learning_rate": 9.891026079813814e-05, "loss": 1.2075, "step": 1547 }, { "epoch": 0.10488515482078732, "grad_norm": 11.183597564697266, "learning_rate": 9.890889177904032e-05, "loss": 1.0917, "step": 1548 }, { "epoch": 0.1049529100887594, "grad_norm": 10.135035514831543, "learning_rate": 9.89075227599425e-05, "loss": 1.0087, "step": 1549 }, { "epoch": 0.10502066535673149, "grad_norm": 16.02760887145996, "learning_rate": 9.890615374084469e-05, "loss": 1.2015, "step": 1550 }, { "epoch": 0.10508842062470357, "grad_norm": 11.255363464355469, "learning_rate": 9.890478472174687e-05, "loss": 1.1106, "step": 1551 }, { "epoch": 0.10515617589267566, "grad_norm": 10.740998268127441, "learning_rate": 9.890341570264906e-05, "loss": 1.1835, "step": 1552 }, { "epoch": 0.10522393116064774, "grad_norm": 11.84919548034668, "learning_rate": 9.890204668355124e-05, "loss": 1.1655, "step": 1553 }, { "epoch": 0.10529168642861983, "grad_norm": 12.041108131408691, "learning_rate": 9.890067766445342e-05, "loss": 1.2544, "step": 1554 }, { "epoch": 0.10535944169659191, "grad_norm": 8.966646194458008, "learning_rate": 9.88993086453556e-05, "loss": 1.098, "step": 1555 }, { "epoch": 0.105427196964564, "grad_norm": 11.838338851928711, "learning_rate": 9.88979396262578e-05, "loss": 1.1774, "step": 1556 }, { "epoch": 0.10549495223253608, "grad_norm": 11.186326026916504, "learning_rate": 9.889657060715997e-05, "loss": 1.1778, "step": 1557 }, { "epoch": 0.10556270750050817, "grad_norm": 9.448702812194824, "learning_rate": 9.889520158806216e-05, "loss": 1.0171, "step": 1558 }, { "epoch": 0.10563046276848025, "grad_norm": 10.931096076965332, "learning_rate": 9.889383256896434e-05, "loss": 1.1797, "step": 1559 }, { "epoch": 0.10569821803645234, "grad_norm": 10.293981552124023, "learning_rate": 9.889246354986652e-05, "loss": 1.0723, "step": 1560 }, { "epoch": 0.10576597330442442, "grad_norm": 10.052331924438477, "learning_rate": 9.889109453076871e-05, "loss": 0.831, "step": 1561 }, { "epoch": 0.10583372857239651, "grad_norm": 10.593210220336914, "learning_rate": 9.888972551167089e-05, "loss": 1.3415, "step": 1562 }, { "epoch": 0.10590148384036858, "grad_norm": 11.580954551696777, "learning_rate": 9.888835649257307e-05, "loss": 1.1522, "step": 1563 }, { "epoch": 0.10596923910834068, "grad_norm": 11.495551109313965, "learning_rate": 9.888698747347525e-05, "loss": 1.2515, "step": 1564 }, { "epoch": 0.10603699437631275, "grad_norm": 10.543874740600586, "learning_rate": 9.888561845437743e-05, "loss": 1.1579, "step": 1565 }, { "epoch": 0.10610474964428485, "grad_norm": 10.588164329528809, "learning_rate": 9.888424943527963e-05, "loss": 1.3825, "step": 1566 }, { "epoch": 0.10617250491225692, "grad_norm": 13.591666221618652, "learning_rate": 9.88828804161818e-05, "loss": 1.0941, "step": 1567 }, { "epoch": 0.10624026018022902, "grad_norm": 10.866951942443848, "learning_rate": 9.888151139708399e-05, "loss": 1.478, "step": 1568 }, { "epoch": 0.10630801544820109, "grad_norm": 9.627554893493652, "learning_rate": 9.888014237798618e-05, "loss": 1.004, "step": 1569 }, { "epoch": 0.10637577071617318, "grad_norm": 10.871118545532227, "learning_rate": 9.887877335888836e-05, "loss": 1.1997, "step": 1570 }, { "epoch": 0.10644352598414526, "grad_norm": 10.74503231048584, "learning_rate": 9.887740433979054e-05, "loss": 1.005, "step": 1571 }, { "epoch": 0.10651128125211735, "grad_norm": 11.119452476501465, "learning_rate": 9.887603532069273e-05, "loss": 1.1168, "step": 1572 }, { "epoch": 0.10657903652008943, "grad_norm": 10.601544380187988, "learning_rate": 9.887466630159491e-05, "loss": 1.0256, "step": 1573 }, { "epoch": 0.10664679178806152, "grad_norm": 10.329113006591797, "learning_rate": 9.88732972824971e-05, "loss": 1.3432, "step": 1574 }, { "epoch": 0.1067145470560336, "grad_norm": 9.973999977111816, "learning_rate": 9.887192826339929e-05, "loss": 0.9961, "step": 1575 }, { "epoch": 0.10678230232400569, "grad_norm": 10.981974601745605, "learning_rate": 9.887055924430147e-05, "loss": 1.3936, "step": 1576 }, { "epoch": 0.10685005759197777, "grad_norm": 10.953417778015137, "learning_rate": 9.886919022520365e-05, "loss": 0.9669, "step": 1577 }, { "epoch": 0.10691781285994986, "grad_norm": 10.656323432922363, "learning_rate": 9.886782120610583e-05, "loss": 1.1393, "step": 1578 }, { "epoch": 0.10698556812792194, "grad_norm": 9.434617042541504, "learning_rate": 9.886645218700802e-05, "loss": 1.1813, "step": 1579 }, { "epoch": 0.10705332339589403, "grad_norm": 12.334831237792969, "learning_rate": 9.88650831679102e-05, "loss": 1.453, "step": 1580 }, { "epoch": 0.10712107866386611, "grad_norm": 9.892403602600098, "learning_rate": 9.886371414881238e-05, "loss": 1.1373, "step": 1581 }, { "epoch": 0.1071888339318382, "grad_norm": 9.248678207397461, "learning_rate": 9.886234512971456e-05, "loss": 1.0734, "step": 1582 }, { "epoch": 0.10725658919981028, "grad_norm": 10.317010879516602, "learning_rate": 9.886097611061675e-05, "loss": 1.1252, "step": 1583 }, { "epoch": 0.10732434446778237, "grad_norm": 9.586435317993164, "learning_rate": 9.885960709151894e-05, "loss": 1.1705, "step": 1584 }, { "epoch": 0.10739209973575445, "grad_norm": 8.883166313171387, "learning_rate": 9.885823807242112e-05, "loss": 1.3164, "step": 1585 }, { "epoch": 0.10745985500372654, "grad_norm": 12.870014190673828, "learning_rate": 9.88568690533233e-05, "loss": 1.2187, "step": 1586 }, { "epoch": 0.10752761027169863, "grad_norm": 10.858057975769043, "learning_rate": 9.885550003422548e-05, "loss": 1.2334, "step": 1587 }, { "epoch": 0.10759536553967071, "grad_norm": 9.855050086975098, "learning_rate": 9.885413101512767e-05, "loss": 1.068, "step": 1588 }, { "epoch": 0.1076631208076428, "grad_norm": 12.256099700927734, "learning_rate": 9.885276199602985e-05, "loss": 1.2481, "step": 1589 }, { "epoch": 0.10773087607561488, "grad_norm": 9.756118774414062, "learning_rate": 9.885139297693203e-05, "loss": 1.2365, "step": 1590 }, { "epoch": 0.10779863134358697, "grad_norm": 11.727996826171875, "learning_rate": 9.885002395783421e-05, "loss": 1.3839, "step": 1591 }, { "epoch": 0.10786638661155905, "grad_norm": 11.58463191986084, "learning_rate": 9.88486549387364e-05, "loss": 0.937, "step": 1592 }, { "epoch": 0.10793414187953114, "grad_norm": 12.663015365600586, "learning_rate": 9.884728591963859e-05, "loss": 1.3453, "step": 1593 }, { "epoch": 0.10800189714750322, "grad_norm": 10.950531005859375, "learning_rate": 9.884591690054077e-05, "loss": 1.1345, "step": 1594 }, { "epoch": 0.10806965241547531, "grad_norm": 10.361424446105957, "learning_rate": 9.884454788144295e-05, "loss": 1.2576, "step": 1595 }, { "epoch": 0.10813740768344739, "grad_norm": 11.155044555664062, "learning_rate": 9.884317886234513e-05, "loss": 1.1253, "step": 1596 }, { "epoch": 0.10820516295141948, "grad_norm": 12.457600593566895, "learning_rate": 9.884180984324731e-05, "loss": 1.0959, "step": 1597 }, { "epoch": 0.10827291821939156, "grad_norm": 13.494983673095703, "learning_rate": 9.88404408241495e-05, "loss": 1.1671, "step": 1598 }, { "epoch": 0.10834067348736365, "grad_norm": 11.870648384094238, "learning_rate": 9.883907180505168e-05, "loss": 1.2121, "step": 1599 }, { "epoch": 0.10840842875533573, "grad_norm": 10.327920913696289, "learning_rate": 9.883770278595387e-05, "loss": 1.1319, "step": 1600 }, { "epoch": 0.10847618402330782, "grad_norm": 9.578879356384277, "learning_rate": 9.883633376685605e-05, "loss": 0.9064, "step": 1601 }, { "epoch": 0.1085439392912799, "grad_norm": 9.598779678344727, "learning_rate": 9.883496474775824e-05, "loss": 1.1119, "step": 1602 }, { "epoch": 0.10861169455925199, "grad_norm": 13.522628784179688, "learning_rate": 9.883359572866042e-05, "loss": 1.2549, "step": 1603 }, { "epoch": 0.10867944982722406, "grad_norm": 8.123127937316895, "learning_rate": 9.88322267095626e-05, "loss": 0.8778, "step": 1604 }, { "epoch": 0.10874720509519616, "grad_norm": 13.100455284118652, "learning_rate": 9.883085769046478e-05, "loss": 1.2023, "step": 1605 }, { "epoch": 0.10881496036316823, "grad_norm": 9.897802352905273, "learning_rate": 9.882948867136696e-05, "loss": 1.2094, "step": 1606 }, { "epoch": 0.10888271563114033, "grad_norm": 8.55170726776123, "learning_rate": 9.882811965226915e-05, "loss": 1.0407, "step": 1607 }, { "epoch": 0.1089504708991124, "grad_norm": 10.867438316345215, "learning_rate": 9.882675063317133e-05, "loss": 1.3021, "step": 1608 }, { "epoch": 0.1090182261670845, "grad_norm": 11.062238693237305, "learning_rate": 9.882538161407352e-05, "loss": 1.2627, "step": 1609 }, { "epoch": 0.10908598143505657, "grad_norm": 12.710458755493164, "learning_rate": 9.88240125949757e-05, "loss": 1.6196, "step": 1610 }, { "epoch": 0.10915373670302866, "grad_norm": 10.986543655395508, "learning_rate": 9.882264357587789e-05, "loss": 1.2021, "step": 1611 }, { "epoch": 0.10922149197100074, "grad_norm": 12.792850494384766, "learning_rate": 9.882127455678007e-05, "loss": 1.1246, "step": 1612 }, { "epoch": 0.10928924723897283, "grad_norm": 12.120078086853027, "learning_rate": 9.881990553768225e-05, "loss": 1.2522, "step": 1613 }, { "epoch": 0.10935700250694491, "grad_norm": 10.562193870544434, "learning_rate": 9.881853651858443e-05, "loss": 1.1791, "step": 1614 }, { "epoch": 0.109424757774917, "grad_norm": 13.017698287963867, "learning_rate": 9.881716749948662e-05, "loss": 1.0733, "step": 1615 }, { "epoch": 0.10949251304288908, "grad_norm": 10.904980659484863, "learning_rate": 9.88157984803888e-05, "loss": 1.0166, "step": 1616 }, { "epoch": 0.10956026831086117, "grad_norm": 12.332829475402832, "learning_rate": 9.881442946129099e-05, "loss": 1.0688, "step": 1617 }, { "epoch": 0.10962802357883325, "grad_norm": 12.238106727600098, "learning_rate": 9.881306044219318e-05, "loss": 0.9869, "step": 1618 }, { "epoch": 0.10969577884680534, "grad_norm": 12.75059986114502, "learning_rate": 9.881169142309536e-05, "loss": 1.5365, "step": 1619 }, { "epoch": 0.10976353411477742, "grad_norm": 11.019241333007812, "learning_rate": 9.881032240399754e-05, "loss": 1.2332, "step": 1620 }, { "epoch": 0.10983128938274951, "grad_norm": 11.867498397827148, "learning_rate": 9.880895338489973e-05, "loss": 1.1955, "step": 1621 }, { "epoch": 0.10989904465072159, "grad_norm": 9.685249328613281, "learning_rate": 9.880758436580191e-05, "loss": 1.1972, "step": 1622 }, { "epoch": 0.10996679991869368, "grad_norm": 11.80540657043457, "learning_rate": 9.88062153467041e-05, "loss": 0.9501, "step": 1623 }, { "epoch": 0.11003455518666576, "grad_norm": 9.719080924987793, "learning_rate": 9.880484632760627e-05, "loss": 0.9904, "step": 1624 }, { "epoch": 0.11010231045463785, "grad_norm": 10.906637191772461, "learning_rate": 9.880347730850847e-05, "loss": 1.2807, "step": 1625 }, { "epoch": 0.11017006572260993, "grad_norm": 10.40942668914795, "learning_rate": 9.880210828941065e-05, "loss": 1.2772, "step": 1626 }, { "epoch": 0.11023782099058202, "grad_norm": 10.265239715576172, "learning_rate": 9.880073927031283e-05, "loss": 1.1583, "step": 1627 }, { "epoch": 0.1103055762585541, "grad_norm": 12.158036231994629, "learning_rate": 9.879937025121501e-05, "loss": 1.3907, "step": 1628 }, { "epoch": 0.11037333152652619, "grad_norm": 10.128400802612305, "learning_rate": 9.879800123211719e-05, "loss": 0.8838, "step": 1629 }, { "epoch": 0.11044108679449827, "grad_norm": 9.536952018737793, "learning_rate": 9.879663221301938e-05, "loss": 1.0525, "step": 1630 }, { "epoch": 0.11050884206247036, "grad_norm": 10.826987266540527, "learning_rate": 9.879526319392156e-05, "loss": 1.2005, "step": 1631 }, { "epoch": 0.11057659733044244, "grad_norm": 10.08153247833252, "learning_rate": 9.879389417482374e-05, "loss": 1.1521, "step": 1632 }, { "epoch": 0.11064435259841453, "grad_norm": 10.642118453979492, "learning_rate": 9.879252515572592e-05, "loss": 1.2316, "step": 1633 }, { "epoch": 0.1107121078663866, "grad_norm": 9.595836639404297, "learning_rate": 9.879115613662812e-05, "loss": 1.0969, "step": 1634 }, { "epoch": 0.1107798631343587, "grad_norm": 8.954519271850586, "learning_rate": 9.87897871175303e-05, "loss": 0.9422, "step": 1635 }, { "epoch": 0.11084761840233077, "grad_norm": 9.143898010253906, "learning_rate": 9.878841809843248e-05, "loss": 0.8949, "step": 1636 }, { "epoch": 0.11091537367030287, "grad_norm": 11.656847953796387, "learning_rate": 9.878704907933466e-05, "loss": 0.9739, "step": 1637 }, { "epoch": 0.11098312893827494, "grad_norm": 11.996994018554688, "learning_rate": 9.878568006023684e-05, "loss": 1.3361, "step": 1638 }, { "epoch": 0.11105088420624704, "grad_norm": 11.478599548339844, "learning_rate": 9.878431104113903e-05, "loss": 1.0726, "step": 1639 }, { "epoch": 0.11111863947421913, "grad_norm": 14.594179153442383, "learning_rate": 9.878294202204121e-05, "loss": 1.3725, "step": 1640 }, { "epoch": 0.1111863947421912, "grad_norm": 12.15703296661377, "learning_rate": 9.87815730029434e-05, "loss": 1.1996, "step": 1641 }, { "epoch": 0.1112541500101633, "grad_norm": 9.384215354919434, "learning_rate": 9.878020398384557e-05, "loss": 1.1294, "step": 1642 }, { "epoch": 0.11132190527813537, "grad_norm": 11.131610870361328, "learning_rate": 9.877883496474777e-05, "loss": 1.0526, "step": 1643 }, { "epoch": 0.11138966054610747, "grad_norm": 9.628592491149902, "learning_rate": 9.877746594564995e-05, "loss": 1.0715, "step": 1644 }, { "epoch": 0.11145741581407954, "grad_norm": 10.876824378967285, "learning_rate": 9.877609692655213e-05, "loss": 1.3337, "step": 1645 }, { "epoch": 0.11152517108205164, "grad_norm": 10.489126205444336, "learning_rate": 9.877472790745431e-05, "loss": 1.1727, "step": 1646 }, { "epoch": 0.11159292635002371, "grad_norm": 8.737699508666992, "learning_rate": 9.877335888835649e-05, "loss": 1.1252, "step": 1647 }, { "epoch": 0.1116606816179958, "grad_norm": 9.8843355178833, "learning_rate": 9.877198986925868e-05, "loss": 0.9222, "step": 1648 }, { "epoch": 0.11172843688596788, "grad_norm": 7.930290699005127, "learning_rate": 9.877062085016086e-05, "loss": 1.0095, "step": 1649 }, { "epoch": 0.11179619215393997, "grad_norm": 9.721529006958008, "learning_rate": 9.876925183106304e-05, "loss": 1.0812, "step": 1650 }, { "epoch": 0.11186394742191205, "grad_norm": 10.844324111938477, "learning_rate": 9.876788281196523e-05, "loss": 1.2245, "step": 1651 }, { "epoch": 0.11193170268988414, "grad_norm": 8.750361442565918, "learning_rate": 9.87665137928674e-05, "loss": 0.9983, "step": 1652 }, { "epoch": 0.11199945795785622, "grad_norm": 8.716521263122559, "learning_rate": 9.87651447737696e-05, "loss": 0.8612, "step": 1653 }, { "epoch": 0.11206721322582831, "grad_norm": 10.06696891784668, "learning_rate": 9.876377575467178e-05, "loss": 1.0806, "step": 1654 }, { "epoch": 0.11213496849380039, "grad_norm": 10.526103019714355, "learning_rate": 9.876240673557396e-05, "loss": 1.0799, "step": 1655 }, { "epoch": 0.11220272376177248, "grad_norm": 11.108748435974121, "learning_rate": 9.876103771647614e-05, "loss": 1.3871, "step": 1656 }, { "epoch": 0.11227047902974456, "grad_norm": 10.120033264160156, "learning_rate": 9.875966869737833e-05, "loss": 1.0399, "step": 1657 }, { "epoch": 0.11233823429771665, "grad_norm": 10.589370727539062, "learning_rate": 9.875829967828051e-05, "loss": 1.3249, "step": 1658 }, { "epoch": 0.11240598956568873, "grad_norm": 8.751884460449219, "learning_rate": 9.87569306591827e-05, "loss": 0.9404, "step": 1659 }, { "epoch": 0.11247374483366082, "grad_norm": 9.923823356628418, "learning_rate": 9.875556164008488e-05, "loss": 1.067, "step": 1660 }, { "epoch": 0.1125415001016329, "grad_norm": 9.70035171508789, "learning_rate": 9.875419262098707e-05, "loss": 1.0079, "step": 1661 }, { "epoch": 0.11260925536960499, "grad_norm": 12.337713241577148, "learning_rate": 9.875282360188925e-05, "loss": 1.0962, "step": 1662 }, { "epoch": 0.11267701063757707, "grad_norm": 10.142355918884277, "learning_rate": 9.875145458279143e-05, "loss": 1.1916, "step": 1663 }, { "epoch": 0.11274476590554916, "grad_norm": 11.82714557647705, "learning_rate": 9.875008556369362e-05, "loss": 1.4582, "step": 1664 }, { "epoch": 0.11281252117352124, "grad_norm": 10.524566650390625, "learning_rate": 9.87487165445958e-05, "loss": 1.2156, "step": 1665 }, { "epoch": 0.11288027644149333, "grad_norm": 12.232719421386719, "learning_rate": 9.874734752549798e-05, "loss": 1.2152, "step": 1666 }, { "epoch": 0.11294803170946541, "grad_norm": 10.665773391723633, "learning_rate": 9.874597850640018e-05, "loss": 1.085, "step": 1667 }, { "epoch": 0.1130157869774375, "grad_norm": 10.0956449508667, "learning_rate": 9.874460948730236e-05, "loss": 1.1142, "step": 1668 }, { "epoch": 0.11308354224540958, "grad_norm": 9.27825927734375, "learning_rate": 9.874324046820454e-05, "loss": 1.0263, "step": 1669 }, { "epoch": 0.11315129751338167, "grad_norm": 11.171673774719238, "learning_rate": 9.874187144910672e-05, "loss": 1.5049, "step": 1670 }, { "epoch": 0.11321905278135375, "grad_norm": 10.442279815673828, "learning_rate": 9.874050243000891e-05, "loss": 1.0697, "step": 1671 }, { "epoch": 0.11328680804932584, "grad_norm": 11.563700675964355, "learning_rate": 9.87391334109111e-05, "loss": 1.003, "step": 1672 }, { "epoch": 0.11335456331729792, "grad_norm": 12.447733879089355, "learning_rate": 9.873776439181327e-05, "loss": 1.2279, "step": 1673 }, { "epoch": 0.11342231858527001, "grad_norm": 9.70698070526123, "learning_rate": 9.873639537271545e-05, "loss": 1.2208, "step": 1674 }, { "epoch": 0.11349007385324208, "grad_norm": 10.89858341217041, "learning_rate": 9.873502635361763e-05, "loss": 1.1676, "step": 1675 }, { "epoch": 0.11355782912121418, "grad_norm": 11.106192588806152, "learning_rate": 9.873365733451983e-05, "loss": 1.0918, "step": 1676 }, { "epoch": 0.11362558438918625, "grad_norm": 10.186528205871582, "learning_rate": 9.873228831542201e-05, "loss": 1.1651, "step": 1677 }, { "epoch": 0.11369333965715835, "grad_norm": 10.69977855682373, "learning_rate": 9.873091929632419e-05, "loss": 1.2867, "step": 1678 }, { "epoch": 0.11376109492513042, "grad_norm": 11.934000015258789, "learning_rate": 9.872955027722637e-05, "loss": 1.2051, "step": 1679 }, { "epoch": 0.11382885019310252, "grad_norm": 9.277094841003418, "learning_rate": 9.872818125812856e-05, "loss": 1.17, "step": 1680 }, { "epoch": 0.1138966054610746, "grad_norm": 11.35097885131836, "learning_rate": 9.872681223903074e-05, "loss": 1.2433, "step": 1681 }, { "epoch": 0.11396436072904668, "grad_norm": 11.362833023071289, "learning_rate": 9.872544321993292e-05, "loss": 1.3577, "step": 1682 }, { "epoch": 0.11403211599701876, "grad_norm": 10.944365501403809, "learning_rate": 9.87240742008351e-05, "loss": 1.0731, "step": 1683 }, { "epoch": 0.11409987126499085, "grad_norm": 10.292986869812012, "learning_rate": 9.872270518173728e-05, "loss": 1.0639, "step": 1684 }, { "epoch": 0.11416762653296293, "grad_norm": 10.8629150390625, "learning_rate": 9.872133616263948e-05, "loss": 1.3503, "step": 1685 }, { "epoch": 0.11423538180093502, "grad_norm": 10.277920722961426, "learning_rate": 9.871996714354166e-05, "loss": 1.1544, "step": 1686 }, { "epoch": 0.1143031370689071, "grad_norm": 8.98864459991455, "learning_rate": 9.871859812444384e-05, "loss": 1.3638, "step": 1687 }, { "epoch": 0.11437089233687919, "grad_norm": 8.126718521118164, "learning_rate": 9.871722910534602e-05, "loss": 1.0343, "step": 1688 }, { "epoch": 0.11443864760485127, "grad_norm": 9.229771614074707, "learning_rate": 9.871586008624821e-05, "loss": 1.0137, "step": 1689 }, { "epoch": 0.11450640287282336, "grad_norm": 10.554340362548828, "learning_rate": 9.87144910671504e-05, "loss": 0.9908, "step": 1690 }, { "epoch": 0.11457415814079544, "grad_norm": 11.857934951782227, "learning_rate": 9.871312204805257e-05, "loss": 1.29, "step": 1691 }, { "epoch": 0.11464191340876753, "grad_norm": 9.334272384643555, "learning_rate": 9.871175302895475e-05, "loss": 1.0449, "step": 1692 }, { "epoch": 0.11470966867673962, "grad_norm": 9.092185974121094, "learning_rate": 9.871038400985693e-05, "loss": 1.1386, "step": 1693 }, { "epoch": 0.1147774239447117, "grad_norm": 9.379465103149414, "learning_rate": 9.870901499075913e-05, "loss": 1.0964, "step": 1694 }, { "epoch": 0.11484517921268379, "grad_norm": 10.038492202758789, "learning_rate": 9.870764597166131e-05, "loss": 0.8512, "step": 1695 }, { "epoch": 0.11491293448065587, "grad_norm": 8.2230863571167, "learning_rate": 9.870627695256349e-05, "loss": 1.0646, "step": 1696 }, { "epoch": 0.11498068974862796, "grad_norm": 9.939510345458984, "learning_rate": 9.870490793346567e-05, "loss": 1.081, "step": 1697 }, { "epoch": 0.11504844501660004, "grad_norm": 11.864813804626465, "learning_rate": 9.870353891436786e-05, "loss": 1.2447, "step": 1698 }, { "epoch": 0.11511620028457213, "grad_norm": 11.517714500427246, "learning_rate": 9.870216989527004e-05, "loss": 1.1274, "step": 1699 }, { "epoch": 0.11518395555254421, "grad_norm": 12.172054290771484, "learning_rate": 9.870080087617222e-05, "loss": 1.3278, "step": 1700 }, { "epoch": 0.1152517108205163, "grad_norm": 9.915261268615723, "learning_rate": 9.86994318570744e-05, "loss": 1.0298, "step": 1701 }, { "epoch": 0.11531946608848838, "grad_norm": 10.937396049499512, "learning_rate": 9.869806283797659e-05, "loss": 1.363, "step": 1702 }, { "epoch": 0.11538722135646047, "grad_norm": 11.586840629577637, "learning_rate": 9.869669381887878e-05, "loss": 0.9152, "step": 1703 }, { "epoch": 0.11545497662443255, "grad_norm": 10.035614967346191, "learning_rate": 9.869532479978096e-05, "loss": 1.2152, "step": 1704 }, { "epoch": 0.11552273189240464, "grad_norm": 8.775728225708008, "learning_rate": 9.869395578068314e-05, "loss": 1.0552, "step": 1705 }, { "epoch": 0.11559048716037672, "grad_norm": 10.85958480834961, "learning_rate": 9.869258676158532e-05, "loss": 1.0473, "step": 1706 }, { "epoch": 0.11565824242834881, "grad_norm": 9.311616897583008, "learning_rate": 9.869121774248751e-05, "loss": 1.1704, "step": 1707 }, { "epoch": 0.11572599769632089, "grad_norm": 9.503780364990234, "learning_rate": 9.86898487233897e-05, "loss": 1.1375, "step": 1708 }, { "epoch": 0.11579375296429298, "grad_norm": 9.02602481842041, "learning_rate": 9.868847970429187e-05, "loss": 0.9955, "step": 1709 }, { "epoch": 0.11586150823226506, "grad_norm": 11.473995208740234, "learning_rate": 9.868711068519407e-05, "loss": 0.8842, "step": 1710 }, { "epoch": 0.11592926350023715, "grad_norm": 10.124822616577148, "learning_rate": 9.868574166609625e-05, "loss": 1.0458, "step": 1711 }, { "epoch": 0.11599701876820923, "grad_norm": 11.004744529724121, "learning_rate": 9.868437264699843e-05, "loss": 1.2136, "step": 1712 }, { "epoch": 0.11606477403618132, "grad_norm": 10.497210502624512, "learning_rate": 9.868300362790062e-05, "loss": 1.3457, "step": 1713 }, { "epoch": 0.1161325293041534, "grad_norm": 10.933736801147461, "learning_rate": 9.86816346088028e-05, "loss": 1.0931, "step": 1714 }, { "epoch": 0.11620028457212549, "grad_norm": 11.045526504516602, "learning_rate": 9.868026558970498e-05, "loss": 1.2338, "step": 1715 }, { "epoch": 0.11626803984009756, "grad_norm": 10.763197898864746, "learning_rate": 9.867889657060716e-05, "loss": 1.2325, "step": 1716 }, { "epoch": 0.11633579510806966, "grad_norm": 9.53070068359375, "learning_rate": 9.867752755150936e-05, "loss": 1.1268, "step": 1717 }, { "epoch": 0.11640355037604173, "grad_norm": 10.572071075439453, "learning_rate": 9.867615853241154e-05, "loss": 1.0118, "step": 1718 }, { "epoch": 0.11647130564401383, "grad_norm": 7.633776664733887, "learning_rate": 9.867478951331372e-05, "loss": 1.081, "step": 1719 }, { "epoch": 0.1165390609119859, "grad_norm": 8.915566444396973, "learning_rate": 9.86734204942159e-05, "loss": 1.1526, "step": 1720 }, { "epoch": 0.116606816179958, "grad_norm": 8.296719551086426, "learning_rate": 9.867205147511809e-05, "loss": 1.0571, "step": 1721 }, { "epoch": 0.11667457144793007, "grad_norm": 9.387847900390625, "learning_rate": 9.867068245602027e-05, "loss": 1.0447, "step": 1722 }, { "epoch": 0.11674232671590216, "grad_norm": 10.832168579101562, "learning_rate": 9.866931343692245e-05, "loss": 1.2601, "step": 1723 }, { "epoch": 0.11681008198387424, "grad_norm": 11.631181716918945, "learning_rate": 9.866794441782463e-05, "loss": 1.4507, "step": 1724 }, { "epoch": 0.11687783725184633, "grad_norm": 8.372645378112793, "learning_rate": 9.866657539872681e-05, "loss": 1.1191, "step": 1725 }, { "epoch": 0.11694559251981841, "grad_norm": 9.577934265136719, "learning_rate": 9.866520637962901e-05, "loss": 1.0336, "step": 1726 }, { "epoch": 0.1170133477877905, "grad_norm": 10.626317977905273, "learning_rate": 9.866383736053119e-05, "loss": 1.1595, "step": 1727 }, { "epoch": 0.11708110305576258, "grad_norm": 9.758353233337402, "learning_rate": 9.866246834143337e-05, "loss": 0.8803, "step": 1728 }, { "epoch": 0.11714885832373467, "grad_norm": 9.643457412719727, "learning_rate": 9.866109932233555e-05, "loss": 1.3213, "step": 1729 }, { "epoch": 0.11721661359170675, "grad_norm": 8.910233497619629, "learning_rate": 9.865973030323773e-05, "loss": 1.1123, "step": 1730 }, { "epoch": 0.11728436885967884, "grad_norm": 8.893741607666016, "learning_rate": 9.865836128413992e-05, "loss": 1.1059, "step": 1731 }, { "epoch": 0.11735212412765092, "grad_norm": 12.937616348266602, "learning_rate": 9.86569922650421e-05, "loss": 1.2488, "step": 1732 }, { "epoch": 0.11741987939562301, "grad_norm": 9.696173667907715, "learning_rate": 9.865562324594428e-05, "loss": 1.1076, "step": 1733 }, { "epoch": 0.11748763466359509, "grad_norm": 11.568793296813965, "learning_rate": 9.865425422684646e-05, "loss": 1.1023, "step": 1734 }, { "epoch": 0.11755538993156718, "grad_norm": 11.438271522521973, "learning_rate": 9.865288520774866e-05, "loss": 1.1014, "step": 1735 }, { "epoch": 0.11762314519953926, "grad_norm": 8.217238426208496, "learning_rate": 9.865151618865084e-05, "loss": 0.9566, "step": 1736 }, { "epoch": 0.11769090046751135, "grad_norm": 9.716591835021973, "learning_rate": 9.865014716955302e-05, "loss": 1.0478, "step": 1737 }, { "epoch": 0.11775865573548343, "grad_norm": 11.152593612670898, "learning_rate": 9.86487781504552e-05, "loss": 1.3552, "step": 1738 }, { "epoch": 0.11782641100345552, "grad_norm": 8.824272155761719, "learning_rate": 9.864740913135738e-05, "loss": 1.0463, "step": 1739 }, { "epoch": 0.1178941662714276, "grad_norm": 8.941548347473145, "learning_rate": 9.864604011225957e-05, "loss": 0.9552, "step": 1740 }, { "epoch": 0.11796192153939969, "grad_norm": 10.76177978515625, "learning_rate": 9.864467109316175e-05, "loss": 1.0076, "step": 1741 }, { "epoch": 0.11802967680737177, "grad_norm": 9.87431812286377, "learning_rate": 9.864330207406393e-05, "loss": 0.9905, "step": 1742 }, { "epoch": 0.11809743207534386, "grad_norm": 12.406320571899414, "learning_rate": 9.864193305496611e-05, "loss": 1.2839, "step": 1743 }, { "epoch": 0.11816518734331594, "grad_norm": 12.649428367614746, "learning_rate": 9.864056403586831e-05, "loss": 1.0806, "step": 1744 }, { "epoch": 0.11823294261128803, "grad_norm": 10.888890266418457, "learning_rate": 9.863919501677049e-05, "loss": 0.8805, "step": 1745 }, { "epoch": 0.11830069787926012, "grad_norm": 10.59262752532959, "learning_rate": 9.863782599767267e-05, "loss": 0.9977, "step": 1746 }, { "epoch": 0.1183684531472322, "grad_norm": 11.14206314086914, "learning_rate": 9.863645697857485e-05, "loss": 1.4569, "step": 1747 }, { "epoch": 0.11843620841520429, "grad_norm": 12.148449897766113, "learning_rate": 9.863508795947703e-05, "loss": 1.5222, "step": 1748 }, { "epoch": 0.11850396368317637, "grad_norm": 11.814643859863281, "learning_rate": 9.863371894037922e-05, "loss": 1.3888, "step": 1749 }, { "epoch": 0.11857171895114846, "grad_norm": 12.437151908874512, "learning_rate": 9.86323499212814e-05, "loss": 1.024, "step": 1750 }, { "epoch": 0.11863947421912054, "grad_norm": 9.314751625061035, "learning_rate": 9.863098090218358e-05, "loss": 1.064, "step": 1751 }, { "epoch": 0.11870722948709263, "grad_norm": 12.978782653808594, "learning_rate": 9.862961188308576e-05, "loss": 1.4048, "step": 1752 }, { "epoch": 0.1187749847550647, "grad_norm": 9.825428009033203, "learning_rate": 9.862824286398796e-05, "loss": 0.9631, "step": 1753 }, { "epoch": 0.1188427400230368, "grad_norm": 9.127273559570312, "learning_rate": 9.862687384489014e-05, "loss": 1.0364, "step": 1754 }, { "epoch": 0.11891049529100887, "grad_norm": 10.290020942687988, "learning_rate": 9.862550482579232e-05, "loss": 1.329, "step": 1755 }, { "epoch": 0.11897825055898097, "grad_norm": 9.023946762084961, "learning_rate": 9.862413580669451e-05, "loss": 1.186, "step": 1756 }, { "epoch": 0.11904600582695304, "grad_norm": 11.889911651611328, "learning_rate": 9.86227667875967e-05, "loss": 1.0338, "step": 1757 }, { "epoch": 0.11911376109492514, "grad_norm": 8.938654899597168, "learning_rate": 9.862139776849887e-05, "loss": 1.1933, "step": 1758 }, { "epoch": 0.11918151636289721, "grad_norm": 9.240127563476562, "learning_rate": 9.862002874940107e-05, "loss": 1.2336, "step": 1759 }, { "epoch": 0.1192492716308693, "grad_norm": 10.340953826904297, "learning_rate": 9.861865973030325e-05, "loss": 1.1097, "step": 1760 }, { "epoch": 0.11931702689884138, "grad_norm": 10.807507514953613, "learning_rate": 9.861729071120543e-05, "loss": 0.9127, "step": 1761 }, { "epoch": 0.11938478216681347, "grad_norm": 9.670829772949219, "learning_rate": 9.861592169210761e-05, "loss": 1.2296, "step": 1762 }, { "epoch": 0.11945253743478555, "grad_norm": 10.85981273651123, "learning_rate": 9.86145526730098e-05, "loss": 1.1341, "step": 1763 }, { "epoch": 0.11952029270275764, "grad_norm": 9.198482513427734, "learning_rate": 9.861318365391198e-05, "loss": 1.4397, "step": 1764 }, { "epoch": 0.11958804797072972, "grad_norm": 11.130392074584961, "learning_rate": 9.861181463481416e-05, "loss": 1.381, "step": 1765 }, { "epoch": 0.11965580323870181, "grad_norm": 9.568144798278809, "learning_rate": 9.861044561571634e-05, "loss": 1.0284, "step": 1766 }, { "epoch": 0.11972355850667389, "grad_norm": 9.049298286437988, "learning_rate": 9.860907659661854e-05, "loss": 1.0714, "step": 1767 }, { "epoch": 0.11979131377464598, "grad_norm": 8.948347091674805, "learning_rate": 9.860770757752072e-05, "loss": 1.0248, "step": 1768 }, { "epoch": 0.11985906904261806, "grad_norm": 10.836170196533203, "learning_rate": 9.86063385584229e-05, "loss": 1.0551, "step": 1769 }, { "epoch": 0.11992682431059015, "grad_norm": 11.044917106628418, "learning_rate": 9.860496953932508e-05, "loss": 1.1374, "step": 1770 }, { "epoch": 0.11999457957856223, "grad_norm": 10.975854873657227, "learning_rate": 9.860360052022726e-05, "loss": 1.2018, "step": 1771 }, { "epoch": 0.12006233484653432, "grad_norm": 9.752070426940918, "learning_rate": 9.860223150112945e-05, "loss": 1.0843, "step": 1772 }, { "epoch": 0.1201300901145064, "grad_norm": 9.662758827209473, "learning_rate": 9.860086248203163e-05, "loss": 0.8866, "step": 1773 }, { "epoch": 0.12019784538247849, "grad_norm": 11.268694877624512, "learning_rate": 9.859949346293381e-05, "loss": 1.2653, "step": 1774 }, { "epoch": 0.12026560065045057, "grad_norm": 9.201732635498047, "learning_rate": 9.8598124443836e-05, "loss": 1.1466, "step": 1775 }, { "epoch": 0.12033335591842266, "grad_norm": 12.577048301696777, "learning_rate": 9.859675542473819e-05, "loss": 1.2606, "step": 1776 }, { "epoch": 0.12040111118639474, "grad_norm": 10.051258087158203, "learning_rate": 9.859538640564037e-05, "loss": 1.1533, "step": 1777 }, { "epoch": 0.12046886645436683, "grad_norm": 9.358968734741211, "learning_rate": 9.859401738654255e-05, "loss": 1.0501, "step": 1778 }, { "epoch": 0.12053662172233891, "grad_norm": 11.277347564697266, "learning_rate": 9.859264836744473e-05, "loss": 0.9804, "step": 1779 }, { "epoch": 0.120604376990311, "grad_norm": 12.41299057006836, "learning_rate": 9.859127934834691e-05, "loss": 1.427, "step": 1780 }, { "epoch": 0.12067213225828308, "grad_norm": 10.081144332885742, "learning_rate": 9.85899103292491e-05, "loss": 1.1321, "step": 1781 }, { "epoch": 0.12073988752625517, "grad_norm": 11.469686508178711, "learning_rate": 9.858854131015128e-05, "loss": 1.1972, "step": 1782 }, { "epoch": 0.12080764279422725, "grad_norm": 8.608443260192871, "learning_rate": 9.858717229105346e-05, "loss": 0.9914, "step": 1783 }, { "epoch": 0.12087539806219934, "grad_norm": 11.673405647277832, "learning_rate": 9.858580327195564e-05, "loss": 1.3406, "step": 1784 }, { "epoch": 0.12094315333017142, "grad_norm": 9.533596992492676, "learning_rate": 9.858443425285782e-05, "loss": 1.2119, "step": 1785 }, { "epoch": 0.12101090859814351, "grad_norm": 7.792461395263672, "learning_rate": 9.858306523376002e-05, "loss": 1.167, "step": 1786 }, { "epoch": 0.12107866386611559, "grad_norm": 10.209978103637695, "learning_rate": 9.85816962146622e-05, "loss": 1.164, "step": 1787 }, { "epoch": 0.12114641913408768, "grad_norm": 11.591109275817871, "learning_rate": 9.858032719556438e-05, "loss": 1.2598, "step": 1788 }, { "epoch": 0.12121417440205975, "grad_norm": 10.562797546386719, "learning_rate": 9.857895817646656e-05, "loss": 1.3071, "step": 1789 }, { "epoch": 0.12128192967003185, "grad_norm": 9.419530868530273, "learning_rate": 9.857758915736875e-05, "loss": 0.9951, "step": 1790 }, { "epoch": 0.12134968493800392, "grad_norm": 9.525064468383789, "learning_rate": 9.857622013827093e-05, "loss": 1.1833, "step": 1791 }, { "epoch": 0.12141744020597602, "grad_norm": 9.140012741088867, "learning_rate": 9.857485111917311e-05, "loss": 1.0573, "step": 1792 }, { "epoch": 0.1214851954739481, "grad_norm": 10.88685417175293, "learning_rate": 9.85734821000753e-05, "loss": 1.1669, "step": 1793 }, { "epoch": 0.12155295074192018, "grad_norm": 8.63829231262207, "learning_rate": 9.857211308097747e-05, "loss": 1.1733, "step": 1794 }, { "epoch": 0.12162070600989226, "grad_norm": 9.90237808227539, "learning_rate": 9.857074406187967e-05, "loss": 0.9175, "step": 1795 }, { "epoch": 0.12168846127786435, "grad_norm": 10.068902969360352, "learning_rate": 9.856937504278185e-05, "loss": 0.9422, "step": 1796 }, { "epoch": 0.12175621654583643, "grad_norm": 7.482577800750732, "learning_rate": 9.856800602368403e-05, "loss": 1.0359, "step": 1797 }, { "epoch": 0.12182397181380852, "grad_norm": 10.847206115722656, "learning_rate": 9.856663700458621e-05, "loss": 1.2258, "step": 1798 }, { "epoch": 0.12189172708178062, "grad_norm": 11.122177124023438, "learning_rate": 9.85652679854884e-05, "loss": 1.1378, "step": 1799 }, { "epoch": 0.1219594823497527, "grad_norm": 9.994139671325684, "learning_rate": 9.856389896639058e-05, "loss": 1.2663, "step": 1800 }, { "epoch": 0.12202723761772478, "grad_norm": 10.687590599060059, "learning_rate": 9.856252994729276e-05, "loss": 1.1777, "step": 1801 }, { "epoch": 0.12209499288569686, "grad_norm": 8.403971672058105, "learning_rate": 9.856116092819494e-05, "loss": 0.9812, "step": 1802 }, { "epoch": 0.12216274815366895, "grad_norm": 9.77443790435791, "learning_rate": 9.855979190909714e-05, "loss": 1.0615, "step": 1803 }, { "epoch": 0.12223050342164103, "grad_norm": 10.772642135620117, "learning_rate": 9.855842288999932e-05, "loss": 1.1835, "step": 1804 }, { "epoch": 0.12229825868961312, "grad_norm": 10.509920120239258, "learning_rate": 9.85570538709015e-05, "loss": 1.0361, "step": 1805 }, { "epoch": 0.1223660139575852, "grad_norm": 11.14194393157959, "learning_rate": 9.855568485180369e-05, "loss": 0.9947, "step": 1806 }, { "epoch": 0.12243376922555729, "grad_norm": 11.018975257873535, "learning_rate": 9.855431583270587e-05, "loss": 1.2036, "step": 1807 }, { "epoch": 0.12250152449352937, "grad_norm": 12.099370002746582, "learning_rate": 9.855294681360805e-05, "loss": 1.2977, "step": 1808 }, { "epoch": 0.12256927976150146, "grad_norm": 8.841024398803711, "learning_rate": 9.855157779451025e-05, "loss": 1.2093, "step": 1809 }, { "epoch": 0.12263703502947354, "grad_norm": 9.149311065673828, "learning_rate": 9.855020877541243e-05, "loss": 1.0088, "step": 1810 }, { "epoch": 0.12270479029744563, "grad_norm": 10.53498649597168, "learning_rate": 9.854883975631461e-05, "loss": 0.9527, "step": 1811 }, { "epoch": 0.12277254556541771, "grad_norm": 9.230859756469727, "learning_rate": 9.854747073721679e-05, "loss": 0.9584, "step": 1812 }, { "epoch": 0.1228403008333898, "grad_norm": 11.702610969543457, "learning_rate": 9.854610171811898e-05, "loss": 1.2369, "step": 1813 }, { "epoch": 0.12290805610136188, "grad_norm": 9.912004470825195, "learning_rate": 9.854473269902116e-05, "loss": 1.2571, "step": 1814 }, { "epoch": 0.12297581136933397, "grad_norm": 10.071510314941406, "learning_rate": 9.854336367992334e-05, "loss": 1.1666, "step": 1815 }, { "epoch": 0.12304356663730605, "grad_norm": 10.642035484313965, "learning_rate": 9.854199466082552e-05, "loss": 1.0309, "step": 1816 }, { "epoch": 0.12311132190527814, "grad_norm": 11.135947227478027, "learning_rate": 9.85406256417277e-05, "loss": 1.1276, "step": 1817 }, { "epoch": 0.12317907717325022, "grad_norm": 11.575469017028809, "learning_rate": 9.85392566226299e-05, "loss": 1.1507, "step": 1818 }, { "epoch": 0.12324683244122231, "grad_norm": 10.470771789550781, "learning_rate": 9.853788760353208e-05, "loss": 1.3777, "step": 1819 }, { "epoch": 0.12331458770919439, "grad_norm": 9.77783489227295, "learning_rate": 9.853651858443426e-05, "loss": 1.0492, "step": 1820 }, { "epoch": 0.12338234297716648, "grad_norm": 10.132309913635254, "learning_rate": 9.853514956533644e-05, "loss": 1.1488, "step": 1821 }, { "epoch": 0.12345009824513856, "grad_norm": 11.159482955932617, "learning_rate": 9.853378054623863e-05, "loss": 1.2571, "step": 1822 }, { "epoch": 0.12351785351311065, "grad_norm": 11.599156379699707, "learning_rate": 9.853241152714081e-05, "loss": 1.1734, "step": 1823 }, { "epoch": 0.12358560878108273, "grad_norm": 10.278475761413574, "learning_rate": 9.853104250804299e-05, "loss": 1.0637, "step": 1824 }, { "epoch": 0.12365336404905482, "grad_norm": 12.126015663146973, "learning_rate": 9.852967348894517e-05, "loss": 1.0546, "step": 1825 }, { "epoch": 0.1237211193170269, "grad_norm": 12.094749450683594, "learning_rate": 9.852830446984735e-05, "loss": 1.374, "step": 1826 }, { "epoch": 0.12378887458499899, "grad_norm": 7.607821941375732, "learning_rate": 9.852693545074955e-05, "loss": 0.827, "step": 1827 }, { "epoch": 0.12385662985297106, "grad_norm": 12.086756706237793, "learning_rate": 9.852556643165173e-05, "loss": 1.1219, "step": 1828 }, { "epoch": 0.12392438512094316, "grad_norm": 10.215773582458496, "learning_rate": 9.852419741255391e-05, "loss": 1.0148, "step": 1829 }, { "epoch": 0.12399214038891523, "grad_norm": 10.438709259033203, "learning_rate": 9.852282839345609e-05, "loss": 1.0649, "step": 1830 }, { "epoch": 0.12405989565688733, "grad_norm": 10.564906120300293, "learning_rate": 9.852145937435828e-05, "loss": 1.4346, "step": 1831 }, { "epoch": 0.1241276509248594, "grad_norm": 9.83198356628418, "learning_rate": 9.852009035526046e-05, "loss": 0.847, "step": 1832 }, { "epoch": 0.1241954061928315, "grad_norm": 10.990026473999023, "learning_rate": 9.851872133616264e-05, "loss": 1.2358, "step": 1833 }, { "epoch": 0.12426316146080357, "grad_norm": 10.108382225036621, "learning_rate": 9.851735231706482e-05, "loss": 0.9943, "step": 1834 }, { "epoch": 0.12433091672877566, "grad_norm": 10.23820686340332, "learning_rate": 9.8515983297967e-05, "loss": 1.2464, "step": 1835 }, { "epoch": 0.12439867199674774, "grad_norm": 11.552473068237305, "learning_rate": 9.85146142788692e-05, "loss": 1.2175, "step": 1836 }, { "epoch": 0.12446642726471983, "grad_norm": 8.646978378295898, "learning_rate": 9.851324525977138e-05, "loss": 0.9975, "step": 1837 }, { "epoch": 0.12453418253269191, "grad_norm": 9.344541549682617, "learning_rate": 9.851187624067356e-05, "loss": 0.9463, "step": 1838 }, { "epoch": 0.124601937800664, "grad_norm": 12.645013809204102, "learning_rate": 9.851050722157574e-05, "loss": 1.0396, "step": 1839 }, { "epoch": 0.12466969306863608, "grad_norm": 10.093807220458984, "learning_rate": 9.850913820247792e-05, "loss": 1.2554, "step": 1840 }, { "epoch": 0.12473744833660817, "grad_norm": 8.792567253112793, "learning_rate": 9.850776918338011e-05, "loss": 0.9852, "step": 1841 }, { "epoch": 0.12480520360458025, "grad_norm": 10.448987007141113, "learning_rate": 9.85064001642823e-05, "loss": 1.168, "step": 1842 }, { "epoch": 0.12487295887255234, "grad_norm": 9.953516960144043, "learning_rate": 9.850503114518447e-05, "loss": 1.0116, "step": 1843 }, { "epoch": 0.12494071414052442, "grad_norm": 10.187036514282227, "learning_rate": 9.850366212608665e-05, "loss": 1.203, "step": 1844 }, { "epoch": 0.1250084694084965, "grad_norm": 8.890667915344238, "learning_rate": 9.850229310698885e-05, "loss": 1.0104, "step": 1845 }, { "epoch": 0.1250762246764686, "grad_norm": 11.916625022888184, "learning_rate": 9.850092408789103e-05, "loss": 1.0254, "step": 1846 }, { "epoch": 0.12514397994444068, "grad_norm": 10.934864044189453, "learning_rate": 9.849955506879321e-05, "loss": 0.975, "step": 1847 }, { "epoch": 0.12521173521241277, "grad_norm": 11.214954376220703, "learning_rate": 9.849818604969539e-05, "loss": 1.45, "step": 1848 }, { "epoch": 0.12527949048038484, "grad_norm": 8.801512718200684, "learning_rate": 9.849681703059758e-05, "loss": 1.0899, "step": 1849 }, { "epoch": 0.12534724574835693, "grad_norm": 10.340089797973633, "learning_rate": 9.849544801149976e-05, "loss": 1.1704, "step": 1850 }, { "epoch": 0.12541500101632902, "grad_norm": 9.204201698303223, "learning_rate": 9.849407899240194e-05, "loss": 1.0569, "step": 1851 }, { "epoch": 0.1254827562843011, "grad_norm": 9.43604564666748, "learning_rate": 9.849270997330414e-05, "loss": 1.0721, "step": 1852 }, { "epoch": 0.1255505115522732, "grad_norm": 10.110416412353516, "learning_rate": 9.849134095420632e-05, "loss": 1.1265, "step": 1853 }, { "epoch": 0.12561826682024527, "grad_norm": 10.630755424499512, "learning_rate": 9.84899719351085e-05, "loss": 1.1103, "step": 1854 }, { "epoch": 0.12568602208821736, "grad_norm": 10.888835906982422, "learning_rate": 9.848860291601069e-05, "loss": 1.347, "step": 1855 }, { "epoch": 0.12575377735618945, "grad_norm": 8.84623908996582, "learning_rate": 9.848723389691287e-05, "loss": 0.8531, "step": 1856 }, { "epoch": 0.12582153262416154, "grad_norm": 10.99928092956543, "learning_rate": 9.848586487781505e-05, "loss": 1.1747, "step": 1857 }, { "epoch": 0.1258892878921336, "grad_norm": 8.556151390075684, "learning_rate": 9.848449585871723e-05, "loss": 1.0207, "step": 1858 }, { "epoch": 0.1259570431601057, "grad_norm": 10.207547187805176, "learning_rate": 9.848312683961943e-05, "loss": 1.4782, "step": 1859 }, { "epoch": 0.1260247984280778, "grad_norm": 9.39505386352539, "learning_rate": 9.848175782052161e-05, "loss": 1.1415, "step": 1860 }, { "epoch": 0.12609255369604988, "grad_norm": 10.647768020629883, "learning_rate": 9.848038880142379e-05, "loss": 1.1439, "step": 1861 }, { "epoch": 0.12616030896402194, "grad_norm": 11.517707824707031, "learning_rate": 9.847901978232597e-05, "loss": 1.3181, "step": 1862 }, { "epoch": 0.12622806423199404, "grad_norm": 10.916093826293945, "learning_rate": 9.847765076322815e-05, "loss": 1.0708, "step": 1863 }, { "epoch": 0.12629581949996613, "grad_norm": 9.893363952636719, "learning_rate": 9.847628174413034e-05, "loss": 1.1932, "step": 1864 }, { "epoch": 0.12636357476793822, "grad_norm": 12.349174499511719, "learning_rate": 9.847491272503252e-05, "loss": 1.3161, "step": 1865 }, { "epoch": 0.12643133003591028, "grad_norm": 10.157081604003906, "learning_rate": 9.84735437059347e-05, "loss": 1.1325, "step": 1866 }, { "epoch": 0.12649908530388237, "grad_norm": 9.772073745727539, "learning_rate": 9.847217468683688e-05, "loss": 1.2739, "step": 1867 }, { "epoch": 0.12656684057185447, "grad_norm": 10.468371391296387, "learning_rate": 9.847080566773908e-05, "loss": 1.0279, "step": 1868 }, { "epoch": 0.12663459583982656, "grad_norm": 11.055033683776855, "learning_rate": 9.846943664864126e-05, "loss": 1.0584, "step": 1869 }, { "epoch": 0.12670235110779862, "grad_norm": 10.47987174987793, "learning_rate": 9.846806762954344e-05, "loss": 1.0385, "step": 1870 }, { "epoch": 0.1267701063757707, "grad_norm": 8.933801651000977, "learning_rate": 9.846669861044562e-05, "loss": 1.0259, "step": 1871 }, { "epoch": 0.1268378616437428, "grad_norm": 10.32228946685791, "learning_rate": 9.84653295913478e-05, "loss": 1.0472, "step": 1872 }, { "epoch": 0.1269056169117149, "grad_norm": 9.294051170349121, "learning_rate": 9.846396057224999e-05, "loss": 1.1638, "step": 1873 }, { "epoch": 0.12697337217968696, "grad_norm": 8.32187271118164, "learning_rate": 9.846259155315217e-05, "loss": 0.9865, "step": 1874 }, { "epoch": 0.12704112744765905, "grad_norm": 8.782264709472656, "learning_rate": 9.846122253405435e-05, "loss": 1.0868, "step": 1875 }, { "epoch": 0.12710888271563114, "grad_norm": 10.444697380065918, "learning_rate": 9.845985351495653e-05, "loss": 1.2137, "step": 1876 }, { "epoch": 0.12717663798360324, "grad_norm": 10.543922424316406, "learning_rate": 9.845848449585873e-05, "loss": 1.2337, "step": 1877 }, { "epoch": 0.1272443932515753, "grad_norm": 8.504612922668457, "learning_rate": 9.845711547676091e-05, "loss": 1.0234, "step": 1878 }, { "epoch": 0.1273121485195474, "grad_norm": 8.83178997039795, "learning_rate": 9.845574645766309e-05, "loss": 1.0611, "step": 1879 }, { "epoch": 0.12737990378751948, "grad_norm": 14.37713623046875, "learning_rate": 9.845437743856527e-05, "loss": 1.1704, "step": 1880 }, { "epoch": 0.12744765905549157, "grad_norm": 12.43575668334961, "learning_rate": 9.845300841946745e-05, "loss": 0.848, "step": 1881 }, { "epoch": 0.12751541432346364, "grad_norm": 11.625554084777832, "learning_rate": 9.845163940036964e-05, "loss": 1.2395, "step": 1882 }, { "epoch": 0.12758316959143573, "grad_norm": 7.8962082862854, "learning_rate": 9.845027038127182e-05, "loss": 0.9702, "step": 1883 }, { "epoch": 0.12765092485940782, "grad_norm": 11.689674377441406, "learning_rate": 9.8448901362174e-05, "loss": 1.4885, "step": 1884 }, { "epoch": 0.1277186801273799, "grad_norm": 11.037276268005371, "learning_rate": 9.844753234307618e-05, "loss": 1.1751, "step": 1885 }, { "epoch": 0.12778643539535198, "grad_norm": 11.115680694580078, "learning_rate": 9.844616332397836e-05, "loss": 1.2912, "step": 1886 }, { "epoch": 0.12785419066332407, "grad_norm": 10.553492546081543, "learning_rate": 9.844479430488056e-05, "loss": 1.1001, "step": 1887 }, { "epoch": 0.12792194593129616, "grad_norm": 9.92845630645752, "learning_rate": 9.844342528578274e-05, "loss": 1.1697, "step": 1888 }, { "epoch": 0.12798970119926825, "grad_norm": 9.33232307434082, "learning_rate": 9.844205626668492e-05, "loss": 1.0931, "step": 1889 }, { "epoch": 0.12805745646724032, "grad_norm": 12.35257339477539, "learning_rate": 9.84406872475871e-05, "loss": 0.9819, "step": 1890 }, { "epoch": 0.1281252117352124, "grad_norm": 9.913105010986328, "learning_rate": 9.843931822848929e-05, "loss": 1.2515, "step": 1891 }, { "epoch": 0.1281929670031845, "grad_norm": 11.218729019165039, "learning_rate": 9.843794920939147e-05, "loss": 1.0073, "step": 1892 }, { "epoch": 0.1282607222711566, "grad_norm": 9.97374153137207, "learning_rate": 9.843658019029365e-05, "loss": 1.0527, "step": 1893 }, { "epoch": 0.12832847753912865, "grad_norm": 8.837554931640625, "learning_rate": 9.843521117119583e-05, "loss": 1.0175, "step": 1894 }, { "epoch": 0.12839623280710075, "grad_norm": 9.207158088684082, "learning_rate": 9.843384215209803e-05, "loss": 1.0729, "step": 1895 }, { "epoch": 0.12846398807507284, "grad_norm": 10.960060119628906, "learning_rate": 9.843247313300021e-05, "loss": 1.0485, "step": 1896 }, { "epoch": 0.12853174334304493, "grad_norm": 9.724952697753906, "learning_rate": 9.843110411390239e-05, "loss": 1.2624, "step": 1897 }, { "epoch": 0.128599498611017, "grad_norm": 9.02813720703125, "learning_rate": 9.842973509480458e-05, "loss": 1.011, "step": 1898 }, { "epoch": 0.12866725387898909, "grad_norm": 8.507847785949707, "learning_rate": 9.842836607570676e-05, "loss": 0.9155, "step": 1899 }, { "epoch": 0.12873500914696118, "grad_norm": 10.609807968139648, "learning_rate": 9.842699705660894e-05, "loss": 1.1984, "step": 1900 }, { "epoch": 0.12880276441493327, "grad_norm": 10.144070625305176, "learning_rate": 9.842562803751114e-05, "loss": 1.2331, "step": 1901 }, { "epoch": 0.12887051968290533, "grad_norm": 11.719651222229004, "learning_rate": 9.842425901841332e-05, "loss": 1.2086, "step": 1902 }, { "epoch": 0.12893827495087742, "grad_norm": 10.689997673034668, "learning_rate": 9.84228899993155e-05, "loss": 1.3769, "step": 1903 }, { "epoch": 0.12900603021884952, "grad_norm": 10.7424955368042, "learning_rate": 9.842152098021768e-05, "loss": 1.1844, "step": 1904 }, { "epoch": 0.1290737854868216, "grad_norm": 11.207498550415039, "learning_rate": 9.842015196111987e-05, "loss": 1.2151, "step": 1905 }, { "epoch": 0.1291415407547937, "grad_norm": 10.689212799072266, "learning_rate": 9.841878294202205e-05, "loss": 1.1182, "step": 1906 }, { "epoch": 0.12920929602276576, "grad_norm": 9.708647727966309, "learning_rate": 9.841741392292423e-05, "loss": 1.2874, "step": 1907 }, { "epoch": 0.12927705129073785, "grad_norm": 9.809847831726074, "learning_rate": 9.841604490382641e-05, "loss": 1.1716, "step": 1908 }, { "epoch": 0.12934480655870995, "grad_norm": 8.170798301696777, "learning_rate": 9.84146758847286e-05, "loss": 1.1342, "step": 1909 }, { "epoch": 0.12941256182668204, "grad_norm": 9.12773609161377, "learning_rate": 9.841330686563079e-05, "loss": 1.1506, "step": 1910 }, { "epoch": 0.1294803170946541, "grad_norm": 13.67032241821289, "learning_rate": 9.841193784653297e-05, "loss": 1.2065, "step": 1911 }, { "epoch": 0.1295480723626262, "grad_norm": 9.35611629486084, "learning_rate": 9.841056882743515e-05, "loss": 1.281, "step": 1912 }, { "epoch": 0.12961582763059828, "grad_norm": 9.044548988342285, "learning_rate": 9.840919980833733e-05, "loss": 1.0285, "step": 1913 }, { "epoch": 0.12968358289857038, "grad_norm": 9.955796241760254, "learning_rate": 9.840783078923952e-05, "loss": 1.1742, "step": 1914 }, { "epoch": 0.12975133816654244, "grad_norm": 10.456665992736816, "learning_rate": 9.84064617701417e-05, "loss": 1.3452, "step": 1915 }, { "epoch": 0.12981909343451453, "grad_norm": 10.861869812011719, "learning_rate": 9.840509275104388e-05, "loss": 1.0397, "step": 1916 }, { "epoch": 0.12988684870248662, "grad_norm": 8.546277046203613, "learning_rate": 9.840372373194606e-05, "loss": 0.8762, "step": 1917 }, { "epoch": 0.12995460397045872, "grad_norm": 9.103103637695312, "learning_rate": 9.840235471284824e-05, "loss": 1.0998, "step": 1918 }, { "epoch": 0.13002235923843078, "grad_norm": 9.794631958007812, "learning_rate": 9.840098569375044e-05, "loss": 1.0644, "step": 1919 }, { "epoch": 0.13009011450640287, "grad_norm": 8.702750205993652, "learning_rate": 9.839961667465262e-05, "loss": 1.108, "step": 1920 }, { "epoch": 0.13015786977437496, "grad_norm": 11.108800888061523, "learning_rate": 9.83982476555548e-05, "loss": 1.375, "step": 1921 }, { "epoch": 0.13022562504234705, "grad_norm": 11.253337860107422, "learning_rate": 9.839687863645698e-05, "loss": 1.0246, "step": 1922 }, { "epoch": 0.13029338031031912, "grad_norm": 9.817541122436523, "learning_rate": 9.839550961735917e-05, "loss": 1.0222, "step": 1923 }, { "epoch": 0.1303611355782912, "grad_norm": 9.378199577331543, "learning_rate": 9.839414059826135e-05, "loss": 1.0387, "step": 1924 }, { "epoch": 0.1304288908462633, "grad_norm": 8.789336204528809, "learning_rate": 9.839277157916353e-05, "loss": 1.0635, "step": 1925 }, { "epoch": 0.1304966461142354, "grad_norm": 11.900403022766113, "learning_rate": 9.839140256006571e-05, "loss": 1.1224, "step": 1926 }, { "epoch": 0.13056440138220746, "grad_norm": 10.007912635803223, "learning_rate": 9.83900335409679e-05, "loss": 0.8988, "step": 1927 }, { "epoch": 0.13063215665017955, "grad_norm": 10.157328605651855, "learning_rate": 9.838866452187009e-05, "loss": 0.952, "step": 1928 }, { "epoch": 0.13069991191815164, "grad_norm": 8.763729095458984, "learning_rate": 9.838729550277227e-05, "loss": 0.9304, "step": 1929 }, { "epoch": 0.13076766718612373, "grad_norm": 10.75432300567627, "learning_rate": 9.838592648367445e-05, "loss": 1.0438, "step": 1930 }, { "epoch": 0.1308354224540958, "grad_norm": 8.840702056884766, "learning_rate": 9.838455746457663e-05, "loss": 0.9164, "step": 1931 }, { "epoch": 0.1309031777220679, "grad_norm": 9.526811599731445, "learning_rate": 9.838318844547882e-05, "loss": 1.2181, "step": 1932 }, { "epoch": 0.13097093299003998, "grad_norm": 12.827199935913086, "learning_rate": 9.8381819426381e-05, "loss": 1.223, "step": 1933 }, { "epoch": 0.13103868825801207, "grad_norm": 9.47105884552002, "learning_rate": 9.838045040728318e-05, "loss": 1.1014, "step": 1934 }, { "epoch": 0.13110644352598413, "grad_norm": 9.044878959655762, "learning_rate": 9.837908138818536e-05, "loss": 1.1451, "step": 1935 }, { "epoch": 0.13117419879395623, "grad_norm": 9.24599838256836, "learning_rate": 9.837771236908754e-05, "loss": 1.1144, "step": 1936 }, { "epoch": 0.13124195406192832, "grad_norm": 12.543252944946289, "learning_rate": 9.837634334998974e-05, "loss": 1.1233, "step": 1937 }, { "epoch": 0.1313097093299004, "grad_norm": 10.022245407104492, "learning_rate": 9.837497433089192e-05, "loss": 1.3569, "step": 1938 }, { "epoch": 0.13137746459787247, "grad_norm": 11.967863082885742, "learning_rate": 9.83736053117941e-05, "loss": 1.2086, "step": 1939 }, { "epoch": 0.13144521986584456, "grad_norm": 9.64406967163086, "learning_rate": 9.837223629269628e-05, "loss": 1.1647, "step": 1940 }, { "epoch": 0.13151297513381666, "grad_norm": 8.538762092590332, "learning_rate": 9.837086727359847e-05, "loss": 1.012, "step": 1941 }, { "epoch": 0.13158073040178875, "grad_norm": 10.889129638671875, "learning_rate": 9.836949825450065e-05, "loss": 1.1559, "step": 1942 }, { "epoch": 0.1316484856697608, "grad_norm": 8.407093048095703, "learning_rate": 9.836812923540283e-05, "loss": 0.9624, "step": 1943 }, { "epoch": 0.1317162409377329, "grad_norm": 9.175569534301758, "learning_rate": 9.836676021630503e-05, "loss": 0.9814, "step": 1944 }, { "epoch": 0.131783996205705, "grad_norm": 11.240396499633789, "learning_rate": 9.836539119720721e-05, "loss": 1.3059, "step": 1945 }, { "epoch": 0.1318517514736771, "grad_norm": 8.791098594665527, "learning_rate": 9.836402217810939e-05, "loss": 0.9891, "step": 1946 }, { "epoch": 0.13191950674164915, "grad_norm": 7.401971340179443, "learning_rate": 9.836265315901158e-05, "loss": 0.9706, "step": 1947 }, { "epoch": 0.13198726200962124, "grad_norm": 13.127768516540527, "learning_rate": 9.836128413991376e-05, "loss": 0.8924, "step": 1948 }, { "epoch": 0.13205501727759333, "grad_norm": 9.805618286132812, "learning_rate": 9.835991512081594e-05, "loss": 0.9969, "step": 1949 }, { "epoch": 0.13212277254556543, "grad_norm": 10.500420570373535, "learning_rate": 9.835854610171812e-05, "loss": 1.1045, "step": 1950 }, { "epoch": 0.1321905278135375, "grad_norm": 10.043769836425781, "learning_rate": 9.835717708262032e-05, "loss": 1.2475, "step": 1951 }, { "epoch": 0.13225828308150958, "grad_norm": 10.6277437210083, "learning_rate": 9.83558080635225e-05, "loss": 0.9285, "step": 1952 }, { "epoch": 0.13232603834948167, "grad_norm": 9.011415481567383, "learning_rate": 9.835443904442468e-05, "loss": 1.1895, "step": 1953 }, { "epoch": 0.13239379361745376, "grad_norm": 11.031267166137695, "learning_rate": 9.835307002532686e-05, "loss": 1.1781, "step": 1954 }, { "epoch": 0.13246154888542583, "grad_norm": 9.960331916809082, "learning_rate": 9.835170100622905e-05, "loss": 1.0972, "step": 1955 }, { "epoch": 0.13252930415339792, "grad_norm": 9.549619674682617, "learning_rate": 9.835033198713123e-05, "loss": 1.0918, "step": 1956 }, { "epoch": 0.13259705942137, "grad_norm": 9.780478477478027, "learning_rate": 9.834896296803341e-05, "loss": 1.1491, "step": 1957 }, { "epoch": 0.1326648146893421, "grad_norm": 8.948554992675781, "learning_rate": 9.834759394893559e-05, "loss": 1.0969, "step": 1958 }, { "epoch": 0.1327325699573142, "grad_norm": 9.165532112121582, "learning_rate": 9.834622492983777e-05, "loss": 0.9294, "step": 1959 }, { "epoch": 0.13280032522528626, "grad_norm": 8.738619804382324, "learning_rate": 9.834485591073997e-05, "loss": 1.3074, "step": 1960 }, { "epoch": 0.13286808049325835, "grad_norm": 9.442314147949219, "learning_rate": 9.834348689164215e-05, "loss": 1.1238, "step": 1961 }, { "epoch": 0.13293583576123044, "grad_norm": 12.411934852600098, "learning_rate": 9.834211787254433e-05, "loss": 1.1405, "step": 1962 }, { "epoch": 0.13300359102920253, "grad_norm": 9.911120414733887, "learning_rate": 9.834074885344651e-05, "loss": 1.2197, "step": 1963 }, { "epoch": 0.1330713462971746, "grad_norm": 9.619095802307129, "learning_rate": 9.83393798343487e-05, "loss": 1.0526, "step": 1964 }, { "epoch": 0.1331391015651467, "grad_norm": 10.163374900817871, "learning_rate": 9.833801081525088e-05, "loss": 0.8374, "step": 1965 }, { "epoch": 0.13320685683311878, "grad_norm": 9.342517852783203, "learning_rate": 9.833664179615306e-05, "loss": 1.1908, "step": 1966 }, { "epoch": 0.13327461210109087, "grad_norm": 10.419418334960938, "learning_rate": 9.833527277705524e-05, "loss": 1.1175, "step": 1967 }, { "epoch": 0.13334236736906294, "grad_norm": 9.5196533203125, "learning_rate": 9.833390375795742e-05, "loss": 1.2347, "step": 1968 }, { "epoch": 0.13341012263703503, "grad_norm": 9.242755889892578, "learning_rate": 9.833253473885962e-05, "loss": 1.2401, "step": 1969 }, { "epoch": 0.13347787790500712, "grad_norm": 10.243762969970703, "learning_rate": 9.83311657197618e-05, "loss": 1.3492, "step": 1970 }, { "epoch": 0.1335456331729792, "grad_norm": 9.169745445251465, "learning_rate": 9.832979670066398e-05, "loss": 0.9533, "step": 1971 }, { "epoch": 0.13361338844095128, "grad_norm": 10.292695999145508, "learning_rate": 9.832842768156616e-05, "loss": 1.2216, "step": 1972 }, { "epoch": 0.13368114370892337, "grad_norm": 9.25019645690918, "learning_rate": 9.832705866246834e-05, "loss": 1.1613, "step": 1973 }, { "epoch": 0.13374889897689546, "grad_norm": 8.518020629882812, "learning_rate": 9.832568964337053e-05, "loss": 1.0767, "step": 1974 }, { "epoch": 0.13381665424486755, "grad_norm": 11.824376106262207, "learning_rate": 9.832432062427271e-05, "loss": 1.1884, "step": 1975 }, { "epoch": 0.13388440951283961, "grad_norm": 10.544015884399414, "learning_rate": 9.832295160517489e-05, "loss": 0.9675, "step": 1976 }, { "epoch": 0.1339521647808117, "grad_norm": 9.495721817016602, "learning_rate": 9.832158258607707e-05, "loss": 1.0586, "step": 1977 }, { "epoch": 0.1340199200487838, "grad_norm": 10.378434181213379, "learning_rate": 9.832021356697927e-05, "loss": 1.176, "step": 1978 }, { "epoch": 0.1340876753167559, "grad_norm": 10.026887893676758, "learning_rate": 9.831884454788145e-05, "loss": 1.035, "step": 1979 }, { "epoch": 0.13415543058472795, "grad_norm": 8.878249168395996, "learning_rate": 9.831747552878363e-05, "loss": 0.8441, "step": 1980 }, { "epoch": 0.13422318585270004, "grad_norm": 9.637616157531738, "learning_rate": 9.831610650968581e-05, "loss": 1.1435, "step": 1981 }, { "epoch": 0.13429094112067214, "grad_norm": 10.153711318969727, "learning_rate": 9.831473749058799e-05, "loss": 1.0833, "step": 1982 }, { "epoch": 0.13435869638864423, "grad_norm": 8.48596477508545, "learning_rate": 9.831336847149018e-05, "loss": 0.7752, "step": 1983 }, { "epoch": 0.1344264516566163, "grad_norm": 8.454994201660156, "learning_rate": 9.831199945239236e-05, "loss": 0.8657, "step": 1984 }, { "epoch": 0.13449420692458838, "grad_norm": 10.607659339904785, "learning_rate": 9.831063043329454e-05, "loss": 1.1836, "step": 1985 }, { "epoch": 0.13456196219256047, "grad_norm": 8.560189247131348, "learning_rate": 9.830926141419672e-05, "loss": 0.7877, "step": 1986 }, { "epoch": 0.13462971746053257, "grad_norm": 10.687662124633789, "learning_rate": 9.830789239509892e-05, "loss": 1.1586, "step": 1987 }, { "epoch": 0.13469747272850463, "grad_norm": 9.725050926208496, "learning_rate": 9.83065233760011e-05, "loss": 1.214, "step": 1988 }, { "epoch": 0.13476522799647672, "grad_norm": 9.808280944824219, "learning_rate": 9.830515435690328e-05, "loss": 1.0532, "step": 1989 }, { "epoch": 0.1348329832644488, "grad_norm": 8.838544845581055, "learning_rate": 9.830378533780547e-05, "loss": 1.0872, "step": 1990 }, { "epoch": 0.1349007385324209, "grad_norm": 12.043976783752441, "learning_rate": 9.830241631870765e-05, "loss": 1.2248, "step": 1991 }, { "epoch": 0.13496849380039297, "grad_norm": 10.044602394104004, "learning_rate": 9.830104729960983e-05, "loss": 0.9746, "step": 1992 }, { "epoch": 0.13503624906836506, "grad_norm": 10.861641883850098, "learning_rate": 9.829967828051203e-05, "loss": 0.9868, "step": 1993 }, { "epoch": 0.13510400433633715, "grad_norm": 9.005234718322754, "learning_rate": 9.82983092614142e-05, "loss": 1.0619, "step": 1994 }, { "epoch": 0.13517175960430924, "grad_norm": 9.902874946594238, "learning_rate": 9.829694024231639e-05, "loss": 1.3176, "step": 1995 }, { "epoch": 0.1352395148722813, "grad_norm": 12.133747100830078, "learning_rate": 9.829557122321857e-05, "loss": 1.3634, "step": 1996 }, { "epoch": 0.1353072701402534, "grad_norm": 9.571062088012695, "learning_rate": 9.829420220412076e-05, "loss": 1.139, "step": 1997 }, { "epoch": 0.1353750254082255, "grad_norm": 8.269545555114746, "learning_rate": 9.829283318502294e-05, "loss": 1.1422, "step": 1998 }, { "epoch": 0.13544278067619758, "grad_norm": 9.963309288024902, "learning_rate": 9.829146416592512e-05, "loss": 1.0612, "step": 1999 }, { "epoch": 0.13551053594416965, "grad_norm": 7.434775352478027, "learning_rate": 9.82900951468273e-05, "loss": 1.0418, "step": 2000 }, { "epoch": 0.13557829121214174, "grad_norm": 8.89494800567627, "learning_rate": 9.82887261277295e-05, "loss": 1.0677, "step": 2001 }, { "epoch": 0.13564604648011383, "grad_norm": 9.349754333496094, "learning_rate": 9.828735710863168e-05, "loss": 0.8932, "step": 2002 }, { "epoch": 0.13571380174808592, "grad_norm": 7.574460506439209, "learning_rate": 9.828598808953386e-05, "loss": 0.7652, "step": 2003 }, { "epoch": 0.13578155701605799, "grad_norm": 10.191296577453613, "learning_rate": 9.828461907043604e-05, "loss": 0.9881, "step": 2004 }, { "epoch": 0.13584931228403008, "grad_norm": 11.235671043395996, "learning_rate": 9.828325005133822e-05, "loss": 1.1942, "step": 2005 }, { "epoch": 0.13591706755200217, "grad_norm": 9.97700023651123, "learning_rate": 9.828188103224041e-05, "loss": 1.154, "step": 2006 }, { "epoch": 0.13598482281997426, "grad_norm": 8.283380508422852, "learning_rate": 9.828051201314259e-05, "loss": 0.8207, "step": 2007 }, { "epoch": 0.13605257808794632, "grad_norm": 9.147144317626953, "learning_rate": 9.827914299404477e-05, "loss": 1.0751, "step": 2008 }, { "epoch": 0.13612033335591842, "grad_norm": 9.99758529663086, "learning_rate": 9.827777397494695e-05, "loss": 1.1166, "step": 2009 }, { "epoch": 0.1361880886238905, "grad_norm": 8.340705871582031, "learning_rate": 9.827640495584915e-05, "loss": 1.0311, "step": 2010 }, { "epoch": 0.1362558438918626, "grad_norm": 10.536659240722656, "learning_rate": 9.827503593675133e-05, "loss": 1.1704, "step": 2011 }, { "epoch": 0.1363235991598347, "grad_norm": 9.01259994506836, "learning_rate": 9.827366691765351e-05, "loss": 1.0531, "step": 2012 }, { "epoch": 0.13639135442780675, "grad_norm": 10.476259231567383, "learning_rate": 9.827229789855569e-05, "loss": 1.053, "step": 2013 }, { "epoch": 0.13645910969577885, "grad_norm": 8.496943473815918, "learning_rate": 9.827092887945787e-05, "loss": 0.836, "step": 2014 }, { "epoch": 0.13652686496375094, "grad_norm": 11.638651847839355, "learning_rate": 9.826955986036006e-05, "loss": 1.0886, "step": 2015 }, { "epoch": 0.13659462023172303, "grad_norm": 7.483241558074951, "learning_rate": 9.826819084126224e-05, "loss": 1.0406, "step": 2016 }, { "epoch": 0.1366623754996951, "grad_norm": 8.648175239562988, "learning_rate": 9.826682182216442e-05, "loss": 1.0442, "step": 2017 }, { "epoch": 0.13673013076766719, "grad_norm": 8.593596458435059, "learning_rate": 9.82654528030666e-05, "loss": 1.1184, "step": 2018 }, { "epoch": 0.13679788603563928, "grad_norm": 8.563817977905273, "learning_rate": 9.826408378396878e-05, "loss": 0.8645, "step": 2019 }, { "epoch": 0.13686564130361137, "grad_norm": 7.812311172485352, "learning_rate": 9.826271476487098e-05, "loss": 1.0605, "step": 2020 }, { "epoch": 0.13693339657158343, "grad_norm": 10.97168254852295, "learning_rate": 9.826134574577316e-05, "loss": 1.1474, "step": 2021 }, { "epoch": 0.13700115183955552, "grad_norm": 7.749011993408203, "learning_rate": 9.825997672667534e-05, "loss": 0.9836, "step": 2022 }, { "epoch": 0.13706890710752762, "grad_norm": 11.088539123535156, "learning_rate": 9.825860770757752e-05, "loss": 1.1456, "step": 2023 }, { "epoch": 0.1371366623754997, "grad_norm": 10.960288047790527, "learning_rate": 9.825723868847971e-05, "loss": 1.4315, "step": 2024 }, { "epoch": 0.13720441764347177, "grad_norm": 10.804458618164062, "learning_rate": 9.825586966938189e-05, "loss": 1.0388, "step": 2025 }, { "epoch": 0.13727217291144386, "grad_norm": 7.906947612762451, "learning_rate": 9.825450065028407e-05, "loss": 1.1306, "step": 2026 }, { "epoch": 0.13733992817941595, "grad_norm": 10.404504776000977, "learning_rate": 9.825313163118625e-05, "loss": 1.0907, "step": 2027 }, { "epoch": 0.13740768344738805, "grad_norm": 9.684488296508789, "learning_rate": 9.825176261208843e-05, "loss": 1.1007, "step": 2028 }, { "epoch": 0.1374754387153601, "grad_norm": 8.345703125, "learning_rate": 9.825039359299063e-05, "loss": 0.9342, "step": 2029 }, { "epoch": 0.1375431939833322, "grad_norm": 8.513103485107422, "learning_rate": 9.824902457389281e-05, "loss": 1.1139, "step": 2030 }, { "epoch": 0.1376109492513043, "grad_norm": 8.675403594970703, "learning_rate": 9.824765555479499e-05, "loss": 0.8499, "step": 2031 }, { "epoch": 0.13767870451927638, "grad_norm": 10.550504684448242, "learning_rate": 9.824628653569717e-05, "loss": 1.2563, "step": 2032 }, { "epoch": 0.13774645978724845, "grad_norm": 7.775172710418701, "learning_rate": 9.824491751659936e-05, "loss": 1.0394, "step": 2033 }, { "epoch": 0.13781421505522054, "grad_norm": 10.057134628295898, "learning_rate": 9.824354849750154e-05, "loss": 1.337, "step": 2034 }, { "epoch": 0.13788197032319263, "grad_norm": 10.658480644226074, "learning_rate": 9.824217947840372e-05, "loss": 1.1675, "step": 2035 }, { "epoch": 0.13794972559116472, "grad_norm": 10.499738693237305, "learning_rate": 9.82408104593059e-05, "loss": 1.1397, "step": 2036 }, { "epoch": 0.1380174808591368, "grad_norm": 12.853378295898438, "learning_rate": 9.82394414402081e-05, "loss": 1.2373, "step": 2037 }, { "epoch": 0.13808523612710888, "grad_norm": 8.66174602508545, "learning_rate": 9.823807242111028e-05, "loss": 0.9863, "step": 2038 }, { "epoch": 0.13815299139508097, "grad_norm": 10.246259689331055, "learning_rate": 9.823670340201246e-05, "loss": 1.0892, "step": 2039 }, { "epoch": 0.13822074666305306, "grad_norm": 11.880420684814453, "learning_rate": 9.823533438291465e-05, "loss": 1.069, "step": 2040 }, { "epoch": 0.13828850193102513, "grad_norm": 9.475419998168945, "learning_rate": 9.823396536381683e-05, "loss": 0.9276, "step": 2041 }, { "epoch": 0.13835625719899722, "grad_norm": 9.827219009399414, "learning_rate": 9.823259634471903e-05, "loss": 1.2536, "step": 2042 }, { "epoch": 0.1384240124669693, "grad_norm": 8.558990478515625, "learning_rate": 9.82312273256212e-05, "loss": 1.2779, "step": 2043 }, { "epoch": 0.1384917677349414, "grad_norm": 12.038803100585938, "learning_rate": 9.822985830652339e-05, "loss": 0.9019, "step": 2044 }, { "epoch": 0.13855952300291347, "grad_norm": 10.764846801757812, "learning_rate": 9.822848928742557e-05, "loss": 1.1803, "step": 2045 }, { "epoch": 0.13862727827088556, "grad_norm": 10.788616180419922, "learning_rate": 9.822712026832775e-05, "loss": 1.3469, "step": 2046 }, { "epoch": 0.13869503353885765, "grad_norm": 8.718049049377441, "learning_rate": 9.822575124922994e-05, "loss": 0.9135, "step": 2047 }, { "epoch": 0.13876278880682974, "grad_norm": 12.245726585388184, "learning_rate": 9.822438223013212e-05, "loss": 1.3114, "step": 2048 }, { "epoch": 0.1388305440748018, "grad_norm": 10.55252456665039, "learning_rate": 9.82230132110343e-05, "loss": 1.1303, "step": 2049 }, { "epoch": 0.1388982993427739, "grad_norm": 8.177289962768555, "learning_rate": 9.822164419193648e-05, "loss": 0.9796, "step": 2050 }, { "epoch": 0.138966054610746, "grad_norm": 10.708680152893066, "learning_rate": 9.822027517283866e-05, "loss": 1.1546, "step": 2051 }, { "epoch": 0.13903380987871808, "grad_norm": 8.631631851196289, "learning_rate": 9.821890615374086e-05, "loss": 1.0055, "step": 2052 }, { "epoch": 0.13910156514669014, "grad_norm": 8.778770446777344, "learning_rate": 9.821753713464304e-05, "loss": 0.9626, "step": 2053 }, { "epoch": 0.13916932041466223, "grad_norm": 10.004143714904785, "learning_rate": 9.821616811554522e-05, "loss": 1.1411, "step": 2054 }, { "epoch": 0.13923707568263433, "grad_norm": 9.47324275970459, "learning_rate": 9.82147990964474e-05, "loss": 1.0081, "step": 2055 }, { "epoch": 0.13930483095060642, "grad_norm": 10.014934539794922, "learning_rate": 9.821343007734959e-05, "loss": 1.3015, "step": 2056 }, { "epoch": 0.13937258621857848, "grad_norm": 10.589959144592285, "learning_rate": 9.821206105825177e-05, "loss": 1.128, "step": 2057 }, { "epoch": 0.13944034148655057, "grad_norm": 9.273834228515625, "learning_rate": 9.821069203915395e-05, "loss": 0.7743, "step": 2058 }, { "epoch": 0.13950809675452266, "grad_norm": 10.72019100189209, "learning_rate": 9.820932302005613e-05, "loss": 0.9524, "step": 2059 }, { "epoch": 0.13957585202249476, "grad_norm": 11.212404251098633, "learning_rate": 9.820795400095831e-05, "loss": 1.2044, "step": 2060 }, { "epoch": 0.13964360729046682, "grad_norm": 9.800296783447266, "learning_rate": 9.82065849818605e-05, "loss": 1.0819, "step": 2061 }, { "epoch": 0.1397113625584389, "grad_norm": 8.668676376342773, "learning_rate": 9.820521596276269e-05, "loss": 1.0778, "step": 2062 }, { "epoch": 0.139779117826411, "grad_norm": 10.854613304138184, "learning_rate": 9.820384694366487e-05, "loss": 1.1181, "step": 2063 }, { "epoch": 0.1398468730943831, "grad_norm": 12.019871711730957, "learning_rate": 9.820247792456705e-05, "loss": 1.2507, "step": 2064 }, { "epoch": 0.1399146283623552, "grad_norm": 8.835234642028809, "learning_rate": 9.820110890546924e-05, "loss": 1.1338, "step": 2065 }, { "epoch": 0.13998238363032725, "grad_norm": 8.57636547088623, "learning_rate": 9.819973988637142e-05, "loss": 0.957, "step": 2066 }, { "epoch": 0.14005013889829934, "grad_norm": 9.080939292907715, "learning_rate": 9.81983708672736e-05, "loss": 1.0035, "step": 2067 }, { "epoch": 0.14011789416627143, "grad_norm": 9.616862297058105, "learning_rate": 9.819700184817578e-05, "loss": 0.9045, "step": 2068 }, { "epoch": 0.14018564943424353, "grad_norm": 10.04652214050293, "learning_rate": 9.819563282907796e-05, "loss": 1.1202, "step": 2069 }, { "epoch": 0.1402534047022156, "grad_norm": 9.132543563842773, "learning_rate": 9.819426380998016e-05, "loss": 0.9918, "step": 2070 }, { "epoch": 0.14032115997018768, "grad_norm": 12.023603439331055, "learning_rate": 9.819289479088234e-05, "loss": 1.1271, "step": 2071 }, { "epoch": 0.14038891523815977, "grad_norm": 9.167064666748047, "learning_rate": 9.819152577178452e-05, "loss": 0.9136, "step": 2072 }, { "epoch": 0.14045667050613186, "grad_norm": 11.954336166381836, "learning_rate": 9.81901567526867e-05, "loss": 1.0073, "step": 2073 }, { "epoch": 0.14052442577410393, "grad_norm": 9.166129112243652, "learning_rate": 9.818878773358888e-05, "loss": 1.1101, "step": 2074 }, { "epoch": 0.14059218104207602, "grad_norm": 8.53992748260498, "learning_rate": 9.818741871449107e-05, "loss": 0.9556, "step": 2075 }, { "epoch": 0.1406599363100481, "grad_norm": 10.769463539123535, "learning_rate": 9.818604969539325e-05, "loss": 1.3009, "step": 2076 }, { "epoch": 0.1407276915780202, "grad_norm": 10.938043594360352, "learning_rate": 9.818468067629543e-05, "loss": 1.4299, "step": 2077 }, { "epoch": 0.14079544684599227, "grad_norm": 10.8585844039917, "learning_rate": 9.818331165719761e-05, "loss": 1.0804, "step": 2078 }, { "epoch": 0.14086320211396436, "grad_norm": 8.763557434082031, "learning_rate": 9.81819426380998e-05, "loss": 0.8321, "step": 2079 }, { "epoch": 0.14093095738193645, "grad_norm": 11.917708396911621, "learning_rate": 9.818057361900199e-05, "loss": 1.4293, "step": 2080 }, { "epoch": 0.14099871264990854, "grad_norm": 10.189987182617188, "learning_rate": 9.817920459990417e-05, "loss": 1.139, "step": 2081 }, { "epoch": 0.1410664679178806, "grad_norm": 11.09156608581543, "learning_rate": 9.817783558080635e-05, "loss": 0.9866, "step": 2082 }, { "epoch": 0.1411342231858527, "grad_norm": 9.394566535949707, "learning_rate": 9.817646656170854e-05, "loss": 0.9401, "step": 2083 }, { "epoch": 0.1412019784538248, "grad_norm": 8.232394218444824, "learning_rate": 9.817509754261072e-05, "loss": 0.9193, "step": 2084 }, { "epoch": 0.14126973372179688, "grad_norm": 9.4952392578125, "learning_rate": 9.81737285235129e-05, "loss": 1.3217, "step": 2085 }, { "epoch": 0.14133748898976894, "grad_norm": 10.110014915466309, "learning_rate": 9.81723595044151e-05, "loss": 1.0413, "step": 2086 }, { "epoch": 0.14140524425774104, "grad_norm": 10.850225448608398, "learning_rate": 9.817099048531728e-05, "loss": 1.3722, "step": 2087 }, { "epoch": 0.14147299952571313, "grad_norm": 10.924457550048828, "learning_rate": 9.816962146621946e-05, "loss": 1.4004, "step": 2088 }, { "epoch": 0.14154075479368522, "grad_norm": 10.022381782531738, "learning_rate": 9.816825244712165e-05, "loss": 1.1989, "step": 2089 }, { "epoch": 0.14160851006165728, "grad_norm": 11.537693977355957, "learning_rate": 9.816688342802383e-05, "loss": 1.1922, "step": 2090 }, { "epoch": 0.14167626532962938, "grad_norm": 8.525372505187988, "learning_rate": 9.816551440892601e-05, "loss": 0.9291, "step": 2091 }, { "epoch": 0.14174402059760147, "grad_norm": 8.972722053527832, "learning_rate": 9.816414538982819e-05, "loss": 1.0917, "step": 2092 }, { "epoch": 0.14181177586557356, "grad_norm": 8.386235237121582, "learning_rate": 9.816277637073039e-05, "loss": 1.0482, "step": 2093 }, { "epoch": 0.14187953113354562, "grad_norm": 9.620473861694336, "learning_rate": 9.816140735163257e-05, "loss": 1.2523, "step": 2094 }, { "epoch": 0.1419472864015177, "grad_norm": 9.236804962158203, "learning_rate": 9.816003833253475e-05, "loss": 0.9474, "step": 2095 }, { "epoch": 0.1420150416694898, "grad_norm": 9.06925106048584, "learning_rate": 9.815866931343693e-05, "loss": 0.8921, "step": 2096 }, { "epoch": 0.1420827969374619, "grad_norm": 10.127729415893555, "learning_rate": 9.815730029433912e-05, "loss": 1.1693, "step": 2097 }, { "epoch": 0.14215055220543396, "grad_norm": 10.703007698059082, "learning_rate": 9.81559312752413e-05, "loss": 1.0652, "step": 2098 }, { "epoch": 0.14221830747340605, "grad_norm": 10.004093170166016, "learning_rate": 9.815456225614348e-05, "loss": 1.0158, "step": 2099 }, { "epoch": 0.14228606274137814, "grad_norm": 8.900236129760742, "learning_rate": 9.815319323704566e-05, "loss": 0.7327, "step": 2100 }, { "epoch": 0.14235381800935024, "grad_norm": 13.102290153503418, "learning_rate": 9.815182421794784e-05, "loss": 1.2837, "step": 2101 }, { "epoch": 0.1424215732773223, "grad_norm": 9.32657527923584, "learning_rate": 9.815045519885004e-05, "loss": 1.0812, "step": 2102 }, { "epoch": 0.1424893285452944, "grad_norm": 11.42785930633545, "learning_rate": 9.814908617975222e-05, "loss": 1.4467, "step": 2103 }, { "epoch": 0.14255708381326648, "grad_norm": 9.878450393676758, "learning_rate": 9.81477171606544e-05, "loss": 1.0774, "step": 2104 }, { "epoch": 0.14262483908123857, "grad_norm": 9.329227447509766, "learning_rate": 9.814634814155658e-05, "loss": 1.2536, "step": 2105 }, { "epoch": 0.14269259434921064, "grad_norm": 9.071858406066895, "learning_rate": 9.814497912245876e-05, "loss": 1.148, "step": 2106 }, { "epoch": 0.14276034961718273, "grad_norm": 11.595810890197754, "learning_rate": 9.814361010336095e-05, "loss": 1.1992, "step": 2107 }, { "epoch": 0.14282810488515482, "grad_norm": 10.041107177734375, "learning_rate": 9.814224108426313e-05, "loss": 1.0447, "step": 2108 }, { "epoch": 0.1428958601531269, "grad_norm": 9.913261413574219, "learning_rate": 9.814087206516531e-05, "loss": 1.1629, "step": 2109 }, { "epoch": 0.14296361542109898, "grad_norm": 10.983177185058594, "learning_rate": 9.813950304606749e-05, "loss": 1.0288, "step": 2110 }, { "epoch": 0.14303137068907107, "grad_norm": 10.479610443115234, "learning_rate": 9.813813402696969e-05, "loss": 1.5308, "step": 2111 }, { "epoch": 0.14309912595704316, "grad_norm": 8.70734691619873, "learning_rate": 9.813676500787187e-05, "loss": 0.9987, "step": 2112 }, { "epoch": 0.14316688122501525, "grad_norm": 8.242984771728516, "learning_rate": 9.813539598877405e-05, "loss": 1.0051, "step": 2113 }, { "epoch": 0.14323463649298732, "grad_norm": 8.627467155456543, "learning_rate": 9.813402696967623e-05, "loss": 1.1134, "step": 2114 }, { "epoch": 0.1433023917609594, "grad_norm": 9.970986366271973, "learning_rate": 9.813265795057841e-05, "loss": 1.148, "step": 2115 }, { "epoch": 0.1433701470289315, "grad_norm": 8.719794273376465, "learning_rate": 9.81312889314806e-05, "loss": 1.1013, "step": 2116 }, { "epoch": 0.1434379022969036, "grad_norm": 9.455860137939453, "learning_rate": 9.812991991238278e-05, "loss": 1.2333, "step": 2117 }, { "epoch": 0.14350565756487568, "grad_norm": 8.851629257202148, "learning_rate": 9.812855089328496e-05, "loss": 1.0985, "step": 2118 }, { "epoch": 0.14357341283284775, "grad_norm": 11.843599319458008, "learning_rate": 9.812718187418714e-05, "loss": 1.2292, "step": 2119 }, { "epoch": 0.14364116810081984, "grad_norm": 9.550506591796875, "learning_rate": 9.812581285508934e-05, "loss": 0.9466, "step": 2120 }, { "epoch": 0.14370892336879193, "grad_norm": 9.234643936157227, "learning_rate": 9.812444383599152e-05, "loss": 1.1968, "step": 2121 }, { "epoch": 0.14377667863676402, "grad_norm": 9.365439414978027, "learning_rate": 9.81230748168937e-05, "loss": 1.1643, "step": 2122 }, { "epoch": 0.14384443390473609, "grad_norm": 9.476024627685547, "learning_rate": 9.812170579779588e-05, "loss": 1.0541, "step": 2123 }, { "epoch": 0.14391218917270818, "grad_norm": 9.53847885131836, "learning_rate": 9.812033677869806e-05, "loss": 1.0808, "step": 2124 }, { "epoch": 0.14397994444068027, "grad_norm": 8.918699264526367, "learning_rate": 9.811896775960025e-05, "loss": 1.1055, "step": 2125 }, { "epoch": 0.14404769970865236, "grad_norm": 12.126187324523926, "learning_rate": 9.811759874050243e-05, "loss": 1.6079, "step": 2126 }, { "epoch": 0.14411545497662442, "grad_norm": 10.922599792480469, "learning_rate": 9.811622972140461e-05, "loss": 1.2569, "step": 2127 }, { "epoch": 0.14418321024459652, "grad_norm": 8.582880020141602, "learning_rate": 9.811486070230679e-05, "loss": 0.9562, "step": 2128 }, { "epoch": 0.1442509655125686, "grad_norm": 8.926568031311035, "learning_rate": 9.811349168320899e-05, "loss": 1.0108, "step": 2129 }, { "epoch": 0.1443187207805407, "grad_norm": 13.906332015991211, "learning_rate": 9.811212266411117e-05, "loss": 1.1851, "step": 2130 }, { "epoch": 0.14438647604851276, "grad_norm": 10.374212265014648, "learning_rate": 9.811075364501335e-05, "loss": 1.2661, "step": 2131 }, { "epoch": 0.14445423131648485, "grad_norm": 10.580810546875, "learning_rate": 9.810938462591554e-05, "loss": 0.8967, "step": 2132 }, { "epoch": 0.14452198658445695, "grad_norm": 8.91865062713623, "learning_rate": 9.810801560681772e-05, "loss": 1.032, "step": 2133 }, { "epoch": 0.14458974185242904, "grad_norm": 8.16041374206543, "learning_rate": 9.81066465877199e-05, "loss": 0.8622, "step": 2134 }, { "epoch": 0.1446574971204011, "grad_norm": 8.575905799865723, "learning_rate": 9.81052775686221e-05, "loss": 0.9956, "step": 2135 }, { "epoch": 0.1447252523883732, "grad_norm": 9.620558738708496, "learning_rate": 9.810390854952428e-05, "loss": 1.1442, "step": 2136 }, { "epoch": 0.14479300765634529, "grad_norm": 10.390005111694336, "learning_rate": 9.810253953042646e-05, "loss": 1.0518, "step": 2137 }, { "epoch": 0.14486076292431738, "grad_norm": 10.714217185974121, "learning_rate": 9.810117051132864e-05, "loss": 1.3215, "step": 2138 }, { "epoch": 0.14492851819228944, "grad_norm": 9.390836715698242, "learning_rate": 9.809980149223083e-05, "loss": 1.0936, "step": 2139 }, { "epoch": 0.14499627346026153, "grad_norm": 10.848445892333984, "learning_rate": 9.809843247313301e-05, "loss": 1.1946, "step": 2140 }, { "epoch": 0.14506402872823362, "grad_norm": 11.041672706604004, "learning_rate": 9.809706345403519e-05, "loss": 1.0422, "step": 2141 }, { "epoch": 0.14513178399620572, "grad_norm": 10.1151762008667, "learning_rate": 9.809569443493737e-05, "loss": 1.2199, "step": 2142 }, { "epoch": 0.14519953926417778, "grad_norm": 8.686629295349121, "learning_rate": 9.809432541583957e-05, "loss": 0.9861, "step": 2143 }, { "epoch": 0.14526729453214987, "grad_norm": 10.579313278198242, "learning_rate": 9.809295639674175e-05, "loss": 0.9372, "step": 2144 }, { "epoch": 0.14533504980012196, "grad_norm": 8.916631698608398, "learning_rate": 9.809158737764393e-05, "loss": 0.8024, "step": 2145 }, { "epoch": 0.14540280506809405, "grad_norm": 9.29333209991455, "learning_rate": 9.80902183585461e-05, "loss": 1.0785, "step": 2146 }, { "epoch": 0.14547056033606612, "grad_norm": 10.584277153015137, "learning_rate": 9.808884933944829e-05, "loss": 0.9167, "step": 2147 }, { "epoch": 0.1455383156040382, "grad_norm": 10.68551254272461, "learning_rate": 9.808748032035048e-05, "loss": 1.0019, "step": 2148 }, { "epoch": 0.1456060708720103, "grad_norm": 9.057500839233398, "learning_rate": 9.808611130125266e-05, "loss": 1.194, "step": 2149 }, { "epoch": 0.1456738261399824, "grad_norm": 8.642207145690918, "learning_rate": 9.808474228215484e-05, "loss": 1.0849, "step": 2150 }, { "epoch": 0.14574158140795446, "grad_norm": 9.460419654846191, "learning_rate": 9.808337326305702e-05, "loss": 1.1747, "step": 2151 }, { "epoch": 0.14580933667592655, "grad_norm": 9.003097534179688, "learning_rate": 9.80820042439592e-05, "loss": 0.8967, "step": 2152 }, { "epoch": 0.14587709194389864, "grad_norm": 9.85009765625, "learning_rate": 9.80806352248614e-05, "loss": 1.1698, "step": 2153 }, { "epoch": 0.14594484721187073, "grad_norm": 10.233894348144531, "learning_rate": 9.807926620576358e-05, "loss": 1.0496, "step": 2154 }, { "epoch": 0.1460126024798428, "grad_norm": 9.660355567932129, "learning_rate": 9.807789718666576e-05, "loss": 0.9616, "step": 2155 }, { "epoch": 0.1460803577478149, "grad_norm": 9.46469497680664, "learning_rate": 9.807652816756794e-05, "loss": 1.0005, "step": 2156 }, { "epoch": 0.14614811301578698, "grad_norm": 8.004712104797363, "learning_rate": 9.807515914847013e-05, "loss": 0.8455, "step": 2157 }, { "epoch": 0.14621586828375907, "grad_norm": 10.039002418518066, "learning_rate": 9.807379012937231e-05, "loss": 1.097, "step": 2158 }, { "epoch": 0.14628362355173113, "grad_norm": 11.16292667388916, "learning_rate": 9.807242111027449e-05, "loss": 1.4332, "step": 2159 }, { "epoch": 0.14635137881970323, "grad_norm": 9.34833812713623, "learning_rate": 9.807105209117667e-05, "loss": 1.0438, "step": 2160 }, { "epoch": 0.14641913408767532, "grad_norm": 10.256768226623535, "learning_rate": 9.806968307207885e-05, "loss": 1.2923, "step": 2161 }, { "epoch": 0.1464868893556474, "grad_norm": 8.890941619873047, "learning_rate": 9.806831405298105e-05, "loss": 0.8419, "step": 2162 }, { "epoch": 0.14655464462361947, "grad_norm": 9.340752601623535, "learning_rate": 9.806694503388323e-05, "loss": 0.9504, "step": 2163 }, { "epoch": 0.14662239989159156, "grad_norm": 10.89192008972168, "learning_rate": 9.80655760147854e-05, "loss": 1.012, "step": 2164 }, { "epoch": 0.14669015515956366, "grad_norm": 8.626432418823242, "learning_rate": 9.806420699568759e-05, "loss": 0.8943, "step": 2165 }, { "epoch": 0.14675791042753575, "grad_norm": 9.465259552001953, "learning_rate": 9.806283797658978e-05, "loss": 1.1617, "step": 2166 }, { "epoch": 0.1468256656955078, "grad_norm": 11.695369720458984, "learning_rate": 9.806146895749196e-05, "loss": 1.2719, "step": 2167 }, { "epoch": 0.1468934209634799, "grad_norm": 9.416814804077148, "learning_rate": 9.806009993839414e-05, "loss": 1.3615, "step": 2168 }, { "epoch": 0.146961176231452, "grad_norm": 9.6004638671875, "learning_rate": 9.805873091929632e-05, "loss": 1.2571, "step": 2169 }, { "epoch": 0.1470289314994241, "grad_norm": 9.187546730041504, "learning_rate": 9.80573619001985e-05, "loss": 0.962, "step": 2170 }, { "epoch": 0.14709668676739618, "grad_norm": 10.691286087036133, "learning_rate": 9.80559928811007e-05, "loss": 1.0619, "step": 2171 }, { "epoch": 0.14716444203536824, "grad_norm": 9.17457103729248, "learning_rate": 9.805462386200288e-05, "loss": 1.1507, "step": 2172 }, { "epoch": 0.14723219730334033, "grad_norm": 10.739082336425781, "learning_rate": 9.805325484290506e-05, "loss": 1.2929, "step": 2173 }, { "epoch": 0.14729995257131243, "grad_norm": 9.091232299804688, "learning_rate": 9.805188582380724e-05, "loss": 0.9422, "step": 2174 }, { "epoch": 0.14736770783928452, "grad_norm": 8.231295585632324, "learning_rate": 9.805051680470943e-05, "loss": 0.9695, "step": 2175 }, { "epoch": 0.14743546310725658, "grad_norm": 9.622401237487793, "learning_rate": 9.804914778561161e-05, "loss": 0.982, "step": 2176 }, { "epoch": 0.14750321837522867, "grad_norm": 9.49541187286377, "learning_rate": 9.804777876651379e-05, "loss": 0.9005, "step": 2177 }, { "epoch": 0.14757097364320076, "grad_norm": 10.584654808044434, "learning_rate": 9.804640974741599e-05, "loss": 0.9427, "step": 2178 }, { "epoch": 0.14763872891117286, "grad_norm": 9.132317543029785, "learning_rate": 9.804504072831817e-05, "loss": 1.0192, "step": 2179 }, { "epoch": 0.14770648417914492, "grad_norm": 8.598082542419434, "learning_rate": 9.804367170922035e-05, "loss": 0.9842, "step": 2180 }, { "epoch": 0.147774239447117, "grad_norm": 8.941360473632812, "learning_rate": 9.804230269012254e-05, "loss": 0.9907, "step": 2181 }, { "epoch": 0.1478419947150891, "grad_norm": 8.119913101196289, "learning_rate": 9.804093367102472e-05, "loss": 1.1259, "step": 2182 }, { "epoch": 0.1479097499830612, "grad_norm": 9.505135536193848, "learning_rate": 9.80395646519269e-05, "loss": 1.0509, "step": 2183 }, { "epoch": 0.14797750525103326, "grad_norm": 8.420906066894531, "learning_rate": 9.803819563282908e-05, "loss": 1.1827, "step": 2184 }, { "epoch": 0.14804526051900535, "grad_norm": 9.860353469848633, "learning_rate": 9.803682661373128e-05, "loss": 1.0286, "step": 2185 }, { "epoch": 0.14811301578697744, "grad_norm": 8.259954452514648, "learning_rate": 9.803545759463346e-05, "loss": 1.0094, "step": 2186 }, { "epoch": 0.14818077105494953, "grad_norm": 10.46882438659668, "learning_rate": 9.803408857553564e-05, "loss": 1.1817, "step": 2187 }, { "epoch": 0.1482485263229216, "grad_norm": 9.315580368041992, "learning_rate": 9.803271955643782e-05, "loss": 1.2816, "step": 2188 }, { "epoch": 0.1483162815908937, "grad_norm": 10.408548355102539, "learning_rate": 9.803135053734001e-05, "loss": 1.0355, "step": 2189 }, { "epoch": 0.14838403685886578, "grad_norm": 9.682003021240234, "learning_rate": 9.802998151824219e-05, "loss": 1.0413, "step": 2190 }, { "epoch": 0.14845179212683787, "grad_norm": 10.041797637939453, "learning_rate": 9.802861249914437e-05, "loss": 0.8388, "step": 2191 }, { "epoch": 0.14851954739480994, "grad_norm": 8.367657661437988, "learning_rate": 9.802724348004655e-05, "loss": 1.0724, "step": 2192 }, { "epoch": 0.14858730266278203, "grad_norm": 9.9558744430542, "learning_rate": 9.802587446094873e-05, "loss": 1.3534, "step": 2193 }, { "epoch": 0.14865505793075412, "grad_norm": 9.244332313537598, "learning_rate": 9.802450544185093e-05, "loss": 1.366, "step": 2194 }, { "epoch": 0.1487228131987262, "grad_norm": 9.560718536376953, "learning_rate": 9.80231364227531e-05, "loss": 1.0556, "step": 2195 }, { "epoch": 0.14879056846669828, "grad_norm": 8.724915504455566, "learning_rate": 9.802176740365529e-05, "loss": 1.316, "step": 2196 }, { "epoch": 0.14885832373467037, "grad_norm": 9.468677520751953, "learning_rate": 9.802039838455747e-05, "loss": 1.1289, "step": 2197 }, { "epoch": 0.14892607900264246, "grad_norm": 8.56347942352295, "learning_rate": 9.801902936545966e-05, "loss": 1.0402, "step": 2198 }, { "epoch": 0.14899383427061455, "grad_norm": 9.328559875488281, "learning_rate": 9.801766034636184e-05, "loss": 1.186, "step": 2199 }, { "epoch": 0.14906158953858661, "grad_norm": 10.20579719543457, "learning_rate": 9.801629132726402e-05, "loss": 1.0896, "step": 2200 }, { "epoch": 0.1491293448065587, "grad_norm": 11.614208221435547, "learning_rate": 9.80149223081662e-05, "loss": 1.2742, "step": 2201 }, { "epoch": 0.1491971000745308, "grad_norm": 8.291358947753906, "learning_rate": 9.801355328906838e-05, "loss": 1.0299, "step": 2202 }, { "epoch": 0.1492648553425029, "grad_norm": 8.734344482421875, "learning_rate": 9.801218426997058e-05, "loss": 1.0922, "step": 2203 }, { "epoch": 0.14933261061047495, "grad_norm": 8.498616218566895, "learning_rate": 9.801081525087276e-05, "loss": 0.8436, "step": 2204 }, { "epoch": 0.14940036587844704, "grad_norm": 7.4501447677612305, "learning_rate": 9.800944623177494e-05, "loss": 0.8629, "step": 2205 }, { "epoch": 0.14946812114641914, "grad_norm": 11.652278900146484, "learning_rate": 9.800807721267712e-05, "loss": 1.1089, "step": 2206 }, { "epoch": 0.14953587641439123, "grad_norm": 11.043471336364746, "learning_rate": 9.80067081935793e-05, "loss": 1.2758, "step": 2207 }, { "epoch": 0.1496036316823633, "grad_norm": 9.376245498657227, "learning_rate": 9.800533917448149e-05, "loss": 1.0178, "step": 2208 }, { "epoch": 0.14967138695033538, "grad_norm": 8.678750038146973, "learning_rate": 9.800397015538367e-05, "loss": 1.0276, "step": 2209 }, { "epoch": 0.14973914221830747, "grad_norm": 10.233052253723145, "learning_rate": 9.800260113628585e-05, "loss": 1.0468, "step": 2210 }, { "epoch": 0.14980689748627957, "grad_norm": 10.711477279663086, "learning_rate": 9.800123211718803e-05, "loss": 1.3919, "step": 2211 }, { "epoch": 0.14987465275425163, "grad_norm": 10.130411148071289, "learning_rate": 9.799986309809023e-05, "loss": 1.1085, "step": 2212 }, { "epoch": 0.14994240802222372, "grad_norm": 9.64091682434082, "learning_rate": 9.79984940789924e-05, "loss": 1.0661, "step": 2213 }, { "epoch": 0.1500101632901958, "grad_norm": 9.80176830291748, "learning_rate": 9.799712505989459e-05, "loss": 1.3426, "step": 2214 }, { "epoch": 0.1500779185581679, "grad_norm": 9.34835147857666, "learning_rate": 9.799575604079677e-05, "loss": 1.0281, "step": 2215 }, { "epoch": 0.15014567382613997, "grad_norm": 8.638712882995605, "learning_rate": 9.799438702169895e-05, "loss": 0.9037, "step": 2216 }, { "epoch": 0.15021342909411206, "grad_norm": 10.499733924865723, "learning_rate": 9.799301800260114e-05, "loss": 1.1152, "step": 2217 }, { "epoch": 0.15028118436208415, "grad_norm": 9.093478202819824, "learning_rate": 9.799164898350332e-05, "loss": 1.2026, "step": 2218 }, { "epoch": 0.15034893963005624, "grad_norm": 13.181863784790039, "learning_rate": 9.79902799644055e-05, "loss": 1.3256, "step": 2219 }, { "epoch": 0.1504166948980283, "grad_norm": 10.221563339233398, "learning_rate": 9.798891094530768e-05, "loss": 1.1867, "step": 2220 }, { "epoch": 0.1504844501660004, "grad_norm": 9.512944221496582, "learning_rate": 9.798754192620988e-05, "loss": 1.1145, "step": 2221 }, { "epoch": 0.1505522054339725, "grad_norm": 8.991315841674805, "learning_rate": 9.798617290711206e-05, "loss": 1.0289, "step": 2222 }, { "epoch": 0.15061996070194458, "grad_norm": 9.003118515014648, "learning_rate": 9.798480388801424e-05, "loss": 0.9618, "step": 2223 }, { "epoch": 0.15068771596991667, "grad_norm": 11.337166786193848, "learning_rate": 9.798343486891643e-05, "loss": 1.3864, "step": 2224 }, { "epoch": 0.15075547123788874, "grad_norm": 9.193288803100586, "learning_rate": 9.798206584981861e-05, "loss": 1.1314, "step": 2225 }, { "epoch": 0.15082322650586083, "grad_norm": 10.137048721313477, "learning_rate": 9.798069683072079e-05, "loss": 1.3325, "step": 2226 }, { "epoch": 0.15089098177383292, "grad_norm": 8.248672485351562, "learning_rate": 9.797932781162299e-05, "loss": 1.0688, "step": 2227 }, { "epoch": 0.150958737041805, "grad_norm": 10.51007080078125, "learning_rate": 9.797795879252517e-05, "loss": 1.2191, "step": 2228 }, { "epoch": 0.15102649230977708, "grad_norm": 14.15853214263916, "learning_rate": 9.797658977342735e-05, "loss": 1.0961, "step": 2229 }, { "epoch": 0.15109424757774917, "grad_norm": 8.42485523223877, "learning_rate": 9.797522075432954e-05, "loss": 1.0314, "step": 2230 }, { "epoch": 0.15116200284572126, "grad_norm": 9.3310546875, "learning_rate": 9.797385173523172e-05, "loss": 1.2372, "step": 2231 }, { "epoch": 0.15122975811369335, "grad_norm": 9.323569297790527, "learning_rate": 9.79724827161339e-05, "loss": 1.1823, "step": 2232 }, { "epoch": 0.15129751338166542, "grad_norm": 9.719592094421387, "learning_rate": 9.797111369703608e-05, "loss": 1.0689, "step": 2233 }, { "epoch": 0.1513652686496375, "grad_norm": 9.37340259552002, "learning_rate": 9.796974467793826e-05, "loss": 1.0703, "step": 2234 }, { "epoch": 0.1514330239176096, "grad_norm": 9.803778648376465, "learning_rate": 9.796837565884045e-05, "loss": 1.0422, "step": 2235 }, { "epoch": 0.1515007791855817, "grad_norm": 8.317301750183105, "learning_rate": 9.796700663974264e-05, "loss": 0.9829, "step": 2236 }, { "epoch": 0.15156853445355375, "grad_norm": 8.473258972167969, "learning_rate": 9.796563762064482e-05, "loss": 1.064, "step": 2237 }, { "epoch": 0.15163628972152585, "grad_norm": 8.495006561279297, "learning_rate": 9.7964268601547e-05, "loss": 0.8646, "step": 2238 }, { "epoch": 0.15170404498949794, "grad_norm": 9.140898704528809, "learning_rate": 9.796289958244918e-05, "loss": 1.068, "step": 2239 }, { "epoch": 0.15177180025747003, "grad_norm": 10.264453887939453, "learning_rate": 9.796153056335137e-05, "loss": 0.9681, "step": 2240 }, { "epoch": 0.1518395555254421, "grad_norm": 10.407776832580566, "learning_rate": 9.796016154425355e-05, "loss": 1.2538, "step": 2241 }, { "epoch": 0.15190731079341419, "grad_norm": 10.143677711486816, "learning_rate": 9.795879252515573e-05, "loss": 1.138, "step": 2242 }, { "epoch": 0.15197506606138628, "grad_norm": 12.528799057006836, "learning_rate": 9.795742350605791e-05, "loss": 1.2654, "step": 2243 }, { "epoch": 0.15204282132935837, "grad_norm": 10.635498046875, "learning_rate": 9.79560544869601e-05, "loss": 1.2116, "step": 2244 }, { "epoch": 0.15211057659733043, "grad_norm": 10.71164608001709, "learning_rate": 9.795468546786229e-05, "loss": 1.0262, "step": 2245 }, { "epoch": 0.15217833186530252, "grad_norm": 9.121109962463379, "learning_rate": 9.795331644876447e-05, "loss": 1.1657, "step": 2246 }, { "epoch": 0.15224608713327462, "grad_norm": 8.174636840820312, "learning_rate": 9.795194742966665e-05, "loss": 1.0027, "step": 2247 }, { "epoch": 0.1523138424012467, "grad_norm": 8.763494491577148, "learning_rate": 9.795057841056883e-05, "loss": 1.1103, "step": 2248 }, { "epoch": 0.15238159766921877, "grad_norm": 8.028278350830078, "learning_rate": 9.794920939147102e-05, "loss": 1.0205, "step": 2249 }, { "epoch": 0.15244935293719086, "grad_norm": 8.96112060546875, "learning_rate": 9.79478403723732e-05, "loss": 1.2949, "step": 2250 }, { "epoch": 0.15251710820516295, "grad_norm": 8.67423152923584, "learning_rate": 9.794647135327538e-05, "loss": 1.0602, "step": 2251 }, { "epoch": 0.15258486347313505, "grad_norm": 9.45671272277832, "learning_rate": 9.794510233417756e-05, "loss": 1.0362, "step": 2252 }, { "epoch": 0.1526526187411071, "grad_norm": 10.24669075012207, "learning_rate": 9.794373331507976e-05, "loss": 0.9918, "step": 2253 }, { "epoch": 0.1527203740090792, "grad_norm": 9.014079093933105, "learning_rate": 9.794236429598194e-05, "loss": 1.1636, "step": 2254 }, { "epoch": 0.1527881292770513, "grad_norm": 6.800943851470947, "learning_rate": 9.794099527688412e-05, "loss": 0.9089, "step": 2255 }, { "epoch": 0.15285588454502338, "grad_norm": 9.90794849395752, "learning_rate": 9.79396262577863e-05, "loss": 0.7669, "step": 2256 }, { "epoch": 0.15292363981299545, "grad_norm": 9.870927810668945, "learning_rate": 9.793825723868848e-05, "loss": 1.1243, "step": 2257 }, { "epoch": 0.15299139508096754, "grad_norm": 9.707404136657715, "learning_rate": 9.793688821959067e-05, "loss": 1.1558, "step": 2258 }, { "epoch": 0.15305915034893963, "grad_norm": 8.362896919250488, "learning_rate": 9.793551920049285e-05, "loss": 0.8601, "step": 2259 }, { "epoch": 0.15312690561691172, "grad_norm": 9.536920547485352, "learning_rate": 9.793415018139503e-05, "loss": 1.2741, "step": 2260 }, { "epoch": 0.1531946608848838, "grad_norm": 11.108535766601562, "learning_rate": 9.793278116229721e-05, "loss": 1.3118, "step": 2261 }, { "epoch": 0.15326241615285588, "grad_norm": 7.281479358673096, "learning_rate": 9.793141214319939e-05, "loss": 0.9165, "step": 2262 }, { "epoch": 0.15333017142082797, "grad_norm": 9.166728973388672, "learning_rate": 9.793004312410159e-05, "loss": 1.0845, "step": 2263 }, { "epoch": 0.15339792668880006, "grad_norm": 11.539854049682617, "learning_rate": 9.792867410500377e-05, "loss": 1.1129, "step": 2264 }, { "epoch": 0.15346568195677213, "grad_norm": 8.588869094848633, "learning_rate": 9.792730508590595e-05, "loss": 0.9698, "step": 2265 }, { "epoch": 0.15353343722474422, "grad_norm": 8.270078659057617, "learning_rate": 9.792593606680813e-05, "loss": 1.2306, "step": 2266 }, { "epoch": 0.1536011924927163, "grad_norm": 7.908688545227051, "learning_rate": 9.792456704771032e-05, "loss": 1.283, "step": 2267 }, { "epoch": 0.1536689477606884, "grad_norm": 10.376410484313965, "learning_rate": 9.79231980286125e-05, "loss": 1.0246, "step": 2268 }, { "epoch": 0.15373670302866047, "grad_norm": 9.517715454101562, "learning_rate": 9.792182900951468e-05, "loss": 0.9975, "step": 2269 }, { "epoch": 0.15380445829663256, "grad_norm": 8.789438247680664, "learning_rate": 9.792045999041688e-05, "loss": 1.0886, "step": 2270 }, { "epoch": 0.15387221356460465, "grad_norm": 9.649114608764648, "learning_rate": 9.791909097131906e-05, "loss": 1.1497, "step": 2271 }, { "epoch": 0.15393996883257674, "grad_norm": 8.533876419067383, "learning_rate": 9.791772195222124e-05, "loss": 0.8701, "step": 2272 }, { "epoch": 0.1540077241005488, "grad_norm": 10.64561653137207, "learning_rate": 9.791635293312343e-05, "loss": 1.2755, "step": 2273 }, { "epoch": 0.1540754793685209, "grad_norm": 9.510658264160156, "learning_rate": 9.791498391402561e-05, "loss": 0.8772, "step": 2274 }, { "epoch": 0.154143234636493, "grad_norm": 9.849981307983398, "learning_rate": 9.791361489492779e-05, "loss": 1.1689, "step": 2275 }, { "epoch": 0.15421098990446508, "grad_norm": 8.152152061462402, "learning_rate": 9.791224587582998e-05, "loss": 0.8148, "step": 2276 }, { "epoch": 0.15427874517243717, "grad_norm": 8.653456687927246, "learning_rate": 9.791087685673216e-05, "loss": 1.0061, "step": 2277 }, { "epoch": 0.15434650044040923, "grad_norm": 6.928426742553711, "learning_rate": 9.790950783763435e-05, "loss": 0.9192, "step": 2278 }, { "epoch": 0.15441425570838133, "grad_norm": 9.606708526611328, "learning_rate": 9.790813881853653e-05, "loss": 0.9956, "step": 2279 }, { "epoch": 0.15448201097635342, "grad_norm": 9.42773151397705, "learning_rate": 9.79067697994387e-05, "loss": 0.9302, "step": 2280 }, { "epoch": 0.1545497662443255, "grad_norm": 8.248319625854492, "learning_rate": 9.79054007803409e-05, "loss": 1.2318, "step": 2281 }, { "epoch": 0.15461752151229757, "grad_norm": 9.706393241882324, "learning_rate": 9.790403176124308e-05, "loss": 1.1489, "step": 2282 }, { "epoch": 0.15468527678026966, "grad_norm": 8.88716983795166, "learning_rate": 9.790266274214526e-05, "loss": 0.8686, "step": 2283 }, { "epoch": 0.15475303204824176, "grad_norm": 8.596991539001465, "learning_rate": 9.790129372304744e-05, "loss": 1.0623, "step": 2284 }, { "epoch": 0.15482078731621385, "grad_norm": 9.262425422668457, "learning_rate": 9.789992470394962e-05, "loss": 1.2067, "step": 2285 }, { "epoch": 0.1548885425841859, "grad_norm": 7.772172927856445, "learning_rate": 9.789855568485181e-05, "loss": 0.9976, "step": 2286 }, { "epoch": 0.154956297852158, "grad_norm": 9.320001602172852, "learning_rate": 9.7897186665754e-05, "loss": 1.1377, "step": 2287 }, { "epoch": 0.1550240531201301, "grad_norm": 11.02434253692627, "learning_rate": 9.789581764665618e-05, "loss": 1.1365, "step": 2288 }, { "epoch": 0.1550918083881022, "grad_norm": 9.90654182434082, "learning_rate": 9.789444862755836e-05, "loss": 1.208, "step": 2289 }, { "epoch": 0.15515956365607425, "grad_norm": 9.591778755187988, "learning_rate": 9.789307960846055e-05, "loss": 1.2426, "step": 2290 }, { "epoch": 0.15522731892404634, "grad_norm": 9.221457481384277, "learning_rate": 9.789171058936273e-05, "loss": 1.1449, "step": 2291 }, { "epoch": 0.15529507419201843, "grad_norm": 7.294323444366455, "learning_rate": 9.789034157026491e-05, "loss": 0.8846, "step": 2292 }, { "epoch": 0.15536282945999053, "grad_norm": 10.6463041305542, "learning_rate": 9.788897255116709e-05, "loss": 1.1397, "step": 2293 }, { "epoch": 0.1554305847279626, "grad_norm": 9.412870407104492, "learning_rate": 9.788760353206927e-05, "loss": 1.1857, "step": 2294 }, { "epoch": 0.15549833999593468, "grad_norm": 9.163009643554688, "learning_rate": 9.788623451297147e-05, "loss": 0.8907, "step": 2295 }, { "epoch": 0.15556609526390677, "grad_norm": 8.157798767089844, "learning_rate": 9.788486549387365e-05, "loss": 0.893, "step": 2296 }, { "epoch": 0.15563385053187886, "grad_norm": 9.155460357666016, "learning_rate": 9.788349647477583e-05, "loss": 1.0361, "step": 2297 }, { "epoch": 0.15570160579985093, "grad_norm": 10.280989646911621, "learning_rate": 9.7882127455678e-05, "loss": 1.2953, "step": 2298 }, { "epoch": 0.15576936106782302, "grad_norm": 9.654706954956055, "learning_rate": 9.78807584365802e-05, "loss": 0.9012, "step": 2299 }, { "epoch": 0.1558371163357951, "grad_norm": 12.425939559936523, "learning_rate": 9.787938941748238e-05, "loss": 1.4103, "step": 2300 }, { "epoch": 0.1559048716037672, "grad_norm": 9.888884544372559, "learning_rate": 9.787802039838456e-05, "loss": 1.1599, "step": 2301 }, { "epoch": 0.15597262687173927, "grad_norm": 10.30229377746582, "learning_rate": 9.787665137928674e-05, "loss": 1.0505, "step": 2302 }, { "epoch": 0.15604038213971136, "grad_norm": 11.208477973937988, "learning_rate": 9.787528236018892e-05, "loss": 1.009, "step": 2303 }, { "epoch": 0.15610813740768345, "grad_norm": 9.264431953430176, "learning_rate": 9.787391334109112e-05, "loss": 1.1453, "step": 2304 }, { "epoch": 0.15617589267565554, "grad_norm": 9.0980224609375, "learning_rate": 9.78725443219933e-05, "loss": 1.0824, "step": 2305 }, { "epoch": 0.1562436479436276, "grad_norm": 8.346585273742676, "learning_rate": 9.787117530289548e-05, "loss": 1.0632, "step": 2306 }, { "epoch": 0.1563114032115997, "grad_norm": 10.607507705688477, "learning_rate": 9.786980628379766e-05, "loss": 1.0102, "step": 2307 }, { "epoch": 0.1563791584795718, "grad_norm": 8.189208984375, "learning_rate": 9.786843726469985e-05, "loss": 1.0872, "step": 2308 }, { "epoch": 0.15644691374754388, "grad_norm": 9.84202766418457, "learning_rate": 9.786706824560203e-05, "loss": 0.9871, "step": 2309 }, { "epoch": 0.15651466901551594, "grad_norm": 11.937589645385742, "learning_rate": 9.786569922650421e-05, "loss": 1.2255, "step": 2310 }, { "epoch": 0.15658242428348804, "grad_norm": 9.855016708374023, "learning_rate": 9.786433020740639e-05, "loss": 1.0097, "step": 2311 }, { "epoch": 0.15665017955146013, "grad_norm": 8.661060333251953, "learning_rate": 9.786296118830857e-05, "loss": 0.8732, "step": 2312 }, { "epoch": 0.15671793481943222, "grad_norm": 10.57170295715332, "learning_rate": 9.786159216921077e-05, "loss": 0.862, "step": 2313 }, { "epoch": 0.15678569008740428, "grad_norm": 7.759045600891113, "learning_rate": 9.786022315011295e-05, "loss": 0.77, "step": 2314 }, { "epoch": 0.15685344535537638, "grad_norm": 10.758045196533203, "learning_rate": 9.785885413101513e-05, "loss": 0.8919, "step": 2315 }, { "epoch": 0.15692120062334847, "grad_norm": 8.521660804748535, "learning_rate": 9.78574851119173e-05, "loss": 1.131, "step": 2316 }, { "epoch": 0.15698895589132056, "grad_norm": 8.72917652130127, "learning_rate": 9.78561160928195e-05, "loss": 0.8359, "step": 2317 }, { "epoch": 0.15705671115929262, "grad_norm": 11.679365158081055, "learning_rate": 9.785474707372168e-05, "loss": 1.0078, "step": 2318 }, { "epoch": 0.15712446642726471, "grad_norm": 11.50632381439209, "learning_rate": 9.785337805462386e-05, "loss": 1.208, "step": 2319 }, { "epoch": 0.1571922216952368, "grad_norm": 9.6107759475708, "learning_rate": 9.785200903552605e-05, "loss": 1.0967, "step": 2320 }, { "epoch": 0.1572599769632089, "grad_norm": 8.629117012023926, "learning_rate": 9.785064001642824e-05, "loss": 1.0594, "step": 2321 }, { "epoch": 0.15732773223118096, "grad_norm": 11.136920928955078, "learning_rate": 9.784927099733042e-05, "loss": 1.2874, "step": 2322 }, { "epoch": 0.15739548749915305, "grad_norm": 11.097023963928223, "learning_rate": 9.784790197823261e-05, "loss": 1.1598, "step": 2323 }, { "epoch": 0.15746324276712514, "grad_norm": 11.117433547973633, "learning_rate": 9.784653295913479e-05, "loss": 1.0601, "step": 2324 }, { "epoch": 0.15753099803509724, "grad_norm": 10.152684211730957, "learning_rate": 9.784516394003697e-05, "loss": 1.2763, "step": 2325 }, { "epoch": 0.1575987533030693, "grad_norm": 8.37531852722168, "learning_rate": 9.784379492093915e-05, "loss": 1.1055, "step": 2326 }, { "epoch": 0.1576665085710414, "grad_norm": 10.463939666748047, "learning_rate": 9.784242590184134e-05, "loss": 1.3088, "step": 2327 }, { "epoch": 0.15773426383901348, "grad_norm": 8.74315357208252, "learning_rate": 9.784105688274352e-05, "loss": 1.1165, "step": 2328 }, { "epoch": 0.15780201910698557, "grad_norm": 8.691280364990234, "learning_rate": 9.78396878636457e-05, "loss": 1.1019, "step": 2329 }, { "epoch": 0.15786977437495767, "grad_norm": 10.424938201904297, "learning_rate": 9.783831884454789e-05, "loss": 1.1957, "step": 2330 }, { "epoch": 0.15793752964292973, "grad_norm": 6.867722034454346, "learning_rate": 9.783694982545008e-05, "loss": 1.0476, "step": 2331 }, { "epoch": 0.15800528491090182, "grad_norm": 9.434804916381836, "learning_rate": 9.783558080635226e-05, "loss": 1.0247, "step": 2332 }, { "epoch": 0.1580730401788739, "grad_norm": 7.771170616149902, "learning_rate": 9.783421178725444e-05, "loss": 1.2582, "step": 2333 }, { "epoch": 0.158140795446846, "grad_norm": 8.366608619689941, "learning_rate": 9.783284276815662e-05, "loss": 1.1078, "step": 2334 }, { "epoch": 0.15820855071481807, "grad_norm": 8.85851764678955, "learning_rate": 9.78314737490588e-05, "loss": 1.2405, "step": 2335 }, { "epoch": 0.15827630598279016, "grad_norm": 9.134325981140137, "learning_rate": 9.7830104729961e-05, "loss": 1.2277, "step": 2336 }, { "epoch": 0.15834406125076225, "grad_norm": 9.150130271911621, "learning_rate": 9.782873571086317e-05, "loss": 1.1355, "step": 2337 }, { "epoch": 0.15841181651873434, "grad_norm": 8.687469482421875, "learning_rate": 9.782736669176536e-05, "loss": 1.0454, "step": 2338 }, { "epoch": 0.1584795717867064, "grad_norm": 10.071285247802734, "learning_rate": 9.782599767266754e-05, "loss": 1.0041, "step": 2339 }, { "epoch": 0.1585473270546785, "grad_norm": 8.373369216918945, "learning_rate": 9.782462865356972e-05, "loss": 0.8378, "step": 2340 }, { "epoch": 0.1586150823226506, "grad_norm": 9.427014350891113, "learning_rate": 9.782325963447191e-05, "loss": 1.1413, "step": 2341 }, { "epoch": 0.15868283759062268, "grad_norm": 8.38814640045166, "learning_rate": 9.782189061537409e-05, "loss": 1.0696, "step": 2342 }, { "epoch": 0.15875059285859475, "grad_norm": 8.518644332885742, "learning_rate": 9.782052159627627e-05, "loss": 0.9814, "step": 2343 }, { "epoch": 0.15881834812656684, "grad_norm": 9.926252365112305, "learning_rate": 9.781915257717845e-05, "loss": 1.1971, "step": 2344 }, { "epoch": 0.15888610339453893, "grad_norm": 7.946019649505615, "learning_rate": 9.781778355808064e-05, "loss": 1.1902, "step": 2345 }, { "epoch": 0.15895385866251102, "grad_norm": 8.686945915222168, "learning_rate": 9.781641453898283e-05, "loss": 0.9939, "step": 2346 }, { "epoch": 0.15902161393048309, "grad_norm": 8.225680351257324, "learning_rate": 9.7815045519885e-05, "loss": 1.3665, "step": 2347 }, { "epoch": 0.15908936919845518, "grad_norm": 10.381987571716309, "learning_rate": 9.781367650078719e-05, "loss": 1.09, "step": 2348 }, { "epoch": 0.15915712446642727, "grad_norm": 8.57552719116211, "learning_rate": 9.781230748168937e-05, "loss": 0.8261, "step": 2349 }, { "epoch": 0.15922487973439936, "grad_norm": 9.105220794677734, "learning_rate": 9.781093846259156e-05, "loss": 1.102, "step": 2350 }, { "epoch": 0.15929263500237142, "grad_norm": 10.08092212677002, "learning_rate": 9.780956944349374e-05, "loss": 0.943, "step": 2351 }, { "epoch": 0.15936039027034352, "grad_norm": 10.259852409362793, "learning_rate": 9.780820042439592e-05, "loss": 0.8822, "step": 2352 }, { "epoch": 0.1594281455383156, "grad_norm": 8.31139087677002, "learning_rate": 9.78068314052981e-05, "loss": 1.2167, "step": 2353 }, { "epoch": 0.1594959008062877, "grad_norm": 7.529703617095947, "learning_rate": 9.78054623862003e-05, "loss": 0.8913, "step": 2354 }, { "epoch": 0.15956365607425976, "grad_norm": 8.792675971984863, "learning_rate": 9.780409336710248e-05, "loss": 0.966, "step": 2355 }, { "epoch": 0.15963141134223185, "grad_norm": 9.329866409301758, "learning_rate": 9.780272434800466e-05, "loss": 1.1659, "step": 2356 }, { "epoch": 0.15969916661020395, "grad_norm": 12.14089298248291, "learning_rate": 9.780135532890684e-05, "loss": 1.2019, "step": 2357 }, { "epoch": 0.15976692187817604, "grad_norm": 9.12912654876709, "learning_rate": 9.779998630980902e-05, "loss": 1.1615, "step": 2358 }, { "epoch": 0.1598346771461481, "grad_norm": 9.554464340209961, "learning_rate": 9.779861729071121e-05, "loss": 1.1695, "step": 2359 }, { "epoch": 0.1599024324141202, "grad_norm": 9.317673683166504, "learning_rate": 9.779724827161339e-05, "loss": 0.9606, "step": 2360 }, { "epoch": 0.15997018768209229, "grad_norm": 8.80395793914795, "learning_rate": 9.779587925251557e-05, "loss": 0.9693, "step": 2361 }, { "epoch": 0.16003794295006438, "grad_norm": 11.990642547607422, "learning_rate": 9.779451023341775e-05, "loss": 1.2901, "step": 2362 }, { "epoch": 0.16010569821803644, "grad_norm": 8.188547134399414, "learning_rate": 9.779314121431995e-05, "loss": 0.979, "step": 2363 }, { "epoch": 0.16017345348600853, "grad_norm": 8.316620826721191, "learning_rate": 9.779177219522213e-05, "loss": 0.8601, "step": 2364 }, { "epoch": 0.16024120875398062, "grad_norm": 7.58405876159668, "learning_rate": 9.77904031761243e-05, "loss": 1.1812, "step": 2365 }, { "epoch": 0.16030896402195272, "grad_norm": 7.725598335266113, "learning_rate": 9.77890341570265e-05, "loss": 0.9335, "step": 2366 }, { "epoch": 0.16037671928992478, "grad_norm": 8.6231107711792, "learning_rate": 9.778766513792868e-05, "loss": 1.0162, "step": 2367 }, { "epoch": 0.16044447455789687, "grad_norm": 9.762526512145996, "learning_rate": 9.778629611883086e-05, "loss": 1.3186, "step": 2368 }, { "epoch": 0.16051222982586896, "grad_norm": 11.384220123291016, "learning_rate": 9.778492709973305e-05, "loss": 1.2397, "step": 2369 }, { "epoch": 0.16057998509384105, "grad_norm": 8.841899871826172, "learning_rate": 9.778355808063523e-05, "loss": 0.8708, "step": 2370 }, { "epoch": 0.16064774036181312, "grad_norm": 7.778527736663818, "learning_rate": 9.778218906153741e-05, "loss": 0.9968, "step": 2371 }, { "epoch": 0.1607154956297852, "grad_norm": 8.559181213378906, "learning_rate": 9.77808200424396e-05, "loss": 0.9759, "step": 2372 }, { "epoch": 0.1607832508977573, "grad_norm": 10.273273468017578, "learning_rate": 9.777945102334179e-05, "loss": 0.9485, "step": 2373 }, { "epoch": 0.1608510061657294, "grad_norm": 7.946044445037842, "learning_rate": 9.777808200424397e-05, "loss": 1.105, "step": 2374 }, { "epoch": 0.16091876143370146, "grad_norm": 9.917662620544434, "learning_rate": 9.777671298514615e-05, "loss": 1.0272, "step": 2375 }, { "epoch": 0.16098651670167355, "grad_norm": 10.438239097595215, "learning_rate": 9.777534396604833e-05, "loss": 1.0197, "step": 2376 }, { "epoch": 0.16105427196964564, "grad_norm": 8.799901962280273, "learning_rate": 9.777397494695052e-05, "loss": 1.1401, "step": 2377 }, { "epoch": 0.16112202723761773, "grad_norm": 8.569243431091309, "learning_rate": 9.77726059278527e-05, "loss": 1.2019, "step": 2378 }, { "epoch": 0.1611897825055898, "grad_norm": 10.793002128601074, "learning_rate": 9.777123690875488e-05, "loss": 1.0932, "step": 2379 }, { "epoch": 0.1612575377735619, "grad_norm": 7.825850963592529, "learning_rate": 9.776986788965707e-05, "loss": 1.2226, "step": 2380 }, { "epoch": 0.16132529304153398, "grad_norm": 8.780813217163086, "learning_rate": 9.776849887055925e-05, "loss": 0.7939, "step": 2381 }, { "epoch": 0.16139304830950607, "grad_norm": 8.927638053894043, "learning_rate": 9.776712985146144e-05, "loss": 1.0551, "step": 2382 }, { "epoch": 0.16146080357747816, "grad_norm": 9.08043384552002, "learning_rate": 9.776576083236362e-05, "loss": 1.0037, "step": 2383 }, { "epoch": 0.16152855884545023, "grad_norm": 9.362268447875977, "learning_rate": 9.77643918132658e-05, "loss": 1.332, "step": 2384 }, { "epoch": 0.16159631411342232, "grad_norm": 10.533197402954102, "learning_rate": 9.776302279416798e-05, "loss": 1.0966, "step": 2385 }, { "epoch": 0.1616640693813944, "grad_norm": 9.579266548156738, "learning_rate": 9.776165377507017e-05, "loss": 0.9325, "step": 2386 }, { "epoch": 0.1617318246493665, "grad_norm": 10.071297645568848, "learning_rate": 9.776028475597235e-05, "loss": 0.9496, "step": 2387 }, { "epoch": 0.16179957991733857, "grad_norm": 9.243900299072266, "learning_rate": 9.775891573687453e-05, "loss": 1.13, "step": 2388 }, { "epoch": 0.16186733518531066, "grad_norm": 9.327018737792969, "learning_rate": 9.775754671777672e-05, "loss": 1.2587, "step": 2389 }, { "epoch": 0.16193509045328275, "grad_norm": 7.614950180053711, "learning_rate": 9.77561776986789e-05, "loss": 1.0848, "step": 2390 }, { "epoch": 0.16200284572125484, "grad_norm": 9.86501407623291, "learning_rate": 9.775480867958109e-05, "loss": 1.2504, "step": 2391 }, { "epoch": 0.1620706009892269, "grad_norm": 9.08303451538086, "learning_rate": 9.775343966048327e-05, "loss": 1.4359, "step": 2392 }, { "epoch": 0.162138356257199, "grad_norm": 8.417489051818848, "learning_rate": 9.775207064138545e-05, "loss": 1.244, "step": 2393 }, { "epoch": 0.1622061115251711, "grad_norm": 8.35366439819336, "learning_rate": 9.775070162228763e-05, "loss": 1.0706, "step": 2394 }, { "epoch": 0.16227386679314318, "grad_norm": 9.732915878295898, "learning_rate": 9.774933260318981e-05, "loss": 1.4237, "step": 2395 }, { "epoch": 0.16234162206111524, "grad_norm": 9.131365776062012, "learning_rate": 9.7747963584092e-05, "loss": 0.9207, "step": 2396 }, { "epoch": 0.16240937732908733, "grad_norm": 8.93538761138916, "learning_rate": 9.774659456499419e-05, "loss": 1.0054, "step": 2397 }, { "epoch": 0.16247713259705943, "grad_norm": 8.939055442810059, "learning_rate": 9.774522554589637e-05, "loss": 0.9782, "step": 2398 }, { "epoch": 0.16254488786503152, "grad_norm": 9.251758575439453, "learning_rate": 9.774385652679855e-05, "loss": 1.0968, "step": 2399 }, { "epoch": 0.16261264313300358, "grad_norm": 9.240782737731934, "learning_rate": 9.774248750770074e-05, "loss": 0.7982, "step": 2400 }, { "epoch": 0.16268039840097567, "grad_norm": 8.697726249694824, "learning_rate": 9.774111848860292e-05, "loss": 1.0545, "step": 2401 }, { "epoch": 0.16274815366894776, "grad_norm": 7.9362053871154785, "learning_rate": 9.77397494695051e-05, "loss": 0.8364, "step": 2402 }, { "epoch": 0.16281590893691986, "grad_norm": 11.944025993347168, "learning_rate": 9.773838045040728e-05, "loss": 1.2313, "step": 2403 }, { "epoch": 0.16288366420489192, "grad_norm": 9.496225357055664, "learning_rate": 9.773701143130946e-05, "loss": 1.1363, "step": 2404 }, { "epoch": 0.162951419472864, "grad_norm": 12.96069622039795, "learning_rate": 9.773564241221165e-05, "loss": 1.1205, "step": 2405 }, { "epoch": 0.1630191747408361, "grad_norm": 8.915671348571777, "learning_rate": 9.773427339311384e-05, "loss": 1.1149, "step": 2406 }, { "epoch": 0.1630869300088082, "grad_norm": 10.23763370513916, "learning_rate": 9.773290437401602e-05, "loss": 1.1867, "step": 2407 }, { "epoch": 0.16315468527678026, "grad_norm": 9.117730140686035, "learning_rate": 9.77315353549182e-05, "loss": 1.1792, "step": 2408 }, { "epoch": 0.16322244054475235, "grad_norm": 9.380385398864746, "learning_rate": 9.773016633582039e-05, "loss": 1.1621, "step": 2409 }, { "epoch": 0.16329019581272444, "grad_norm": 8.56508731842041, "learning_rate": 9.772879731672257e-05, "loss": 0.8372, "step": 2410 }, { "epoch": 0.16335795108069653, "grad_norm": 11.707832336425781, "learning_rate": 9.772742829762475e-05, "loss": 1.3191, "step": 2411 }, { "epoch": 0.1634257063486686, "grad_norm": 7.720577716827393, "learning_rate": 9.772605927852694e-05, "loss": 0.9617, "step": 2412 }, { "epoch": 0.1634934616166407, "grad_norm": 8.586542129516602, "learning_rate": 9.772469025942912e-05, "loss": 1.0978, "step": 2413 }, { "epoch": 0.16356121688461278, "grad_norm": 9.021394729614258, "learning_rate": 9.77233212403313e-05, "loss": 0.9358, "step": 2414 }, { "epoch": 0.16362897215258487, "grad_norm": 9.078686714172363, "learning_rate": 9.77219522212335e-05, "loss": 1.3338, "step": 2415 }, { "epoch": 0.16369672742055694, "grad_norm": 9.810312271118164, "learning_rate": 9.772058320213568e-05, "loss": 1.2472, "step": 2416 }, { "epoch": 0.16376448268852903, "grad_norm": 8.933609962463379, "learning_rate": 9.771921418303786e-05, "loss": 0.9115, "step": 2417 }, { "epoch": 0.16383223795650112, "grad_norm": 7.044286251068115, "learning_rate": 9.771784516394005e-05, "loss": 0.7983, "step": 2418 }, { "epoch": 0.1638999932244732, "grad_norm": 11.711495399475098, "learning_rate": 9.771647614484223e-05, "loss": 1.096, "step": 2419 }, { "epoch": 0.16396774849244528, "grad_norm": 9.31049633026123, "learning_rate": 9.771510712574441e-05, "loss": 1.2711, "step": 2420 }, { "epoch": 0.16403550376041737, "grad_norm": 8.10503101348877, "learning_rate": 9.77137381066466e-05, "loss": 0.9095, "step": 2421 }, { "epoch": 0.16410325902838946, "grad_norm": 7.915055751800537, "learning_rate": 9.771236908754877e-05, "loss": 1.0161, "step": 2422 }, { "epoch": 0.16417101429636155, "grad_norm": 8.185515403747559, "learning_rate": 9.771100006845097e-05, "loss": 1.1109, "step": 2423 }, { "epoch": 0.16423876956433361, "grad_norm": 9.960200309753418, "learning_rate": 9.770963104935315e-05, "loss": 0.9757, "step": 2424 }, { "epoch": 0.1643065248323057, "grad_norm": 9.646814346313477, "learning_rate": 9.770826203025533e-05, "loss": 1.0835, "step": 2425 }, { "epoch": 0.1643742801002778, "grad_norm": 9.701393127441406, "learning_rate": 9.770689301115751e-05, "loss": 1.0717, "step": 2426 }, { "epoch": 0.1644420353682499, "grad_norm": 7.887824058532715, "learning_rate": 9.770552399205969e-05, "loss": 1.0937, "step": 2427 }, { "epoch": 0.16450979063622195, "grad_norm": 10.94339370727539, "learning_rate": 9.770415497296188e-05, "loss": 1.2766, "step": 2428 }, { "epoch": 0.16457754590419404, "grad_norm": 10.051490783691406, "learning_rate": 9.770278595386406e-05, "loss": 1.248, "step": 2429 }, { "epoch": 0.16464530117216614, "grad_norm": 8.380006790161133, "learning_rate": 9.770141693476624e-05, "loss": 1.1657, "step": 2430 }, { "epoch": 0.16471305644013823, "grad_norm": 8.077753067016602, "learning_rate": 9.770004791566843e-05, "loss": 0.9511, "step": 2431 }, { "epoch": 0.1647808117081103, "grad_norm": 8.744999885559082, "learning_rate": 9.769867889657062e-05, "loss": 1.0736, "step": 2432 }, { "epoch": 0.16484856697608238, "grad_norm": 8.203909873962402, "learning_rate": 9.76973098774728e-05, "loss": 1.208, "step": 2433 }, { "epoch": 0.16491632224405448, "grad_norm": 9.462398529052734, "learning_rate": 9.769594085837498e-05, "loss": 1.0011, "step": 2434 }, { "epoch": 0.16498407751202657, "grad_norm": 11.190359115600586, "learning_rate": 9.769457183927716e-05, "loss": 1.139, "step": 2435 }, { "epoch": 0.16505183277999866, "grad_norm": 10.454118728637695, "learning_rate": 9.769320282017934e-05, "loss": 1.0799, "step": 2436 }, { "epoch": 0.16511958804797072, "grad_norm": 14.411054611206055, "learning_rate": 9.769183380108153e-05, "loss": 1.0369, "step": 2437 }, { "epoch": 0.16518734331594281, "grad_norm": 11.42679214477539, "learning_rate": 9.769046478198371e-05, "loss": 1.2486, "step": 2438 }, { "epoch": 0.1652550985839149, "grad_norm": 10.520325660705566, "learning_rate": 9.76890957628859e-05, "loss": 1.2355, "step": 2439 }, { "epoch": 0.165322853851887, "grad_norm": 7.958998680114746, "learning_rate": 9.768772674378808e-05, "loss": 1.0822, "step": 2440 }, { "epoch": 0.16539060911985906, "grad_norm": 8.649806022644043, "learning_rate": 9.768635772469027e-05, "loss": 0.9439, "step": 2441 }, { "epoch": 0.16545836438783115, "grad_norm": 8.539712905883789, "learning_rate": 9.768498870559245e-05, "loss": 0.9541, "step": 2442 }, { "epoch": 0.16552611965580324, "grad_norm": 11.40131950378418, "learning_rate": 9.768361968649463e-05, "loss": 1.0568, "step": 2443 }, { "epoch": 0.16559387492377534, "grad_norm": 8.333579063415527, "learning_rate": 9.768225066739681e-05, "loss": 0.924, "step": 2444 }, { "epoch": 0.1656616301917474, "grad_norm": 9.02564525604248, "learning_rate": 9.768088164829899e-05, "loss": 1.1901, "step": 2445 }, { "epoch": 0.1657293854597195, "grad_norm": 9.721306800842285, "learning_rate": 9.767951262920118e-05, "loss": 1.2313, "step": 2446 }, { "epoch": 0.16579714072769158, "grad_norm": 9.876781463623047, "learning_rate": 9.767814361010336e-05, "loss": 1.0165, "step": 2447 }, { "epoch": 0.16586489599566367, "grad_norm": 11.69865894317627, "learning_rate": 9.767677459100555e-05, "loss": 1.1301, "step": 2448 }, { "epoch": 0.16593265126363574, "grad_norm": 11.38391399383545, "learning_rate": 9.767540557190773e-05, "loss": 1.1522, "step": 2449 }, { "epoch": 0.16600040653160783, "grad_norm": 9.18020248413086, "learning_rate": 9.76740365528099e-05, "loss": 1.0515, "step": 2450 }, { "epoch": 0.16606816179957992, "grad_norm": 9.30802059173584, "learning_rate": 9.76726675337121e-05, "loss": 1.0789, "step": 2451 }, { "epoch": 0.166135917067552, "grad_norm": 9.58259391784668, "learning_rate": 9.767129851461428e-05, "loss": 1.1299, "step": 2452 }, { "epoch": 0.16620367233552408, "grad_norm": 9.137594223022461, "learning_rate": 9.766992949551646e-05, "loss": 1.0854, "step": 2453 }, { "epoch": 0.16627142760349617, "grad_norm": 12.410299301147461, "learning_rate": 9.766856047641864e-05, "loss": 1.1717, "step": 2454 }, { "epoch": 0.16633918287146826, "grad_norm": 9.016322135925293, "learning_rate": 9.766719145732083e-05, "loss": 1.1028, "step": 2455 }, { "epoch": 0.16640693813944035, "grad_norm": 10.166184425354004, "learning_rate": 9.766582243822301e-05, "loss": 1.0626, "step": 2456 }, { "epoch": 0.16647469340741242, "grad_norm": 9.030965805053711, "learning_rate": 9.76644534191252e-05, "loss": 1.1761, "step": 2457 }, { "epoch": 0.1665424486753845, "grad_norm": 9.294576644897461, "learning_rate": 9.766308440002739e-05, "loss": 0.943, "step": 2458 }, { "epoch": 0.1666102039433566, "grad_norm": 6.732856273651123, "learning_rate": 9.766171538092957e-05, "loss": 0.7342, "step": 2459 }, { "epoch": 0.1666779592113287, "grad_norm": 8.178942680358887, "learning_rate": 9.766034636183175e-05, "loss": 0.7889, "step": 2460 }, { "epoch": 0.16674571447930076, "grad_norm": 7.771929740905762, "learning_rate": 9.765897734273394e-05, "loss": 1.1129, "step": 2461 }, { "epoch": 0.16681346974727285, "grad_norm": 8.695874214172363, "learning_rate": 9.765760832363612e-05, "loss": 1.1968, "step": 2462 }, { "epoch": 0.16688122501524494, "grad_norm": 10.18800163269043, "learning_rate": 9.76562393045383e-05, "loss": 1.0835, "step": 2463 }, { "epoch": 0.16694898028321703, "grad_norm": 9.310625076293945, "learning_rate": 9.76548702854405e-05, "loss": 1.2042, "step": 2464 }, { "epoch": 0.1670167355511891, "grad_norm": 11.684195518493652, "learning_rate": 9.765350126634268e-05, "loss": 1.2518, "step": 2465 }, { "epoch": 0.16708449081916119, "grad_norm": 10.9056978225708, "learning_rate": 9.765213224724486e-05, "loss": 1.2465, "step": 2466 }, { "epoch": 0.16715224608713328, "grad_norm": 9.871830940246582, "learning_rate": 9.765076322814704e-05, "loss": 1.3192, "step": 2467 }, { "epoch": 0.16722000135510537, "grad_norm": 7.725397109985352, "learning_rate": 9.764939420904922e-05, "loss": 0.9082, "step": 2468 }, { "epoch": 0.16728775662307743, "grad_norm": 8.7415189743042, "learning_rate": 9.764802518995141e-05, "loss": 0.8569, "step": 2469 }, { "epoch": 0.16735551189104952, "grad_norm": 7.8990888595581055, "learning_rate": 9.76466561708536e-05, "loss": 1.0175, "step": 2470 }, { "epoch": 0.16742326715902162, "grad_norm": 8.688074111938477, "learning_rate": 9.764528715175577e-05, "loss": 1.0131, "step": 2471 }, { "epoch": 0.1674910224269937, "grad_norm": 7.902133941650391, "learning_rate": 9.764391813265795e-05, "loss": 0.8956, "step": 2472 }, { "epoch": 0.16755877769496577, "grad_norm": 9.179505348205566, "learning_rate": 9.764254911356013e-05, "loss": 1.175, "step": 2473 }, { "epoch": 0.16762653296293786, "grad_norm": 10.701058387756348, "learning_rate": 9.764118009446233e-05, "loss": 1.003, "step": 2474 }, { "epoch": 0.16769428823090995, "grad_norm": 8.76916217803955, "learning_rate": 9.763981107536451e-05, "loss": 1.102, "step": 2475 }, { "epoch": 0.16776204349888205, "grad_norm": 8.682199478149414, "learning_rate": 9.763844205626669e-05, "loss": 1.2042, "step": 2476 }, { "epoch": 0.1678297987668541, "grad_norm": 9.789544105529785, "learning_rate": 9.763707303716887e-05, "loss": 1.2469, "step": 2477 }, { "epoch": 0.1678975540348262, "grad_norm": 8.250391960144043, "learning_rate": 9.763570401807106e-05, "loss": 0.9742, "step": 2478 }, { "epoch": 0.1679653093027983, "grad_norm": 8.938610076904297, "learning_rate": 9.763433499897324e-05, "loss": 0.9968, "step": 2479 }, { "epoch": 0.16803306457077039, "grad_norm": 9.956622123718262, "learning_rate": 9.763296597987542e-05, "loss": 1.0464, "step": 2480 }, { "epoch": 0.16810081983874245, "grad_norm": 8.582858085632324, "learning_rate": 9.76315969607776e-05, "loss": 1.1915, "step": 2481 }, { "epoch": 0.16816857510671454, "grad_norm": 7.73312520980835, "learning_rate": 9.763022794167979e-05, "loss": 0.8506, "step": 2482 }, { "epoch": 0.16823633037468663, "grad_norm": 8.986891746520996, "learning_rate": 9.762885892258198e-05, "loss": 0.8959, "step": 2483 }, { "epoch": 0.16830408564265872, "grad_norm": 10.047099113464355, "learning_rate": 9.762748990348416e-05, "loss": 1.24, "step": 2484 }, { "epoch": 0.1683718409106308, "grad_norm": 8.517911911010742, "learning_rate": 9.762612088438634e-05, "loss": 0.8171, "step": 2485 }, { "epoch": 0.16843959617860288, "grad_norm": 9.586174964904785, "learning_rate": 9.762475186528852e-05, "loss": 1.1603, "step": 2486 }, { "epoch": 0.16850735144657497, "grad_norm": 9.85086441040039, "learning_rate": 9.762338284619071e-05, "loss": 0.9157, "step": 2487 }, { "epoch": 0.16857510671454706, "grad_norm": 12.023639678955078, "learning_rate": 9.76220138270929e-05, "loss": 1.2032, "step": 2488 }, { "epoch": 0.16864286198251915, "grad_norm": 8.932641983032227, "learning_rate": 9.762064480799507e-05, "loss": 1.0678, "step": 2489 }, { "epoch": 0.16871061725049122, "grad_norm": 10.568282127380371, "learning_rate": 9.761927578889725e-05, "loss": 1.3213, "step": 2490 }, { "epoch": 0.1687783725184633, "grad_norm": 9.116564750671387, "learning_rate": 9.761790676979944e-05, "loss": 1.0525, "step": 2491 }, { "epoch": 0.1688461277864354, "grad_norm": 8.192644119262695, "learning_rate": 9.761653775070163e-05, "loss": 0.9554, "step": 2492 }, { "epoch": 0.1689138830544075, "grad_norm": 9.146562576293945, "learning_rate": 9.761516873160381e-05, "loss": 0.8852, "step": 2493 }, { "epoch": 0.16898163832237956, "grad_norm": 8.82610034942627, "learning_rate": 9.761379971250599e-05, "loss": 1.0521, "step": 2494 }, { "epoch": 0.16904939359035165, "grad_norm": 9.051412582397461, "learning_rate": 9.761243069340817e-05, "loss": 0.913, "step": 2495 }, { "epoch": 0.16911714885832374, "grad_norm": 8.97696304321289, "learning_rate": 9.761106167431036e-05, "loss": 0.887, "step": 2496 }, { "epoch": 0.16918490412629583, "grad_norm": 10.083110809326172, "learning_rate": 9.760969265521254e-05, "loss": 1.3521, "step": 2497 }, { "epoch": 0.1692526593942679, "grad_norm": 7.585256099700928, "learning_rate": 9.760832363611472e-05, "loss": 0.9001, "step": 2498 }, { "epoch": 0.16932041466224, "grad_norm": 10.301995277404785, "learning_rate": 9.76069546170169e-05, "loss": 1.0915, "step": 2499 }, { "epoch": 0.16938816993021208, "grad_norm": 8.517580032348633, "learning_rate": 9.760558559791909e-05, "loss": 1.0097, "step": 2500 }, { "epoch": 0.16945592519818417, "grad_norm": 8.20002555847168, "learning_rate": 9.760421657882128e-05, "loss": 1.2988, "step": 2501 }, { "epoch": 0.16952368046615623, "grad_norm": 9.705124855041504, "learning_rate": 9.760284755972346e-05, "loss": 1.2661, "step": 2502 }, { "epoch": 0.16959143573412833, "grad_norm": 9.167060852050781, "learning_rate": 9.760147854062564e-05, "loss": 1.0556, "step": 2503 }, { "epoch": 0.16965919100210042, "grad_norm": 8.415916442871094, "learning_rate": 9.760010952152783e-05, "loss": 0.7964, "step": 2504 }, { "epoch": 0.1697269462700725, "grad_norm": 7.626298427581787, "learning_rate": 9.759874050243001e-05, "loss": 0.9153, "step": 2505 }, { "epoch": 0.16979470153804457, "grad_norm": 10.595001220703125, "learning_rate": 9.75973714833322e-05, "loss": 0.9911, "step": 2506 }, { "epoch": 0.16986245680601667, "grad_norm": 7.564423084259033, "learning_rate": 9.759600246423439e-05, "loss": 0.9843, "step": 2507 }, { "epoch": 0.16993021207398876, "grad_norm": 7.644829273223877, "learning_rate": 9.759463344513657e-05, "loss": 0.9116, "step": 2508 }, { "epoch": 0.16999796734196085, "grad_norm": 7.54351282119751, "learning_rate": 9.759326442603875e-05, "loss": 1.1528, "step": 2509 }, { "epoch": 0.1700657226099329, "grad_norm": 9.259818077087402, "learning_rate": 9.759189540694094e-05, "loss": 1.0867, "step": 2510 }, { "epoch": 0.170133477877905, "grad_norm": 8.022993087768555, "learning_rate": 9.759052638784312e-05, "loss": 0.7524, "step": 2511 }, { "epoch": 0.1702012331458771, "grad_norm": 8.264616966247559, "learning_rate": 9.75891573687453e-05, "loss": 0.9906, "step": 2512 }, { "epoch": 0.1702689884138492, "grad_norm": 9.606420516967773, "learning_rate": 9.758778834964748e-05, "loss": 0.9287, "step": 2513 }, { "epoch": 0.17033674368182125, "grad_norm": 8.019355773925781, "learning_rate": 9.758641933054966e-05, "loss": 0.879, "step": 2514 }, { "epoch": 0.17040449894979334, "grad_norm": 9.819777488708496, "learning_rate": 9.758505031145186e-05, "loss": 1.3677, "step": 2515 }, { "epoch": 0.17047225421776543, "grad_norm": 9.9795560836792, "learning_rate": 9.758368129235404e-05, "loss": 1.1614, "step": 2516 }, { "epoch": 0.17054000948573753, "grad_norm": 9.51271915435791, "learning_rate": 9.758231227325622e-05, "loss": 1.0423, "step": 2517 }, { "epoch": 0.1706077647537096, "grad_norm": 10.511359214782715, "learning_rate": 9.75809432541584e-05, "loss": 0.9492, "step": 2518 }, { "epoch": 0.17067552002168168, "grad_norm": 9.61755657196045, "learning_rate": 9.757957423506059e-05, "loss": 1.1481, "step": 2519 }, { "epoch": 0.17074327528965377, "grad_norm": 11.246142387390137, "learning_rate": 9.757820521596277e-05, "loss": 1.3981, "step": 2520 }, { "epoch": 0.17081103055762586, "grad_norm": 9.273181915283203, "learning_rate": 9.757683619686495e-05, "loss": 0.9773, "step": 2521 }, { "epoch": 0.17087878582559793, "grad_norm": 11.215822219848633, "learning_rate": 9.757546717776713e-05, "loss": 1.2303, "step": 2522 }, { "epoch": 0.17094654109357002, "grad_norm": 10.139853477478027, "learning_rate": 9.757409815866931e-05, "loss": 1.0807, "step": 2523 }, { "epoch": 0.1710142963615421, "grad_norm": 13.275606155395508, "learning_rate": 9.757272913957151e-05, "loss": 1.3326, "step": 2524 }, { "epoch": 0.1710820516295142, "grad_norm": 8.32109260559082, "learning_rate": 9.757136012047369e-05, "loss": 0.9428, "step": 2525 }, { "epoch": 0.17114980689748627, "grad_norm": 9.275816917419434, "learning_rate": 9.756999110137587e-05, "loss": 1.1531, "step": 2526 }, { "epoch": 0.17121756216545836, "grad_norm": 10.61928939819336, "learning_rate": 9.756862208227805e-05, "loss": 0.9422, "step": 2527 }, { "epoch": 0.17128531743343045, "grad_norm": 7.793631553649902, "learning_rate": 9.756725306318023e-05, "loss": 1.216, "step": 2528 }, { "epoch": 0.17135307270140254, "grad_norm": 7.453477382659912, "learning_rate": 9.756588404408242e-05, "loss": 0.7787, "step": 2529 }, { "epoch": 0.1714208279693746, "grad_norm": 10.39784049987793, "learning_rate": 9.75645150249846e-05, "loss": 1.3171, "step": 2530 }, { "epoch": 0.1714885832373467, "grad_norm": 8.57040786743164, "learning_rate": 9.756314600588678e-05, "loss": 0.8319, "step": 2531 }, { "epoch": 0.1715563385053188, "grad_norm": 8.696785926818848, "learning_rate": 9.756177698678896e-05, "loss": 1.0078, "step": 2532 }, { "epoch": 0.17162409377329088, "grad_norm": 8.212169647216797, "learning_rate": 9.756040796769116e-05, "loss": 0.7851, "step": 2533 }, { "epoch": 0.17169184904126294, "grad_norm": 10.94201946258545, "learning_rate": 9.755903894859334e-05, "loss": 1.0065, "step": 2534 }, { "epoch": 0.17175960430923504, "grad_norm": 12.041540145874023, "learning_rate": 9.755766992949552e-05, "loss": 1.2938, "step": 2535 }, { "epoch": 0.17182735957720713, "grad_norm": 9.289467811584473, "learning_rate": 9.75563009103977e-05, "loss": 1.1521, "step": 2536 }, { "epoch": 0.17189511484517922, "grad_norm": 9.231005668640137, "learning_rate": 9.755493189129988e-05, "loss": 0.9, "step": 2537 }, { "epoch": 0.17196287011315128, "grad_norm": 8.934699058532715, "learning_rate": 9.755356287220207e-05, "loss": 1.2817, "step": 2538 }, { "epoch": 0.17203062538112338, "grad_norm": 10.096917152404785, "learning_rate": 9.755219385310425e-05, "loss": 1.1587, "step": 2539 }, { "epoch": 0.17209838064909547, "grad_norm": 8.864645004272461, "learning_rate": 9.755082483400643e-05, "loss": 1.1781, "step": 2540 }, { "epoch": 0.17216613591706756, "grad_norm": 10.964715957641602, "learning_rate": 9.754945581490861e-05, "loss": 1.2729, "step": 2541 }, { "epoch": 0.17223389118503965, "grad_norm": 10.845703125, "learning_rate": 9.754808679581081e-05, "loss": 1.4475, "step": 2542 }, { "epoch": 0.17230164645301171, "grad_norm": 9.800530433654785, "learning_rate": 9.754671777671299e-05, "loss": 1.0296, "step": 2543 }, { "epoch": 0.1723694017209838, "grad_norm": 8.789834022521973, "learning_rate": 9.754534875761517e-05, "loss": 0.9573, "step": 2544 }, { "epoch": 0.1724371569889559, "grad_norm": 7.731616973876953, "learning_rate": 9.754397973851735e-05, "loss": 0.9875, "step": 2545 }, { "epoch": 0.172504912256928, "grad_norm": 8.942553520202637, "learning_rate": 9.754261071941953e-05, "loss": 1.1897, "step": 2546 }, { "epoch": 0.17257266752490005, "grad_norm": 7.468856334686279, "learning_rate": 9.754124170032172e-05, "loss": 0.8869, "step": 2547 }, { "epoch": 0.17264042279287214, "grad_norm": 8.798864364624023, "learning_rate": 9.75398726812239e-05, "loss": 1.0034, "step": 2548 }, { "epoch": 0.17270817806084424, "grad_norm": 8.579094886779785, "learning_rate": 9.753850366212608e-05, "loss": 1.0211, "step": 2549 }, { "epoch": 0.17277593332881633, "grad_norm": 10.90807819366455, "learning_rate": 9.753713464302827e-05, "loss": 1.3639, "step": 2550 }, { "epoch": 0.1728436885967884, "grad_norm": 8.337204933166504, "learning_rate": 9.753576562393046e-05, "loss": 1.323, "step": 2551 }, { "epoch": 0.17291144386476048, "grad_norm": 8.731851577758789, "learning_rate": 9.753439660483264e-05, "loss": 0.8805, "step": 2552 }, { "epoch": 0.17297919913273258, "grad_norm": 9.541427612304688, "learning_rate": 9.753302758573482e-05, "loss": 1.0368, "step": 2553 }, { "epoch": 0.17304695440070467, "grad_norm": 7.938154697418213, "learning_rate": 9.753165856663701e-05, "loss": 0.9128, "step": 2554 }, { "epoch": 0.17311470966867673, "grad_norm": 8.111212730407715, "learning_rate": 9.75302895475392e-05, "loss": 0.9857, "step": 2555 }, { "epoch": 0.17318246493664882, "grad_norm": 8.583324432373047, "learning_rate": 9.752892052844139e-05, "loss": 1.0975, "step": 2556 }, { "epoch": 0.1732502202046209, "grad_norm": 8.180643081665039, "learning_rate": 9.752755150934357e-05, "loss": 1.21, "step": 2557 }, { "epoch": 0.173317975472593, "grad_norm": 9.740217208862305, "learning_rate": 9.752618249024575e-05, "loss": 1.18, "step": 2558 }, { "epoch": 0.17338573074056507, "grad_norm": 8.725831031799316, "learning_rate": 9.752481347114793e-05, "loss": 0.9816, "step": 2559 }, { "epoch": 0.17345348600853716, "grad_norm": 10.786824226379395, "learning_rate": 9.752344445205011e-05, "loss": 1.0402, "step": 2560 }, { "epoch": 0.17352124127650925, "grad_norm": 8.91720962524414, "learning_rate": 9.75220754329523e-05, "loss": 1.2357, "step": 2561 }, { "epoch": 0.17358899654448134, "grad_norm": 9.02492618560791, "learning_rate": 9.752070641385448e-05, "loss": 1.1264, "step": 2562 }, { "epoch": 0.1736567518124534, "grad_norm": 7.917794227600098, "learning_rate": 9.751933739475666e-05, "loss": 1.0937, "step": 2563 }, { "epoch": 0.1737245070804255, "grad_norm": 11.543112754821777, "learning_rate": 9.751796837565884e-05, "loss": 1.0851, "step": 2564 }, { "epoch": 0.1737922623483976, "grad_norm": 9.114936828613281, "learning_rate": 9.751659935656104e-05, "loss": 0.9565, "step": 2565 }, { "epoch": 0.17386001761636968, "grad_norm": 12.322575569152832, "learning_rate": 9.751523033746322e-05, "loss": 1.0356, "step": 2566 }, { "epoch": 0.17392777288434175, "grad_norm": 11.39137077331543, "learning_rate": 9.75138613183654e-05, "loss": 1.2141, "step": 2567 }, { "epoch": 0.17399552815231384, "grad_norm": 9.505644798278809, "learning_rate": 9.751249229926758e-05, "loss": 0.8307, "step": 2568 }, { "epoch": 0.17406328342028593, "grad_norm": 11.05932331085205, "learning_rate": 9.751112328016976e-05, "loss": 1.1915, "step": 2569 }, { "epoch": 0.17413103868825802, "grad_norm": 11.104498863220215, "learning_rate": 9.750975426107195e-05, "loss": 1.0032, "step": 2570 }, { "epoch": 0.17419879395623009, "grad_norm": 11.366668701171875, "learning_rate": 9.750838524197413e-05, "loss": 1.0857, "step": 2571 }, { "epoch": 0.17426654922420218, "grad_norm": 8.775167465209961, "learning_rate": 9.750701622287631e-05, "loss": 1.1545, "step": 2572 }, { "epoch": 0.17433430449217427, "grad_norm": 8.820073127746582, "learning_rate": 9.75056472037785e-05, "loss": 1.0528, "step": 2573 }, { "epoch": 0.17440205976014636, "grad_norm": 9.53591537475586, "learning_rate": 9.750427818468069e-05, "loss": 1.0562, "step": 2574 }, { "epoch": 0.17446981502811842, "grad_norm": 10.08950138092041, "learning_rate": 9.750290916558287e-05, "loss": 1.0456, "step": 2575 }, { "epoch": 0.17453757029609052, "grad_norm": 8.222607612609863, "learning_rate": 9.750154014648505e-05, "loss": 0.9168, "step": 2576 }, { "epoch": 0.1746053255640626, "grad_norm": 10.067093849182129, "learning_rate": 9.750017112738723e-05, "loss": 0.9952, "step": 2577 }, { "epoch": 0.1746730808320347, "grad_norm": 9.726996421813965, "learning_rate": 9.749880210828941e-05, "loss": 0.9428, "step": 2578 }, { "epoch": 0.17474083610000676, "grad_norm": 9.970647811889648, "learning_rate": 9.74974330891916e-05, "loss": 1.1838, "step": 2579 }, { "epoch": 0.17480859136797886, "grad_norm": 7.8667893409729, "learning_rate": 9.749606407009378e-05, "loss": 1.0341, "step": 2580 }, { "epoch": 0.17487634663595095, "grad_norm": 9.227079391479492, "learning_rate": 9.749469505099596e-05, "loss": 1.0515, "step": 2581 }, { "epoch": 0.17494410190392304, "grad_norm": 9.198224067687988, "learning_rate": 9.749332603189814e-05, "loss": 1.0935, "step": 2582 }, { "epoch": 0.1750118571718951, "grad_norm": 11.105298042297363, "learning_rate": 9.749195701280032e-05, "loss": 1.2115, "step": 2583 }, { "epoch": 0.1750796124398672, "grad_norm": 8.815799713134766, "learning_rate": 9.749058799370252e-05, "loss": 1.1308, "step": 2584 }, { "epoch": 0.17514736770783929, "grad_norm": 10.571581840515137, "learning_rate": 9.74892189746047e-05, "loss": 1.1982, "step": 2585 }, { "epoch": 0.17521512297581138, "grad_norm": 7.368075370788574, "learning_rate": 9.748784995550688e-05, "loss": 0.9352, "step": 2586 }, { "epoch": 0.17528287824378344, "grad_norm": 7.103427410125732, "learning_rate": 9.748648093640906e-05, "loss": 1.0578, "step": 2587 }, { "epoch": 0.17535063351175553, "grad_norm": 7.678786754608154, "learning_rate": 9.748511191731125e-05, "loss": 0.8549, "step": 2588 }, { "epoch": 0.17541838877972762, "grad_norm": 9.514643669128418, "learning_rate": 9.748374289821343e-05, "loss": 1.1835, "step": 2589 }, { "epoch": 0.17548614404769972, "grad_norm": 7.552379131317139, "learning_rate": 9.748237387911561e-05, "loss": 0.9425, "step": 2590 }, { "epoch": 0.17555389931567178, "grad_norm": 7.663018226623535, "learning_rate": 9.74810048600178e-05, "loss": 1.0948, "step": 2591 }, { "epoch": 0.17562165458364387, "grad_norm": 10.293536186218262, "learning_rate": 9.747963584091997e-05, "loss": 1.1636, "step": 2592 }, { "epoch": 0.17568940985161596, "grad_norm": 9.024083137512207, "learning_rate": 9.747826682182217e-05, "loss": 1.1157, "step": 2593 }, { "epoch": 0.17575716511958805, "grad_norm": 8.870935440063477, "learning_rate": 9.747689780272435e-05, "loss": 0.9634, "step": 2594 }, { "epoch": 0.17582492038756015, "grad_norm": 8.553323745727539, "learning_rate": 9.747552878362653e-05, "loss": 1.1515, "step": 2595 }, { "epoch": 0.1758926756555322, "grad_norm": 9.13661003112793, "learning_rate": 9.747415976452871e-05, "loss": 0.9267, "step": 2596 }, { "epoch": 0.1759604309235043, "grad_norm": 8.66097640991211, "learning_rate": 9.74727907454309e-05, "loss": 0.6839, "step": 2597 }, { "epoch": 0.1760281861914764, "grad_norm": 10.07618236541748, "learning_rate": 9.747142172633308e-05, "loss": 1.2822, "step": 2598 }, { "epoch": 0.17609594145944849, "grad_norm": 9.136283874511719, "learning_rate": 9.747005270723526e-05, "loss": 1.0053, "step": 2599 }, { "epoch": 0.17616369672742055, "grad_norm": 9.42113208770752, "learning_rate": 9.746868368813746e-05, "loss": 1.15, "step": 2600 }, { "epoch": 0.17623145199539264, "grad_norm": 8.789713859558105, "learning_rate": 9.746731466903964e-05, "loss": 0.8504, "step": 2601 }, { "epoch": 0.17629920726336473, "grad_norm": 8.704665184020996, "learning_rate": 9.746594564994182e-05, "loss": 1.1516, "step": 2602 }, { "epoch": 0.17636696253133682, "grad_norm": 8.030630111694336, "learning_rate": 9.746457663084401e-05, "loss": 1.044, "step": 2603 }, { "epoch": 0.1764347177993089, "grad_norm": 8.640777587890625, "learning_rate": 9.746320761174619e-05, "loss": 1.0013, "step": 2604 }, { "epoch": 0.17650247306728098, "grad_norm": 7.806771278381348, "learning_rate": 9.746183859264837e-05, "loss": 1.0678, "step": 2605 }, { "epoch": 0.17657022833525307, "grad_norm": 8.883776664733887, "learning_rate": 9.746046957355055e-05, "loss": 0.9277, "step": 2606 }, { "epoch": 0.17663798360322516, "grad_norm": 7.539346694946289, "learning_rate": 9.745910055445275e-05, "loss": 1.0437, "step": 2607 }, { "epoch": 0.17670573887119723, "grad_norm": 9.873644828796387, "learning_rate": 9.745773153535493e-05, "loss": 1.206, "step": 2608 }, { "epoch": 0.17677349413916932, "grad_norm": 10.031026840209961, "learning_rate": 9.745636251625711e-05, "loss": 1.1934, "step": 2609 }, { "epoch": 0.1768412494071414, "grad_norm": 9.194849014282227, "learning_rate": 9.745499349715929e-05, "loss": 1.2365, "step": 2610 }, { "epoch": 0.1769090046751135, "grad_norm": 7.951476097106934, "learning_rate": 9.745362447806148e-05, "loss": 0.8829, "step": 2611 }, { "epoch": 0.17697675994308557, "grad_norm": 8.77597427368164, "learning_rate": 9.745225545896366e-05, "loss": 1.0097, "step": 2612 }, { "epoch": 0.17704451521105766, "grad_norm": 10.356508255004883, "learning_rate": 9.745088643986584e-05, "loss": 1.2603, "step": 2613 }, { "epoch": 0.17711227047902975, "grad_norm": 9.24199390411377, "learning_rate": 9.744951742076802e-05, "loss": 1.0181, "step": 2614 }, { "epoch": 0.17718002574700184, "grad_norm": 8.244451522827148, "learning_rate": 9.74481484016702e-05, "loss": 0.9602, "step": 2615 }, { "epoch": 0.1772477810149739, "grad_norm": 10.769842147827148, "learning_rate": 9.74467793825724e-05, "loss": 1.0646, "step": 2616 }, { "epoch": 0.177315536282946, "grad_norm": 9.381787300109863, "learning_rate": 9.744541036347458e-05, "loss": 0.8985, "step": 2617 }, { "epoch": 0.1773832915509181, "grad_norm": 8.379899978637695, "learning_rate": 9.744404134437676e-05, "loss": 1.0368, "step": 2618 }, { "epoch": 0.17745104681889018, "grad_norm": 9.59954833984375, "learning_rate": 9.744267232527894e-05, "loss": 1.1447, "step": 2619 }, { "epoch": 0.17751880208686224, "grad_norm": 8.83703327178955, "learning_rate": 9.744130330618113e-05, "loss": 1.3435, "step": 2620 }, { "epoch": 0.17758655735483433, "grad_norm": 9.448990821838379, "learning_rate": 9.743993428708331e-05, "loss": 1.1837, "step": 2621 }, { "epoch": 0.17765431262280643, "grad_norm": 9.466961860656738, "learning_rate": 9.74385652679855e-05, "loss": 1.1265, "step": 2622 }, { "epoch": 0.17772206789077852, "grad_norm": 8.68281364440918, "learning_rate": 9.743719624888767e-05, "loss": 0.8804, "step": 2623 }, { "epoch": 0.17778982315875058, "grad_norm": 7.007611274719238, "learning_rate": 9.743582722978985e-05, "loss": 1.03, "step": 2624 }, { "epoch": 0.17785757842672267, "grad_norm": 8.254279136657715, "learning_rate": 9.743445821069205e-05, "loss": 1.0328, "step": 2625 }, { "epoch": 0.17792533369469477, "grad_norm": 9.134196281433105, "learning_rate": 9.743308919159423e-05, "loss": 0.9671, "step": 2626 }, { "epoch": 0.17799308896266686, "grad_norm": 7.803997039794922, "learning_rate": 9.743172017249641e-05, "loss": 0.9692, "step": 2627 }, { "epoch": 0.17806084423063892, "grad_norm": 8.37303638458252, "learning_rate": 9.743035115339859e-05, "loss": 0.8949, "step": 2628 }, { "epoch": 0.178128599498611, "grad_norm": 9.928305625915527, "learning_rate": 9.742898213430078e-05, "loss": 1.2679, "step": 2629 }, { "epoch": 0.1781963547665831, "grad_norm": 8.58604621887207, "learning_rate": 9.742761311520296e-05, "loss": 1.23, "step": 2630 }, { "epoch": 0.1782641100345552, "grad_norm": 9.47903823852539, "learning_rate": 9.742624409610514e-05, "loss": 0.8701, "step": 2631 }, { "epoch": 0.17833186530252726, "grad_norm": 8.013197898864746, "learning_rate": 9.742487507700732e-05, "loss": 0.8325, "step": 2632 }, { "epoch": 0.17839962057049935, "grad_norm": 9.541396141052246, "learning_rate": 9.74235060579095e-05, "loss": 1.0996, "step": 2633 }, { "epoch": 0.17846737583847144, "grad_norm": 7.515398979187012, "learning_rate": 9.74221370388117e-05, "loss": 1.1174, "step": 2634 }, { "epoch": 0.17853513110644353, "grad_norm": 9.562670707702637, "learning_rate": 9.742076801971388e-05, "loss": 0.9727, "step": 2635 }, { "epoch": 0.1786028863744156, "grad_norm": 10.072449684143066, "learning_rate": 9.741939900061606e-05, "loss": 1.0021, "step": 2636 }, { "epoch": 0.1786706416423877, "grad_norm": 9.634955406188965, "learning_rate": 9.741802998151824e-05, "loss": 1.3998, "step": 2637 }, { "epoch": 0.17873839691035978, "grad_norm": 8.78954792022705, "learning_rate": 9.741666096242042e-05, "loss": 0.9602, "step": 2638 }, { "epoch": 0.17880615217833187, "grad_norm": 9.597916603088379, "learning_rate": 9.741529194332261e-05, "loss": 1.0174, "step": 2639 }, { "epoch": 0.17887390744630394, "grad_norm": 9.543583869934082, "learning_rate": 9.74139229242248e-05, "loss": 1.0686, "step": 2640 }, { "epoch": 0.17894166271427603, "grad_norm": 8.247551918029785, "learning_rate": 9.741255390512697e-05, "loss": 1.1026, "step": 2641 }, { "epoch": 0.17900941798224812, "grad_norm": 8.487943649291992, "learning_rate": 9.741118488602915e-05, "loss": 0.8843, "step": 2642 }, { "epoch": 0.1790771732502202, "grad_norm": 9.061832427978516, "learning_rate": 9.740981586693135e-05, "loss": 0.9708, "step": 2643 }, { "epoch": 0.17914492851819228, "grad_norm": 9.242405891418457, "learning_rate": 9.740844684783353e-05, "loss": 1.1344, "step": 2644 }, { "epoch": 0.17921268378616437, "grad_norm": 8.192344665527344, "learning_rate": 9.740707782873571e-05, "loss": 0.7587, "step": 2645 }, { "epoch": 0.17928043905413646, "grad_norm": 6.6164445877075195, "learning_rate": 9.74057088096379e-05, "loss": 0.842, "step": 2646 }, { "epoch": 0.17934819432210855, "grad_norm": 10.285326957702637, "learning_rate": 9.740433979054008e-05, "loss": 1.138, "step": 2647 }, { "epoch": 0.17941594959008064, "grad_norm": 8.55659294128418, "learning_rate": 9.740297077144226e-05, "loss": 0.7925, "step": 2648 }, { "epoch": 0.1794837048580527, "grad_norm": 8.384642601013184, "learning_rate": 9.740160175234446e-05, "loss": 0.9719, "step": 2649 }, { "epoch": 0.1795514601260248, "grad_norm": 9.303678512573242, "learning_rate": 9.740023273324664e-05, "loss": 1.0688, "step": 2650 }, { "epoch": 0.1796192153939969, "grad_norm": 9.642799377441406, "learning_rate": 9.739886371414882e-05, "loss": 0.9758, "step": 2651 }, { "epoch": 0.17968697066196898, "grad_norm": 9.616509437561035, "learning_rate": 9.739749469505101e-05, "loss": 0.9999, "step": 2652 }, { "epoch": 0.17975472592994104, "grad_norm": 10.511337280273438, "learning_rate": 9.739612567595319e-05, "loss": 1.0079, "step": 2653 }, { "epoch": 0.17982248119791314, "grad_norm": 7.480882167816162, "learning_rate": 9.739475665685537e-05, "loss": 0.8353, "step": 2654 }, { "epoch": 0.17989023646588523, "grad_norm": 10.277608871459961, "learning_rate": 9.739338763775755e-05, "loss": 0.9709, "step": 2655 }, { "epoch": 0.17995799173385732, "grad_norm": 9.135882377624512, "learning_rate": 9.739201861865973e-05, "loss": 1.3888, "step": 2656 }, { "epoch": 0.18002574700182938, "grad_norm": 8.147950172424316, "learning_rate": 9.739064959956193e-05, "loss": 1.2235, "step": 2657 }, { "epoch": 0.18009350226980148, "grad_norm": 8.583501815795898, "learning_rate": 9.738928058046411e-05, "loss": 0.9123, "step": 2658 }, { "epoch": 0.18016125753777357, "grad_norm": 8.202300071716309, "learning_rate": 9.738791156136629e-05, "loss": 1.0435, "step": 2659 }, { "epoch": 0.18022901280574566, "grad_norm": 8.121417045593262, "learning_rate": 9.738654254226847e-05, "loss": 0.9351, "step": 2660 }, { "epoch": 0.18029676807371772, "grad_norm": 10.645029067993164, "learning_rate": 9.738517352317065e-05, "loss": 1.5219, "step": 2661 }, { "epoch": 0.18036452334168981, "grad_norm": 9.766422271728516, "learning_rate": 9.738380450407284e-05, "loss": 1.0499, "step": 2662 }, { "epoch": 0.1804322786096619, "grad_norm": 8.137174606323242, "learning_rate": 9.738243548497502e-05, "loss": 1.1871, "step": 2663 }, { "epoch": 0.180500033877634, "grad_norm": 8.422198295593262, "learning_rate": 9.73810664658772e-05, "loss": 0.9524, "step": 2664 }, { "epoch": 0.18056778914560606, "grad_norm": 6.721381664276123, "learning_rate": 9.737969744677938e-05, "loss": 1.0449, "step": 2665 }, { "epoch": 0.18063554441357815, "grad_norm": 9.175302505493164, "learning_rate": 9.737832842768158e-05, "loss": 1.0508, "step": 2666 }, { "epoch": 0.18070329968155024, "grad_norm": 10.165428161621094, "learning_rate": 9.737695940858376e-05, "loss": 0.8744, "step": 2667 }, { "epoch": 0.18077105494952234, "grad_norm": 9.396173477172852, "learning_rate": 9.737559038948594e-05, "loss": 1.0304, "step": 2668 }, { "epoch": 0.1808388102174944, "grad_norm": 9.680516242980957, "learning_rate": 9.737422137038812e-05, "loss": 0.9773, "step": 2669 }, { "epoch": 0.1809065654854665, "grad_norm": 7.570343017578125, "learning_rate": 9.73728523512903e-05, "loss": 0.9159, "step": 2670 }, { "epoch": 0.18097432075343858, "grad_norm": 9.5789213180542, "learning_rate": 9.737148333219249e-05, "loss": 1.1269, "step": 2671 }, { "epoch": 0.18104207602141068, "grad_norm": 8.559327125549316, "learning_rate": 9.737011431309467e-05, "loss": 1.2318, "step": 2672 }, { "epoch": 0.18110983128938274, "grad_norm": 7.630974292755127, "learning_rate": 9.736874529399685e-05, "loss": 0.8996, "step": 2673 }, { "epoch": 0.18117758655735483, "grad_norm": 8.078895568847656, "learning_rate": 9.736737627489903e-05, "loss": 0.9942, "step": 2674 }, { "epoch": 0.18124534182532692, "grad_norm": 8.830656051635742, "learning_rate": 9.736600725580123e-05, "loss": 1.0614, "step": 2675 }, { "epoch": 0.181313097093299, "grad_norm": 7.66297721862793, "learning_rate": 9.736463823670341e-05, "loss": 1.025, "step": 2676 }, { "epoch": 0.18138085236127108, "grad_norm": 8.318365097045898, "learning_rate": 9.736326921760559e-05, "loss": 1.1286, "step": 2677 }, { "epoch": 0.18144860762924317, "grad_norm": 10.180238723754883, "learning_rate": 9.736190019850777e-05, "loss": 1.1304, "step": 2678 }, { "epoch": 0.18151636289721526, "grad_norm": 9.2420654296875, "learning_rate": 9.736053117940995e-05, "loss": 1.157, "step": 2679 }, { "epoch": 0.18158411816518735, "grad_norm": 7.984904766082764, "learning_rate": 9.735916216031214e-05, "loss": 1.1325, "step": 2680 }, { "epoch": 0.18165187343315942, "grad_norm": 11.136275291442871, "learning_rate": 9.735779314121432e-05, "loss": 1.0151, "step": 2681 }, { "epoch": 0.1817196287011315, "grad_norm": 9.954483032226562, "learning_rate": 9.73564241221165e-05, "loss": 1.0934, "step": 2682 }, { "epoch": 0.1817873839691036, "grad_norm": 8.491388320922852, "learning_rate": 9.735505510301868e-05, "loss": 1.2288, "step": 2683 }, { "epoch": 0.1818551392370757, "grad_norm": 9.355586051940918, "learning_rate": 9.735368608392088e-05, "loss": 0.7999, "step": 2684 }, { "epoch": 0.18192289450504776, "grad_norm": 8.829736709594727, "learning_rate": 9.735231706482306e-05, "loss": 0.9711, "step": 2685 }, { "epoch": 0.18199064977301985, "grad_norm": 9.667959213256836, "learning_rate": 9.735094804572524e-05, "loss": 1.1132, "step": 2686 }, { "epoch": 0.18205840504099194, "grad_norm": 6.971467971801758, "learning_rate": 9.734957902662742e-05, "loss": 1.0252, "step": 2687 }, { "epoch": 0.18212616030896403, "grad_norm": 9.69013500213623, "learning_rate": 9.73482100075296e-05, "loss": 0.9562, "step": 2688 }, { "epoch": 0.1821939155769361, "grad_norm": 9.47673511505127, "learning_rate": 9.734684098843179e-05, "loss": 0.9822, "step": 2689 }, { "epoch": 0.18226167084490819, "grad_norm": 9.708051681518555, "learning_rate": 9.734547196933397e-05, "loss": 1.0772, "step": 2690 }, { "epoch": 0.18232942611288028, "grad_norm": 10.617173194885254, "learning_rate": 9.734410295023615e-05, "loss": 1.2686, "step": 2691 }, { "epoch": 0.18239718138085237, "grad_norm": 9.52670955657959, "learning_rate": 9.734273393113835e-05, "loss": 0.9477, "step": 2692 }, { "epoch": 0.18246493664882443, "grad_norm": 10.090290069580078, "learning_rate": 9.734136491204053e-05, "loss": 1.1766, "step": 2693 }, { "epoch": 0.18253269191679652, "grad_norm": 8.709790229797363, "learning_rate": 9.733999589294271e-05, "loss": 1.1327, "step": 2694 }, { "epoch": 0.18260044718476862, "grad_norm": 12.07381534576416, "learning_rate": 9.73386268738449e-05, "loss": 1.3694, "step": 2695 }, { "epoch": 0.1826682024527407, "grad_norm": 8.329826354980469, "learning_rate": 9.733725785474708e-05, "loss": 1.0448, "step": 2696 }, { "epoch": 0.18273595772071277, "grad_norm": 7.551383972167969, "learning_rate": 9.733588883564926e-05, "loss": 1.029, "step": 2697 }, { "epoch": 0.18280371298868486, "grad_norm": 9.393105506896973, "learning_rate": 9.733451981655146e-05, "loss": 0.9729, "step": 2698 }, { "epoch": 0.18287146825665695, "grad_norm": 9.283944129943848, "learning_rate": 9.733315079745364e-05, "loss": 1.2459, "step": 2699 }, { "epoch": 0.18293922352462905, "grad_norm": 9.02878189086914, "learning_rate": 9.733178177835582e-05, "loss": 1.3235, "step": 2700 }, { "epoch": 0.18300697879260114, "grad_norm": 8.735793113708496, "learning_rate": 9.7330412759258e-05, "loss": 1.1447, "step": 2701 }, { "epoch": 0.1830747340605732, "grad_norm": 8.511741638183594, "learning_rate": 9.732904374016018e-05, "loss": 1.0148, "step": 2702 }, { "epoch": 0.1831424893285453, "grad_norm": 6.590076446533203, "learning_rate": 9.732767472106237e-05, "loss": 1.0904, "step": 2703 }, { "epoch": 0.18321024459651739, "grad_norm": 11.523942947387695, "learning_rate": 9.732630570196455e-05, "loss": 0.9349, "step": 2704 }, { "epoch": 0.18327799986448948, "grad_norm": 6.972995281219482, "learning_rate": 9.732493668286673e-05, "loss": 0.9837, "step": 2705 }, { "epoch": 0.18334575513246154, "grad_norm": 7.834453105926514, "learning_rate": 9.732356766376891e-05, "loss": 1.0374, "step": 2706 }, { "epoch": 0.18341351040043363, "grad_norm": 9.44674015045166, "learning_rate": 9.732219864467111e-05, "loss": 1.4479, "step": 2707 }, { "epoch": 0.18348126566840572, "grad_norm": 9.017583847045898, "learning_rate": 9.732082962557329e-05, "loss": 1.136, "step": 2708 }, { "epoch": 0.18354902093637782, "grad_norm": 8.40336799621582, "learning_rate": 9.731946060647547e-05, "loss": 0.9914, "step": 2709 }, { "epoch": 0.18361677620434988, "grad_norm": 10.586843490600586, "learning_rate": 9.731809158737765e-05, "loss": 1.1515, "step": 2710 }, { "epoch": 0.18368453147232197, "grad_norm": 7.611972808837891, "learning_rate": 9.731672256827983e-05, "loss": 1.0821, "step": 2711 }, { "epoch": 0.18375228674029406, "grad_norm": 9.093097686767578, "learning_rate": 9.731535354918202e-05, "loss": 1.1295, "step": 2712 }, { "epoch": 0.18382004200826615, "grad_norm": 8.013279914855957, "learning_rate": 9.73139845300842e-05, "loss": 0.839, "step": 2713 }, { "epoch": 0.18388779727623822, "grad_norm": 7.941558837890625, "learning_rate": 9.731261551098638e-05, "loss": 1.0819, "step": 2714 }, { "epoch": 0.1839555525442103, "grad_norm": 7.09537935256958, "learning_rate": 9.731124649188856e-05, "loss": 0.8606, "step": 2715 }, { "epoch": 0.1840233078121824, "grad_norm": 7.867136478424072, "learning_rate": 9.730987747279074e-05, "loss": 0.9942, "step": 2716 }, { "epoch": 0.1840910630801545, "grad_norm": 8.66762638092041, "learning_rate": 9.730850845369294e-05, "loss": 1.0792, "step": 2717 }, { "epoch": 0.18415881834812656, "grad_norm": 7.97219705581665, "learning_rate": 9.730713943459512e-05, "loss": 1.0366, "step": 2718 }, { "epoch": 0.18422657361609865, "grad_norm": 8.341264724731445, "learning_rate": 9.73057704154973e-05, "loss": 1.0833, "step": 2719 }, { "epoch": 0.18429432888407074, "grad_norm": 8.704034805297852, "learning_rate": 9.730440139639948e-05, "loss": 1.2909, "step": 2720 }, { "epoch": 0.18436208415204283, "grad_norm": 9.759525299072266, "learning_rate": 9.730303237730167e-05, "loss": 1.1749, "step": 2721 }, { "epoch": 0.1844298394200149, "grad_norm": 9.2392578125, "learning_rate": 9.730166335820385e-05, "loss": 1.4037, "step": 2722 }, { "epoch": 0.184497594687987, "grad_norm": 10.519861221313477, "learning_rate": 9.730029433910603e-05, "loss": 1.2692, "step": 2723 }, { "epoch": 0.18456534995595908, "grad_norm": 8.285715103149414, "learning_rate": 9.729892532000821e-05, "loss": 0.8014, "step": 2724 }, { "epoch": 0.18463310522393117, "grad_norm": 7.261229991912842, "learning_rate": 9.72975563009104e-05, "loss": 0.9302, "step": 2725 }, { "epoch": 0.18470086049190323, "grad_norm": 10.149426460266113, "learning_rate": 9.729618728181259e-05, "loss": 1.2855, "step": 2726 }, { "epoch": 0.18476861575987533, "grad_norm": 7.495121955871582, "learning_rate": 9.729481826271477e-05, "loss": 0.9771, "step": 2727 }, { "epoch": 0.18483637102784742, "grad_norm": 9.102607727050781, "learning_rate": 9.729344924361695e-05, "loss": 1.1974, "step": 2728 }, { "epoch": 0.1849041262958195, "grad_norm": 9.57135009765625, "learning_rate": 9.729208022451913e-05, "loss": 0.9758, "step": 2729 }, { "epoch": 0.18497188156379157, "grad_norm": 8.745903015136719, "learning_rate": 9.729071120542132e-05, "loss": 1.1719, "step": 2730 }, { "epoch": 0.18503963683176367, "grad_norm": 10.697607040405273, "learning_rate": 9.72893421863235e-05, "loss": 0.952, "step": 2731 }, { "epoch": 0.18510739209973576, "grad_norm": 9.907280921936035, "learning_rate": 9.728797316722568e-05, "loss": 0.9571, "step": 2732 }, { "epoch": 0.18517514736770785, "grad_norm": 7.285250186920166, "learning_rate": 9.728660414812786e-05, "loss": 0.9002, "step": 2733 }, { "epoch": 0.1852429026356799, "grad_norm": 8.134112358093262, "learning_rate": 9.728523512903004e-05, "loss": 0.8645, "step": 2734 }, { "epoch": 0.185310657903652, "grad_norm": 9.427742004394531, "learning_rate": 9.728386610993224e-05, "loss": 1.2333, "step": 2735 }, { "epoch": 0.1853784131716241, "grad_norm": 8.804586410522461, "learning_rate": 9.728249709083442e-05, "loss": 1.2601, "step": 2736 }, { "epoch": 0.1854461684395962, "grad_norm": 9.144674301147461, "learning_rate": 9.72811280717366e-05, "loss": 0.9503, "step": 2737 }, { "epoch": 0.18551392370756825, "grad_norm": 9.399337768554688, "learning_rate": 9.727975905263879e-05, "loss": 0.8586, "step": 2738 }, { "epoch": 0.18558167897554034, "grad_norm": 7.7377119064331055, "learning_rate": 9.727839003354097e-05, "loss": 0.7511, "step": 2739 }, { "epoch": 0.18564943424351243, "grad_norm": 9.146937370300293, "learning_rate": 9.727702101444315e-05, "loss": 1.0037, "step": 2740 }, { "epoch": 0.18571718951148453, "grad_norm": 7.722357273101807, "learning_rate": 9.727565199534535e-05, "loss": 1.077, "step": 2741 }, { "epoch": 0.1857849447794566, "grad_norm": 7.808566093444824, "learning_rate": 9.727428297624753e-05, "loss": 0.8953, "step": 2742 }, { "epoch": 0.18585270004742868, "grad_norm": 8.765763282775879, "learning_rate": 9.727291395714971e-05, "loss": 1.0691, "step": 2743 }, { "epoch": 0.18592045531540077, "grad_norm": 11.350706100463867, "learning_rate": 9.72715449380519e-05, "loss": 0.9308, "step": 2744 }, { "epoch": 0.18598821058337286, "grad_norm": 7.838237285614014, "learning_rate": 9.727017591895408e-05, "loss": 0.9452, "step": 2745 }, { "epoch": 0.18605596585134493, "grad_norm": 9.660740852355957, "learning_rate": 9.726880689985626e-05, "loss": 0.8705, "step": 2746 }, { "epoch": 0.18612372111931702, "grad_norm": 8.146308898925781, "learning_rate": 9.726743788075844e-05, "loss": 1.0362, "step": 2747 }, { "epoch": 0.1861914763872891, "grad_norm": 9.901165008544922, "learning_rate": 9.726606886166062e-05, "loss": 1.0842, "step": 2748 }, { "epoch": 0.1862592316552612, "grad_norm": 7.650402069091797, "learning_rate": 9.726469984256282e-05, "loss": 0.997, "step": 2749 }, { "epoch": 0.18632698692323327, "grad_norm": 7.760092735290527, "learning_rate": 9.7263330823465e-05, "loss": 0.9903, "step": 2750 }, { "epoch": 0.18639474219120536, "grad_norm": 9.523726463317871, "learning_rate": 9.726196180436718e-05, "loss": 0.9276, "step": 2751 }, { "epoch": 0.18646249745917745, "grad_norm": 7.759490489959717, "learning_rate": 9.726059278526936e-05, "loss": 0.9356, "step": 2752 }, { "epoch": 0.18653025272714954, "grad_norm": 10.949979782104492, "learning_rate": 9.725922376617155e-05, "loss": 1.1451, "step": 2753 }, { "epoch": 0.18659800799512163, "grad_norm": 8.312686920166016, "learning_rate": 9.725785474707373e-05, "loss": 0.973, "step": 2754 }, { "epoch": 0.1866657632630937, "grad_norm": 6.999983787536621, "learning_rate": 9.725648572797591e-05, "loss": 1.0699, "step": 2755 }, { "epoch": 0.1867335185310658, "grad_norm": 9.422745704650879, "learning_rate": 9.725511670887809e-05, "loss": 1.0697, "step": 2756 }, { "epoch": 0.18680127379903788, "grad_norm": 8.054603576660156, "learning_rate": 9.725374768978027e-05, "loss": 0.8764, "step": 2757 }, { "epoch": 0.18686902906700997, "grad_norm": 7.902176856994629, "learning_rate": 9.725237867068247e-05, "loss": 0.9893, "step": 2758 }, { "epoch": 0.18693678433498204, "grad_norm": 8.409537315368652, "learning_rate": 9.725100965158465e-05, "loss": 1.0526, "step": 2759 }, { "epoch": 0.18700453960295413, "grad_norm": 9.867463111877441, "learning_rate": 9.724964063248683e-05, "loss": 1.1218, "step": 2760 }, { "epoch": 0.18707229487092622, "grad_norm": 6.852199554443359, "learning_rate": 9.724827161338901e-05, "loss": 0.9838, "step": 2761 }, { "epoch": 0.1871400501388983, "grad_norm": 9.132448196411133, "learning_rate": 9.72469025942912e-05, "loss": 1.038, "step": 2762 }, { "epoch": 0.18720780540687038, "grad_norm": 8.699213981628418, "learning_rate": 9.724553357519338e-05, "loss": 1.0532, "step": 2763 }, { "epoch": 0.18727556067484247, "grad_norm": 8.520672798156738, "learning_rate": 9.724416455609556e-05, "loss": 1.2942, "step": 2764 }, { "epoch": 0.18734331594281456, "grad_norm": 9.690433502197266, "learning_rate": 9.724279553699774e-05, "loss": 0.9552, "step": 2765 }, { "epoch": 0.18741107121078665, "grad_norm": 7.540090560913086, "learning_rate": 9.724142651789992e-05, "loss": 1.0718, "step": 2766 }, { "epoch": 0.18747882647875871, "grad_norm": 9.072039604187012, "learning_rate": 9.724005749880212e-05, "loss": 1.0313, "step": 2767 }, { "epoch": 0.1875465817467308, "grad_norm": 10.155011177062988, "learning_rate": 9.72386884797043e-05, "loss": 1.3498, "step": 2768 }, { "epoch": 0.1876143370147029, "grad_norm": 7.816718101501465, "learning_rate": 9.723731946060648e-05, "loss": 0.9594, "step": 2769 }, { "epoch": 0.187682092282675, "grad_norm": 10.980911254882812, "learning_rate": 9.723595044150866e-05, "loss": 1.1675, "step": 2770 }, { "epoch": 0.18774984755064705, "grad_norm": 8.865739822387695, "learning_rate": 9.723458142241084e-05, "loss": 1.1796, "step": 2771 }, { "epoch": 0.18781760281861914, "grad_norm": 8.97850227355957, "learning_rate": 9.723321240331303e-05, "loss": 1.0907, "step": 2772 }, { "epoch": 0.18788535808659124, "grad_norm": 8.16921329498291, "learning_rate": 9.723184338421521e-05, "loss": 0.95, "step": 2773 }, { "epoch": 0.18795311335456333, "grad_norm": 8.766203880310059, "learning_rate": 9.723047436511739e-05, "loss": 0.9087, "step": 2774 }, { "epoch": 0.1880208686225354, "grad_norm": 7.410607814788818, "learning_rate": 9.722910534601957e-05, "loss": 0.8082, "step": 2775 }, { "epoch": 0.18808862389050748, "grad_norm": 9.640182495117188, "learning_rate": 9.722773632692177e-05, "loss": 0.99, "step": 2776 }, { "epoch": 0.18815637915847958, "grad_norm": 9.038297653198242, "learning_rate": 9.722636730782395e-05, "loss": 1.0017, "step": 2777 }, { "epoch": 0.18822413442645167, "grad_norm": 10.167421340942383, "learning_rate": 9.722499828872613e-05, "loss": 1.1588, "step": 2778 }, { "epoch": 0.18829188969442373, "grad_norm": 9.911538124084473, "learning_rate": 9.722362926962831e-05, "loss": 1.052, "step": 2779 }, { "epoch": 0.18835964496239582, "grad_norm": 8.78661060333252, "learning_rate": 9.722226025053049e-05, "loss": 0.9827, "step": 2780 }, { "epoch": 0.18842740023036791, "grad_norm": 8.58356761932373, "learning_rate": 9.722089123143268e-05, "loss": 0.9663, "step": 2781 }, { "epoch": 0.18849515549834, "grad_norm": 7.882653713226318, "learning_rate": 9.721952221233486e-05, "loss": 1.0282, "step": 2782 }, { "epoch": 0.18856291076631207, "grad_norm": 8.180728912353516, "learning_rate": 9.721815319323704e-05, "loss": 0.8983, "step": 2783 }, { "epoch": 0.18863066603428416, "grad_norm": 10.785475730895996, "learning_rate": 9.721678417413924e-05, "loss": 1.0949, "step": 2784 }, { "epoch": 0.18869842130225625, "grad_norm": 8.2493257522583, "learning_rate": 9.721541515504142e-05, "loss": 0.8891, "step": 2785 }, { "epoch": 0.18876617657022834, "grad_norm": 8.298515319824219, "learning_rate": 9.72140461359436e-05, "loss": 0.9384, "step": 2786 }, { "epoch": 0.1888339318382004, "grad_norm": 11.643486022949219, "learning_rate": 9.721267711684579e-05, "loss": 1.0915, "step": 2787 }, { "epoch": 0.1889016871061725, "grad_norm": 10.728472709655762, "learning_rate": 9.721130809774797e-05, "loss": 1.562, "step": 2788 }, { "epoch": 0.1889694423741446, "grad_norm": 7.9253435134887695, "learning_rate": 9.720993907865015e-05, "loss": 0.876, "step": 2789 }, { "epoch": 0.18903719764211668, "grad_norm": 9.075439453125, "learning_rate": 9.720857005955235e-05, "loss": 1.1712, "step": 2790 }, { "epoch": 0.18910495291008875, "grad_norm": 8.572853088378906, "learning_rate": 9.720720104045453e-05, "loss": 0.9697, "step": 2791 }, { "epoch": 0.18917270817806084, "grad_norm": 8.40988826751709, "learning_rate": 9.720583202135671e-05, "loss": 1.0904, "step": 2792 }, { "epoch": 0.18924046344603293, "grad_norm": 8.015021324157715, "learning_rate": 9.720446300225889e-05, "loss": 1.0264, "step": 2793 }, { "epoch": 0.18930821871400502, "grad_norm": 10.295256614685059, "learning_rate": 9.720309398316107e-05, "loss": 0.9608, "step": 2794 }, { "epoch": 0.18937597398197709, "grad_norm": 9.377728462219238, "learning_rate": 9.720172496406326e-05, "loss": 1.2169, "step": 2795 }, { "epoch": 0.18944372924994918, "grad_norm": 8.55238151550293, "learning_rate": 9.720035594496544e-05, "loss": 1.0951, "step": 2796 }, { "epoch": 0.18951148451792127, "grad_norm": 10.069438934326172, "learning_rate": 9.719898692586762e-05, "loss": 1.3206, "step": 2797 }, { "epoch": 0.18957923978589336, "grad_norm": 9.229057312011719, "learning_rate": 9.71976179067698e-05, "loss": 1.18, "step": 2798 }, { "epoch": 0.18964699505386542, "grad_norm": 8.911051750183105, "learning_rate": 9.7196248887672e-05, "loss": 1.1351, "step": 2799 }, { "epoch": 0.18971475032183752, "grad_norm": 9.285752296447754, "learning_rate": 9.719487986857418e-05, "loss": 1.0626, "step": 2800 }, { "epoch": 0.1897825055898096, "grad_norm": 8.793561935424805, "learning_rate": 9.719351084947636e-05, "loss": 1.033, "step": 2801 }, { "epoch": 0.1898502608577817, "grad_norm": 10.635384559631348, "learning_rate": 9.719214183037854e-05, "loss": 1.5004, "step": 2802 }, { "epoch": 0.18991801612575376, "grad_norm": 7.9627275466918945, "learning_rate": 9.719077281128072e-05, "loss": 1.0246, "step": 2803 }, { "epoch": 0.18998577139372586, "grad_norm": 8.960352897644043, "learning_rate": 9.718940379218291e-05, "loss": 1.0316, "step": 2804 }, { "epoch": 0.19005352666169795, "grad_norm": 9.522171020507812, "learning_rate": 9.718803477308509e-05, "loss": 1.0219, "step": 2805 }, { "epoch": 0.19012128192967004, "grad_norm": 8.412702560424805, "learning_rate": 9.718666575398727e-05, "loss": 1.0609, "step": 2806 }, { "epoch": 0.19018903719764213, "grad_norm": 8.777839660644531, "learning_rate": 9.718529673488945e-05, "loss": 1.0854, "step": 2807 }, { "epoch": 0.1902567924656142, "grad_norm": 8.932796478271484, "learning_rate": 9.718392771579165e-05, "loss": 1.1661, "step": 2808 }, { "epoch": 0.19032454773358629, "grad_norm": 8.514800071716309, "learning_rate": 9.718255869669383e-05, "loss": 1.0584, "step": 2809 }, { "epoch": 0.19039230300155838, "grad_norm": 7.001948356628418, "learning_rate": 9.718118967759601e-05, "loss": 0.8479, "step": 2810 }, { "epoch": 0.19046005826953047, "grad_norm": 8.977307319641113, "learning_rate": 9.717982065849819e-05, "loss": 1.2556, "step": 2811 }, { "epoch": 0.19052781353750253, "grad_norm": 8.513920783996582, "learning_rate": 9.717845163940037e-05, "loss": 1.2076, "step": 2812 }, { "epoch": 0.19059556880547462, "grad_norm": 8.556622505187988, "learning_rate": 9.717708262030256e-05, "loss": 1.0491, "step": 2813 }, { "epoch": 0.19066332407344672, "grad_norm": 9.8518648147583, "learning_rate": 9.717571360120474e-05, "loss": 1.1179, "step": 2814 }, { "epoch": 0.1907310793414188, "grad_norm": 8.887413024902344, "learning_rate": 9.717434458210692e-05, "loss": 1.0675, "step": 2815 }, { "epoch": 0.19079883460939087, "grad_norm": 7.888981342315674, "learning_rate": 9.71729755630091e-05, "loss": 1.0489, "step": 2816 }, { "epoch": 0.19086658987736296, "grad_norm": 7.692848205566406, "learning_rate": 9.71716065439113e-05, "loss": 1.0602, "step": 2817 }, { "epoch": 0.19093434514533505, "grad_norm": 8.591787338256836, "learning_rate": 9.717023752481348e-05, "loss": 0.8737, "step": 2818 }, { "epoch": 0.19100210041330715, "grad_norm": 9.622870445251465, "learning_rate": 9.716886850571566e-05, "loss": 0.9717, "step": 2819 }, { "epoch": 0.1910698556812792, "grad_norm": 7.949582576751709, "learning_rate": 9.716749948661784e-05, "loss": 0.8651, "step": 2820 }, { "epoch": 0.1911376109492513, "grad_norm": 9.46272087097168, "learning_rate": 9.716613046752002e-05, "loss": 0.9975, "step": 2821 }, { "epoch": 0.1912053662172234, "grad_norm": 8.86549186706543, "learning_rate": 9.716476144842221e-05, "loss": 1.0805, "step": 2822 }, { "epoch": 0.19127312148519549, "grad_norm": 8.039673805236816, "learning_rate": 9.716339242932439e-05, "loss": 1.121, "step": 2823 }, { "epoch": 0.19134087675316755, "grad_norm": 8.606979370117188, "learning_rate": 9.716202341022657e-05, "loss": 0.9217, "step": 2824 }, { "epoch": 0.19140863202113964, "grad_norm": 10.429420471191406, "learning_rate": 9.716065439112875e-05, "loss": 1.2266, "step": 2825 }, { "epoch": 0.19147638728911173, "grad_norm": 7.339411735534668, "learning_rate": 9.715928537203093e-05, "loss": 1.0235, "step": 2826 }, { "epoch": 0.19154414255708382, "grad_norm": 8.247300148010254, "learning_rate": 9.715791635293313e-05, "loss": 0.8993, "step": 2827 }, { "epoch": 0.1916118978250559, "grad_norm": 8.475278854370117, "learning_rate": 9.715654733383531e-05, "loss": 0.9611, "step": 2828 }, { "epoch": 0.19167965309302798, "grad_norm": 9.792519569396973, "learning_rate": 9.715517831473749e-05, "loss": 1.0831, "step": 2829 }, { "epoch": 0.19174740836100007, "grad_norm": 6.757070541381836, "learning_rate": 9.715380929563967e-05, "loss": 1.0282, "step": 2830 }, { "epoch": 0.19181516362897216, "grad_norm": 6.707785129547119, "learning_rate": 9.715244027654186e-05, "loss": 0.7813, "step": 2831 }, { "epoch": 0.19188291889694423, "grad_norm": 9.990489959716797, "learning_rate": 9.715107125744404e-05, "loss": 1.0504, "step": 2832 }, { "epoch": 0.19195067416491632, "grad_norm": 10.57358169555664, "learning_rate": 9.714970223834622e-05, "loss": 0.9355, "step": 2833 }, { "epoch": 0.1920184294328884, "grad_norm": 9.225300788879395, "learning_rate": 9.714833321924842e-05, "loss": 1.2404, "step": 2834 }, { "epoch": 0.1920861847008605, "grad_norm": 8.36042594909668, "learning_rate": 9.71469642001506e-05, "loss": 0.9012, "step": 2835 }, { "epoch": 0.19215393996883257, "grad_norm": 9.624984741210938, "learning_rate": 9.714559518105278e-05, "loss": 1.0509, "step": 2836 }, { "epoch": 0.19222169523680466, "grad_norm": 12.569930076599121, "learning_rate": 9.714422616195497e-05, "loss": 1.2663, "step": 2837 }, { "epoch": 0.19228945050477675, "grad_norm": 12.278885841369629, "learning_rate": 9.714285714285715e-05, "loss": 1.1547, "step": 2838 }, { "epoch": 0.19235720577274884, "grad_norm": 9.3023681640625, "learning_rate": 9.714148812375933e-05, "loss": 1.0873, "step": 2839 }, { "epoch": 0.1924249610407209, "grad_norm": 8.241714477539062, "learning_rate": 9.714011910466153e-05, "loss": 0.8222, "step": 2840 }, { "epoch": 0.192492716308693, "grad_norm": 10.45174503326416, "learning_rate": 9.71387500855637e-05, "loss": 1.1974, "step": 2841 }, { "epoch": 0.1925604715766651, "grad_norm": 8.096826553344727, "learning_rate": 9.713738106646589e-05, "loss": 1.1147, "step": 2842 }, { "epoch": 0.19262822684463718, "grad_norm": 9.053191184997559, "learning_rate": 9.713601204736807e-05, "loss": 0.9835, "step": 2843 }, { "epoch": 0.19269598211260924, "grad_norm": 8.050823211669922, "learning_rate": 9.713464302827025e-05, "loss": 1.0826, "step": 2844 }, { "epoch": 0.19276373738058133, "grad_norm": 10.000917434692383, "learning_rate": 9.713327400917244e-05, "loss": 1.1723, "step": 2845 }, { "epoch": 0.19283149264855343, "grad_norm": 8.992589950561523, "learning_rate": 9.713190499007462e-05, "loss": 1.0203, "step": 2846 }, { "epoch": 0.19289924791652552, "grad_norm": 10.340285301208496, "learning_rate": 9.71305359709768e-05, "loss": 1.2523, "step": 2847 }, { "epoch": 0.19296700318449758, "grad_norm": 8.250594139099121, "learning_rate": 9.712916695187898e-05, "loss": 1.0606, "step": 2848 }, { "epoch": 0.19303475845246967, "grad_norm": 7.16335916519165, "learning_rate": 9.712779793278116e-05, "loss": 0.9988, "step": 2849 }, { "epoch": 0.19310251372044177, "grad_norm": 9.089055061340332, "learning_rate": 9.712642891368336e-05, "loss": 0.9368, "step": 2850 }, { "epoch": 0.19317026898841386, "grad_norm": 7.568434238433838, "learning_rate": 9.712505989458554e-05, "loss": 0.9417, "step": 2851 }, { "epoch": 0.19323802425638592, "grad_norm": 8.470823287963867, "learning_rate": 9.712369087548772e-05, "loss": 1.0271, "step": 2852 }, { "epoch": 0.193305779524358, "grad_norm": 7.746623992919922, "learning_rate": 9.71223218563899e-05, "loss": 0.7641, "step": 2853 }, { "epoch": 0.1933735347923301, "grad_norm": 7.44852352142334, "learning_rate": 9.712095283729209e-05, "loss": 0.9984, "step": 2854 }, { "epoch": 0.1934412900603022, "grad_norm": 7.393777847290039, "learning_rate": 9.711958381819427e-05, "loss": 0.8515, "step": 2855 }, { "epoch": 0.19350904532827426, "grad_norm": 8.247236251831055, "learning_rate": 9.711821479909645e-05, "loss": 0.9867, "step": 2856 }, { "epoch": 0.19357680059624635, "grad_norm": 8.484920501708984, "learning_rate": 9.711684577999863e-05, "loss": 0.9099, "step": 2857 }, { "epoch": 0.19364455586421844, "grad_norm": 9.85857105255127, "learning_rate": 9.711547676090081e-05, "loss": 1.0134, "step": 2858 }, { "epoch": 0.19371231113219053, "grad_norm": 12.191691398620605, "learning_rate": 9.7114107741803e-05, "loss": 1.15, "step": 2859 }, { "epoch": 0.19378006640016263, "grad_norm": 9.691742897033691, "learning_rate": 9.711273872270519e-05, "loss": 1.2415, "step": 2860 }, { "epoch": 0.1938478216681347, "grad_norm": 11.395289421081543, "learning_rate": 9.711136970360737e-05, "loss": 1.3239, "step": 2861 }, { "epoch": 0.19391557693610678, "grad_norm": 9.222856521606445, "learning_rate": 9.711000068450955e-05, "loss": 0.9087, "step": 2862 }, { "epoch": 0.19398333220407887, "grad_norm": 8.062904357910156, "learning_rate": 9.710863166541174e-05, "loss": 1.093, "step": 2863 }, { "epoch": 0.19405108747205096, "grad_norm": 8.160481452941895, "learning_rate": 9.710726264631392e-05, "loss": 0.842, "step": 2864 }, { "epoch": 0.19411884274002303, "grad_norm": 8.165858268737793, "learning_rate": 9.71058936272161e-05, "loss": 0.8807, "step": 2865 }, { "epoch": 0.19418659800799512, "grad_norm": 8.120240211486816, "learning_rate": 9.710452460811828e-05, "loss": 0.9733, "step": 2866 }, { "epoch": 0.1942543532759672, "grad_norm": 11.363536834716797, "learning_rate": 9.710315558902046e-05, "loss": 1.1144, "step": 2867 }, { "epoch": 0.1943221085439393, "grad_norm": 8.776150703430176, "learning_rate": 9.710178656992266e-05, "loss": 0.7561, "step": 2868 }, { "epoch": 0.19438986381191137, "grad_norm": 9.393696784973145, "learning_rate": 9.710041755082484e-05, "loss": 0.8092, "step": 2869 }, { "epoch": 0.19445761907988346, "grad_norm": 10.820277214050293, "learning_rate": 9.709904853172702e-05, "loss": 0.9905, "step": 2870 }, { "epoch": 0.19452537434785555, "grad_norm": 8.041844367980957, "learning_rate": 9.70976795126292e-05, "loss": 0.8801, "step": 2871 }, { "epoch": 0.19459312961582764, "grad_norm": 8.05355167388916, "learning_rate": 9.709631049353138e-05, "loss": 0.9946, "step": 2872 }, { "epoch": 0.1946608848837997, "grad_norm": 9.46949577331543, "learning_rate": 9.709494147443357e-05, "loss": 1.0582, "step": 2873 }, { "epoch": 0.1947286401517718, "grad_norm": 8.796204566955566, "learning_rate": 9.709357245533575e-05, "loss": 1.1542, "step": 2874 }, { "epoch": 0.1947963954197439, "grad_norm": 10.45006275177002, "learning_rate": 9.709220343623793e-05, "loss": 1.2363, "step": 2875 }, { "epoch": 0.19486415068771598, "grad_norm": 10.490102767944336, "learning_rate": 9.709083441714011e-05, "loss": 1.1977, "step": 2876 }, { "epoch": 0.19493190595568805, "grad_norm": 9.048376083374023, "learning_rate": 9.708946539804231e-05, "loss": 0.8127, "step": 2877 }, { "epoch": 0.19499966122366014, "grad_norm": 6.523111343383789, "learning_rate": 9.708809637894449e-05, "loss": 0.8182, "step": 2878 }, { "epoch": 0.19506741649163223, "grad_norm": 7.170145511627197, "learning_rate": 9.708672735984667e-05, "loss": 0.9643, "step": 2879 }, { "epoch": 0.19513517175960432, "grad_norm": 7.9981818199157715, "learning_rate": 9.708535834074886e-05, "loss": 1.3179, "step": 2880 }, { "epoch": 0.19520292702757638, "grad_norm": 8.786405563354492, "learning_rate": 9.708398932165104e-05, "loss": 0.9217, "step": 2881 }, { "epoch": 0.19527068229554848, "grad_norm": 6.7907867431640625, "learning_rate": 9.708262030255322e-05, "loss": 1.0381, "step": 2882 }, { "epoch": 0.19533843756352057, "grad_norm": 7.1751556396484375, "learning_rate": 9.708125128345542e-05, "loss": 0.8334, "step": 2883 }, { "epoch": 0.19540619283149266, "grad_norm": 8.236610412597656, "learning_rate": 9.70798822643576e-05, "loss": 1.2058, "step": 2884 }, { "epoch": 0.19547394809946472, "grad_norm": 7.576273441314697, "learning_rate": 9.707851324525978e-05, "loss": 1.091, "step": 2885 }, { "epoch": 0.19554170336743681, "grad_norm": 8.698029518127441, "learning_rate": 9.707714422616197e-05, "loss": 1.0262, "step": 2886 }, { "epoch": 0.1956094586354089, "grad_norm": 8.583345413208008, "learning_rate": 9.707577520706415e-05, "loss": 1.0401, "step": 2887 }, { "epoch": 0.195677213903381, "grad_norm": 7.242405891418457, "learning_rate": 9.707440618796633e-05, "loss": 0.785, "step": 2888 }, { "epoch": 0.19574496917135306, "grad_norm": 8.4541654586792, "learning_rate": 9.707303716886851e-05, "loss": 1.0537, "step": 2889 }, { "epoch": 0.19581272443932515, "grad_norm": 7.838657855987549, "learning_rate": 9.707166814977069e-05, "loss": 1.0062, "step": 2890 }, { "epoch": 0.19588047970729724, "grad_norm": 6.812248229980469, "learning_rate": 9.707029913067289e-05, "loss": 0.885, "step": 2891 }, { "epoch": 0.19594823497526934, "grad_norm": 8.789966583251953, "learning_rate": 9.706893011157507e-05, "loss": 1.0366, "step": 2892 }, { "epoch": 0.1960159902432414, "grad_norm": 8.860052108764648, "learning_rate": 9.706756109247725e-05, "loss": 1.2247, "step": 2893 }, { "epoch": 0.1960837455112135, "grad_norm": 11.235320091247559, "learning_rate": 9.706619207337943e-05, "loss": 1.1216, "step": 2894 }, { "epoch": 0.19615150077918558, "grad_norm": 8.865259170532227, "learning_rate": 9.706482305428162e-05, "loss": 0.9467, "step": 2895 }, { "epoch": 0.19621925604715768, "grad_norm": 8.345112800598145, "learning_rate": 9.70634540351838e-05, "loss": 0.9849, "step": 2896 }, { "epoch": 0.19628701131512974, "grad_norm": 8.98128604888916, "learning_rate": 9.706208501608598e-05, "loss": 1.1421, "step": 2897 }, { "epoch": 0.19635476658310183, "grad_norm": 7.786384582519531, "learning_rate": 9.706071599698816e-05, "loss": 1.0926, "step": 2898 }, { "epoch": 0.19642252185107392, "grad_norm": 8.816730499267578, "learning_rate": 9.705934697789034e-05, "loss": 1.2626, "step": 2899 }, { "epoch": 0.19649027711904601, "grad_norm": 7.831095218658447, "learning_rate": 9.705797795879254e-05, "loss": 0.9355, "step": 2900 }, { "epoch": 0.19655803238701808, "grad_norm": 8.788371086120605, "learning_rate": 9.705660893969472e-05, "loss": 0.8883, "step": 2901 }, { "epoch": 0.19662578765499017, "grad_norm": 8.88425064086914, "learning_rate": 9.70552399205969e-05, "loss": 1.1169, "step": 2902 }, { "epoch": 0.19669354292296226, "grad_norm": 9.648268699645996, "learning_rate": 9.705387090149908e-05, "loss": 1.0628, "step": 2903 }, { "epoch": 0.19676129819093435, "grad_norm": 8.19676685333252, "learning_rate": 9.705250188240126e-05, "loss": 0.9395, "step": 2904 }, { "epoch": 0.19682905345890642, "grad_norm": 7.420725345611572, "learning_rate": 9.705113286330345e-05, "loss": 0.947, "step": 2905 }, { "epoch": 0.1968968087268785, "grad_norm": 7.221796989440918, "learning_rate": 9.704976384420563e-05, "loss": 0.8491, "step": 2906 }, { "epoch": 0.1969645639948506, "grad_norm": 9.932676315307617, "learning_rate": 9.704839482510781e-05, "loss": 1.117, "step": 2907 }, { "epoch": 0.1970323192628227, "grad_norm": 11.816266059875488, "learning_rate": 9.704702580600999e-05, "loss": 1.4347, "step": 2908 }, { "epoch": 0.19710007453079476, "grad_norm": 8.804407119750977, "learning_rate": 9.704565678691219e-05, "loss": 0.808, "step": 2909 }, { "epoch": 0.19716782979876685, "grad_norm": 7.740353584289551, "learning_rate": 9.704428776781437e-05, "loss": 1.0732, "step": 2910 }, { "epoch": 0.19723558506673894, "grad_norm": 12.040196418762207, "learning_rate": 9.704291874871655e-05, "loss": 0.947, "step": 2911 }, { "epoch": 0.19730334033471103, "grad_norm": 7.727171421051025, "learning_rate": 9.704154972961873e-05, "loss": 0.8211, "step": 2912 }, { "epoch": 0.19737109560268312, "grad_norm": 8.779428482055664, "learning_rate": 9.704018071052091e-05, "loss": 1.2204, "step": 2913 }, { "epoch": 0.19743885087065519, "grad_norm": 7.907576084136963, "learning_rate": 9.70388116914231e-05, "loss": 1.0594, "step": 2914 }, { "epoch": 0.19750660613862728, "grad_norm": 6.769292831420898, "learning_rate": 9.703744267232528e-05, "loss": 0.8743, "step": 2915 }, { "epoch": 0.19757436140659937, "grad_norm": 8.966355323791504, "learning_rate": 9.703607365322746e-05, "loss": 1.1607, "step": 2916 }, { "epoch": 0.19764211667457146, "grad_norm": 7.389810085296631, "learning_rate": 9.703470463412964e-05, "loss": 0.9728, "step": 2917 }, { "epoch": 0.19770987194254352, "grad_norm": 10.877386093139648, "learning_rate": 9.703333561503184e-05, "loss": 1.0623, "step": 2918 }, { "epoch": 0.19777762721051562, "grad_norm": 9.160116195678711, "learning_rate": 9.703196659593402e-05, "loss": 1.1374, "step": 2919 }, { "epoch": 0.1978453824784877, "grad_norm": 7.6289167404174805, "learning_rate": 9.70305975768362e-05, "loss": 0.9498, "step": 2920 }, { "epoch": 0.1979131377464598, "grad_norm": 8.663583755493164, "learning_rate": 9.702922855773838e-05, "loss": 1.1595, "step": 2921 }, { "epoch": 0.19798089301443186, "grad_norm": 7.595486640930176, "learning_rate": 9.702785953864056e-05, "loss": 1.0623, "step": 2922 }, { "epoch": 0.19804864828240396, "grad_norm": 8.680171966552734, "learning_rate": 9.702649051954275e-05, "loss": 1.0138, "step": 2923 }, { "epoch": 0.19811640355037605, "grad_norm": 9.998015403747559, "learning_rate": 9.702512150044493e-05, "loss": 1.2006, "step": 2924 }, { "epoch": 0.19818415881834814, "grad_norm": 7.801429271697998, "learning_rate": 9.702375248134711e-05, "loss": 0.8467, "step": 2925 }, { "epoch": 0.1982519140863202, "grad_norm": 8.570688247680664, "learning_rate": 9.70223834622493e-05, "loss": 1.1439, "step": 2926 }, { "epoch": 0.1983196693542923, "grad_norm": 7.835936069488525, "learning_rate": 9.702101444315149e-05, "loss": 0.9671, "step": 2927 }, { "epoch": 0.19838742462226439, "grad_norm": 8.415708541870117, "learning_rate": 9.701964542405367e-05, "loss": 1.0381, "step": 2928 }, { "epoch": 0.19845517989023648, "grad_norm": 10.370524406433105, "learning_rate": 9.701827640495586e-05, "loss": 1.151, "step": 2929 }, { "epoch": 0.19852293515820854, "grad_norm": 8.228797912597656, "learning_rate": 9.701690738585804e-05, "loss": 1.093, "step": 2930 }, { "epoch": 0.19859069042618063, "grad_norm": 8.070756912231445, "learning_rate": 9.701553836676022e-05, "loss": 1.0272, "step": 2931 }, { "epoch": 0.19865844569415272, "grad_norm": 9.19532585144043, "learning_rate": 9.701416934766242e-05, "loss": 1.1195, "step": 2932 }, { "epoch": 0.19872620096212482, "grad_norm": 10.692606925964355, "learning_rate": 9.70128003285646e-05, "loss": 1.3148, "step": 2933 }, { "epoch": 0.19879395623009688, "grad_norm": 10.058424949645996, "learning_rate": 9.701143130946678e-05, "loss": 1.4236, "step": 2934 }, { "epoch": 0.19886171149806897, "grad_norm": 6.197395324707031, "learning_rate": 9.701006229036896e-05, "loss": 0.8822, "step": 2935 }, { "epoch": 0.19892946676604106, "grad_norm": 7.603270053863525, "learning_rate": 9.700869327127114e-05, "loss": 0.9655, "step": 2936 }, { "epoch": 0.19899722203401315, "grad_norm": 8.192676544189453, "learning_rate": 9.700732425217333e-05, "loss": 0.9668, "step": 2937 }, { "epoch": 0.19906497730198522, "grad_norm": 7.121623992919922, "learning_rate": 9.700595523307551e-05, "loss": 1.0037, "step": 2938 }, { "epoch": 0.1991327325699573, "grad_norm": 6.8974127769470215, "learning_rate": 9.700458621397769e-05, "loss": 0.876, "step": 2939 }, { "epoch": 0.1992004878379294, "grad_norm": 7.590656757354736, "learning_rate": 9.700321719487987e-05, "loss": 1.0346, "step": 2940 }, { "epoch": 0.1992682431059015, "grad_norm": 8.530266761779785, "learning_rate": 9.700184817578207e-05, "loss": 1.0882, "step": 2941 }, { "epoch": 0.19933599837387356, "grad_norm": 8.064129829406738, "learning_rate": 9.700047915668425e-05, "loss": 0.9949, "step": 2942 }, { "epoch": 0.19940375364184565, "grad_norm": 7.23117208480835, "learning_rate": 9.699911013758643e-05, "loss": 0.7269, "step": 2943 }, { "epoch": 0.19947150890981774, "grad_norm": 7.326268196105957, "learning_rate": 9.69977411184886e-05, "loss": 0.8542, "step": 2944 }, { "epoch": 0.19953926417778983, "grad_norm": 7.708505153656006, "learning_rate": 9.699637209939079e-05, "loss": 0.8206, "step": 2945 }, { "epoch": 0.1996070194457619, "grad_norm": 10.134513854980469, "learning_rate": 9.699500308029298e-05, "loss": 1.0079, "step": 2946 }, { "epoch": 0.199674774713734, "grad_norm": 9.264663696289062, "learning_rate": 9.699363406119516e-05, "loss": 1.1702, "step": 2947 }, { "epoch": 0.19974252998170608, "grad_norm": 8.894827842712402, "learning_rate": 9.699226504209734e-05, "loss": 0.9184, "step": 2948 }, { "epoch": 0.19981028524967817, "grad_norm": 6.935434341430664, "learning_rate": 9.699089602299952e-05, "loss": 0.8554, "step": 2949 }, { "epoch": 0.19987804051765024, "grad_norm": 9.57607364654541, "learning_rate": 9.698952700390172e-05, "loss": 1.0937, "step": 2950 }, { "epoch": 0.19994579578562233, "grad_norm": 7.99752140045166, "learning_rate": 9.69881579848039e-05, "loss": 1.0797, "step": 2951 }, { "epoch": 0.20001355105359442, "grad_norm": 8.067659378051758, "learning_rate": 9.698678896570608e-05, "loss": 1.0515, "step": 2952 }, { "epoch": 0.2000813063215665, "grad_norm": 9.66697883605957, "learning_rate": 9.698541994660826e-05, "loss": 1.289, "step": 2953 }, { "epoch": 0.20014906158953857, "grad_norm": 7.3660664558410645, "learning_rate": 9.698405092751044e-05, "loss": 0.7409, "step": 2954 }, { "epoch": 0.20021681685751067, "grad_norm": 8.254073143005371, "learning_rate": 9.698268190841263e-05, "loss": 1.1095, "step": 2955 }, { "epoch": 0.20028457212548276, "grad_norm": 8.221102714538574, "learning_rate": 9.698131288931481e-05, "loss": 1.0083, "step": 2956 }, { "epoch": 0.20035232739345485, "grad_norm": 9.813411712646484, "learning_rate": 9.697994387021699e-05, "loss": 1.2888, "step": 2957 }, { "epoch": 0.2004200826614269, "grad_norm": 10.87628173828125, "learning_rate": 9.697857485111917e-05, "loss": 1.16, "step": 2958 }, { "epoch": 0.200487837929399, "grad_norm": 7.094732284545898, "learning_rate": 9.697720583202135e-05, "loss": 0.8959, "step": 2959 }, { "epoch": 0.2005555931973711, "grad_norm": 9.814677238464355, "learning_rate": 9.697583681292355e-05, "loss": 1.1011, "step": 2960 }, { "epoch": 0.2006233484653432, "grad_norm": 8.982966423034668, "learning_rate": 9.697446779382573e-05, "loss": 1.001, "step": 2961 }, { "epoch": 0.20069110373331525, "grad_norm": 8.49453067779541, "learning_rate": 9.697309877472791e-05, "loss": 0.9848, "step": 2962 }, { "epoch": 0.20075885900128734, "grad_norm": 7.239814758300781, "learning_rate": 9.697172975563009e-05, "loss": 0.8252, "step": 2963 }, { "epoch": 0.20082661426925943, "grad_norm": 10.874746322631836, "learning_rate": 9.697036073653228e-05, "loss": 1.0683, "step": 2964 }, { "epoch": 0.20089436953723153, "grad_norm": 7.679197311401367, "learning_rate": 9.696899171743446e-05, "loss": 1.2797, "step": 2965 }, { "epoch": 0.20096212480520362, "grad_norm": 10.089177131652832, "learning_rate": 9.696762269833664e-05, "loss": 1.1905, "step": 2966 }, { "epoch": 0.20102988007317568, "grad_norm": 8.182350158691406, "learning_rate": 9.696625367923882e-05, "loss": 0.9917, "step": 2967 }, { "epoch": 0.20109763534114777, "grad_norm": 7.8756256103515625, "learning_rate": 9.6964884660141e-05, "loss": 1.0368, "step": 2968 }, { "epoch": 0.20116539060911987, "grad_norm": 9.193910598754883, "learning_rate": 9.69635156410432e-05, "loss": 1.094, "step": 2969 }, { "epoch": 0.20123314587709196, "grad_norm": 8.854869842529297, "learning_rate": 9.696214662194538e-05, "loss": 1.1344, "step": 2970 }, { "epoch": 0.20130090114506402, "grad_norm": 10.419108390808105, "learning_rate": 9.696077760284756e-05, "loss": 0.9937, "step": 2971 }, { "epoch": 0.2013686564130361, "grad_norm": 9.329347610473633, "learning_rate": 9.695940858374975e-05, "loss": 0.9454, "step": 2972 }, { "epoch": 0.2014364116810082, "grad_norm": 8.23154067993164, "learning_rate": 9.695803956465193e-05, "loss": 0.8691, "step": 2973 }, { "epoch": 0.2015041669489803, "grad_norm": 8.016939163208008, "learning_rate": 9.695667054555411e-05, "loss": 0.9182, "step": 2974 }, { "epoch": 0.20157192221695236, "grad_norm": 9.717400550842285, "learning_rate": 9.69553015264563e-05, "loss": 1.1251, "step": 2975 }, { "epoch": 0.20163967748492445, "grad_norm": 10.470111846923828, "learning_rate": 9.695393250735849e-05, "loss": 1.1634, "step": 2976 }, { "epoch": 0.20170743275289654, "grad_norm": 8.540326118469238, "learning_rate": 9.695256348826067e-05, "loss": 0.9229, "step": 2977 }, { "epoch": 0.20177518802086863, "grad_norm": 6.997597694396973, "learning_rate": 9.695119446916286e-05, "loss": 0.9911, "step": 2978 }, { "epoch": 0.2018429432888407, "grad_norm": 7.23951530456543, "learning_rate": 9.694982545006504e-05, "loss": 0.8372, "step": 2979 }, { "epoch": 0.2019106985568128, "grad_norm": 9.217951774597168, "learning_rate": 9.694845643096722e-05, "loss": 0.8154, "step": 2980 }, { "epoch": 0.20197845382478488, "grad_norm": 8.128033638000488, "learning_rate": 9.69470874118694e-05, "loss": 0.9494, "step": 2981 }, { "epoch": 0.20204620909275697, "grad_norm": 8.328935623168945, "learning_rate": 9.694571839277158e-05, "loss": 1.0537, "step": 2982 }, { "epoch": 0.20211396436072904, "grad_norm": 8.877389907836914, "learning_rate": 9.694434937367378e-05, "loss": 0.8683, "step": 2983 }, { "epoch": 0.20218171962870113, "grad_norm": 7.285436630249023, "learning_rate": 9.694298035457596e-05, "loss": 0.7514, "step": 2984 }, { "epoch": 0.20224947489667322, "grad_norm": 9.209798812866211, "learning_rate": 9.694161133547814e-05, "loss": 1.0771, "step": 2985 }, { "epoch": 0.2023172301646453, "grad_norm": 8.625777244567871, "learning_rate": 9.694024231638032e-05, "loss": 1.0085, "step": 2986 }, { "epoch": 0.20238498543261738, "grad_norm": 8.50123405456543, "learning_rate": 9.693887329728251e-05, "loss": 0.969, "step": 2987 }, { "epoch": 0.20245274070058947, "grad_norm": 7.314642429351807, "learning_rate": 9.693750427818469e-05, "loss": 0.8075, "step": 2988 }, { "epoch": 0.20252049596856156, "grad_norm": 9.474241256713867, "learning_rate": 9.693613525908687e-05, "loss": 1.0989, "step": 2989 }, { "epoch": 0.20258825123653365, "grad_norm": 7.3510637283325195, "learning_rate": 9.693476623998905e-05, "loss": 0.8044, "step": 2990 }, { "epoch": 0.20265600650450571, "grad_norm": 7.203106880187988, "learning_rate": 9.693339722089123e-05, "loss": 1.0359, "step": 2991 }, { "epoch": 0.2027237617724778, "grad_norm": 6.71024227142334, "learning_rate": 9.693202820179343e-05, "loss": 0.8687, "step": 2992 }, { "epoch": 0.2027915170404499, "grad_norm": 8.327759742736816, "learning_rate": 9.69306591826956e-05, "loss": 1.0042, "step": 2993 }, { "epoch": 0.202859272308422, "grad_norm": 8.682476997375488, "learning_rate": 9.692929016359779e-05, "loss": 1.0273, "step": 2994 }, { "epoch": 0.20292702757639405, "grad_norm": 8.555792808532715, "learning_rate": 9.692792114449997e-05, "loss": 0.8712, "step": 2995 }, { "epoch": 0.20299478284436615, "grad_norm": 11.036639213562012, "learning_rate": 9.692655212540216e-05, "loss": 1.1452, "step": 2996 }, { "epoch": 0.20306253811233824, "grad_norm": 10.207952499389648, "learning_rate": 9.692518310630434e-05, "loss": 1.2267, "step": 2997 }, { "epoch": 0.20313029338031033, "grad_norm": 8.715107917785645, "learning_rate": 9.692381408720652e-05, "loss": 1.151, "step": 2998 }, { "epoch": 0.2031980486482824, "grad_norm": 8.72461986541748, "learning_rate": 9.69224450681087e-05, "loss": 0.9787, "step": 2999 }, { "epoch": 0.20326580391625448, "grad_norm": 8.717243194580078, "learning_rate": 9.692107604901088e-05, "loss": 1.0734, "step": 3000 }, { "epoch": 0.20333355918422658, "grad_norm": 7.039597511291504, "learning_rate": 9.691970702991308e-05, "loss": 0.9215, "step": 3001 }, { "epoch": 0.20340131445219867, "grad_norm": 10.568238258361816, "learning_rate": 9.691833801081526e-05, "loss": 1.2248, "step": 3002 }, { "epoch": 0.20346906972017073, "grad_norm": 9.515549659729004, "learning_rate": 9.691696899171744e-05, "loss": 1.1401, "step": 3003 }, { "epoch": 0.20353682498814282, "grad_norm": 9.650483131408691, "learning_rate": 9.691559997261962e-05, "loss": 0.9839, "step": 3004 }, { "epoch": 0.20360458025611491, "grad_norm": 6.817119598388672, "learning_rate": 9.691423095352181e-05, "loss": 0.8542, "step": 3005 }, { "epoch": 0.203672335524087, "grad_norm": 7.616591930389404, "learning_rate": 9.691286193442399e-05, "loss": 0.9647, "step": 3006 }, { "epoch": 0.20374009079205907, "grad_norm": 7.2600274085998535, "learning_rate": 9.691149291532617e-05, "loss": 0.962, "step": 3007 }, { "epoch": 0.20380784606003116, "grad_norm": 9.714008331298828, "learning_rate": 9.691012389622835e-05, "loss": 1.1567, "step": 3008 }, { "epoch": 0.20387560132800325, "grad_norm": 9.095394134521484, "learning_rate": 9.690875487713053e-05, "loss": 1.1315, "step": 3009 }, { "epoch": 0.20394335659597534, "grad_norm": 9.683954238891602, "learning_rate": 9.690738585803273e-05, "loss": 1.4039, "step": 3010 }, { "epoch": 0.2040111118639474, "grad_norm": 8.829015731811523, "learning_rate": 9.69060168389349e-05, "loss": 0.9176, "step": 3011 }, { "epoch": 0.2040788671319195, "grad_norm": 7.835269927978516, "learning_rate": 9.690464781983709e-05, "loss": 1.0077, "step": 3012 }, { "epoch": 0.2041466223998916, "grad_norm": 9.94642448425293, "learning_rate": 9.690327880073927e-05, "loss": 1.0815, "step": 3013 }, { "epoch": 0.20421437766786368, "grad_norm": 8.184757232666016, "learning_rate": 9.690190978164145e-05, "loss": 0.84, "step": 3014 }, { "epoch": 0.20428213293583575, "grad_norm": 9.060220718383789, "learning_rate": 9.690054076254364e-05, "loss": 0.907, "step": 3015 }, { "epoch": 0.20434988820380784, "grad_norm": 6.848534107208252, "learning_rate": 9.689917174344582e-05, "loss": 0.7549, "step": 3016 }, { "epoch": 0.20441764347177993, "grad_norm": 7.820966720581055, "learning_rate": 9.6897802724348e-05, "loss": 0.9899, "step": 3017 }, { "epoch": 0.20448539873975202, "grad_norm": 10.148963928222656, "learning_rate": 9.68964337052502e-05, "loss": 1.2403, "step": 3018 }, { "epoch": 0.20455315400772411, "grad_norm": 8.273184776306152, "learning_rate": 9.689506468615238e-05, "loss": 1.0002, "step": 3019 }, { "epoch": 0.20462090927569618, "grad_norm": 7.618801593780518, "learning_rate": 9.689369566705456e-05, "loss": 0.8864, "step": 3020 }, { "epoch": 0.20468866454366827, "grad_norm": 7.952611446380615, "learning_rate": 9.689232664795675e-05, "loss": 0.8002, "step": 3021 }, { "epoch": 0.20475641981164036, "grad_norm": 9.938977241516113, "learning_rate": 9.689095762885893e-05, "loss": 0.8956, "step": 3022 }, { "epoch": 0.20482417507961245, "grad_norm": 7.807236194610596, "learning_rate": 9.688958860976111e-05, "loss": 1.1777, "step": 3023 }, { "epoch": 0.20489193034758452, "grad_norm": 7.7249369621276855, "learning_rate": 9.68882195906633e-05, "loss": 0.8881, "step": 3024 }, { "epoch": 0.2049596856155566, "grad_norm": 7.747461795806885, "learning_rate": 9.688685057156549e-05, "loss": 0.9844, "step": 3025 }, { "epoch": 0.2050274408835287, "grad_norm": 10.017412185668945, "learning_rate": 9.688548155246767e-05, "loss": 1.0815, "step": 3026 }, { "epoch": 0.2050951961515008, "grad_norm": 6.54990816116333, "learning_rate": 9.688411253336985e-05, "loss": 0.9319, "step": 3027 }, { "epoch": 0.20516295141947286, "grad_norm": 7.358734130859375, "learning_rate": 9.688274351427204e-05, "loss": 0.9313, "step": 3028 }, { "epoch": 0.20523070668744495, "grad_norm": 7.608468055725098, "learning_rate": 9.688137449517422e-05, "loss": 1.0071, "step": 3029 }, { "epoch": 0.20529846195541704, "grad_norm": 7.013155937194824, "learning_rate": 9.68800054760764e-05, "loss": 0.9419, "step": 3030 }, { "epoch": 0.20536621722338913, "grad_norm": 9.907796859741211, "learning_rate": 9.687863645697858e-05, "loss": 1.2082, "step": 3031 }, { "epoch": 0.2054339724913612, "grad_norm": 9.059138298034668, "learning_rate": 9.687726743788076e-05, "loss": 1.0712, "step": 3032 }, { "epoch": 0.20550172775933329, "grad_norm": 10.519928932189941, "learning_rate": 9.687589841878296e-05, "loss": 1.1627, "step": 3033 }, { "epoch": 0.20556948302730538, "grad_norm": 8.381184577941895, "learning_rate": 9.687452939968514e-05, "loss": 1.087, "step": 3034 }, { "epoch": 0.20563723829527747, "grad_norm": 6.514460563659668, "learning_rate": 9.687316038058732e-05, "loss": 0.8657, "step": 3035 }, { "epoch": 0.20570499356324953, "grad_norm": 10.034708023071289, "learning_rate": 9.68717913614895e-05, "loss": 1.1529, "step": 3036 }, { "epoch": 0.20577274883122162, "grad_norm": 7.202263355255127, "learning_rate": 9.687042234239168e-05, "loss": 0.7923, "step": 3037 }, { "epoch": 0.20584050409919372, "grad_norm": 9.7435302734375, "learning_rate": 9.686905332329387e-05, "loss": 1.5071, "step": 3038 }, { "epoch": 0.2059082593671658, "grad_norm": 9.57016658782959, "learning_rate": 9.686768430419605e-05, "loss": 0.883, "step": 3039 }, { "epoch": 0.20597601463513787, "grad_norm": 7.3575029373168945, "learning_rate": 9.686631528509823e-05, "loss": 0.9999, "step": 3040 }, { "epoch": 0.20604376990310996, "grad_norm": 9.0224027633667, "learning_rate": 9.686494626600041e-05, "loss": 1.0121, "step": 3041 }, { "epoch": 0.20611152517108206, "grad_norm": 11.173224449157715, "learning_rate": 9.68635772469026e-05, "loss": 1.0556, "step": 3042 }, { "epoch": 0.20617928043905415, "grad_norm": 8.858287811279297, "learning_rate": 9.686220822780479e-05, "loss": 0.9377, "step": 3043 }, { "epoch": 0.2062470357070262, "grad_norm": 7.3096795082092285, "learning_rate": 9.686083920870697e-05, "loss": 0.8808, "step": 3044 }, { "epoch": 0.2063147909749983, "grad_norm": 8.700214385986328, "learning_rate": 9.685947018960915e-05, "loss": 1.2121, "step": 3045 }, { "epoch": 0.2063825462429704, "grad_norm": 10.57944107055664, "learning_rate": 9.685810117051133e-05, "loss": 1.0931, "step": 3046 }, { "epoch": 0.20645030151094249, "grad_norm": 8.765487670898438, "learning_rate": 9.685673215141352e-05, "loss": 1.0522, "step": 3047 }, { "epoch": 0.20651805677891455, "grad_norm": 7.717139720916748, "learning_rate": 9.68553631323157e-05, "loss": 0.9492, "step": 3048 }, { "epoch": 0.20658581204688664, "grad_norm": 9.301026344299316, "learning_rate": 9.685399411321788e-05, "loss": 1.0191, "step": 3049 }, { "epoch": 0.20665356731485873, "grad_norm": 10.251668930053711, "learning_rate": 9.685262509412006e-05, "loss": 0.991, "step": 3050 }, { "epoch": 0.20672132258283082, "grad_norm": 11.597551345825195, "learning_rate": 9.685125607502226e-05, "loss": 1.0169, "step": 3051 }, { "epoch": 0.2067890778508029, "grad_norm": 10.293901443481445, "learning_rate": 9.684988705592444e-05, "loss": 1.0623, "step": 3052 }, { "epoch": 0.20685683311877498, "grad_norm": 8.496854782104492, "learning_rate": 9.684851803682662e-05, "loss": 1.0466, "step": 3053 }, { "epoch": 0.20692458838674707, "grad_norm": 6.76383638381958, "learning_rate": 9.68471490177288e-05, "loss": 0.8155, "step": 3054 }, { "epoch": 0.20699234365471916, "grad_norm": 8.168519020080566, "learning_rate": 9.684577999863098e-05, "loss": 0.9663, "step": 3055 }, { "epoch": 0.20706009892269123, "grad_norm": 8.0086030960083, "learning_rate": 9.684441097953317e-05, "loss": 0.889, "step": 3056 }, { "epoch": 0.20712785419066332, "grad_norm": 8.347359657287598, "learning_rate": 9.684304196043535e-05, "loss": 0.938, "step": 3057 }, { "epoch": 0.2071956094586354, "grad_norm": 8.0283203125, "learning_rate": 9.684167294133753e-05, "loss": 0.7633, "step": 3058 }, { "epoch": 0.2072633647266075, "grad_norm": 8.509317398071289, "learning_rate": 9.684030392223971e-05, "loss": 1.1052, "step": 3059 }, { "epoch": 0.20733111999457957, "grad_norm": 9.441505432128906, "learning_rate": 9.683893490314189e-05, "loss": 1.1177, "step": 3060 }, { "epoch": 0.20739887526255166, "grad_norm": 8.131098747253418, "learning_rate": 9.683756588404409e-05, "loss": 0.9272, "step": 3061 }, { "epoch": 0.20746663053052375, "grad_norm": 8.423643112182617, "learning_rate": 9.683619686494627e-05, "loss": 0.875, "step": 3062 }, { "epoch": 0.20753438579849584, "grad_norm": 7.513223171234131, "learning_rate": 9.683482784584845e-05, "loss": 1.0659, "step": 3063 }, { "epoch": 0.2076021410664679, "grad_norm": 7.5881171226501465, "learning_rate": 9.683345882675063e-05, "loss": 0.9466, "step": 3064 }, { "epoch": 0.20766989633444, "grad_norm": 8.717775344848633, "learning_rate": 9.683208980765282e-05, "loss": 1.1019, "step": 3065 }, { "epoch": 0.2077376516024121, "grad_norm": 11.917694091796875, "learning_rate": 9.6830720788555e-05, "loss": 1.1085, "step": 3066 }, { "epoch": 0.20780540687038418, "grad_norm": 9.28741455078125, "learning_rate": 9.682935176945718e-05, "loss": 1.1458, "step": 3067 }, { "epoch": 0.20787316213835624, "grad_norm": 6.746860980987549, "learning_rate": 9.682798275035938e-05, "loss": 0.929, "step": 3068 }, { "epoch": 0.20794091740632833, "grad_norm": 8.48763370513916, "learning_rate": 9.682661373126156e-05, "loss": 1.0702, "step": 3069 }, { "epoch": 0.20800867267430043, "grad_norm": 8.871308326721191, "learning_rate": 9.682524471216374e-05, "loss": 0.9199, "step": 3070 }, { "epoch": 0.20807642794227252, "grad_norm": 8.275801658630371, "learning_rate": 9.682387569306593e-05, "loss": 1.0756, "step": 3071 }, { "epoch": 0.2081441832102446, "grad_norm": 8.985222816467285, "learning_rate": 9.682250667396811e-05, "loss": 1.2364, "step": 3072 }, { "epoch": 0.20821193847821667, "grad_norm": 7.749682426452637, "learning_rate": 9.682113765487029e-05, "loss": 0.9444, "step": 3073 }, { "epoch": 0.20827969374618877, "grad_norm": 9.111614227294922, "learning_rate": 9.681976863577248e-05, "loss": 1.338, "step": 3074 }, { "epoch": 0.20834744901416086, "grad_norm": 7.2874674797058105, "learning_rate": 9.681839961667466e-05, "loss": 0.838, "step": 3075 }, { "epoch": 0.20841520428213295, "grad_norm": 6.873099327087402, "learning_rate": 9.681703059757685e-05, "loss": 0.9194, "step": 3076 }, { "epoch": 0.208482959550105, "grad_norm": 7.564418792724609, "learning_rate": 9.681566157847903e-05, "loss": 1.1209, "step": 3077 }, { "epoch": 0.2085507148180771, "grad_norm": 9.088560104370117, "learning_rate": 9.68142925593812e-05, "loss": 0.8425, "step": 3078 }, { "epoch": 0.2086184700860492, "grad_norm": 7.182369709014893, "learning_rate": 9.68129235402834e-05, "loss": 1.0938, "step": 3079 }, { "epoch": 0.2086862253540213, "grad_norm": 8.853677749633789, "learning_rate": 9.681155452118558e-05, "loss": 1.2611, "step": 3080 }, { "epoch": 0.20875398062199335, "grad_norm": 8.56440258026123, "learning_rate": 9.681018550208776e-05, "loss": 1.1112, "step": 3081 }, { "epoch": 0.20882173588996544, "grad_norm": 8.356021881103516, "learning_rate": 9.680881648298994e-05, "loss": 1.0044, "step": 3082 }, { "epoch": 0.20888949115793753, "grad_norm": 9.083736419677734, "learning_rate": 9.680744746389213e-05, "loss": 1.2065, "step": 3083 }, { "epoch": 0.20895724642590963, "grad_norm": 7.990222454071045, "learning_rate": 9.680607844479432e-05, "loss": 1.0179, "step": 3084 }, { "epoch": 0.2090250016938817, "grad_norm": 8.381364822387695, "learning_rate": 9.68047094256965e-05, "loss": 0.8392, "step": 3085 }, { "epoch": 0.20909275696185378, "grad_norm": 9.017950057983398, "learning_rate": 9.680334040659868e-05, "loss": 0.9757, "step": 3086 }, { "epoch": 0.20916051222982587, "grad_norm": 8.525566101074219, "learning_rate": 9.680197138750086e-05, "loss": 1.1521, "step": 3087 }, { "epoch": 0.20922826749779797, "grad_norm": 8.432148933410645, "learning_rate": 9.680060236840305e-05, "loss": 0.8559, "step": 3088 }, { "epoch": 0.20929602276577003, "grad_norm": 9.985367774963379, "learning_rate": 9.679923334930523e-05, "loss": 1.2035, "step": 3089 }, { "epoch": 0.20936377803374212, "grad_norm": 9.771974563598633, "learning_rate": 9.679786433020741e-05, "loss": 1.1614, "step": 3090 }, { "epoch": 0.2094315333017142, "grad_norm": 10.646146774291992, "learning_rate": 9.679649531110959e-05, "loss": 1.2165, "step": 3091 }, { "epoch": 0.2094992885696863, "grad_norm": 8.93340015411377, "learning_rate": 9.679512629201177e-05, "loss": 0.9883, "step": 3092 }, { "epoch": 0.20956704383765837, "grad_norm": 8.498669624328613, "learning_rate": 9.679375727291397e-05, "loss": 1.0318, "step": 3093 }, { "epoch": 0.20963479910563046, "grad_norm": 7.536258220672607, "learning_rate": 9.679238825381615e-05, "loss": 0.9949, "step": 3094 }, { "epoch": 0.20970255437360255, "grad_norm": 8.314896583557129, "learning_rate": 9.679101923471833e-05, "loss": 1.0502, "step": 3095 }, { "epoch": 0.20977030964157464, "grad_norm": 7.470542907714844, "learning_rate": 9.67896502156205e-05, "loss": 0.8871, "step": 3096 }, { "epoch": 0.2098380649095467, "grad_norm": 8.951095581054688, "learning_rate": 9.67882811965227e-05, "loss": 0.9793, "step": 3097 }, { "epoch": 0.2099058201775188, "grad_norm": 7.879035472869873, "learning_rate": 9.678691217742488e-05, "loss": 1.0193, "step": 3098 }, { "epoch": 0.2099735754454909, "grad_norm": 8.890814781188965, "learning_rate": 9.678554315832706e-05, "loss": 0.9738, "step": 3099 }, { "epoch": 0.21004133071346298, "grad_norm": 8.863816261291504, "learning_rate": 9.678417413922924e-05, "loss": 1.1213, "step": 3100 }, { "epoch": 0.21010908598143505, "grad_norm": 9.59538745880127, "learning_rate": 9.678280512013142e-05, "loss": 1.0215, "step": 3101 }, { "epoch": 0.21017684124940714, "grad_norm": 8.811614990234375, "learning_rate": 9.678143610103362e-05, "loss": 0.8703, "step": 3102 }, { "epoch": 0.21024459651737923, "grad_norm": 7.274720191955566, "learning_rate": 9.67800670819358e-05, "loss": 0.898, "step": 3103 }, { "epoch": 0.21031235178535132, "grad_norm": 11.239364624023438, "learning_rate": 9.677869806283798e-05, "loss": 0.9823, "step": 3104 }, { "epoch": 0.21038010705332338, "grad_norm": 8.807086944580078, "learning_rate": 9.677732904374016e-05, "loss": 1.0074, "step": 3105 }, { "epoch": 0.21044786232129548, "grad_norm": 9.065536499023438, "learning_rate": 9.677596002464235e-05, "loss": 1.1861, "step": 3106 }, { "epoch": 0.21051561758926757, "grad_norm": 11.035104751586914, "learning_rate": 9.677459100554453e-05, "loss": 1.0067, "step": 3107 }, { "epoch": 0.21058337285723966, "grad_norm": 8.010696411132812, "learning_rate": 9.677322198644671e-05, "loss": 1.0855, "step": 3108 }, { "epoch": 0.21065112812521172, "grad_norm": 9.104195594787598, "learning_rate": 9.677185296734889e-05, "loss": 1.0497, "step": 3109 }, { "epoch": 0.21071888339318381, "grad_norm": 8.731512069702148, "learning_rate": 9.677048394825107e-05, "loss": 1.1108, "step": 3110 }, { "epoch": 0.2107866386611559, "grad_norm": 8.823514938354492, "learning_rate": 9.676911492915327e-05, "loss": 1.0271, "step": 3111 }, { "epoch": 0.210854393929128, "grad_norm": 7.446425914764404, "learning_rate": 9.676774591005545e-05, "loss": 0.9182, "step": 3112 }, { "epoch": 0.21092214919710006, "grad_norm": 10.75915241241455, "learning_rate": 9.676637689095763e-05, "loss": 0.93, "step": 3113 }, { "epoch": 0.21098990446507215, "grad_norm": 10.065240859985352, "learning_rate": 9.676500787185982e-05, "loss": 1.0508, "step": 3114 }, { "epoch": 0.21105765973304424, "grad_norm": 8.790117263793945, "learning_rate": 9.6763638852762e-05, "loss": 0.8971, "step": 3115 }, { "epoch": 0.21112541500101634, "grad_norm": 8.286596298217773, "learning_rate": 9.676226983366418e-05, "loss": 0.9403, "step": 3116 }, { "epoch": 0.2111931702689884, "grad_norm": 8.542399406433105, "learning_rate": 9.676090081456637e-05, "loss": 1.1638, "step": 3117 }, { "epoch": 0.2112609255369605, "grad_norm": 9.461727142333984, "learning_rate": 9.675953179546856e-05, "loss": 1.0859, "step": 3118 }, { "epoch": 0.21132868080493258, "grad_norm": 7.0917229652404785, "learning_rate": 9.675816277637074e-05, "loss": 0.9931, "step": 3119 }, { "epoch": 0.21139643607290468, "grad_norm": 9.192744255065918, "learning_rate": 9.675679375727293e-05, "loss": 1.2138, "step": 3120 }, { "epoch": 0.21146419134087674, "grad_norm": 7.744256973266602, "learning_rate": 9.675542473817511e-05, "loss": 1.3615, "step": 3121 }, { "epoch": 0.21153194660884883, "grad_norm": 6.915426254272461, "learning_rate": 9.675405571907729e-05, "loss": 0.89, "step": 3122 }, { "epoch": 0.21159970187682092, "grad_norm": 9.888227462768555, "learning_rate": 9.675268669997947e-05, "loss": 0.9569, "step": 3123 }, { "epoch": 0.21166745714479301, "grad_norm": 7.541590690612793, "learning_rate": 9.675131768088165e-05, "loss": 0.9638, "step": 3124 }, { "epoch": 0.2117352124127651, "grad_norm": 7.883132457733154, "learning_rate": 9.674994866178384e-05, "loss": 0.8901, "step": 3125 }, { "epoch": 0.21180296768073717, "grad_norm": 8.493675231933594, "learning_rate": 9.674857964268602e-05, "loss": 0.8894, "step": 3126 }, { "epoch": 0.21187072294870926, "grad_norm": 7.348284721374512, "learning_rate": 9.67472106235882e-05, "loss": 0.8721, "step": 3127 }, { "epoch": 0.21193847821668135, "grad_norm": 9.094710350036621, "learning_rate": 9.674584160449039e-05, "loss": 0.9255, "step": 3128 }, { "epoch": 0.21200623348465344, "grad_norm": 7.316446304321289, "learning_rate": 9.674447258539258e-05, "loss": 1.2577, "step": 3129 }, { "epoch": 0.2120739887526255, "grad_norm": 8.105271339416504, "learning_rate": 9.674310356629476e-05, "loss": 0.8157, "step": 3130 }, { "epoch": 0.2121417440205976, "grad_norm": 8.433457374572754, "learning_rate": 9.674173454719694e-05, "loss": 1.0398, "step": 3131 }, { "epoch": 0.2122094992885697, "grad_norm": 7.787237644195557, "learning_rate": 9.674036552809912e-05, "loss": 0.7095, "step": 3132 }, { "epoch": 0.21227725455654178, "grad_norm": 10.60180950164795, "learning_rate": 9.67389965090013e-05, "loss": 1.0482, "step": 3133 }, { "epoch": 0.21234500982451385, "grad_norm": 8.428773880004883, "learning_rate": 9.67376274899035e-05, "loss": 0.8677, "step": 3134 }, { "epoch": 0.21241276509248594, "grad_norm": 8.204195022583008, "learning_rate": 9.673625847080568e-05, "loss": 1.14, "step": 3135 }, { "epoch": 0.21248052036045803, "grad_norm": 6.449087619781494, "learning_rate": 9.673488945170786e-05, "loss": 1.0181, "step": 3136 }, { "epoch": 0.21254827562843012, "grad_norm": 9.588041305541992, "learning_rate": 9.673352043261004e-05, "loss": 0.9996, "step": 3137 }, { "epoch": 0.21261603089640219, "grad_norm": 9.626228332519531, "learning_rate": 9.673215141351223e-05, "loss": 1.0501, "step": 3138 }, { "epoch": 0.21268378616437428, "grad_norm": 9.036309242248535, "learning_rate": 9.673078239441441e-05, "loss": 1.093, "step": 3139 }, { "epoch": 0.21275154143234637, "grad_norm": 9.415257453918457, "learning_rate": 9.672941337531659e-05, "loss": 1.0983, "step": 3140 }, { "epoch": 0.21281929670031846, "grad_norm": 8.309000015258789, "learning_rate": 9.672804435621877e-05, "loss": 1.0803, "step": 3141 }, { "epoch": 0.21288705196829052, "grad_norm": 7.420774459838867, "learning_rate": 9.672667533712095e-05, "loss": 1.1027, "step": 3142 }, { "epoch": 0.21295480723626262, "grad_norm": 9.291664123535156, "learning_rate": 9.672530631802314e-05, "loss": 1.1617, "step": 3143 }, { "epoch": 0.2130225625042347, "grad_norm": 7.656317234039307, "learning_rate": 9.672393729892533e-05, "loss": 0.9423, "step": 3144 }, { "epoch": 0.2130903177722068, "grad_norm": 5.812994003295898, "learning_rate": 9.67225682798275e-05, "loss": 0.9187, "step": 3145 }, { "epoch": 0.21315807304017886, "grad_norm": 9.492706298828125, "learning_rate": 9.672119926072969e-05, "loss": 1.1804, "step": 3146 }, { "epoch": 0.21322582830815096, "grad_norm": 9.37720012664795, "learning_rate": 9.671983024163187e-05, "loss": 0.8885, "step": 3147 }, { "epoch": 0.21329358357612305, "grad_norm": 9.183650016784668, "learning_rate": 9.671846122253406e-05, "loss": 0.8967, "step": 3148 }, { "epoch": 0.21336133884409514, "grad_norm": 8.037003517150879, "learning_rate": 9.671709220343624e-05, "loss": 1.2605, "step": 3149 }, { "epoch": 0.2134290941120672, "grad_norm": 8.694345474243164, "learning_rate": 9.671572318433842e-05, "loss": 1.003, "step": 3150 }, { "epoch": 0.2134968493800393, "grad_norm": 6.429176330566406, "learning_rate": 9.67143541652406e-05, "loss": 1.1018, "step": 3151 }, { "epoch": 0.21356460464801139, "grad_norm": 7.964774131774902, "learning_rate": 9.67129851461428e-05, "loss": 0.9121, "step": 3152 }, { "epoch": 0.21363235991598348, "grad_norm": 7.5509033203125, "learning_rate": 9.671161612704498e-05, "loss": 0.9577, "step": 3153 }, { "epoch": 0.21370011518395554, "grad_norm": 7.968616962432861, "learning_rate": 9.671024710794716e-05, "loss": 1.1337, "step": 3154 }, { "epoch": 0.21376787045192763, "grad_norm": 7.340758800506592, "learning_rate": 9.670887808884934e-05, "loss": 0.8068, "step": 3155 }, { "epoch": 0.21383562571989972, "grad_norm": 7.890449523925781, "learning_rate": 9.670750906975152e-05, "loss": 1.0263, "step": 3156 }, { "epoch": 0.21390338098787182, "grad_norm": 10.908242225646973, "learning_rate": 9.670614005065371e-05, "loss": 1.0384, "step": 3157 }, { "epoch": 0.21397113625584388, "grad_norm": 7.62697696685791, "learning_rate": 9.670477103155589e-05, "loss": 0.918, "step": 3158 }, { "epoch": 0.21403889152381597, "grad_norm": 10.487192153930664, "learning_rate": 9.670340201245807e-05, "loss": 1.1135, "step": 3159 }, { "epoch": 0.21410664679178806, "grad_norm": 7.076718807220459, "learning_rate": 9.670203299336026e-05, "loss": 0.9286, "step": 3160 }, { "epoch": 0.21417440205976015, "grad_norm": 10.05949592590332, "learning_rate": 9.670066397426245e-05, "loss": 1.1317, "step": 3161 }, { "epoch": 0.21424215732773222, "grad_norm": 8.656492233276367, "learning_rate": 9.669929495516463e-05, "loss": 0.9864, "step": 3162 }, { "epoch": 0.2143099125957043, "grad_norm": 10.591787338256836, "learning_rate": 9.669792593606682e-05, "loss": 1.0801, "step": 3163 }, { "epoch": 0.2143776678636764, "grad_norm": 8.19133472442627, "learning_rate": 9.6696556916969e-05, "loss": 0.9377, "step": 3164 }, { "epoch": 0.2144454231316485, "grad_norm": 8.402669906616211, "learning_rate": 9.669518789787118e-05, "loss": 0.9283, "step": 3165 }, { "epoch": 0.21451317839962056, "grad_norm": 9.713315963745117, "learning_rate": 9.669381887877337e-05, "loss": 1.1865, "step": 3166 }, { "epoch": 0.21458093366759265, "grad_norm": 6.777700424194336, "learning_rate": 9.669244985967555e-05, "loss": 0.9094, "step": 3167 }, { "epoch": 0.21464868893556474, "grad_norm": 9.381082534790039, "learning_rate": 9.669108084057773e-05, "loss": 0.7461, "step": 3168 }, { "epoch": 0.21471644420353683, "grad_norm": 8.568790435791016, "learning_rate": 9.668971182147992e-05, "loss": 0.7339, "step": 3169 }, { "epoch": 0.2147841994715089, "grad_norm": 9.257226943969727, "learning_rate": 9.66883428023821e-05, "loss": 1.1892, "step": 3170 }, { "epoch": 0.214851954739481, "grad_norm": 8.544146537780762, "learning_rate": 9.668697378328429e-05, "loss": 1.0307, "step": 3171 }, { "epoch": 0.21491971000745308, "grad_norm": 9.540971755981445, "learning_rate": 9.668560476418647e-05, "loss": 0.8464, "step": 3172 }, { "epoch": 0.21498746527542517, "grad_norm": 9.180089950561523, "learning_rate": 9.668423574508865e-05, "loss": 0.951, "step": 3173 }, { "epoch": 0.21505522054339726, "grad_norm": 10.706409454345703, "learning_rate": 9.668286672599083e-05, "loss": 1.141, "step": 3174 }, { "epoch": 0.21512297581136933, "grad_norm": 9.421865463256836, "learning_rate": 9.668149770689302e-05, "loss": 1.0701, "step": 3175 }, { "epoch": 0.21519073107934142, "grad_norm": 9.120182037353516, "learning_rate": 9.66801286877952e-05, "loss": 1.1618, "step": 3176 }, { "epoch": 0.2152584863473135, "grad_norm": 7.879681587219238, "learning_rate": 9.667875966869738e-05, "loss": 0.8048, "step": 3177 }, { "epoch": 0.2153262416152856, "grad_norm": 7.233819007873535, "learning_rate": 9.667739064959957e-05, "loss": 0.9718, "step": 3178 }, { "epoch": 0.21539399688325767, "grad_norm": 7.79316520690918, "learning_rate": 9.667602163050175e-05, "loss": 1.0999, "step": 3179 }, { "epoch": 0.21546175215122976, "grad_norm": 8.873052597045898, "learning_rate": 9.667465261140394e-05, "loss": 0.9444, "step": 3180 }, { "epoch": 0.21552950741920185, "grad_norm": 7.386806488037109, "learning_rate": 9.667328359230612e-05, "loss": 0.8661, "step": 3181 }, { "epoch": 0.21559726268717394, "grad_norm": 7.682179927825928, "learning_rate": 9.66719145732083e-05, "loss": 0.7904, "step": 3182 }, { "epoch": 0.215665017955146, "grad_norm": 7.9192914962768555, "learning_rate": 9.667054555411048e-05, "loss": 1.0563, "step": 3183 }, { "epoch": 0.2157327732231181, "grad_norm": 6.972530364990234, "learning_rate": 9.666917653501267e-05, "loss": 0.7482, "step": 3184 }, { "epoch": 0.2158005284910902, "grad_norm": 9.844091415405273, "learning_rate": 9.666780751591485e-05, "loss": 1.0289, "step": 3185 }, { "epoch": 0.21586828375906228, "grad_norm": 8.724159240722656, "learning_rate": 9.666643849681704e-05, "loss": 1.1419, "step": 3186 }, { "epoch": 0.21593603902703434, "grad_norm": 8.859795570373535, "learning_rate": 9.666506947771922e-05, "loss": 0.9252, "step": 3187 }, { "epoch": 0.21600379429500643, "grad_norm": 10.741375923156738, "learning_rate": 9.66637004586214e-05, "loss": 1.2328, "step": 3188 }, { "epoch": 0.21607154956297853, "grad_norm": 8.468697547912598, "learning_rate": 9.666233143952359e-05, "loss": 1.1779, "step": 3189 }, { "epoch": 0.21613930483095062, "grad_norm": 7.220376014709473, "learning_rate": 9.666096242042577e-05, "loss": 0.9271, "step": 3190 }, { "epoch": 0.21620706009892268, "grad_norm": 8.3795804977417, "learning_rate": 9.665959340132795e-05, "loss": 1.0941, "step": 3191 }, { "epoch": 0.21627481536689477, "grad_norm": 9.743870735168457, "learning_rate": 9.665822438223013e-05, "loss": 0.9396, "step": 3192 }, { "epoch": 0.21634257063486687, "grad_norm": 8.927998542785645, "learning_rate": 9.665685536313231e-05, "loss": 1.0915, "step": 3193 }, { "epoch": 0.21641032590283896, "grad_norm": 8.214877128601074, "learning_rate": 9.66554863440345e-05, "loss": 0.8767, "step": 3194 }, { "epoch": 0.21647808117081102, "grad_norm": 9.605053901672363, "learning_rate": 9.665411732493669e-05, "loss": 1.1829, "step": 3195 }, { "epoch": 0.2165458364387831, "grad_norm": 8.430122375488281, "learning_rate": 9.665274830583887e-05, "loss": 0.8234, "step": 3196 }, { "epoch": 0.2166135917067552, "grad_norm": 8.868674278259277, "learning_rate": 9.665137928674105e-05, "loss": 1.0467, "step": 3197 }, { "epoch": 0.2166813469747273, "grad_norm": 8.451126098632812, "learning_rate": 9.665001026764324e-05, "loss": 1.134, "step": 3198 }, { "epoch": 0.21674910224269936, "grad_norm": 8.749373435974121, "learning_rate": 9.664864124854542e-05, "loss": 0.9825, "step": 3199 }, { "epoch": 0.21681685751067145, "grad_norm": 8.478997230529785, "learning_rate": 9.66472722294476e-05, "loss": 0.9823, "step": 3200 }, { "epoch": 0.21688461277864354, "grad_norm": 6.554266929626465, "learning_rate": 9.664590321034978e-05, "loss": 1.2293, "step": 3201 }, { "epoch": 0.21695236804661563, "grad_norm": 7.415618896484375, "learning_rate": 9.664453419125196e-05, "loss": 0.9767, "step": 3202 }, { "epoch": 0.2170201233145877, "grad_norm": 9.277578353881836, "learning_rate": 9.664316517215416e-05, "loss": 1.0042, "step": 3203 }, { "epoch": 0.2170878785825598, "grad_norm": 7.1379241943359375, "learning_rate": 9.664179615305634e-05, "loss": 0.924, "step": 3204 }, { "epoch": 0.21715563385053188, "grad_norm": 8.837613105773926, "learning_rate": 9.664042713395852e-05, "loss": 0.9889, "step": 3205 }, { "epoch": 0.21722338911850397, "grad_norm": 8.118345260620117, "learning_rate": 9.663905811486071e-05, "loss": 1.2376, "step": 3206 }, { "epoch": 0.21729114438647604, "grad_norm": 10.383713722229004, "learning_rate": 9.663768909576289e-05, "loss": 0.7988, "step": 3207 }, { "epoch": 0.21735889965444813, "grad_norm": 7.0938262939453125, "learning_rate": 9.663632007666507e-05, "loss": 0.8589, "step": 3208 }, { "epoch": 0.21742665492242022, "grad_norm": 7.732020854949951, "learning_rate": 9.663495105756726e-05, "loss": 1.0837, "step": 3209 }, { "epoch": 0.2174944101903923, "grad_norm": 7.718788146972656, "learning_rate": 9.663358203846944e-05, "loss": 0.8949, "step": 3210 }, { "epoch": 0.21756216545836438, "grad_norm": 9.187674522399902, "learning_rate": 9.663221301937162e-05, "loss": 0.9443, "step": 3211 }, { "epoch": 0.21762992072633647, "grad_norm": 8.596949577331543, "learning_rate": 9.663084400027382e-05, "loss": 1.0855, "step": 3212 }, { "epoch": 0.21769767599430856, "grad_norm": 7.854368209838867, "learning_rate": 9.6629474981176e-05, "loss": 0.9203, "step": 3213 }, { "epoch": 0.21776543126228065, "grad_norm": 9.96597957611084, "learning_rate": 9.662810596207818e-05, "loss": 0.836, "step": 3214 }, { "epoch": 0.21783318653025271, "grad_norm": 7.254636764526367, "learning_rate": 9.662673694298036e-05, "loss": 0.7926, "step": 3215 }, { "epoch": 0.2179009417982248, "grad_norm": 9.079703330993652, "learning_rate": 9.662536792388255e-05, "loss": 1.2072, "step": 3216 }, { "epoch": 0.2179686970661969, "grad_norm": 8.051166534423828, "learning_rate": 9.662399890478473e-05, "loss": 0.9719, "step": 3217 }, { "epoch": 0.218036452334169, "grad_norm": 9.189994812011719, "learning_rate": 9.662262988568691e-05, "loss": 0.9878, "step": 3218 }, { "epoch": 0.21810420760214105, "grad_norm": 9.806111335754395, "learning_rate": 9.66212608665891e-05, "loss": 1.1621, "step": 3219 }, { "epoch": 0.21817196287011315, "grad_norm": 7.833159923553467, "learning_rate": 9.661989184749128e-05, "loss": 0.9811, "step": 3220 }, { "epoch": 0.21823971813808524, "grad_norm": 7.537334442138672, "learning_rate": 9.661852282839347e-05, "loss": 0.9916, "step": 3221 }, { "epoch": 0.21830747340605733, "grad_norm": 9.828896522521973, "learning_rate": 9.661715380929565e-05, "loss": 1.09, "step": 3222 }, { "epoch": 0.2183752286740294, "grad_norm": 7.99267578125, "learning_rate": 9.661578479019783e-05, "loss": 1.0852, "step": 3223 }, { "epoch": 0.21844298394200148, "grad_norm": 8.085976600646973, "learning_rate": 9.661441577110001e-05, "loss": 1.1762, "step": 3224 }, { "epoch": 0.21851073920997358, "grad_norm": 6.523219585418701, "learning_rate": 9.661304675200219e-05, "loss": 1.0814, "step": 3225 }, { "epoch": 0.21857849447794567, "grad_norm": 9.796062469482422, "learning_rate": 9.661167773290438e-05, "loss": 1.1447, "step": 3226 }, { "epoch": 0.21864624974591776, "grad_norm": 8.570687294006348, "learning_rate": 9.661030871380656e-05, "loss": 1.0552, "step": 3227 }, { "epoch": 0.21871400501388982, "grad_norm": 9.70113468170166, "learning_rate": 9.660893969470874e-05, "loss": 1.0379, "step": 3228 }, { "epoch": 0.21878176028186191, "grad_norm": 8.195375442504883, "learning_rate": 9.660757067561093e-05, "loss": 0.9348, "step": 3229 }, { "epoch": 0.218849515549834, "grad_norm": 7.33494758605957, "learning_rate": 9.660620165651312e-05, "loss": 0.8977, "step": 3230 }, { "epoch": 0.2189172708178061, "grad_norm": 7.543430328369141, "learning_rate": 9.66048326374153e-05, "loss": 1.023, "step": 3231 }, { "epoch": 0.21898502608577816, "grad_norm": 8.45494556427002, "learning_rate": 9.660346361831748e-05, "loss": 1.2995, "step": 3232 }, { "epoch": 0.21905278135375025, "grad_norm": 8.197903633117676, "learning_rate": 9.660209459921966e-05, "loss": 0.9651, "step": 3233 }, { "epoch": 0.21912053662172234, "grad_norm": 8.392546653747559, "learning_rate": 9.660072558012184e-05, "loss": 0.9858, "step": 3234 }, { "epoch": 0.21918829188969444, "grad_norm": 6.499540328979492, "learning_rate": 9.659935656102403e-05, "loss": 0.9679, "step": 3235 }, { "epoch": 0.2192560471576665, "grad_norm": 7.784618377685547, "learning_rate": 9.659798754192621e-05, "loss": 0.9316, "step": 3236 }, { "epoch": 0.2193238024256386, "grad_norm": 9.014801979064941, "learning_rate": 9.65966185228284e-05, "loss": 0.9261, "step": 3237 }, { "epoch": 0.21939155769361068, "grad_norm": 8.238675117492676, "learning_rate": 9.659524950373058e-05, "loss": 1.0156, "step": 3238 }, { "epoch": 0.21945931296158278, "grad_norm": 9.447864532470703, "learning_rate": 9.659388048463277e-05, "loss": 1.2186, "step": 3239 }, { "epoch": 0.21952706822955484, "grad_norm": 6.894035816192627, "learning_rate": 9.659251146553495e-05, "loss": 0.9157, "step": 3240 }, { "epoch": 0.21959482349752693, "grad_norm": 7.24024772644043, "learning_rate": 9.659114244643713e-05, "loss": 1.1564, "step": 3241 }, { "epoch": 0.21966257876549902, "grad_norm": 9.135485649108887, "learning_rate": 9.658977342733931e-05, "loss": 1.2002, "step": 3242 }, { "epoch": 0.21973033403347111, "grad_norm": 6.119181156158447, "learning_rate": 9.658840440824149e-05, "loss": 0.788, "step": 3243 }, { "epoch": 0.21979808930144318, "grad_norm": 8.550108909606934, "learning_rate": 9.658703538914368e-05, "loss": 0.9251, "step": 3244 }, { "epoch": 0.21986584456941527, "grad_norm": 9.546792984008789, "learning_rate": 9.658566637004586e-05, "loss": 1.2231, "step": 3245 }, { "epoch": 0.21993359983738736, "grad_norm": 9.451306343078613, "learning_rate": 9.658429735094805e-05, "loss": 0.8991, "step": 3246 }, { "epoch": 0.22000135510535945, "grad_norm": 9.914873123168945, "learning_rate": 9.658292833185023e-05, "loss": 1.3102, "step": 3247 }, { "epoch": 0.22006911037333152, "grad_norm": 10.038002967834473, "learning_rate": 9.65815593127524e-05, "loss": 1.254, "step": 3248 }, { "epoch": 0.2201368656413036, "grad_norm": 7.590802192687988, "learning_rate": 9.65801902936546e-05, "loss": 1.0032, "step": 3249 }, { "epoch": 0.2202046209092757, "grad_norm": 8.632338523864746, "learning_rate": 9.657882127455678e-05, "loss": 0.8922, "step": 3250 }, { "epoch": 0.2202723761772478, "grad_norm": 6.483764171600342, "learning_rate": 9.657745225545896e-05, "loss": 0.9167, "step": 3251 }, { "epoch": 0.22034013144521986, "grad_norm": 8.28577709197998, "learning_rate": 9.657608323636115e-05, "loss": 1.0413, "step": 3252 }, { "epoch": 0.22040788671319195, "grad_norm": 9.218433380126953, "learning_rate": 9.657471421726333e-05, "loss": 1.1681, "step": 3253 }, { "epoch": 0.22047564198116404, "grad_norm": 6.653755187988281, "learning_rate": 9.657334519816552e-05, "loss": 0.9645, "step": 3254 }, { "epoch": 0.22054339724913613, "grad_norm": 8.784723281860352, "learning_rate": 9.657197617906771e-05, "loss": 0.7489, "step": 3255 }, { "epoch": 0.2206111525171082, "grad_norm": 9.041324615478516, "learning_rate": 9.657060715996989e-05, "loss": 1.0478, "step": 3256 }, { "epoch": 0.22067890778508029, "grad_norm": 7.811746597290039, "learning_rate": 9.656923814087207e-05, "loss": 0.9511, "step": 3257 }, { "epoch": 0.22074666305305238, "grad_norm": 9.811897277832031, "learning_rate": 9.656786912177426e-05, "loss": 1.4235, "step": 3258 }, { "epoch": 0.22081441832102447, "grad_norm": 7.313972473144531, "learning_rate": 9.656650010267644e-05, "loss": 1.0075, "step": 3259 }, { "epoch": 0.22088217358899653, "grad_norm": 9.360795021057129, "learning_rate": 9.656513108357862e-05, "loss": 0.9634, "step": 3260 }, { "epoch": 0.22094992885696862, "grad_norm": 9.322724342346191, "learning_rate": 9.65637620644808e-05, "loss": 1.0669, "step": 3261 }, { "epoch": 0.22101768412494072, "grad_norm": 10.269414901733398, "learning_rate": 9.6562393045383e-05, "loss": 1.1288, "step": 3262 }, { "epoch": 0.2210854393929128, "grad_norm": 7.672987937927246, "learning_rate": 9.656102402628518e-05, "loss": 1.0517, "step": 3263 }, { "epoch": 0.22115319466088487, "grad_norm": 8.725695610046387, "learning_rate": 9.655965500718736e-05, "loss": 1.0987, "step": 3264 }, { "epoch": 0.22122094992885696, "grad_norm": 8.577162742614746, "learning_rate": 9.655828598808954e-05, "loss": 0.821, "step": 3265 }, { "epoch": 0.22128870519682906, "grad_norm": 8.497530937194824, "learning_rate": 9.655691696899172e-05, "loss": 0.9171, "step": 3266 }, { "epoch": 0.22135646046480115, "grad_norm": 9.665203094482422, "learning_rate": 9.655554794989391e-05, "loss": 1.105, "step": 3267 }, { "epoch": 0.2214242157327732, "grad_norm": 9.42651081085205, "learning_rate": 9.65541789307961e-05, "loss": 0.9526, "step": 3268 }, { "epoch": 0.2214919710007453, "grad_norm": 7.461474418640137, "learning_rate": 9.655280991169827e-05, "loss": 0.9554, "step": 3269 }, { "epoch": 0.2215597262687174, "grad_norm": 10.56189250946045, "learning_rate": 9.655144089260045e-05, "loss": 1.1504, "step": 3270 }, { "epoch": 0.22162748153668949, "grad_norm": 8.245696067810059, "learning_rate": 9.655007187350265e-05, "loss": 1.0352, "step": 3271 }, { "epoch": 0.22169523680466155, "grad_norm": 9.182881355285645, "learning_rate": 9.654870285440483e-05, "loss": 1.1153, "step": 3272 }, { "epoch": 0.22176299207263364, "grad_norm": 8.101743698120117, "learning_rate": 9.654733383530701e-05, "loss": 1.1399, "step": 3273 }, { "epoch": 0.22183074734060573, "grad_norm": 9.072972297668457, "learning_rate": 9.654596481620919e-05, "loss": 1.1508, "step": 3274 }, { "epoch": 0.22189850260857782, "grad_norm": 8.480910301208496, "learning_rate": 9.654459579711137e-05, "loss": 0.9659, "step": 3275 }, { "epoch": 0.2219662578765499, "grad_norm": 7.095332622528076, "learning_rate": 9.654322677801356e-05, "loss": 0.9417, "step": 3276 }, { "epoch": 0.22203401314452198, "grad_norm": 8.183112144470215, "learning_rate": 9.654185775891574e-05, "loss": 0.9307, "step": 3277 }, { "epoch": 0.22210176841249407, "grad_norm": 8.60648250579834, "learning_rate": 9.654048873981792e-05, "loss": 1.141, "step": 3278 }, { "epoch": 0.22216952368046616, "grad_norm": 6.992738246917725, "learning_rate": 9.65391197207201e-05, "loss": 0.9429, "step": 3279 }, { "epoch": 0.22223727894843825, "grad_norm": 6.998913764953613, "learning_rate": 9.653775070162229e-05, "loss": 1.0159, "step": 3280 }, { "epoch": 0.22230503421641032, "grad_norm": 7.922050952911377, "learning_rate": 9.653638168252448e-05, "loss": 1.1349, "step": 3281 }, { "epoch": 0.2223727894843824, "grad_norm": 9.171984672546387, "learning_rate": 9.653501266342666e-05, "loss": 1.149, "step": 3282 }, { "epoch": 0.2224405447523545, "grad_norm": 9.88930606842041, "learning_rate": 9.653364364432884e-05, "loss": 1.1036, "step": 3283 }, { "epoch": 0.2225083000203266, "grad_norm": 10.978727340698242, "learning_rate": 9.653227462523102e-05, "loss": 1.0345, "step": 3284 }, { "epoch": 0.22257605528829866, "grad_norm": 8.712138175964355, "learning_rate": 9.653090560613321e-05, "loss": 1.1404, "step": 3285 }, { "epoch": 0.22264381055627075, "grad_norm": 8.79491901397705, "learning_rate": 9.65295365870354e-05, "loss": 1.4334, "step": 3286 }, { "epoch": 0.22271156582424284, "grad_norm": 9.580904960632324, "learning_rate": 9.652816756793757e-05, "loss": 1.0693, "step": 3287 }, { "epoch": 0.22277932109221493, "grad_norm": 7.335433006286621, "learning_rate": 9.652679854883976e-05, "loss": 1.0559, "step": 3288 }, { "epoch": 0.222847076360187, "grad_norm": 8.534089088439941, "learning_rate": 9.652542952974194e-05, "loss": 0.9835, "step": 3289 }, { "epoch": 0.2229148316281591, "grad_norm": 6.626898288726807, "learning_rate": 9.652406051064413e-05, "loss": 0.8056, "step": 3290 }, { "epoch": 0.22298258689613118, "grad_norm": 7.774187088012695, "learning_rate": 9.652269149154631e-05, "loss": 0.8032, "step": 3291 }, { "epoch": 0.22305034216410327, "grad_norm": 8.029982566833496, "learning_rate": 9.652132247244849e-05, "loss": 1.0536, "step": 3292 }, { "epoch": 0.22311809743207534, "grad_norm": 8.701712608337402, "learning_rate": 9.651995345335067e-05, "loss": 0.9422, "step": 3293 }, { "epoch": 0.22318585270004743, "grad_norm": 9.17507553100586, "learning_rate": 9.651858443425286e-05, "loss": 1.0094, "step": 3294 }, { "epoch": 0.22325360796801952, "grad_norm": 8.464656829833984, "learning_rate": 9.651721541515504e-05, "loss": 0.9546, "step": 3295 }, { "epoch": 0.2233213632359916, "grad_norm": 6.839638710021973, "learning_rate": 9.651584639605722e-05, "loss": 0.8296, "step": 3296 }, { "epoch": 0.22338911850396367, "grad_norm": 7.773430824279785, "learning_rate": 9.65144773769594e-05, "loss": 1.1344, "step": 3297 }, { "epoch": 0.22345687377193577, "grad_norm": 9.725523948669434, "learning_rate": 9.65131083578616e-05, "loss": 0.9814, "step": 3298 }, { "epoch": 0.22352462903990786, "grad_norm": 8.995366096496582, "learning_rate": 9.651173933876378e-05, "loss": 1.2263, "step": 3299 }, { "epoch": 0.22359238430787995, "grad_norm": 8.735838890075684, "learning_rate": 9.651037031966596e-05, "loss": 1.1707, "step": 3300 }, { "epoch": 0.223660139575852, "grad_norm": 7.177740097045898, "learning_rate": 9.650900130056815e-05, "loss": 0.9491, "step": 3301 }, { "epoch": 0.2237278948438241, "grad_norm": 8.393296241760254, "learning_rate": 9.650763228147033e-05, "loss": 0.9765, "step": 3302 }, { "epoch": 0.2237956501117962, "grad_norm": 9.773447036743164, "learning_rate": 9.650626326237251e-05, "loss": 1.0409, "step": 3303 }, { "epoch": 0.2238634053797683, "grad_norm": 6.951483249664307, "learning_rate": 9.650489424327471e-05, "loss": 0.9848, "step": 3304 }, { "epoch": 0.22393116064774035, "grad_norm": 8.579717636108398, "learning_rate": 9.650352522417689e-05, "loss": 0.9722, "step": 3305 }, { "epoch": 0.22399891591571244, "grad_norm": 7.798871040344238, "learning_rate": 9.650215620507907e-05, "loss": 0.8046, "step": 3306 }, { "epoch": 0.22406667118368453, "grad_norm": 9.504851341247559, "learning_rate": 9.650078718598125e-05, "loss": 1.1972, "step": 3307 }, { "epoch": 0.22413442645165663, "grad_norm": 8.384056091308594, "learning_rate": 9.649941816688344e-05, "loss": 1.0654, "step": 3308 }, { "epoch": 0.2242021817196287, "grad_norm": 10.469207763671875, "learning_rate": 9.649804914778562e-05, "loss": 1.0694, "step": 3309 }, { "epoch": 0.22426993698760078, "grad_norm": 8.5567045211792, "learning_rate": 9.64966801286878e-05, "loss": 1.0371, "step": 3310 }, { "epoch": 0.22433769225557287, "grad_norm": 9.427155494689941, "learning_rate": 9.649531110958998e-05, "loss": 1.0185, "step": 3311 }, { "epoch": 0.22440544752354497, "grad_norm": 8.238412857055664, "learning_rate": 9.649394209049216e-05, "loss": 0.9744, "step": 3312 }, { "epoch": 0.22447320279151703, "grad_norm": 9.36423110961914, "learning_rate": 9.649257307139436e-05, "loss": 0.9184, "step": 3313 }, { "epoch": 0.22454095805948912, "grad_norm": 7.485356330871582, "learning_rate": 9.649120405229654e-05, "loss": 0.9948, "step": 3314 }, { "epoch": 0.2246087133274612, "grad_norm": 7.247208118438721, "learning_rate": 9.648983503319872e-05, "loss": 0.8592, "step": 3315 }, { "epoch": 0.2246764685954333, "grad_norm": 9.266085624694824, "learning_rate": 9.64884660141009e-05, "loss": 1.0314, "step": 3316 }, { "epoch": 0.22474422386340537, "grad_norm": 11.147274017333984, "learning_rate": 9.64870969950031e-05, "loss": 1.0403, "step": 3317 }, { "epoch": 0.22481197913137746, "grad_norm": 10.961670875549316, "learning_rate": 9.648572797590527e-05, "loss": 1.3147, "step": 3318 }, { "epoch": 0.22487973439934955, "grad_norm": 7.002868175506592, "learning_rate": 9.648435895680745e-05, "loss": 0.9044, "step": 3319 }, { "epoch": 0.22494748966732164, "grad_norm": 7.886247634887695, "learning_rate": 9.648298993770963e-05, "loss": 0.8901, "step": 3320 }, { "epoch": 0.2250152449352937, "grad_norm": 8.933539390563965, "learning_rate": 9.648162091861181e-05, "loss": 0.8758, "step": 3321 }, { "epoch": 0.2250830002032658, "grad_norm": 7.223681449890137, "learning_rate": 9.648025189951401e-05, "loss": 1.1502, "step": 3322 }, { "epoch": 0.2251507554712379, "grad_norm": 8.808623313903809, "learning_rate": 9.647888288041619e-05, "loss": 1.0706, "step": 3323 }, { "epoch": 0.22521851073920998, "grad_norm": 9.557942390441895, "learning_rate": 9.647751386131837e-05, "loss": 0.8113, "step": 3324 }, { "epoch": 0.22528626600718205, "grad_norm": 9.855717658996582, "learning_rate": 9.647614484222055e-05, "loss": 1.251, "step": 3325 }, { "epoch": 0.22535402127515414, "grad_norm": 9.588946342468262, "learning_rate": 9.647477582312273e-05, "loss": 1.3029, "step": 3326 }, { "epoch": 0.22542177654312623, "grad_norm": 9.277341842651367, "learning_rate": 9.647340680402492e-05, "loss": 0.9708, "step": 3327 }, { "epoch": 0.22548953181109832, "grad_norm": 9.607316970825195, "learning_rate": 9.64720377849271e-05, "loss": 0.9614, "step": 3328 }, { "epoch": 0.22555728707907038, "grad_norm": 7.949220657348633, "learning_rate": 9.647066876582928e-05, "loss": 1.0066, "step": 3329 }, { "epoch": 0.22562504234704248, "grad_norm": 8.383744239807129, "learning_rate": 9.646929974673146e-05, "loss": 0.797, "step": 3330 }, { "epoch": 0.22569279761501457, "grad_norm": 7.203914642333984, "learning_rate": 9.646793072763366e-05, "loss": 0.9625, "step": 3331 }, { "epoch": 0.22576055288298666, "grad_norm": 8.95102596282959, "learning_rate": 9.646656170853584e-05, "loss": 1.0728, "step": 3332 }, { "epoch": 0.22582830815095875, "grad_norm": 8.044751167297363, "learning_rate": 9.646519268943802e-05, "loss": 1.2155, "step": 3333 }, { "epoch": 0.22589606341893081, "grad_norm": 9.164902687072754, "learning_rate": 9.64638236703402e-05, "loss": 1.2923, "step": 3334 }, { "epoch": 0.2259638186869029, "grad_norm": 6.792164325714111, "learning_rate": 9.646245465124238e-05, "loss": 0.9956, "step": 3335 }, { "epoch": 0.226031573954875, "grad_norm": 7.76467752456665, "learning_rate": 9.646108563214457e-05, "loss": 1.0057, "step": 3336 }, { "epoch": 0.2260993292228471, "grad_norm": 8.541545867919922, "learning_rate": 9.645971661304675e-05, "loss": 1.1013, "step": 3337 }, { "epoch": 0.22616708449081915, "grad_norm": 7.8545050621032715, "learning_rate": 9.645834759394893e-05, "loss": 0.8565, "step": 3338 }, { "epoch": 0.22623483975879125, "grad_norm": 9.322896003723145, "learning_rate": 9.645697857485112e-05, "loss": 1.0509, "step": 3339 }, { "epoch": 0.22630259502676334, "grad_norm": 9.540191650390625, "learning_rate": 9.645560955575331e-05, "loss": 1.1757, "step": 3340 }, { "epoch": 0.22637035029473543, "grad_norm": 8.97028923034668, "learning_rate": 9.645424053665549e-05, "loss": 1.0342, "step": 3341 }, { "epoch": 0.2264381055627075, "grad_norm": 6.444105625152588, "learning_rate": 9.645287151755767e-05, "loss": 0.8377, "step": 3342 }, { "epoch": 0.22650586083067958, "grad_norm": 9.237889289855957, "learning_rate": 9.645150249845985e-05, "loss": 1.0468, "step": 3343 }, { "epoch": 0.22657361609865168, "grad_norm": 9.567046165466309, "learning_rate": 9.645013347936203e-05, "loss": 1.3875, "step": 3344 }, { "epoch": 0.22664137136662377, "grad_norm": 8.302481651306152, "learning_rate": 9.644876446026422e-05, "loss": 1.0455, "step": 3345 }, { "epoch": 0.22670912663459583, "grad_norm": 9.124307632446289, "learning_rate": 9.64473954411664e-05, "loss": 0.9828, "step": 3346 }, { "epoch": 0.22677688190256792, "grad_norm": 7.700011730194092, "learning_rate": 9.644602642206858e-05, "loss": 0.8475, "step": 3347 }, { "epoch": 0.22684463717054001, "grad_norm": 6.1064276695251465, "learning_rate": 9.644465740297078e-05, "loss": 0.7779, "step": 3348 }, { "epoch": 0.2269123924385121, "grad_norm": 8.924312591552734, "learning_rate": 9.644328838387296e-05, "loss": 1.3825, "step": 3349 }, { "epoch": 0.22698014770648417, "grad_norm": 8.169050216674805, "learning_rate": 9.644191936477514e-05, "loss": 1.1198, "step": 3350 }, { "epoch": 0.22704790297445626, "grad_norm": 8.368382453918457, "learning_rate": 9.644055034567733e-05, "loss": 1.0729, "step": 3351 }, { "epoch": 0.22711565824242835, "grad_norm": 10.206897735595703, "learning_rate": 9.643918132657951e-05, "loss": 0.9022, "step": 3352 }, { "epoch": 0.22718341351040044, "grad_norm": 6.288288116455078, "learning_rate": 9.64378123074817e-05, "loss": 0.7868, "step": 3353 }, { "epoch": 0.2272511687783725, "grad_norm": 8.446578025817871, "learning_rate": 9.643644328838389e-05, "loss": 1.0426, "step": 3354 }, { "epoch": 0.2273189240463446, "grad_norm": 8.425249099731445, "learning_rate": 9.643507426928607e-05, "loss": 0.823, "step": 3355 }, { "epoch": 0.2273866793143167, "grad_norm": 6.610576629638672, "learning_rate": 9.643370525018825e-05, "loss": 0.9251, "step": 3356 }, { "epoch": 0.22745443458228878, "grad_norm": 8.365503311157227, "learning_rate": 9.643233623109043e-05, "loss": 1.0258, "step": 3357 }, { "epoch": 0.22752218985026085, "grad_norm": 8.654791831970215, "learning_rate": 9.643096721199261e-05, "loss": 0.8781, "step": 3358 }, { "epoch": 0.22758994511823294, "grad_norm": 6.612964153289795, "learning_rate": 9.64295981928948e-05, "loss": 0.7472, "step": 3359 }, { "epoch": 0.22765770038620503, "grad_norm": 9.4874267578125, "learning_rate": 9.642822917379698e-05, "loss": 0.994, "step": 3360 }, { "epoch": 0.22772545565417712, "grad_norm": 9.7284574508667, "learning_rate": 9.642686015469916e-05, "loss": 0.9293, "step": 3361 }, { "epoch": 0.2277932109221492, "grad_norm": 8.787792205810547, "learning_rate": 9.642549113560134e-05, "loss": 1.1368, "step": 3362 }, { "epoch": 0.22786096619012128, "grad_norm": 9.218424797058105, "learning_rate": 9.642412211650354e-05, "loss": 1.2437, "step": 3363 }, { "epoch": 0.22792872145809337, "grad_norm": 7.9437127113342285, "learning_rate": 9.642275309740572e-05, "loss": 0.925, "step": 3364 }, { "epoch": 0.22799647672606546, "grad_norm": 10.552806854248047, "learning_rate": 9.64213840783079e-05, "loss": 0.936, "step": 3365 }, { "epoch": 0.22806423199403753, "grad_norm": 6.572626113891602, "learning_rate": 9.642001505921008e-05, "loss": 0.8968, "step": 3366 }, { "epoch": 0.22813198726200962, "grad_norm": 8.689478874206543, "learning_rate": 9.641864604011226e-05, "loss": 0.8605, "step": 3367 }, { "epoch": 0.2281997425299817, "grad_norm": 10.450199127197266, "learning_rate": 9.641727702101445e-05, "loss": 1.0871, "step": 3368 }, { "epoch": 0.2282674977979538, "grad_norm": 9.260807991027832, "learning_rate": 9.641590800191663e-05, "loss": 0.8469, "step": 3369 }, { "epoch": 0.22833525306592586, "grad_norm": 13.626687049865723, "learning_rate": 9.641453898281881e-05, "loss": 1.0615, "step": 3370 }, { "epoch": 0.22840300833389796, "grad_norm": 7.419554710388184, "learning_rate": 9.6413169963721e-05, "loss": 0.9351, "step": 3371 }, { "epoch": 0.22847076360187005, "grad_norm": 9.33206558227539, "learning_rate": 9.641180094462319e-05, "loss": 1.1826, "step": 3372 }, { "epoch": 0.22853851886984214, "grad_norm": 7.148665904998779, "learning_rate": 9.641043192552537e-05, "loss": 1.1121, "step": 3373 }, { "epoch": 0.2286062741378142, "grad_norm": 6.784035682678223, "learning_rate": 9.640906290642755e-05, "loss": 0.9649, "step": 3374 }, { "epoch": 0.2286740294057863, "grad_norm": 7.357334613800049, "learning_rate": 9.640769388732973e-05, "loss": 0.9005, "step": 3375 }, { "epoch": 0.22874178467375839, "grad_norm": 9.157288551330566, "learning_rate": 9.640632486823191e-05, "loss": 0.9585, "step": 3376 }, { "epoch": 0.22880953994173048, "grad_norm": 9.068450927734375, "learning_rate": 9.64049558491341e-05, "loss": 0.9248, "step": 3377 }, { "epoch": 0.22887729520970254, "grad_norm": 9.446860313415527, "learning_rate": 9.640358683003628e-05, "loss": 1.0016, "step": 3378 }, { "epoch": 0.22894505047767463, "grad_norm": 7.63693904876709, "learning_rate": 9.640221781093846e-05, "loss": 1.0158, "step": 3379 }, { "epoch": 0.22901280574564672, "grad_norm": 7.569469451904297, "learning_rate": 9.640084879184064e-05, "loss": 0.9149, "step": 3380 }, { "epoch": 0.22908056101361882, "grad_norm": 8.837080955505371, "learning_rate": 9.639947977274282e-05, "loss": 0.9379, "step": 3381 }, { "epoch": 0.22914831628159088, "grad_norm": 9.141901969909668, "learning_rate": 9.639811075364502e-05, "loss": 0.9121, "step": 3382 }, { "epoch": 0.22921607154956297, "grad_norm": 7.68120813369751, "learning_rate": 9.63967417345472e-05, "loss": 1.0235, "step": 3383 }, { "epoch": 0.22928382681753506, "grad_norm": 8.82229995727539, "learning_rate": 9.639537271544938e-05, "loss": 1.1573, "step": 3384 }, { "epoch": 0.22935158208550716, "grad_norm": 9.490239143371582, "learning_rate": 9.639400369635156e-05, "loss": 0.8776, "step": 3385 }, { "epoch": 0.22941933735347925, "grad_norm": 10.257567405700684, "learning_rate": 9.639263467725375e-05, "loss": 1.0696, "step": 3386 }, { "epoch": 0.2294870926214513, "grad_norm": 7.793918609619141, "learning_rate": 9.639126565815593e-05, "loss": 1.0892, "step": 3387 }, { "epoch": 0.2295548478894234, "grad_norm": 8.983718872070312, "learning_rate": 9.638989663905811e-05, "loss": 0.908, "step": 3388 }, { "epoch": 0.2296226031573955, "grad_norm": 8.31851577758789, "learning_rate": 9.63885276199603e-05, "loss": 1.0168, "step": 3389 }, { "epoch": 0.22969035842536759, "grad_norm": 9.343503952026367, "learning_rate": 9.638715860086248e-05, "loss": 1.1174, "step": 3390 }, { "epoch": 0.22975811369333965, "grad_norm": 7.617075443267822, "learning_rate": 9.638578958176467e-05, "loss": 0.9076, "step": 3391 }, { "epoch": 0.22982586896131174, "grad_norm": 8.102355003356934, "learning_rate": 9.638442056266685e-05, "loss": 1.0476, "step": 3392 }, { "epoch": 0.22989362422928383, "grad_norm": 9.366684913635254, "learning_rate": 9.638305154356903e-05, "loss": 1.1892, "step": 3393 }, { "epoch": 0.22996137949725592, "grad_norm": 8.204293251037598, "learning_rate": 9.638168252447122e-05, "loss": 0.7731, "step": 3394 }, { "epoch": 0.230029134765228, "grad_norm": 9.68787670135498, "learning_rate": 9.63803135053734e-05, "loss": 1.1126, "step": 3395 }, { "epoch": 0.23009689003320008, "grad_norm": 6.6209330558776855, "learning_rate": 9.637894448627558e-05, "loss": 0.9006, "step": 3396 }, { "epoch": 0.23016464530117217, "grad_norm": 7.963149547576904, "learning_rate": 9.637757546717778e-05, "loss": 0.8275, "step": 3397 }, { "epoch": 0.23023240056914426, "grad_norm": 8.74716567993164, "learning_rate": 9.637620644807996e-05, "loss": 1.0592, "step": 3398 }, { "epoch": 0.23030015583711633, "grad_norm": 8.819621086120605, "learning_rate": 9.637483742898214e-05, "loss": 0.8799, "step": 3399 }, { "epoch": 0.23036791110508842, "grad_norm": 8.538536071777344, "learning_rate": 9.637346840988433e-05, "loss": 1.1498, "step": 3400 }, { "epoch": 0.2304356663730605, "grad_norm": 8.787203788757324, "learning_rate": 9.637209939078651e-05, "loss": 0.8149, "step": 3401 }, { "epoch": 0.2305034216410326, "grad_norm": 8.85105037689209, "learning_rate": 9.63707303716887e-05, "loss": 0.8525, "step": 3402 }, { "epoch": 0.23057117690900467, "grad_norm": 9.773297309875488, "learning_rate": 9.636936135259087e-05, "loss": 0.9448, "step": 3403 }, { "epoch": 0.23063893217697676, "grad_norm": 7.804196357727051, "learning_rate": 9.636799233349307e-05, "loss": 0.9549, "step": 3404 }, { "epoch": 0.23070668744494885, "grad_norm": 8.398775100708008, "learning_rate": 9.636662331439525e-05, "loss": 0.9111, "step": 3405 }, { "epoch": 0.23077444271292094, "grad_norm": 8.960867881774902, "learning_rate": 9.636525429529743e-05, "loss": 1.1533, "step": 3406 }, { "epoch": 0.230842197980893, "grad_norm": 7.513526439666748, "learning_rate": 9.636388527619961e-05, "loss": 1.113, "step": 3407 }, { "epoch": 0.2309099532488651, "grad_norm": 8.891728401184082, "learning_rate": 9.636251625710179e-05, "loss": 0.8727, "step": 3408 }, { "epoch": 0.2309777085168372, "grad_norm": 6.972326755523682, "learning_rate": 9.636114723800398e-05, "loss": 0.8085, "step": 3409 }, { "epoch": 0.23104546378480928, "grad_norm": 9.195303916931152, "learning_rate": 9.635977821890616e-05, "loss": 1.0276, "step": 3410 }, { "epoch": 0.23111321905278134, "grad_norm": 9.36483383178711, "learning_rate": 9.635840919980834e-05, "loss": 1.1369, "step": 3411 }, { "epoch": 0.23118097432075344, "grad_norm": 7.282619953155518, "learning_rate": 9.635704018071052e-05, "loss": 0.9214, "step": 3412 }, { "epoch": 0.23124872958872553, "grad_norm": 6.289726257324219, "learning_rate": 9.63556711616127e-05, "loss": 0.8556, "step": 3413 }, { "epoch": 0.23131648485669762, "grad_norm": 8.646320343017578, "learning_rate": 9.63543021425149e-05, "loss": 1.0833, "step": 3414 }, { "epoch": 0.23138424012466968, "grad_norm": 8.372559547424316, "learning_rate": 9.635293312341708e-05, "loss": 0.9944, "step": 3415 }, { "epoch": 0.23145199539264177, "grad_norm": 8.002387046813965, "learning_rate": 9.635156410431926e-05, "loss": 1.015, "step": 3416 }, { "epoch": 0.23151975066061387, "grad_norm": 8.4190092086792, "learning_rate": 9.635019508522144e-05, "loss": 0.9766, "step": 3417 }, { "epoch": 0.23158750592858596, "grad_norm": 8.759613037109375, "learning_rate": 9.634882606612363e-05, "loss": 1.2348, "step": 3418 }, { "epoch": 0.23165526119655802, "grad_norm": 8.439151763916016, "learning_rate": 9.634745704702581e-05, "loss": 0.8778, "step": 3419 }, { "epoch": 0.2317230164645301, "grad_norm": 8.05675220489502, "learning_rate": 9.6346088027928e-05, "loss": 0.9525, "step": 3420 }, { "epoch": 0.2317907717325022, "grad_norm": 9.196937561035156, "learning_rate": 9.634471900883017e-05, "loss": 1.0663, "step": 3421 }, { "epoch": 0.2318585270004743, "grad_norm": 7.622048854827881, "learning_rate": 9.634334998973235e-05, "loss": 0.9522, "step": 3422 }, { "epoch": 0.23192628226844636, "grad_norm": 10.235919952392578, "learning_rate": 9.634198097063455e-05, "loss": 1.1084, "step": 3423 }, { "epoch": 0.23199403753641845, "grad_norm": 7.920719623565674, "learning_rate": 9.634061195153673e-05, "loss": 0.9443, "step": 3424 }, { "epoch": 0.23206179280439054, "grad_norm": 7.440132141113281, "learning_rate": 9.633924293243891e-05, "loss": 1.0757, "step": 3425 }, { "epoch": 0.23212954807236263, "grad_norm": 7.0108771324157715, "learning_rate": 9.633787391334109e-05, "loss": 0.8043, "step": 3426 }, { "epoch": 0.2321973033403347, "grad_norm": 6.620635032653809, "learning_rate": 9.633650489424328e-05, "loss": 0.999, "step": 3427 }, { "epoch": 0.2322650586083068, "grad_norm": 7.27518367767334, "learning_rate": 9.633513587514546e-05, "loss": 0.888, "step": 3428 }, { "epoch": 0.23233281387627888, "grad_norm": 7.081945419311523, "learning_rate": 9.633376685604764e-05, "loss": 0.8837, "step": 3429 }, { "epoch": 0.23240056914425097, "grad_norm": 7.225597858428955, "learning_rate": 9.633239783694982e-05, "loss": 0.7774, "step": 3430 }, { "epoch": 0.23246832441222304, "grad_norm": 7.0490217208862305, "learning_rate": 9.6331028817852e-05, "loss": 1.0654, "step": 3431 }, { "epoch": 0.23253607968019513, "grad_norm": 8.641639709472656, "learning_rate": 9.63296597987542e-05, "loss": 1.1071, "step": 3432 }, { "epoch": 0.23260383494816722, "grad_norm": 7.442448139190674, "learning_rate": 9.632829077965638e-05, "loss": 0.9392, "step": 3433 }, { "epoch": 0.2326715902161393, "grad_norm": 6.6959452629089355, "learning_rate": 9.632692176055856e-05, "loss": 0.8334, "step": 3434 }, { "epoch": 0.23273934548411138, "grad_norm": 9.67419719696045, "learning_rate": 9.632555274146074e-05, "loss": 1.1948, "step": 3435 }, { "epoch": 0.23280710075208347, "grad_norm": 7.384359836578369, "learning_rate": 9.632418372236292e-05, "loss": 0.7992, "step": 3436 }, { "epoch": 0.23287485602005556, "grad_norm": 6.566294193267822, "learning_rate": 9.632281470326511e-05, "loss": 0.9393, "step": 3437 }, { "epoch": 0.23294261128802765, "grad_norm": 7.842831134796143, "learning_rate": 9.63214456841673e-05, "loss": 1.1372, "step": 3438 }, { "epoch": 0.23301036655599974, "grad_norm": 9.487961769104004, "learning_rate": 9.632007666506947e-05, "loss": 1.0381, "step": 3439 }, { "epoch": 0.2330781218239718, "grad_norm": 8.06712532043457, "learning_rate": 9.631870764597167e-05, "loss": 1.4106, "step": 3440 }, { "epoch": 0.2331458770919439, "grad_norm": 7.026076793670654, "learning_rate": 9.631733862687385e-05, "loss": 1.0662, "step": 3441 }, { "epoch": 0.233213632359916, "grad_norm": 10.926567077636719, "learning_rate": 9.631596960777603e-05, "loss": 1.2932, "step": 3442 }, { "epoch": 0.23328138762788808, "grad_norm": 8.380082130432129, "learning_rate": 9.631460058867822e-05, "loss": 1.0784, "step": 3443 }, { "epoch": 0.23334914289586015, "grad_norm": 8.768819808959961, "learning_rate": 9.63132315695804e-05, "loss": 1.1501, "step": 3444 }, { "epoch": 0.23341689816383224, "grad_norm": 8.467490196228027, "learning_rate": 9.631186255048258e-05, "loss": 0.7555, "step": 3445 }, { "epoch": 0.23348465343180433, "grad_norm": 6.0409770011901855, "learning_rate": 9.631049353138478e-05, "loss": 0.9099, "step": 3446 }, { "epoch": 0.23355240869977642, "grad_norm": 7.160163402557373, "learning_rate": 9.630912451228696e-05, "loss": 0.8386, "step": 3447 }, { "epoch": 0.23362016396774848, "grad_norm": 9.102558135986328, "learning_rate": 9.630775549318914e-05, "loss": 0.9259, "step": 3448 }, { "epoch": 0.23368791923572058, "grad_norm": 7.071728706359863, "learning_rate": 9.630638647409132e-05, "loss": 1.0426, "step": 3449 }, { "epoch": 0.23375567450369267, "grad_norm": 8.358567237854004, "learning_rate": 9.630501745499351e-05, "loss": 0.9806, "step": 3450 }, { "epoch": 0.23382342977166476, "grad_norm": 12.588993072509766, "learning_rate": 9.630364843589569e-05, "loss": 1.0796, "step": 3451 }, { "epoch": 0.23389118503963682, "grad_norm": 8.665871620178223, "learning_rate": 9.630227941679787e-05, "loss": 1.0224, "step": 3452 }, { "epoch": 0.23395894030760891, "grad_norm": 9.288796424865723, "learning_rate": 9.630091039770005e-05, "loss": 0.9329, "step": 3453 }, { "epoch": 0.234026695575581, "grad_norm": 7.319186210632324, "learning_rate": 9.629954137860223e-05, "loss": 1.057, "step": 3454 }, { "epoch": 0.2340944508435531, "grad_norm": 8.640669822692871, "learning_rate": 9.629817235950443e-05, "loss": 0.9756, "step": 3455 }, { "epoch": 0.23416220611152516, "grad_norm": 10.73513126373291, "learning_rate": 9.629680334040661e-05, "loss": 0.958, "step": 3456 }, { "epoch": 0.23422996137949725, "grad_norm": 7.40097713470459, "learning_rate": 9.629543432130879e-05, "loss": 0.9062, "step": 3457 }, { "epoch": 0.23429771664746935, "grad_norm": 9.506468772888184, "learning_rate": 9.629406530221097e-05, "loss": 1.0083, "step": 3458 }, { "epoch": 0.23436547191544144, "grad_norm": 7.291172504425049, "learning_rate": 9.629269628311315e-05, "loss": 0.9966, "step": 3459 }, { "epoch": 0.2344332271834135, "grad_norm": 8.962395668029785, "learning_rate": 9.629132726401534e-05, "loss": 0.8504, "step": 3460 }, { "epoch": 0.2345009824513856, "grad_norm": 10.29174518585205, "learning_rate": 9.628995824491752e-05, "loss": 1.0807, "step": 3461 }, { "epoch": 0.23456873771935768, "grad_norm": 9.461160659790039, "learning_rate": 9.62885892258197e-05, "loss": 0.996, "step": 3462 }, { "epoch": 0.23463649298732978, "grad_norm": 7.13394021987915, "learning_rate": 9.628722020672188e-05, "loss": 0.7163, "step": 3463 }, { "epoch": 0.23470424825530184, "grad_norm": 9.379724502563477, "learning_rate": 9.628585118762408e-05, "loss": 0.9571, "step": 3464 }, { "epoch": 0.23477200352327393, "grad_norm": 9.72339916229248, "learning_rate": 9.628448216852626e-05, "loss": 1.2559, "step": 3465 }, { "epoch": 0.23483975879124602, "grad_norm": 7.353830337524414, "learning_rate": 9.628311314942844e-05, "loss": 1.0563, "step": 3466 }, { "epoch": 0.23490751405921811, "grad_norm": 8.423667907714844, "learning_rate": 9.628174413033062e-05, "loss": 1.0799, "step": 3467 }, { "epoch": 0.23497526932719018, "grad_norm": 6.426609516143799, "learning_rate": 9.62803751112328e-05, "loss": 0.9417, "step": 3468 }, { "epoch": 0.23504302459516227, "grad_norm": 10.882522583007812, "learning_rate": 9.627900609213499e-05, "loss": 1.1498, "step": 3469 }, { "epoch": 0.23511077986313436, "grad_norm": 7.768298625946045, "learning_rate": 9.627763707303717e-05, "loss": 0.7389, "step": 3470 }, { "epoch": 0.23517853513110645, "grad_norm": 9.002137184143066, "learning_rate": 9.627626805393935e-05, "loss": 1.2338, "step": 3471 }, { "epoch": 0.23524629039907852, "grad_norm": 8.671159744262695, "learning_rate": 9.627489903484153e-05, "loss": 1.1114, "step": 3472 }, { "epoch": 0.2353140456670506, "grad_norm": 5.836034774780273, "learning_rate": 9.627353001574373e-05, "loss": 0.7075, "step": 3473 }, { "epoch": 0.2353818009350227, "grad_norm": 8.145447731018066, "learning_rate": 9.627216099664591e-05, "loss": 0.9504, "step": 3474 }, { "epoch": 0.2354495562029948, "grad_norm": 7.3333563804626465, "learning_rate": 9.627079197754809e-05, "loss": 0.8443, "step": 3475 }, { "epoch": 0.23551731147096686, "grad_norm": 8.191438674926758, "learning_rate": 9.626942295845027e-05, "loss": 0.8163, "step": 3476 }, { "epoch": 0.23558506673893895, "grad_norm": 10.042975425720215, "learning_rate": 9.626805393935245e-05, "loss": 1.0715, "step": 3477 }, { "epoch": 0.23565282200691104, "grad_norm": 7.854464054107666, "learning_rate": 9.626668492025464e-05, "loss": 0.9263, "step": 3478 }, { "epoch": 0.23572057727488313, "grad_norm": 9.286520004272461, "learning_rate": 9.626531590115682e-05, "loss": 1.0172, "step": 3479 }, { "epoch": 0.2357883325428552, "grad_norm": 7.321778297424316, "learning_rate": 9.6263946882059e-05, "loss": 1.0661, "step": 3480 }, { "epoch": 0.2358560878108273, "grad_norm": 7.432394981384277, "learning_rate": 9.626257786296118e-05, "loss": 1.1682, "step": 3481 }, { "epoch": 0.23592384307879938, "grad_norm": 10.230997085571289, "learning_rate": 9.626120884386338e-05, "loss": 1.0498, "step": 3482 }, { "epoch": 0.23599159834677147, "grad_norm": 8.530696868896484, "learning_rate": 9.625983982476556e-05, "loss": 1.0598, "step": 3483 }, { "epoch": 0.23605935361474353, "grad_norm": 7.35584020614624, "learning_rate": 9.625847080566774e-05, "loss": 1.0234, "step": 3484 }, { "epoch": 0.23612710888271563, "grad_norm": 8.030210494995117, "learning_rate": 9.625710178656992e-05, "loss": 1.0241, "step": 3485 }, { "epoch": 0.23619486415068772, "grad_norm": 7.508440971374512, "learning_rate": 9.625573276747211e-05, "loss": 0.9466, "step": 3486 }, { "epoch": 0.2362626194186598, "grad_norm": 7.4618401527404785, "learning_rate": 9.62543637483743e-05, "loss": 0.88, "step": 3487 }, { "epoch": 0.23633037468663187, "grad_norm": 7.738553524017334, "learning_rate": 9.625299472927647e-05, "loss": 1.0146, "step": 3488 }, { "epoch": 0.23639812995460396, "grad_norm": 6.782055854797363, "learning_rate": 9.625162571017867e-05, "loss": 0.8196, "step": 3489 }, { "epoch": 0.23646588522257606, "grad_norm": 7.495883464813232, "learning_rate": 9.625025669108085e-05, "loss": 1.0025, "step": 3490 }, { "epoch": 0.23653364049054815, "grad_norm": 7.224970817565918, "learning_rate": 9.624888767198303e-05, "loss": 1.0069, "step": 3491 }, { "epoch": 0.23660139575852024, "grad_norm": 8.686829566955566, "learning_rate": 9.624751865288522e-05, "loss": 0.8927, "step": 3492 }, { "epoch": 0.2366691510264923, "grad_norm": 10.551370620727539, "learning_rate": 9.62461496337874e-05, "loss": 1.0973, "step": 3493 }, { "epoch": 0.2367369062944644, "grad_norm": 8.38442611694336, "learning_rate": 9.624478061468958e-05, "loss": 0.8145, "step": 3494 }, { "epoch": 0.23680466156243649, "grad_norm": 7.099575996398926, "learning_rate": 9.624341159559176e-05, "loss": 0.8304, "step": 3495 }, { "epoch": 0.23687241683040858, "grad_norm": 7.905203342437744, "learning_rate": 9.624204257649396e-05, "loss": 1.1708, "step": 3496 }, { "epoch": 0.23694017209838064, "grad_norm": 8.08218765258789, "learning_rate": 9.624067355739614e-05, "loss": 0.8367, "step": 3497 }, { "epoch": 0.23700792736635273, "grad_norm": 7.766988277435303, "learning_rate": 9.623930453829832e-05, "loss": 1.1126, "step": 3498 }, { "epoch": 0.23707568263432482, "grad_norm": 8.3229398727417, "learning_rate": 9.62379355192005e-05, "loss": 0.9798, "step": 3499 }, { "epoch": 0.23714343790229692, "grad_norm": 7.113401889801025, "learning_rate": 9.623656650010268e-05, "loss": 0.7711, "step": 3500 }, { "epoch": 0.23721119317026898, "grad_norm": 7.286252975463867, "learning_rate": 9.623519748100487e-05, "loss": 0.8018, "step": 3501 }, { "epoch": 0.23727894843824107, "grad_norm": 8.724101066589355, "learning_rate": 9.623382846190705e-05, "loss": 0.9077, "step": 3502 }, { "epoch": 0.23734670370621316, "grad_norm": 9.201606750488281, "learning_rate": 9.623245944280923e-05, "loss": 1.2277, "step": 3503 }, { "epoch": 0.23741445897418526, "grad_norm": 6.827572822570801, "learning_rate": 9.623109042371141e-05, "loss": 0.9262, "step": 3504 }, { "epoch": 0.23748221424215732, "grad_norm": 7.41288423538208, "learning_rate": 9.622972140461361e-05, "loss": 0.762, "step": 3505 }, { "epoch": 0.2375499695101294, "grad_norm": 9.649503707885742, "learning_rate": 9.622835238551579e-05, "loss": 1.0371, "step": 3506 }, { "epoch": 0.2376177247781015, "grad_norm": 9.630754470825195, "learning_rate": 9.622698336641797e-05, "loss": 0.9994, "step": 3507 }, { "epoch": 0.2376854800460736, "grad_norm": 8.949817657470703, "learning_rate": 9.622561434732015e-05, "loss": 0.8555, "step": 3508 }, { "epoch": 0.23775323531404566, "grad_norm": 8.096979141235352, "learning_rate": 9.622424532822233e-05, "loss": 0.9939, "step": 3509 }, { "epoch": 0.23782099058201775, "grad_norm": 8.714512825012207, "learning_rate": 9.622287630912452e-05, "loss": 0.9533, "step": 3510 }, { "epoch": 0.23788874584998984, "grad_norm": 8.663968086242676, "learning_rate": 9.62215072900267e-05, "loss": 0.9974, "step": 3511 }, { "epoch": 0.23795650111796193, "grad_norm": 8.704265594482422, "learning_rate": 9.622013827092888e-05, "loss": 1.2524, "step": 3512 }, { "epoch": 0.238024256385934, "grad_norm": 6.585339069366455, "learning_rate": 9.621876925183106e-05, "loss": 0.9021, "step": 3513 }, { "epoch": 0.2380920116539061, "grad_norm": 7.992496013641357, "learning_rate": 9.621740023273324e-05, "loss": 0.9345, "step": 3514 }, { "epoch": 0.23815976692187818, "grad_norm": 7.076025009155273, "learning_rate": 9.621603121363544e-05, "loss": 1.0487, "step": 3515 }, { "epoch": 0.23822752218985027, "grad_norm": 8.833822250366211, "learning_rate": 9.621466219453762e-05, "loss": 0.9612, "step": 3516 }, { "epoch": 0.23829527745782234, "grad_norm": 7.8553385734558105, "learning_rate": 9.62132931754398e-05, "loss": 1.0452, "step": 3517 }, { "epoch": 0.23836303272579443, "grad_norm": 8.44243049621582, "learning_rate": 9.621192415634198e-05, "loss": 0.6874, "step": 3518 }, { "epoch": 0.23843078799376652, "grad_norm": 8.1638822555542, "learning_rate": 9.621055513724417e-05, "loss": 0.9239, "step": 3519 }, { "epoch": 0.2384985432617386, "grad_norm": 8.315443992614746, "learning_rate": 9.620918611814635e-05, "loss": 1.1256, "step": 3520 }, { "epoch": 0.23856629852971067, "grad_norm": 10.451863288879395, "learning_rate": 9.620781709904853e-05, "loss": 1.1453, "step": 3521 }, { "epoch": 0.23863405379768277, "grad_norm": 9.117147445678711, "learning_rate": 9.620644807995071e-05, "loss": 1.0751, "step": 3522 }, { "epoch": 0.23870180906565486, "grad_norm": 7.746578216552734, "learning_rate": 9.62050790608529e-05, "loss": 0.867, "step": 3523 }, { "epoch": 0.23876956433362695, "grad_norm": 8.366421699523926, "learning_rate": 9.620371004175509e-05, "loss": 1.0863, "step": 3524 }, { "epoch": 0.238837319601599, "grad_norm": 9.951937675476074, "learning_rate": 9.620234102265727e-05, "loss": 1.0431, "step": 3525 }, { "epoch": 0.2389050748695711, "grad_norm": 9.077424049377441, "learning_rate": 9.620097200355945e-05, "loss": 1.0433, "step": 3526 }, { "epoch": 0.2389728301375432, "grad_norm": 6.919139385223389, "learning_rate": 9.619960298446163e-05, "loss": 0.8655, "step": 3527 }, { "epoch": 0.2390405854055153, "grad_norm": 7.729475975036621, "learning_rate": 9.619823396536382e-05, "loss": 1.0943, "step": 3528 }, { "epoch": 0.23910834067348735, "grad_norm": 9.782391548156738, "learning_rate": 9.6196864946266e-05, "loss": 1.1362, "step": 3529 }, { "epoch": 0.23917609594145944, "grad_norm": 8.685064315795898, "learning_rate": 9.619549592716818e-05, "loss": 0.9885, "step": 3530 }, { "epoch": 0.23924385120943154, "grad_norm": 6.963294982910156, "learning_rate": 9.619412690807036e-05, "loss": 0.9061, "step": 3531 }, { "epoch": 0.23931160647740363, "grad_norm": 8.584861755371094, "learning_rate": 9.619275788897256e-05, "loss": 0.8527, "step": 3532 }, { "epoch": 0.2393793617453757, "grad_norm": 9.253739356994629, "learning_rate": 9.619138886987474e-05, "loss": 1.0643, "step": 3533 }, { "epoch": 0.23944711701334778, "grad_norm": 7.659543037414551, "learning_rate": 9.619001985077692e-05, "loss": 0.9553, "step": 3534 }, { "epoch": 0.23951487228131987, "grad_norm": 8.769670486450195, "learning_rate": 9.618865083167911e-05, "loss": 1.0576, "step": 3535 }, { "epoch": 0.23958262754929197, "grad_norm": 7.416141033172607, "learning_rate": 9.618728181258129e-05, "loss": 1.0064, "step": 3536 }, { "epoch": 0.23965038281726403, "grad_norm": 7.91802978515625, "learning_rate": 9.618591279348347e-05, "loss": 1.0095, "step": 3537 }, { "epoch": 0.23971813808523612, "grad_norm": 6.665622234344482, "learning_rate": 9.618454377438567e-05, "loss": 0.8099, "step": 3538 }, { "epoch": 0.2397858933532082, "grad_norm": 7.3240203857421875, "learning_rate": 9.618317475528785e-05, "loss": 0.9496, "step": 3539 }, { "epoch": 0.2398536486211803, "grad_norm": 7.268299102783203, "learning_rate": 9.618180573619003e-05, "loss": 1.0504, "step": 3540 }, { "epoch": 0.23992140388915237, "grad_norm": 8.710535049438477, "learning_rate": 9.618043671709221e-05, "loss": 0.9228, "step": 3541 }, { "epoch": 0.23998915915712446, "grad_norm": 8.5223970413208, "learning_rate": 9.61790676979944e-05, "loss": 1.0383, "step": 3542 }, { "epoch": 0.24005691442509655, "grad_norm": 7.635293960571289, "learning_rate": 9.617769867889658e-05, "loss": 0.99, "step": 3543 }, { "epoch": 0.24012466969306864, "grad_norm": 9.41180419921875, "learning_rate": 9.617632965979876e-05, "loss": 1.1214, "step": 3544 }, { "epoch": 0.24019242496104073, "grad_norm": 9.31615924835205, "learning_rate": 9.617496064070094e-05, "loss": 0.9369, "step": 3545 }, { "epoch": 0.2402601802290128, "grad_norm": 9.427112579345703, "learning_rate": 9.617359162160312e-05, "loss": 1.1829, "step": 3546 }, { "epoch": 0.2403279354969849, "grad_norm": 7.825446605682373, "learning_rate": 9.617222260250532e-05, "loss": 0.9459, "step": 3547 }, { "epoch": 0.24039569076495698, "grad_norm": 7.8191633224487305, "learning_rate": 9.61708535834075e-05, "loss": 1.1144, "step": 3548 }, { "epoch": 0.24046344603292907, "grad_norm": 8.224778175354004, "learning_rate": 9.616948456430968e-05, "loss": 0.8544, "step": 3549 }, { "epoch": 0.24053120130090114, "grad_norm": 9.618694305419922, "learning_rate": 9.616811554521186e-05, "loss": 1.2254, "step": 3550 }, { "epoch": 0.24059895656887323, "grad_norm": 7.786314964294434, "learning_rate": 9.616674652611405e-05, "loss": 0.8867, "step": 3551 }, { "epoch": 0.24066671183684532, "grad_norm": 9.397835731506348, "learning_rate": 9.616537750701623e-05, "loss": 1.0505, "step": 3552 }, { "epoch": 0.2407344671048174, "grad_norm": 7.701049327850342, "learning_rate": 9.616400848791841e-05, "loss": 0.897, "step": 3553 }, { "epoch": 0.24080222237278948, "grad_norm": 7.573019504547119, "learning_rate": 9.616263946882059e-05, "loss": 0.9972, "step": 3554 }, { "epoch": 0.24086997764076157, "grad_norm": 8.337100982666016, "learning_rate": 9.616127044972277e-05, "loss": 1.0835, "step": 3555 }, { "epoch": 0.24093773290873366, "grad_norm": 8.702056884765625, "learning_rate": 9.615990143062497e-05, "loss": 0.7688, "step": 3556 }, { "epoch": 0.24100548817670575, "grad_norm": 8.482346534729004, "learning_rate": 9.615853241152715e-05, "loss": 1.1972, "step": 3557 }, { "epoch": 0.24107324344467781, "grad_norm": 7.193674087524414, "learning_rate": 9.615716339242933e-05, "loss": 1.0389, "step": 3558 }, { "epoch": 0.2411409987126499, "grad_norm": 8.803317070007324, "learning_rate": 9.615579437333151e-05, "loss": 1.0429, "step": 3559 }, { "epoch": 0.241208753980622, "grad_norm": 9.055732727050781, "learning_rate": 9.61544253542337e-05, "loss": 1.1161, "step": 3560 }, { "epoch": 0.2412765092485941, "grad_norm": 10.80001449584961, "learning_rate": 9.615305633513588e-05, "loss": 0.9628, "step": 3561 }, { "epoch": 0.24134426451656615, "grad_norm": 7.707313060760498, "learning_rate": 9.615168731603806e-05, "loss": 0.8806, "step": 3562 }, { "epoch": 0.24141201978453825, "grad_norm": 8.823626518249512, "learning_rate": 9.615031829694024e-05, "loss": 0.8771, "step": 3563 }, { "epoch": 0.24147977505251034, "grad_norm": 7.984725475311279, "learning_rate": 9.614894927784242e-05, "loss": 0.9456, "step": 3564 }, { "epoch": 0.24154753032048243, "grad_norm": 7.335816860198975, "learning_rate": 9.614758025874462e-05, "loss": 1.0323, "step": 3565 }, { "epoch": 0.2416152855884545, "grad_norm": 5.959085941314697, "learning_rate": 9.61462112396468e-05, "loss": 0.8049, "step": 3566 }, { "epoch": 0.24168304085642658, "grad_norm": 8.775632858276367, "learning_rate": 9.614484222054898e-05, "loss": 0.8875, "step": 3567 }, { "epoch": 0.24175079612439868, "grad_norm": 6.589362621307373, "learning_rate": 9.614347320145116e-05, "loss": 1.0551, "step": 3568 }, { "epoch": 0.24181855139237077, "grad_norm": 7.49434232711792, "learning_rate": 9.614210418235334e-05, "loss": 0.7662, "step": 3569 }, { "epoch": 0.24188630666034283, "grad_norm": 7.759862899780273, "learning_rate": 9.614073516325553e-05, "loss": 0.7364, "step": 3570 }, { "epoch": 0.24195406192831492, "grad_norm": 7.9510273933410645, "learning_rate": 9.613936614415771e-05, "loss": 0.9277, "step": 3571 }, { "epoch": 0.24202181719628701, "grad_norm": 8.308568954467773, "learning_rate": 9.61379971250599e-05, "loss": 1.1689, "step": 3572 }, { "epoch": 0.2420895724642591, "grad_norm": 9.44938850402832, "learning_rate": 9.613662810596207e-05, "loss": 1.0937, "step": 3573 }, { "epoch": 0.24215732773223117, "grad_norm": 11.66707706451416, "learning_rate": 9.613525908686427e-05, "loss": 0.77, "step": 3574 }, { "epoch": 0.24222508300020326, "grad_norm": 9.25683879852295, "learning_rate": 9.613389006776645e-05, "loss": 0.9661, "step": 3575 }, { "epoch": 0.24229283826817535, "grad_norm": 7.289797782897949, "learning_rate": 9.613252104866863e-05, "loss": 0.8597, "step": 3576 }, { "epoch": 0.24236059353614745, "grad_norm": 6.669293403625488, "learning_rate": 9.613115202957081e-05, "loss": 0.9134, "step": 3577 }, { "epoch": 0.2424283488041195, "grad_norm": 8.785436630249023, "learning_rate": 9.612978301047299e-05, "loss": 0.7546, "step": 3578 }, { "epoch": 0.2424961040720916, "grad_norm": 7.386310577392578, "learning_rate": 9.612841399137518e-05, "loss": 1.0635, "step": 3579 }, { "epoch": 0.2425638593400637, "grad_norm": 8.23388957977295, "learning_rate": 9.612704497227736e-05, "loss": 0.8598, "step": 3580 }, { "epoch": 0.24263161460803578, "grad_norm": 8.498323440551758, "learning_rate": 9.612567595317954e-05, "loss": 1.083, "step": 3581 }, { "epoch": 0.24269936987600785, "grad_norm": 7.849715709686279, "learning_rate": 9.612430693408174e-05, "loss": 0.7673, "step": 3582 }, { "epoch": 0.24276712514397994, "grad_norm": 8.113242149353027, "learning_rate": 9.612293791498392e-05, "loss": 1.1003, "step": 3583 }, { "epoch": 0.24283488041195203, "grad_norm": 6.983048915863037, "learning_rate": 9.61215688958861e-05, "loss": 0.8668, "step": 3584 }, { "epoch": 0.24290263567992412, "grad_norm": 8.389126777648926, "learning_rate": 9.612019987678829e-05, "loss": 1.0008, "step": 3585 }, { "epoch": 0.2429703909478962, "grad_norm": 7.593414783477783, "learning_rate": 9.611883085769047e-05, "loss": 0.9829, "step": 3586 }, { "epoch": 0.24303814621586828, "grad_norm": 7.836172103881836, "learning_rate": 9.611746183859265e-05, "loss": 0.8128, "step": 3587 }, { "epoch": 0.24310590148384037, "grad_norm": 8.981040954589844, "learning_rate": 9.611609281949485e-05, "loss": 1.1249, "step": 3588 }, { "epoch": 0.24317365675181246, "grad_norm": 6.724935054779053, "learning_rate": 9.611472380039703e-05, "loss": 0.7699, "step": 3589 }, { "epoch": 0.24324141201978453, "grad_norm": 9.456436157226562, "learning_rate": 9.611335478129921e-05, "loss": 1.0085, "step": 3590 }, { "epoch": 0.24330916728775662, "grad_norm": 11.044548988342285, "learning_rate": 9.611198576220139e-05, "loss": 1.0671, "step": 3591 }, { "epoch": 0.2433769225557287, "grad_norm": 10.590521812438965, "learning_rate": 9.611061674310358e-05, "loss": 1.0051, "step": 3592 }, { "epoch": 0.2434446778237008, "grad_norm": 8.595380783081055, "learning_rate": 9.610924772400576e-05, "loss": 0.9655, "step": 3593 }, { "epoch": 0.24351243309167286, "grad_norm": 7.794788837432861, "learning_rate": 9.610787870490794e-05, "loss": 0.9551, "step": 3594 }, { "epoch": 0.24358018835964496, "grad_norm": 7.609074115753174, "learning_rate": 9.610650968581012e-05, "loss": 0.9856, "step": 3595 }, { "epoch": 0.24364794362761705, "grad_norm": 6.909607410430908, "learning_rate": 9.61051406667123e-05, "loss": 1.0195, "step": 3596 }, { "epoch": 0.24371569889558914, "grad_norm": 7.958381652832031, "learning_rate": 9.61037716476145e-05, "loss": 0.8263, "step": 3597 }, { "epoch": 0.24378345416356123, "grad_norm": 7.38173246383667, "learning_rate": 9.610240262851668e-05, "loss": 0.8551, "step": 3598 }, { "epoch": 0.2438512094315333, "grad_norm": 7.1448822021484375, "learning_rate": 9.610103360941886e-05, "loss": 0.8189, "step": 3599 }, { "epoch": 0.2439189646995054, "grad_norm": 7.44658088684082, "learning_rate": 9.609966459032104e-05, "loss": 0.8056, "step": 3600 }, { "epoch": 0.24398671996747748, "grad_norm": 7.480542182922363, "learning_rate": 9.609829557122322e-05, "loss": 0.8591, "step": 3601 }, { "epoch": 0.24405447523544957, "grad_norm": 9.895995140075684, "learning_rate": 9.609692655212541e-05, "loss": 1.1256, "step": 3602 }, { "epoch": 0.24412223050342163, "grad_norm": 7.009078025817871, "learning_rate": 9.609555753302759e-05, "loss": 0.9045, "step": 3603 }, { "epoch": 0.24418998577139372, "grad_norm": 8.714953422546387, "learning_rate": 9.609418851392977e-05, "loss": 1.1207, "step": 3604 }, { "epoch": 0.24425774103936582, "grad_norm": 7.239734172821045, "learning_rate": 9.609281949483195e-05, "loss": 0.7811, "step": 3605 }, { "epoch": 0.2443254963073379, "grad_norm": 10.486507415771484, "learning_rate": 9.609145047573415e-05, "loss": 1.2213, "step": 3606 }, { "epoch": 0.24439325157530997, "grad_norm": 12.031790733337402, "learning_rate": 9.609008145663633e-05, "loss": 1.0316, "step": 3607 }, { "epoch": 0.24446100684328206, "grad_norm": 7.607183456420898, "learning_rate": 9.608871243753851e-05, "loss": 0.7754, "step": 3608 }, { "epoch": 0.24452876211125416, "grad_norm": 9.313577651977539, "learning_rate": 9.608734341844069e-05, "loss": 0.9907, "step": 3609 }, { "epoch": 0.24459651737922625, "grad_norm": 7.672274589538574, "learning_rate": 9.608597439934287e-05, "loss": 0.8428, "step": 3610 }, { "epoch": 0.2446642726471983, "grad_norm": 8.259462356567383, "learning_rate": 9.608460538024506e-05, "loss": 0.8863, "step": 3611 }, { "epoch": 0.2447320279151704, "grad_norm": 8.50256061553955, "learning_rate": 9.608323636114724e-05, "loss": 0.7771, "step": 3612 }, { "epoch": 0.2447997831831425, "grad_norm": 6.8893818855285645, "learning_rate": 9.608186734204942e-05, "loss": 0.9791, "step": 3613 }, { "epoch": 0.24486753845111459, "grad_norm": 8.954825401306152, "learning_rate": 9.60804983229516e-05, "loss": 1.0712, "step": 3614 }, { "epoch": 0.24493529371908665, "grad_norm": 8.83995532989502, "learning_rate": 9.60791293038538e-05, "loss": 1.1982, "step": 3615 }, { "epoch": 0.24500304898705874, "grad_norm": 8.96689224243164, "learning_rate": 9.607776028475598e-05, "loss": 0.9816, "step": 3616 }, { "epoch": 0.24507080425503083, "grad_norm": 15.17086124420166, "learning_rate": 9.607639126565816e-05, "loss": 1.1416, "step": 3617 }, { "epoch": 0.24513855952300292, "grad_norm": 9.527314186096191, "learning_rate": 9.607502224656034e-05, "loss": 1.1699, "step": 3618 }, { "epoch": 0.245206314790975, "grad_norm": 7.378002166748047, "learning_rate": 9.607365322746252e-05, "loss": 1.1303, "step": 3619 }, { "epoch": 0.24527407005894708, "grad_norm": 7.204291343688965, "learning_rate": 9.607228420836471e-05, "loss": 0.8078, "step": 3620 }, { "epoch": 0.24534182532691917, "grad_norm": 8.181205749511719, "learning_rate": 9.607091518926689e-05, "loss": 0.9221, "step": 3621 }, { "epoch": 0.24540958059489126, "grad_norm": 8.479545593261719, "learning_rate": 9.606954617016907e-05, "loss": 0.8133, "step": 3622 }, { "epoch": 0.24547733586286333, "grad_norm": 7.591360092163086, "learning_rate": 9.606817715107125e-05, "loss": 1.0632, "step": 3623 }, { "epoch": 0.24554509113083542, "grad_norm": 8.558969497680664, "learning_rate": 9.606680813197343e-05, "loss": 1.0755, "step": 3624 }, { "epoch": 0.2456128463988075, "grad_norm": 8.02037525177002, "learning_rate": 9.606543911287563e-05, "loss": 1.0164, "step": 3625 }, { "epoch": 0.2456806016667796, "grad_norm": 7.49207878112793, "learning_rate": 9.606407009377781e-05, "loss": 1.1325, "step": 3626 }, { "epoch": 0.24574835693475167, "grad_norm": 7.376079559326172, "learning_rate": 9.606270107467999e-05, "loss": 0.7917, "step": 3627 }, { "epoch": 0.24581611220272376, "grad_norm": 7.331247329711914, "learning_rate": 9.606133205558218e-05, "loss": 1.2186, "step": 3628 }, { "epoch": 0.24588386747069585, "grad_norm": 7.393257141113281, "learning_rate": 9.605996303648436e-05, "loss": 0.7155, "step": 3629 }, { "epoch": 0.24595162273866794, "grad_norm": 9.15472412109375, "learning_rate": 9.605859401738654e-05, "loss": 0.9562, "step": 3630 }, { "epoch": 0.24601937800664, "grad_norm": 6.846646785736084, "learning_rate": 9.605722499828874e-05, "loss": 1.109, "step": 3631 }, { "epoch": 0.2460871332746121, "grad_norm": 8.57854175567627, "learning_rate": 9.605585597919092e-05, "loss": 0.8706, "step": 3632 }, { "epoch": 0.2461548885425842, "grad_norm": 7.089768886566162, "learning_rate": 9.60544869600931e-05, "loss": 0.8776, "step": 3633 }, { "epoch": 0.24622264381055628, "grad_norm": 7.549044132232666, "learning_rate": 9.605311794099529e-05, "loss": 0.9956, "step": 3634 }, { "epoch": 0.24629039907852834, "grad_norm": 6.839412689208984, "learning_rate": 9.605174892189747e-05, "loss": 0.8722, "step": 3635 }, { "epoch": 0.24635815434650044, "grad_norm": 7.378058910369873, "learning_rate": 9.605037990279965e-05, "loss": 0.8663, "step": 3636 }, { "epoch": 0.24642590961447253, "grad_norm": 7.893070220947266, "learning_rate": 9.604901088370183e-05, "loss": 0.9213, "step": 3637 }, { "epoch": 0.24649366488244462, "grad_norm": 7.3345232009887695, "learning_rate": 9.604764186460403e-05, "loss": 0.9827, "step": 3638 }, { "epoch": 0.24656142015041668, "grad_norm": 6.120781421661377, "learning_rate": 9.60462728455062e-05, "loss": 0.6371, "step": 3639 }, { "epoch": 0.24662917541838877, "grad_norm": 8.695615768432617, "learning_rate": 9.604490382640839e-05, "loss": 0.9769, "step": 3640 }, { "epoch": 0.24669693068636087, "grad_norm": 8.469325065612793, "learning_rate": 9.604353480731057e-05, "loss": 0.9267, "step": 3641 }, { "epoch": 0.24676468595433296, "grad_norm": 9.191173553466797, "learning_rate": 9.604216578821275e-05, "loss": 1.1818, "step": 3642 }, { "epoch": 0.24683244122230502, "grad_norm": 9.337483406066895, "learning_rate": 9.604079676911494e-05, "loss": 0.9904, "step": 3643 }, { "epoch": 0.2469001964902771, "grad_norm": 7.597773551940918, "learning_rate": 9.603942775001712e-05, "loss": 0.8928, "step": 3644 }, { "epoch": 0.2469679517582492, "grad_norm": 8.155903816223145, "learning_rate": 9.60380587309193e-05, "loss": 0.9511, "step": 3645 }, { "epoch": 0.2470357070262213, "grad_norm": 7.695154666900635, "learning_rate": 9.603668971182148e-05, "loss": 1.0724, "step": 3646 }, { "epoch": 0.24710346229419336, "grad_norm": 7.492908000946045, "learning_rate": 9.603532069272366e-05, "loss": 0.9133, "step": 3647 }, { "epoch": 0.24717121756216545, "grad_norm": 8.190613746643066, "learning_rate": 9.603395167362586e-05, "loss": 0.8972, "step": 3648 }, { "epoch": 0.24723897283013754, "grad_norm": 7.9199347496032715, "learning_rate": 9.603258265452804e-05, "loss": 1.0501, "step": 3649 }, { "epoch": 0.24730672809810963, "grad_norm": 8.386896133422852, "learning_rate": 9.603121363543022e-05, "loss": 0.998, "step": 3650 }, { "epoch": 0.24737448336608173, "grad_norm": 6.536781311035156, "learning_rate": 9.60298446163324e-05, "loss": 0.8379, "step": 3651 }, { "epoch": 0.2474422386340538, "grad_norm": 6.918766021728516, "learning_rate": 9.602847559723459e-05, "loss": 0.9397, "step": 3652 }, { "epoch": 0.24750999390202588, "grad_norm": 7.00775146484375, "learning_rate": 9.602710657813677e-05, "loss": 1.0079, "step": 3653 }, { "epoch": 0.24757774916999797, "grad_norm": 7.693192958831787, "learning_rate": 9.602573755903895e-05, "loss": 1.0657, "step": 3654 }, { "epoch": 0.24764550443797007, "grad_norm": 10.667771339416504, "learning_rate": 9.602436853994113e-05, "loss": 1.0657, "step": 3655 }, { "epoch": 0.24771325970594213, "grad_norm": 8.622758865356445, "learning_rate": 9.602299952084331e-05, "loss": 0.9792, "step": 3656 }, { "epoch": 0.24778101497391422, "grad_norm": 8.834444046020508, "learning_rate": 9.602163050174551e-05, "loss": 1.1014, "step": 3657 }, { "epoch": 0.2478487702418863, "grad_norm": 7.717538356781006, "learning_rate": 9.602026148264769e-05, "loss": 0.8781, "step": 3658 }, { "epoch": 0.2479165255098584, "grad_norm": 7.797954559326172, "learning_rate": 9.601889246354987e-05, "loss": 0.9772, "step": 3659 }, { "epoch": 0.24798428077783047, "grad_norm": 7.376112937927246, "learning_rate": 9.601752344445205e-05, "loss": 1.0468, "step": 3660 }, { "epoch": 0.24805203604580256, "grad_norm": 7.7266387939453125, "learning_rate": 9.601615442535424e-05, "loss": 1.007, "step": 3661 }, { "epoch": 0.24811979131377465, "grad_norm": 9.196928977966309, "learning_rate": 9.601478540625642e-05, "loss": 1.1327, "step": 3662 }, { "epoch": 0.24818754658174674, "grad_norm": 7.892288684844971, "learning_rate": 9.60134163871586e-05, "loss": 0.7511, "step": 3663 }, { "epoch": 0.2482553018497188, "grad_norm": 8.854056358337402, "learning_rate": 9.601204736806078e-05, "loss": 0.9546, "step": 3664 }, { "epoch": 0.2483230571176909, "grad_norm": 7.985452651977539, "learning_rate": 9.601067834896296e-05, "loss": 1.2186, "step": 3665 }, { "epoch": 0.248390812385663, "grad_norm": 8.336162567138672, "learning_rate": 9.600930932986516e-05, "loss": 0.864, "step": 3666 }, { "epoch": 0.24845856765363508, "grad_norm": 10.857757568359375, "learning_rate": 9.600794031076734e-05, "loss": 1.1948, "step": 3667 }, { "epoch": 0.24852632292160715, "grad_norm": 8.168721199035645, "learning_rate": 9.600657129166952e-05, "loss": 1.1151, "step": 3668 }, { "epoch": 0.24859407818957924, "grad_norm": 7.509332656860352, "learning_rate": 9.60052022725717e-05, "loss": 0.9215, "step": 3669 }, { "epoch": 0.24866183345755133, "grad_norm": 10.4354829788208, "learning_rate": 9.600383325347389e-05, "loss": 1.0696, "step": 3670 }, { "epoch": 0.24872958872552342, "grad_norm": 6.930381774902344, "learning_rate": 9.600246423437607e-05, "loss": 0.9229, "step": 3671 }, { "epoch": 0.24879734399349548, "grad_norm": 6.608088970184326, "learning_rate": 9.600109521527825e-05, "loss": 0.928, "step": 3672 }, { "epoch": 0.24886509926146758, "grad_norm": 7.4217095375061035, "learning_rate": 9.599972619618043e-05, "loss": 0.9608, "step": 3673 }, { "epoch": 0.24893285452943967, "grad_norm": 7.46991491317749, "learning_rate": 9.599835717708263e-05, "loss": 0.8928, "step": 3674 }, { "epoch": 0.24900060979741176, "grad_norm": 8.76001262664795, "learning_rate": 9.599698815798481e-05, "loss": 1.191, "step": 3675 }, { "epoch": 0.24906836506538382, "grad_norm": 8.395779609680176, "learning_rate": 9.599561913888699e-05, "loss": 1.1479, "step": 3676 }, { "epoch": 0.24913612033335591, "grad_norm": 8.00460147857666, "learning_rate": 9.599425011978918e-05, "loss": 1.1005, "step": 3677 }, { "epoch": 0.249203875601328, "grad_norm": 7.817287445068359, "learning_rate": 9.599288110069136e-05, "loss": 1.1568, "step": 3678 }, { "epoch": 0.2492716308693001, "grad_norm": 9.60706615447998, "learning_rate": 9.599151208159354e-05, "loss": 0.9734, "step": 3679 }, { "epoch": 0.24933938613727216, "grad_norm": 8.347691535949707, "learning_rate": 9.599014306249574e-05, "loss": 1.2208, "step": 3680 }, { "epoch": 0.24940714140524425, "grad_norm": 8.106613159179688, "learning_rate": 9.598877404339792e-05, "loss": 0.9253, "step": 3681 }, { "epoch": 0.24947489667321635, "grad_norm": 8.099063873291016, "learning_rate": 9.59874050243001e-05, "loss": 1.0516, "step": 3682 }, { "epoch": 0.24954265194118844, "grad_norm": 7.839589595794678, "learning_rate": 9.598603600520228e-05, "loss": 0.9764, "step": 3683 }, { "epoch": 0.2496104072091605, "grad_norm": 8.679840087890625, "learning_rate": 9.598466698610447e-05, "loss": 1.0407, "step": 3684 }, { "epoch": 0.2496781624771326, "grad_norm": 6.854926109313965, "learning_rate": 9.598329796700665e-05, "loss": 0.9457, "step": 3685 }, { "epoch": 0.24974591774510468, "grad_norm": 9.133661270141602, "learning_rate": 9.598192894790883e-05, "loss": 1.0391, "step": 3686 }, { "epoch": 0.24981367301307678, "grad_norm": 7.744726657867432, "learning_rate": 9.598055992881101e-05, "loss": 1.0256, "step": 3687 }, { "epoch": 0.24988142828104884, "grad_norm": 8.9452543258667, "learning_rate": 9.597919090971319e-05, "loss": 0.9649, "step": 3688 }, { "epoch": 0.24994918354902093, "grad_norm": 8.396431922912598, "learning_rate": 9.597782189061539e-05, "loss": 0.8189, "step": 3689 }, { "epoch": 0.24994918354902093, "eval_loss": 0.9518795609474182, "eval_noise_accuracy": 0.0, "eval_runtime": 1533.2421, "eval_samples_per_second": 3.352, "eval_steps_per_second": 0.21, "eval_wer": 88.77438705594233, "step": 3689 }, { "epoch": 0.250016938816993, "grad_norm": 7.252607822418213, "learning_rate": 9.597645287151757e-05, "loss": 1.0734, "step": 3690 }, { "epoch": 0.2500846940849651, "grad_norm": 8.768789291381836, "learning_rate": 9.597508385241975e-05, "loss": 0.9763, "step": 3691 }, { "epoch": 0.2501524493529372, "grad_norm": 6.9475321769714355, "learning_rate": 9.597371483332193e-05, "loss": 0.8121, "step": 3692 }, { "epoch": 0.25022020462090927, "grad_norm": 7.9007248878479, "learning_rate": 9.597234581422412e-05, "loss": 0.9668, "step": 3693 }, { "epoch": 0.25028795988888136, "grad_norm": 6.524989604949951, "learning_rate": 9.59709767951263e-05, "loss": 0.9151, "step": 3694 }, { "epoch": 0.25035571515685345, "grad_norm": 7.83770751953125, "learning_rate": 9.596960777602848e-05, "loss": 0.8338, "step": 3695 }, { "epoch": 0.25042347042482554, "grad_norm": 8.619128227233887, "learning_rate": 9.596823875693066e-05, "loss": 1.0394, "step": 3696 }, { "epoch": 0.25049122569279764, "grad_norm": 7.238592624664307, "learning_rate": 9.596686973783284e-05, "loss": 0.8773, "step": 3697 }, { "epoch": 0.2505589809607697, "grad_norm": 10.257181167602539, "learning_rate": 9.596550071873504e-05, "loss": 1.0137, "step": 3698 }, { "epoch": 0.25062673622874176, "grad_norm": 9.302980422973633, "learning_rate": 9.596413169963722e-05, "loss": 0.8317, "step": 3699 }, { "epoch": 0.25069449149671386, "grad_norm": 7.485314846038818, "learning_rate": 9.59627626805394e-05, "loss": 0.9272, "step": 3700 }, { "epoch": 0.25076224676468595, "grad_norm": 7.870807647705078, "learning_rate": 9.596139366144158e-05, "loss": 1.009, "step": 3701 }, { "epoch": 0.25083000203265804, "grad_norm": 8.245805740356445, "learning_rate": 9.596002464234376e-05, "loss": 0.8616, "step": 3702 }, { "epoch": 0.25089775730063013, "grad_norm": 10.324470520019531, "learning_rate": 9.595865562324595e-05, "loss": 1.1567, "step": 3703 }, { "epoch": 0.2509655125686022, "grad_norm": 7.588774681091309, "learning_rate": 9.595728660414813e-05, "loss": 1.0025, "step": 3704 }, { "epoch": 0.2510332678365743, "grad_norm": 8.32935905456543, "learning_rate": 9.595591758505031e-05, "loss": 1.1826, "step": 3705 }, { "epoch": 0.2511010231045464, "grad_norm": 7.146906852722168, "learning_rate": 9.595454856595249e-05, "loss": 0.8108, "step": 3706 }, { "epoch": 0.25116877837251844, "grad_norm": 8.91852855682373, "learning_rate": 9.595317954685469e-05, "loss": 1.1377, "step": 3707 }, { "epoch": 0.25123653364049053, "grad_norm": 7.726437568664551, "learning_rate": 9.595181052775687e-05, "loss": 0.9971, "step": 3708 }, { "epoch": 0.2513042889084626, "grad_norm": 11.102527618408203, "learning_rate": 9.595044150865905e-05, "loss": 1.1075, "step": 3709 }, { "epoch": 0.2513720441764347, "grad_norm": 6.124303340911865, "learning_rate": 9.594907248956123e-05, "loss": 0.8523, "step": 3710 }, { "epoch": 0.2514397994444068, "grad_norm": 8.56926441192627, "learning_rate": 9.594770347046341e-05, "loss": 1.0801, "step": 3711 }, { "epoch": 0.2515075547123789, "grad_norm": 6.994394779205322, "learning_rate": 9.59463344513656e-05, "loss": 0.8559, "step": 3712 }, { "epoch": 0.251575309980351, "grad_norm": 7.428825378417969, "learning_rate": 9.594496543226778e-05, "loss": 0.8861, "step": 3713 }, { "epoch": 0.2516430652483231, "grad_norm": 8.7849760055542, "learning_rate": 9.594359641316996e-05, "loss": 0.9083, "step": 3714 }, { "epoch": 0.2517108205162951, "grad_norm": 7.608119964599609, "learning_rate": 9.594222739407214e-05, "loss": 0.8529, "step": 3715 }, { "epoch": 0.2517785757842672, "grad_norm": 7.076242446899414, "learning_rate": 9.594085837497434e-05, "loss": 0.8817, "step": 3716 }, { "epoch": 0.2518463310522393, "grad_norm": 8.78627872467041, "learning_rate": 9.593948935587652e-05, "loss": 1.1752, "step": 3717 }, { "epoch": 0.2519140863202114, "grad_norm": 8.589457511901855, "learning_rate": 9.59381203367787e-05, "loss": 0.9099, "step": 3718 }, { "epoch": 0.2519818415881835, "grad_norm": 8.67271900177002, "learning_rate": 9.593675131768088e-05, "loss": 1.0304, "step": 3719 }, { "epoch": 0.2520495968561556, "grad_norm": 9.180156707763672, "learning_rate": 9.593538229858307e-05, "loss": 1.0815, "step": 3720 }, { "epoch": 0.25211735212412767, "grad_norm": 7.973734378814697, "learning_rate": 9.593401327948525e-05, "loss": 1.0816, "step": 3721 }, { "epoch": 0.25218510739209976, "grad_norm": 7.272556781768799, "learning_rate": 9.593264426038743e-05, "loss": 0.9027, "step": 3722 }, { "epoch": 0.2522528626600718, "grad_norm": 8.033550262451172, "learning_rate": 9.593127524128963e-05, "loss": 1.0772, "step": 3723 }, { "epoch": 0.2523206179280439, "grad_norm": 7.861289978027344, "learning_rate": 9.59299062221918e-05, "loss": 0.8774, "step": 3724 }, { "epoch": 0.252388373196016, "grad_norm": 7.043121337890625, "learning_rate": 9.592853720309399e-05, "loss": 1.0165, "step": 3725 }, { "epoch": 0.25245612846398807, "grad_norm": 7.830938816070557, "learning_rate": 9.592716818399618e-05, "loss": 1.0379, "step": 3726 }, { "epoch": 0.25252388373196016, "grad_norm": 8.138580322265625, "learning_rate": 9.592579916489836e-05, "loss": 0.954, "step": 3727 }, { "epoch": 0.25259163899993226, "grad_norm": 6.810534477233887, "learning_rate": 9.592443014580054e-05, "loss": 0.7475, "step": 3728 }, { "epoch": 0.25265939426790435, "grad_norm": 6.66425895690918, "learning_rate": 9.592306112670272e-05, "loss": 0.8203, "step": 3729 }, { "epoch": 0.25272714953587644, "grad_norm": 7.962128162384033, "learning_rate": 9.592169210760492e-05, "loss": 0.9923, "step": 3730 }, { "epoch": 0.2527949048038485, "grad_norm": 8.372082710266113, "learning_rate": 9.59203230885071e-05, "loss": 0.809, "step": 3731 }, { "epoch": 0.25286266007182057, "grad_norm": 8.343878746032715, "learning_rate": 9.591895406940928e-05, "loss": 0.9373, "step": 3732 }, { "epoch": 0.25293041533979266, "grad_norm": 9.935523986816406, "learning_rate": 9.591758505031146e-05, "loss": 1.0037, "step": 3733 }, { "epoch": 0.25299817060776475, "grad_norm": 8.378336906433105, "learning_rate": 9.591621603121364e-05, "loss": 0.9834, "step": 3734 }, { "epoch": 0.25306592587573684, "grad_norm": 8.581600189208984, "learning_rate": 9.591484701211583e-05, "loss": 1.1192, "step": 3735 }, { "epoch": 0.25313368114370893, "grad_norm": 8.007279396057129, "learning_rate": 9.591347799301801e-05, "loss": 1.0261, "step": 3736 }, { "epoch": 0.253201436411681, "grad_norm": 7.40525484085083, "learning_rate": 9.591210897392019e-05, "loss": 0.9273, "step": 3737 }, { "epoch": 0.2532691916796531, "grad_norm": 7.228291988372803, "learning_rate": 9.591073995482237e-05, "loss": 1.0219, "step": 3738 }, { "epoch": 0.25333694694762515, "grad_norm": 6.714911460876465, "learning_rate": 9.590937093572457e-05, "loss": 0.9134, "step": 3739 }, { "epoch": 0.25340470221559724, "grad_norm": 6.150938510894775, "learning_rate": 9.590800191662675e-05, "loss": 0.8577, "step": 3740 }, { "epoch": 0.25347245748356934, "grad_norm": 10.693168640136719, "learning_rate": 9.590663289752893e-05, "loss": 1.2867, "step": 3741 }, { "epoch": 0.2535402127515414, "grad_norm": 8.114953994750977, "learning_rate": 9.590526387843111e-05, "loss": 1.0602, "step": 3742 }, { "epoch": 0.2536079680195135, "grad_norm": 8.472567558288574, "learning_rate": 9.590389485933329e-05, "loss": 0.9992, "step": 3743 }, { "epoch": 0.2536757232874856, "grad_norm": 8.681047439575195, "learning_rate": 9.590252584023548e-05, "loss": 1.003, "step": 3744 }, { "epoch": 0.2537434785554577, "grad_norm": 8.753557205200195, "learning_rate": 9.590115682113766e-05, "loss": 0.9447, "step": 3745 }, { "epoch": 0.2538112338234298, "grad_norm": 6.80125093460083, "learning_rate": 9.589978780203984e-05, "loss": 0.8522, "step": 3746 }, { "epoch": 0.25387898909140183, "grad_norm": 8.829830169677734, "learning_rate": 9.589841878294202e-05, "loss": 0.9131, "step": 3747 }, { "epoch": 0.2539467443593739, "grad_norm": 7.068274021148682, "learning_rate": 9.589704976384422e-05, "loss": 0.8348, "step": 3748 }, { "epoch": 0.254014499627346, "grad_norm": 8.19235897064209, "learning_rate": 9.58956807447464e-05, "loss": 0.8975, "step": 3749 }, { "epoch": 0.2540822548953181, "grad_norm": 8.1896333694458, "learning_rate": 9.589431172564858e-05, "loss": 1.1285, "step": 3750 }, { "epoch": 0.2541500101632902, "grad_norm": 7.581019878387451, "learning_rate": 9.589294270655076e-05, "loss": 0.8838, "step": 3751 }, { "epoch": 0.2542177654312623, "grad_norm": 6.806415557861328, "learning_rate": 9.589157368745294e-05, "loss": 0.9144, "step": 3752 }, { "epoch": 0.2542855206992344, "grad_norm": 6.077991485595703, "learning_rate": 9.589020466835513e-05, "loss": 0.7032, "step": 3753 }, { "epoch": 0.25435327596720647, "grad_norm": 9.278702735900879, "learning_rate": 9.588883564925731e-05, "loss": 1.1806, "step": 3754 }, { "epoch": 0.2544210312351785, "grad_norm": 7.136252403259277, "learning_rate": 9.588746663015949e-05, "loss": 0.9589, "step": 3755 }, { "epoch": 0.2544887865031506, "grad_norm": 6.349377632141113, "learning_rate": 9.588609761106167e-05, "loss": 0.9972, "step": 3756 }, { "epoch": 0.2545565417711227, "grad_norm": 10.120612144470215, "learning_rate": 9.588472859196385e-05, "loss": 1.124, "step": 3757 }, { "epoch": 0.2546242970390948, "grad_norm": 6.840261459350586, "learning_rate": 9.588335957286605e-05, "loss": 0.8897, "step": 3758 }, { "epoch": 0.2546920523070669, "grad_norm": 6.069836616516113, "learning_rate": 9.588199055376823e-05, "loss": 0.7794, "step": 3759 }, { "epoch": 0.25475980757503897, "grad_norm": 6.336123943328857, "learning_rate": 9.588062153467041e-05, "loss": 0.8936, "step": 3760 }, { "epoch": 0.25482756284301106, "grad_norm": 7.337663173675537, "learning_rate": 9.587925251557259e-05, "loss": 0.9241, "step": 3761 }, { "epoch": 0.25489531811098315, "grad_norm": 9.220332145690918, "learning_rate": 9.587788349647478e-05, "loss": 1.2426, "step": 3762 }, { "epoch": 0.25496307337895524, "grad_norm": 6.4654951095581055, "learning_rate": 9.587651447737696e-05, "loss": 0.7795, "step": 3763 }, { "epoch": 0.2550308286469273, "grad_norm": 8.458954811096191, "learning_rate": 9.587514545827914e-05, "loss": 1.0493, "step": 3764 }, { "epoch": 0.25509858391489937, "grad_norm": 6.73598575592041, "learning_rate": 9.587377643918132e-05, "loss": 0.9777, "step": 3765 }, { "epoch": 0.25516633918287146, "grad_norm": 8.673493385314941, "learning_rate": 9.587240742008352e-05, "loss": 1.1108, "step": 3766 }, { "epoch": 0.25523409445084355, "grad_norm": 7.328574180603027, "learning_rate": 9.58710384009857e-05, "loss": 0.9502, "step": 3767 }, { "epoch": 0.25530184971881564, "grad_norm": 7.9878692626953125, "learning_rate": 9.586966938188788e-05, "loss": 1.0191, "step": 3768 }, { "epoch": 0.25536960498678773, "grad_norm": 7.247650623321533, "learning_rate": 9.586830036279007e-05, "loss": 0.9356, "step": 3769 }, { "epoch": 0.2554373602547598, "grad_norm": 7.29271125793457, "learning_rate": 9.586693134369225e-05, "loss": 0.9877, "step": 3770 }, { "epoch": 0.2555051155227319, "grad_norm": 7.802029132843018, "learning_rate": 9.586556232459443e-05, "loss": 0.8923, "step": 3771 }, { "epoch": 0.25557287079070395, "grad_norm": 8.129645347595215, "learning_rate": 9.586419330549663e-05, "loss": 0.9451, "step": 3772 }, { "epoch": 0.25564062605867605, "grad_norm": 7.033285140991211, "learning_rate": 9.58628242863988e-05, "loss": 0.8529, "step": 3773 }, { "epoch": 0.25570838132664814, "grad_norm": 7.490065574645996, "learning_rate": 9.586145526730099e-05, "loss": 0.8912, "step": 3774 }, { "epoch": 0.25577613659462023, "grad_norm": 8.730104446411133, "learning_rate": 9.586008624820317e-05, "loss": 1.2685, "step": 3775 }, { "epoch": 0.2558438918625923, "grad_norm": 9.958065032958984, "learning_rate": 9.585871722910536e-05, "loss": 1.1792, "step": 3776 }, { "epoch": 0.2559116471305644, "grad_norm": 6.82180118560791, "learning_rate": 9.585734821000754e-05, "loss": 0.8734, "step": 3777 }, { "epoch": 0.2559794023985365, "grad_norm": 9.444950103759766, "learning_rate": 9.585597919090972e-05, "loss": 1.0368, "step": 3778 }, { "epoch": 0.2560471576665086, "grad_norm": 9.437919616699219, "learning_rate": 9.58546101718119e-05, "loss": 1.2728, "step": 3779 }, { "epoch": 0.25611491293448063, "grad_norm": 8.925026893615723, "learning_rate": 9.585324115271408e-05, "loss": 0.944, "step": 3780 }, { "epoch": 0.2561826682024527, "grad_norm": 8.421260833740234, "learning_rate": 9.585187213361628e-05, "loss": 0.9913, "step": 3781 }, { "epoch": 0.2562504234704248, "grad_norm": 8.5851469039917, "learning_rate": 9.585050311451846e-05, "loss": 0.9885, "step": 3782 }, { "epoch": 0.2563181787383969, "grad_norm": 7.179548263549805, "learning_rate": 9.584913409542064e-05, "loss": 1.0647, "step": 3783 }, { "epoch": 0.256385934006369, "grad_norm": 8.60708999633789, "learning_rate": 9.584776507632282e-05, "loss": 1.0475, "step": 3784 }, { "epoch": 0.2564536892743411, "grad_norm": 8.462443351745605, "learning_rate": 9.584639605722501e-05, "loss": 1.1381, "step": 3785 }, { "epoch": 0.2565214445423132, "grad_norm": 7.044154644012451, "learning_rate": 9.584502703812719e-05, "loss": 0.8223, "step": 3786 }, { "epoch": 0.2565891998102853, "grad_norm": 8.842753410339355, "learning_rate": 9.584365801902937e-05, "loss": 1.1205, "step": 3787 }, { "epoch": 0.2566569550782573, "grad_norm": 7.294439792633057, "learning_rate": 9.584228899993155e-05, "loss": 0.9563, "step": 3788 }, { "epoch": 0.2567247103462294, "grad_norm": 7.842654228210449, "learning_rate": 9.584091998083373e-05, "loss": 1.2798, "step": 3789 }, { "epoch": 0.2567924656142015, "grad_norm": 5.479234218597412, "learning_rate": 9.583955096173593e-05, "loss": 0.7835, "step": 3790 }, { "epoch": 0.2568602208821736, "grad_norm": 7.670284271240234, "learning_rate": 9.58381819426381e-05, "loss": 0.8263, "step": 3791 }, { "epoch": 0.2569279761501457, "grad_norm": 8.564105987548828, "learning_rate": 9.583681292354029e-05, "loss": 1.032, "step": 3792 }, { "epoch": 0.25699573141811777, "grad_norm": 8.706098556518555, "learning_rate": 9.583544390444247e-05, "loss": 1.0166, "step": 3793 }, { "epoch": 0.25706348668608986, "grad_norm": 6.727125644683838, "learning_rate": 9.583407488534466e-05, "loss": 0.756, "step": 3794 }, { "epoch": 0.25713124195406195, "grad_norm": 8.941418647766113, "learning_rate": 9.583270586624684e-05, "loss": 1.0223, "step": 3795 }, { "epoch": 0.257198997222034, "grad_norm": 7.35167932510376, "learning_rate": 9.583133684714902e-05, "loss": 0.9117, "step": 3796 }, { "epoch": 0.2572667524900061, "grad_norm": 9.245199203491211, "learning_rate": 9.58299678280512e-05, "loss": 1.0131, "step": 3797 }, { "epoch": 0.25733450775797817, "grad_norm": 10.583107948303223, "learning_rate": 9.582859880895338e-05, "loss": 1.0452, "step": 3798 }, { "epoch": 0.25740226302595026, "grad_norm": 7.672145843505859, "learning_rate": 9.582722978985558e-05, "loss": 0.9961, "step": 3799 }, { "epoch": 0.25747001829392235, "grad_norm": 6.5851640701293945, "learning_rate": 9.582586077075776e-05, "loss": 0.9217, "step": 3800 }, { "epoch": 0.25753777356189445, "grad_norm": 9.444985389709473, "learning_rate": 9.582449175165994e-05, "loss": 1.1704, "step": 3801 }, { "epoch": 0.25760552882986654, "grad_norm": 7.136216163635254, "learning_rate": 9.582312273256212e-05, "loss": 0.9597, "step": 3802 }, { "epoch": 0.25767328409783863, "grad_norm": 9.74889087677002, "learning_rate": 9.582175371346431e-05, "loss": 0.9866, "step": 3803 }, { "epoch": 0.25774103936581066, "grad_norm": 7.692512512207031, "learning_rate": 9.582038469436649e-05, "loss": 1.0412, "step": 3804 }, { "epoch": 0.25780879463378276, "grad_norm": 7.882124423980713, "learning_rate": 9.581901567526867e-05, "loss": 1.1129, "step": 3805 }, { "epoch": 0.25787654990175485, "grad_norm": 8.117066383361816, "learning_rate": 9.581764665617085e-05, "loss": 0.968, "step": 3806 }, { "epoch": 0.25794430516972694, "grad_norm": 8.883692741394043, "learning_rate": 9.581627763707303e-05, "loss": 1.0859, "step": 3807 }, { "epoch": 0.25801206043769903, "grad_norm": 7.985278129577637, "learning_rate": 9.581490861797523e-05, "loss": 0.9511, "step": 3808 }, { "epoch": 0.2580798157056711, "grad_norm": 10.487812995910645, "learning_rate": 9.58135395988774e-05, "loss": 1.2395, "step": 3809 }, { "epoch": 0.2581475709736432, "grad_norm": 7.5507707595825195, "learning_rate": 9.581217057977959e-05, "loss": 0.9959, "step": 3810 }, { "epoch": 0.2582153262416153, "grad_norm": 9.950063705444336, "learning_rate": 9.581080156068177e-05, "loss": 0.9453, "step": 3811 }, { "epoch": 0.2582830815095874, "grad_norm": 7.301966190338135, "learning_rate": 9.580943254158396e-05, "loss": 0.8468, "step": 3812 }, { "epoch": 0.25835083677755943, "grad_norm": 6.434390544891357, "learning_rate": 9.580806352248614e-05, "loss": 1.0513, "step": 3813 }, { "epoch": 0.2584185920455315, "grad_norm": 8.878791809082031, "learning_rate": 9.580669450338832e-05, "loss": 1.1264, "step": 3814 }, { "epoch": 0.2584863473135036, "grad_norm": 8.107111930847168, "learning_rate": 9.580532548429052e-05, "loss": 1.3037, "step": 3815 }, { "epoch": 0.2585541025814757, "grad_norm": 7.54078483581543, "learning_rate": 9.58039564651927e-05, "loss": 0.917, "step": 3816 }, { "epoch": 0.2586218578494478, "grad_norm": 9.369047164916992, "learning_rate": 9.580258744609488e-05, "loss": 0.8761, "step": 3817 }, { "epoch": 0.2586896131174199, "grad_norm": 8.927732467651367, "learning_rate": 9.580121842699707e-05, "loss": 1.0591, "step": 3818 }, { "epoch": 0.258757368385392, "grad_norm": 9.261579513549805, "learning_rate": 9.579984940789925e-05, "loss": 1.0671, "step": 3819 }, { "epoch": 0.2588251236533641, "grad_norm": 7.396904468536377, "learning_rate": 9.579848038880143e-05, "loss": 0.8242, "step": 3820 }, { "epoch": 0.2588928789213361, "grad_norm": 6.665501594543457, "learning_rate": 9.579711136970361e-05, "loss": 0.7686, "step": 3821 }, { "epoch": 0.2589606341893082, "grad_norm": 7.0808281898498535, "learning_rate": 9.57957423506058e-05, "loss": 0.9757, "step": 3822 }, { "epoch": 0.2590283894572803, "grad_norm": 7.734886646270752, "learning_rate": 9.579437333150799e-05, "loss": 1.0365, "step": 3823 }, { "epoch": 0.2590961447252524, "grad_norm": 7.383622169494629, "learning_rate": 9.579300431241017e-05, "loss": 0.8458, "step": 3824 }, { "epoch": 0.2591638999932245, "grad_norm": 7.758030891418457, "learning_rate": 9.579163529331235e-05, "loss": 0.9454, "step": 3825 }, { "epoch": 0.25923165526119657, "grad_norm": 9.456972122192383, "learning_rate": 9.579026627421454e-05, "loss": 1.065, "step": 3826 }, { "epoch": 0.25929941052916866, "grad_norm": 8.046957969665527, "learning_rate": 9.578889725511672e-05, "loss": 0.7021, "step": 3827 }, { "epoch": 0.25936716579714075, "grad_norm": 9.415145874023438, "learning_rate": 9.57875282360189e-05, "loss": 1.2344, "step": 3828 }, { "epoch": 0.2594349210651128, "grad_norm": 6.525821685791016, "learning_rate": 9.578615921692108e-05, "loss": 0.8005, "step": 3829 }, { "epoch": 0.2595026763330849, "grad_norm": 6.948854923248291, "learning_rate": 9.578479019782326e-05, "loss": 0.8313, "step": 3830 }, { "epoch": 0.259570431601057, "grad_norm": 6.991540431976318, "learning_rate": 9.578342117872546e-05, "loss": 0.945, "step": 3831 }, { "epoch": 0.25963818686902906, "grad_norm": 7.0269551277160645, "learning_rate": 9.578205215962764e-05, "loss": 0.8927, "step": 3832 }, { "epoch": 0.25970594213700116, "grad_norm": 7.773914813995361, "learning_rate": 9.578068314052982e-05, "loss": 0.8469, "step": 3833 }, { "epoch": 0.25977369740497325, "grad_norm": 9.6503267288208, "learning_rate": 9.5779314121432e-05, "loss": 1.1192, "step": 3834 }, { "epoch": 0.25984145267294534, "grad_norm": 6.582554340362549, "learning_rate": 9.577794510233418e-05, "loss": 0.8996, "step": 3835 }, { "epoch": 0.25990920794091743, "grad_norm": 10.097637176513672, "learning_rate": 9.577657608323637e-05, "loss": 1.211, "step": 3836 }, { "epoch": 0.25997696320888947, "grad_norm": 8.63124942779541, "learning_rate": 9.577520706413855e-05, "loss": 1.0353, "step": 3837 }, { "epoch": 0.26004471847686156, "grad_norm": 8.282122611999512, "learning_rate": 9.577383804504073e-05, "loss": 0.9677, "step": 3838 }, { "epoch": 0.26011247374483365, "grad_norm": 8.298484802246094, "learning_rate": 9.577246902594291e-05, "loss": 0.909, "step": 3839 }, { "epoch": 0.26018022901280574, "grad_norm": 7.91752290725708, "learning_rate": 9.57711000068451e-05, "loss": 1.1106, "step": 3840 }, { "epoch": 0.26024798428077783, "grad_norm": 8.073543548583984, "learning_rate": 9.576973098774729e-05, "loss": 0.9777, "step": 3841 }, { "epoch": 0.2603157395487499, "grad_norm": 8.225390434265137, "learning_rate": 9.576836196864947e-05, "loss": 1.0302, "step": 3842 }, { "epoch": 0.260383494816722, "grad_norm": 7.086613655090332, "learning_rate": 9.576699294955165e-05, "loss": 1.0061, "step": 3843 }, { "epoch": 0.2604512500846941, "grad_norm": 6.9043965339660645, "learning_rate": 9.576562393045383e-05, "loss": 0.8707, "step": 3844 }, { "epoch": 0.26051900535266614, "grad_norm": 9.083130836486816, "learning_rate": 9.576425491135602e-05, "loss": 1.0567, "step": 3845 }, { "epoch": 0.26058676062063824, "grad_norm": 6.962080478668213, "learning_rate": 9.57628858922582e-05, "loss": 0.8592, "step": 3846 }, { "epoch": 0.2606545158886103, "grad_norm": 7.196011066436768, "learning_rate": 9.576151687316038e-05, "loss": 0.8318, "step": 3847 }, { "epoch": 0.2607222711565824, "grad_norm": 7.421074867248535, "learning_rate": 9.576014785406256e-05, "loss": 1.0162, "step": 3848 }, { "epoch": 0.2607900264245545, "grad_norm": 7.065299034118652, "learning_rate": 9.575877883496476e-05, "loss": 0.9999, "step": 3849 }, { "epoch": 0.2608577816925266, "grad_norm": 7.442328929901123, "learning_rate": 9.575740981586694e-05, "loss": 0.8118, "step": 3850 }, { "epoch": 0.2609255369604987, "grad_norm": 6.888897895812988, "learning_rate": 9.575604079676912e-05, "loss": 1.0343, "step": 3851 }, { "epoch": 0.2609932922284708, "grad_norm": 7.788427829742432, "learning_rate": 9.57546717776713e-05, "loss": 0.9931, "step": 3852 }, { "epoch": 0.2610610474964428, "grad_norm": 7.247363567352295, "learning_rate": 9.575330275857348e-05, "loss": 0.8403, "step": 3853 }, { "epoch": 0.2611288027644149, "grad_norm": 7.305066108703613, "learning_rate": 9.575193373947567e-05, "loss": 1.0222, "step": 3854 }, { "epoch": 0.261196558032387, "grad_norm": 5.8723249435424805, "learning_rate": 9.575056472037785e-05, "loss": 0.7461, "step": 3855 }, { "epoch": 0.2612643133003591, "grad_norm": 8.933609008789062, "learning_rate": 9.574919570128003e-05, "loss": 1.1537, "step": 3856 }, { "epoch": 0.2613320685683312, "grad_norm": 7.3203125, "learning_rate": 9.574782668218221e-05, "loss": 1.0277, "step": 3857 }, { "epoch": 0.2613998238363033, "grad_norm": 7.455322742462158, "learning_rate": 9.57464576630844e-05, "loss": 0.8136, "step": 3858 }, { "epoch": 0.26146757910427537, "grad_norm": 8.074299812316895, "learning_rate": 9.574508864398659e-05, "loss": 0.9597, "step": 3859 }, { "epoch": 0.26153533437224746, "grad_norm": 8.732856750488281, "learning_rate": 9.574371962488877e-05, "loss": 1.2009, "step": 3860 }, { "epoch": 0.2616030896402195, "grad_norm": 7.179652690887451, "learning_rate": 9.574235060579095e-05, "loss": 0.8365, "step": 3861 }, { "epoch": 0.2616708449081916, "grad_norm": 8.975394248962402, "learning_rate": 9.574098158669314e-05, "loss": 0.9099, "step": 3862 }, { "epoch": 0.2617386001761637, "grad_norm": 9.083860397338867, "learning_rate": 9.573961256759532e-05, "loss": 1.0307, "step": 3863 }, { "epoch": 0.2618063554441358, "grad_norm": 7.449617385864258, "learning_rate": 9.57382435484975e-05, "loss": 1.1442, "step": 3864 }, { "epoch": 0.26187411071210787, "grad_norm": 8.299210548400879, "learning_rate": 9.57368745293997e-05, "loss": 1.0119, "step": 3865 }, { "epoch": 0.26194186598007996, "grad_norm": 6.847742080688477, "learning_rate": 9.573550551030188e-05, "loss": 0.9058, "step": 3866 }, { "epoch": 0.26200962124805205, "grad_norm": 7.057496070861816, "learning_rate": 9.573413649120406e-05, "loss": 1.0022, "step": 3867 }, { "epoch": 0.26207737651602414, "grad_norm": 7.9798359870910645, "learning_rate": 9.573276747210625e-05, "loss": 0.9555, "step": 3868 }, { "epoch": 0.26214513178399623, "grad_norm": 8.119134902954102, "learning_rate": 9.573139845300843e-05, "loss": 1.1039, "step": 3869 }, { "epoch": 0.26221288705196827, "grad_norm": 6.998579502105713, "learning_rate": 9.573002943391061e-05, "loss": 1.0448, "step": 3870 }, { "epoch": 0.26228064231994036, "grad_norm": 6.59659481048584, "learning_rate": 9.572866041481279e-05, "loss": 0.8946, "step": 3871 }, { "epoch": 0.26234839758791245, "grad_norm": 8.110078811645508, "learning_rate": 9.572729139571498e-05, "loss": 0.8568, "step": 3872 }, { "epoch": 0.26241615285588454, "grad_norm": 9.192879676818848, "learning_rate": 9.572592237661717e-05, "loss": 0.9517, "step": 3873 }, { "epoch": 0.26248390812385664, "grad_norm": 7.378695964813232, "learning_rate": 9.572455335751935e-05, "loss": 1.0917, "step": 3874 }, { "epoch": 0.2625516633918287, "grad_norm": 10.016194343566895, "learning_rate": 9.572318433842153e-05, "loss": 0.9754, "step": 3875 }, { "epoch": 0.2626194186598008, "grad_norm": 7.392832279205322, "learning_rate": 9.57218153193237e-05, "loss": 0.8258, "step": 3876 }, { "epoch": 0.2626871739277729, "grad_norm": 6.59785795211792, "learning_rate": 9.57204463002259e-05, "loss": 0.9656, "step": 3877 }, { "epoch": 0.26275492919574495, "grad_norm": 7.799993991851807, "learning_rate": 9.571907728112808e-05, "loss": 0.9776, "step": 3878 }, { "epoch": 0.26282268446371704, "grad_norm": 9.275368690490723, "learning_rate": 9.571770826203026e-05, "loss": 0.8827, "step": 3879 }, { "epoch": 0.26289043973168913, "grad_norm": 9.413054466247559, "learning_rate": 9.571633924293244e-05, "loss": 1.0671, "step": 3880 }, { "epoch": 0.2629581949996612, "grad_norm": 8.446796417236328, "learning_rate": 9.571497022383464e-05, "loss": 0.8702, "step": 3881 }, { "epoch": 0.2630259502676333, "grad_norm": 12.962898254394531, "learning_rate": 9.571360120473682e-05, "loss": 0.9281, "step": 3882 }, { "epoch": 0.2630937055356054, "grad_norm": 7.7250213623046875, "learning_rate": 9.5712232185639e-05, "loss": 0.8908, "step": 3883 }, { "epoch": 0.2631614608035775, "grad_norm": 10.405988693237305, "learning_rate": 9.571086316654118e-05, "loss": 1.2047, "step": 3884 }, { "epoch": 0.2632292160715496, "grad_norm": 9.009016036987305, "learning_rate": 9.570949414744336e-05, "loss": 1.0619, "step": 3885 }, { "epoch": 0.2632969713395216, "grad_norm": 8.600632667541504, "learning_rate": 9.570812512834555e-05, "loss": 1.4186, "step": 3886 }, { "epoch": 0.2633647266074937, "grad_norm": 7.9944071769714355, "learning_rate": 9.570675610924773e-05, "loss": 1.1493, "step": 3887 }, { "epoch": 0.2634324818754658, "grad_norm": 7.7023539543151855, "learning_rate": 9.570538709014991e-05, "loss": 0.8716, "step": 3888 }, { "epoch": 0.2635002371434379, "grad_norm": 7.743750095367432, "learning_rate": 9.570401807105209e-05, "loss": 0.9091, "step": 3889 }, { "epoch": 0.26356799241141, "grad_norm": 7.080264091491699, "learning_rate": 9.570264905195427e-05, "loss": 0.8064, "step": 3890 }, { "epoch": 0.2636357476793821, "grad_norm": 10.498579025268555, "learning_rate": 9.570128003285647e-05, "loss": 1.2763, "step": 3891 }, { "epoch": 0.2637035029473542, "grad_norm": 8.932741165161133, "learning_rate": 9.569991101375865e-05, "loss": 1.0108, "step": 3892 }, { "epoch": 0.26377125821532627, "grad_norm": 7.670261859893799, "learning_rate": 9.569854199466083e-05, "loss": 0.9312, "step": 3893 }, { "epoch": 0.2638390134832983, "grad_norm": 6.713436126708984, "learning_rate": 9.5697172975563e-05, "loss": 0.9319, "step": 3894 }, { "epoch": 0.2639067687512704, "grad_norm": 6.828521728515625, "learning_rate": 9.56958039564652e-05, "loss": 0.908, "step": 3895 }, { "epoch": 0.2639745240192425, "grad_norm": 7.664526462554932, "learning_rate": 9.569443493736738e-05, "loss": 0.9204, "step": 3896 }, { "epoch": 0.2640422792872146, "grad_norm": 7.330194473266602, "learning_rate": 9.569306591826956e-05, "loss": 0.9971, "step": 3897 }, { "epoch": 0.26411003455518667, "grad_norm": 7.202576160430908, "learning_rate": 9.569169689917174e-05, "loss": 1.1183, "step": 3898 }, { "epoch": 0.26417778982315876, "grad_norm": 7.7107720375061035, "learning_rate": 9.569032788007392e-05, "loss": 1.0794, "step": 3899 }, { "epoch": 0.26424554509113085, "grad_norm": 8.634172439575195, "learning_rate": 9.568895886097612e-05, "loss": 0.9124, "step": 3900 }, { "epoch": 0.26431330035910294, "grad_norm": 7.264395236968994, "learning_rate": 9.56875898418783e-05, "loss": 1.0044, "step": 3901 }, { "epoch": 0.264381055627075, "grad_norm": 7.352424144744873, "learning_rate": 9.568622082278048e-05, "loss": 0.9213, "step": 3902 }, { "epoch": 0.26444881089504707, "grad_norm": 8.39152717590332, "learning_rate": 9.568485180368266e-05, "loss": 0.914, "step": 3903 }, { "epoch": 0.26451656616301916, "grad_norm": 6.5833611488342285, "learning_rate": 9.568348278458485e-05, "loss": 0.9906, "step": 3904 }, { "epoch": 0.26458432143099125, "grad_norm": 7.952385902404785, "learning_rate": 9.568211376548703e-05, "loss": 0.9624, "step": 3905 }, { "epoch": 0.26465207669896335, "grad_norm": 7.232090950012207, "learning_rate": 9.568074474638921e-05, "loss": 0.8852, "step": 3906 }, { "epoch": 0.26471983196693544, "grad_norm": 7.817921161651611, "learning_rate": 9.567937572729139e-05, "loss": 0.8474, "step": 3907 }, { "epoch": 0.26478758723490753, "grad_norm": 8.926132202148438, "learning_rate": 9.567800670819359e-05, "loss": 1.0302, "step": 3908 }, { "epoch": 0.2648553425028796, "grad_norm": 8.923449516296387, "learning_rate": 9.567663768909577e-05, "loss": 1.0837, "step": 3909 }, { "epoch": 0.26492309777085166, "grad_norm": 8.431096076965332, "learning_rate": 9.567526866999795e-05, "loss": 0.7992, "step": 3910 }, { "epoch": 0.26499085303882375, "grad_norm": 10.121541976928711, "learning_rate": 9.567389965090014e-05, "loss": 1.0082, "step": 3911 }, { "epoch": 0.26505860830679584, "grad_norm": 9.349747657775879, "learning_rate": 9.567253063180232e-05, "loss": 0.8257, "step": 3912 }, { "epoch": 0.26512636357476793, "grad_norm": 9.438392639160156, "learning_rate": 9.56711616127045e-05, "loss": 0.979, "step": 3913 }, { "epoch": 0.26519411884274, "grad_norm": 7.422990798950195, "learning_rate": 9.56697925936067e-05, "loss": 0.8456, "step": 3914 }, { "epoch": 0.2652618741107121, "grad_norm": 6.354233741760254, "learning_rate": 9.566842357450888e-05, "loss": 0.6639, "step": 3915 }, { "epoch": 0.2653296293786842, "grad_norm": 9.160786628723145, "learning_rate": 9.566705455541106e-05, "loss": 1.1126, "step": 3916 }, { "epoch": 0.2653973846466563, "grad_norm": 7.786096096038818, "learning_rate": 9.566568553631324e-05, "loss": 1.2703, "step": 3917 }, { "epoch": 0.2654651399146284, "grad_norm": 7.358225345611572, "learning_rate": 9.566431651721543e-05, "loss": 1.0625, "step": 3918 }, { "epoch": 0.2655328951826004, "grad_norm": 8.920319557189941, "learning_rate": 9.566294749811761e-05, "loss": 1.0037, "step": 3919 }, { "epoch": 0.2656006504505725, "grad_norm": 7.16439151763916, "learning_rate": 9.566157847901979e-05, "loss": 1.0486, "step": 3920 }, { "epoch": 0.2656684057185446, "grad_norm": 7.374850749969482, "learning_rate": 9.566020945992197e-05, "loss": 1.011, "step": 3921 }, { "epoch": 0.2657361609865167, "grad_norm": 5.965388298034668, "learning_rate": 9.565884044082415e-05, "loss": 0.9043, "step": 3922 }, { "epoch": 0.2658039162544888, "grad_norm": 7.1143879890441895, "learning_rate": 9.565747142172634e-05, "loss": 0.923, "step": 3923 }, { "epoch": 0.2658716715224609, "grad_norm": 9.05667495727539, "learning_rate": 9.565610240262853e-05, "loss": 1.1247, "step": 3924 }, { "epoch": 0.265939426790433, "grad_norm": 6.407328128814697, "learning_rate": 9.56547333835307e-05, "loss": 0.9028, "step": 3925 }, { "epoch": 0.26600718205840507, "grad_norm": 9.335012435913086, "learning_rate": 9.565336436443289e-05, "loss": 0.9761, "step": 3926 }, { "epoch": 0.2660749373263771, "grad_norm": 7.462203025817871, "learning_rate": 9.565199534533508e-05, "loss": 0.8812, "step": 3927 }, { "epoch": 0.2661426925943492, "grad_norm": 8.162378311157227, "learning_rate": 9.565062632623726e-05, "loss": 0.991, "step": 3928 }, { "epoch": 0.2662104478623213, "grad_norm": 8.835287094116211, "learning_rate": 9.564925730713944e-05, "loss": 1.3085, "step": 3929 }, { "epoch": 0.2662782031302934, "grad_norm": 9.219624519348145, "learning_rate": 9.564788828804162e-05, "loss": 0.989, "step": 3930 }, { "epoch": 0.26634595839826547, "grad_norm": 6.832587718963623, "learning_rate": 9.56465192689438e-05, "loss": 1.0186, "step": 3931 }, { "epoch": 0.26641371366623756, "grad_norm": 8.075157165527344, "learning_rate": 9.5645150249846e-05, "loss": 0.9734, "step": 3932 }, { "epoch": 0.26648146893420965, "grad_norm": 9.403346061706543, "learning_rate": 9.564378123074818e-05, "loss": 1.1875, "step": 3933 }, { "epoch": 0.26654922420218174, "grad_norm": 8.556446075439453, "learning_rate": 9.564241221165036e-05, "loss": 1.0932, "step": 3934 }, { "epoch": 0.2666169794701538, "grad_norm": 8.178442001342773, "learning_rate": 9.564104319255254e-05, "loss": 0.7654, "step": 3935 }, { "epoch": 0.2666847347381259, "grad_norm": 8.82776165008545, "learning_rate": 9.563967417345473e-05, "loss": 1.0518, "step": 3936 }, { "epoch": 0.26675249000609796, "grad_norm": 8.001256942749023, "learning_rate": 9.563830515435691e-05, "loss": 1.0109, "step": 3937 }, { "epoch": 0.26682024527407006, "grad_norm": 7.439608573913574, "learning_rate": 9.563693613525909e-05, "loss": 0.9365, "step": 3938 }, { "epoch": 0.26688800054204215, "grad_norm": 5.78077507019043, "learning_rate": 9.563556711616127e-05, "loss": 0.9356, "step": 3939 }, { "epoch": 0.26695575581001424, "grad_norm": 7.6134819984436035, "learning_rate": 9.563419809706345e-05, "loss": 1.0003, "step": 3940 }, { "epoch": 0.26702351107798633, "grad_norm": 8.467934608459473, "learning_rate": 9.563282907796565e-05, "loss": 0.8052, "step": 3941 }, { "epoch": 0.2670912663459584, "grad_norm": 8.88598346710205, "learning_rate": 9.563146005886783e-05, "loss": 1.0069, "step": 3942 }, { "epoch": 0.26715902161393046, "grad_norm": 7.627633094787598, "learning_rate": 9.563009103977e-05, "loss": 1.1463, "step": 3943 }, { "epoch": 0.26722677688190255, "grad_norm": 7.948824882507324, "learning_rate": 9.562872202067219e-05, "loss": 0.9451, "step": 3944 }, { "epoch": 0.26729453214987464, "grad_norm": 8.10439395904541, "learning_rate": 9.562735300157437e-05, "loss": 1.1375, "step": 3945 }, { "epoch": 0.26736228741784673, "grad_norm": 6.488743782043457, "learning_rate": 9.562598398247656e-05, "loss": 0.8608, "step": 3946 }, { "epoch": 0.2674300426858188, "grad_norm": 9.731819152832031, "learning_rate": 9.562461496337874e-05, "loss": 1.2055, "step": 3947 }, { "epoch": 0.2674977979537909, "grad_norm": 10.455330848693848, "learning_rate": 9.562324594428092e-05, "loss": 1.1095, "step": 3948 }, { "epoch": 0.267565553221763, "grad_norm": 6.7713212966918945, "learning_rate": 9.56218769251831e-05, "loss": 0.9047, "step": 3949 }, { "epoch": 0.2676333084897351, "grad_norm": 9.292582511901855, "learning_rate": 9.56205079060853e-05, "loss": 0.9404, "step": 3950 }, { "epoch": 0.26770106375770714, "grad_norm": 8.252067565917969, "learning_rate": 9.561913888698748e-05, "loss": 0.8826, "step": 3951 }, { "epoch": 0.26776881902567923, "grad_norm": 7.126963138580322, "learning_rate": 9.561776986788966e-05, "loss": 0.8445, "step": 3952 }, { "epoch": 0.2678365742936513, "grad_norm": 8.352923393249512, "learning_rate": 9.561640084879184e-05, "loss": 1.1081, "step": 3953 }, { "epoch": 0.2679043295616234, "grad_norm": 6.933292388916016, "learning_rate": 9.561503182969403e-05, "loss": 0.8228, "step": 3954 }, { "epoch": 0.2679720848295955, "grad_norm": 7.9115986824035645, "learning_rate": 9.561366281059621e-05, "loss": 1.0984, "step": 3955 }, { "epoch": 0.2680398400975676, "grad_norm": 6.988186359405518, "learning_rate": 9.561229379149839e-05, "loss": 0.9334, "step": 3956 }, { "epoch": 0.2681075953655397, "grad_norm": 6.6764631271362305, "learning_rate": 9.561092477240058e-05, "loss": 0.907, "step": 3957 }, { "epoch": 0.2681753506335118, "grad_norm": 9.355245590209961, "learning_rate": 9.560955575330277e-05, "loss": 0.9087, "step": 3958 }, { "epoch": 0.2682431059014838, "grad_norm": 8.177611351013184, "learning_rate": 9.560818673420495e-05, "loss": 1.0815, "step": 3959 }, { "epoch": 0.2683108611694559, "grad_norm": 9.085289001464844, "learning_rate": 9.560681771510714e-05, "loss": 1.1291, "step": 3960 }, { "epoch": 0.268378616437428, "grad_norm": 7.056759357452393, "learning_rate": 9.560544869600932e-05, "loss": 0.8623, "step": 3961 }, { "epoch": 0.2684463717054001, "grad_norm": 8.175825119018555, "learning_rate": 9.56040796769115e-05, "loss": 0.8755, "step": 3962 }, { "epoch": 0.2685141269733722, "grad_norm": 7.127376079559326, "learning_rate": 9.560271065781368e-05, "loss": 0.8877, "step": 3963 }, { "epoch": 0.26858188224134427, "grad_norm": 9.635464668273926, "learning_rate": 9.560134163871587e-05, "loss": 0.8675, "step": 3964 }, { "epoch": 0.26864963750931636, "grad_norm": 7.8138275146484375, "learning_rate": 9.559997261961805e-05, "loss": 0.9997, "step": 3965 }, { "epoch": 0.26871739277728846, "grad_norm": 7.242639064788818, "learning_rate": 9.559860360052024e-05, "loss": 0.7692, "step": 3966 }, { "epoch": 0.2687851480452605, "grad_norm": 8.106497764587402, "learning_rate": 9.559723458142242e-05, "loss": 0.9801, "step": 3967 }, { "epoch": 0.2688529033132326, "grad_norm": 7.468952178955078, "learning_rate": 9.55958655623246e-05, "loss": 0.939, "step": 3968 }, { "epoch": 0.2689206585812047, "grad_norm": 9.002805709838867, "learning_rate": 9.559449654322679e-05, "loss": 1.1655, "step": 3969 }, { "epoch": 0.26898841384917677, "grad_norm": 8.787810325622559, "learning_rate": 9.559312752412897e-05, "loss": 0.9663, "step": 3970 }, { "epoch": 0.26905616911714886, "grad_norm": 9.113668441772461, "learning_rate": 9.559175850503115e-05, "loss": 1.0041, "step": 3971 }, { "epoch": 0.26912392438512095, "grad_norm": 6.289670944213867, "learning_rate": 9.559038948593333e-05, "loss": 0.8714, "step": 3972 }, { "epoch": 0.26919167965309304, "grad_norm": 7.755144119262695, "learning_rate": 9.558902046683552e-05, "loss": 0.8606, "step": 3973 }, { "epoch": 0.26925943492106513, "grad_norm": 8.989197731018066, "learning_rate": 9.55876514477377e-05, "loss": 0.9956, "step": 3974 }, { "epoch": 0.2693271901890372, "grad_norm": 6.45689058303833, "learning_rate": 9.558628242863989e-05, "loss": 0.7416, "step": 3975 }, { "epoch": 0.26939494545700926, "grad_norm": 7.772951126098633, "learning_rate": 9.558491340954207e-05, "loss": 0.927, "step": 3976 }, { "epoch": 0.26946270072498135, "grad_norm": 7.347445487976074, "learning_rate": 9.558354439044425e-05, "loss": 0.9337, "step": 3977 }, { "epoch": 0.26953045599295344, "grad_norm": 8.20067310333252, "learning_rate": 9.558217537134644e-05, "loss": 0.8324, "step": 3978 }, { "epoch": 0.26959821126092554, "grad_norm": 10.375189781188965, "learning_rate": 9.558080635224862e-05, "loss": 1.1103, "step": 3979 }, { "epoch": 0.2696659665288976, "grad_norm": 8.187355041503906, "learning_rate": 9.55794373331508e-05, "loss": 0.8469, "step": 3980 }, { "epoch": 0.2697337217968697, "grad_norm": 6.336839199066162, "learning_rate": 9.557806831405298e-05, "loss": 1.037, "step": 3981 }, { "epoch": 0.2698014770648418, "grad_norm": 6.368093967437744, "learning_rate": 9.557669929495517e-05, "loss": 0.6851, "step": 3982 }, { "epoch": 0.2698692323328139, "grad_norm": 7.394474506378174, "learning_rate": 9.557533027585736e-05, "loss": 1.0921, "step": 3983 }, { "epoch": 0.26993698760078594, "grad_norm": 9.0152006149292, "learning_rate": 9.557396125675954e-05, "loss": 0.9765, "step": 3984 }, { "epoch": 0.27000474286875803, "grad_norm": 8.247949600219727, "learning_rate": 9.557259223766172e-05, "loss": 1.0941, "step": 3985 }, { "epoch": 0.2700724981367301, "grad_norm": 7.9166579246521, "learning_rate": 9.55712232185639e-05, "loss": 0.798, "step": 3986 }, { "epoch": 0.2701402534047022, "grad_norm": 8.501713752746582, "learning_rate": 9.556985419946609e-05, "loss": 0.923, "step": 3987 }, { "epoch": 0.2702080086726743, "grad_norm": 7.450741767883301, "learning_rate": 9.556848518036827e-05, "loss": 0.9485, "step": 3988 }, { "epoch": 0.2702757639406464, "grad_norm": 6.7950239181518555, "learning_rate": 9.556711616127045e-05, "loss": 0.9041, "step": 3989 }, { "epoch": 0.2703435192086185, "grad_norm": 8.021660804748535, "learning_rate": 9.556574714217263e-05, "loss": 0.9494, "step": 3990 }, { "epoch": 0.2704112744765906, "grad_norm": 8.049949645996094, "learning_rate": 9.556437812307482e-05, "loss": 0.9629, "step": 3991 }, { "epoch": 0.2704790297445626, "grad_norm": 9.56615161895752, "learning_rate": 9.5563009103977e-05, "loss": 0.8822, "step": 3992 }, { "epoch": 0.2705467850125347, "grad_norm": 7.12232780456543, "learning_rate": 9.556164008487919e-05, "loss": 1.1377, "step": 3993 }, { "epoch": 0.2706145402805068, "grad_norm": 7.224277496337891, "learning_rate": 9.556027106578137e-05, "loss": 1.1023, "step": 3994 }, { "epoch": 0.2706822955484789, "grad_norm": 8.0076322555542, "learning_rate": 9.555890204668355e-05, "loss": 1.0599, "step": 3995 }, { "epoch": 0.270750050816451, "grad_norm": 7.8958845138549805, "learning_rate": 9.555753302758574e-05, "loss": 1.0455, "step": 3996 }, { "epoch": 0.2708178060844231, "grad_norm": 7.802896976470947, "learning_rate": 9.555616400848792e-05, "loss": 1.1089, "step": 3997 }, { "epoch": 0.27088556135239517, "grad_norm": 8.122269630432129, "learning_rate": 9.55547949893901e-05, "loss": 1.0708, "step": 3998 }, { "epoch": 0.27095331662036726, "grad_norm": 6.3488383293151855, "learning_rate": 9.555342597029228e-05, "loss": 0.85, "step": 3999 }, { "epoch": 0.2710210718883393, "grad_norm": 10.236666679382324, "learning_rate": 9.555205695119448e-05, "loss": 0.962, "step": 4000 }, { "epoch": 0.2710888271563114, "grad_norm": 7.594062328338623, "learning_rate": 9.555068793209666e-05, "loss": 1.1641, "step": 4001 }, { "epoch": 0.2711565824242835, "grad_norm": 6.824306964874268, "learning_rate": 9.554931891299884e-05, "loss": 0.7914, "step": 4002 }, { "epoch": 0.27122433769225557, "grad_norm": 7.826432228088379, "learning_rate": 9.554794989390103e-05, "loss": 1.0062, "step": 4003 }, { "epoch": 0.27129209296022766, "grad_norm": 7.189459800720215, "learning_rate": 9.554658087480321e-05, "loss": 0.7324, "step": 4004 }, { "epoch": 0.27135984822819975, "grad_norm": 7.949024200439453, "learning_rate": 9.554521185570539e-05, "loss": 1.1033, "step": 4005 }, { "epoch": 0.27142760349617184, "grad_norm": 6.575378894805908, "learning_rate": 9.554384283660758e-05, "loss": 1.0048, "step": 4006 }, { "epoch": 0.27149535876414393, "grad_norm": 8.585273742675781, "learning_rate": 9.554247381750976e-05, "loss": 1.0338, "step": 4007 }, { "epoch": 0.27156311403211597, "grad_norm": 7.999851703643799, "learning_rate": 9.554110479841194e-05, "loss": 1.1788, "step": 4008 }, { "epoch": 0.27163086930008806, "grad_norm": 8.40134334564209, "learning_rate": 9.553973577931413e-05, "loss": 1.0986, "step": 4009 }, { "epoch": 0.27169862456806015, "grad_norm": 6.380734920501709, "learning_rate": 9.553836676021632e-05, "loss": 0.717, "step": 4010 }, { "epoch": 0.27176637983603225, "grad_norm": 7.673857688903809, "learning_rate": 9.55369977411185e-05, "loss": 1.1646, "step": 4011 }, { "epoch": 0.27183413510400434, "grad_norm": 10.897799491882324, "learning_rate": 9.553562872202068e-05, "loss": 1.2796, "step": 4012 }, { "epoch": 0.27190189037197643, "grad_norm": 9.101582527160645, "learning_rate": 9.553425970292286e-05, "loss": 0.8778, "step": 4013 }, { "epoch": 0.2719696456399485, "grad_norm": 6.685849666595459, "learning_rate": 9.553289068382505e-05, "loss": 1.2329, "step": 4014 }, { "epoch": 0.2720374009079206, "grad_norm": 6.580325603485107, "learning_rate": 9.553152166472723e-05, "loss": 0.81, "step": 4015 }, { "epoch": 0.27210515617589265, "grad_norm": 7.854914665222168, "learning_rate": 9.553015264562941e-05, "loss": 1.1152, "step": 4016 }, { "epoch": 0.27217291144386474, "grad_norm": 7.293428421020508, "learning_rate": 9.55287836265316e-05, "loss": 0.9238, "step": 4017 }, { "epoch": 0.27224066671183683, "grad_norm": 6.944539546966553, "learning_rate": 9.552741460743378e-05, "loss": 1.018, "step": 4018 }, { "epoch": 0.2723084219798089, "grad_norm": 7.550015926361084, "learning_rate": 9.552604558833597e-05, "loss": 1.0332, "step": 4019 }, { "epoch": 0.272376177247781, "grad_norm": 8.035116195678711, "learning_rate": 9.552467656923815e-05, "loss": 1.0752, "step": 4020 }, { "epoch": 0.2724439325157531, "grad_norm": 7.088611125946045, "learning_rate": 9.552330755014033e-05, "loss": 0.833, "step": 4021 }, { "epoch": 0.2725116877837252, "grad_norm": 7.615128040313721, "learning_rate": 9.552193853104251e-05, "loss": 1.0236, "step": 4022 }, { "epoch": 0.2725794430516973, "grad_norm": 7.366427421569824, "learning_rate": 9.552056951194469e-05, "loss": 0.892, "step": 4023 }, { "epoch": 0.2726471983196694, "grad_norm": 7.073375225067139, "learning_rate": 9.551920049284688e-05, "loss": 0.8082, "step": 4024 }, { "epoch": 0.2727149535876414, "grad_norm": 6.350280284881592, "learning_rate": 9.551783147374906e-05, "loss": 0.8154, "step": 4025 }, { "epoch": 0.2727827088556135, "grad_norm": 5.656667709350586, "learning_rate": 9.551646245465125e-05, "loss": 0.8558, "step": 4026 }, { "epoch": 0.2728504641235856, "grad_norm": 6.565401077270508, "learning_rate": 9.551509343555343e-05, "loss": 1.0397, "step": 4027 }, { "epoch": 0.2729182193915577, "grad_norm": 8.4253511428833, "learning_rate": 9.551372441645562e-05, "loss": 0.9872, "step": 4028 }, { "epoch": 0.2729859746595298, "grad_norm": 7.32992696762085, "learning_rate": 9.55123553973578e-05, "loss": 0.8262, "step": 4029 }, { "epoch": 0.2730537299275019, "grad_norm": 7.277110576629639, "learning_rate": 9.551098637825998e-05, "loss": 1.218, "step": 4030 }, { "epoch": 0.27312148519547397, "grad_norm": 9.594376564025879, "learning_rate": 9.550961735916216e-05, "loss": 0.9785, "step": 4031 }, { "epoch": 0.27318924046344606, "grad_norm": 9.339418411254883, "learning_rate": 9.550824834006434e-05, "loss": 0.911, "step": 4032 }, { "epoch": 0.2732569957314181, "grad_norm": 6.717375755310059, "learning_rate": 9.550687932096653e-05, "loss": 0.8084, "step": 4033 }, { "epoch": 0.2733247509993902, "grad_norm": 6.447595596313477, "learning_rate": 9.550551030186872e-05, "loss": 0.9882, "step": 4034 }, { "epoch": 0.2733925062673623, "grad_norm": 7.6800312995910645, "learning_rate": 9.55041412827709e-05, "loss": 0.7895, "step": 4035 }, { "epoch": 0.27346026153533437, "grad_norm": 7.270735263824463, "learning_rate": 9.550277226367308e-05, "loss": 0.83, "step": 4036 }, { "epoch": 0.27352801680330646, "grad_norm": 8.246411323547363, "learning_rate": 9.550140324457527e-05, "loss": 0.8406, "step": 4037 }, { "epoch": 0.27359577207127855, "grad_norm": 9.59301471710205, "learning_rate": 9.550003422547745e-05, "loss": 0.9433, "step": 4038 }, { "epoch": 0.27366352733925065, "grad_norm": 7.304765701293945, "learning_rate": 9.549866520637963e-05, "loss": 0.8551, "step": 4039 }, { "epoch": 0.27373128260722274, "grad_norm": 10.581608772277832, "learning_rate": 9.549729618728181e-05, "loss": 1.0734, "step": 4040 }, { "epoch": 0.2737990378751948, "grad_norm": 8.858924865722656, "learning_rate": 9.549592716818399e-05, "loss": 1.1424, "step": 4041 }, { "epoch": 0.27386679314316686, "grad_norm": 7.042451858520508, "learning_rate": 9.549455814908618e-05, "loss": 0.9556, "step": 4042 }, { "epoch": 0.27393454841113896, "grad_norm": 7.213229656219482, "learning_rate": 9.549318912998837e-05, "loss": 1.0575, "step": 4043 }, { "epoch": 0.27400230367911105, "grad_norm": 10.457990646362305, "learning_rate": 9.549182011089055e-05, "loss": 1.1868, "step": 4044 }, { "epoch": 0.27407005894708314, "grad_norm": 7.598734378814697, "learning_rate": 9.549045109179273e-05, "loss": 0.7216, "step": 4045 }, { "epoch": 0.27413781421505523, "grad_norm": 7.892279148101807, "learning_rate": 9.548908207269492e-05, "loss": 1.1437, "step": 4046 }, { "epoch": 0.2742055694830273, "grad_norm": 7.455031394958496, "learning_rate": 9.54877130535971e-05, "loss": 0.8402, "step": 4047 }, { "epoch": 0.2742733247509994, "grad_norm": 7.1315107345581055, "learning_rate": 9.548634403449928e-05, "loss": 0.653, "step": 4048 }, { "epoch": 0.27434108001897145, "grad_norm": 7.116184234619141, "learning_rate": 9.548497501540147e-05, "loss": 0.9, "step": 4049 }, { "epoch": 0.27440883528694354, "grad_norm": 7.833000659942627, "learning_rate": 9.548360599630365e-05, "loss": 0.9328, "step": 4050 }, { "epoch": 0.27447659055491563, "grad_norm": 7.393906116485596, "learning_rate": 9.548223697720584e-05, "loss": 1.0643, "step": 4051 }, { "epoch": 0.2745443458228877, "grad_norm": 8.286185264587402, "learning_rate": 9.548086795810803e-05, "loss": 1.0402, "step": 4052 }, { "epoch": 0.2746121010908598, "grad_norm": 7.88281774520874, "learning_rate": 9.547949893901021e-05, "loss": 1.0117, "step": 4053 }, { "epoch": 0.2746798563588319, "grad_norm": 9.544231414794922, "learning_rate": 9.547812991991239e-05, "loss": 1.1522, "step": 4054 }, { "epoch": 0.274747611626804, "grad_norm": 7.637237071990967, "learning_rate": 9.547676090081457e-05, "loss": 1.0805, "step": 4055 }, { "epoch": 0.2748153668947761, "grad_norm": 7.00446891784668, "learning_rate": 9.547539188171676e-05, "loss": 0.9941, "step": 4056 }, { "epoch": 0.27488312216274813, "grad_norm": 6.8821306228637695, "learning_rate": 9.547402286261894e-05, "loss": 0.997, "step": 4057 }, { "epoch": 0.2749508774307202, "grad_norm": 6.466810703277588, "learning_rate": 9.547265384352112e-05, "loss": 0.8872, "step": 4058 }, { "epoch": 0.2750186326986923, "grad_norm": 7.136430263519287, "learning_rate": 9.54712848244233e-05, "loss": 0.768, "step": 4059 }, { "epoch": 0.2750863879666644, "grad_norm": 7.117071151733398, "learning_rate": 9.54699158053255e-05, "loss": 0.965, "step": 4060 }, { "epoch": 0.2751541432346365, "grad_norm": 6.811083793640137, "learning_rate": 9.546854678622768e-05, "loss": 0.8448, "step": 4061 }, { "epoch": 0.2752218985026086, "grad_norm": 6.055437088012695, "learning_rate": 9.546717776712986e-05, "loss": 1.0217, "step": 4062 }, { "epoch": 0.2752896537705807, "grad_norm": 8.154548645019531, "learning_rate": 9.546580874803204e-05, "loss": 1.0642, "step": 4063 }, { "epoch": 0.27535740903855277, "grad_norm": 7.394543170928955, "learning_rate": 9.546443972893422e-05, "loss": 1.0716, "step": 4064 }, { "epoch": 0.2754251643065248, "grad_norm": 8.716939926147461, "learning_rate": 9.546307070983641e-05, "loss": 0.8457, "step": 4065 }, { "epoch": 0.2754929195744969, "grad_norm": 8.732163429260254, "learning_rate": 9.54617016907386e-05, "loss": 1.0349, "step": 4066 }, { "epoch": 0.275560674842469, "grad_norm": 8.67320442199707, "learning_rate": 9.546033267164077e-05, "loss": 1.1179, "step": 4067 }, { "epoch": 0.2756284301104411, "grad_norm": 8.010993003845215, "learning_rate": 9.545896365254296e-05, "loss": 1.0168, "step": 4068 }, { "epoch": 0.27569618537841317, "grad_norm": 6.747826099395752, "learning_rate": 9.545759463344515e-05, "loss": 0.7905, "step": 4069 }, { "epoch": 0.27576394064638526, "grad_norm": 8.352065086364746, "learning_rate": 9.545622561434733e-05, "loss": 1.1287, "step": 4070 }, { "epoch": 0.27583169591435736, "grad_norm": 8.072574615478516, "learning_rate": 9.545485659524951e-05, "loss": 1.1582, "step": 4071 }, { "epoch": 0.27589945118232945, "grad_norm": 8.851838111877441, "learning_rate": 9.545348757615169e-05, "loss": 1.0434, "step": 4072 }, { "epoch": 0.2759672064503015, "grad_norm": 9.833956718444824, "learning_rate": 9.545211855705387e-05, "loss": 1.1956, "step": 4073 }, { "epoch": 0.2760349617182736, "grad_norm": 8.043981552124023, "learning_rate": 9.545074953795606e-05, "loss": 1.0625, "step": 4074 }, { "epoch": 0.27610271698624567, "grad_norm": 7.420129776000977, "learning_rate": 9.544938051885824e-05, "loss": 0.8933, "step": 4075 }, { "epoch": 0.27617047225421776, "grad_norm": 7.062417030334473, "learning_rate": 9.544801149976042e-05, "loss": 0.9655, "step": 4076 }, { "epoch": 0.27623822752218985, "grad_norm": 8.733392715454102, "learning_rate": 9.54466424806626e-05, "loss": 0.9182, "step": 4077 }, { "epoch": 0.27630598279016194, "grad_norm": 8.931736946105957, "learning_rate": 9.544527346156479e-05, "loss": 1.0243, "step": 4078 }, { "epoch": 0.27637373805813403, "grad_norm": 7.487978935241699, "learning_rate": 9.544390444246698e-05, "loss": 0.9967, "step": 4079 }, { "epoch": 0.2764414933261061, "grad_norm": 8.259819030761719, "learning_rate": 9.544253542336916e-05, "loss": 0.942, "step": 4080 }, { "epoch": 0.2765092485940782, "grad_norm": 9.625347137451172, "learning_rate": 9.544116640427134e-05, "loss": 1.2148, "step": 4081 }, { "epoch": 0.27657700386205025, "grad_norm": 7.737034797668457, "learning_rate": 9.543979738517352e-05, "loss": 0.7905, "step": 4082 }, { "epoch": 0.27664475913002234, "grad_norm": 7.118561744689941, "learning_rate": 9.543842836607571e-05, "loss": 0.9883, "step": 4083 }, { "epoch": 0.27671251439799444, "grad_norm": 9.749618530273438, "learning_rate": 9.54370593469779e-05, "loss": 1.0249, "step": 4084 }, { "epoch": 0.2767802696659665, "grad_norm": 8.805608749389648, "learning_rate": 9.543569032788008e-05, "loss": 1.041, "step": 4085 }, { "epoch": 0.2768480249339386, "grad_norm": 9.240931510925293, "learning_rate": 9.543432130878226e-05, "loss": 1.0598, "step": 4086 }, { "epoch": 0.2769157802019107, "grad_norm": 6.621399879455566, "learning_rate": 9.543295228968444e-05, "loss": 0.8884, "step": 4087 }, { "epoch": 0.2769835354698828, "grad_norm": 6.869698524475098, "learning_rate": 9.543158327058663e-05, "loss": 0.8309, "step": 4088 }, { "epoch": 0.2770512907378549, "grad_norm": 7.135868549346924, "learning_rate": 9.543021425148881e-05, "loss": 0.9399, "step": 4089 }, { "epoch": 0.27711904600582693, "grad_norm": 7.172493934631348, "learning_rate": 9.542884523239099e-05, "loss": 0.9626, "step": 4090 }, { "epoch": 0.277186801273799, "grad_norm": 6.903214931488037, "learning_rate": 9.542747621329317e-05, "loss": 1.0047, "step": 4091 }, { "epoch": 0.2772545565417711, "grad_norm": 7.557178020477295, "learning_rate": 9.542610719419536e-05, "loss": 0.6978, "step": 4092 }, { "epoch": 0.2773223118097432, "grad_norm": 7.468019485473633, "learning_rate": 9.542473817509754e-05, "loss": 0.8109, "step": 4093 }, { "epoch": 0.2773900670777153, "grad_norm": 8.699142456054688, "learning_rate": 9.542336915599973e-05, "loss": 0.7031, "step": 4094 }, { "epoch": 0.2774578223456874, "grad_norm": 8.03862190246582, "learning_rate": 9.54220001369019e-05, "loss": 0.9824, "step": 4095 }, { "epoch": 0.2775255776136595, "grad_norm": 9.884957313537598, "learning_rate": 9.54206311178041e-05, "loss": 0.8122, "step": 4096 }, { "epoch": 0.27759333288163157, "grad_norm": 9.435370445251465, "learning_rate": 9.541926209870628e-05, "loss": 1.2642, "step": 4097 }, { "epoch": 0.2776610881496036, "grad_norm": 8.154888153076172, "learning_rate": 9.541789307960846e-05, "loss": 0.9814, "step": 4098 }, { "epoch": 0.2777288434175757, "grad_norm": 9.771589279174805, "learning_rate": 9.541652406051065e-05, "loss": 1.0959, "step": 4099 }, { "epoch": 0.2777965986855478, "grad_norm": 7.635507106781006, "learning_rate": 9.541515504141283e-05, "loss": 0.8268, "step": 4100 }, { "epoch": 0.2778643539535199, "grad_norm": 9.028327941894531, "learning_rate": 9.541378602231501e-05, "loss": 1.227, "step": 4101 }, { "epoch": 0.277932109221492, "grad_norm": 8.327515602111816, "learning_rate": 9.541241700321721e-05, "loss": 0.8644, "step": 4102 }, { "epoch": 0.27799986448946407, "grad_norm": 7.547940254211426, "learning_rate": 9.541104798411939e-05, "loss": 1.0169, "step": 4103 }, { "epoch": 0.27806761975743616, "grad_norm": 8.0435152053833, "learning_rate": 9.540967896502157e-05, "loss": 1.0883, "step": 4104 }, { "epoch": 0.27813537502540825, "grad_norm": 7.7741217613220215, "learning_rate": 9.540830994592375e-05, "loss": 0.9389, "step": 4105 }, { "epoch": 0.2782031302933803, "grad_norm": 8.059552192687988, "learning_rate": 9.540694092682594e-05, "loss": 1.0024, "step": 4106 }, { "epoch": 0.2782708855613524, "grad_norm": 9.13268756866455, "learning_rate": 9.540557190772812e-05, "loss": 1.0448, "step": 4107 }, { "epoch": 0.27833864082932447, "grad_norm": 7.901900768280029, "learning_rate": 9.54042028886303e-05, "loss": 0.8235, "step": 4108 }, { "epoch": 0.27840639609729656, "grad_norm": 8.727076530456543, "learning_rate": 9.540283386953248e-05, "loss": 1.1047, "step": 4109 }, { "epoch": 0.27847415136526865, "grad_norm": 7.1972880363464355, "learning_rate": 9.540146485043466e-05, "loss": 0.9456, "step": 4110 }, { "epoch": 0.27854190663324074, "grad_norm": 6.886523246765137, "learning_rate": 9.540009583133686e-05, "loss": 1.0041, "step": 4111 }, { "epoch": 0.27860966190121284, "grad_norm": 7.595452308654785, "learning_rate": 9.539872681223904e-05, "loss": 1.0038, "step": 4112 }, { "epoch": 0.2786774171691849, "grad_norm": 6.007086753845215, "learning_rate": 9.539735779314122e-05, "loss": 0.8042, "step": 4113 }, { "epoch": 0.27874517243715696, "grad_norm": 7.112758159637451, "learning_rate": 9.53959887740434e-05, "loss": 0.8252, "step": 4114 }, { "epoch": 0.27881292770512905, "grad_norm": 10.120092391967773, "learning_rate": 9.53946197549456e-05, "loss": 1.1589, "step": 4115 }, { "epoch": 0.27888068297310115, "grad_norm": 7.587961196899414, "learning_rate": 9.539325073584777e-05, "loss": 1.0699, "step": 4116 }, { "epoch": 0.27894843824107324, "grad_norm": 7.671876430511475, "learning_rate": 9.539188171674995e-05, "loss": 0.9699, "step": 4117 }, { "epoch": 0.27901619350904533, "grad_norm": 7.345922470092773, "learning_rate": 9.539051269765213e-05, "loss": 0.9957, "step": 4118 }, { "epoch": 0.2790839487770174, "grad_norm": 9.215903282165527, "learning_rate": 9.538914367855432e-05, "loss": 1.156, "step": 4119 }, { "epoch": 0.2791517040449895, "grad_norm": 10.056458473205566, "learning_rate": 9.538777465945651e-05, "loss": 1.1229, "step": 4120 }, { "epoch": 0.2792194593129616, "grad_norm": 7.9655938148498535, "learning_rate": 9.538640564035869e-05, "loss": 1.017, "step": 4121 }, { "epoch": 0.27928721458093364, "grad_norm": 8.49431324005127, "learning_rate": 9.538503662126087e-05, "loss": 1.0035, "step": 4122 }, { "epoch": 0.27935496984890573, "grad_norm": 8.746543884277344, "learning_rate": 9.538366760216305e-05, "loss": 1.0711, "step": 4123 }, { "epoch": 0.2794227251168778, "grad_norm": 7.75557279586792, "learning_rate": 9.538229858306524e-05, "loss": 0.6868, "step": 4124 }, { "epoch": 0.2794904803848499, "grad_norm": 7.1494622230529785, "learning_rate": 9.538092956396742e-05, "loss": 0.721, "step": 4125 }, { "epoch": 0.279558235652822, "grad_norm": 6.971895217895508, "learning_rate": 9.53795605448696e-05, "loss": 1.0669, "step": 4126 }, { "epoch": 0.2796259909207941, "grad_norm": 6.478157043457031, "learning_rate": 9.537819152577178e-05, "loss": 0.9348, "step": 4127 }, { "epoch": 0.2796937461887662, "grad_norm": 6.307050704956055, "learning_rate": 9.537682250667397e-05, "loss": 0.9622, "step": 4128 }, { "epoch": 0.2797615014567383, "grad_norm": 9.505130767822266, "learning_rate": 9.537545348757616e-05, "loss": 1.1675, "step": 4129 }, { "epoch": 0.2798292567247104, "grad_norm": 8.11099624633789, "learning_rate": 9.537408446847834e-05, "loss": 0.853, "step": 4130 }, { "epoch": 0.2798970119926824, "grad_norm": 6.637272834777832, "learning_rate": 9.537271544938052e-05, "loss": 1.0993, "step": 4131 }, { "epoch": 0.2799647672606545, "grad_norm": 7.888055801391602, "learning_rate": 9.53713464302827e-05, "loss": 0.9582, "step": 4132 }, { "epoch": 0.2800325225286266, "grad_norm": 6.289199352264404, "learning_rate": 9.536997741118488e-05, "loss": 0.9027, "step": 4133 }, { "epoch": 0.2801002777965987, "grad_norm": 7.488378047943115, "learning_rate": 9.536860839208707e-05, "loss": 1.0697, "step": 4134 }, { "epoch": 0.2801680330645708, "grad_norm": 8.341411590576172, "learning_rate": 9.536723937298925e-05, "loss": 1.0705, "step": 4135 }, { "epoch": 0.28023578833254287, "grad_norm": 7.55519437789917, "learning_rate": 9.536587035389144e-05, "loss": 0.7589, "step": 4136 }, { "epoch": 0.28030354360051496, "grad_norm": 6.104217052459717, "learning_rate": 9.536450133479362e-05, "loss": 0.9161, "step": 4137 }, { "epoch": 0.28037129886848705, "grad_norm": 6.97914457321167, "learning_rate": 9.536313231569581e-05, "loss": 1.0342, "step": 4138 }, { "epoch": 0.2804390541364591, "grad_norm": 8.791030883789062, "learning_rate": 9.536176329659799e-05, "loss": 0.863, "step": 4139 }, { "epoch": 0.2805068094044312, "grad_norm": 6.868939399719238, "learning_rate": 9.536039427750017e-05, "loss": 0.9005, "step": 4140 }, { "epoch": 0.28057456467240327, "grad_norm": 9.854182243347168, "learning_rate": 9.535902525840235e-05, "loss": 0.773, "step": 4141 }, { "epoch": 0.28064231994037536, "grad_norm": 7.64580774307251, "learning_rate": 9.535765623930454e-05, "loss": 0.9999, "step": 4142 }, { "epoch": 0.28071007520834745, "grad_norm": 6.032886028289795, "learning_rate": 9.535628722020672e-05, "loss": 0.8916, "step": 4143 }, { "epoch": 0.28077783047631955, "grad_norm": 7.0441060066223145, "learning_rate": 9.53549182011089e-05, "loss": 0.9787, "step": 4144 }, { "epoch": 0.28084558574429164, "grad_norm": 6.4428629875183105, "learning_rate": 9.53535491820111e-05, "loss": 0.6749, "step": 4145 }, { "epoch": 0.28091334101226373, "grad_norm": 8.476522445678711, "learning_rate": 9.535218016291328e-05, "loss": 0.9408, "step": 4146 }, { "epoch": 0.28098109628023576, "grad_norm": 8.720208168029785, "learning_rate": 9.535081114381546e-05, "loss": 0.8571, "step": 4147 }, { "epoch": 0.28104885154820786, "grad_norm": 7.846646785736084, "learning_rate": 9.534944212471765e-05, "loss": 0.9193, "step": 4148 }, { "epoch": 0.28111660681617995, "grad_norm": 7.848026275634766, "learning_rate": 9.534807310561983e-05, "loss": 1.1975, "step": 4149 }, { "epoch": 0.28118436208415204, "grad_norm": 9.543595314025879, "learning_rate": 9.534670408652201e-05, "loss": 0.8769, "step": 4150 }, { "epoch": 0.28125211735212413, "grad_norm": 7.252998352050781, "learning_rate": 9.53453350674242e-05, "loss": 1.029, "step": 4151 }, { "epoch": 0.2813198726200962, "grad_norm": 6.271702289581299, "learning_rate": 9.534396604832639e-05, "loss": 0.8992, "step": 4152 }, { "epoch": 0.2813876278880683, "grad_norm": 9.071548461914062, "learning_rate": 9.534259702922857e-05, "loss": 1.0518, "step": 4153 }, { "epoch": 0.2814553831560404, "grad_norm": 7.452267169952393, "learning_rate": 9.534122801013075e-05, "loss": 0.9573, "step": 4154 }, { "epoch": 0.28152313842401244, "grad_norm": 8.72459602355957, "learning_rate": 9.533985899103293e-05, "loss": 1.09, "step": 4155 }, { "epoch": 0.28159089369198453, "grad_norm": 8.032079696655273, "learning_rate": 9.533848997193511e-05, "loss": 0.9595, "step": 4156 }, { "epoch": 0.2816586489599566, "grad_norm": 7.622939109802246, "learning_rate": 9.53371209528373e-05, "loss": 0.8722, "step": 4157 }, { "epoch": 0.2817264042279287, "grad_norm": 8.330899238586426, "learning_rate": 9.533575193373948e-05, "loss": 0.985, "step": 4158 }, { "epoch": 0.2817941594959008, "grad_norm": 9.378608703613281, "learning_rate": 9.533438291464166e-05, "loss": 0.8458, "step": 4159 }, { "epoch": 0.2818619147638729, "grad_norm": 7.305957317352295, "learning_rate": 9.533301389554384e-05, "loss": 0.9653, "step": 4160 }, { "epoch": 0.281929670031845, "grad_norm": 8.557588577270508, "learning_rate": 9.533164487644604e-05, "loss": 0.8742, "step": 4161 }, { "epoch": 0.2819974252998171, "grad_norm": 8.77814769744873, "learning_rate": 9.533027585734822e-05, "loss": 1.1782, "step": 4162 }, { "epoch": 0.2820651805677891, "grad_norm": 7.212672233581543, "learning_rate": 9.53289068382504e-05, "loss": 0.6333, "step": 4163 }, { "epoch": 0.2821329358357612, "grad_norm": 10.004176139831543, "learning_rate": 9.532753781915258e-05, "loss": 0.9544, "step": 4164 }, { "epoch": 0.2822006911037333, "grad_norm": 7.577065467834473, "learning_rate": 9.532616880005476e-05, "loss": 1.0708, "step": 4165 }, { "epoch": 0.2822684463717054, "grad_norm": 7.181521415710449, "learning_rate": 9.532479978095695e-05, "loss": 0.829, "step": 4166 }, { "epoch": 0.2823362016396775, "grad_norm": 5.7575249671936035, "learning_rate": 9.532343076185913e-05, "loss": 0.9353, "step": 4167 }, { "epoch": 0.2824039569076496, "grad_norm": 7.974564075469971, "learning_rate": 9.532206174276131e-05, "loss": 0.9976, "step": 4168 }, { "epoch": 0.28247171217562167, "grad_norm": 8.870126724243164, "learning_rate": 9.53206927236635e-05, "loss": 0.9704, "step": 4169 }, { "epoch": 0.28253946744359376, "grad_norm": 6.596248149871826, "learning_rate": 9.531932370456569e-05, "loss": 0.8763, "step": 4170 }, { "epoch": 0.2826072227115658, "grad_norm": 7.725964546203613, "learning_rate": 9.531795468546787e-05, "loss": 1.4009, "step": 4171 }, { "epoch": 0.2826749779795379, "grad_norm": 6.741204261779785, "learning_rate": 9.531658566637005e-05, "loss": 0.9103, "step": 4172 }, { "epoch": 0.28274273324751, "grad_norm": 9.002605438232422, "learning_rate": 9.531521664727223e-05, "loss": 1.1446, "step": 4173 }, { "epoch": 0.2828104885154821, "grad_norm": 7.860680103302002, "learning_rate": 9.531384762817441e-05, "loss": 1.081, "step": 4174 }, { "epoch": 0.28287824378345416, "grad_norm": 7.939533233642578, "learning_rate": 9.53124786090766e-05, "loss": 0.8876, "step": 4175 }, { "epoch": 0.28294599905142626, "grad_norm": 7.104232311248779, "learning_rate": 9.531110958997878e-05, "loss": 0.8687, "step": 4176 }, { "epoch": 0.28301375431939835, "grad_norm": 9.095148086547852, "learning_rate": 9.530974057088096e-05, "loss": 0.873, "step": 4177 }, { "epoch": 0.28308150958737044, "grad_norm": 6.869518280029297, "learning_rate": 9.530837155178314e-05, "loss": 0.8372, "step": 4178 }, { "epoch": 0.2831492648553425, "grad_norm": 7.7245049476623535, "learning_rate": 9.530700253268534e-05, "loss": 1.0267, "step": 4179 }, { "epoch": 0.28321702012331457, "grad_norm": 8.298705101013184, "learning_rate": 9.530563351358752e-05, "loss": 0.8919, "step": 4180 }, { "epoch": 0.28328477539128666, "grad_norm": 6.176532745361328, "learning_rate": 9.53042644944897e-05, "loss": 0.8603, "step": 4181 }, { "epoch": 0.28335253065925875, "grad_norm": 6.592353820800781, "learning_rate": 9.530289547539188e-05, "loss": 0.9415, "step": 4182 }, { "epoch": 0.28342028592723084, "grad_norm": 7.962296962738037, "learning_rate": 9.530152645629406e-05, "loss": 0.8764, "step": 4183 }, { "epoch": 0.28348804119520293, "grad_norm": 6.484033584594727, "learning_rate": 9.530015743719625e-05, "loss": 0.9577, "step": 4184 }, { "epoch": 0.283555796463175, "grad_norm": 8.156810760498047, "learning_rate": 9.529878841809843e-05, "loss": 1.1628, "step": 4185 }, { "epoch": 0.2836235517311471, "grad_norm": 6.916367053985596, "learning_rate": 9.529741939900061e-05, "loss": 1.1518, "step": 4186 }, { "epoch": 0.2836913069991192, "grad_norm": 7.70388650894165, "learning_rate": 9.52960503799028e-05, "loss": 0.9309, "step": 4187 }, { "epoch": 0.28375906226709124, "grad_norm": 8.770346641540527, "learning_rate": 9.529468136080499e-05, "loss": 0.891, "step": 4188 }, { "epoch": 0.28382681753506334, "grad_norm": 7.100319862365723, "learning_rate": 9.529331234170717e-05, "loss": 1.0302, "step": 4189 }, { "epoch": 0.2838945728030354, "grad_norm": 7.376253128051758, "learning_rate": 9.529194332260935e-05, "loss": 0.7906, "step": 4190 }, { "epoch": 0.2839623280710075, "grad_norm": 10.125496864318848, "learning_rate": 9.529057430351154e-05, "loss": 0.9829, "step": 4191 }, { "epoch": 0.2840300833389796, "grad_norm": 7.877635955810547, "learning_rate": 9.528920528441372e-05, "loss": 0.8803, "step": 4192 }, { "epoch": 0.2840978386069517, "grad_norm": 8.096887588500977, "learning_rate": 9.52878362653159e-05, "loss": 0.9342, "step": 4193 }, { "epoch": 0.2841655938749238, "grad_norm": 7.934850215911865, "learning_rate": 9.52864672462181e-05, "loss": 1.1699, "step": 4194 }, { "epoch": 0.2842333491428959, "grad_norm": 8.237794876098633, "learning_rate": 9.528509822712028e-05, "loss": 1.1718, "step": 4195 }, { "epoch": 0.2843011044108679, "grad_norm": 7.528624057769775, "learning_rate": 9.528372920802246e-05, "loss": 0.8229, "step": 4196 }, { "epoch": 0.28436885967884, "grad_norm": 7.210242748260498, "learning_rate": 9.528236018892464e-05, "loss": 0.8875, "step": 4197 }, { "epoch": 0.2844366149468121, "grad_norm": 7.630309581756592, "learning_rate": 9.528099116982683e-05, "loss": 0.8811, "step": 4198 }, { "epoch": 0.2845043702147842, "grad_norm": 11.624275207519531, "learning_rate": 9.527962215072901e-05, "loss": 1.0702, "step": 4199 }, { "epoch": 0.2845721254827563, "grad_norm": 7.52834415435791, "learning_rate": 9.52782531316312e-05, "loss": 0.9066, "step": 4200 }, { "epoch": 0.2846398807507284, "grad_norm": 8.418506622314453, "learning_rate": 9.527688411253337e-05, "loss": 0.8147, "step": 4201 }, { "epoch": 0.28470763601870047, "grad_norm": 9.67719554901123, "learning_rate": 9.527551509343557e-05, "loss": 0.976, "step": 4202 }, { "epoch": 0.28477539128667256, "grad_norm": 7.107409954071045, "learning_rate": 9.527414607433775e-05, "loss": 0.695, "step": 4203 }, { "epoch": 0.2848431465546446, "grad_norm": 7.840113162994385, "learning_rate": 9.527277705523993e-05, "loss": 0.9629, "step": 4204 }, { "epoch": 0.2849109018226167, "grad_norm": 8.170151710510254, "learning_rate": 9.527140803614211e-05, "loss": 0.8715, "step": 4205 }, { "epoch": 0.2849786570905888, "grad_norm": 7.882331848144531, "learning_rate": 9.527003901704429e-05, "loss": 1.0364, "step": 4206 }, { "epoch": 0.2850464123585609, "grad_norm": 10.077646255493164, "learning_rate": 9.526866999794648e-05, "loss": 1.2749, "step": 4207 }, { "epoch": 0.28511416762653297, "grad_norm": 8.749690055847168, "learning_rate": 9.526730097884866e-05, "loss": 0.9815, "step": 4208 }, { "epoch": 0.28518192289450506, "grad_norm": 6.793465614318848, "learning_rate": 9.526593195975084e-05, "loss": 1.0165, "step": 4209 }, { "epoch": 0.28524967816247715, "grad_norm": 7.186471939086914, "learning_rate": 9.526456294065302e-05, "loss": 0.911, "step": 4210 }, { "epoch": 0.28531743343044924, "grad_norm": 8.116944313049316, "learning_rate": 9.52631939215552e-05, "loss": 1.051, "step": 4211 }, { "epoch": 0.2853851886984213, "grad_norm": 5.9162750244140625, "learning_rate": 9.52618249024574e-05, "loss": 0.8165, "step": 4212 }, { "epoch": 0.28545294396639337, "grad_norm": 7.20265531539917, "learning_rate": 9.526045588335958e-05, "loss": 1.0693, "step": 4213 }, { "epoch": 0.28552069923436546, "grad_norm": 7.380153179168701, "learning_rate": 9.525908686426176e-05, "loss": 0.9611, "step": 4214 }, { "epoch": 0.28558845450233755, "grad_norm": 7.211367607116699, "learning_rate": 9.525771784516394e-05, "loss": 0.7595, "step": 4215 }, { "epoch": 0.28565620977030964, "grad_norm": 6.8030104637146, "learning_rate": 9.525634882606613e-05, "loss": 0.8281, "step": 4216 }, { "epoch": 0.28572396503828174, "grad_norm": 7.774519443511963, "learning_rate": 9.525497980696831e-05, "loss": 0.963, "step": 4217 }, { "epoch": 0.2857917203062538, "grad_norm": 8.210673332214355, "learning_rate": 9.52536107878705e-05, "loss": 1.3076, "step": 4218 }, { "epoch": 0.2858594755742259, "grad_norm": 7.3878583908081055, "learning_rate": 9.525224176877267e-05, "loss": 0.9546, "step": 4219 }, { "epoch": 0.28592723084219795, "grad_norm": 10.107827186584473, "learning_rate": 9.525087274967485e-05, "loss": 1.3039, "step": 4220 }, { "epoch": 0.28599498611017005, "grad_norm": 7.974700450897217, "learning_rate": 9.524950373057705e-05, "loss": 0.9651, "step": 4221 }, { "epoch": 0.28606274137814214, "grad_norm": 8.606413841247559, "learning_rate": 9.524813471147923e-05, "loss": 0.9459, "step": 4222 }, { "epoch": 0.28613049664611423, "grad_norm": 6.7952799797058105, "learning_rate": 9.524676569238141e-05, "loss": 0.8818, "step": 4223 }, { "epoch": 0.2861982519140863, "grad_norm": 7.2026214599609375, "learning_rate": 9.524539667328359e-05, "loss": 0.8578, "step": 4224 }, { "epoch": 0.2862660071820584, "grad_norm": 9.135619163513184, "learning_rate": 9.524402765418578e-05, "loss": 1.1594, "step": 4225 }, { "epoch": 0.2863337624500305, "grad_norm": 7.704957962036133, "learning_rate": 9.524265863508796e-05, "loss": 0.9715, "step": 4226 }, { "epoch": 0.2864015177180026, "grad_norm": 6.570467948913574, "learning_rate": 9.524128961599014e-05, "loss": 0.8916, "step": 4227 }, { "epoch": 0.28646927298597463, "grad_norm": 8.359966278076172, "learning_rate": 9.523992059689232e-05, "loss": 1.0021, "step": 4228 }, { "epoch": 0.2865370282539467, "grad_norm": 7.011820316314697, "learning_rate": 9.52385515777945e-05, "loss": 0.8718, "step": 4229 }, { "epoch": 0.2866047835219188, "grad_norm": 6.853650093078613, "learning_rate": 9.52371825586967e-05, "loss": 0.8041, "step": 4230 }, { "epoch": 0.2866725387898909, "grad_norm": 6.95853328704834, "learning_rate": 9.523581353959888e-05, "loss": 0.8763, "step": 4231 }, { "epoch": 0.286740294057863, "grad_norm": 9.217144012451172, "learning_rate": 9.523444452050106e-05, "loss": 1.046, "step": 4232 }, { "epoch": 0.2868080493258351, "grad_norm": 8.338934898376465, "learning_rate": 9.523307550140324e-05, "loss": 0.9205, "step": 4233 }, { "epoch": 0.2868758045938072, "grad_norm": 9.220541954040527, "learning_rate": 9.523170648230543e-05, "loss": 1.1468, "step": 4234 }, { "epoch": 0.2869435598617793, "grad_norm": 7.641387462615967, "learning_rate": 9.523033746320761e-05, "loss": 1.118, "step": 4235 }, { "epoch": 0.28701131512975137, "grad_norm": 7.52994441986084, "learning_rate": 9.52289684441098e-05, "loss": 0.851, "step": 4236 }, { "epoch": 0.2870790703977234, "grad_norm": 8.712708473205566, "learning_rate": 9.522759942501199e-05, "loss": 1.1741, "step": 4237 }, { "epoch": 0.2871468256656955, "grad_norm": 7.8549723625183105, "learning_rate": 9.522623040591417e-05, "loss": 0.9744, "step": 4238 }, { "epoch": 0.2872145809336676, "grad_norm": 6.8177642822265625, "learning_rate": 9.522486138681635e-05, "loss": 1.0432, "step": 4239 }, { "epoch": 0.2872823362016397, "grad_norm": 10.273691177368164, "learning_rate": 9.522349236771854e-05, "loss": 1.0777, "step": 4240 }, { "epoch": 0.28735009146961177, "grad_norm": 8.261405944824219, "learning_rate": 9.522212334862072e-05, "loss": 0.9415, "step": 4241 }, { "epoch": 0.28741784673758386, "grad_norm": 10.067824363708496, "learning_rate": 9.52207543295229e-05, "loss": 1.1091, "step": 4242 }, { "epoch": 0.28748560200555595, "grad_norm": 9.639914512634277, "learning_rate": 9.521938531042508e-05, "loss": 0.9534, "step": 4243 }, { "epoch": 0.28755335727352804, "grad_norm": 6.993269920349121, "learning_rate": 9.521801629132728e-05, "loss": 1.1193, "step": 4244 }, { "epoch": 0.2876211125415001, "grad_norm": 7.779829502105713, "learning_rate": 9.521664727222946e-05, "loss": 0.8744, "step": 4245 }, { "epoch": 0.28768886780947217, "grad_norm": 7.070007801055908, "learning_rate": 9.521527825313164e-05, "loss": 0.8609, "step": 4246 }, { "epoch": 0.28775662307744426, "grad_norm": 6.091519355773926, "learning_rate": 9.521390923403382e-05, "loss": 0.8018, "step": 4247 }, { "epoch": 0.28782437834541635, "grad_norm": 6.76396369934082, "learning_rate": 9.521254021493601e-05, "loss": 0.8509, "step": 4248 }, { "epoch": 0.28789213361338845, "grad_norm": 8.478080749511719, "learning_rate": 9.521117119583819e-05, "loss": 1.0355, "step": 4249 }, { "epoch": 0.28795988888136054, "grad_norm": 7.830933094024658, "learning_rate": 9.520980217674037e-05, "loss": 0.8699, "step": 4250 }, { "epoch": 0.28802764414933263, "grad_norm": 8.354218482971191, "learning_rate": 9.520843315764255e-05, "loss": 0.9228, "step": 4251 }, { "epoch": 0.2880953994173047, "grad_norm": 8.529300689697266, "learning_rate": 9.520706413854473e-05, "loss": 1.1215, "step": 4252 }, { "epoch": 0.28816315468527676, "grad_norm": 6.831529140472412, "learning_rate": 9.520569511944693e-05, "loss": 1.0665, "step": 4253 }, { "epoch": 0.28823090995324885, "grad_norm": 7.824577808380127, "learning_rate": 9.520432610034911e-05, "loss": 0.8864, "step": 4254 }, { "epoch": 0.28829866522122094, "grad_norm": 7.795472621917725, "learning_rate": 9.520295708125129e-05, "loss": 1.0775, "step": 4255 }, { "epoch": 0.28836642048919303, "grad_norm": 7.210735321044922, "learning_rate": 9.520158806215347e-05, "loss": 1.0244, "step": 4256 }, { "epoch": 0.2884341757571651, "grad_norm": 7.224759101867676, "learning_rate": 9.520021904305566e-05, "loss": 1.0527, "step": 4257 }, { "epoch": 0.2885019310251372, "grad_norm": 7.608676910400391, "learning_rate": 9.519885002395784e-05, "loss": 0.7978, "step": 4258 }, { "epoch": 0.2885696862931093, "grad_norm": 7.426436424255371, "learning_rate": 9.519748100486002e-05, "loss": 0.859, "step": 4259 }, { "epoch": 0.2886374415610814, "grad_norm": 7.585330963134766, "learning_rate": 9.51961119857622e-05, "loss": 1.1091, "step": 4260 }, { "epoch": 0.28870519682905343, "grad_norm": 6.930294990539551, "learning_rate": 9.519474296666438e-05, "loss": 0.8862, "step": 4261 }, { "epoch": 0.2887729520970255, "grad_norm": 8.092456817626953, "learning_rate": 9.519337394756658e-05, "loss": 1.051, "step": 4262 }, { "epoch": 0.2888407073649976, "grad_norm": 7.897385597229004, "learning_rate": 9.519200492846876e-05, "loss": 1.0589, "step": 4263 }, { "epoch": 0.2889084626329697, "grad_norm": 7.344932556152344, "learning_rate": 9.519063590937094e-05, "loss": 1.0319, "step": 4264 }, { "epoch": 0.2889762179009418, "grad_norm": 8.676694869995117, "learning_rate": 9.518926689027312e-05, "loss": 0.9119, "step": 4265 }, { "epoch": 0.2890439731689139, "grad_norm": 7.291116714477539, "learning_rate": 9.51878978711753e-05, "loss": 0.8249, "step": 4266 }, { "epoch": 0.289111728436886, "grad_norm": 7.467698574066162, "learning_rate": 9.51865288520775e-05, "loss": 0.9807, "step": 4267 }, { "epoch": 0.2891794837048581, "grad_norm": 7.744437217712402, "learning_rate": 9.518515983297967e-05, "loss": 0.8792, "step": 4268 }, { "epoch": 0.2892472389728301, "grad_norm": 6.8492560386657715, "learning_rate": 9.518379081388185e-05, "loss": 0.9216, "step": 4269 }, { "epoch": 0.2893149942408022, "grad_norm": 7.247424602508545, "learning_rate": 9.518242179478403e-05, "loss": 1.0865, "step": 4270 }, { "epoch": 0.2893827495087743, "grad_norm": 8.125252723693848, "learning_rate": 9.518105277568623e-05, "loss": 1.0298, "step": 4271 }, { "epoch": 0.2894505047767464, "grad_norm": 6.578275680541992, "learning_rate": 9.517968375658841e-05, "loss": 0.9353, "step": 4272 }, { "epoch": 0.2895182600447185, "grad_norm": 8.099616050720215, "learning_rate": 9.517831473749059e-05, "loss": 1.038, "step": 4273 }, { "epoch": 0.28958601531269057, "grad_norm": 7.529900074005127, "learning_rate": 9.517694571839277e-05, "loss": 0.9753, "step": 4274 }, { "epoch": 0.28965377058066266, "grad_norm": 7.521812915802002, "learning_rate": 9.517557669929495e-05, "loss": 0.972, "step": 4275 }, { "epoch": 0.28972152584863475, "grad_norm": 7.780272483825684, "learning_rate": 9.517420768019714e-05, "loss": 0.9927, "step": 4276 }, { "epoch": 0.2897892811166068, "grad_norm": 7.982085704803467, "learning_rate": 9.517283866109932e-05, "loss": 0.9462, "step": 4277 }, { "epoch": 0.2898570363845789, "grad_norm": 7.325984477996826, "learning_rate": 9.51714696420015e-05, "loss": 1.0814, "step": 4278 }, { "epoch": 0.289924791652551, "grad_norm": 7.833248138427734, "learning_rate": 9.517010062290368e-05, "loss": 1.1736, "step": 4279 }, { "epoch": 0.28999254692052306, "grad_norm": 6.983424186706543, "learning_rate": 9.516873160380588e-05, "loss": 0.9662, "step": 4280 }, { "epoch": 0.29006030218849516, "grad_norm": 7.954293251037598, "learning_rate": 9.516736258470806e-05, "loss": 0.8449, "step": 4281 }, { "epoch": 0.29012805745646725, "grad_norm": 7.47749137878418, "learning_rate": 9.516599356561024e-05, "loss": 0.7869, "step": 4282 }, { "epoch": 0.29019581272443934, "grad_norm": 8.70733642578125, "learning_rate": 9.516462454651243e-05, "loss": 0.915, "step": 4283 }, { "epoch": 0.29026356799241143, "grad_norm": 7.003527641296387, "learning_rate": 9.516325552741461e-05, "loss": 0.8543, "step": 4284 }, { "epoch": 0.2903313232603835, "grad_norm": 8.230785369873047, "learning_rate": 9.51618865083168e-05, "loss": 1.0022, "step": 4285 }, { "epoch": 0.29039907852835556, "grad_norm": 6.20754337310791, "learning_rate": 9.516051748921899e-05, "loss": 0.8967, "step": 4286 }, { "epoch": 0.29046683379632765, "grad_norm": 7.0158538818359375, "learning_rate": 9.515914847012117e-05, "loss": 0.9162, "step": 4287 }, { "epoch": 0.29053458906429974, "grad_norm": 6.215134143829346, "learning_rate": 9.515777945102335e-05, "loss": 0.7362, "step": 4288 }, { "epoch": 0.29060234433227183, "grad_norm": 7.366815090179443, "learning_rate": 9.515641043192553e-05, "loss": 1.0174, "step": 4289 }, { "epoch": 0.2906700996002439, "grad_norm": 7.514114856719971, "learning_rate": 9.515504141282772e-05, "loss": 0.9979, "step": 4290 }, { "epoch": 0.290737854868216, "grad_norm": 6.899611949920654, "learning_rate": 9.51536723937299e-05, "loss": 0.6909, "step": 4291 }, { "epoch": 0.2908056101361881, "grad_norm": 7.254974842071533, "learning_rate": 9.515230337463208e-05, "loss": 1.1744, "step": 4292 }, { "epoch": 0.2908733654041602, "grad_norm": 7.3820719718933105, "learning_rate": 9.515093435553426e-05, "loss": 1.0514, "step": 4293 }, { "epoch": 0.29094112067213224, "grad_norm": 7.556882858276367, "learning_rate": 9.514956533643646e-05, "loss": 1.0515, "step": 4294 }, { "epoch": 0.29100887594010433, "grad_norm": 7.479793548583984, "learning_rate": 9.514819631733864e-05, "loss": 0.9634, "step": 4295 }, { "epoch": 0.2910766312080764, "grad_norm": 6.750072956085205, "learning_rate": 9.514682729824082e-05, "loss": 0.8536, "step": 4296 }, { "epoch": 0.2911443864760485, "grad_norm": 6.5008931159973145, "learning_rate": 9.5145458279143e-05, "loss": 0.8414, "step": 4297 }, { "epoch": 0.2912121417440206, "grad_norm": 5.633193016052246, "learning_rate": 9.514408926004518e-05, "loss": 0.9628, "step": 4298 }, { "epoch": 0.2912798970119927, "grad_norm": 7.052159309387207, "learning_rate": 9.514272024094737e-05, "loss": 0.9612, "step": 4299 }, { "epoch": 0.2913476522799648, "grad_norm": 6.944911003112793, "learning_rate": 9.514135122184955e-05, "loss": 0.8237, "step": 4300 }, { "epoch": 0.2914154075479369, "grad_norm": 6.553284645080566, "learning_rate": 9.513998220275173e-05, "loss": 1.103, "step": 4301 }, { "epoch": 0.2914831628159089, "grad_norm": 6.414531707763672, "learning_rate": 9.513861318365391e-05, "loss": 1.163, "step": 4302 }, { "epoch": 0.291550918083881, "grad_norm": 9.551664352416992, "learning_rate": 9.513724416455611e-05, "loss": 1.0118, "step": 4303 }, { "epoch": 0.2916186733518531, "grad_norm": 8.332045555114746, "learning_rate": 9.513587514545829e-05, "loss": 1.0938, "step": 4304 }, { "epoch": 0.2916864286198252, "grad_norm": 7.512511253356934, "learning_rate": 9.513450612636047e-05, "loss": 0.9499, "step": 4305 }, { "epoch": 0.2917541838877973, "grad_norm": 7.233335494995117, "learning_rate": 9.513313710726265e-05, "loss": 0.9298, "step": 4306 }, { "epoch": 0.29182193915576937, "grad_norm": 6.711233615875244, "learning_rate": 9.513176808816483e-05, "loss": 0.8409, "step": 4307 }, { "epoch": 0.29188969442374146, "grad_norm": 6.433267593383789, "learning_rate": 9.513039906906702e-05, "loss": 0.7654, "step": 4308 }, { "epoch": 0.29195744969171356, "grad_norm": 7.049487113952637, "learning_rate": 9.51290300499692e-05, "loss": 0.8363, "step": 4309 }, { "epoch": 0.2920252049596856, "grad_norm": 7.030927658081055, "learning_rate": 9.512766103087138e-05, "loss": 0.8536, "step": 4310 }, { "epoch": 0.2920929602276577, "grad_norm": 7.723023414611816, "learning_rate": 9.512629201177356e-05, "loss": 0.8738, "step": 4311 }, { "epoch": 0.2921607154956298, "grad_norm": 6.293034553527832, "learning_rate": 9.512492299267576e-05, "loss": 0.8247, "step": 4312 }, { "epoch": 0.29222847076360187, "grad_norm": 7.551876544952393, "learning_rate": 9.512355397357794e-05, "loss": 0.9144, "step": 4313 }, { "epoch": 0.29229622603157396, "grad_norm": 7.78217077255249, "learning_rate": 9.512218495448012e-05, "loss": 0.911, "step": 4314 }, { "epoch": 0.29236398129954605, "grad_norm": 6.316245079040527, "learning_rate": 9.51208159353823e-05, "loss": 0.7426, "step": 4315 }, { "epoch": 0.29243173656751814, "grad_norm": 6.730735778808594, "learning_rate": 9.511944691628448e-05, "loss": 0.8938, "step": 4316 }, { "epoch": 0.29249949183549023, "grad_norm": 8.128830909729004, "learning_rate": 9.511807789718667e-05, "loss": 1.129, "step": 4317 }, { "epoch": 0.29256724710346227, "grad_norm": 8.023150444030762, "learning_rate": 9.511670887808885e-05, "loss": 1.1377, "step": 4318 }, { "epoch": 0.29263500237143436, "grad_norm": 9.075041770935059, "learning_rate": 9.511533985899103e-05, "loss": 1.0358, "step": 4319 }, { "epoch": 0.29270275763940645, "grad_norm": 9.679409980773926, "learning_rate": 9.511397083989321e-05, "loss": 0.9415, "step": 4320 }, { "epoch": 0.29277051290737854, "grad_norm": 7.316888809204102, "learning_rate": 9.51126018207954e-05, "loss": 0.9422, "step": 4321 }, { "epoch": 0.29283826817535064, "grad_norm": 8.017754554748535, "learning_rate": 9.511123280169759e-05, "loss": 0.9685, "step": 4322 }, { "epoch": 0.2929060234433227, "grad_norm": 7.990202903747559, "learning_rate": 9.510986378259977e-05, "loss": 0.9028, "step": 4323 }, { "epoch": 0.2929737787112948, "grad_norm": 7.538297653198242, "learning_rate": 9.510849476350195e-05, "loss": 0.8146, "step": 4324 }, { "epoch": 0.2930415339792669, "grad_norm": 6.794483184814453, "learning_rate": 9.510712574440413e-05, "loss": 0.8859, "step": 4325 }, { "epoch": 0.29310928924723895, "grad_norm": 9.820608139038086, "learning_rate": 9.510575672530632e-05, "loss": 1.0486, "step": 4326 }, { "epoch": 0.29317704451521104, "grad_norm": 8.107736587524414, "learning_rate": 9.51043877062085e-05, "loss": 0.9978, "step": 4327 }, { "epoch": 0.29324479978318313, "grad_norm": 8.129117965698242, "learning_rate": 9.510301868711068e-05, "loss": 0.9966, "step": 4328 }, { "epoch": 0.2933125550511552, "grad_norm": 7.15314245223999, "learning_rate": 9.510164966801288e-05, "loss": 0.8078, "step": 4329 }, { "epoch": 0.2933803103191273, "grad_norm": 7.951611518859863, "learning_rate": 9.510028064891506e-05, "loss": 0.8566, "step": 4330 }, { "epoch": 0.2934480655870994, "grad_norm": 8.350584030151367, "learning_rate": 9.509891162981724e-05, "loss": 1.1757, "step": 4331 }, { "epoch": 0.2935158208550715, "grad_norm": 7.556168079376221, "learning_rate": 9.509754261071943e-05, "loss": 1.175, "step": 4332 }, { "epoch": 0.2935835761230436, "grad_norm": 8.102100372314453, "learning_rate": 9.509617359162161e-05, "loss": 0.8948, "step": 4333 }, { "epoch": 0.2936513313910156, "grad_norm": 6.762083530426025, "learning_rate": 9.509480457252379e-05, "loss": 0.9597, "step": 4334 }, { "epoch": 0.2937190866589877, "grad_norm": 7.2654948234558105, "learning_rate": 9.509343555342599e-05, "loss": 0.9352, "step": 4335 }, { "epoch": 0.2937868419269598, "grad_norm": 8.114389419555664, "learning_rate": 9.509206653432817e-05, "loss": 0.9758, "step": 4336 }, { "epoch": 0.2938545971949319, "grad_norm": 6.972601890563965, "learning_rate": 9.509069751523035e-05, "loss": 0.9322, "step": 4337 }, { "epoch": 0.293922352462904, "grad_norm": 7.261653423309326, "learning_rate": 9.508932849613253e-05, "loss": 0.8714, "step": 4338 }, { "epoch": 0.2939901077308761, "grad_norm": 8.368372917175293, "learning_rate": 9.508795947703471e-05, "loss": 1.1235, "step": 4339 }, { "epoch": 0.2940578629988482, "grad_norm": 8.295226097106934, "learning_rate": 9.50865904579369e-05, "loss": 1.0647, "step": 4340 }, { "epoch": 0.29412561826682027, "grad_norm": 8.720281600952148, "learning_rate": 9.508522143883908e-05, "loss": 1.0779, "step": 4341 }, { "epoch": 0.29419337353479236, "grad_norm": 6.721040725708008, "learning_rate": 9.508385241974126e-05, "loss": 1.0219, "step": 4342 }, { "epoch": 0.2942611288027644, "grad_norm": 6.298686504364014, "learning_rate": 9.508248340064344e-05, "loss": 0.8011, "step": 4343 }, { "epoch": 0.2943288840707365, "grad_norm": 6.915674686431885, "learning_rate": 9.508111438154562e-05, "loss": 0.9702, "step": 4344 }, { "epoch": 0.2943966393387086, "grad_norm": 6.907165050506592, "learning_rate": 9.507974536244782e-05, "loss": 0.861, "step": 4345 }, { "epoch": 0.29446439460668067, "grad_norm": 7.540262699127197, "learning_rate": 9.507837634335e-05, "loss": 0.9426, "step": 4346 }, { "epoch": 0.29453214987465276, "grad_norm": 7.145787715911865, "learning_rate": 9.507700732425218e-05, "loss": 0.911, "step": 4347 }, { "epoch": 0.29459990514262485, "grad_norm": 9.937151908874512, "learning_rate": 9.507563830515436e-05, "loss": 0.9825, "step": 4348 }, { "epoch": 0.29466766041059694, "grad_norm": 7.125603199005127, "learning_rate": 9.507426928605655e-05, "loss": 0.855, "step": 4349 }, { "epoch": 0.29473541567856903, "grad_norm": 7.014605522155762, "learning_rate": 9.507290026695873e-05, "loss": 0.9282, "step": 4350 }, { "epoch": 0.29480317094654107, "grad_norm": 8.753725051879883, "learning_rate": 9.507153124786091e-05, "loss": 1.1047, "step": 4351 }, { "epoch": 0.29487092621451316, "grad_norm": 7.921840190887451, "learning_rate": 9.50701622287631e-05, "loss": 0.7857, "step": 4352 }, { "epoch": 0.29493868148248525, "grad_norm": 6.762521266937256, "learning_rate": 9.506879320966527e-05, "loss": 0.8537, "step": 4353 }, { "epoch": 0.29500643675045735, "grad_norm": 8.441102981567383, "learning_rate": 9.506742419056747e-05, "loss": 1.1356, "step": 4354 }, { "epoch": 0.29507419201842944, "grad_norm": 6.748636245727539, "learning_rate": 9.506605517146965e-05, "loss": 1.0962, "step": 4355 }, { "epoch": 0.29514194728640153, "grad_norm": 8.168306350708008, "learning_rate": 9.506468615237183e-05, "loss": 0.9628, "step": 4356 }, { "epoch": 0.2952097025543736, "grad_norm": 6.545300483703613, "learning_rate": 9.506331713327401e-05, "loss": 1.0041, "step": 4357 }, { "epoch": 0.2952774578223457, "grad_norm": 6.958950996398926, "learning_rate": 9.50619481141762e-05, "loss": 0.9742, "step": 4358 }, { "epoch": 0.29534521309031775, "grad_norm": 6.817789554595947, "learning_rate": 9.506057909507838e-05, "loss": 0.8281, "step": 4359 }, { "epoch": 0.29541296835828984, "grad_norm": 8.255404472351074, "learning_rate": 9.505921007598056e-05, "loss": 0.9171, "step": 4360 }, { "epoch": 0.29548072362626193, "grad_norm": 7.552668571472168, "learning_rate": 9.505784105688274e-05, "loss": 0.8665, "step": 4361 }, { "epoch": 0.295548478894234, "grad_norm": 6.980686664581299, "learning_rate": 9.505647203778492e-05, "loss": 0.9461, "step": 4362 }, { "epoch": 0.2956162341622061, "grad_norm": 6.765429973602295, "learning_rate": 9.505510301868712e-05, "loss": 0.7946, "step": 4363 }, { "epoch": 0.2956839894301782, "grad_norm": 8.403508186340332, "learning_rate": 9.50537339995893e-05, "loss": 0.9015, "step": 4364 }, { "epoch": 0.2957517446981503, "grad_norm": 7.822012901306152, "learning_rate": 9.505236498049148e-05, "loss": 1.0313, "step": 4365 }, { "epoch": 0.2958194999661224, "grad_norm": 9.843314170837402, "learning_rate": 9.505099596139366e-05, "loss": 1.11, "step": 4366 }, { "epoch": 0.2958872552340944, "grad_norm": 8.155379295349121, "learning_rate": 9.504962694229584e-05, "loss": 1.1565, "step": 4367 }, { "epoch": 0.2959550105020665, "grad_norm": 8.446005821228027, "learning_rate": 9.504825792319803e-05, "loss": 1.0588, "step": 4368 }, { "epoch": 0.2960227657700386, "grad_norm": 7.244038105010986, "learning_rate": 9.504688890410021e-05, "loss": 0.8648, "step": 4369 }, { "epoch": 0.2960905210380107, "grad_norm": 8.324630737304688, "learning_rate": 9.50455198850024e-05, "loss": 0.9862, "step": 4370 }, { "epoch": 0.2961582763059828, "grad_norm": 7.308585166931152, "learning_rate": 9.504415086590457e-05, "loss": 1.082, "step": 4371 }, { "epoch": 0.2962260315739549, "grad_norm": 8.17287826538086, "learning_rate": 9.504278184680677e-05, "loss": 1.0359, "step": 4372 }, { "epoch": 0.296293786841927, "grad_norm": 6.8774943351745605, "learning_rate": 9.504141282770895e-05, "loss": 0.904, "step": 4373 }, { "epoch": 0.29636154210989907, "grad_norm": 8.641596794128418, "learning_rate": 9.504004380861113e-05, "loss": 0.9822, "step": 4374 }, { "epoch": 0.2964292973778711, "grad_norm": 7.289584159851074, "learning_rate": 9.503867478951331e-05, "loss": 1.0274, "step": 4375 }, { "epoch": 0.2964970526458432, "grad_norm": 9.820521354675293, "learning_rate": 9.50373057704155e-05, "loss": 1.0895, "step": 4376 }, { "epoch": 0.2965648079138153, "grad_norm": 8.6587553024292, "learning_rate": 9.503593675131768e-05, "loss": 0.7513, "step": 4377 }, { "epoch": 0.2966325631817874, "grad_norm": 7.444339752197266, "learning_rate": 9.503456773221986e-05, "loss": 1.0892, "step": 4378 }, { "epoch": 0.29670031844975947, "grad_norm": 6.4494948387146, "learning_rate": 9.503319871312206e-05, "loss": 0.9465, "step": 4379 }, { "epoch": 0.29676807371773156, "grad_norm": 7.744002819061279, "learning_rate": 9.503182969402424e-05, "loss": 1.0613, "step": 4380 }, { "epoch": 0.29683582898570365, "grad_norm": 8.152511596679688, "learning_rate": 9.503046067492642e-05, "loss": 1.0115, "step": 4381 }, { "epoch": 0.29690358425367575, "grad_norm": 6.076781749725342, "learning_rate": 9.502909165582861e-05, "loss": 0.8255, "step": 4382 }, { "epoch": 0.2969713395216478, "grad_norm": 6.787397861480713, "learning_rate": 9.502772263673079e-05, "loss": 1.1132, "step": 4383 }, { "epoch": 0.2970390947896199, "grad_norm": 8.592107772827148, "learning_rate": 9.502635361763297e-05, "loss": 0.8715, "step": 4384 }, { "epoch": 0.29710685005759196, "grad_norm": 7.620471954345703, "learning_rate": 9.502498459853515e-05, "loss": 0.9521, "step": 4385 }, { "epoch": 0.29717460532556406, "grad_norm": 8.026017189025879, "learning_rate": 9.502361557943735e-05, "loss": 0.8575, "step": 4386 }, { "epoch": 0.29724236059353615, "grad_norm": 8.82768726348877, "learning_rate": 9.502224656033953e-05, "loss": 1.0642, "step": 4387 }, { "epoch": 0.29731011586150824, "grad_norm": 7.179310321807861, "learning_rate": 9.502087754124171e-05, "loss": 0.9161, "step": 4388 }, { "epoch": 0.29737787112948033, "grad_norm": 7.4363861083984375, "learning_rate": 9.501950852214389e-05, "loss": 1.0825, "step": 4389 }, { "epoch": 0.2974456263974524, "grad_norm": 6.218450546264648, "learning_rate": 9.501813950304608e-05, "loss": 0.8461, "step": 4390 }, { "epoch": 0.2975133816654245, "grad_norm": 6.540037631988525, "learning_rate": 9.501677048394826e-05, "loss": 0.9099, "step": 4391 }, { "epoch": 0.29758113693339655, "grad_norm": 9.617361068725586, "learning_rate": 9.501540146485044e-05, "loss": 0.9205, "step": 4392 }, { "epoch": 0.29764889220136864, "grad_norm": 8.519254684448242, "learning_rate": 9.501403244575262e-05, "loss": 0.9774, "step": 4393 }, { "epoch": 0.29771664746934073, "grad_norm": 8.101237297058105, "learning_rate": 9.50126634266548e-05, "loss": 0.9019, "step": 4394 }, { "epoch": 0.2977844027373128, "grad_norm": 6.703762531280518, "learning_rate": 9.5011294407557e-05, "loss": 0.9486, "step": 4395 }, { "epoch": 0.2978521580052849, "grad_norm": 8.651348114013672, "learning_rate": 9.500992538845918e-05, "loss": 0.9189, "step": 4396 }, { "epoch": 0.297919913273257, "grad_norm": 7.783169269561768, "learning_rate": 9.500855636936136e-05, "loss": 0.933, "step": 4397 }, { "epoch": 0.2979876685412291, "grad_norm": 8.011998176574707, "learning_rate": 9.500718735026354e-05, "loss": 1.0642, "step": 4398 }, { "epoch": 0.2980554238092012, "grad_norm": 10.782837867736816, "learning_rate": 9.500581833116572e-05, "loss": 1.1406, "step": 4399 }, { "epoch": 0.29812317907717323, "grad_norm": 7.699460029602051, "learning_rate": 9.500444931206791e-05, "loss": 0.9627, "step": 4400 }, { "epoch": 0.2981909343451453, "grad_norm": 8.144030570983887, "learning_rate": 9.500308029297009e-05, "loss": 0.8278, "step": 4401 }, { "epoch": 0.2982586896131174, "grad_norm": 7.882628917694092, "learning_rate": 9.500171127387227e-05, "loss": 0.7924, "step": 4402 }, { "epoch": 0.2983264448810895, "grad_norm": 6.887333393096924, "learning_rate": 9.500034225477445e-05, "loss": 0.8385, "step": 4403 }, { "epoch": 0.2983942001490616, "grad_norm": 8.446837425231934, "learning_rate": 9.499897323567665e-05, "loss": 1.0352, "step": 4404 }, { "epoch": 0.2984619554170337, "grad_norm": 8.389452934265137, "learning_rate": 9.499760421657883e-05, "loss": 0.8053, "step": 4405 }, { "epoch": 0.2985297106850058, "grad_norm": 7.09201717376709, "learning_rate": 9.499623519748101e-05, "loss": 0.999, "step": 4406 }, { "epoch": 0.29859746595297787, "grad_norm": 9.670032501220703, "learning_rate": 9.499486617838319e-05, "loss": 0.8558, "step": 4407 }, { "epoch": 0.2986652212209499, "grad_norm": 9.050987243652344, "learning_rate": 9.499349715928537e-05, "loss": 1.0356, "step": 4408 }, { "epoch": 0.298732976488922, "grad_norm": 6.220524787902832, "learning_rate": 9.499212814018756e-05, "loss": 0.8278, "step": 4409 }, { "epoch": 0.2988007317568941, "grad_norm": 7.464169979095459, "learning_rate": 9.499075912108974e-05, "loss": 0.963, "step": 4410 }, { "epoch": 0.2988684870248662, "grad_norm": 8.479538917541504, "learning_rate": 9.498939010199192e-05, "loss": 1.1108, "step": 4411 }, { "epoch": 0.2989362422928383, "grad_norm": 8.23353385925293, "learning_rate": 9.49880210828941e-05, "loss": 1.0156, "step": 4412 }, { "epoch": 0.29900399756081036, "grad_norm": 7.2161335945129395, "learning_rate": 9.49866520637963e-05, "loss": 0.9964, "step": 4413 }, { "epoch": 0.29907175282878246, "grad_norm": 7.893895149230957, "learning_rate": 9.498528304469848e-05, "loss": 0.9353, "step": 4414 }, { "epoch": 0.29913950809675455, "grad_norm": 8.448126792907715, "learning_rate": 9.498391402560066e-05, "loss": 0.9488, "step": 4415 }, { "epoch": 0.2992072633647266, "grad_norm": 8.160601615905762, "learning_rate": 9.498254500650284e-05, "loss": 0.8934, "step": 4416 }, { "epoch": 0.2992750186326987, "grad_norm": 7.6815266609191895, "learning_rate": 9.498117598740502e-05, "loss": 0.8938, "step": 4417 }, { "epoch": 0.29934277390067077, "grad_norm": 6.795403480529785, "learning_rate": 9.497980696830721e-05, "loss": 0.7074, "step": 4418 }, { "epoch": 0.29941052916864286, "grad_norm": 7.904134750366211, "learning_rate": 9.497843794920939e-05, "loss": 0.8747, "step": 4419 }, { "epoch": 0.29947828443661495, "grad_norm": 7.776915073394775, "learning_rate": 9.497706893011157e-05, "loss": 1.1141, "step": 4420 }, { "epoch": 0.29954603970458704, "grad_norm": 7.425099849700928, "learning_rate": 9.497569991101375e-05, "loss": 0.8826, "step": 4421 }, { "epoch": 0.29961379497255913, "grad_norm": 10.530119895935059, "learning_rate": 9.497433089191595e-05, "loss": 1.0893, "step": 4422 }, { "epoch": 0.2996815502405312, "grad_norm": 7.118913650512695, "learning_rate": 9.497296187281813e-05, "loss": 0.9179, "step": 4423 }, { "epoch": 0.29974930550850326, "grad_norm": 6.6407060623168945, "learning_rate": 9.497159285372031e-05, "loss": 0.8763, "step": 4424 }, { "epoch": 0.29981706077647535, "grad_norm": 6.975287437438965, "learning_rate": 9.49702238346225e-05, "loss": 0.9192, "step": 4425 }, { "epoch": 0.29988481604444744, "grad_norm": 11.773819923400879, "learning_rate": 9.496885481552468e-05, "loss": 1.3173, "step": 4426 }, { "epoch": 0.29995257131241954, "grad_norm": 13.157849311828613, "learning_rate": 9.496748579642686e-05, "loss": 0.8773, "step": 4427 }, { "epoch": 0.3000203265803916, "grad_norm": 7.372555732727051, "learning_rate": 9.496611677732906e-05, "loss": 1.0028, "step": 4428 }, { "epoch": 0.3000880818483637, "grad_norm": 8.804738998413086, "learning_rate": 9.496474775823124e-05, "loss": 1.0684, "step": 4429 }, { "epoch": 0.3001558371163358, "grad_norm": 7.723897933959961, "learning_rate": 9.496337873913342e-05, "loss": 0.9113, "step": 4430 }, { "epoch": 0.3002235923843079, "grad_norm": 10.261975288391113, "learning_rate": 9.49620097200356e-05, "loss": 1.242, "step": 4431 }, { "epoch": 0.30029134765227994, "grad_norm": 7.396425247192383, "learning_rate": 9.496064070093779e-05, "loss": 1.1045, "step": 4432 }, { "epoch": 0.30035910292025203, "grad_norm": 9.235404014587402, "learning_rate": 9.495927168183997e-05, "loss": 1.1058, "step": 4433 }, { "epoch": 0.3004268581882241, "grad_norm": 6.465355396270752, "learning_rate": 9.495790266274215e-05, "loss": 0.9163, "step": 4434 }, { "epoch": 0.3004946134561962, "grad_norm": 6.401477813720703, "learning_rate": 9.495653364364433e-05, "loss": 0.8224, "step": 4435 }, { "epoch": 0.3005623687241683, "grad_norm": 6.599462985992432, "learning_rate": 9.495516462454653e-05, "loss": 0.8104, "step": 4436 }, { "epoch": 0.3006301239921404, "grad_norm": 8.553694725036621, "learning_rate": 9.495379560544871e-05, "loss": 1.1467, "step": 4437 }, { "epoch": 0.3006978792601125, "grad_norm": 6.262206554412842, "learning_rate": 9.495242658635089e-05, "loss": 0.7903, "step": 4438 }, { "epoch": 0.3007656345280846, "grad_norm": 7.284942150115967, "learning_rate": 9.495105756725307e-05, "loss": 1.0585, "step": 4439 }, { "epoch": 0.3008333897960566, "grad_norm": 8.880256652832031, "learning_rate": 9.494968854815525e-05, "loss": 1.0553, "step": 4440 }, { "epoch": 0.3009011450640287, "grad_norm": 9.358749389648438, "learning_rate": 9.494831952905744e-05, "loss": 1.1874, "step": 4441 }, { "epoch": 0.3009689003320008, "grad_norm": 6.7088470458984375, "learning_rate": 9.494695050995962e-05, "loss": 0.8641, "step": 4442 }, { "epoch": 0.3010366555999729, "grad_norm": 9.420478820800781, "learning_rate": 9.49455814908618e-05, "loss": 0.8829, "step": 4443 }, { "epoch": 0.301104410867945, "grad_norm": 8.877934455871582, "learning_rate": 9.494421247176398e-05, "loss": 1.1206, "step": 4444 }, { "epoch": 0.3011721661359171, "grad_norm": 9.361932754516602, "learning_rate": 9.494284345266618e-05, "loss": 0.7993, "step": 4445 }, { "epoch": 0.30123992140388917, "grad_norm": 7.4104790687561035, "learning_rate": 9.494147443356836e-05, "loss": 1.0655, "step": 4446 }, { "epoch": 0.30130767667186126, "grad_norm": 7.0485992431640625, "learning_rate": 9.494010541447054e-05, "loss": 0.8878, "step": 4447 }, { "epoch": 0.30137543193983335, "grad_norm": 7.162468433380127, "learning_rate": 9.493873639537272e-05, "loss": 0.9467, "step": 4448 }, { "epoch": 0.3014431872078054, "grad_norm": 9.214662551879883, "learning_rate": 9.49373673762749e-05, "loss": 0.9288, "step": 4449 }, { "epoch": 0.3015109424757775, "grad_norm": 11.189544677734375, "learning_rate": 9.493599835717709e-05, "loss": 0.9933, "step": 4450 }, { "epoch": 0.30157869774374957, "grad_norm": 9.398331642150879, "learning_rate": 9.493462933807927e-05, "loss": 0.9248, "step": 4451 }, { "epoch": 0.30164645301172166, "grad_norm": 8.26975154876709, "learning_rate": 9.493326031898145e-05, "loss": 1.0308, "step": 4452 }, { "epoch": 0.30171420827969375, "grad_norm": 9.262918472290039, "learning_rate": 9.493189129988363e-05, "loss": 0.9367, "step": 4453 }, { "epoch": 0.30178196354766584, "grad_norm": 7.079284191131592, "learning_rate": 9.493052228078581e-05, "loss": 1.0415, "step": 4454 }, { "epoch": 0.30184971881563794, "grad_norm": 9.081875801086426, "learning_rate": 9.492915326168801e-05, "loss": 1.0783, "step": 4455 }, { "epoch": 0.30191747408361, "grad_norm": 8.476323127746582, "learning_rate": 9.492778424259019e-05, "loss": 0.9743, "step": 4456 }, { "epoch": 0.30198522935158206, "grad_norm": 7.064181327819824, "learning_rate": 9.492641522349237e-05, "loss": 0.7966, "step": 4457 }, { "epoch": 0.30205298461955415, "grad_norm": 7.837399959564209, "learning_rate": 9.492504620439455e-05, "loss": 0.8757, "step": 4458 }, { "epoch": 0.30212073988752625, "grad_norm": 7.2140936851501465, "learning_rate": 9.492367718529674e-05, "loss": 1.1334, "step": 4459 }, { "epoch": 0.30218849515549834, "grad_norm": 7.836158752441406, "learning_rate": 9.492230816619892e-05, "loss": 1.1179, "step": 4460 }, { "epoch": 0.30225625042347043, "grad_norm": 8.686471939086914, "learning_rate": 9.49209391471011e-05, "loss": 0.9826, "step": 4461 }, { "epoch": 0.3023240056914425, "grad_norm": 8.442061424255371, "learning_rate": 9.491957012800328e-05, "loss": 1.0392, "step": 4462 }, { "epoch": 0.3023917609594146, "grad_norm": 7.024100303649902, "learning_rate": 9.491820110890546e-05, "loss": 0.7857, "step": 4463 }, { "epoch": 0.3024595162273867, "grad_norm": 7.306611061096191, "learning_rate": 9.491683208980766e-05, "loss": 1.0154, "step": 4464 }, { "epoch": 0.30252727149535874, "grad_norm": 7.3064470291137695, "learning_rate": 9.491546307070984e-05, "loss": 1.0311, "step": 4465 }, { "epoch": 0.30259502676333083, "grad_norm": 7.264878749847412, "learning_rate": 9.491409405161202e-05, "loss": 0.932, "step": 4466 }, { "epoch": 0.3026627820313029, "grad_norm": 7.43487024307251, "learning_rate": 9.49127250325142e-05, "loss": 0.8024, "step": 4467 }, { "epoch": 0.302730537299275, "grad_norm": 7.710512161254883, "learning_rate": 9.491135601341639e-05, "loss": 0.7989, "step": 4468 }, { "epoch": 0.3027982925672471, "grad_norm": 7.263044834136963, "learning_rate": 9.490998699431857e-05, "loss": 0.9481, "step": 4469 }, { "epoch": 0.3028660478352192, "grad_norm": 7.284390449523926, "learning_rate": 9.490861797522075e-05, "loss": 0.8008, "step": 4470 }, { "epoch": 0.3029338031031913, "grad_norm": 8.490337371826172, "learning_rate": 9.490724895612295e-05, "loss": 1.1867, "step": 4471 }, { "epoch": 0.3030015583711634, "grad_norm": 6.555050849914551, "learning_rate": 9.490587993702513e-05, "loss": 0.8615, "step": 4472 }, { "epoch": 0.3030693136391354, "grad_norm": 6.497532367706299, "learning_rate": 9.490451091792731e-05, "loss": 0.9643, "step": 4473 }, { "epoch": 0.3031370689071075, "grad_norm": 6.556066036224365, "learning_rate": 9.49031418988295e-05, "loss": 0.9757, "step": 4474 }, { "epoch": 0.3032048241750796, "grad_norm": 6.649857044219971, "learning_rate": 9.490177287973168e-05, "loss": 1.0196, "step": 4475 }, { "epoch": 0.3032725794430517, "grad_norm": 8.725894927978516, "learning_rate": 9.490040386063386e-05, "loss": 1.1662, "step": 4476 }, { "epoch": 0.3033403347110238, "grad_norm": 6.520106792449951, "learning_rate": 9.489903484153604e-05, "loss": 0.8353, "step": 4477 }, { "epoch": 0.3034080899789959, "grad_norm": 6.688689708709717, "learning_rate": 9.489766582243824e-05, "loss": 0.8101, "step": 4478 }, { "epoch": 0.30347584524696797, "grad_norm": 7.817204475402832, "learning_rate": 9.489629680334042e-05, "loss": 0.9354, "step": 4479 }, { "epoch": 0.30354360051494006, "grad_norm": 7.536436080932617, "learning_rate": 9.48949277842426e-05, "loss": 1.2358, "step": 4480 }, { "epoch": 0.3036113557829121, "grad_norm": 8.635372161865234, "learning_rate": 9.489355876514478e-05, "loss": 0.8344, "step": 4481 }, { "epoch": 0.3036791110508842, "grad_norm": 7.561103343963623, "learning_rate": 9.489218974604697e-05, "loss": 0.864, "step": 4482 }, { "epoch": 0.3037468663188563, "grad_norm": 8.284873962402344, "learning_rate": 9.489082072694915e-05, "loss": 0.8756, "step": 4483 }, { "epoch": 0.30381462158682837, "grad_norm": 8.976608276367188, "learning_rate": 9.488945170785133e-05, "loss": 1.0523, "step": 4484 }, { "epoch": 0.30388237685480046, "grad_norm": 11.891230583190918, "learning_rate": 9.488808268875351e-05, "loss": 1.0367, "step": 4485 }, { "epoch": 0.30395013212277255, "grad_norm": 8.247873306274414, "learning_rate": 9.488671366965569e-05, "loss": 0.8173, "step": 4486 }, { "epoch": 0.30401788739074465, "grad_norm": 6.74050760269165, "learning_rate": 9.488534465055789e-05, "loss": 0.8192, "step": 4487 }, { "epoch": 0.30408564265871674, "grad_norm": 7.035127639770508, "learning_rate": 9.488397563146007e-05, "loss": 0.8416, "step": 4488 }, { "epoch": 0.3041533979266888, "grad_norm": 7.281885623931885, "learning_rate": 9.488260661236225e-05, "loss": 1.072, "step": 4489 }, { "epoch": 0.30422115319466086, "grad_norm": 6.236363887786865, "learning_rate": 9.488123759326443e-05, "loss": 0.8738, "step": 4490 }, { "epoch": 0.30428890846263296, "grad_norm": 8.20182991027832, "learning_rate": 9.487986857416662e-05, "loss": 1.1965, "step": 4491 }, { "epoch": 0.30435666373060505, "grad_norm": 9.837231636047363, "learning_rate": 9.48784995550688e-05, "loss": 0.9125, "step": 4492 }, { "epoch": 0.30442441899857714, "grad_norm": 8.810354232788086, "learning_rate": 9.487713053597098e-05, "loss": 1.0623, "step": 4493 }, { "epoch": 0.30449217426654923, "grad_norm": 6.360396385192871, "learning_rate": 9.487576151687316e-05, "loss": 0.8683, "step": 4494 }, { "epoch": 0.3045599295345213, "grad_norm": 8.158722877502441, "learning_rate": 9.487439249777534e-05, "loss": 0.9562, "step": 4495 }, { "epoch": 0.3046276848024934, "grad_norm": 12.142204284667969, "learning_rate": 9.487302347867754e-05, "loss": 0.8937, "step": 4496 }, { "epoch": 0.3046954400704655, "grad_norm": 8.125468254089355, "learning_rate": 9.487165445957972e-05, "loss": 1.0587, "step": 4497 }, { "epoch": 0.30476319533843754, "grad_norm": 7.607559680938721, "learning_rate": 9.48702854404819e-05, "loss": 0.8296, "step": 4498 }, { "epoch": 0.30483095060640963, "grad_norm": 6.430202484130859, "learning_rate": 9.486891642138408e-05, "loss": 1.067, "step": 4499 }, { "epoch": 0.3048987058743817, "grad_norm": 7.2482805252075195, "learning_rate": 9.486754740228626e-05, "loss": 1.0611, "step": 4500 }, { "epoch": 0.3049664611423538, "grad_norm": 9.431131362915039, "learning_rate": 9.486617838318845e-05, "loss": 0.9665, "step": 4501 }, { "epoch": 0.3050342164103259, "grad_norm": 8.319073677062988, "learning_rate": 9.486480936409063e-05, "loss": 0.983, "step": 4502 }, { "epoch": 0.305101971678298, "grad_norm": 6.784060478210449, "learning_rate": 9.486344034499281e-05, "loss": 0.8534, "step": 4503 }, { "epoch": 0.3051697269462701, "grad_norm": 7.521092414855957, "learning_rate": 9.486207132589499e-05, "loss": 0.9167, "step": 4504 }, { "epoch": 0.3052374822142422, "grad_norm": 7.7156982421875, "learning_rate": 9.486070230679719e-05, "loss": 0.7691, "step": 4505 }, { "epoch": 0.3053052374822142, "grad_norm": 6.727222919464111, "learning_rate": 9.485933328769937e-05, "loss": 0.7156, "step": 4506 }, { "epoch": 0.3053729927501863, "grad_norm": 7.416358470916748, "learning_rate": 9.485796426860155e-05, "loss": 1.0458, "step": 4507 }, { "epoch": 0.3054407480181584, "grad_norm": 8.601702690124512, "learning_rate": 9.485659524950373e-05, "loss": 1.1664, "step": 4508 }, { "epoch": 0.3055085032861305, "grad_norm": 6.742228984832764, "learning_rate": 9.485522623040591e-05, "loss": 0.9715, "step": 4509 }, { "epoch": 0.3055762585541026, "grad_norm": 6.782869815826416, "learning_rate": 9.48538572113081e-05, "loss": 0.7644, "step": 4510 }, { "epoch": 0.3056440138220747, "grad_norm": 5.753969669342041, "learning_rate": 9.485248819221028e-05, "loss": 0.8468, "step": 4511 }, { "epoch": 0.30571176909004677, "grad_norm": 10.150800704956055, "learning_rate": 9.485111917311246e-05, "loss": 1.1521, "step": 4512 }, { "epoch": 0.30577952435801886, "grad_norm": 7.476456642150879, "learning_rate": 9.484975015401464e-05, "loss": 1.0091, "step": 4513 }, { "epoch": 0.3058472796259909, "grad_norm": 8.955781936645508, "learning_rate": 9.484838113491684e-05, "loss": 1.1837, "step": 4514 }, { "epoch": 0.305915034893963, "grad_norm": 8.779487609863281, "learning_rate": 9.484701211581902e-05, "loss": 0.9747, "step": 4515 }, { "epoch": 0.3059827901619351, "grad_norm": 9.432082176208496, "learning_rate": 9.48456430967212e-05, "loss": 0.9298, "step": 4516 }, { "epoch": 0.3060505454299072, "grad_norm": 8.186026573181152, "learning_rate": 9.484427407762339e-05, "loss": 0.9026, "step": 4517 }, { "epoch": 0.30611830069787926, "grad_norm": 7.902477264404297, "learning_rate": 9.484290505852557e-05, "loss": 1.0116, "step": 4518 }, { "epoch": 0.30618605596585136, "grad_norm": 9.117185592651367, "learning_rate": 9.484153603942775e-05, "loss": 1.0867, "step": 4519 }, { "epoch": 0.30625381123382345, "grad_norm": 8.473755836486816, "learning_rate": 9.484016702032995e-05, "loss": 0.8742, "step": 4520 }, { "epoch": 0.30632156650179554, "grad_norm": 12.328755378723145, "learning_rate": 9.483879800123213e-05, "loss": 1.2981, "step": 4521 }, { "epoch": 0.3063893217697676, "grad_norm": 7.099585056304932, "learning_rate": 9.483742898213431e-05, "loss": 0.9685, "step": 4522 }, { "epoch": 0.30645707703773967, "grad_norm": 8.027973175048828, "learning_rate": 9.48360599630365e-05, "loss": 1.0194, "step": 4523 }, { "epoch": 0.30652483230571176, "grad_norm": 8.275866508483887, "learning_rate": 9.483469094393868e-05, "loss": 1.2424, "step": 4524 }, { "epoch": 0.30659258757368385, "grad_norm": 8.546550750732422, "learning_rate": 9.483332192484086e-05, "loss": 0.9284, "step": 4525 }, { "epoch": 0.30666034284165594, "grad_norm": 5.6628522872924805, "learning_rate": 9.483195290574304e-05, "loss": 0.882, "step": 4526 }, { "epoch": 0.30672809810962803, "grad_norm": 7.189582824707031, "learning_rate": 9.483058388664522e-05, "loss": 0.8087, "step": 4527 }, { "epoch": 0.3067958533776001, "grad_norm": 6.341355323791504, "learning_rate": 9.482921486754742e-05, "loss": 0.8759, "step": 4528 }, { "epoch": 0.3068636086455722, "grad_norm": 6.838935852050781, "learning_rate": 9.48278458484496e-05, "loss": 0.6662, "step": 4529 }, { "epoch": 0.30693136391354425, "grad_norm": 5.441661834716797, "learning_rate": 9.482647682935178e-05, "loss": 0.7008, "step": 4530 }, { "epoch": 0.30699911918151634, "grad_norm": 9.242436408996582, "learning_rate": 9.482510781025396e-05, "loss": 0.8675, "step": 4531 }, { "epoch": 0.30706687444948844, "grad_norm": 7.393209934234619, "learning_rate": 9.482373879115614e-05, "loss": 0.7829, "step": 4532 }, { "epoch": 0.30713462971746053, "grad_norm": 8.613391876220703, "learning_rate": 9.482236977205833e-05, "loss": 1.0276, "step": 4533 }, { "epoch": 0.3072023849854326, "grad_norm": 8.517586708068848, "learning_rate": 9.482100075296051e-05, "loss": 0.8115, "step": 4534 }, { "epoch": 0.3072701402534047, "grad_norm": 7.575396537780762, "learning_rate": 9.481963173386269e-05, "loss": 0.8254, "step": 4535 }, { "epoch": 0.3073378955213768, "grad_norm": 8.90748405456543, "learning_rate": 9.481826271476487e-05, "loss": 0.9864, "step": 4536 }, { "epoch": 0.3074056507893489, "grad_norm": 10.321487426757812, "learning_rate": 9.481689369566707e-05, "loss": 1.0247, "step": 4537 }, { "epoch": 0.30747340605732093, "grad_norm": 8.15263843536377, "learning_rate": 9.481552467656925e-05, "loss": 0.8475, "step": 4538 }, { "epoch": 0.307541161325293, "grad_norm": 7.198974609375, "learning_rate": 9.481415565747143e-05, "loss": 1.1028, "step": 4539 }, { "epoch": 0.3076089165932651, "grad_norm": 8.286428451538086, "learning_rate": 9.481278663837361e-05, "loss": 0.9451, "step": 4540 }, { "epoch": 0.3076766718612372, "grad_norm": 7.620658874511719, "learning_rate": 9.481141761927579e-05, "loss": 0.8243, "step": 4541 }, { "epoch": 0.3077444271292093, "grad_norm": 7.8145341873168945, "learning_rate": 9.481004860017798e-05, "loss": 0.7741, "step": 4542 }, { "epoch": 0.3078121823971814, "grad_norm": 7.216073989868164, "learning_rate": 9.480867958108016e-05, "loss": 0.7973, "step": 4543 }, { "epoch": 0.3078799376651535, "grad_norm": 7.704121112823486, "learning_rate": 9.480731056198234e-05, "loss": 0.9388, "step": 4544 }, { "epoch": 0.30794769293312557, "grad_norm": 7.872618675231934, "learning_rate": 9.480594154288452e-05, "loss": 0.8628, "step": 4545 }, { "epoch": 0.3080154482010976, "grad_norm": 5.696948528289795, "learning_rate": 9.480457252378672e-05, "loss": 0.8319, "step": 4546 }, { "epoch": 0.3080832034690697, "grad_norm": 7.372182846069336, "learning_rate": 9.48032035046889e-05, "loss": 0.9198, "step": 4547 }, { "epoch": 0.3081509587370418, "grad_norm": 6.9523420333862305, "learning_rate": 9.480183448559108e-05, "loss": 1.0383, "step": 4548 }, { "epoch": 0.3082187140050139, "grad_norm": 7.862281799316406, "learning_rate": 9.480046546649326e-05, "loss": 0.9764, "step": 4549 }, { "epoch": 0.308286469272986, "grad_norm": 6.563671588897705, "learning_rate": 9.479909644739544e-05, "loss": 0.9587, "step": 4550 }, { "epoch": 0.30835422454095807, "grad_norm": 6.150197505950928, "learning_rate": 9.479772742829763e-05, "loss": 0.8265, "step": 4551 }, { "epoch": 0.30842197980893016, "grad_norm": 5.330137729644775, "learning_rate": 9.479635840919981e-05, "loss": 0.697, "step": 4552 }, { "epoch": 0.30848973507690225, "grad_norm": 7.886617183685303, "learning_rate": 9.479498939010199e-05, "loss": 0.8645, "step": 4553 }, { "epoch": 0.30855749034487434, "grad_norm": 8.9354248046875, "learning_rate": 9.479362037100417e-05, "loss": 0.9669, "step": 4554 }, { "epoch": 0.3086252456128464, "grad_norm": 6.7398457527160645, "learning_rate": 9.479225135190635e-05, "loss": 0.6935, "step": 4555 }, { "epoch": 0.30869300088081847, "grad_norm": 6.668737888336182, "learning_rate": 9.479088233280855e-05, "loss": 1.0038, "step": 4556 }, { "epoch": 0.30876075614879056, "grad_norm": 9.82985782623291, "learning_rate": 9.478951331371073e-05, "loss": 1.1031, "step": 4557 }, { "epoch": 0.30882851141676265, "grad_norm": 7.345351696014404, "learning_rate": 9.478814429461291e-05, "loss": 1.032, "step": 4558 }, { "epoch": 0.30889626668473474, "grad_norm": 8.863839149475098, "learning_rate": 9.478677527551509e-05, "loss": 1.0042, "step": 4559 }, { "epoch": 0.30896402195270684, "grad_norm": 7.280172824859619, "learning_rate": 9.478540625641728e-05, "loss": 0.977, "step": 4560 }, { "epoch": 0.3090317772206789, "grad_norm": 7.121008396148682, "learning_rate": 9.478403723731946e-05, "loss": 0.7969, "step": 4561 }, { "epoch": 0.309099532488651, "grad_norm": 8.222799301147461, "learning_rate": 9.478266821822164e-05, "loss": 1.0655, "step": 4562 }, { "epoch": 0.30916728775662305, "grad_norm": 7.74297571182251, "learning_rate": 9.478129919912384e-05, "loss": 0.7976, "step": 4563 }, { "epoch": 0.30923504302459515, "grad_norm": 8.211404800415039, "learning_rate": 9.477993018002602e-05, "loss": 0.9004, "step": 4564 }, { "epoch": 0.30930279829256724, "grad_norm": 6.897336006164551, "learning_rate": 9.47785611609282e-05, "loss": 0.9347, "step": 4565 }, { "epoch": 0.30937055356053933, "grad_norm": 6.508474826812744, "learning_rate": 9.477719214183039e-05, "loss": 0.862, "step": 4566 }, { "epoch": 0.3094383088285114, "grad_norm": 6.972619533538818, "learning_rate": 9.477582312273257e-05, "loss": 0.8142, "step": 4567 }, { "epoch": 0.3095060640964835, "grad_norm": 8.018939971923828, "learning_rate": 9.477445410363475e-05, "loss": 1.1858, "step": 4568 }, { "epoch": 0.3095738193644556, "grad_norm": 7.123754024505615, "learning_rate": 9.477308508453695e-05, "loss": 0.9347, "step": 4569 }, { "epoch": 0.3096415746324277, "grad_norm": 7.701295375823975, "learning_rate": 9.477171606543913e-05, "loss": 1.0511, "step": 4570 }, { "epoch": 0.30970932990039973, "grad_norm": 7.858259677886963, "learning_rate": 9.47703470463413e-05, "loss": 0.8332, "step": 4571 }, { "epoch": 0.3097770851683718, "grad_norm": 9.541449546813965, "learning_rate": 9.476897802724349e-05, "loss": 1.3054, "step": 4572 }, { "epoch": 0.3098448404363439, "grad_norm": 7.877323150634766, "learning_rate": 9.476760900814567e-05, "loss": 0.9087, "step": 4573 }, { "epoch": 0.309912595704316, "grad_norm": 6.667144775390625, "learning_rate": 9.476623998904786e-05, "loss": 0.9066, "step": 4574 }, { "epoch": 0.3099803509722881, "grad_norm": 6.78439998626709, "learning_rate": 9.476487096995004e-05, "loss": 0.9524, "step": 4575 }, { "epoch": 0.3100481062402602, "grad_norm": 6.174951076507568, "learning_rate": 9.476350195085222e-05, "loss": 0.9853, "step": 4576 }, { "epoch": 0.3101158615082323, "grad_norm": 7.872679233551025, "learning_rate": 9.47621329317544e-05, "loss": 1.2686, "step": 4577 }, { "epoch": 0.3101836167762044, "grad_norm": 6.736785411834717, "learning_rate": 9.47607639126566e-05, "loss": 0.8149, "step": 4578 }, { "epoch": 0.3102513720441764, "grad_norm": 7.353178977966309, "learning_rate": 9.475939489355878e-05, "loss": 0.7092, "step": 4579 }, { "epoch": 0.3103191273121485, "grad_norm": 8.10339069366455, "learning_rate": 9.475802587446096e-05, "loss": 1.1926, "step": 4580 }, { "epoch": 0.3103868825801206, "grad_norm": 7.225834369659424, "learning_rate": 9.475665685536314e-05, "loss": 1.1534, "step": 4581 }, { "epoch": 0.3104546378480927, "grad_norm": 7.356152057647705, "learning_rate": 9.475528783626532e-05, "loss": 1.1098, "step": 4582 }, { "epoch": 0.3105223931160648, "grad_norm": 10.347126960754395, "learning_rate": 9.475391881716751e-05, "loss": 1.136, "step": 4583 }, { "epoch": 0.31059014838403687, "grad_norm": 7.562332630157471, "learning_rate": 9.475254979806969e-05, "loss": 0.9211, "step": 4584 }, { "epoch": 0.31065790365200896, "grad_norm": 6.505655288696289, "learning_rate": 9.475118077897187e-05, "loss": 0.9538, "step": 4585 }, { "epoch": 0.31072565891998105, "grad_norm": 6.551809787750244, "learning_rate": 9.474981175987405e-05, "loss": 1.1335, "step": 4586 }, { "epoch": 0.3107934141879531, "grad_norm": 7.4724321365356445, "learning_rate": 9.474844274077623e-05, "loss": 1.0441, "step": 4587 }, { "epoch": 0.3108611694559252, "grad_norm": 8.463167190551758, "learning_rate": 9.474707372167843e-05, "loss": 1.0979, "step": 4588 }, { "epoch": 0.31092892472389727, "grad_norm": 6.868551731109619, "learning_rate": 9.47457047025806e-05, "loss": 0.6893, "step": 4589 }, { "epoch": 0.31099667999186936, "grad_norm": 5.831715106964111, "learning_rate": 9.474433568348279e-05, "loss": 0.9197, "step": 4590 }, { "epoch": 0.31106443525984145, "grad_norm": 7.562331676483154, "learning_rate": 9.474296666438497e-05, "loss": 0.8768, "step": 4591 }, { "epoch": 0.31113219052781355, "grad_norm": 7.750473499298096, "learning_rate": 9.474159764528716e-05, "loss": 0.8046, "step": 4592 }, { "epoch": 0.31119994579578564, "grad_norm": 7.836174964904785, "learning_rate": 9.474022862618934e-05, "loss": 0.9346, "step": 4593 }, { "epoch": 0.31126770106375773, "grad_norm": 6.159252166748047, "learning_rate": 9.473885960709152e-05, "loss": 0.8661, "step": 4594 }, { "epoch": 0.31133545633172977, "grad_norm": 7.959331512451172, "learning_rate": 9.47374905879937e-05, "loss": 0.919, "step": 4595 }, { "epoch": 0.31140321159970186, "grad_norm": 8.349802017211914, "learning_rate": 9.473612156889588e-05, "loss": 0.9762, "step": 4596 }, { "epoch": 0.31147096686767395, "grad_norm": 6.385254859924316, "learning_rate": 9.473475254979808e-05, "loss": 1.0168, "step": 4597 }, { "epoch": 0.31153872213564604, "grad_norm": 6.848907947540283, "learning_rate": 9.473338353070026e-05, "loss": 0.8235, "step": 4598 }, { "epoch": 0.31160647740361813, "grad_norm": 8.07978630065918, "learning_rate": 9.473201451160244e-05, "loss": 0.8394, "step": 4599 }, { "epoch": 0.3116742326715902, "grad_norm": 8.702666282653809, "learning_rate": 9.473064549250462e-05, "loss": 1.1859, "step": 4600 }, { "epoch": 0.3117419879395623, "grad_norm": 7.322251796722412, "learning_rate": 9.472927647340681e-05, "loss": 0.7089, "step": 4601 }, { "epoch": 0.3118097432075344, "grad_norm": 7.717091083526611, "learning_rate": 9.472790745430899e-05, "loss": 0.8609, "step": 4602 }, { "epoch": 0.3118774984755065, "grad_norm": 7.298448085784912, "learning_rate": 9.472653843521117e-05, "loss": 1.0044, "step": 4603 }, { "epoch": 0.31194525374347853, "grad_norm": 8.221037864685059, "learning_rate": 9.472516941611335e-05, "loss": 1.0403, "step": 4604 }, { "epoch": 0.3120130090114506, "grad_norm": 8.83371639251709, "learning_rate": 9.472380039701553e-05, "loss": 1.2884, "step": 4605 }, { "epoch": 0.3120807642794227, "grad_norm": 6.774711608886719, "learning_rate": 9.472243137791773e-05, "loss": 0.8077, "step": 4606 }, { "epoch": 0.3121485195473948, "grad_norm": 8.470376968383789, "learning_rate": 9.472106235881991e-05, "loss": 0.9549, "step": 4607 }, { "epoch": 0.3122162748153669, "grad_norm": 6.509616374969482, "learning_rate": 9.471969333972209e-05, "loss": 0.9595, "step": 4608 }, { "epoch": 0.312284030083339, "grad_norm": 6.885564804077148, "learning_rate": 9.471832432062427e-05, "loss": 0.9934, "step": 4609 }, { "epoch": 0.3123517853513111, "grad_norm": 12.500927925109863, "learning_rate": 9.471695530152646e-05, "loss": 0.9493, "step": 4610 }, { "epoch": 0.3124195406192832, "grad_norm": 8.701812744140625, "learning_rate": 9.471558628242864e-05, "loss": 0.9975, "step": 4611 }, { "epoch": 0.3124872958872552, "grad_norm": 8.313292503356934, "learning_rate": 9.471421726333082e-05, "loss": 1.0069, "step": 4612 }, { "epoch": 0.3125550511552273, "grad_norm": 6.841222763061523, "learning_rate": 9.471284824423302e-05, "loss": 0.8078, "step": 4613 }, { "epoch": 0.3126228064231994, "grad_norm": 9.038453102111816, "learning_rate": 9.47114792251352e-05, "loss": 1.0445, "step": 4614 }, { "epoch": 0.3126905616911715, "grad_norm": 7.772367000579834, "learning_rate": 9.471011020603738e-05, "loss": 1.0336, "step": 4615 }, { "epoch": 0.3127583169591436, "grad_norm": 6.843810558319092, "learning_rate": 9.470874118693957e-05, "loss": 0.8171, "step": 4616 }, { "epoch": 0.31282607222711567, "grad_norm": 7.012472629547119, "learning_rate": 9.470737216784175e-05, "loss": 0.8906, "step": 4617 }, { "epoch": 0.31289382749508776, "grad_norm": 7.138260364532471, "learning_rate": 9.470600314874393e-05, "loss": 0.9742, "step": 4618 }, { "epoch": 0.31296158276305985, "grad_norm": 7.92933988571167, "learning_rate": 9.470463412964611e-05, "loss": 0.8097, "step": 4619 }, { "epoch": 0.3130293380310319, "grad_norm": 8.222073554992676, "learning_rate": 9.47032651105483e-05, "loss": 0.9354, "step": 4620 }, { "epoch": 0.313097093299004, "grad_norm": 7.705626010894775, "learning_rate": 9.470189609145049e-05, "loss": 0.7776, "step": 4621 }, { "epoch": 0.3131648485669761, "grad_norm": 7.6136345863342285, "learning_rate": 9.470052707235267e-05, "loss": 1.0204, "step": 4622 }, { "epoch": 0.31323260383494816, "grad_norm": 6.365856647491455, "learning_rate": 9.469915805325485e-05, "loss": 0.8812, "step": 4623 }, { "epoch": 0.31330035910292026, "grad_norm": 6.876771926879883, "learning_rate": 9.469778903415704e-05, "loss": 0.7518, "step": 4624 }, { "epoch": 0.31336811437089235, "grad_norm": 6.329056262969971, "learning_rate": 9.469642001505922e-05, "loss": 0.8902, "step": 4625 }, { "epoch": 0.31343586963886444, "grad_norm": 6.1461310386657715, "learning_rate": 9.46950509959614e-05, "loss": 1.0617, "step": 4626 }, { "epoch": 0.31350362490683653, "grad_norm": 7.062450408935547, "learning_rate": 9.469368197686358e-05, "loss": 0.9081, "step": 4627 }, { "epoch": 0.31357138017480857, "grad_norm": 7.412983417510986, "learning_rate": 9.469231295776576e-05, "loss": 1.0845, "step": 4628 }, { "epoch": 0.31363913544278066, "grad_norm": 7.904543876647949, "learning_rate": 9.469094393866796e-05, "loss": 0.803, "step": 4629 }, { "epoch": 0.31370689071075275, "grad_norm": 6.614920616149902, "learning_rate": 9.468957491957014e-05, "loss": 0.8369, "step": 4630 }, { "epoch": 0.31377464597872484, "grad_norm": 8.245738983154297, "learning_rate": 9.468820590047232e-05, "loss": 0.9232, "step": 4631 }, { "epoch": 0.31384240124669693, "grad_norm": 7.923001766204834, "learning_rate": 9.46868368813745e-05, "loss": 0.822, "step": 4632 }, { "epoch": 0.313910156514669, "grad_norm": 8.80931282043457, "learning_rate": 9.468546786227668e-05, "loss": 0.9253, "step": 4633 }, { "epoch": 0.3139779117826411, "grad_norm": 6.997625350952148, "learning_rate": 9.468409884317887e-05, "loss": 0.8564, "step": 4634 }, { "epoch": 0.3140456670506132, "grad_norm": 7.699014186859131, "learning_rate": 9.468272982408105e-05, "loss": 1.0097, "step": 4635 }, { "epoch": 0.31411342231858524, "grad_norm": 7.379316329956055, "learning_rate": 9.468136080498323e-05, "loss": 0.8455, "step": 4636 }, { "epoch": 0.31418117758655734, "grad_norm": 7.040482521057129, "learning_rate": 9.467999178588541e-05, "loss": 0.9065, "step": 4637 }, { "epoch": 0.31424893285452943, "grad_norm": 8.588302612304688, "learning_rate": 9.46786227667876e-05, "loss": 0.8063, "step": 4638 }, { "epoch": 0.3143166881225015, "grad_norm": 8.310260772705078, "learning_rate": 9.467725374768979e-05, "loss": 0.9012, "step": 4639 }, { "epoch": 0.3143844433904736, "grad_norm": 5.794011116027832, "learning_rate": 9.467588472859197e-05, "loss": 0.7026, "step": 4640 }, { "epoch": 0.3144521986584457, "grad_norm": 8.277170181274414, "learning_rate": 9.467451570949415e-05, "loss": 0.8472, "step": 4641 }, { "epoch": 0.3145199539264178, "grad_norm": 8.856977462768555, "learning_rate": 9.467314669039633e-05, "loss": 1.1543, "step": 4642 }, { "epoch": 0.3145877091943899, "grad_norm": 7.343659400939941, "learning_rate": 9.467177767129852e-05, "loss": 0.8366, "step": 4643 }, { "epoch": 0.3146554644623619, "grad_norm": 6.619773864746094, "learning_rate": 9.46704086522007e-05, "loss": 0.9176, "step": 4644 }, { "epoch": 0.314723219730334, "grad_norm": 8.375277519226074, "learning_rate": 9.466903963310288e-05, "loss": 1.0437, "step": 4645 }, { "epoch": 0.3147909749983061, "grad_norm": 6.792671203613281, "learning_rate": 9.466767061400506e-05, "loss": 0.7271, "step": 4646 }, { "epoch": 0.3148587302662782, "grad_norm": 7.419206619262695, "learning_rate": 9.466630159490726e-05, "loss": 0.9376, "step": 4647 }, { "epoch": 0.3149264855342503, "grad_norm": 6.413398742675781, "learning_rate": 9.466493257580944e-05, "loss": 0.8988, "step": 4648 }, { "epoch": 0.3149942408022224, "grad_norm": 7.8477373123168945, "learning_rate": 9.466356355671162e-05, "loss": 1.0791, "step": 4649 }, { "epoch": 0.31506199607019447, "grad_norm": 7.070709705352783, "learning_rate": 9.46621945376138e-05, "loss": 0.7796, "step": 4650 }, { "epoch": 0.31512975133816656, "grad_norm": 7.023820400238037, "learning_rate": 9.466082551851598e-05, "loss": 0.928, "step": 4651 }, { "epoch": 0.3151975066061386, "grad_norm": 8.347658157348633, "learning_rate": 9.465945649941817e-05, "loss": 0.8768, "step": 4652 }, { "epoch": 0.3152652618741107, "grad_norm": 7.446930408477783, "learning_rate": 9.465808748032035e-05, "loss": 0.7215, "step": 4653 }, { "epoch": 0.3153330171420828, "grad_norm": 9.261749267578125, "learning_rate": 9.465671846122253e-05, "loss": 1.0612, "step": 4654 }, { "epoch": 0.3154007724100549, "grad_norm": 6.765881061553955, "learning_rate": 9.465534944212471e-05, "loss": 0.9636, "step": 4655 }, { "epoch": 0.31546852767802697, "grad_norm": 7.752283096313477, "learning_rate": 9.46539804230269e-05, "loss": 1.0348, "step": 4656 }, { "epoch": 0.31553628294599906, "grad_norm": 8.769716262817383, "learning_rate": 9.465261140392909e-05, "loss": 0.9147, "step": 4657 }, { "epoch": 0.31560403821397115, "grad_norm": 9.27787971496582, "learning_rate": 9.465124238483127e-05, "loss": 1.0287, "step": 4658 }, { "epoch": 0.31567179348194324, "grad_norm": 5.802475452423096, "learning_rate": 9.464987336573346e-05, "loss": 0.7968, "step": 4659 }, { "epoch": 0.31573954874991533, "grad_norm": 7.436465263366699, "learning_rate": 9.464850434663564e-05, "loss": 1.1087, "step": 4660 }, { "epoch": 0.31580730401788737, "grad_norm": 8.243688583374023, "learning_rate": 9.464713532753782e-05, "loss": 1.0257, "step": 4661 }, { "epoch": 0.31587505928585946, "grad_norm": 7.885406017303467, "learning_rate": 9.464576630844002e-05, "loss": 0.9944, "step": 4662 }, { "epoch": 0.31594281455383155, "grad_norm": 8.342294692993164, "learning_rate": 9.46443972893422e-05, "loss": 0.8351, "step": 4663 }, { "epoch": 0.31601056982180364, "grad_norm": 7.476551055908203, "learning_rate": 9.464302827024438e-05, "loss": 0.8484, "step": 4664 }, { "epoch": 0.31607832508977574, "grad_norm": 7.239434719085693, "learning_rate": 9.464165925114656e-05, "loss": 0.749, "step": 4665 }, { "epoch": 0.3161460803577478, "grad_norm": 8.02225399017334, "learning_rate": 9.464029023204875e-05, "loss": 1.0679, "step": 4666 }, { "epoch": 0.3162138356257199, "grad_norm": 5.708010673522949, "learning_rate": 9.463892121295093e-05, "loss": 0.6935, "step": 4667 }, { "epoch": 0.316281590893692, "grad_norm": 9.107060432434082, "learning_rate": 9.463755219385311e-05, "loss": 0.7951, "step": 4668 }, { "epoch": 0.31634934616166405, "grad_norm": 8.045939445495605, "learning_rate": 9.463618317475529e-05, "loss": 0.9943, "step": 4669 }, { "epoch": 0.31641710142963614, "grad_norm": 8.284626007080078, "learning_rate": 9.463481415565749e-05, "loss": 0.9638, "step": 4670 }, { "epoch": 0.31648485669760823, "grad_norm": 6.9800920486450195, "learning_rate": 9.463344513655967e-05, "loss": 0.9211, "step": 4671 }, { "epoch": 0.3165526119655803, "grad_norm": 7.476776599884033, "learning_rate": 9.463207611746185e-05, "loss": 0.8704, "step": 4672 }, { "epoch": 0.3166203672335524, "grad_norm": 6.303709506988525, "learning_rate": 9.463070709836403e-05, "loss": 0.9031, "step": 4673 }, { "epoch": 0.3166881225015245, "grad_norm": 8.267644882202148, "learning_rate": 9.46293380792662e-05, "loss": 1.0899, "step": 4674 }, { "epoch": 0.3167558777694966, "grad_norm": 8.25999641418457, "learning_rate": 9.46279690601684e-05, "loss": 0.9286, "step": 4675 }, { "epoch": 0.3168236330374687, "grad_norm": 8.577073097229004, "learning_rate": 9.462660004107058e-05, "loss": 0.8837, "step": 4676 }, { "epoch": 0.3168913883054407, "grad_norm": 7.183437824249268, "learning_rate": 9.462523102197276e-05, "loss": 0.9972, "step": 4677 }, { "epoch": 0.3169591435734128, "grad_norm": 7.93414831161499, "learning_rate": 9.462386200287494e-05, "loss": 0.9806, "step": 4678 }, { "epoch": 0.3170268988413849, "grad_norm": 8.037418365478516, "learning_rate": 9.462249298377714e-05, "loss": 0.8945, "step": 4679 }, { "epoch": 0.317094654109357, "grad_norm": 7.5245585441589355, "learning_rate": 9.462112396467932e-05, "loss": 0.9444, "step": 4680 }, { "epoch": 0.3171624093773291, "grad_norm": 7.428105354309082, "learning_rate": 9.46197549455815e-05, "loss": 0.9916, "step": 4681 }, { "epoch": 0.3172301646453012, "grad_norm": 7.977080345153809, "learning_rate": 9.461838592648368e-05, "loss": 1.2206, "step": 4682 }, { "epoch": 0.3172979199132733, "grad_norm": 6.285130023956299, "learning_rate": 9.461701690738586e-05, "loss": 0.8222, "step": 4683 }, { "epoch": 0.31736567518124537, "grad_norm": 6.9027018547058105, "learning_rate": 9.461564788828805e-05, "loss": 1.0265, "step": 4684 }, { "epoch": 0.3174334304492174, "grad_norm": 7.620064735412598, "learning_rate": 9.461427886919023e-05, "loss": 0.9672, "step": 4685 }, { "epoch": 0.3175011857171895, "grad_norm": 7.635505676269531, "learning_rate": 9.461290985009241e-05, "loss": 0.9329, "step": 4686 }, { "epoch": 0.3175689409851616, "grad_norm": 7.532449245452881, "learning_rate": 9.461154083099459e-05, "loss": 1.0509, "step": 4687 }, { "epoch": 0.3176366962531337, "grad_norm": 6.428747177124023, "learning_rate": 9.461017181189677e-05, "loss": 0.9151, "step": 4688 }, { "epoch": 0.31770445152110577, "grad_norm": 7.214437007904053, "learning_rate": 9.460880279279897e-05, "loss": 0.9758, "step": 4689 }, { "epoch": 0.31777220678907786, "grad_norm": 7.525144577026367, "learning_rate": 9.460743377370115e-05, "loss": 0.7723, "step": 4690 }, { "epoch": 0.31783996205704995, "grad_norm": 8.008615493774414, "learning_rate": 9.460606475460333e-05, "loss": 0.9945, "step": 4691 }, { "epoch": 0.31790771732502204, "grad_norm": 6.715225696563721, "learning_rate": 9.460469573550551e-05, "loss": 0.9805, "step": 4692 }, { "epoch": 0.3179754725929941, "grad_norm": 7.146914005279541, "learning_rate": 9.46033267164077e-05, "loss": 0.9583, "step": 4693 }, { "epoch": 0.31804322786096617, "grad_norm": 7.280580997467041, "learning_rate": 9.460195769730988e-05, "loss": 0.9772, "step": 4694 }, { "epoch": 0.31811098312893826, "grad_norm": 7.84500789642334, "learning_rate": 9.460058867821206e-05, "loss": 0.9755, "step": 4695 }, { "epoch": 0.31817873839691035, "grad_norm": 7.289769649505615, "learning_rate": 9.459921965911424e-05, "loss": 1.1441, "step": 4696 }, { "epoch": 0.31824649366488245, "grad_norm": 9.061880111694336, "learning_rate": 9.459785064001642e-05, "loss": 0.9699, "step": 4697 }, { "epoch": 0.31831424893285454, "grad_norm": 7.284442901611328, "learning_rate": 9.459648162091862e-05, "loss": 1.0023, "step": 4698 }, { "epoch": 0.31838200420082663, "grad_norm": 7.017299175262451, "learning_rate": 9.45951126018208e-05, "loss": 0.8598, "step": 4699 }, { "epoch": 0.3184497594687987, "grad_norm": 7.236936092376709, "learning_rate": 9.459374358272298e-05, "loss": 0.9888, "step": 4700 }, { "epoch": 0.31851751473677076, "grad_norm": 8.148765563964844, "learning_rate": 9.459237456362516e-05, "loss": 0.9978, "step": 4701 }, { "epoch": 0.31858527000474285, "grad_norm": 8.023640632629395, "learning_rate": 9.459100554452735e-05, "loss": 1.0941, "step": 4702 }, { "epoch": 0.31865302527271494, "grad_norm": 6.2704291343688965, "learning_rate": 9.458963652542953e-05, "loss": 0.8616, "step": 4703 }, { "epoch": 0.31872078054068703, "grad_norm": 7.873881816864014, "learning_rate": 9.458826750633171e-05, "loss": 0.9212, "step": 4704 }, { "epoch": 0.3187885358086591, "grad_norm": 7.088031768798828, "learning_rate": 9.45868984872339e-05, "loss": 0.6453, "step": 4705 }, { "epoch": 0.3188562910766312, "grad_norm": 7.319702625274658, "learning_rate": 9.458552946813609e-05, "loss": 0.8328, "step": 4706 }, { "epoch": 0.3189240463446033, "grad_norm": 6.979453086853027, "learning_rate": 9.458416044903827e-05, "loss": 0.7806, "step": 4707 }, { "epoch": 0.3189918016125754, "grad_norm": 7.868389129638672, "learning_rate": 9.458279142994046e-05, "loss": 0.8052, "step": 4708 }, { "epoch": 0.3190595568805475, "grad_norm": 7.2435431480407715, "learning_rate": 9.458142241084264e-05, "loss": 0.8922, "step": 4709 }, { "epoch": 0.3191273121485195, "grad_norm": 8.62594985961914, "learning_rate": 9.458005339174482e-05, "loss": 1.0572, "step": 4710 }, { "epoch": 0.3191950674164916, "grad_norm": 8.404034614562988, "learning_rate": 9.457868437264701e-05, "loss": 1.1592, "step": 4711 }, { "epoch": 0.3192628226844637, "grad_norm": 6.669247150421143, "learning_rate": 9.45773153535492e-05, "loss": 0.8727, "step": 4712 }, { "epoch": 0.3193305779524358, "grad_norm": 7.565460681915283, "learning_rate": 9.457594633445138e-05, "loss": 0.7803, "step": 4713 }, { "epoch": 0.3193983332204079, "grad_norm": 7.726403713226318, "learning_rate": 9.457457731535356e-05, "loss": 0.81, "step": 4714 }, { "epoch": 0.31946608848838, "grad_norm": 7.344959259033203, "learning_rate": 9.457320829625574e-05, "loss": 0.6959, "step": 4715 }, { "epoch": 0.3195338437563521, "grad_norm": 5.9748759269714355, "learning_rate": 9.457183927715793e-05, "loss": 0.8332, "step": 4716 }, { "epoch": 0.31960159902432417, "grad_norm": 7.241995334625244, "learning_rate": 9.457047025806011e-05, "loss": 0.8243, "step": 4717 }, { "epoch": 0.3196693542922962, "grad_norm": 7.812702655792236, "learning_rate": 9.456910123896229e-05, "loss": 0.8603, "step": 4718 }, { "epoch": 0.3197371095602683, "grad_norm": 9.492036819458008, "learning_rate": 9.456773221986447e-05, "loss": 1.0208, "step": 4719 }, { "epoch": 0.3198048648282404, "grad_norm": 8.922654151916504, "learning_rate": 9.456636320076665e-05, "loss": 1.1939, "step": 4720 }, { "epoch": 0.3198726200962125, "grad_norm": 6.459314346313477, "learning_rate": 9.456499418166885e-05, "loss": 1.0332, "step": 4721 }, { "epoch": 0.31994037536418457, "grad_norm": 7.104556560516357, "learning_rate": 9.456362516257103e-05, "loss": 1.0608, "step": 4722 }, { "epoch": 0.32000813063215666, "grad_norm": 7.620473861694336, "learning_rate": 9.45622561434732e-05, "loss": 0.888, "step": 4723 }, { "epoch": 0.32007588590012875, "grad_norm": 7.757092475891113, "learning_rate": 9.456088712437539e-05, "loss": 1.0355, "step": 4724 }, { "epoch": 0.32014364116810085, "grad_norm": 7.084576606750488, "learning_rate": 9.455951810527758e-05, "loss": 0.7148, "step": 4725 }, { "epoch": 0.3202113964360729, "grad_norm": 6.5413079261779785, "learning_rate": 9.455814908617976e-05, "loss": 0.8678, "step": 4726 }, { "epoch": 0.320279151704045, "grad_norm": 8.085969924926758, "learning_rate": 9.455678006708194e-05, "loss": 1.0137, "step": 4727 }, { "epoch": 0.32034690697201706, "grad_norm": 6.338340759277344, "learning_rate": 9.455541104798412e-05, "loss": 0.9573, "step": 4728 }, { "epoch": 0.32041466223998916, "grad_norm": 8.724467277526855, "learning_rate": 9.45540420288863e-05, "loss": 1.0585, "step": 4729 }, { "epoch": 0.32048241750796125, "grad_norm": 5.904287815093994, "learning_rate": 9.45526730097885e-05, "loss": 0.6882, "step": 4730 }, { "epoch": 0.32055017277593334, "grad_norm": 6.975876331329346, "learning_rate": 9.455130399069068e-05, "loss": 0.8641, "step": 4731 }, { "epoch": 0.32061792804390543, "grad_norm": 7.307252407073975, "learning_rate": 9.454993497159286e-05, "loss": 0.8961, "step": 4732 }, { "epoch": 0.3206856833118775, "grad_norm": 6.06977653503418, "learning_rate": 9.454856595249504e-05, "loss": 0.8388, "step": 4733 }, { "epoch": 0.32075343857984956, "grad_norm": 5.903672218322754, "learning_rate": 9.454719693339723e-05, "loss": 0.6962, "step": 4734 }, { "epoch": 0.32082119384782165, "grad_norm": 9.061569213867188, "learning_rate": 9.454582791429941e-05, "loss": 1.0246, "step": 4735 }, { "epoch": 0.32088894911579374, "grad_norm": 9.154926300048828, "learning_rate": 9.454445889520159e-05, "loss": 0.9336, "step": 4736 }, { "epoch": 0.32095670438376583, "grad_norm": 6.51992654800415, "learning_rate": 9.454308987610377e-05, "loss": 0.8753, "step": 4737 }, { "epoch": 0.3210244596517379, "grad_norm": 7.160184383392334, "learning_rate": 9.454172085700595e-05, "loss": 1.0036, "step": 4738 }, { "epoch": 0.32109221491971, "grad_norm": 6.895291805267334, "learning_rate": 9.454035183790815e-05, "loss": 0.8359, "step": 4739 }, { "epoch": 0.3211599701876821, "grad_norm": 7.363986015319824, "learning_rate": 9.453898281881033e-05, "loss": 1.0207, "step": 4740 }, { "epoch": 0.3212277254556542, "grad_norm": 9.040234565734863, "learning_rate": 9.45376137997125e-05, "loss": 1.0123, "step": 4741 }, { "epoch": 0.32129548072362624, "grad_norm": 7.679563045501709, "learning_rate": 9.453624478061469e-05, "loss": 1.1201, "step": 4742 }, { "epoch": 0.32136323599159833, "grad_norm": 7.640948295593262, "learning_rate": 9.453487576151687e-05, "loss": 1.1834, "step": 4743 }, { "epoch": 0.3214309912595704, "grad_norm": 6.348153591156006, "learning_rate": 9.453350674241906e-05, "loss": 0.735, "step": 4744 }, { "epoch": 0.3214987465275425, "grad_norm": 6.899050712585449, "learning_rate": 9.453213772332124e-05, "loss": 0.9054, "step": 4745 }, { "epoch": 0.3215665017955146, "grad_norm": 8.880489349365234, "learning_rate": 9.453076870422342e-05, "loss": 1.0253, "step": 4746 }, { "epoch": 0.3216342570634867, "grad_norm": 7.2754597663879395, "learning_rate": 9.45293996851256e-05, "loss": 0.75, "step": 4747 }, { "epoch": 0.3217020123314588, "grad_norm": 7.26841402053833, "learning_rate": 9.45280306660278e-05, "loss": 1.2198, "step": 4748 }, { "epoch": 0.3217697675994309, "grad_norm": 6.0724310874938965, "learning_rate": 9.452666164692998e-05, "loss": 0.7444, "step": 4749 }, { "epoch": 0.3218375228674029, "grad_norm": 7.82569694519043, "learning_rate": 9.452529262783216e-05, "loss": 0.9266, "step": 4750 }, { "epoch": 0.321905278135375, "grad_norm": 7.153421878814697, "learning_rate": 9.452392360873435e-05, "loss": 1.2223, "step": 4751 }, { "epoch": 0.3219730334033471, "grad_norm": 6.586106777191162, "learning_rate": 9.452255458963653e-05, "loss": 0.9157, "step": 4752 }, { "epoch": 0.3220407886713192, "grad_norm": 7.776567459106445, "learning_rate": 9.452118557053871e-05, "loss": 0.7396, "step": 4753 }, { "epoch": 0.3221085439392913, "grad_norm": 7.4870381355285645, "learning_rate": 9.45198165514409e-05, "loss": 1.0812, "step": 4754 }, { "epoch": 0.3221762992072634, "grad_norm": 8.112491607666016, "learning_rate": 9.451844753234309e-05, "loss": 1.2816, "step": 4755 }, { "epoch": 0.32224405447523546, "grad_norm": 8.562600135803223, "learning_rate": 9.451707851324527e-05, "loss": 0.9872, "step": 4756 }, { "epoch": 0.32231180974320756, "grad_norm": 9.139601707458496, "learning_rate": 9.451570949414746e-05, "loss": 1.019, "step": 4757 }, { "epoch": 0.3223795650111796, "grad_norm": 7.6649370193481445, "learning_rate": 9.451434047504964e-05, "loss": 1.1594, "step": 4758 }, { "epoch": 0.3224473202791517, "grad_norm": 8.193527221679688, "learning_rate": 9.451297145595182e-05, "loss": 0.8804, "step": 4759 }, { "epoch": 0.3225150755471238, "grad_norm": 5.558340072631836, "learning_rate": 9.4511602436854e-05, "loss": 0.8778, "step": 4760 }, { "epoch": 0.32258283081509587, "grad_norm": 7.296480178833008, "learning_rate": 9.451023341775618e-05, "loss": 1.0039, "step": 4761 }, { "epoch": 0.32265058608306796, "grad_norm": 8.916117668151855, "learning_rate": 9.450886439865837e-05, "loss": 0.9516, "step": 4762 }, { "epoch": 0.32271834135104005, "grad_norm": 7.691675662994385, "learning_rate": 9.450749537956056e-05, "loss": 1.1872, "step": 4763 }, { "epoch": 0.32278609661901214, "grad_norm": 6.425968647003174, "learning_rate": 9.450612636046274e-05, "loss": 0.7914, "step": 4764 }, { "epoch": 0.32285385188698423, "grad_norm": 6.476465225219727, "learning_rate": 9.450475734136492e-05, "loss": 0.9449, "step": 4765 }, { "epoch": 0.3229216071549563, "grad_norm": 7.470714569091797, "learning_rate": 9.450338832226711e-05, "loss": 0.8132, "step": 4766 }, { "epoch": 0.32298936242292836, "grad_norm": 6.947244644165039, "learning_rate": 9.450201930316929e-05, "loss": 1.0203, "step": 4767 }, { "epoch": 0.32305711769090045, "grad_norm": 6.939558982849121, "learning_rate": 9.450065028407147e-05, "loss": 0.7484, "step": 4768 }, { "epoch": 0.32312487295887254, "grad_norm": 6.562455177307129, "learning_rate": 9.449928126497365e-05, "loss": 0.9659, "step": 4769 }, { "epoch": 0.32319262822684464, "grad_norm": 7.884627819061279, "learning_rate": 9.449791224587583e-05, "loss": 0.9394, "step": 4770 }, { "epoch": 0.3232603834948167, "grad_norm": 8.175631523132324, "learning_rate": 9.449654322677802e-05, "loss": 1.3089, "step": 4771 }, { "epoch": 0.3233281387627888, "grad_norm": 7.907345294952393, "learning_rate": 9.44951742076802e-05, "loss": 0.7641, "step": 4772 }, { "epoch": 0.3233958940307609, "grad_norm": 8.59745979309082, "learning_rate": 9.449380518858239e-05, "loss": 1.1506, "step": 4773 }, { "epoch": 0.323463649298733, "grad_norm": 8.216800689697266, "learning_rate": 9.449243616948457e-05, "loss": 1.1007, "step": 4774 }, { "epoch": 0.32353140456670504, "grad_norm": 7.630847930908203, "learning_rate": 9.449106715038675e-05, "loss": 0.6435, "step": 4775 }, { "epoch": 0.32359915983467713, "grad_norm": 8.40709400177002, "learning_rate": 9.448969813128894e-05, "loss": 1.0299, "step": 4776 }, { "epoch": 0.3236669151026492, "grad_norm": 7.3637166023254395, "learning_rate": 9.448832911219112e-05, "loss": 1.2733, "step": 4777 }, { "epoch": 0.3237346703706213, "grad_norm": 7.707301139831543, "learning_rate": 9.44869600930933e-05, "loss": 0.91, "step": 4778 }, { "epoch": 0.3238024256385934, "grad_norm": 6.911712169647217, "learning_rate": 9.448559107399548e-05, "loss": 1.0425, "step": 4779 }, { "epoch": 0.3238701809065655, "grad_norm": 7.387215614318848, "learning_rate": 9.448422205489768e-05, "loss": 1.1508, "step": 4780 }, { "epoch": 0.3239379361745376, "grad_norm": 6.074267864227295, "learning_rate": 9.448285303579986e-05, "loss": 0.7861, "step": 4781 }, { "epoch": 0.3240056914425097, "grad_norm": 7.4029436111450195, "learning_rate": 9.448148401670204e-05, "loss": 1.1438, "step": 4782 }, { "epoch": 0.3240734467104817, "grad_norm": 8.514384269714355, "learning_rate": 9.448011499760422e-05, "loss": 1.1075, "step": 4783 }, { "epoch": 0.3241412019784538, "grad_norm": 8.503793716430664, "learning_rate": 9.44787459785064e-05, "loss": 0.9377, "step": 4784 }, { "epoch": 0.3242089572464259, "grad_norm": 6.770750522613525, "learning_rate": 9.447737695940859e-05, "loss": 0.8342, "step": 4785 }, { "epoch": 0.324276712514398, "grad_norm": 7.019729137420654, "learning_rate": 9.447600794031077e-05, "loss": 0.8236, "step": 4786 }, { "epoch": 0.3243444677823701, "grad_norm": 6.147645950317383, "learning_rate": 9.447463892121295e-05, "loss": 0.9493, "step": 4787 }, { "epoch": 0.3244122230503422, "grad_norm": 9.160635948181152, "learning_rate": 9.447326990211513e-05, "loss": 1.1632, "step": 4788 }, { "epoch": 0.32447997831831427, "grad_norm": 7.918509483337402, "learning_rate": 9.447190088301733e-05, "loss": 1.0154, "step": 4789 }, { "epoch": 0.32454773358628636, "grad_norm": 8.457235336303711, "learning_rate": 9.44705318639195e-05, "loss": 1.0373, "step": 4790 }, { "epoch": 0.3246154888542584, "grad_norm": 7.332309722900391, "learning_rate": 9.446916284482169e-05, "loss": 1.09, "step": 4791 }, { "epoch": 0.3246832441222305, "grad_norm": 7.7085700035095215, "learning_rate": 9.446779382572387e-05, "loss": 0.8492, "step": 4792 }, { "epoch": 0.3247509993902026, "grad_norm": 6.121610641479492, "learning_rate": 9.446642480662605e-05, "loss": 1.0399, "step": 4793 }, { "epoch": 0.32481875465817467, "grad_norm": 6.767165184020996, "learning_rate": 9.446505578752824e-05, "loss": 0.6908, "step": 4794 }, { "epoch": 0.32488650992614676, "grad_norm": 7.629088401794434, "learning_rate": 9.446368676843042e-05, "loss": 0.9356, "step": 4795 }, { "epoch": 0.32495426519411885, "grad_norm": 7.590803146362305, "learning_rate": 9.44623177493326e-05, "loss": 0.7671, "step": 4796 }, { "epoch": 0.32502202046209094, "grad_norm": 7.876105785369873, "learning_rate": 9.44609487302348e-05, "loss": 0.9654, "step": 4797 }, { "epoch": 0.32508977573006304, "grad_norm": 8.503900527954102, "learning_rate": 9.445957971113698e-05, "loss": 0.9726, "step": 4798 }, { "epoch": 0.32515753099803507, "grad_norm": 9.96375846862793, "learning_rate": 9.445821069203916e-05, "loss": 0.7587, "step": 4799 }, { "epoch": 0.32522528626600716, "grad_norm": 6.24782133102417, "learning_rate": 9.445684167294135e-05, "loss": 0.8028, "step": 4800 }, { "epoch": 0.32529304153397925, "grad_norm": 7.5481181144714355, "learning_rate": 9.445547265384353e-05, "loss": 0.8194, "step": 4801 }, { "epoch": 0.32536079680195135, "grad_norm": 8.148533821105957, "learning_rate": 9.445410363474571e-05, "loss": 1.0009, "step": 4802 }, { "epoch": 0.32542855206992344, "grad_norm": 7.8531341552734375, "learning_rate": 9.44527346156479e-05, "loss": 1.0629, "step": 4803 }, { "epoch": 0.32549630733789553, "grad_norm": 7.7668843269348145, "learning_rate": 9.445136559655008e-05, "loss": 1.1211, "step": 4804 }, { "epoch": 0.3255640626058676, "grad_norm": 9.245609283447266, "learning_rate": 9.444999657745226e-05, "loss": 1.0407, "step": 4805 }, { "epoch": 0.3256318178738397, "grad_norm": 7.387469291687012, "learning_rate": 9.444862755835445e-05, "loss": 1.128, "step": 4806 }, { "epoch": 0.32569957314181175, "grad_norm": 6.636757850646973, "learning_rate": 9.444725853925663e-05, "loss": 0.8386, "step": 4807 }, { "epoch": 0.32576732840978384, "grad_norm": 8.264979362487793, "learning_rate": 9.444588952015882e-05, "loss": 0.904, "step": 4808 }, { "epoch": 0.32583508367775593, "grad_norm": 7.77110481262207, "learning_rate": 9.4444520501061e-05, "loss": 1.0292, "step": 4809 }, { "epoch": 0.325902838945728, "grad_norm": 6.825851917266846, "learning_rate": 9.444315148196318e-05, "loss": 0.7484, "step": 4810 }, { "epoch": 0.3259705942137001, "grad_norm": 6.475069999694824, "learning_rate": 9.444178246286536e-05, "loss": 0.9085, "step": 4811 }, { "epoch": 0.3260383494816722, "grad_norm": 8.736098289489746, "learning_rate": 9.444041344376755e-05, "loss": 1.1817, "step": 4812 }, { "epoch": 0.3261061047496443, "grad_norm": 7.0992608070373535, "learning_rate": 9.443904442466973e-05, "loss": 0.8654, "step": 4813 }, { "epoch": 0.3261738600176164, "grad_norm": 6.594883441925049, "learning_rate": 9.443767540557192e-05, "loss": 1.0002, "step": 4814 }, { "epoch": 0.3262416152855885, "grad_norm": 7.857585906982422, "learning_rate": 9.44363063864741e-05, "loss": 1.1711, "step": 4815 }, { "epoch": 0.3263093705535605, "grad_norm": 6.884295463562012, "learning_rate": 9.443493736737628e-05, "loss": 0.8003, "step": 4816 }, { "epoch": 0.3263771258215326, "grad_norm": 7.344529628753662, "learning_rate": 9.443356834827847e-05, "loss": 1.0009, "step": 4817 }, { "epoch": 0.3264448810895047, "grad_norm": 6.892088413238525, "learning_rate": 9.443219932918065e-05, "loss": 0.9797, "step": 4818 }, { "epoch": 0.3265126363574768, "grad_norm": 12.833809852600098, "learning_rate": 9.443083031008283e-05, "loss": 1.1143, "step": 4819 }, { "epoch": 0.3265803916254489, "grad_norm": 7.343682765960693, "learning_rate": 9.442946129098501e-05, "loss": 0.6302, "step": 4820 }, { "epoch": 0.326648146893421, "grad_norm": 7.089320659637451, "learning_rate": 9.442809227188719e-05, "loss": 1.0129, "step": 4821 }, { "epoch": 0.32671590216139307, "grad_norm": 6.60029935836792, "learning_rate": 9.442672325278938e-05, "loss": 0.9259, "step": 4822 }, { "epoch": 0.32678365742936516, "grad_norm": 10.470613479614258, "learning_rate": 9.442535423369157e-05, "loss": 1.1124, "step": 4823 }, { "epoch": 0.3268514126973372, "grad_norm": 8.491450309753418, "learning_rate": 9.442398521459375e-05, "loss": 0.9943, "step": 4824 }, { "epoch": 0.3269191679653093, "grad_norm": 6.033353328704834, "learning_rate": 9.442261619549593e-05, "loss": 1.0003, "step": 4825 }, { "epoch": 0.3269869232332814, "grad_norm": 7.0092549324035645, "learning_rate": 9.442124717639812e-05, "loss": 0.7843, "step": 4826 }, { "epoch": 0.32705467850125347, "grad_norm": 8.52950382232666, "learning_rate": 9.44198781573003e-05, "loss": 1.1036, "step": 4827 }, { "epoch": 0.32712243376922556, "grad_norm": 7.509829044342041, "learning_rate": 9.441850913820248e-05, "loss": 0.9969, "step": 4828 }, { "epoch": 0.32719018903719765, "grad_norm": 7.235622406005859, "learning_rate": 9.441714011910466e-05, "loss": 0.9131, "step": 4829 }, { "epoch": 0.32725794430516975, "grad_norm": 8.332746505737305, "learning_rate": 9.441577110000684e-05, "loss": 1.1663, "step": 4830 }, { "epoch": 0.32732569957314184, "grad_norm": 8.321161270141602, "learning_rate": 9.441440208090904e-05, "loss": 1.0091, "step": 4831 }, { "epoch": 0.3273934548411139, "grad_norm": 7.0502753257751465, "learning_rate": 9.441303306181122e-05, "loss": 1.0273, "step": 4832 }, { "epoch": 0.32746121010908597, "grad_norm": 6.921250343322754, "learning_rate": 9.44116640427134e-05, "loss": 0.873, "step": 4833 }, { "epoch": 0.32752896537705806, "grad_norm": 7.386787414550781, "learning_rate": 9.441029502361558e-05, "loss": 0.7415, "step": 4834 }, { "epoch": 0.32759672064503015, "grad_norm": 6.814700603485107, "learning_rate": 9.440892600451777e-05, "loss": 0.8827, "step": 4835 }, { "epoch": 0.32766447591300224, "grad_norm": 7.551968574523926, "learning_rate": 9.440755698541995e-05, "loss": 0.9752, "step": 4836 }, { "epoch": 0.32773223118097433, "grad_norm": 8.299920082092285, "learning_rate": 9.440618796632213e-05, "loss": 1.0253, "step": 4837 }, { "epoch": 0.3277999864489464, "grad_norm": 7.607963562011719, "learning_rate": 9.440481894722431e-05, "loss": 1.0562, "step": 4838 }, { "epoch": 0.3278677417169185, "grad_norm": 8.098003387451172, "learning_rate": 9.440344992812649e-05, "loss": 0.9055, "step": 4839 }, { "epoch": 0.32793549698489055, "grad_norm": 6.6647257804870605, "learning_rate": 9.440208090902869e-05, "loss": 0.8188, "step": 4840 }, { "epoch": 0.32800325225286264, "grad_norm": 7.541131496429443, "learning_rate": 9.440071188993087e-05, "loss": 0.7095, "step": 4841 }, { "epoch": 0.32807100752083473, "grad_norm": 10.203042984008789, "learning_rate": 9.439934287083305e-05, "loss": 0.8256, "step": 4842 }, { "epoch": 0.3281387627888068, "grad_norm": 8.049646377563477, "learning_rate": 9.439797385173524e-05, "loss": 1.0244, "step": 4843 }, { "epoch": 0.3282065180567789, "grad_norm": 6.727071285247803, "learning_rate": 9.439660483263742e-05, "loss": 0.9681, "step": 4844 }, { "epoch": 0.328274273324751, "grad_norm": 8.106125831604004, "learning_rate": 9.43952358135396e-05, "loss": 1.0944, "step": 4845 }, { "epoch": 0.3283420285927231, "grad_norm": 8.768218994140625, "learning_rate": 9.43938667944418e-05, "loss": 0.7493, "step": 4846 }, { "epoch": 0.3284097838606952, "grad_norm": 6.333378791809082, "learning_rate": 9.439249777534397e-05, "loss": 0.9187, "step": 4847 }, { "epoch": 0.32847753912866723, "grad_norm": 7.732221603393555, "learning_rate": 9.439112875624616e-05, "loss": 1.2522, "step": 4848 }, { "epoch": 0.3285452943966393, "grad_norm": 7.67783784866333, "learning_rate": 9.438975973714835e-05, "loss": 0.8682, "step": 4849 }, { "epoch": 0.3286130496646114, "grad_norm": 7.557129859924316, "learning_rate": 9.438839071805053e-05, "loss": 0.7843, "step": 4850 }, { "epoch": 0.3286808049325835, "grad_norm": 6.559933185577393, "learning_rate": 9.438702169895271e-05, "loss": 0.902, "step": 4851 }, { "epoch": 0.3287485602005556, "grad_norm": 7.604280471801758, "learning_rate": 9.438565267985489e-05, "loss": 0.8905, "step": 4852 }, { "epoch": 0.3288163154685277, "grad_norm": 7.508764266967773, "learning_rate": 9.438428366075707e-05, "loss": 0.932, "step": 4853 }, { "epoch": 0.3288840707364998, "grad_norm": 6.650167942047119, "learning_rate": 9.438291464165926e-05, "loss": 1.0507, "step": 4854 }, { "epoch": 0.32895182600447187, "grad_norm": 8.334061622619629, "learning_rate": 9.438154562256144e-05, "loss": 0.7949, "step": 4855 }, { "epoch": 0.3290195812724439, "grad_norm": 9.498878479003906, "learning_rate": 9.438017660346362e-05, "loss": 0.8487, "step": 4856 }, { "epoch": 0.329087336540416, "grad_norm": 7.810210227966309, "learning_rate": 9.43788075843658e-05, "loss": 0.9917, "step": 4857 }, { "epoch": 0.3291550918083881, "grad_norm": 7.337824821472168, "learning_rate": 9.4377438565268e-05, "loss": 0.8683, "step": 4858 }, { "epoch": 0.3292228470763602, "grad_norm": 8.331491470336914, "learning_rate": 9.437606954617018e-05, "loss": 1.178, "step": 4859 }, { "epoch": 0.3292906023443323, "grad_norm": 6.5013227462768555, "learning_rate": 9.437470052707236e-05, "loss": 0.7093, "step": 4860 }, { "epoch": 0.32935835761230436, "grad_norm": 8.814985275268555, "learning_rate": 9.437333150797454e-05, "loss": 0.9212, "step": 4861 }, { "epoch": 0.32942611288027646, "grad_norm": 8.78661060333252, "learning_rate": 9.437196248887672e-05, "loss": 0.8827, "step": 4862 }, { "epoch": 0.32949386814824855, "grad_norm": 8.912193298339844, "learning_rate": 9.437059346977891e-05, "loss": 1.133, "step": 4863 }, { "epoch": 0.3295616234162206, "grad_norm": 7.901734352111816, "learning_rate": 9.43692244506811e-05, "loss": 0.8293, "step": 4864 }, { "epoch": 0.3296293786841927, "grad_norm": 8.270600318908691, "learning_rate": 9.436785543158328e-05, "loss": 1.1584, "step": 4865 }, { "epoch": 0.32969713395216477, "grad_norm": 6.765751838684082, "learning_rate": 9.436648641248546e-05, "loss": 0.7705, "step": 4866 }, { "epoch": 0.32976488922013686, "grad_norm": 7.272820949554443, "learning_rate": 9.436511739338765e-05, "loss": 1.2151, "step": 4867 }, { "epoch": 0.32983264448810895, "grad_norm": 6.639741897583008, "learning_rate": 9.436374837428983e-05, "loss": 0.8868, "step": 4868 }, { "epoch": 0.32990039975608104, "grad_norm": 7.6254143714904785, "learning_rate": 9.436237935519201e-05, "loss": 1.0342, "step": 4869 }, { "epoch": 0.32996815502405313, "grad_norm": 6.546008586883545, "learning_rate": 9.436101033609419e-05, "loss": 0.7223, "step": 4870 }, { "epoch": 0.3300359102920252, "grad_norm": 6.215434551239014, "learning_rate": 9.435964131699637e-05, "loss": 0.8506, "step": 4871 }, { "epoch": 0.3301036655599973, "grad_norm": 7.897797584533691, "learning_rate": 9.435827229789856e-05, "loss": 0.8699, "step": 4872 }, { "epoch": 0.33017142082796935, "grad_norm": 8.3803129196167, "learning_rate": 9.435690327880074e-05, "loss": 1.0922, "step": 4873 }, { "epoch": 0.33023917609594144, "grad_norm": 6.675380706787109, "learning_rate": 9.435553425970293e-05, "loss": 0.8401, "step": 4874 }, { "epoch": 0.33030693136391354, "grad_norm": 7.380627632141113, "learning_rate": 9.43541652406051e-05, "loss": 0.615, "step": 4875 }, { "epoch": 0.33037468663188563, "grad_norm": 8.120410919189453, "learning_rate": 9.435279622150729e-05, "loss": 1.2622, "step": 4876 }, { "epoch": 0.3304424418998577, "grad_norm": 7.170663356781006, "learning_rate": 9.435142720240948e-05, "loss": 0.893, "step": 4877 }, { "epoch": 0.3305101971678298, "grad_norm": 12.477727890014648, "learning_rate": 9.435005818331166e-05, "loss": 0.9654, "step": 4878 }, { "epoch": 0.3305779524358019, "grad_norm": 6.562829971313477, "learning_rate": 9.434868916421384e-05, "loss": 0.7042, "step": 4879 }, { "epoch": 0.330645707703774, "grad_norm": 8.406328201293945, "learning_rate": 9.434732014511602e-05, "loss": 1.0807, "step": 4880 }, { "epoch": 0.33071346297174603, "grad_norm": 9.06248950958252, "learning_rate": 9.434595112601821e-05, "loss": 0.8586, "step": 4881 }, { "epoch": 0.3307812182397181, "grad_norm": 8.743175506591797, "learning_rate": 9.43445821069204e-05, "loss": 0.9501, "step": 4882 }, { "epoch": 0.3308489735076902, "grad_norm": 7.666022777557373, "learning_rate": 9.434321308782258e-05, "loss": 0.9411, "step": 4883 }, { "epoch": 0.3309167287756623, "grad_norm": 6.989424705505371, "learning_rate": 9.434184406872476e-05, "loss": 0.9511, "step": 4884 }, { "epoch": 0.3309844840436344, "grad_norm": 6.9908905029296875, "learning_rate": 9.434047504962694e-05, "loss": 0.9769, "step": 4885 }, { "epoch": 0.3310522393116065, "grad_norm": 7.985763072967529, "learning_rate": 9.433910603052913e-05, "loss": 1.1041, "step": 4886 }, { "epoch": 0.3311199945795786, "grad_norm": 7.9841694831848145, "learning_rate": 9.433773701143131e-05, "loss": 0.8341, "step": 4887 }, { "epoch": 0.33118774984755067, "grad_norm": 6.9602952003479, "learning_rate": 9.433636799233349e-05, "loss": 0.9388, "step": 4888 }, { "epoch": 0.3312555051155227, "grad_norm": 7.2530012130737305, "learning_rate": 9.433499897323567e-05, "loss": 0.9396, "step": 4889 }, { "epoch": 0.3313232603834948, "grad_norm": 7.338861465454102, "learning_rate": 9.433362995413786e-05, "loss": 0.9168, "step": 4890 }, { "epoch": 0.3313910156514669, "grad_norm": 7.08353853225708, "learning_rate": 9.433226093504005e-05, "loss": 0.9332, "step": 4891 }, { "epoch": 0.331458770919439, "grad_norm": 7.379842758178711, "learning_rate": 9.433089191594223e-05, "loss": 0.7139, "step": 4892 }, { "epoch": 0.3315265261874111, "grad_norm": 8.599993705749512, "learning_rate": 9.432952289684442e-05, "loss": 1.0023, "step": 4893 }, { "epoch": 0.33159428145538317, "grad_norm": 7.096752643585205, "learning_rate": 9.43281538777466e-05, "loss": 0.9052, "step": 4894 }, { "epoch": 0.33166203672335526, "grad_norm": 6.28333044052124, "learning_rate": 9.432678485864878e-05, "loss": 0.8721, "step": 4895 }, { "epoch": 0.33172979199132735, "grad_norm": 8.48362922668457, "learning_rate": 9.432541583955097e-05, "loss": 0.9774, "step": 4896 }, { "epoch": 0.3317975472592994, "grad_norm": 8.214259147644043, "learning_rate": 9.432404682045315e-05, "loss": 0.999, "step": 4897 }, { "epoch": 0.3318653025272715, "grad_norm": 9.603363990783691, "learning_rate": 9.432267780135533e-05, "loss": 0.7848, "step": 4898 }, { "epoch": 0.33193305779524357, "grad_norm": 7.982606887817383, "learning_rate": 9.432130878225753e-05, "loss": 0.7167, "step": 4899 }, { "epoch": 0.33200081306321566, "grad_norm": 6.8360915184021, "learning_rate": 9.431993976315971e-05, "loss": 1.1884, "step": 4900 }, { "epoch": 0.33206856833118775, "grad_norm": 9.70743179321289, "learning_rate": 9.431857074406189e-05, "loss": 1.0198, "step": 4901 }, { "epoch": 0.33213632359915984, "grad_norm": 7.7961554527282715, "learning_rate": 9.431720172496407e-05, "loss": 0.8637, "step": 4902 }, { "epoch": 0.33220407886713194, "grad_norm": 7.809814453125, "learning_rate": 9.431583270586625e-05, "loss": 1.0737, "step": 4903 }, { "epoch": 0.332271834135104, "grad_norm": 6.5765862464904785, "learning_rate": 9.431446368676844e-05, "loss": 0.6574, "step": 4904 }, { "epoch": 0.33233958940307606, "grad_norm": 6.912911891937256, "learning_rate": 9.431309466767062e-05, "loss": 0.9647, "step": 4905 }, { "epoch": 0.33240734467104815, "grad_norm": 7.2532877922058105, "learning_rate": 9.43117256485728e-05, "loss": 1.098, "step": 4906 }, { "epoch": 0.33247509993902025, "grad_norm": 6.589138984680176, "learning_rate": 9.431035662947498e-05, "loss": 0.9948, "step": 4907 }, { "epoch": 0.33254285520699234, "grad_norm": 9.1469144821167, "learning_rate": 9.430898761037717e-05, "loss": 0.9656, "step": 4908 }, { "epoch": 0.33261061047496443, "grad_norm": 7.124277591705322, "learning_rate": 9.430761859127936e-05, "loss": 0.8704, "step": 4909 }, { "epoch": 0.3326783657429365, "grad_norm": 6.862776279449463, "learning_rate": 9.430624957218154e-05, "loss": 0.8008, "step": 4910 }, { "epoch": 0.3327461210109086, "grad_norm": 6.245189666748047, "learning_rate": 9.430488055308372e-05, "loss": 0.8529, "step": 4911 }, { "epoch": 0.3328138762788807, "grad_norm": 8.546821594238281, "learning_rate": 9.43035115339859e-05, "loss": 1.1747, "step": 4912 }, { "epoch": 0.33288163154685274, "grad_norm": 7.5279765129089355, "learning_rate": 9.43021425148881e-05, "loss": 0.8725, "step": 4913 }, { "epoch": 0.33294938681482483, "grad_norm": 7.987123966217041, "learning_rate": 9.430077349579027e-05, "loss": 0.8752, "step": 4914 }, { "epoch": 0.3330171420827969, "grad_norm": 10.1973876953125, "learning_rate": 9.429940447669245e-05, "loss": 1.0813, "step": 4915 }, { "epoch": 0.333084897350769, "grad_norm": 7.406893253326416, "learning_rate": 9.429803545759464e-05, "loss": 0.7064, "step": 4916 }, { "epoch": 0.3331526526187411, "grad_norm": 6.545569896697998, "learning_rate": 9.429666643849682e-05, "loss": 0.9005, "step": 4917 }, { "epoch": 0.3332204078867132, "grad_norm": 8.624588012695312, "learning_rate": 9.429529741939901e-05, "loss": 0.939, "step": 4918 }, { "epoch": 0.3332881631546853, "grad_norm": 6.811842441558838, "learning_rate": 9.429392840030119e-05, "loss": 0.8367, "step": 4919 }, { "epoch": 0.3333559184226574, "grad_norm": 7.256634712219238, "learning_rate": 9.429255938120337e-05, "loss": 1.0455, "step": 4920 }, { "epoch": 0.3334236736906295, "grad_norm": 6.900022983551025, "learning_rate": 9.429119036210555e-05, "loss": 0.954, "step": 4921 }, { "epoch": 0.3334914289586015, "grad_norm": 9.217741012573242, "learning_rate": 9.428982134300774e-05, "loss": 0.6392, "step": 4922 }, { "epoch": 0.3335591842265736, "grad_norm": 7.729586124420166, "learning_rate": 9.428845232390992e-05, "loss": 0.9372, "step": 4923 }, { "epoch": 0.3336269394945457, "grad_norm": 6.606268405914307, "learning_rate": 9.42870833048121e-05, "loss": 0.8127, "step": 4924 }, { "epoch": 0.3336946947625178, "grad_norm": 8.078627586364746, "learning_rate": 9.428571428571429e-05, "loss": 0.847, "step": 4925 }, { "epoch": 0.3337624500304899, "grad_norm": 8.212615966796875, "learning_rate": 9.428434526661647e-05, "loss": 1.0133, "step": 4926 }, { "epoch": 0.33383020529846197, "grad_norm": 6.68360710144043, "learning_rate": 9.428297624751866e-05, "loss": 0.9796, "step": 4927 }, { "epoch": 0.33389796056643406, "grad_norm": 7.115147590637207, "learning_rate": 9.428160722842084e-05, "loss": 0.9106, "step": 4928 }, { "epoch": 0.33396571583440615, "grad_norm": 7.178501129150391, "learning_rate": 9.428023820932302e-05, "loss": 0.7531, "step": 4929 }, { "epoch": 0.3340334711023782, "grad_norm": 7.303642749786377, "learning_rate": 9.42788691902252e-05, "loss": 0.9163, "step": 4930 }, { "epoch": 0.3341012263703503, "grad_norm": 7.532678604125977, "learning_rate": 9.427750017112738e-05, "loss": 1.001, "step": 4931 }, { "epoch": 0.33416898163832237, "grad_norm": 6.791550636291504, "learning_rate": 9.427613115202957e-05, "loss": 0.9005, "step": 4932 }, { "epoch": 0.33423673690629446, "grad_norm": 7.264156341552734, "learning_rate": 9.427476213293176e-05, "loss": 0.8076, "step": 4933 }, { "epoch": 0.33430449217426655, "grad_norm": 8.477554321289062, "learning_rate": 9.427339311383394e-05, "loss": 0.9363, "step": 4934 }, { "epoch": 0.33437224744223865, "grad_norm": 7.020787239074707, "learning_rate": 9.427202409473612e-05, "loss": 0.6933, "step": 4935 }, { "epoch": 0.33444000271021074, "grad_norm": 7.990420818328857, "learning_rate": 9.427065507563831e-05, "loss": 0.9759, "step": 4936 }, { "epoch": 0.33450775797818283, "grad_norm": 8.950580596923828, "learning_rate": 9.426928605654049e-05, "loss": 1.2876, "step": 4937 }, { "epoch": 0.33457551324615487, "grad_norm": 6.879635334014893, "learning_rate": 9.426791703744267e-05, "loss": 0.992, "step": 4938 }, { "epoch": 0.33464326851412696, "grad_norm": 7.640803813934326, "learning_rate": 9.426654801834486e-05, "loss": 1.1523, "step": 4939 }, { "epoch": 0.33471102378209905, "grad_norm": 7.156732559204102, "learning_rate": 9.426517899924704e-05, "loss": 0.6521, "step": 4940 }, { "epoch": 0.33477877905007114, "grad_norm": 7.543867588043213, "learning_rate": 9.426380998014922e-05, "loss": 0.8495, "step": 4941 }, { "epoch": 0.33484653431804323, "grad_norm": 8.554939270019531, "learning_rate": 9.426244096105142e-05, "loss": 1.0593, "step": 4942 }, { "epoch": 0.3349142895860153, "grad_norm": 7.902100563049316, "learning_rate": 9.42610719419536e-05, "loss": 1.0271, "step": 4943 }, { "epoch": 0.3349820448539874, "grad_norm": 7.945007801055908, "learning_rate": 9.425970292285578e-05, "loss": 0.7839, "step": 4944 }, { "epoch": 0.3350498001219595, "grad_norm": 7.539274215698242, "learning_rate": 9.425833390375797e-05, "loss": 0.968, "step": 4945 }, { "epoch": 0.33511755538993154, "grad_norm": 7.23228120803833, "learning_rate": 9.425696488466015e-05, "loss": 0.8319, "step": 4946 }, { "epoch": 0.33518531065790363, "grad_norm": 7.251431941986084, "learning_rate": 9.425559586556233e-05, "loss": 0.9513, "step": 4947 }, { "epoch": 0.3352530659258757, "grad_norm": 7.047804355621338, "learning_rate": 9.425422684646451e-05, "loss": 0.8677, "step": 4948 }, { "epoch": 0.3353208211938478, "grad_norm": 7.7244696617126465, "learning_rate": 9.42528578273667e-05, "loss": 0.8705, "step": 4949 }, { "epoch": 0.3353885764618199, "grad_norm": 6.078210830688477, "learning_rate": 9.425148880826889e-05, "loss": 0.9205, "step": 4950 }, { "epoch": 0.335456331729792, "grad_norm": 7.856949329376221, "learning_rate": 9.425011978917107e-05, "loss": 0.8637, "step": 4951 }, { "epoch": 0.3355240869977641, "grad_norm": 7.341653347015381, "learning_rate": 9.424875077007325e-05, "loss": 0.9582, "step": 4952 }, { "epoch": 0.3355918422657362, "grad_norm": 7.456873893737793, "learning_rate": 9.424738175097543e-05, "loss": 0.8581, "step": 4953 }, { "epoch": 0.3356595975337082, "grad_norm": 7.885173797607422, "learning_rate": 9.424601273187761e-05, "loss": 0.9551, "step": 4954 }, { "epoch": 0.3357273528016803, "grad_norm": 7.016728401184082, "learning_rate": 9.42446437127798e-05, "loss": 0.9493, "step": 4955 }, { "epoch": 0.3357951080696524, "grad_norm": 6.383670806884766, "learning_rate": 9.424327469368198e-05, "loss": 0.8178, "step": 4956 }, { "epoch": 0.3358628633376245, "grad_norm": 6.930068492889404, "learning_rate": 9.424190567458416e-05, "loss": 1.1073, "step": 4957 }, { "epoch": 0.3359306186055966, "grad_norm": 7.001153469085693, "learning_rate": 9.424053665548634e-05, "loss": 0.844, "step": 4958 }, { "epoch": 0.3359983738735687, "grad_norm": 8.316204071044922, "learning_rate": 9.423916763638854e-05, "loss": 0.9363, "step": 4959 }, { "epoch": 0.33606612914154077, "grad_norm": 7.931922912597656, "learning_rate": 9.423779861729072e-05, "loss": 1.1617, "step": 4960 }, { "epoch": 0.33613388440951286, "grad_norm": 7.447847366333008, "learning_rate": 9.42364295981929e-05, "loss": 0.9137, "step": 4961 }, { "epoch": 0.3362016396774849, "grad_norm": 6.582141876220703, "learning_rate": 9.423506057909508e-05, "loss": 1.0516, "step": 4962 }, { "epoch": 0.336269394945457, "grad_norm": 6.278825283050537, "learning_rate": 9.423369155999726e-05, "loss": 0.9489, "step": 4963 }, { "epoch": 0.3363371502134291, "grad_norm": 7.415517330169678, "learning_rate": 9.423232254089945e-05, "loss": 0.9275, "step": 4964 }, { "epoch": 0.3364049054814012, "grad_norm": 6.073827266693115, "learning_rate": 9.423095352180163e-05, "loss": 0.7514, "step": 4965 }, { "epoch": 0.33647266074937326, "grad_norm": 9.100361824035645, "learning_rate": 9.422958450270381e-05, "loss": 1.0882, "step": 4966 }, { "epoch": 0.33654041601734536, "grad_norm": 8.23529052734375, "learning_rate": 9.4228215483606e-05, "loss": 0.8323, "step": 4967 }, { "epoch": 0.33660817128531745, "grad_norm": 9.86119270324707, "learning_rate": 9.422684646450819e-05, "loss": 0.8997, "step": 4968 }, { "epoch": 0.33667592655328954, "grad_norm": 7.328428268432617, "learning_rate": 9.422547744541037e-05, "loss": 0.8558, "step": 4969 }, { "epoch": 0.3367436818212616, "grad_norm": 6.703019618988037, "learning_rate": 9.422410842631255e-05, "loss": 0.782, "step": 4970 }, { "epoch": 0.33681143708923367, "grad_norm": 7.780190944671631, "learning_rate": 9.422273940721473e-05, "loss": 1.1681, "step": 4971 }, { "epoch": 0.33687919235720576, "grad_norm": 6.972787380218506, "learning_rate": 9.422137038811691e-05, "loss": 1.0137, "step": 4972 }, { "epoch": 0.33694694762517785, "grad_norm": 6.521172523498535, "learning_rate": 9.42200013690191e-05, "loss": 0.8866, "step": 4973 }, { "epoch": 0.33701470289314994, "grad_norm": 6.414315223693848, "learning_rate": 9.421863234992128e-05, "loss": 0.7614, "step": 4974 }, { "epoch": 0.33708245816112203, "grad_norm": 8.930048942565918, "learning_rate": 9.421726333082346e-05, "loss": 1.0757, "step": 4975 }, { "epoch": 0.3371502134290941, "grad_norm": 6.710857391357422, "learning_rate": 9.421589431172565e-05, "loss": 0.8966, "step": 4976 }, { "epoch": 0.3372179686970662, "grad_norm": 10.788898468017578, "learning_rate": 9.421452529262784e-05, "loss": 0.906, "step": 4977 }, { "epoch": 0.3372857239650383, "grad_norm": 6.601465225219727, "learning_rate": 9.421315627353002e-05, "loss": 0.967, "step": 4978 }, { "epoch": 0.33735347923301034, "grad_norm": 7.0998406410217285, "learning_rate": 9.42117872544322e-05, "loss": 1.0584, "step": 4979 }, { "epoch": 0.33742123450098244, "grad_norm": 8.517425537109375, "learning_rate": 9.421041823533438e-05, "loss": 0.83, "step": 4980 }, { "epoch": 0.33748898976895453, "grad_norm": 7.631463050842285, "learning_rate": 9.420904921623656e-05, "loss": 0.7431, "step": 4981 }, { "epoch": 0.3375567450369266, "grad_norm": 7.006965637207031, "learning_rate": 9.420768019713875e-05, "loss": 1.0081, "step": 4982 }, { "epoch": 0.3376245003048987, "grad_norm": 7.085225582122803, "learning_rate": 9.420631117804093e-05, "loss": 1.0094, "step": 4983 }, { "epoch": 0.3376922555728708, "grad_norm": 8.096879959106445, "learning_rate": 9.420494215894312e-05, "loss": 1.0376, "step": 4984 }, { "epoch": 0.3377600108408429, "grad_norm": 7.823955535888672, "learning_rate": 9.420357313984531e-05, "loss": 0.975, "step": 4985 }, { "epoch": 0.337827766108815, "grad_norm": 6.340082168579102, "learning_rate": 9.420220412074749e-05, "loss": 0.6133, "step": 4986 }, { "epoch": 0.337895521376787, "grad_norm": 9.930416107177734, "learning_rate": 9.420083510164967e-05, "loss": 1.0132, "step": 4987 }, { "epoch": 0.3379632766447591, "grad_norm": 6.7059221267700195, "learning_rate": 9.419946608255186e-05, "loss": 0.7784, "step": 4988 }, { "epoch": 0.3380310319127312, "grad_norm": 7.869287967681885, "learning_rate": 9.419809706345404e-05, "loss": 0.9114, "step": 4989 }, { "epoch": 0.3380987871807033, "grad_norm": 8.061338424682617, "learning_rate": 9.419672804435622e-05, "loss": 1.0982, "step": 4990 }, { "epoch": 0.3381665424486754, "grad_norm": 6.226239204406738, "learning_rate": 9.419535902525842e-05, "loss": 0.8251, "step": 4991 }, { "epoch": 0.3382342977166475, "grad_norm": 6.236758232116699, "learning_rate": 9.41939900061606e-05, "loss": 1.0367, "step": 4992 }, { "epoch": 0.3383020529846196, "grad_norm": 8.880475044250488, "learning_rate": 9.419262098706278e-05, "loss": 1.0941, "step": 4993 }, { "epoch": 0.33836980825259166, "grad_norm": 7.7189621925354, "learning_rate": 9.419125196796496e-05, "loss": 1.2868, "step": 4994 }, { "epoch": 0.3384375635205637, "grad_norm": 8.368658065795898, "learning_rate": 9.418988294886714e-05, "loss": 1.1495, "step": 4995 }, { "epoch": 0.3385053187885358, "grad_norm": 6.596102237701416, "learning_rate": 9.418851392976933e-05, "loss": 0.9639, "step": 4996 }, { "epoch": 0.3385730740565079, "grad_norm": 6.297356605529785, "learning_rate": 9.418714491067151e-05, "loss": 0.8802, "step": 4997 }, { "epoch": 0.33864082932448, "grad_norm": 6.869907855987549, "learning_rate": 9.41857758915737e-05, "loss": 0.8247, "step": 4998 }, { "epoch": 0.33870858459245207, "grad_norm": 8.147067070007324, "learning_rate": 9.418440687247587e-05, "loss": 0.7532, "step": 4999 }, { "epoch": 0.33877633986042416, "grad_norm": 7.368498802185059, "learning_rate": 9.418303785337807e-05, "loss": 0.8697, "step": 5000 }, { "epoch": 0.33884409512839625, "grad_norm": 6.064372539520264, "learning_rate": 9.418166883428025e-05, "loss": 0.8134, "step": 5001 }, { "epoch": 0.33891185039636834, "grad_norm": 7.22601318359375, "learning_rate": 9.418029981518243e-05, "loss": 0.8224, "step": 5002 }, { "epoch": 0.3389796056643404, "grad_norm": 7.832242012023926, "learning_rate": 9.417893079608461e-05, "loss": 0.9114, "step": 5003 }, { "epoch": 0.33904736093231247, "grad_norm": 7.05675745010376, "learning_rate": 9.417756177698679e-05, "loss": 0.8475, "step": 5004 }, { "epoch": 0.33911511620028456, "grad_norm": 6.385340690612793, "learning_rate": 9.417619275788898e-05, "loss": 0.8604, "step": 5005 }, { "epoch": 0.33918287146825665, "grad_norm": 7.100057601928711, "learning_rate": 9.417482373879116e-05, "loss": 0.8418, "step": 5006 }, { "epoch": 0.33925062673622874, "grad_norm": 8.983677864074707, "learning_rate": 9.417345471969334e-05, "loss": 1.1282, "step": 5007 }, { "epoch": 0.33931838200420084, "grad_norm": 8.526215553283691, "learning_rate": 9.417208570059552e-05, "loss": 0.8759, "step": 5008 }, { "epoch": 0.3393861372721729, "grad_norm": 7.278728485107422, "learning_rate": 9.41707166814977e-05, "loss": 1.0722, "step": 5009 }, { "epoch": 0.339453892540145, "grad_norm": 9.66884994506836, "learning_rate": 9.41693476623999e-05, "loss": 0.8776, "step": 5010 }, { "epoch": 0.33952164780811706, "grad_norm": 7.001797199249268, "learning_rate": 9.416797864330208e-05, "loss": 0.7863, "step": 5011 }, { "epoch": 0.33958940307608915, "grad_norm": 7.707855224609375, "learning_rate": 9.416660962420426e-05, "loss": 0.9138, "step": 5012 }, { "epoch": 0.33965715834406124, "grad_norm": 7.721848964691162, "learning_rate": 9.416524060510644e-05, "loss": 1.1121, "step": 5013 }, { "epoch": 0.33972491361203333, "grad_norm": 8.91602611541748, "learning_rate": 9.416387158600863e-05, "loss": 0.9152, "step": 5014 }, { "epoch": 0.3397926688800054, "grad_norm": 7.332693099975586, "learning_rate": 9.416250256691081e-05, "loss": 0.9559, "step": 5015 }, { "epoch": 0.3398604241479775, "grad_norm": 7.198998928070068, "learning_rate": 9.4161133547813e-05, "loss": 0.9239, "step": 5016 }, { "epoch": 0.3399281794159496, "grad_norm": 7.566831588745117, "learning_rate": 9.415976452871517e-05, "loss": 0.9281, "step": 5017 }, { "epoch": 0.3399959346839217, "grad_norm": 6.832939624786377, "learning_rate": 9.415839550961736e-05, "loss": 0.9164, "step": 5018 }, { "epoch": 0.34006368995189373, "grad_norm": 7.087849140167236, "learning_rate": 9.415702649051955e-05, "loss": 0.9314, "step": 5019 }, { "epoch": 0.3401314452198658, "grad_norm": 7.417871952056885, "learning_rate": 9.415565747142173e-05, "loss": 0.9687, "step": 5020 }, { "epoch": 0.3401992004878379, "grad_norm": 6.9238362312316895, "learning_rate": 9.415428845232391e-05, "loss": 1.0757, "step": 5021 }, { "epoch": 0.34026695575581, "grad_norm": 7.4261698722839355, "learning_rate": 9.415291943322609e-05, "loss": 1.0636, "step": 5022 }, { "epoch": 0.3403347110237821, "grad_norm": 6.1964030265808105, "learning_rate": 9.415155041412828e-05, "loss": 0.739, "step": 5023 }, { "epoch": 0.3404024662917542, "grad_norm": 7.07076358795166, "learning_rate": 9.415018139503046e-05, "loss": 0.9435, "step": 5024 }, { "epoch": 0.3404702215597263, "grad_norm": 8.069670677185059, "learning_rate": 9.414881237593264e-05, "loss": 0.9745, "step": 5025 }, { "epoch": 0.3405379768276984, "grad_norm": 7.559169769287109, "learning_rate": 9.414744335683482e-05, "loss": 0.9329, "step": 5026 }, { "epoch": 0.34060573209567047, "grad_norm": 8.438983917236328, "learning_rate": 9.4146074337737e-05, "loss": 1.0608, "step": 5027 }, { "epoch": 0.3406734873636425, "grad_norm": 6.522724151611328, "learning_rate": 9.41447053186392e-05, "loss": 0.7912, "step": 5028 }, { "epoch": 0.3407412426316146, "grad_norm": 8.31757926940918, "learning_rate": 9.414333629954138e-05, "loss": 0.7535, "step": 5029 }, { "epoch": 0.3408089978995867, "grad_norm": 7.540639877319336, "learning_rate": 9.414196728044356e-05, "loss": 0.7239, "step": 5030 }, { "epoch": 0.3408767531675588, "grad_norm": 7.016546726226807, "learning_rate": 9.414059826134575e-05, "loss": 1.0658, "step": 5031 }, { "epoch": 0.34094450843553087, "grad_norm": 5.6253814697265625, "learning_rate": 9.413922924224793e-05, "loss": 0.8522, "step": 5032 }, { "epoch": 0.34101226370350296, "grad_norm": 7.693328380584717, "learning_rate": 9.413786022315011e-05, "loss": 1.1011, "step": 5033 }, { "epoch": 0.34108001897147505, "grad_norm": 8.77978229522705, "learning_rate": 9.413649120405231e-05, "loss": 1.0617, "step": 5034 }, { "epoch": 0.34114777423944714, "grad_norm": 6.854990005493164, "learning_rate": 9.413512218495449e-05, "loss": 1.0232, "step": 5035 }, { "epoch": 0.3412155295074192, "grad_norm": 8.380729675292969, "learning_rate": 9.413375316585667e-05, "loss": 0.7554, "step": 5036 }, { "epoch": 0.34128328477539127, "grad_norm": 6.667641639709473, "learning_rate": 9.413238414675886e-05, "loss": 0.8992, "step": 5037 }, { "epoch": 0.34135104004336336, "grad_norm": 7.5778489112854, "learning_rate": 9.413101512766104e-05, "loss": 0.9521, "step": 5038 }, { "epoch": 0.34141879531133545, "grad_norm": 10.053051948547363, "learning_rate": 9.412964610856322e-05, "loss": 0.9595, "step": 5039 }, { "epoch": 0.34148655057930755, "grad_norm": 7.460272789001465, "learning_rate": 9.41282770894654e-05, "loss": 0.683, "step": 5040 }, { "epoch": 0.34155430584727964, "grad_norm": 7.724341869354248, "learning_rate": 9.412690807036758e-05, "loss": 0.9902, "step": 5041 }, { "epoch": 0.34162206111525173, "grad_norm": 7.729767799377441, "learning_rate": 9.412553905126978e-05, "loss": 0.9249, "step": 5042 }, { "epoch": 0.3416898163832238, "grad_norm": 6.388404846191406, "learning_rate": 9.412417003217196e-05, "loss": 1.0154, "step": 5043 }, { "epoch": 0.34175757165119586, "grad_norm": 6.2206130027771, "learning_rate": 9.412280101307414e-05, "loss": 0.8169, "step": 5044 }, { "epoch": 0.34182532691916795, "grad_norm": 7.383151054382324, "learning_rate": 9.412143199397632e-05, "loss": 1.0476, "step": 5045 }, { "epoch": 0.34189308218714004, "grad_norm": 7.733860969543457, "learning_rate": 9.412006297487851e-05, "loss": 0.9861, "step": 5046 }, { "epoch": 0.34196083745511213, "grad_norm": 7.823349475860596, "learning_rate": 9.41186939557807e-05, "loss": 0.8351, "step": 5047 }, { "epoch": 0.3420285927230842, "grad_norm": 10.20052433013916, "learning_rate": 9.411732493668287e-05, "loss": 1.2573, "step": 5048 }, { "epoch": 0.3420963479910563, "grad_norm": 7.878631114959717, "learning_rate": 9.411595591758505e-05, "loss": 1.0979, "step": 5049 }, { "epoch": 0.3421641032590284, "grad_norm": 7.230095386505127, "learning_rate": 9.411458689848723e-05, "loss": 0.9929, "step": 5050 }, { "epoch": 0.3422318585270005, "grad_norm": 7.354804992675781, "learning_rate": 9.411321787938943e-05, "loss": 1.0552, "step": 5051 }, { "epoch": 0.34229961379497253, "grad_norm": 9.846439361572266, "learning_rate": 9.411184886029161e-05, "loss": 1.0616, "step": 5052 }, { "epoch": 0.3423673690629446, "grad_norm": 6.999122142791748, "learning_rate": 9.411047984119379e-05, "loss": 0.9958, "step": 5053 }, { "epoch": 0.3424351243309167, "grad_norm": 5.719442367553711, "learning_rate": 9.410911082209597e-05, "loss": 0.8239, "step": 5054 }, { "epoch": 0.3425028795988888, "grad_norm": 7.650005340576172, "learning_rate": 9.410774180299816e-05, "loss": 0.9736, "step": 5055 }, { "epoch": 0.3425706348668609, "grad_norm": 7.938320159912109, "learning_rate": 9.410637278390034e-05, "loss": 0.7809, "step": 5056 }, { "epoch": 0.342638390134833, "grad_norm": 8.319568634033203, "learning_rate": 9.410500376480252e-05, "loss": 0.9162, "step": 5057 }, { "epoch": 0.3427061454028051, "grad_norm": 8.151114463806152, "learning_rate": 9.41036347457047e-05, "loss": 1.1071, "step": 5058 }, { "epoch": 0.3427739006707772, "grad_norm": 7.333863735198975, "learning_rate": 9.410226572660688e-05, "loss": 0.8425, "step": 5059 }, { "epoch": 0.3428416559387492, "grad_norm": 6.848262310028076, "learning_rate": 9.410089670750908e-05, "loss": 0.9385, "step": 5060 }, { "epoch": 0.3429094112067213, "grad_norm": 7.205636024475098, "learning_rate": 9.409952768841126e-05, "loss": 0.8448, "step": 5061 }, { "epoch": 0.3429771664746934, "grad_norm": 7.133283615112305, "learning_rate": 9.409815866931344e-05, "loss": 0.8306, "step": 5062 }, { "epoch": 0.3430449217426655, "grad_norm": 6.693148136138916, "learning_rate": 9.409678965021562e-05, "loss": 1.0321, "step": 5063 }, { "epoch": 0.3431126770106376, "grad_norm": 7.183022975921631, "learning_rate": 9.40954206311178e-05, "loss": 1.0067, "step": 5064 }, { "epoch": 0.34318043227860967, "grad_norm": 7.537884712219238, "learning_rate": 9.409405161202e-05, "loss": 0.9719, "step": 5065 }, { "epoch": 0.34324818754658176, "grad_norm": 7.812978267669678, "learning_rate": 9.409268259292217e-05, "loss": 0.9936, "step": 5066 }, { "epoch": 0.34331594281455385, "grad_norm": 7.93212366104126, "learning_rate": 9.409131357382435e-05, "loss": 1.1685, "step": 5067 }, { "epoch": 0.3433836980825259, "grad_norm": 7.298057556152344, "learning_rate": 9.408994455472653e-05, "loss": 0.908, "step": 5068 }, { "epoch": 0.343451453350498, "grad_norm": 8.108441352844238, "learning_rate": 9.408857553562873e-05, "loss": 0.9916, "step": 5069 }, { "epoch": 0.3435192086184701, "grad_norm": 7.206883430480957, "learning_rate": 9.408720651653091e-05, "loss": 0.9431, "step": 5070 }, { "epoch": 0.34358696388644216, "grad_norm": 8.54073715209961, "learning_rate": 9.408583749743309e-05, "loss": 0.9156, "step": 5071 }, { "epoch": 0.34365471915441426, "grad_norm": 7.274294376373291, "learning_rate": 9.408446847833527e-05, "loss": 0.7436, "step": 5072 }, { "epoch": 0.34372247442238635, "grad_norm": 7.734363079071045, "learning_rate": 9.408309945923745e-05, "loss": 1.054, "step": 5073 }, { "epoch": 0.34379022969035844, "grad_norm": 7.952118873596191, "learning_rate": 9.408173044013964e-05, "loss": 0.9152, "step": 5074 }, { "epoch": 0.34385798495833053, "grad_norm": 7.481127738952637, "learning_rate": 9.408036142104182e-05, "loss": 1.0134, "step": 5075 }, { "epoch": 0.34392574022630257, "grad_norm": 8.52331256866455, "learning_rate": 9.4078992401944e-05, "loss": 0.9724, "step": 5076 }, { "epoch": 0.34399349549427466, "grad_norm": 7.454250335693359, "learning_rate": 9.40776233828462e-05, "loss": 0.9535, "step": 5077 }, { "epoch": 0.34406125076224675, "grad_norm": 7.192932605743408, "learning_rate": 9.407625436374838e-05, "loss": 0.8923, "step": 5078 }, { "epoch": 0.34412900603021884, "grad_norm": 8.29765796661377, "learning_rate": 9.407488534465056e-05, "loss": 1.0374, "step": 5079 }, { "epoch": 0.34419676129819093, "grad_norm": 7.1159820556640625, "learning_rate": 9.407351632555275e-05, "loss": 0.7876, "step": 5080 }, { "epoch": 0.344264516566163, "grad_norm": 7.157162189483643, "learning_rate": 9.407214730645493e-05, "loss": 0.8747, "step": 5081 }, { "epoch": 0.3443322718341351, "grad_norm": 6.686028003692627, "learning_rate": 9.407077828735711e-05, "loss": 0.8189, "step": 5082 }, { "epoch": 0.3444000271021072, "grad_norm": 8.957246780395508, "learning_rate": 9.406940926825931e-05, "loss": 1.0617, "step": 5083 }, { "epoch": 0.3444677823700793, "grad_norm": 7.599720001220703, "learning_rate": 9.406804024916149e-05, "loss": 1.0455, "step": 5084 }, { "epoch": 0.34453553763805134, "grad_norm": 5.757546424865723, "learning_rate": 9.406667123006367e-05, "loss": 0.7654, "step": 5085 }, { "epoch": 0.34460329290602343, "grad_norm": 6.592649936676025, "learning_rate": 9.406530221096585e-05, "loss": 0.9713, "step": 5086 }, { "epoch": 0.3446710481739955, "grad_norm": 7.119808673858643, "learning_rate": 9.406393319186803e-05, "loss": 0.7413, "step": 5087 }, { "epoch": 0.3447388034419676, "grad_norm": 6.288692951202393, "learning_rate": 9.406256417277022e-05, "loss": 0.9479, "step": 5088 }, { "epoch": 0.3448065587099397, "grad_norm": 6.267573356628418, "learning_rate": 9.40611951536724e-05, "loss": 0.9745, "step": 5089 }, { "epoch": 0.3448743139779118, "grad_norm": 7.639352798461914, "learning_rate": 9.405982613457458e-05, "loss": 0.9655, "step": 5090 }, { "epoch": 0.3449420692458839, "grad_norm": 6.32698917388916, "learning_rate": 9.405845711547676e-05, "loss": 0.789, "step": 5091 }, { "epoch": 0.345009824513856, "grad_norm": 8.769354820251465, "learning_rate": 9.405708809637896e-05, "loss": 1.1124, "step": 5092 }, { "epoch": 0.345077579781828, "grad_norm": 7.179650783538818, "learning_rate": 9.405571907728114e-05, "loss": 0.7939, "step": 5093 }, { "epoch": 0.3451453350498001, "grad_norm": 10.112159729003906, "learning_rate": 9.405435005818332e-05, "loss": 0.943, "step": 5094 }, { "epoch": 0.3452130903177722, "grad_norm": 6.925206661224365, "learning_rate": 9.40529810390855e-05, "loss": 1.0444, "step": 5095 }, { "epoch": 0.3452808455857443, "grad_norm": 7.8664398193359375, "learning_rate": 9.405161201998768e-05, "loss": 0.9643, "step": 5096 }, { "epoch": 0.3453486008537164, "grad_norm": 7.060378551483154, "learning_rate": 9.405024300088987e-05, "loss": 0.9043, "step": 5097 }, { "epoch": 0.3454163561216885, "grad_norm": 8.105093002319336, "learning_rate": 9.404887398179205e-05, "loss": 1.1106, "step": 5098 }, { "epoch": 0.34548411138966056, "grad_norm": 7.8055739402771, "learning_rate": 9.404750496269423e-05, "loss": 0.9638, "step": 5099 }, { "epoch": 0.34555186665763266, "grad_norm": 8.225363731384277, "learning_rate": 9.404613594359641e-05, "loss": 0.7878, "step": 5100 }, { "epoch": 0.3456196219256047, "grad_norm": 7.809800148010254, "learning_rate": 9.404476692449861e-05, "loss": 0.8241, "step": 5101 }, { "epoch": 0.3456873771935768, "grad_norm": 7.520929336547852, "learning_rate": 9.404339790540079e-05, "loss": 1.0689, "step": 5102 }, { "epoch": 0.3457551324615489, "grad_norm": 5.996449947357178, "learning_rate": 9.404202888630297e-05, "loss": 0.9163, "step": 5103 }, { "epoch": 0.34582288772952097, "grad_norm": 7.2040114402771, "learning_rate": 9.404065986720515e-05, "loss": 1.0615, "step": 5104 }, { "epoch": 0.34589064299749306, "grad_norm": 7.268972873687744, "learning_rate": 9.403929084810733e-05, "loss": 0.8785, "step": 5105 }, { "epoch": 0.34595839826546515, "grad_norm": 9.244166374206543, "learning_rate": 9.403792182900952e-05, "loss": 0.8714, "step": 5106 }, { "epoch": 0.34602615353343724, "grad_norm": 6.766915321350098, "learning_rate": 9.40365528099117e-05, "loss": 0.8075, "step": 5107 }, { "epoch": 0.34609390880140933, "grad_norm": 8.137444496154785, "learning_rate": 9.403518379081388e-05, "loss": 1.0687, "step": 5108 }, { "epoch": 0.34616166406938137, "grad_norm": 7.488312244415283, "learning_rate": 9.403381477171606e-05, "loss": 0.8056, "step": 5109 }, { "epoch": 0.34622941933735346, "grad_norm": 6.555777549743652, "learning_rate": 9.403244575261826e-05, "loss": 0.8761, "step": 5110 }, { "epoch": 0.34629717460532555, "grad_norm": 11.631979942321777, "learning_rate": 9.403107673352044e-05, "loss": 0.8139, "step": 5111 }, { "epoch": 0.34636492987329764, "grad_norm": 5.973453998565674, "learning_rate": 9.402970771442262e-05, "loss": 0.8816, "step": 5112 }, { "epoch": 0.34643268514126974, "grad_norm": 5.817539215087891, "learning_rate": 9.40283386953248e-05, "loss": 0.6034, "step": 5113 }, { "epoch": 0.3465004404092418, "grad_norm": 8.215511322021484, "learning_rate": 9.402696967622698e-05, "loss": 0.8876, "step": 5114 }, { "epoch": 0.3465681956772139, "grad_norm": 5.848570346832275, "learning_rate": 9.402560065712917e-05, "loss": 0.7093, "step": 5115 }, { "epoch": 0.346635950945186, "grad_norm": 7.996893405914307, "learning_rate": 9.402423163803135e-05, "loss": 0.8865, "step": 5116 }, { "epoch": 0.34670370621315805, "grad_norm": 7.348632335662842, "learning_rate": 9.402286261893353e-05, "loss": 0.8042, "step": 5117 }, { "epoch": 0.34677146148113014, "grad_norm": 6.820111274719238, "learning_rate": 9.402149359983571e-05, "loss": 0.8744, "step": 5118 }, { "epoch": 0.34683921674910223, "grad_norm": 7.354914665222168, "learning_rate": 9.40201245807379e-05, "loss": 0.892, "step": 5119 }, { "epoch": 0.3469069720170743, "grad_norm": 7.751887321472168, "learning_rate": 9.401875556164009e-05, "loss": 0.8752, "step": 5120 }, { "epoch": 0.3469747272850464, "grad_norm": 9.630827903747559, "learning_rate": 9.401738654254227e-05, "loss": 0.999, "step": 5121 }, { "epoch": 0.3470424825530185, "grad_norm": 9.546615600585938, "learning_rate": 9.401601752344445e-05, "loss": 1.341, "step": 5122 }, { "epoch": 0.3471102378209906, "grad_norm": 7.031918048858643, "learning_rate": 9.401464850434663e-05, "loss": 1.1371, "step": 5123 }, { "epoch": 0.3471779930889627, "grad_norm": 9.030802726745605, "learning_rate": 9.401327948524882e-05, "loss": 0.9883, "step": 5124 }, { "epoch": 0.3472457483569347, "grad_norm": 6.786712646484375, "learning_rate": 9.4011910466151e-05, "loss": 0.8692, "step": 5125 }, { "epoch": 0.3473135036249068, "grad_norm": 7.327836990356445, "learning_rate": 9.401054144705318e-05, "loss": 0.8157, "step": 5126 }, { "epoch": 0.3473812588928789, "grad_norm": 6.41863489151001, "learning_rate": 9.400917242795538e-05, "loss": 0.8565, "step": 5127 }, { "epoch": 0.347449014160851, "grad_norm": 5.737055778503418, "learning_rate": 9.400780340885756e-05, "loss": 0.7385, "step": 5128 }, { "epoch": 0.3475167694288231, "grad_norm": 6.321473121643066, "learning_rate": 9.400643438975974e-05, "loss": 0.6459, "step": 5129 }, { "epoch": 0.3475845246967952, "grad_norm": 9.07598876953125, "learning_rate": 9.400506537066193e-05, "loss": 1.3719, "step": 5130 }, { "epoch": 0.3476522799647673, "grad_norm": 6.3348388671875, "learning_rate": 9.400369635156411e-05, "loss": 0.903, "step": 5131 }, { "epoch": 0.34772003523273937, "grad_norm": 7.836405277252197, "learning_rate": 9.40023273324663e-05, "loss": 0.8854, "step": 5132 }, { "epoch": 0.34778779050071146, "grad_norm": 7.712037086486816, "learning_rate": 9.400095831336849e-05, "loss": 0.8756, "step": 5133 }, { "epoch": 0.3478555457686835, "grad_norm": 7.445090293884277, "learning_rate": 9.399958929427067e-05, "loss": 1.0776, "step": 5134 }, { "epoch": 0.3479233010366556, "grad_norm": 6.7496724128723145, "learning_rate": 9.399822027517285e-05, "loss": 0.9425, "step": 5135 }, { "epoch": 0.3479910563046277, "grad_norm": 7.703073501586914, "learning_rate": 9.399685125607503e-05, "loss": 0.9217, "step": 5136 }, { "epoch": 0.34805881157259977, "grad_norm": 5.972830772399902, "learning_rate": 9.399548223697721e-05, "loss": 0.5922, "step": 5137 }, { "epoch": 0.34812656684057186, "grad_norm": 7.165718078613281, "learning_rate": 9.39941132178794e-05, "loss": 0.9624, "step": 5138 }, { "epoch": 0.34819432210854395, "grad_norm": 8.589313507080078, "learning_rate": 9.399274419878158e-05, "loss": 1.0485, "step": 5139 }, { "epoch": 0.34826207737651604, "grad_norm": 6.503042697906494, "learning_rate": 9.399137517968376e-05, "loss": 0.7833, "step": 5140 }, { "epoch": 0.34832983264448814, "grad_norm": 9.673978805541992, "learning_rate": 9.399000616058594e-05, "loss": 1.2266, "step": 5141 }, { "epoch": 0.34839758791246017, "grad_norm": 6.86154842376709, "learning_rate": 9.398863714148812e-05, "loss": 0.8709, "step": 5142 }, { "epoch": 0.34846534318043226, "grad_norm": 8.111627578735352, "learning_rate": 9.398726812239032e-05, "loss": 0.9657, "step": 5143 }, { "epoch": 0.34853309844840435, "grad_norm": 7.725754737854004, "learning_rate": 9.39858991032925e-05, "loss": 0.9551, "step": 5144 }, { "epoch": 0.34860085371637645, "grad_norm": 8.093070030212402, "learning_rate": 9.398453008419468e-05, "loss": 0.9814, "step": 5145 }, { "epoch": 0.34866860898434854, "grad_norm": 8.375652313232422, "learning_rate": 9.398316106509686e-05, "loss": 0.9232, "step": 5146 }, { "epoch": 0.34873636425232063, "grad_norm": 7.012859344482422, "learning_rate": 9.398179204599905e-05, "loss": 0.8322, "step": 5147 }, { "epoch": 0.3488041195202927, "grad_norm": 8.437539100646973, "learning_rate": 9.398042302690123e-05, "loss": 0.852, "step": 5148 }, { "epoch": 0.3488718747882648, "grad_norm": 9.844721794128418, "learning_rate": 9.397905400780341e-05, "loss": 0.8601, "step": 5149 }, { "epoch": 0.34893963005623685, "grad_norm": 7.359288215637207, "learning_rate": 9.39776849887056e-05, "loss": 0.9313, "step": 5150 }, { "epoch": 0.34900738532420894, "grad_norm": 7.528818607330322, "learning_rate": 9.397631596960777e-05, "loss": 0.923, "step": 5151 }, { "epoch": 0.34907514059218103, "grad_norm": 7.8577399253845215, "learning_rate": 9.397494695050997e-05, "loss": 0.9569, "step": 5152 }, { "epoch": 0.3491428958601531, "grad_norm": 6.960932731628418, "learning_rate": 9.397357793141215e-05, "loss": 0.9025, "step": 5153 }, { "epoch": 0.3492106511281252, "grad_norm": 6.392679691314697, "learning_rate": 9.397220891231433e-05, "loss": 0.7948, "step": 5154 }, { "epoch": 0.3492784063960973, "grad_norm": 8.22850227355957, "learning_rate": 9.397083989321651e-05, "loss": 0.9932, "step": 5155 }, { "epoch": 0.3493461616640694, "grad_norm": 6.1851277351379395, "learning_rate": 9.39694708741187e-05, "loss": 0.776, "step": 5156 }, { "epoch": 0.3494139169320415, "grad_norm": 9.058121681213379, "learning_rate": 9.396810185502088e-05, "loss": 1.1036, "step": 5157 }, { "epoch": 0.3494816722000135, "grad_norm": 7.11410665512085, "learning_rate": 9.396673283592306e-05, "loss": 0.8862, "step": 5158 }, { "epoch": 0.3495494274679856, "grad_norm": 7.148082256317139, "learning_rate": 9.396536381682524e-05, "loss": 0.9796, "step": 5159 }, { "epoch": 0.3496171827359577, "grad_norm": 7.258500576019287, "learning_rate": 9.396399479772742e-05, "loss": 0.9617, "step": 5160 }, { "epoch": 0.3496849380039298, "grad_norm": 5.1490044593811035, "learning_rate": 9.396262577862962e-05, "loss": 0.6844, "step": 5161 }, { "epoch": 0.3497526932719019, "grad_norm": 6.73121452331543, "learning_rate": 9.39612567595318e-05, "loss": 1.0669, "step": 5162 }, { "epoch": 0.349820448539874, "grad_norm": 7.891530990600586, "learning_rate": 9.395988774043398e-05, "loss": 1.0439, "step": 5163 }, { "epoch": 0.3498882038078461, "grad_norm": 7.256270885467529, "learning_rate": 9.395851872133616e-05, "loss": 0.9682, "step": 5164 }, { "epoch": 0.34995595907581817, "grad_norm": 7.153442859649658, "learning_rate": 9.395714970223835e-05, "loss": 0.913, "step": 5165 }, { "epoch": 0.3500237143437902, "grad_norm": 8.722851753234863, "learning_rate": 9.395578068314053e-05, "loss": 0.8749, "step": 5166 }, { "epoch": 0.3500914696117623, "grad_norm": 6.8031005859375, "learning_rate": 9.395441166404271e-05, "loss": 0.8755, "step": 5167 }, { "epoch": 0.3501592248797344, "grad_norm": 5.449770450592041, "learning_rate": 9.39530426449449e-05, "loss": 0.8813, "step": 5168 }, { "epoch": 0.3502269801477065, "grad_norm": 7.74420690536499, "learning_rate": 9.395167362584707e-05, "loss": 0.8361, "step": 5169 }, { "epoch": 0.35029473541567857, "grad_norm": 7.8701677322387695, "learning_rate": 9.395030460674927e-05, "loss": 0.9294, "step": 5170 }, { "epoch": 0.35036249068365066, "grad_norm": 5.897029876708984, "learning_rate": 9.394893558765145e-05, "loss": 0.7739, "step": 5171 }, { "epoch": 0.35043024595162275, "grad_norm": 6.7868781089782715, "learning_rate": 9.394756656855363e-05, "loss": 0.7252, "step": 5172 }, { "epoch": 0.35049800121959485, "grad_norm": 10.289764404296875, "learning_rate": 9.394619754945582e-05, "loss": 1.0943, "step": 5173 }, { "epoch": 0.3505657564875669, "grad_norm": 7.676051616668701, "learning_rate": 9.3944828530358e-05, "loss": 0.881, "step": 5174 }, { "epoch": 0.350633511755539, "grad_norm": 7.702653408050537, "learning_rate": 9.394345951126018e-05, "loss": 1.0346, "step": 5175 }, { "epoch": 0.35070126702351107, "grad_norm": 7.525374889373779, "learning_rate": 9.394209049216238e-05, "loss": 0.9955, "step": 5176 }, { "epoch": 0.35076902229148316, "grad_norm": 7.942657470703125, "learning_rate": 9.394072147306456e-05, "loss": 0.9715, "step": 5177 }, { "epoch": 0.35083677755945525, "grad_norm": 9.328110694885254, "learning_rate": 9.393935245396674e-05, "loss": 1.0753, "step": 5178 }, { "epoch": 0.35090453282742734, "grad_norm": 8.149508476257324, "learning_rate": 9.393798343486893e-05, "loss": 0.9818, "step": 5179 }, { "epoch": 0.35097228809539943, "grad_norm": 6.882593154907227, "learning_rate": 9.393661441577111e-05, "loss": 1.105, "step": 5180 }, { "epoch": 0.3510400433633715, "grad_norm": 7.821188926696777, "learning_rate": 9.393524539667329e-05, "loss": 0.8106, "step": 5181 }, { "epoch": 0.35110779863134356, "grad_norm": 7.235447406768799, "learning_rate": 9.393387637757547e-05, "loss": 0.9304, "step": 5182 }, { "epoch": 0.35117555389931565, "grad_norm": 6.859196662902832, "learning_rate": 9.393250735847765e-05, "loss": 0.7797, "step": 5183 }, { "epoch": 0.35124330916728774, "grad_norm": 8.33879566192627, "learning_rate": 9.393113833937985e-05, "loss": 1.0075, "step": 5184 }, { "epoch": 0.35131106443525983, "grad_norm": 6.733922004699707, "learning_rate": 9.392976932028203e-05, "loss": 0.7304, "step": 5185 }, { "epoch": 0.3513788197032319, "grad_norm": 8.839714050292969, "learning_rate": 9.392840030118421e-05, "loss": 0.8852, "step": 5186 }, { "epoch": 0.351446574971204, "grad_norm": 9.954099655151367, "learning_rate": 9.392703128208639e-05, "loss": 1.1631, "step": 5187 }, { "epoch": 0.3515143302391761, "grad_norm": 8.042675971984863, "learning_rate": 9.392566226298858e-05, "loss": 0.8741, "step": 5188 }, { "epoch": 0.3515820855071482, "grad_norm": 6.661304950714111, "learning_rate": 9.392429324389076e-05, "loss": 0.8879, "step": 5189 }, { "epoch": 0.3516498407751203, "grad_norm": 6.568962097167969, "learning_rate": 9.392292422479294e-05, "loss": 0.6823, "step": 5190 }, { "epoch": 0.35171759604309233, "grad_norm": 6.836343288421631, "learning_rate": 9.392155520569512e-05, "loss": 0.8174, "step": 5191 }, { "epoch": 0.3517853513110644, "grad_norm": 7.555830478668213, "learning_rate": 9.39201861865973e-05, "loss": 0.803, "step": 5192 }, { "epoch": 0.3518531065790365, "grad_norm": 7.264036178588867, "learning_rate": 9.39188171674995e-05, "loss": 1.0786, "step": 5193 }, { "epoch": 0.3519208618470086, "grad_norm": 5.6876420974731445, "learning_rate": 9.391744814840168e-05, "loss": 0.7553, "step": 5194 }, { "epoch": 0.3519886171149807, "grad_norm": 8.890271186828613, "learning_rate": 9.391607912930386e-05, "loss": 1.0685, "step": 5195 }, { "epoch": 0.3520563723829528, "grad_norm": 6.010801315307617, "learning_rate": 9.391471011020604e-05, "loss": 0.7915, "step": 5196 }, { "epoch": 0.3521241276509249, "grad_norm": 8.496731758117676, "learning_rate": 9.391334109110822e-05, "loss": 1.2103, "step": 5197 }, { "epoch": 0.35219188291889697, "grad_norm": 8.196046829223633, "learning_rate": 9.391197207201041e-05, "loss": 1.0173, "step": 5198 }, { "epoch": 0.352259638186869, "grad_norm": 8.809300422668457, "learning_rate": 9.391060305291259e-05, "loss": 0.906, "step": 5199 }, { "epoch": 0.3523273934548411, "grad_norm": 8.437650680541992, "learning_rate": 9.390923403381477e-05, "loss": 0.8865, "step": 5200 }, { "epoch": 0.3523951487228132, "grad_norm": 8.659364700317383, "learning_rate": 9.390786501471695e-05, "loss": 0.8878, "step": 5201 }, { "epoch": 0.3524629039907853, "grad_norm": 6.658010482788086, "learning_rate": 9.390649599561915e-05, "loss": 0.9141, "step": 5202 }, { "epoch": 0.3525306592587574, "grad_norm": 6.702786445617676, "learning_rate": 9.390512697652133e-05, "loss": 0.8193, "step": 5203 }, { "epoch": 0.35259841452672946, "grad_norm": 8.0852632522583, "learning_rate": 9.390375795742351e-05, "loss": 0.9655, "step": 5204 }, { "epoch": 0.35266616979470156, "grad_norm": 6.598480224609375, "learning_rate": 9.390238893832569e-05, "loss": 0.8858, "step": 5205 }, { "epoch": 0.35273392506267365, "grad_norm": 8.510173797607422, "learning_rate": 9.390101991922787e-05, "loss": 1.0543, "step": 5206 }, { "epoch": 0.3528016803306457, "grad_norm": 7.498827934265137, "learning_rate": 9.389965090013006e-05, "loss": 1.138, "step": 5207 }, { "epoch": 0.3528694355986178, "grad_norm": 6.370652675628662, "learning_rate": 9.389828188103224e-05, "loss": 0.996, "step": 5208 }, { "epoch": 0.35293719086658987, "grad_norm": 6.830577373504639, "learning_rate": 9.389691286193442e-05, "loss": 0.7562, "step": 5209 }, { "epoch": 0.35300494613456196, "grad_norm": 7.01290225982666, "learning_rate": 9.38955438428366e-05, "loss": 0.8141, "step": 5210 }, { "epoch": 0.35307270140253405, "grad_norm": 6.957060813903809, "learning_rate": 9.38941748237388e-05, "loss": 1.1923, "step": 5211 }, { "epoch": 0.35314045667050614, "grad_norm": 5.929959297180176, "learning_rate": 9.389280580464098e-05, "loss": 0.7928, "step": 5212 }, { "epoch": 0.35320821193847823, "grad_norm": 8.654718399047852, "learning_rate": 9.389143678554316e-05, "loss": 1.0896, "step": 5213 }, { "epoch": 0.3532759672064503, "grad_norm": 5.981748580932617, "learning_rate": 9.389006776644534e-05, "loss": 0.9777, "step": 5214 }, { "epoch": 0.35334372247442236, "grad_norm": 6.251374244689941, "learning_rate": 9.388869874734752e-05, "loss": 0.8562, "step": 5215 }, { "epoch": 0.35341147774239445, "grad_norm": 7.049698352813721, "learning_rate": 9.388732972824971e-05, "loss": 0.8297, "step": 5216 }, { "epoch": 0.35347923301036654, "grad_norm": 8.04577350616455, "learning_rate": 9.38859607091519e-05, "loss": 1.2035, "step": 5217 }, { "epoch": 0.35354698827833864, "grad_norm": 6.800088405609131, "learning_rate": 9.388459169005407e-05, "loss": 0.9551, "step": 5218 }, { "epoch": 0.35361474354631073, "grad_norm": 7.368710517883301, "learning_rate": 9.388322267095627e-05, "loss": 0.8174, "step": 5219 }, { "epoch": 0.3536824988142828, "grad_norm": 7.573550701141357, "learning_rate": 9.388185365185845e-05, "loss": 1.0921, "step": 5220 }, { "epoch": 0.3537502540822549, "grad_norm": 8.282757759094238, "learning_rate": 9.388048463276063e-05, "loss": 0.9124, "step": 5221 }, { "epoch": 0.353818009350227, "grad_norm": 8.21078872680664, "learning_rate": 9.387911561366282e-05, "loss": 1.0818, "step": 5222 }, { "epoch": 0.35388576461819904, "grad_norm": 7.401734352111816, "learning_rate": 9.3877746594565e-05, "loss": 0.9906, "step": 5223 }, { "epoch": 0.35395351988617113, "grad_norm": 8.078129768371582, "learning_rate": 9.387637757546718e-05, "loss": 1.1233, "step": 5224 }, { "epoch": 0.3540212751541432, "grad_norm": 7.231998920440674, "learning_rate": 9.387500855636938e-05, "loss": 0.8412, "step": 5225 }, { "epoch": 0.3540890304221153, "grad_norm": 8.709794044494629, "learning_rate": 9.387363953727156e-05, "loss": 0.8346, "step": 5226 }, { "epoch": 0.3541567856900874, "grad_norm": 8.45758056640625, "learning_rate": 9.387227051817374e-05, "loss": 1.0201, "step": 5227 }, { "epoch": 0.3542245409580595, "grad_norm": 5.156687259674072, "learning_rate": 9.387090149907592e-05, "loss": 0.8029, "step": 5228 }, { "epoch": 0.3542922962260316, "grad_norm": 6.2013163566589355, "learning_rate": 9.38695324799781e-05, "loss": 0.8984, "step": 5229 }, { "epoch": 0.3543600514940037, "grad_norm": 7.414056301116943, "learning_rate": 9.386816346088029e-05, "loss": 0.8606, "step": 5230 }, { "epoch": 0.3544278067619757, "grad_norm": 8.167763710021973, "learning_rate": 9.386679444178247e-05, "loss": 0.973, "step": 5231 }, { "epoch": 0.3544955620299478, "grad_norm": 7.290304183959961, "learning_rate": 9.386542542268465e-05, "loss": 0.8988, "step": 5232 }, { "epoch": 0.3545633172979199, "grad_norm": 7.181061744689941, "learning_rate": 9.386405640358683e-05, "loss": 0.8689, "step": 5233 }, { "epoch": 0.354631072565892, "grad_norm": 8.87985610961914, "learning_rate": 9.386268738448903e-05, "loss": 1.0076, "step": 5234 }, { "epoch": 0.3546988278338641, "grad_norm": 6.205500602722168, "learning_rate": 9.386131836539121e-05, "loss": 0.8101, "step": 5235 }, { "epoch": 0.3547665831018362, "grad_norm": 7.425187587738037, "learning_rate": 9.385994934629339e-05, "loss": 0.9122, "step": 5236 }, { "epoch": 0.35483433836980827, "grad_norm": 9.541454315185547, "learning_rate": 9.385858032719557e-05, "loss": 0.9868, "step": 5237 }, { "epoch": 0.35490209363778036, "grad_norm": 6.853835582733154, "learning_rate": 9.385721130809775e-05, "loss": 0.9506, "step": 5238 }, { "epoch": 0.35496984890575245, "grad_norm": 9.07067584991455, "learning_rate": 9.385584228899994e-05, "loss": 0.9146, "step": 5239 }, { "epoch": 0.3550376041737245, "grad_norm": 7.5352373123168945, "learning_rate": 9.385447326990212e-05, "loss": 1.0089, "step": 5240 }, { "epoch": 0.3551053594416966, "grad_norm": 5.626684188842773, "learning_rate": 9.38531042508043e-05, "loss": 0.7057, "step": 5241 }, { "epoch": 0.35517311470966867, "grad_norm": 6.1156768798828125, "learning_rate": 9.385173523170648e-05, "loss": 0.808, "step": 5242 }, { "epoch": 0.35524086997764076, "grad_norm": 7.227247714996338, "learning_rate": 9.385036621260868e-05, "loss": 0.9477, "step": 5243 }, { "epoch": 0.35530862524561285, "grad_norm": 7.034331798553467, "learning_rate": 9.384899719351086e-05, "loss": 0.7929, "step": 5244 }, { "epoch": 0.35537638051358494, "grad_norm": 5.943993091583252, "learning_rate": 9.384762817441304e-05, "loss": 0.8886, "step": 5245 }, { "epoch": 0.35544413578155704, "grad_norm": 7.859194278717041, "learning_rate": 9.384625915531522e-05, "loss": 1.0028, "step": 5246 }, { "epoch": 0.3555118910495291, "grad_norm": 6.8557448387146, "learning_rate": 9.38448901362174e-05, "loss": 0.8946, "step": 5247 }, { "epoch": 0.35557964631750116, "grad_norm": 6.733648777008057, "learning_rate": 9.384352111711959e-05, "loss": 0.9186, "step": 5248 }, { "epoch": 0.35564740158547326, "grad_norm": 5.929673194885254, "learning_rate": 9.384215209802177e-05, "loss": 0.8132, "step": 5249 }, { "epoch": 0.35571515685344535, "grad_norm": 6.680620193481445, "learning_rate": 9.384078307892395e-05, "loss": 0.9197, "step": 5250 }, { "epoch": 0.35578291212141744, "grad_norm": 8.043455123901367, "learning_rate": 9.383941405982613e-05, "loss": 1.0841, "step": 5251 }, { "epoch": 0.35585066738938953, "grad_norm": 9.225475311279297, "learning_rate": 9.383804504072831e-05, "loss": 1.3691, "step": 5252 }, { "epoch": 0.3559184226573616, "grad_norm": 8.588724136352539, "learning_rate": 9.383667602163051e-05, "loss": 1.1282, "step": 5253 }, { "epoch": 0.3559861779253337, "grad_norm": 8.21908950805664, "learning_rate": 9.383530700253269e-05, "loss": 0.7606, "step": 5254 }, { "epoch": 0.3560539331933058, "grad_norm": 6.107995510101318, "learning_rate": 9.383393798343487e-05, "loss": 1.1818, "step": 5255 }, { "epoch": 0.35612168846127784, "grad_norm": 6.612033367156982, "learning_rate": 9.383256896433705e-05, "loss": 0.8892, "step": 5256 }, { "epoch": 0.35618944372924993, "grad_norm": 6.935641288757324, "learning_rate": 9.383119994523924e-05, "loss": 0.8565, "step": 5257 }, { "epoch": 0.356257198997222, "grad_norm": 7.328373908996582, "learning_rate": 9.382983092614142e-05, "loss": 0.9409, "step": 5258 }, { "epoch": 0.3563249542651941, "grad_norm": 7.016412734985352, "learning_rate": 9.38284619070436e-05, "loss": 0.8768, "step": 5259 }, { "epoch": 0.3563927095331662, "grad_norm": 7.122356414794922, "learning_rate": 9.382709288794578e-05, "loss": 0.9048, "step": 5260 }, { "epoch": 0.3564604648011383, "grad_norm": 7.590730667114258, "learning_rate": 9.382572386884796e-05, "loss": 0.8154, "step": 5261 }, { "epoch": 0.3565282200691104, "grad_norm": 5.5446858406066895, "learning_rate": 9.382435484975016e-05, "loss": 0.8783, "step": 5262 }, { "epoch": 0.3565959753370825, "grad_norm": 7.153842449188232, "learning_rate": 9.382298583065234e-05, "loss": 1.2412, "step": 5263 }, { "epoch": 0.3566637306050545, "grad_norm": 7.115203857421875, "learning_rate": 9.382161681155452e-05, "loss": 1.0302, "step": 5264 }, { "epoch": 0.3567314858730266, "grad_norm": 5.7724833488464355, "learning_rate": 9.382024779245671e-05, "loss": 0.5839, "step": 5265 }, { "epoch": 0.3567992411409987, "grad_norm": 7.793944358825684, "learning_rate": 9.381887877335889e-05, "loss": 1.0893, "step": 5266 }, { "epoch": 0.3568669964089708, "grad_norm": 6.561145782470703, "learning_rate": 9.381750975426107e-05, "loss": 0.911, "step": 5267 }, { "epoch": 0.3569347516769429, "grad_norm": 6.053153038024902, "learning_rate": 9.381614073516327e-05, "loss": 0.9239, "step": 5268 }, { "epoch": 0.357002506944915, "grad_norm": 7.162718772888184, "learning_rate": 9.381477171606545e-05, "loss": 0.7193, "step": 5269 }, { "epoch": 0.35707026221288707, "grad_norm": 7.634250640869141, "learning_rate": 9.381340269696763e-05, "loss": 0.8826, "step": 5270 }, { "epoch": 0.35713801748085916, "grad_norm": 6.502168655395508, "learning_rate": 9.381203367786982e-05, "loss": 0.8278, "step": 5271 }, { "epoch": 0.3572057727488312, "grad_norm": 7.339065074920654, "learning_rate": 9.3810664658772e-05, "loss": 0.8818, "step": 5272 }, { "epoch": 0.3572735280168033, "grad_norm": 8.079582214355469, "learning_rate": 9.380929563967418e-05, "loss": 0.8596, "step": 5273 }, { "epoch": 0.3573412832847754, "grad_norm": 6.368169784545898, "learning_rate": 9.380792662057636e-05, "loss": 0.7352, "step": 5274 }, { "epoch": 0.35740903855274747, "grad_norm": 7.813303470611572, "learning_rate": 9.380655760147854e-05, "loss": 0.876, "step": 5275 }, { "epoch": 0.35747679382071956, "grad_norm": 8.180051803588867, "learning_rate": 9.380518858238074e-05, "loss": 0.8893, "step": 5276 }, { "epoch": 0.35754454908869165, "grad_norm": 7.618046283721924, "learning_rate": 9.380381956328292e-05, "loss": 0.9647, "step": 5277 }, { "epoch": 0.35761230435666375, "grad_norm": 7.507559776306152, "learning_rate": 9.38024505441851e-05, "loss": 0.9524, "step": 5278 }, { "epoch": 0.35768005962463584, "grad_norm": 9.470221519470215, "learning_rate": 9.380108152508728e-05, "loss": 1.0057, "step": 5279 }, { "epoch": 0.3577478148926079, "grad_norm": 6.612621784210205, "learning_rate": 9.379971250598947e-05, "loss": 0.8489, "step": 5280 }, { "epoch": 0.35781557016057997, "grad_norm": 7.146090030670166, "learning_rate": 9.379834348689165e-05, "loss": 0.9585, "step": 5281 }, { "epoch": 0.35788332542855206, "grad_norm": 6.300119400024414, "learning_rate": 9.379697446779383e-05, "loss": 0.881, "step": 5282 }, { "epoch": 0.35795108069652415, "grad_norm": 7.285689353942871, "learning_rate": 9.379560544869601e-05, "loss": 1.0182, "step": 5283 }, { "epoch": 0.35801883596449624, "grad_norm": 8.943527221679688, "learning_rate": 9.379423642959819e-05, "loss": 0.9782, "step": 5284 }, { "epoch": 0.35808659123246833, "grad_norm": 8.551790237426758, "learning_rate": 9.379286741050039e-05, "loss": 0.8513, "step": 5285 }, { "epoch": 0.3581543465004404, "grad_norm": 8.670862197875977, "learning_rate": 9.379149839140257e-05, "loss": 0.9026, "step": 5286 }, { "epoch": 0.3582221017684125, "grad_norm": 8.31614875793457, "learning_rate": 9.379012937230475e-05, "loss": 0.7666, "step": 5287 }, { "epoch": 0.35828985703638455, "grad_norm": 6.347659111022949, "learning_rate": 9.378876035320693e-05, "loss": 0.953, "step": 5288 }, { "epoch": 0.35835761230435664, "grad_norm": 9.098658561706543, "learning_rate": 9.378739133410912e-05, "loss": 1.3235, "step": 5289 }, { "epoch": 0.35842536757232873, "grad_norm": 7.218830585479736, "learning_rate": 9.37860223150113e-05, "loss": 1.0008, "step": 5290 }, { "epoch": 0.3584931228403008, "grad_norm": 7.346166133880615, "learning_rate": 9.378465329591348e-05, "loss": 0.8441, "step": 5291 }, { "epoch": 0.3585608781082729, "grad_norm": 6.566136360168457, "learning_rate": 9.378328427681566e-05, "loss": 1.0381, "step": 5292 }, { "epoch": 0.358628633376245, "grad_norm": 7.599377632141113, "learning_rate": 9.378191525771784e-05, "loss": 0.8131, "step": 5293 }, { "epoch": 0.3586963886442171, "grad_norm": 6.4648284912109375, "learning_rate": 9.378054623862004e-05, "loss": 0.9599, "step": 5294 }, { "epoch": 0.3587641439121892, "grad_norm": 6.155094146728516, "learning_rate": 9.377917721952222e-05, "loss": 1.0911, "step": 5295 }, { "epoch": 0.3588318991801613, "grad_norm": 5.981008529663086, "learning_rate": 9.37778082004244e-05, "loss": 0.7692, "step": 5296 }, { "epoch": 0.3588996544481333, "grad_norm": 7.424664497375488, "learning_rate": 9.377643918132658e-05, "loss": 0.9248, "step": 5297 }, { "epoch": 0.3589674097161054, "grad_norm": 8.310460090637207, "learning_rate": 9.377507016222877e-05, "loss": 0.9659, "step": 5298 }, { "epoch": 0.3590351649840775, "grad_norm": 7.189448833465576, "learning_rate": 9.377370114313095e-05, "loss": 0.9844, "step": 5299 }, { "epoch": 0.3591029202520496, "grad_norm": 6.688578128814697, "learning_rate": 9.377233212403313e-05, "loss": 0.8488, "step": 5300 }, { "epoch": 0.3591706755200217, "grad_norm": 8.917186737060547, "learning_rate": 9.377096310493531e-05, "loss": 1.0579, "step": 5301 }, { "epoch": 0.3592384307879938, "grad_norm": 8.004783630371094, "learning_rate": 9.37695940858375e-05, "loss": 0.8347, "step": 5302 }, { "epoch": 0.35930618605596587, "grad_norm": 8.461618423461914, "learning_rate": 9.376822506673969e-05, "loss": 1.1304, "step": 5303 }, { "epoch": 0.35937394132393796, "grad_norm": 7.35521936416626, "learning_rate": 9.376685604764187e-05, "loss": 0.8222, "step": 5304 }, { "epoch": 0.35944169659191, "grad_norm": 7.697165012359619, "learning_rate": 9.376548702854405e-05, "loss": 0.8885, "step": 5305 }, { "epoch": 0.3595094518598821, "grad_norm": 8.730046272277832, "learning_rate": 9.376411800944623e-05, "loss": 0.9201, "step": 5306 }, { "epoch": 0.3595772071278542, "grad_norm": 7.738508701324463, "learning_rate": 9.376274899034841e-05, "loss": 0.828, "step": 5307 }, { "epoch": 0.3596449623958263, "grad_norm": 6.961246013641357, "learning_rate": 9.37613799712506e-05, "loss": 1.2298, "step": 5308 }, { "epoch": 0.35971271766379836, "grad_norm": 7.723383903503418, "learning_rate": 9.376001095215278e-05, "loss": 0.9714, "step": 5309 }, { "epoch": 0.35978047293177046, "grad_norm": 6.885270595550537, "learning_rate": 9.375864193305496e-05, "loss": 0.8352, "step": 5310 }, { "epoch": 0.35984822819974255, "grad_norm": 8.623848915100098, "learning_rate": 9.375727291395716e-05, "loss": 0.7216, "step": 5311 }, { "epoch": 0.35991598346771464, "grad_norm": 7.042871952056885, "learning_rate": 9.375590389485934e-05, "loss": 0.8098, "step": 5312 }, { "epoch": 0.3599837387356867, "grad_norm": 6.125864505767822, "learning_rate": 9.375453487576152e-05, "loss": 0.7628, "step": 5313 }, { "epoch": 0.36005149400365877, "grad_norm": 7.560012340545654, "learning_rate": 9.375316585666371e-05, "loss": 0.9114, "step": 5314 }, { "epoch": 0.36011924927163086, "grad_norm": 6.125675678253174, "learning_rate": 9.375179683756589e-05, "loss": 0.7894, "step": 5315 }, { "epoch": 0.36018700453960295, "grad_norm": 7.076975345611572, "learning_rate": 9.375042781846807e-05, "loss": 1.1084, "step": 5316 }, { "epoch": 0.36025475980757504, "grad_norm": 6.712325096130371, "learning_rate": 9.374905879937027e-05, "loss": 0.9326, "step": 5317 }, { "epoch": 0.36032251507554713, "grad_norm": 5.849967956542969, "learning_rate": 9.374768978027245e-05, "loss": 0.7884, "step": 5318 }, { "epoch": 0.3603902703435192, "grad_norm": 7.5400614738464355, "learning_rate": 9.374632076117463e-05, "loss": 0.9844, "step": 5319 }, { "epoch": 0.3604580256114913, "grad_norm": 5.720027446746826, "learning_rate": 9.374495174207681e-05, "loss": 0.8233, "step": 5320 }, { "epoch": 0.36052578087946335, "grad_norm": 6.872372627258301, "learning_rate": 9.3743582722979e-05, "loss": 0.6808, "step": 5321 }, { "epoch": 0.36059353614743545, "grad_norm": 6.98433780670166, "learning_rate": 9.374221370388118e-05, "loss": 0.642, "step": 5322 }, { "epoch": 0.36066129141540754, "grad_norm": 7.231049537658691, "learning_rate": 9.374084468478336e-05, "loss": 1.1379, "step": 5323 }, { "epoch": 0.36072904668337963, "grad_norm": 8.301416397094727, "learning_rate": 9.373947566568554e-05, "loss": 0.9184, "step": 5324 }, { "epoch": 0.3607968019513517, "grad_norm": 6.9752068519592285, "learning_rate": 9.373810664658772e-05, "loss": 0.9984, "step": 5325 }, { "epoch": 0.3608645572193238, "grad_norm": 6.826324939727783, "learning_rate": 9.373673762748992e-05, "loss": 0.8923, "step": 5326 }, { "epoch": 0.3609323124872959, "grad_norm": 6.605769634246826, "learning_rate": 9.37353686083921e-05, "loss": 0.8809, "step": 5327 }, { "epoch": 0.361000067755268, "grad_norm": 7.791666030883789, "learning_rate": 9.373399958929428e-05, "loss": 0.9199, "step": 5328 }, { "epoch": 0.36106782302324003, "grad_norm": 7.015176773071289, "learning_rate": 9.373263057019646e-05, "loss": 0.8271, "step": 5329 }, { "epoch": 0.3611355782912121, "grad_norm": 6.63329553604126, "learning_rate": 9.373126155109864e-05, "loss": 0.9052, "step": 5330 }, { "epoch": 0.3612033335591842, "grad_norm": 9.516122817993164, "learning_rate": 9.372989253200083e-05, "loss": 1.1517, "step": 5331 }, { "epoch": 0.3612710888271563, "grad_norm": 7.7722978591918945, "learning_rate": 9.372852351290301e-05, "loss": 1.0195, "step": 5332 }, { "epoch": 0.3613388440951284, "grad_norm": 7.2577338218688965, "learning_rate": 9.372715449380519e-05, "loss": 0.9093, "step": 5333 }, { "epoch": 0.3614065993631005, "grad_norm": 6.668231010437012, "learning_rate": 9.372578547470737e-05, "loss": 0.7677, "step": 5334 }, { "epoch": 0.3614743546310726, "grad_norm": 8.366500854492188, "learning_rate": 9.372441645560957e-05, "loss": 1.0869, "step": 5335 }, { "epoch": 0.3615421098990447, "grad_norm": 7.495104789733887, "learning_rate": 9.372304743651175e-05, "loss": 0.8074, "step": 5336 }, { "epoch": 0.3616098651670167, "grad_norm": 6.546140670776367, "learning_rate": 9.372167841741393e-05, "loss": 0.8792, "step": 5337 }, { "epoch": 0.3616776204349888, "grad_norm": 7.664621353149414, "learning_rate": 9.372030939831611e-05, "loss": 0.8177, "step": 5338 }, { "epoch": 0.3617453757029609, "grad_norm": 7.482497215270996, "learning_rate": 9.371894037921829e-05, "loss": 0.7624, "step": 5339 }, { "epoch": 0.361813130970933, "grad_norm": 11.148695945739746, "learning_rate": 9.371757136012048e-05, "loss": 1.0792, "step": 5340 }, { "epoch": 0.3618808862389051, "grad_norm": 9.08121109008789, "learning_rate": 9.371620234102266e-05, "loss": 1.0027, "step": 5341 }, { "epoch": 0.36194864150687717, "grad_norm": 8.424308776855469, "learning_rate": 9.371483332192484e-05, "loss": 1.2137, "step": 5342 }, { "epoch": 0.36201639677484926, "grad_norm": 8.55742359161377, "learning_rate": 9.371346430282702e-05, "loss": 0.8023, "step": 5343 }, { "epoch": 0.36208415204282135, "grad_norm": 7.197443008422852, "learning_rate": 9.371209528372922e-05, "loss": 1.0458, "step": 5344 }, { "epoch": 0.36215190731079344, "grad_norm": 8.317964553833008, "learning_rate": 9.37107262646314e-05, "loss": 0.9829, "step": 5345 }, { "epoch": 0.3622196625787655, "grad_norm": 8.819951057434082, "learning_rate": 9.370935724553358e-05, "loss": 1.0024, "step": 5346 }, { "epoch": 0.36228741784673757, "grad_norm": 6.795368671417236, "learning_rate": 9.370798822643576e-05, "loss": 1.0216, "step": 5347 }, { "epoch": 0.36235517311470966, "grad_norm": 5.370431900024414, "learning_rate": 9.370661920733794e-05, "loss": 0.8693, "step": 5348 }, { "epoch": 0.36242292838268175, "grad_norm": 7.330100059509277, "learning_rate": 9.370525018824013e-05, "loss": 1.0576, "step": 5349 }, { "epoch": 0.36249068365065384, "grad_norm": 6.56719446182251, "learning_rate": 9.370388116914231e-05, "loss": 0.8367, "step": 5350 }, { "epoch": 0.36255843891862594, "grad_norm": 7.462427616119385, "learning_rate": 9.370251215004449e-05, "loss": 0.8284, "step": 5351 }, { "epoch": 0.362626194186598, "grad_norm": 10.382340431213379, "learning_rate": 9.370114313094667e-05, "loss": 1.1184, "step": 5352 }, { "epoch": 0.3626939494545701, "grad_norm": 6.872713088989258, "learning_rate": 9.369977411184885e-05, "loss": 0.9656, "step": 5353 }, { "epoch": 0.36276170472254216, "grad_norm": 6.2565741539001465, "learning_rate": 9.369840509275105e-05, "loss": 0.9206, "step": 5354 }, { "epoch": 0.36282945999051425, "grad_norm": 8.036307334899902, "learning_rate": 9.369703607365323e-05, "loss": 1.2556, "step": 5355 }, { "epoch": 0.36289721525848634, "grad_norm": 7.401971340179443, "learning_rate": 9.369566705455541e-05, "loss": 0.9584, "step": 5356 }, { "epoch": 0.36296497052645843, "grad_norm": 7.890522003173828, "learning_rate": 9.369429803545759e-05, "loss": 1.0172, "step": 5357 }, { "epoch": 0.3630327257944305, "grad_norm": 7.4986348152160645, "learning_rate": 9.369292901635978e-05, "loss": 0.9985, "step": 5358 }, { "epoch": 0.3631004810624026, "grad_norm": 6.275259494781494, "learning_rate": 9.369155999726196e-05, "loss": 0.9051, "step": 5359 }, { "epoch": 0.3631682363303747, "grad_norm": 6.2667365074157715, "learning_rate": 9.369019097816416e-05, "loss": 0.9137, "step": 5360 }, { "epoch": 0.3632359915983468, "grad_norm": 7.5247483253479, "learning_rate": 9.368882195906634e-05, "loss": 0.9157, "step": 5361 }, { "epoch": 0.36330374686631883, "grad_norm": 8.757854461669922, "learning_rate": 9.368745293996852e-05, "loss": 1.2596, "step": 5362 }, { "epoch": 0.3633715021342909, "grad_norm": 7.257226943969727, "learning_rate": 9.368608392087071e-05, "loss": 0.8211, "step": 5363 }, { "epoch": 0.363439257402263, "grad_norm": 6.194060325622559, "learning_rate": 9.368471490177289e-05, "loss": 0.8816, "step": 5364 }, { "epoch": 0.3635070126702351, "grad_norm": 10.445760726928711, "learning_rate": 9.368334588267507e-05, "loss": 0.858, "step": 5365 }, { "epoch": 0.3635747679382072, "grad_norm": 6.156910419464111, "learning_rate": 9.368197686357725e-05, "loss": 1.0348, "step": 5366 }, { "epoch": 0.3636425232061793, "grad_norm": 6.490479946136475, "learning_rate": 9.368060784447945e-05, "loss": 0.8834, "step": 5367 }, { "epoch": 0.3637102784741514, "grad_norm": 6.535744667053223, "learning_rate": 9.367923882538163e-05, "loss": 0.6612, "step": 5368 }, { "epoch": 0.3637780337421235, "grad_norm": 8.068840980529785, "learning_rate": 9.36778698062838e-05, "loss": 1.0348, "step": 5369 }, { "epoch": 0.3638457890100955, "grad_norm": 6.666348934173584, "learning_rate": 9.367650078718599e-05, "loss": 0.8346, "step": 5370 }, { "epoch": 0.3639135442780676, "grad_norm": 7.482582092285156, "learning_rate": 9.367513176808817e-05, "loss": 0.7356, "step": 5371 }, { "epoch": 0.3639812995460397, "grad_norm": 6.928842067718506, "learning_rate": 9.367376274899036e-05, "loss": 0.7963, "step": 5372 }, { "epoch": 0.3640490548140118, "grad_norm": 6.574978351593018, "learning_rate": 9.367239372989254e-05, "loss": 0.7893, "step": 5373 }, { "epoch": 0.3641168100819839, "grad_norm": 6.267110824584961, "learning_rate": 9.367102471079472e-05, "loss": 0.6282, "step": 5374 }, { "epoch": 0.36418456534995597, "grad_norm": 6.368738651275635, "learning_rate": 9.36696556916969e-05, "loss": 0.8983, "step": 5375 }, { "epoch": 0.36425232061792806, "grad_norm": 7.211498737335205, "learning_rate": 9.36682866725991e-05, "loss": 0.9471, "step": 5376 }, { "epoch": 0.36432007588590015, "grad_norm": 6.124608516693115, "learning_rate": 9.366691765350128e-05, "loss": 0.8649, "step": 5377 }, { "epoch": 0.3643878311538722, "grad_norm": 7.325745582580566, "learning_rate": 9.366554863440346e-05, "loss": 0.9811, "step": 5378 }, { "epoch": 0.3644555864218443, "grad_norm": 5.563783645629883, "learning_rate": 9.366417961530564e-05, "loss": 0.833, "step": 5379 }, { "epoch": 0.36452334168981637, "grad_norm": 7.581454753875732, "learning_rate": 9.366281059620782e-05, "loss": 0.9078, "step": 5380 }, { "epoch": 0.36459109695778846, "grad_norm": 7.746644020080566, "learning_rate": 9.366144157711001e-05, "loss": 0.9143, "step": 5381 }, { "epoch": 0.36465885222576055, "grad_norm": 7.729243278503418, "learning_rate": 9.366007255801219e-05, "loss": 0.856, "step": 5382 }, { "epoch": 0.36472660749373265, "grad_norm": 7.337301731109619, "learning_rate": 9.365870353891437e-05, "loss": 0.9059, "step": 5383 }, { "epoch": 0.36479436276170474, "grad_norm": 7.622936248779297, "learning_rate": 9.365733451981655e-05, "loss": 1.0493, "step": 5384 }, { "epoch": 0.36486211802967683, "grad_norm": 7.159453392028809, "learning_rate": 9.365596550071873e-05, "loss": 0.824, "step": 5385 }, { "epoch": 0.36492987329764887, "grad_norm": 7.111423492431641, "learning_rate": 9.365459648162093e-05, "loss": 0.7201, "step": 5386 }, { "epoch": 0.36499762856562096, "grad_norm": 8.7478666305542, "learning_rate": 9.365322746252311e-05, "loss": 1.2497, "step": 5387 }, { "epoch": 0.36506538383359305, "grad_norm": 9.522677421569824, "learning_rate": 9.365185844342529e-05, "loss": 0.9175, "step": 5388 }, { "epoch": 0.36513313910156514, "grad_norm": 7.881449222564697, "learning_rate": 9.365048942432747e-05, "loss": 1.1123, "step": 5389 }, { "epoch": 0.36520089436953723, "grad_norm": 6.60174560546875, "learning_rate": 9.364912040522966e-05, "loss": 0.9148, "step": 5390 }, { "epoch": 0.3652686496375093, "grad_norm": 7.549520492553711, "learning_rate": 9.364775138613184e-05, "loss": 0.991, "step": 5391 }, { "epoch": 0.3653364049054814, "grad_norm": 7.287946701049805, "learning_rate": 9.364638236703402e-05, "loss": 0.984, "step": 5392 }, { "epoch": 0.3654041601734535, "grad_norm": 9.351056098937988, "learning_rate": 9.36450133479362e-05, "loss": 1.1052, "step": 5393 }, { "epoch": 0.36547191544142554, "grad_norm": 8.443605422973633, "learning_rate": 9.364364432883838e-05, "loss": 0.941, "step": 5394 }, { "epoch": 0.36553967070939763, "grad_norm": 6.416137218475342, "learning_rate": 9.364227530974058e-05, "loss": 0.9359, "step": 5395 }, { "epoch": 0.3656074259773697, "grad_norm": 7.125934600830078, "learning_rate": 9.364090629064276e-05, "loss": 1.0006, "step": 5396 }, { "epoch": 0.3656751812453418, "grad_norm": 8.161697387695312, "learning_rate": 9.363953727154494e-05, "loss": 0.8284, "step": 5397 }, { "epoch": 0.3657429365133139, "grad_norm": 6.955849647521973, "learning_rate": 9.363816825244712e-05, "loss": 0.7467, "step": 5398 }, { "epoch": 0.365810691781286, "grad_norm": 6.489741325378418, "learning_rate": 9.363679923334931e-05, "loss": 0.7611, "step": 5399 }, { "epoch": 0.3658784470492581, "grad_norm": 7.332844257354736, "learning_rate": 9.363543021425149e-05, "loss": 0.7666, "step": 5400 }, { "epoch": 0.3659462023172302, "grad_norm": 9.001418113708496, "learning_rate": 9.363406119515367e-05, "loss": 1.0219, "step": 5401 }, { "epoch": 0.3660139575852023, "grad_norm": 8.972160339355469, "learning_rate": 9.363269217605585e-05, "loss": 1.0122, "step": 5402 }, { "epoch": 0.3660817128531743, "grad_norm": 7.487551212310791, "learning_rate": 9.363132315695803e-05, "loss": 1.2551, "step": 5403 }, { "epoch": 0.3661494681211464, "grad_norm": 5.9293904304504395, "learning_rate": 9.362995413786023e-05, "loss": 0.7594, "step": 5404 }, { "epoch": 0.3662172233891185, "grad_norm": 7.69260311126709, "learning_rate": 9.362858511876241e-05, "loss": 0.9297, "step": 5405 }, { "epoch": 0.3662849786570906, "grad_norm": 6.723849296569824, "learning_rate": 9.362721609966459e-05, "loss": 1.0034, "step": 5406 }, { "epoch": 0.3663527339250627, "grad_norm": 8.467827796936035, "learning_rate": 9.362584708056678e-05, "loss": 0.9831, "step": 5407 }, { "epoch": 0.36642048919303477, "grad_norm": 6.563052654266357, "learning_rate": 9.362447806146896e-05, "loss": 0.8464, "step": 5408 }, { "epoch": 0.36648824446100686, "grad_norm": 7.124545574188232, "learning_rate": 9.362310904237114e-05, "loss": 1.1, "step": 5409 }, { "epoch": 0.36655599972897895, "grad_norm": 8.016179084777832, "learning_rate": 9.362174002327334e-05, "loss": 0.9962, "step": 5410 }, { "epoch": 0.366623754996951, "grad_norm": 5.937708854675293, "learning_rate": 9.362037100417552e-05, "loss": 0.5908, "step": 5411 }, { "epoch": 0.3666915102649231, "grad_norm": 6.945435523986816, "learning_rate": 9.36190019850777e-05, "loss": 0.7576, "step": 5412 }, { "epoch": 0.3667592655328952, "grad_norm": 6.756751537322998, "learning_rate": 9.361763296597989e-05, "loss": 0.7966, "step": 5413 }, { "epoch": 0.36682702080086727, "grad_norm": 6.434855937957764, "learning_rate": 9.361626394688207e-05, "loss": 0.8401, "step": 5414 }, { "epoch": 0.36689477606883936, "grad_norm": 6.420316696166992, "learning_rate": 9.361489492778425e-05, "loss": 0.8031, "step": 5415 }, { "epoch": 0.36696253133681145, "grad_norm": 5.286546230316162, "learning_rate": 9.361352590868643e-05, "loss": 0.6665, "step": 5416 }, { "epoch": 0.36703028660478354, "grad_norm": 8.99885368347168, "learning_rate": 9.361215688958861e-05, "loss": 0.7545, "step": 5417 }, { "epoch": 0.36709804187275563, "grad_norm": 7.546742916107178, "learning_rate": 9.36107878704908e-05, "loss": 1.0391, "step": 5418 }, { "epoch": 0.36716579714072767, "grad_norm": 6.452430248260498, "learning_rate": 9.360941885139299e-05, "loss": 0.8458, "step": 5419 }, { "epoch": 0.36723355240869976, "grad_norm": 7.125503063201904, "learning_rate": 9.360804983229517e-05, "loss": 0.8061, "step": 5420 }, { "epoch": 0.36730130767667185, "grad_norm": 7.046570301055908, "learning_rate": 9.360668081319735e-05, "loss": 1.0968, "step": 5421 }, { "epoch": 0.36736906294464394, "grad_norm": 7.032671928405762, "learning_rate": 9.360531179409954e-05, "loss": 0.9834, "step": 5422 }, { "epoch": 0.36743681821261603, "grad_norm": 8.430721282958984, "learning_rate": 9.360394277500172e-05, "loss": 0.8875, "step": 5423 }, { "epoch": 0.3675045734805881, "grad_norm": 8.67082405090332, "learning_rate": 9.36025737559039e-05, "loss": 0.9981, "step": 5424 }, { "epoch": 0.3675723287485602, "grad_norm": 6.5781707763671875, "learning_rate": 9.360120473680608e-05, "loss": 0.9702, "step": 5425 }, { "epoch": 0.3676400840165323, "grad_norm": 9.087675094604492, "learning_rate": 9.359983571770826e-05, "loss": 1.1141, "step": 5426 }, { "epoch": 0.36770783928450435, "grad_norm": 6.949502468109131, "learning_rate": 9.359846669861046e-05, "loss": 0.9167, "step": 5427 }, { "epoch": 0.36777559455247644, "grad_norm": 9.335396766662598, "learning_rate": 9.359709767951264e-05, "loss": 0.8119, "step": 5428 }, { "epoch": 0.36784334982044853, "grad_norm": 8.041236877441406, "learning_rate": 9.359572866041482e-05, "loss": 1.0112, "step": 5429 }, { "epoch": 0.3679111050884206, "grad_norm": 6.435655117034912, "learning_rate": 9.3594359641317e-05, "loss": 0.9589, "step": 5430 }, { "epoch": 0.3679788603563927, "grad_norm": 8.047952651977539, "learning_rate": 9.359299062221919e-05, "loss": 0.7143, "step": 5431 }, { "epoch": 0.3680466156243648, "grad_norm": 6.986575126647949, "learning_rate": 9.359162160312137e-05, "loss": 0.8857, "step": 5432 }, { "epoch": 0.3681143708923369, "grad_norm": 5.447303771972656, "learning_rate": 9.359025258402355e-05, "loss": 0.7229, "step": 5433 }, { "epoch": 0.368182126160309, "grad_norm": 6.337569713592529, "learning_rate": 9.358888356492573e-05, "loss": 0.8595, "step": 5434 }, { "epoch": 0.368249881428281, "grad_norm": 7.318792819976807, "learning_rate": 9.358751454582791e-05, "loss": 0.8131, "step": 5435 }, { "epoch": 0.3683176366962531, "grad_norm": 6.816128730773926, "learning_rate": 9.35861455267301e-05, "loss": 0.6972, "step": 5436 }, { "epoch": 0.3683853919642252, "grad_norm": 6.886023998260498, "learning_rate": 9.358477650763229e-05, "loss": 0.9075, "step": 5437 }, { "epoch": 0.3684531472321973, "grad_norm": 7.4018049240112305, "learning_rate": 9.358340748853447e-05, "loss": 0.7913, "step": 5438 }, { "epoch": 0.3685209025001694, "grad_norm": 6.976071834564209, "learning_rate": 9.358203846943665e-05, "loss": 0.8306, "step": 5439 }, { "epoch": 0.3685886577681415, "grad_norm": 8.005870819091797, "learning_rate": 9.358066945033883e-05, "loss": 1.0109, "step": 5440 }, { "epoch": 0.3686564130361136, "grad_norm": 8.600711822509766, "learning_rate": 9.357930043124102e-05, "loss": 1.022, "step": 5441 }, { "epoch": 0.36872416830408566, "grad_norm": 9.857340812683105, "learning_rate": 9.35779314121432e-05, "loss": 1.1266, "step": 5442 }, { "epoch": 0.3687919235720577, "grad_norm": 8.064130783081055, "learning_rate": 9.357656239304538e-05, "loss": 0.9224, "step": 5443 }, { "epoch": 0.3688596788400298, "grad_norm": 9.217019081115723, "learning_rate": 9.357519337394756e-05, "loss": 1.0041, "step": 5444 }, { "epoch": 0.3689274341080019, "grad_norm": 7.042741298675537, "learning_rate": 9.357382435484976e-05, "loss": 0.6798, "step": 5445 }, { "epoch": 0.368995189375974, "grad_norm": 6.679181098937988, "learning_rate": 9.357245533575194e-05, "loss": 0.9641, "step": 5446 }, { "epoch": 0.36906294464394607, "grad_norm": 8.608474731445312, "learning_rate": 9.357108631665412e-05, "loss": 0.8224, "step": 5447 }, { "epoch": 0.36913069991191816, "grad_norm": 8.975968360900879, "learning_rate": 9.35697172975563e-05, "loss": 0.9103, "step": 5448 }, { "epoch": 0.36919845517989025, "grad_norm": 7.249898910522461, "learning_rate": 9.356834827845848e-05, "loss": 1.0178, "step": 5449 }, { "epoch": 0.36926621044786234, "grad_norm": 6.6788787841796875, "learning_rate": 9.356697925936067e-05, "loss": 0.7786, "step": 5450 }, { "epoch": 0.36933396571583443, "grad_norm": 7.834086894989014, "learning_rate": 9.356561024026285e-05, "loss": 1.0282, "step": 5451 }, { "epoch": 0.36940172098380647, "grad_norm": 7.36384391784668, "learning_rate": 9.356424122116503e-05, "loss": 1.0448, "step": 5452 }, { "epoch": 0.36946947625177856, "grad_norm": 7.843540191650391, "learning_rate": 9.356287220206723e-05, "loss": 1.0646, "step": 5453 }, { "epoch": 0.36953723151975065, "grad_norm": 8.009191513061523, "learning_rate": 9.35615031829694e-05, "loss": 1.0279, "step": 5454 }, { "epoch": 0.36960498678772274, "grad_norm": 7.594022274017334, "learning_rate": 9.356013416387159e-05, "loss": 1.0235, "step": 5455 }, { "epoch": 0.36967274205569484, "grad_norm": 6.167095184326172, "learning_rate": 9.355876514477378e-05, "loss": 0.7107, "step": 5456 }, { "epoch": 0.36974049732366693, "grad_norm": 8.07007884979248, "learning_rate": 9.355739612567596e-05, "loss": 0.9667, "step": 5457 }, { "epoch": 0.369808252591639, "grad_norm": 7.6446099281311035, "learning_rate": 9.355602710657814e-05, "loss": 0.9758, "step": 5458 }, { "epoch": 0.3698760078596111, "grad_norm": 7.282294273376465, "learning_rate": 9.355465808748034e-05, "loss": 0.929, "step": 5459 }, { "epoch": 0.36994376312758315, "grad_norm": 9.878494262695312, "learning_rate": 9.355328906838252e-05, "loss": 0.9909, "step": 5460 }, { "epoch": 0.37001151839555524, "grad_norm": 7.331822395324707, "learning_rate": 9.35519200492847e-05, "loss": 0.9693, "step": 5461 }, { "epoch": 0.37007927366352733, "grad_norm": 7.3209404945373535, "learning_rate": 9.355055103018688e-05, "loss": 0.8587, "step": 5462 }, { "epoch": 0.3701470289314994, "grad_norm": 8.261675834655762, "learning_rate": 9.354918201108906e-05, "loss": 0.9391, "step": 5463 }, { "epoch": 0.3702147841994715, "grad_norm": 7.214691162109375, "learning_rate": 9.354781299199125e-05, "loss": 0.8805, "step": 5464 }, { "epoch": 0.3702825394674436, "grad_norm": 7.409287452697754, "learning_rate": 9.354644397289343e-05, "loss": 0.6792, "step": 5465 }, { "epoch": 0.3703502947354157, "grad_norm": 6.368542194366455, "learning_rate": 9.354507495379561e-05, "loss": 0.6464, "step": 5466 }, { "epoch": 0.3704180500033878, "grad_norm": 7.106698513031006, "learning_rate": 9.354370593469779e-05, "loss": 1.0956, "step": 5467 }, { "epoch": 0.3704858052713598, "grad_norm": 7.591500759124756, "learning_rate": 9.354233691559999e-05, "loss": 0.9643, "step": 5468 }, { "epoch": 0.3705535605393319, "grad_norm": 7.255499839782715, "learning_rate": 9.354096789650217e-05, "loss": 0.7782, "step": 5469 }, { "epoch": 0.370621315807304, "grad_norm": 8.414693832397461, "learning_rate": 9.353959887740435e-05, "loss": 0.9433, "step": 5470 }, { "epoch": 0.3706890710752761, "grad_norm": 7.294419765472412, "learning_rate": 9.353822985830653e-05, "loss": 0.9643, "step": 5471 }, { "epoch": 0.3707568263432482, "grad_norm": 7.249752998352051, "learning_rate": 9.353686083920871e-05, "loss": 0.8529, "step": 5472 }, { "epoch": 0.3708245816112203, "grad_norm": 9.345986366271973, "learning_rate": 9.35354918201109e-05, "loss": 0.9641, "step": 5473 }, { "epoch": 0.3708923368791924, "grad_norm": 7.828972816467285, "learning_rate": 9.353412280101308e-05, "loss": 1.1, "step": 5474 }, { "epoch": 0.37096009214716447, "grad_norm": 7.739612579345703, "learning_rate": 9.353275378191526e-05, "loss": 0.8228, "step": 5475 }, { "epoch": 0.3710278474151365, "grad_norm": 7.853143215179443, "learning_rate": 9.353138476281744e-05, "loss": 0.9088, "step": 5476 }, { "epoch": 0.3710956026831086, "grad_norm": 7.996474742889404, "learning_rate": 9.353001574371964e-05, "loss": 0.9291, "step": 5477 }, { "epoch": 0.3711633579510807, "grad_norm": 7.374247074127197, "learning_rate": 9.352864672462182e-05, "loss": 1.0619, "step": 5478 }, { "epoch": 0.3712311132190528, "grad_norm": 8.037210464477539, "learning_rate": 9.3527277705524e-05, "loss": 0.9426, "step": 5479 }, { "epoch": 0.37129886848702487, "grad_norm": 5.515965938568115, "learning_rate": 9.352590868642618e-05, "loss": 0.6838, "step": 5480 }, { "epoch": 0.37136662375499696, "grad_norm": 8.530059814453125, "learning_rate": 9.352453966732836e-05, "loss": 0.9221, "step": 5481 }, { "epoch": 0.37143437902296905, "grad_norm": 8.07000732421875, "learning_rate": 9.352317064823055e-05, "loss": 0.9321, "step": 5482 }, { "epoch": 0.37150213429094114, "grad_norm": 8.79816722869873, "learning_rate": 9.352180162913273e-05, "loss": 1.0875, "step": 5483 }, { "epoch": 0.3715698895589132, "grad_norm": 8.476999282836914, "learning_rate": 9.352043261003491e-05, "loss": 1.0144, "step": 5484 }, { "epoch": 0.37163764482688527, "grad_norm": 5.7845540046691895, "learning_rate": 9.351906359093709e-05, "loss": 0.6802, "step": 5485 }, { "epoch": 0.37170540009485736, "grad_norm": 6.311066627502441, "learning_rate": 9.351769457183929e-05, "loss": 0.7673, "step": 5486 }, { "epoch": 0.37177315536282945, "grad_norm": 6.929514408111572, "learning_rate": 9.351632555274147e-05, "loss": 1.1287, "step": 5487 }, { "epoch": 0.37184091063080155, "grad_norm": 7.7819132804870605, "learning_rate": 9.351495653364365e-05, "loss": 0.8828, "step": 5488 }, { "epoch": 0.37190866589877364, "grad_norm": 7.906317710876465, "learning_rate": 9.351358751454583e-05, "loss": 0.7961, "step": 5489 }, { "epoch": 0.37197642116674573, "grad_norm": 8.190624237060547, "learning_rate": 9.351221849544801e-05, "loss": 0.8106, "step": 5490 }, { "epoch": 0.3720441764347178, "grad_norm": 6.0358734130859375, "learning_rate": 9.35108494763502e-05, "loss": 1.0342, "step": 5491 }, { "epoch": 0.37211193170268986, "grad_norm": 7.096914291381836, "learning_rate": 9.350948045725238e-05, "loss": 0.6652, "step": 5492 }, { "epoch": 0.37217968697066195, "grad_norm": 7.488151550292969, "learning_rate": 9.350811143815456e-05, "loss": 0.9283, "step": 5493 }, { "epoch": 0.37224744223863404, "grad_norm": 7.084420680999756, "learning_rate": 9.350674241905674e-05, "loss": 1.0424, "step": 5494 }, { "epoch": 0.37231519750660613, "grad_norm": 7.483108997344971, "learning_rate": 9.350537339995892e-05, "loss": 1.0657, "step": 5495 }, { "epoch": 0.3723829527745782, "grad_norm": 7.312635898590088, "learning_rate": 9.350400438086112e-05, "loss": 0.7831, "step": 5496 }, { "epoch": 0.3724507080425503, "grad_norm": 6.901159763336182, "learning_rate": 9.35026353617633e-05, "loss": 0.9832, "step": 5497 }, { "epoch": 0.3725184633105224, "grad_norm": 7.787426471710205, "learning_rate": 9.350126634266548e-05, "loss": 0.9112, "step": 5498 }, { "epoch": 0.3725862185784945, "grad_norm": 7.2583746910095215, "learning_rate": 9.349989732356767e-05, "loss": 0.757, "step": 5499 }, { "epoch": 0.37265397384646654, "grad_norm": 7.068667411804199, "learning_rate": 9.349852830446985e-05, "loss": 0.8516, "step": 5500 }, { "epoch": 0.3727217291144386, "grad_norm": 8.109786987304688, "learning_rate": 9.349715928537203e-05, "loss": 0.7925, "step": 5501 }, { "epoch": 0.3727894843824107, "grad_norm": 7.539520740509033, "learning_rate": 9.349579026627423e-05, "loss": 1.283, "step": 5502 }, { "epoch": 0.3728572396503828, "grad_norm": 8.608511924743652, "learning_rate": 9.34944212471764e-05, "loss": 1.2367, "step": 5503 }, { "epoch": 0.3729249949183549, "grad_norm": 7.932346820831299, "learning_rate": 9.349305222807859e-05, "loss": 0.894, "step": 5504 }, { "epoch": 0.372992750186327, "grad_norm": 6.520984172821045, "learning_rate": 9.349168320898078e-05, "loss": 0.884, "step": 5505 }, { "epoch": 0.3730605054542991, "grad_norm": 6.876826286315918, "learning_rate": 9.349031418988296e-05, "loss": 0.9447, "step": 5506 }, { "epoch": 0.3731282607222712, "grad_norm": 8.540080070495605, "learning_rate": 9.348894517078514e-05, "loss": 1.0709, "step": 5507 }, { "epoch": 0.37319601599024327, "grad_norm": 7.760754585266113, "learning_rate": 9.348757615168732e-05, "loss": 1.0666, "step": 5508 }, { "epoch": 0.3732637712582153, "grad_norm": 6.1746134757995605, "learning_rate": 9.348620713258951e-05, "loss": 0.8367, "step": 5509 }, { "epoch": 0.3733315265261874, "grad_norm": 8.053776741027832, "learning_rate": 9.34848381134917e-05, "loss": 1.253, "step": 5510 }, { "epoch": 0.3733992817941595, "grad_norm": 10.046021461486816, "learning_rate": 9.348346909439388e-05, "loss": 0.9086, "step": 5511 }, { "epoch": 0.3734670370621316, "grad_norm": 7.247015953063965, "learning_rate": 9.348210007529606e-05, "loss": 1.3036, "step": 5512 }, { "epoch": 0.37353479233010367, "grad_norm": 6.103085517883301, "learning_rate": 9.348073105619824e-05, "loss": 0.9778, "step": 5513 }, { "epoch": 0.37360254759807576, "grad_norm": 7.330771446228027, "learning_rate": 9.347936203710043e-05, "loss": 0.8335, "step": 5514 }, { "epoch": 0.37367030286604785, "grad_norm": 7.571770668029785, "learning_rate": 9.347799301800261e-05, "loss": 0.9963, "step": 5515 }, { "epoch": 0.37373805813401995, "grad_norm": 8.018990516662598, "learning_rate": 9.347662399890479e-05, "loss": 0.7925, "step": 5516 }, { "epoch": 0.373805813401992, "grad_norm": 7.156185626983643, "learning_rate": 9.347525497980697e-05, "loss": 0.7821, "step": 5517 }, { "epoch": 0.3738735686699641, "grad_norm": 8.189929008483887, "learning_rate": 9.347388596070915e-05, "loss": 0.8997, "step": 5518 }, { "epoch": 0.37394132393793617, "grad_norm": 6.826793670654297, "learning_rate": 9.347251694161135e-05, "loss": 0.7392, "step": 5519 }, { "epoch": 0.37400907920590826, "grad_norm": 7.931643962860107, "learning_rate": 9.347114792251353e-05, "loss": 1.1607, "step": 5520 }, { "epoch": 0.37407683447388035, "grad_norm": 6.973018646240234, "learning_rate": 9.34697789034157e-05, "loss": 0.7419, "step": 5521 }, { "epoch": 0.37414458974185244, "grad_norm": 7.43549919128418, "learning_rate": 9.346840988431789e-05, "loss": 1.0035, "step": 5522 }, { "epoch": 0.37421234500982453, "grad_norm": 6.700629234313965, "learning_rate": 9.346704086522008e-05, "loss": 1.0953, "step": 5523 }, { "epoch": 0.3742801002777966, "grad_norm": 6.039200782775879, "learning_rate": 9.346567184612226e-05, "loss": 0.6512, "step": 5524 }, { "epoch": 0.37434785554576866, "grad_norm": 8.45660400390625, "learning_rate": 9.346430282702444e-05, "loss": 1.0725, "step": 5525 }, { "epoch": 0.37441561081374075, "grad_norm": 8.787965774536133, "learning_rate": 9.346293380792662e-05, "loss": 1.0101, "step": 5526 }, { "epoch": 0.37448336608171284, "grad_norm": 8.229619026184082, "learning_rate": 9.34615647888288e-05, "loss": 1.2651, "step": 5527 }, { "epoch": 0.37455112134968493, "grad_norm": 6.913321018218994, "learning_rate": 9.3460195769731e-05, "loss": 0.9196, "step": 5528 }, { "epoch": 0.374618876617657, "grad_norm": 5.944606781005859, "learning_rate": 9.345882675063318e-05, "loss": 0.857, "step": 5529 }, { "epoch": 0.3746866318856291, "grad_norm": 9.863933563232422, "learning_rate": 9.345745773153536e-05, "loss": 1.0566, "step": 5530 }, { "epoch": 0.3747543871536012, "grad_norm": 6.494686603546143, "learning_rate": 9.345608871243754e-05, "loss": 0.8839, "step": 5531 }, { "epoch": 0.3748221424215733, "grad_norm": 6.558131694793701, "learning_rate": 9.345471969333973e-05, "loss": 0.9802, "step": 5532 }, { "epoch": 0.37488989768954534, "grad_norm": 7.316765308380127, "learning_rate": 9.345335067424191e-05, "loss": 1.1635, "step": 5533 }, { "epoch": 0.37495765295751743, "grad_norm": 6.615121841430664, "learning_rate": 9.345198165514409e-05, "loss": 0.919, "step": 5534 }, { "epoch": 0.3750254082254895, "grad_norm": 8.071523666381836, "learning_rate": 9.345061263604627e-05, "loss": 1.0023, "step": 5535 }, { "epoch": 0.3750931634934616, "grad_norm": 7.563895225524902, "learning_rate": 9.344924361694845e-05, "loss": 0.963, "step": 5536 }, { "epoch": 0.3751609187614337, "grad_norm": 6.013591766357422, "learning_rate": 9.344787459785065e-05, "loss": 0.7862, "step": 5537 }, { "epoch": 0.3752286740294058, "grad_norm": 7.296882629394531, "learning_rate": 9.344650557875283e-05, "loss": 1.0106, "step": 5538 }, { "epoch": 0.3752964292973779, "grad_norm": 7.192817211151123, "learning_rate": 9.3445136559655e-05, "loss": 0.8491, "step": 5539 }, { "epoch": 0.37536418456535, "grad_norm": 7.962235927581787, "learning_rate": 9.344376754055719e-05, "loss": 1.0711, "step": 5540 }, { "epoch": 0.375431939833322, "grad_norm": 6.515812873840332, "learning_rate": 9.344239852145937e-05, "loss": 0.8779, "step": 5541 }, { "epoch": 0.3754996951012941, "grad_norm": 8.563304901123047, "learning_rate": 9.344102950236156e-05, "loss": 1.3236, "step": 5542 }, { "epoch": 0.3755674503692662, "grad_norm": 8.214926719665527, "learning_rate": 9.343966048326374e-05, "loss": 0.9231, "step": 5543 }, { "epoch": 0.3756352056372383, "grad_norm": 6.213039398193359, "learning_rate": 9.343829146416592e-05, "loss": 0.8446, "step": 5544 }, { "epoch": 0.3757029609052104, "grad_norm": 6.392467021942139, "learning_rate": 9.343692244506812e-05, "loss": 0.6742, "step": 5545 }, { "epoch": 0.3757707161731825, "grad_norm": 6.8930206298828125, "learning_rate": 9.34355534259703e-05, "loss": 0.9973, "step": 5546 }, { "epoch": 0.37583847144115456, "grad_norm": 8.074743270874023, "learning_rate": 9.343418440687248e-05, "loss": 0.8808, "step": 5547 }, { "epoch": 0.37590622670912666, "grad_norm": 8.443988800048828, "learning_rate": 9.343281538777467e-05, "loss": 1.0066, "step": 5548 }, { "epoch": 0.3759739819770987, "grad_norm": 6.530252933502197, "learning_rate": 9.343144636867685e-05, "loss": 1.0203, "step": 5549 }, { "epoch": 0.3760417372450708, "grad_norm": 6.693957328796387, "learning_rate": 9.343007734957903e-05, "loss": 0.9484, "step": 5550 }, { "epoch": 0.3761094925130429, "grad_norm": 7.103133201599121, "learning_rate": 9.342870833048122e-05, "loss": 1.0442, "step": 5551 }, { "epoch": 0.37617724778101497, "grad_norm": 6.089579105377197, "learning_rate": 9.34273393113834e-05, "loss": 0.8828, "step": 5552 }, { "epoch": 0.37624500304898706, "grad_norm": 5.47523307800293, "learning_rate": 9.342597029228559e-05, "loss": 0.8302, "step": 5553 }, { "epoch": 0.37631275831695915, "grad_norm": 7.930117130279541, "learning_rate": 9.342460127318777e-05, "loss": 0.9565, "step": 5554 }, { "epoch": 0.37638051358493124, "grad_norm": 6.38248348236084, "learning_rate": 9.342323225408996e-05, "loss": 0.8815, "step": 5555 }, { "epoch": 0.37644826885290333, "grad_norm": 6.382977485656738, "learning_rate": 9.342186323499214e-05, "loss": 0.8472, "step": 5556 }, { "epoch": 0.3765160241208754, "grad_norm": 6.029202461242676, "learning_rate": 9.342049421589432e-05, "loss": 0.8102, "step": 5557 }, { "epoch": 0.37658377938884746, "grad_norm": 9.764986991882324, "learning_rate": 9.34191251967965e-05, "loss": 0.7955, "step": 5558 }, { "epoch": 0.37665153465681955, "grad_norm": 8.568723678588867, "learning_rate": 9.341775617769868e-05, "loss": 1.0919, "step": 5559 }, { "epoch": 0.37671928992479164, "grad_norm": 13.348160743713379, "learning_rate": 9.341638715860087e-05, "loss": 1.202, "step": 5560 }, { "epoch": 0.37678704519276374, "grad_norm": 7.161466121673584, "learning_rate": 9.341501813950306e-05, "loss": 0.909, "step": 5561 }, { "epoch": 0.37685480046073583, "grad_norm": 8.446759223937988, "learning_rate": 9.341364912040524e-05, "loss": 0.8871, "step": 5562 }, { "epoch": 0.3769225557287079, "grad_norm": 7.228363037109375, "learning_rate": 9.341228010130742e-05, "loss": 0.961, "step": 5563 }, { "epoch": 0.37699031099668, "grad_norm": 7.427947998046875, "learning_rate": 9.341091108220961e-05, "loss": 1.1086, "step": 5564 }, { "epoch": 0.3770580662646521, "grad_norm": 8.64295482635498, "learning_rate": 9.340954206311179e-05, "loss": 0.9684, "step": 5565 }, { "epoch": 0.37712582153262414, "grad_norm": 10.287007331848145, "learning_rate": 9.340817304401397e-05, "loss": 1.0202, "step": 5566 }, { "epoch": 0.37719357680059623, "grad_norm": 7.803440570831299, "learning_rate": 9.340680402491615e-05, "loss": 0.8545, "step": 5567 }, { "epoch": 0.3772613320685683, "grad_norm": 7.74205207824707, "learning_rate": 9.340543500581833e-05, "loss": 1.0585, "step": 5568 }, { "epoch": 0.3773290873365404, "grad_norm": 6.5861053466796875, "learning_rate": 9.340406598672053e-05, "loss": 0.8838, "step": 5569 }, { "epoch": 0.3773968426045125, "grad_norm": 7.1810431480407715, "learning_rate": 9.34026969676227e-05, "loss": 0.8669, "step": 5570 }, { "epoch": 0.3774645978724846, "grad_norm": 7.8471808433532715, "learning_rate": 9.340132794852489e-05, "loss": 1.0412, "step": 5571 }, { "epoch": 0.3775323531404567, "grad_norm": 7.740540027618408, "learning_rate": 9.339995892942707e-05, "loss": 0.944, "step": 5572 }, { "epoch": 0.3776001084084288, "grad_norm": 7.580658912658691, "learning_rate": 9.339858991032925e-05, "loss": 0.8676, "step": 5573 }, { "epoch": 0.3776678636764008, "grad_norm": 9.143624305725098, "learning_rate": 9.339722089123144e-05, "loss": 0.9752, "step": 5574 }, { "epoch": 0.3777356189443729, "grad_norm": 8.142240524291992, "learning_rate": 9.339585187213362e-05, "loss": 0.9733, "step": 5575 }, { "epoch": 0.377803374212345, "grad_norm": 8.034200668334961, "learning_rate": 9.33944828530358e-05, "loss": 0.9867, "step": 5576 }, { "epoch": 0.3778711294803171, "grad_norm": 7.345114231109619, "learning_rate": 9.339311383393798e-05, "loss": 0.9401, "step": 5577 }, { "epoch": 0.3779388847482892, "grad_norm": 7.522977828979492, "learning_rate": 9.339174481484018e-05, "loss": 0.965, "step": 5578 }, { "epoch": 0.3780066400162613, "grad_norm": 6.02646541595459, "learning_rate": 9.339037579574236e-05, "loss": 0.9446, "step": 5579 }, { "epoch": 0.37807439528423337, "grad_norm": 6.161006450653076, "learning_rate": 9.338900677664454e-05, "loss": 0.8803, "step": 5580 }, { "epoch": 0.37814215055220546, "grad_norm": 6.318620681762695, "learning_rate": 9.338763775754672e-05, "loss": 0.646, "step": 5581 }, { "epoch": 0.3782099058201775, "grad_norm": 5.793397903442383, "learning_rate": 9.33862687384489e-05, "loss": 0.7483, "step": 5582 }, { "epoch": 0.3782776610881496, "grad_norm": 7.235496997833252, "learning_rate": 9.338489971935109e-05, "loss": 0.9768, "step": 5583 }, { "epoch": 0.3783454163561217, "grad_norm": 6.682423114776611, "learning_rate": 9.338353070025327e-05, "loss": 0.8466, "step": 5584 }, { "epoch": 0.37841317162409377, "grad_norm": 9.257969856262207, "learning_rate": 9.338216168115545e-05, "loss": 1.1546, "step": 5585 }, { "epoch": 0.37848092689206586, "grad_norm": 7.223050594329834, "learning_rate": 9.338079266205763e-05, "loss": 0.7594, "step": 5586 }, { "epoch": 0.37854868216003795, "grad_norm": 7.735831260681152, "learning_rate": 9.337942364295983e-05, "loss": 1.0392, "step": 5587 }, { "epoch": 0.37861643742801004, "grad_norm": 6.439917087554932, "learning_rate": 9.3378054623862e-05, "loss": 1.0901, "step": 5588 }, { "epoch": 0.37868419269598214, "grad_norm": 7.018877983093262, "learning_rate": 9.337668560476419e-05, "loss": 0.9918, "step": 5589 }, { "epoch": 0.37875194796395417, "grad_norm": 5.858334541320801, "learning_rate": 9.337531658566637e-05, "loss": 0.6788, "step": 5590 }, { "epoch": 0.37881970323192626, "grad_norm": 8.445877075195312, "learning_rate": 9.337394756656856e-05, "loss": 0.9777, "step": 5591 }, { "epoch": 0.37888745849989836, "grad_norm": 6.7996439933776855, "learning_rate": 9.337257854747074e-05, "loss": 1.0086, "step": 5592 }, { "epoch": 0.37895521376787045, "grad_norm": 6.983299255371094, "learning_rate": 9.337120952837292e-05, "loss": 0.9981, "step": 5593 }, { "epoch": 0.37902296903584254, "grad_norm": 7.999194622039795, "learning_rate": 9.336984050927511e-05, "loss": 0.9166, "step": 5594 }, { "epoch": 0.37909072430381463, "grad_norm": 7.698132038116455, "learning_rate": 9.33684714901773e-05, "loss": 0.8333, "step": 5595 }, { "epoch": 0.3791584795717867, "grad_norm": 5.9128217697143555, "learning_rate": 9.336710247107948e-05, "loss": 0.7822, "step": 5596 }, { "epoch": 0.3792262348397588, "grad_norm": 7.200125217437744, "learning_rate": 9.336573345198167e-05, "loss": 0.906, "step": 5597 }, { "epoch": 0.37929399010773085, "grad_norm": 5.82244348526001, "learning_rate": 9.336436443288385e-05, "loss": 0.7376, "step": 5598 }, { "epoch": 0.37936174537570294, "grad_norm": 6.775420188903809, "learning_rate": 9.336299541378603e-05, "loss": 0.6975, "step": 5599 }, { "epoch": 0.37942950064367503, "grad_norm": 6.542332649230957, "learning_rate": 9.336162639468821e-05, "loss": 0.9788, "step": 5600 }, { "epoch": 0.3794972559116471, "grad_norm": 6.677700519561768, "learning_rate": 9.33602573755904e-05, "loss": 0.79, "step": 5601 }, { "epoch": 0.3795650111796192, "grad_norm": 8.561968803405762, "learning_rate": 9.335888835649258e-05, "loss": 0.9499, "step": 5602 }, { "epoch": 0.3796327664475913, "grad_norm": 7.0320234298706055, "learning_rate": 9.335751933739477e-05, "loss": 0.865, "step": 5603 }, { "epoch": 0.3797005217155634, "grad_norm": 7.31481409072876, "learning_rate": 9.335615031829695e-05, "loss": 1.0356, "step": 5604 }, { "epoch": 0.3797682769835355, "grad_norm": 8.205849647521973, "learning_rate": 9.335478129919913e-05, "loss": 0.7836, "step": 5605 }, { "epoch": 0.3798360322515075, "grad_norm": 6.882634162902832, "learning_rate": 9.335341228010132e-05, "loss": 0.8879, "step": 5606 }, { "epoch": 0.3799037875194796, "grad_norm": 8.171550750732422, "learning_rate": 9.33520432610035e-05, "loss": 0.8849, "step": 5607 }, { "epoch": 0.3799715427874517, "grad_norm": 7.184536457061768, "learning_rate": 9.335067424190568e-05, "loss": 0.8206, "step": 5608 }, { "epoch": 0.3800392980554238, "grad_norm": 6.50775146484375, "learning_rate": 9.334930522280786e-05, "loss": 0.7802, "step": 5609 }, { "epoch": 0.3801070533233959, "grad_norm": 7.537467002868652, "learning_rate": 9.334793620371005e-05, "loss": 1.1643, "step": 5610 }, { "epoch": 0.380174808591368, "grad_norm": 6.740983486175537, "learning_rate": 9.334656718461223e-05, "loss": 0.686, "step": 5611 }, { "epoch": 0.3802425638593401, "grad_norm": 6.01100492477417, "learning_rate": 9.334519816551442e-05, "loss": 0.8931, "step": 5612 }, { "epoch": 0.38031031912731217, "grad_norm": 6.056909084320068, "learning_rate": 9.33438291464166e-05, "loss": 0.7861, "step": 5613 }, { "epoch": 0.38037807439528426, "grad_norm": 6.271320343017578, "learning_rate": 9.334246012731878e-05, "loss": 0.8277, "step": 5614 }, { "epoch": 0.3804458296632563, "grad_norm": 5.80530309677124, "learning_rate": 9.334109110822097e-05, "loss": 0.6765, "step": 5615 }, { "epoch": 0.3805135849312284, "grad_norm": 6.031884670257568, "learning_rate": 9.333972208912315e-05, "loss": 0.6482, "step": 5616 }, { "epoch": 0.3805813401992005, "grad_norm": 7.629202842712402, "learning_rate": 9.333835307002533e-05, "loss": 1.0782, "step": 5617 }, { "epoch": 0.38064909546717257, "grad_norm": 8.182783126831055, "learning_rate": 9.333698405092751e-05, "loss": 0.8355, "step": 5618 }, { "epoch": 0.38071685073514466, "grad_norm": 7.317597389221191, "learning_rate": 9.33356150318297e-05, "loss": 0.9035, "step": 5619 }, { "epoch": 0.38078460600311675, "grad_norm": 8.783252716064453, "learning_rate": 9.333424601273189e-05, "loss": 1.1105, "step": 5620 }, { "epoch": 0.38085236127108885, "grad_norm": 6.5179290771484375, "learning_rate": 9.333287699363407e-05, "loss": 0.7859, "step": 5621 }, { "epoch": 0.38092011653906094, "grad_norm": 7.544989585876465, "learning_rate": 9.333150797453625e-05, "loss": 0.8601, "step": 5622 }, { "epoch": 0.380987871807033, "grad_norm": 7.307798862457275, "learning_rate": 9.333013895543843e-05, "loss": 1.071, "step": 5623 }, { "epoch": 0.38105562707500507, "grad_norm": 6.946296215057373, "learning_rate": 9.332876993634062e-05, "loss": 0.8445, "step": 5624 }, { "epoch": 0.38112338234297716, "grad_norm": 8.099409103393555, "learning_rate": 9.33274009172428e-05, "loss": 0.9821, "step": 5625 }, { "epoch": 0.38119113761094925, "grad_norm": 7.492844104766846, "learning_rate": 9.332603189814498e-05, "loss": 0.9696, "step": 5626 }, { "epoch": 0.38125889287892134, "grad_norm": 6.632567882537842, "learning_rate": 9.332466287904716e-05, "loss": 0.8853, "step": 5627 }, { "epoch": 0.38132664814689343, "grad_norm": 5.6510748863220215, "learning_rate": 9.332329385994934e-05, "loss": 0.8686, "step": 5628 }, { "epoch": 0.3813944034148655, "grad_norm": 6.428233623504639, "learning_rate": 9.332192484085154e-05, "loss": 0.8912, "step": 5629 }, { "epoch": 0.3814621586828376, "grad_norm": 8.161954879760742, "learning_rate": 9.332055582175372e-05, "loss": 0.888, "step": 5630 }, { "epoch": 0.38152991395080965, "grad_norm": 7.743470191955566, "learning_rate": 9.33191868026559e-05, "loss": 0.8759, "step": 5631 }, { "epoch": 0.38159766921878174, "grad_norm": 7.648897647857666, "learning_rate": 9.331781778355808e-05, "loss": 1.2331, "step": 5632 }, { "epoch": 0.38166542448675383, "grad_norm": 7.748523235321045, "learning_rate": 9.331644876446027e-05, "loss": 1.1732, "step": 5633 }, { "epoch": 0.3817331797547259, "grad_norm": 7.337912082672119, "learning_rate": 9.331507974536245e-05, "loss": 0.8043, "step": 5634 }, { "epoch": 0.381800935022698, "grad_norm": 7.525491714477539, "learning_rate": 9.331371072626463e-05, "loss": 0.8762, "step": 5635 }, { "epoch": 0.3818686902906701, "grad_norm": 7.477416515350342, "learning_rate": 9.331234170716681e-05, "loss": 0.9053, "step": 5636 }, { "epoch": 0.3819364455586422, "grad_norm": 6.855381965637207, "learning_rate": 9.331097268806899e-05, "loss": 0.9124, "step": 5637 }, { "epoch": 0.3820042008266143, "grad_norm": 7.663267135620117, "learning_rate": 9.330960366897119e-05, "loss": 0.8139, "step": 5638 }, { "epoch": 0.38207195609458633, "grad_norm": 6.08014440536499, "learning_rate": 9.330823464987337e-05, "loss": 0.7084, "step": 5639 }, { "epoch": 0.3821397113625584, "grad_norm": 7.804579734802246, "learning_rate": 9.330686563077555e-05, "loss": 1.206, "step": 5640 }, { "epoch": 0.3822074666305305, "grad_norm": 7.1148552894592285, "learning_rate": 9.330549661167774e-05, "loss": 1.1221, "step": 5641 }, { "epoch": 0.3822752218985026, "grad_norm": 6.727654457092285, "learning_rate": 9.330412759257992e-05, "loss": 1.025, "step": 5642 }, { "epoch": 0.3823429771664747, "grad_norm": 7.059203147888184, "learning_rate": 9.33027585734821e-05, "loss": 1.0819, "step": 5643 }, { "epoch": 0.3824107324344468, "grad_norm": 8.003962516784668, "learning_rate": 9.33013895543843e-05, "loss": 0.95, "step": 5644 }, { "epoch": 0.3824784877024189, "grad_norm": 7.414921283721924, "learning_rate": 9.330002053528647e-05, "loss": 0.9279, "step": 5645 }, { "epoch": 0.38254624297039097, "grad_norm": 5.462070465087891, "learning_rate": 9.329865151618866e-05, "loss": 0.8003, "step": 5646 }, { "epoch": 0.382613998238363, "grad_norm": 7.23457145690918, "learning_rate": 9.329728249709085e-05, "loss": 0.8939, "step": 5647 }, { "epoch": 0.3826817535063351, "grad_norm": 6.371642112731934, "learning_rate": 9.329591347799303e-05, "loss": 0.7125, "step": 5648 }, { "epoch": 0.3827495087743072, "grad_norm": 6.855792999267578, "learning_rate": 9.329454445889521e-05, "loss": 0.745, "step": 5649 }, { "epoch": 0.3828172640422793, "grad_norm": 6.945821762084961, "learning_rate": 9.329317543979739e-05, "loss": 1.1931, "step": 5650 }, { "epoch": 0.3828850193102514, "grad_norm": 7.453825950622559, "learning_rate": 9.329180642069957e-05, "loss": 0.9513, "step": 5651 }, { "epoch": 0.38295277457822346, "grad_norm": 7.040378093719482, "learning_rate": 9.329043740160176e-05, "loss": 0.8896, "step": 5652 }, { "epoch": 0.38302052984619556, "grad_norm": 8.740388870239258, "learning_rate": 9.328906838250394e-05, "loss": 0.771, "step": 5653 }, { "epoch": 0.38308828511416765, "grad_norm": 6.791045665740967, "learning_rate": 9.328769936340613e-05, "loss": 0.8835, "step": 5654 }, { "epoch": 0.3831560403821397, "grad_norm": 6.560173511505127, "learning_rate": 9.32863303443083e-05, "loss": 0.8516, "step": 5655 }, { "epoch": 0.3832237956501118, "grad_norm": 6.763086795806885, "learning_rate": 9.32849613252105e-05, "loss": 0.7976, "step": 5656 }, { "epoch": 0.38329155091808387, "grad_norm": 5.9037370681762695, "learning_rate": 9.328359230611268e-05, "loss": 0.943, "step": 5657 }, { "epoch": 0.38335930618605596, "grad_norm": 4.944889545440674, "learning_rate": 9.328222328701486e-05, "loss": 0.7913, "step": 5658 }, { "epoch": 0.38342706145402805, "grad_norm": 8.701375007629395, "learning_rate": 9.328085426791704e-05, "loss": 0.8, "step": 5659 }, { "epoch": 0.38349481672200014, "grad_norm": 7.1927289962768555, "learning_rate": 9.327948524881922e-05, "loss": 0.8888, "step": 5660 }, { "epoch": 0.38356257198997223, "grad_norm": 8.781030654907227, "learning_rate": 9.327811622972141e-05, "loss": 0.951, "step": 5661 }, { "epoch": 0.3836303272579443, "grad_norm": 7.27484655380249, "learning_rate": 9.32767472106236e-05, "loss": 0.8034, "step": 5662 }, { "epoch": 0.3836980825259164, "grad_norm": 7.739286422729492, "learning_rate": 9.327537819152578e-05, "loss": 1.0769, "step": 5663 }, { "epoch": 0.38376583779388845, "grad_norm": 7.0264973640441895, "learning_rate": 9.327400917242796e-05, "loss": 1.0652, "step": 5664 }, { "epoch": 0.38383359306186055, "grad_norm": 6.887399673461914, "learning_rate": 9.327264015333015e-05, "loss": 0.7986, "step": 5665 }, { "epoch": 0.38390134832983264, "grad_norm": 9.081385612487793, "learning_rate": 9.327127113423233e-05, "loss": 1.2249, "step": 5666 }, { "epoch": 0.38396910359780473, "grad_norm": 7.966154098510742, "learning_rate": 9.326990211513451e-05, "loss": 0.8924, "step": 5667 }, { "epoch": 0.3840368588657768, "grad_norm": 7.321074962615967, "learning_rate": 9.326853309603669e-05, "loss": 1.0022, "step": 5668 }, { "epoch": 0.3841046141337489, "grad_norm": 7.060865879058838, "learning_rate": 9.326716407693887e-05, "loss": 0.8849, "step": 5669 }, { "epoch": 0.384172369401721, "grad_norm": 7.009364604949951, "learning_rate": 9.326579505784106e-05, "loss": 0.9032, "step": 5670 }, { "epoch": 0.3842401246696931, "grad_norm": 8.417223930358887, "learning_rate": 9.326442603874325e-05, "loss": 1.1162, "step": 5671 }, { "epoch": 0.38430787993766513, "grad_norm": 6.774781227111816, "learning_rate": 9.326305701964543e-05, "loss": 0.773, "step": 5672 }, { "epoch": 0.3843756352056372, "grad_norm": 6.5464911460876465, "learning_rate": 9.32616880005476e-05, "loss": 1.0374, "step": 5673 }, { "epoch": 0.3844433904736093, "grad_norm": 6.618286609649658, "learning_rate": 9.326031898144979e-05, "loss": 0.9538, "step": 5674 }, { "epoch": 0.3845111457415814, "grad_norm": 7.598459720611572, "learning_rate": 9.325894996235198e-05, "loss": 0.7811, "step": 5675 }, { "epoch": 0.3845789010095535, "grad_norm": 6.456278324127197, "learning_rate": 9.325758094325416e-05, "loss": 1.055, "step": 5676 }, { "epoch": 0.3846466562775256, "grad_norm": 7.940021991729736, "learning_rate": 9.325621192415634e-05, "loss": 1.1605, "step": 5677 }, { "epoch": 0.3847144115454977, "grad_norm": 8.113789558410645, "learning_rate": 9.325484290505852e-05, "loss": 1.0405, "step": 5678 }, { "epoch": 0.3847821668134698, "grad_norm": 7.918695449829102, "learning_rate": 9.325347388596071e-05, "loss": 0.8019, "step": 5679 }, { "epoch": 0.3848499220814418, "grad_norm": 8.129600524902344, "learning_rate": 9.32521048668629e-05, "loss": 0.9581, "step": 5680 }, { "epoch": 0.3849176773494139, "grad_norm": 6.6353230476379395, "learning_rate": 9.325073584776508e-05, "loss": 0.8932, "step": 5681 }, { "epoch": 0.384985432617386, "grad_norm": 6.153687477111816, "learning_rate": 9.324936682866726e-05, "loss": 0.7646, "step": 5682 }, { "epoch": 0.3850531878853581, "grad_norm": 6.837560653686523, "learning_rate": 9.324799780956944e-05, "loss": 0.8998, "step": 5683 }, { "epoch": 0.3851209431533302, "grad_norm": 8.48747444152832, "learning_rate": 9.324662879047163e-05, "loss": 0.7761, "step": 5684 }, { "epoch": 0.38518869842130227, "grad_norm": 10.725845336914062, "learning_rate": 9.324525977137381e-05, "loss": 1.1755, "step": 5685 }, { "epoch": 0.38525645368927436, "grad_norm": 5.731265544891357, "learning_rate": 9.324389075227599e-05, "loss": 0.8974, "step": 5686 }, { "epoch": 0.38532420895724645, "grad_norm": 8.479190826416016, "learning_rate": 9.324252173317818e-05, "loss": 0.9307, "step": 5687 }, { "epoch": 0.3853919642252185, "grad_norm": 6.539626598358154, "learning_rate": 9.324115271408037e-05, "loss": 0.9463, "step": 5688 }, { "epoch": 0.3854597194931906, "grad_norm": 7.225162029266357, "learning_rate": 9.323978369498255e-05, "loss": 0.8206, "step": 5689 }, { "epoch": 0.38552747476116267, "grad_norm": 8.779760360717773, "learning_rate": 9.323841467588474e-05, "loss": 1.1208, "step": 5690 }, { "epoch": 0.38559523002913476, "grad_norm": 7.580684185028076, "learning_rate": 9.323704565678692e-05, "loss": 0.7993, "step": 5691 }, { "epoch": 0.38566298529710685, "grad_norm": 8.699392318725586, "learning_rate": 9.32356766376891e-05, "loss": 1.073, "step": 5692 }, { "epoch": 0.38573074056507894, "grad_norm": 7.190006256103516, "learning_rate": 9.32343076185913e-05, "loss": 1.0204, "step": 5693 }, { "epoch": 0.38579849583305104, "grad_norm": 9.289970397949219, "learning_rate": 9.323293859949347e-05, "loss": 0.7091, "step": 5694 }, { "epoch": 0.3858662511010231, "grad_norm": 9.533041000366211, "learning_rate": 9.323156958039565e-05, "loss": 1.103, "step": 5695 }, { "epoch": 0.38593400636899516, "grad_norm": 7.375948905944824, "learning_rate": 9.323020056129783e-05, "loss": 0.9815, "step": 5696 }, { "epoch": 0.38600176163696726, "grad_norm": 8.580230712890625, "learning_rate": 9.322883154220003e-05, "loss": 1.1056, "step": 5697 }, { "epoch": 0.38606951690493935, "grad_norm": 8.332324981689453, "learning_rate": 9.322746252310221e-05, "loss": 0.8809, "step": 5698 }, { "epoch": 0.38613727217291144, "grad_norm": 6.298001289367676, "learning_rate": 9.322609350400439e-05, "loss": 0.9255, "step": 5699 }, { "epoch": 0.38620502744088353, "grad_norm": 7.031383514404297, "learning_rate": 9.322472448490657e-05, "loss": 0.9196, "step": 5700 }, { "epoch": 0.3862727827088556, "grad_norm": 6.791995048522949, "learning_rate": 9.322335546580875e-05, "loss": 0.8184, "step": 5701 }, { "epoch": 0.3863405379768277, "grad_norm": 7.81817102432251, "learning_rate": 9.322198644671094e-05, "loss": 0.9331, "step": 5702 }, { "epoch": 0.3864082932447998, "grad_norm": 7.933851718902588, "learning_rate": 9.322061742761312e-05, "loss": 0.872, "step": 5703 }, { "epoch": 0.38647604851277184, "grad_norm": 7.805744171142578, "learning_rate": 9.32192484085153e-05, "loss": 0.8378, "step": 5704 }, { "epoch": 0.38654380378074393, "grad_norm": 9.205484390258789, "learning_rate": 9.321787938941749e-05, "loss": 0.9853, "step": 5705 }, { "epoch": 0.386611559048716, "grad_norm": 7.313584804534912, "learning_rate": 9.321651037031967e-05, "loss": 0.7576, "step": 5706 }, { "epoch": 0.3866793143166881, "grad_norm": 10.125847816467285, "learning_rate": 9.321514135122186e-05, "loss": 0.8497, "step": 5707 }, { "epoch": 0.3867470695846602, "grad_norm": 7.392860412597656, "learning_rate": 9.321377233212404e-05, "loss": 0.9151, "step": 5708 }, { "epoch": 0.3868148248526323, "grad_norm": 8.305766105651855, "learning_rate": 9.321240331302622e-05, "loss": 1.1604, "step": 5709 }, { "epoch": 0.3868825801206044, "grad_norm": 5.897716522216797, "learning_rate": 9.32110342939284e-05, "loss": 0.6471, "step": 5710 }, { "epoch": 0.3869503353885765, "grad_norm": 6.7423200607299805, "learning_rate": 9.32096652748306e-05, "loss": 0.7968, "step": 5711 }, { "epoch": 0.3870180906565485, "grad_norm": 8.511382102966309, "learning_rate": 9.320829625573277e-05, "loss": 1.1875, "step": 5712 }, { "epoch": 0.3870858459245206, "grad_norm": 8.105764389038086, "learning_rate": 9.320692723663495e-05, "loss": 1.0717, "step": 5713 }, { "epoch": 0.3871536011924927, "grad_norm": 6.555315971374512, "learning_rate": 9.320555821753714e-05, "loss": 0.7491, "step": 5714 }, { "epoch": 0.3872213564604648, "grad_norm": 7.481291770935059, "learning_rate": 9.320418919843932e-05, "loss": 1.0057, "step": 5715 }, { "epoch": 0.3872891117284369, "grad_norm": 8.126254081726074, "learning_rate": 9.320282017934151e-05, "loss": 0.94, "step": 5716 }, { "epoch": 0.387356866996409, "grad_norm": 5.760578632354736, "learning_rate": 9.320145116024369e-05, "loss": 0.8861, "step": 5717 }, { "epoch": 0.38742462226438107, "grad_norm": 7.664496421813965, "learning_rate": 9.320008214114587e-05, "loss": 0.8005, "step": 5718 }, { "epoch": 0.38749237753235316, "grad_norm": 8.473989486694336, "learning_rate": 9.319871312204805e-05, "loss": 0.8196, "step": 5719 }, { "epoch": 0.38756013280032525, "grad_norm": 8.553350448608398, "learning_rate": 9.319734410295024e-05, "loss": 0.8356, "step": 5720 }, { "epoch": 0.3876278880682973, "grad_norm": 7.713801860809326, "learning_rate": 9.319597508385242e-05, "loss": 1.0326, "step": 5721 }, { "epoch": 0.3876956433362694, "grad_norm": 7.013209819793701, "learning_rate": 9.31946060647546e-05, "loss": 1.1149, "step": 5722 }, { "epoch": 0.38776339860424147, "grad_norm": 8.171217918395996, "learning_rate": 9.319323704565679e-05, "loss": 1.141, "step": 5723 }, { "epoch": 0.38783115387221356, "grad_norm": 8.459320068359375, "learning_rate": 9.319186802655897e-05, "loss": 0.8859, "step": 5724 }, { "epoch": 0.38789890914018565, "grad_norm": 6.681031227111816, "learning_rate": 9.319049900746116e-05, "loss": 0.8746, "step": 5725 }, { "epoch": 0.38796666440815775, "grad_norm": 6.796359539031982, "learning_rate": 9.318912998836334e-05, "loss": 1.0891, "step": 5726 }, { "epoch": 0.38803441967612984, "grad_norm": 6.819052696228027, "learning_rate": 9.318776096926552e-05, "loss": 0.8087, "step": 5727 }, { "epoch": 0.38810217494410193, "grad_norm": 6.746981620788574, "learning_rate": 9.31863919501677e-05, "loss": 1.1591, "step": 5728 }, { "epoch": 0.38816993021207397, "grad_norm": 7.767449855804443, "learning_rate": 9.318502293106988e-05, "loss": 1.1012, "step": 5729 }, { "epoch": 0.38823768548004606, "grad_norm": 7.967437744140625, "learning_rate": 9.318365391197207e-05, "loss": 1.0262, "step": 5730 }, { "epoch": 0.38830544074801815, "grad_norm": 6.053138256072998, "learning_rate": 9.318228489287426e-05, "loss": 0.6821, "step": 5731 }, { "epoch": 0.38837319601599024, "grad_norm": 7.298278331756592, "learning_rate": 9.318091587377644e-05, "loss": 1.0869, "step": 5732 }, { "epoch": 0.38844095128396233, "grad_norm": 6.621678352355957, "learning_rate": 9.317954685467863e-05, "loss": 0.9388, "step": 5733 }, { "epoch": 0.3885087065519344, "grad_norm": 7.588876247406006, "learning_rate": 9.317817783558081e-05, "loss": 0.8775, "step": 5734 }, { "epoch": 0.3885764618199065, "grad_norm": 6.0856242179870605, "learning_rate": 9.317680881648299e-05, "loss": 0.8057, "step": 5735 }, { "epoch": 0.3886442170878786, "grad_norm": 6.144415378570557, "learning_rate": 9.317543979738518e-05, "loss": 0.9205, "step": 5736 }, { "epoch": 0.38871197235585064, "grad_norm": 8.25857925415039, "learning_rate": 9.317407077828736e-05, "loss": 0.9608, "step": 5737 }, { "epoch": 0.38877972762382274, "grad_norm": 9.107572555541992, "learning_rate": 9.317270175918954e-05, "loss": 0.7823, "step": 5738 }, { "epoch": 0.3888474828917948, "grad_norm": 6.879073619842529, "learning_rate": 9.317133274009174e-05, "loss": 0.9137, "step": 5739 }, { "epoch": 0.3889152381597669, "grad_norm": 6.114928245544434, "learning_rate": 9.316996372099392e-05, "loss": 0.9713, "step": 5740 }, { "epoch": 0.388982993427739, "grad_norm": 6.454460620880127, "learning_rate": 9.31685947018961e-05, "loss": 0.9224, "step": 5741 }, { "epoch": 0.3890507486957111, "grad_norm": 7.231433868408203, "learning_rate": 9.316722568279828e-05, "loss": 0.9087, "step": 5742 }, { "epoch": 0.3891185039636832, "grad_norm": 7.442675590515137, "learning_rate": 9.316585666370047e-05, "loss": 0.9038, "step": 5743 }, { "epoch": 0.3891862592316553, "grad_norm": 6.472715854644775, "learning_rate": 9.316448764460265e-05, "loss": 0.7429, "step": 5744 }, { "epoch": 0.3892540144996273, "grad_norm": 7.20242977142334, "learning_rate": 9.316311862550483e-05, "loss": 0.9592, "step": 5745 }, { "epoch": 0.3893217697675994, "grad_norm": 5.8578715324401855, "learning_rate": 9.316174960640701e-05, "loss": 0.901, "step": 5746 }, { "epoch": 0.3893895250355715, "grad_norm": 5.1851983070373535, "learning_rate": 9.31603805873092e-05, "loss": 0.6872, "step": 5747 }, { "epoch": 0.3894572803035436, "grad_norm": 5.5100531578063965, "learning_rate": 9.315901156821139e-05, "loss": 0.8329, "step": 5748 }, { "epoch": 0.3895250355715157, "grad_norm": 7.71047830581665, "learning_rate": 9.315764254911357e-05, "loss": 0.8726, "step": 5749 }, { "epoch": 0.3895927908394878, "grad_norm": 5.928194522857666, "learning_rate": 9.315627353001575e-05, "loss": 0.6503, "step": 5750 }, { "epoch": 0.38966054610745987, "grad_norm": 6.636775016784668, "learning_rate": 9.315490451091793e-05, "loss": 0.8905, "step": 5751 }, { "epoch": 0.38972830137543196, "grad_norm": 7.5196027755737305, "learning_rate": 9.315353549182012e-05, "loss": 0.9333, "step": 5752 }, { "epoch": 0.389796056643404, "grad_norm": 7.1420087814331055, "learning_rate": 9.31521664727223e-05, "loss": 0.777, "step": 5753 }, { "epoch": 0.3898638119113761, "grad_norm": 6.713923931121826, "learning_rate": 9.315079745362448e-05, "loss": 0.7667, "step": 5754 }, { "epoch": 0.3899315671793482, "grad_norm": 6.81540584564209, "learning_rate": 9.314942843452666e-05, "loss": 0.8838, "step": 5755 }, { "epoch": 0.3899993224473203, "grad_norm": 7.069910049438477, "learning_rate": 9.314805941542885e-05, "loss": 0.9638, "step": 5756 }, { "epoch": 0.39006707771529237, "grad_norm": 6.350069999694824, "learning_rate": 9.314669039633104e-05, "loss": 0.8163, "step": 5757 }, { "epoch": 0.39013483298326446, "grad_norm": 5.552088737487793, "learning_rate": 9.314532137723322e-05, "loss": 0.7061, "step": 5758 }, { "epoch": 0.39020258825123655, "grad_norm": 7.8301167488098145, "learning_rate": 9.31439523581354e-05, "loss": 1.0415, "step": 5759 }, { "epoch": 0.39027034351920864, "grad_norm": 7.079575538635254, "learning_rate": 9.314258333903758e-05, "loss": 0.7247, "step": 5760 }, { "epoch": 0.3903380987871807, "grad_norm": 7.282689571380615, "learning_rate": 9.314121431993976e-05, "loss": 0.8361, "step": 5761 }, { "epoch": 0.39040585405515277, "grad_norm": 7.18900203704834, "learning_rate": 9.313984530084195e-05, "loss": 1.0206, "step": 5762 }, { "epoch": 0.39047360932312486, "grad_norm": 5.926472187042236, "learning_rate": 9.313847628174413e-05, "loss": 0.8551, "step": 5763 }, { "epoch": 0.39054136459109695, "grad_norm": 8.176214218139648, "learning_rate": 9.313710726264631e-05, "loss": 0.8106, "step": 5764 }, { "epoch": 0.39060911985906904, "grad_norm": 7.093206882476807, "learning_rate": 9.31357382435485e-05, "loss": 0.9986, "step": 5765 }, { "epoch": 0.39067687512704113, "grad_norm": 8.531807899475098, "learning_rate": 9.313436922445069e-05, "loss": 0.9368, "step": 5766 }, { "epoch": 0.3907446303950132, "grad_norm": 6.479072570800781, "learning_rate": 9.313300020535287e-05, "loss": 0.858, "step": 5767 }, { "epoch": 0.3908123856629853, "grad_norm": 6.749851226806641, "learning_rate": 9.313163118625505e-05, "loss": 0.638, "step": 5768 }, { "epoch": 0.3908801409309574, "grad_norm": 8.79270076751709, "learning_rate": 9.313026216715723e-05, "loss": 0.7734, "step": 5769 }, { "epoch": 0.39094789619892945, "grad_norm": 6.972496509552002, "learning_rate": 9.312889314805941e-05, "loss": 0.8557, "step": 5770 }, { "epoch": 0.39101565146690154, "grad_norm": 6.48140811920166, "learning_rate": 9.31275241289616e-05, "loss": 0.8183, "step": 5771 }, { "epoch": 0.39108340673487363, "grad_norm": 6.5230536460876465, "learning_rate": 9.312615510986378e-05, "loss": 0.7863, "step": 5772 }, { "epoch": 0.3911511620028457, "grad_norm": 6.982974052429199, "learning_rate": 9.312478609076597e-05, "loss": 0.9604, "step": 5773 }, { "epoch": 0.3912189172708178, "grad_norm": 7.524245262145996, "learning_rate": 9.312341707166815e-05, "loss": 0.8145, "step": 5774 }, { "epoch": 0.3912866725387899, "grad_norm": 7.770455837249756, "learning_rate": 9.312204805257034e-05, "loss": 0.858, "step": 5775 }, { "epoch": 0.391354427806762, "grad_norm": 6.145147800445557, "learning_rate": 9.312067903347252e-05, "loss": 0.6833, "step": 5776 }, { "epoch": 0.3914221830747341, "grad_norm": 7.118679523468018, "learning_rate": 9.31193100143747e-05, "loss": 0.9945, "step": 5777 }, { "epoch": 0.3914899383427061, "grad_norm": 7.793931007385254, "learning_rate": 9.311794099527688e-05, "loss": 1.0227, "step": 5778 }, { "epoch": 0.3915576936106782, "grad_norm": 6.189599514007568, "learning_rate": 9.311657197617907e-05, "loss": 1.0355, "step": 5779 }, { "epoch": 0.3916254488786503, "grad_norm": 8.16638469696045, "learning_rate": 9.311520295708125e-05, "loss": 0.998, "step": 5780 }, { "epoch": 0.3916932041466224, "grad_norm": 7.059429168701172, "learning_rate": 9.311383393798343e-05, "loss": 0.8641, "step": 5781 }, { "epoch": 0.3917609594145945, "grad_norm": 9.555390357971191, "learning_rate": 9.311246491888563e-05, "loss": 0.9243, "step": 5782 }, { "epoch": 0.3918287146825666, "grad_norm": 7.566288948059082, "learning_rate": 9.311109589978781e-05, "loss": 0.8025, "step": 5783 }, { "epoch": 0.3918964699505387, "grad_norm": 8.842116355895996, "learning_rate": 9.310972688068999e-05, "loss": 0.8875, "step": 5784 }, { "epoch": 0.39196422521851076, "grad_norm": 7.426934719085693, "learning_rate": 9.310835786159218e-05, "loss": 0.8116, "step": 5785 }, { "epoch": 0.3920319804864828, "grad_norm": 7.222875595092773, "learning_rate": 9.310698884249436e-05, "loss": 0.8286, "step": 5786 }, { "epoch": 0.3920997357544549, "grad_norm": 8.364874839782715, "learning_rate": 9.310561982339654e-05, "loss": 1.1628, "step": 5787 }, { "epoch": 0.392167491022427, "grad_norm": 6.4203104972839355, "learning_rate": 9.310425080429872e-05, "loss": 0.8501, "step": 5788 }, { "epoch": 0.3922352462903991, "grad_norm": 9.124777793884277, "learning_rate": 9.310288178520092e-05, "loss": 1.1962, "step": 5789 }, { "epoch": 0.39230300155837117, "grad_norm": 7.944338321685791, "learning_rate": 9.31015127661031e-05, "loss": 1.3275, "step": 5790 }, { "epoch": 0.39237075682634326, "grad_norm": 7.256115913391113, "learning_rate": 9.310014374700528e-05, "loss": 0.8594, "step": 5791 }, { "epoch": 0.39243851209431535, "grad_norm": 6.452229976654053, "learning_rate": 9.309877472790746e-05, "loss": 0.7744, "step": 5792 }, { "epoch": 0.39250626736228744, "grad_norm": 8.207562446594238, "learning_rate": 9.309740570880964e-05, "loss": 1.0615, "step": 5793 }, { "epoch": 0.3925740226302595, "grad_norm": 6.627633571624756, "learning_rate": 9.309603668971183e-05, "loss": 1.1951, "step": 5794 }, { "epoch": 0.39264177789823157, "grad_norm": 6.888060092926025, "learning_rate": 9.309466767061401e-05, "loss": 0.7771, "step": 5795 }, { "epoch": 0.39270953316620366, "grad_norm": 5.327685356140137, "learning_rate": 9.30932986515162e-05, "loss": 0.6568, "step": 5796 }, { "epoch": 0.39277728843417575, "grad_norm": 7.033134460449219, "learning_rate": 9.309192963241837e-05, "loss": 0.8601, "step": 5797 }, { "epoch": 0.39284504370214784, "grad_norm": 6.694526195526123, "learning_rate": 9.309056061332057e-05, "loss": 0.9084, "step": 5798 }, { "epoch": 0.39291279897011994, "grad_norm": 6.9023027420043945, "learning_rate": 9.308919159422275e-05, "loss": 0.8983, "step": 5799 }, { "epoch": 0.39298055423809203, "grad_norm": 9.527570724487305, "learning_rate": 9.308782257512493e-05, "loss": 0.9773, "step": 5800 }, { "epoch": 0.3930483095060641, "grad_norm": 7.497427940368652, "learning_rate": 9.308645355602711e-05, "loss": 0.9713, "step": 5801 }, { "epoch": 0.39311606477403616, "grad_norm": 8.860198020935059, "learning_rate": 9.308508453692929e-05, "loss": 0.8866, "step": 5802 }, { "epoch": 0.39318382004200825, "grad_norm": 7.34425687789917, "learning_rate": 9.308371551783148e-05, "loss": 0.9442, "step": 5803 }, { "epoch": 0.39325157530998034, "grad_norm": 10.30170726776123, "learning_rate": 9.308234649873366e-05, "loss": 0.8957, "step": 5804 }, { "epoch": 0.39331933057795243, "grad_norm": 8.141131401062012, "learning_rate": 9.308097747963584e-05, "loss": 1.1858, "step": 5805 }, { "epoch": 0.3933870858459245, "grad_norm": 7.154587268829346, "learning_rate": 9.307960846053802e-05, "loss": 0.9056, "step": 5806 }, { "epoch": 0.3934548411138966, "grad_norm": 6.086984634399414, "learning_rate": 9.30782394414402e-05, "loss": 0.8943, "step": 5807 }, { "epoch": 0.3935225963818687, "grad_norm": 6.48331880569458, "learning_rate": 9.30768704223424e-05, "loss": 0.8631, "step": 5808 }, { "epoch": 0.3935903516498408, "grad_norm": 9.434244155883789, "learning_rate": 9.307550140324458e-05, "loss": 1.0951, "step": 5809 }, { "epoch": 0.39365810691781283, "grad_norm": 7.943146228790283, "learning_rate": 9.307413238414676e-05, "loss": 0.9073, "step": 5810 }, { "epoch": 0.3937258621857849, "grad_norm": 6.200165271759033, "learning_rate": 9.307276336504894e-05, "loss": 0.8406, "step": 5811 }, { "epoch": 0.393793617453757, "grad_norm": 6.221752643585205, "learning_rate": 9.307139434595113e-05, "loss": 0.9181, "step": 5812 }, { "epoch": 0.3938613727217291, "grad_norm": 6.645714282989502, "learning_rate": 9.307002532685331e-05, "loss": 0.8508, "step": 5813 }, { "epoch": 0.3939291279897012, "grad_norm": 7.978506088256836, "learning_rate": 9.30686563077555e-05, "loss": 1.1544, "step": 5814 }, { "epoch": 0.3939968832576733, "grad_norm": 6.967216968536377, "learning_rate": 9.306728728865767e-05, "loss": 0.7956, "step": 5815 }, { "epoch": 0.3940646385256454, "grad_norm": 9.8863525390625, "learning_rate": 9.306591826955986e-05, "loss": 0.9111, "step": 5816 }, { "epoch": 0.3941323937936175, "grad_norm": 7.202954292297363, "learning_rate": 9.306454925046205e-05, "loss": 0.9267, "step": 5817 }, { "epoch": 0.3942001490615895, "grad_norm": 6.386566162109375, "learning_rate": 9.306318023136423e-05, "loss": 0.9494, "step": 5818 }, { "epoch": 0.3942679043295616, "grad_norm": 6.505050182342529, "learning_rate": 9.306181121226641e-05, "loss": 0.904, "step": 5819 }, { "epoch": 0.3943356595975337, "grad_norm": 7.091371536254883, "learning_rate": 9.306044219316859e-05, "loss": 0.7305, "step": 5820 }, { "epoch": 0.3944034148655058, "grad_norm": 7.322198390960693, "learning_rate": 9.305907317407078e-05, "loss": 0.8101, "step": 5821 }, { "epoch": 0.3944711701334779, "grad_norm": 6.345345497131348, "learning_rate": 9.305770415497296e-05, "loss": 0.7821, "step": 5822 }, { "epoch": 0.39453892540144997, "grad_norm": 6.418498992919922, "learning_rate": 9.305633513587514e-05, "loss": 0.8844, "step": 5823 }, { "epoch": 0.39460668066942206, "grad_norm": 7.375457763671875, "learning_rate": 9.305496611677733e-05, "loss": 1.0099, "step": 5824 }, { "epoch": 0.39467443593739415, "grad_norm": 7.758962154388428, "learning_rate": 9.305359709767952e-05, "loss": 1.1459, "step": 5825 }, { "epoch": 0.39474219120536624, "grad_norm": 9.552523612976074, "learning_rate": 9.30522280785817e-05, "loss": 1.0285, "step": 5826 }, { "epoch": 0.3948099464733383, "grad_norm": 7.053111553192139, "learning_rate": 9.305085905948388e-05, "loss": 0.8006, "step": 5827 }, { "epoch": 0.39487770174131037, "grad_norm": 7.239533424377441, "learning_rate": 9.304949004038607e-05, "loss": 0.9594, "step": 5828 }, { "epoch": 0.39494545700928246, "grad_norm": 5.681763172149658, "learning_rate": 9.304812102128825e-05, "loss": 0.7517, "step": 5829 }, { "epoch": 0.39501321227725456, "grad_norm": 7.394958019256592, "learning_rate": 9.304675200219043e-05, "loss": 0.9656, "step": 5830 }, { "epoch": 0.39508096754522665, "grad_norm": 5.894937515258789, "learning_rate": 9.304538298309263e-05, "loss": 0.7568, "step": 5831 }, { "epoch": 0.39514872281319874, "grad_norm": 7.866422653198242, "learning_rate": 9.304401396399481e-05, "loss": 1.1937, "step": 5832 }, { "epoch": 0.39521647808117083, "grad_norm": 6.807196140289307, "learning_rate": 9.304264494489699e-05, "loss": 0.7785, "step": 5833 }, { "epoch": 0.3952842333491429, "grad_norm": 7.117645740509033, "learning_rate": 9.304127592579917e-05, "loss": 0.8462, "step": 5834 }, { "epoch": 0.39535198861711496, "grad_norm": 5.873225212097168, "learning_rate": 9.303990690670136e-05, "loss": 0.8372, "step": 5835 }, { "epoch": 0.39541974388508705, "grad_norm": 6.9400410652160645, "learning_rate": 9.303853788760354e-05, "loss": 0.8154, "step": 5836 }, { "epoch": 0.39548749915305914, "grad_norm": 5.7771100997924805, "learning_rate": 9.303716886850572e-05, "loss": 0.7411, "step": 5837 }, { "epoch": 0.39555525442103123, "grad_norm": 6.784138202667236, "learning_rate": 9.30357998494079e-05, "loss": 1.062, "step": 5838 }, { "epoch": 0.3956230096890033, "grad_norm": 7.726989269256592, "learning_rate": 9.303443083031008e-05, "loss": 0.9726, "step": 5839 }, { "epoch": 0.3956907649569754, "grad_norm": 6.342170715332031, "learning_rate": 9.303306181121228e-05, "loss": 0.8388, "step": 5840 }, { "epoch": 0.3957585202249475, "grad_norm": 8.047952651977539, "learning_rate": 9.303169279211446e-05, "loss": 1.0734, "step": 5841 }, { "epoch": 0.3958262754929196, "grad_norm": 7.073848724365234, "learning_rate": 9.303032377301664e-05, "loss": 0.6968, "step": 5842 }, { "epoch": 0.39589403076089164, "grad_norm": 7.159115791320801, "learning_rate": 9.302895475391882e-05, "loss": 0.7991, "step": 5843 }, { "epoch": 0.3959617860288637, "grad_norm": 6.908319473266602, "learning_rate": 9.302758573482101e-05, "loss": 0.9283, "step": 5844 }, { "epoch": 0.3960295412968358, "grad_norm": 6.5849690437316895, "learning_rate": 9.30262167157232e-05, "loss": 0.8258, "step": 5845 }, { "epoch": 0.3960972965648079, "grad_norm": 8.763665199279785, "learning_rate": 9.302484769662537e-05, "loss": 0.7734, "step": 5846 }, { "epoch": 0.39616505183278, "grad_norm": 7.134938716888428, "learning_rate": 9.302347867752755e-05, "loss": 0.8084, "step": 5847 }, { "epoch": 0.3962328071007521, "grad_norm": 6.418683052062988, "learning_rate": 9.302210965842973e-05, "loss": 0.8225, "step": 5848 }, { "epoch": 0.3963005623687242, "grad_norm": 7.124704837799072, "learning_rate": 9.302074063933193e-05, "loss": 0.6787, "step": 5849 }, { "epoch": 0.3963683176366963, "grad_norm": 6.653177261352539, "learning_rate": 9.301937162023411e-05, "loss": 0.8026, "step": 5850 }, { "epoch": 0.3964360729046683, "grad_norm": 7.981827735900879, "learning_rate": 9.301800260113629e-05, "loss": 0.9281, "step": 5851 }, { "epoch": 0.3965038281726404, "grad_norm": 5.823386192321777, "learning_rate": 9.301663358203847e-05, "loss": 0.8158, "step": 5852 }, { "epoch": 0.3965715834406125, "grad_norm": 8.96346664428711, "learning_rate": 9.301526456294066e-05, "loss": 1.0875, "step": 5853 }, { "epoch": 0.3966393387085846, "grad_norm": 6.114203929901123, "learning_rate": 9.301389554384284e-05, "loss": 0.7896, "step": 5854 }, { "epoch": 0.3967070939765567, "grad_norm": 6.386680603027344, "learning_rate": 9.301252652474502e-05, "loss": 1.0199, "step": 5855 }, { "epoch": 0.39677484924452877, "grad_norm": 8.502519607543945, "learning_rate": 9.30111575056472e-05, "loss": 1.0127, "step": 5856 }, { "epoch": 0.39684260451250086, "grad_norm": 6.875016689300537, "learning_rate": 9.300978848654938e-05, "loss": 0.8263, "step": 5857 }, { "epoch": 0.39691035978047295, "grad_norm": 8.344440460205078, "learning_rate": 9.300841946745158e-05, "loss": 1.0351, "step": 5858 }, { "epoch": 0.396978115048445, "grad_norm": 6.42828893661499, "learning_rate": 9.300705044835376e-05, "loss": 0.8503, "step": 5859 }, { "epoch": 0.3970458703164171, "grad_norm": 6.0403547286987305, "learning_rate": 9.300568142925594e-05, "loss": 0.9078, "step": 5860 }, { "epoch": 0.3971136255843892, "grad_norm": 5.010101795196533, "learning_rate": 9.300431241015812e-05, "loss": 0.8215, "step": 5861 }, { "epoch": 0.39718138085236127, "grad_norm": 6.132750988006592, "learning_rate": 9.30029433910603e-05, "loss": 0.7887, "step": 5862 }, { "epoch": 0.39724913612033336, "grad_norm": 6.075906753540039, "learning_rate": 9.30015743719625e-05, "loss": 0.822, "step": 5863 }, { "epoch": 0.39731689138830545, "grad_norm": 8.225122451782227, "learning_rate": 9.300020535286467e-05, "loss": 0.7771, "step": 5864 }, { "epoch": 0.39738464665627754, "grad_norm": 6.863472938537598, "learning_rate": 9.299883633376685e-05, "loss": 0.9185, "step": 5865 }, { "epoch": 0.39745240192424963, "grad_norm": 7.280022144317627, "learning_rate": 9.299746731466903e-05, "loss": 0.8206, "step": 5866 }, { "epoch": 0.39752015719222167, "grad_norm": 5.646960258483887, "learning_rate": 9.299609829557123e-05, "loss": 0.7897, "step": 5867 }, { "epoch": 0.39758791246019376, "grad_norm": 8.248296737670898, "learning_rate": 9.299472927647341e-05, "loss": 0.802, "step": 5868 }, { "epoch": 0.39765566772816585, "grad_norm": 6.389570236206055, "learning_rate": 9.299336025737559e-05, "loss": 0.7744, "step": 5869 }, { "epoch": 0.39772342299613794, "grad_norm": 6.452336311340332, "learning_rate": 9.299199123827777e-05, "loss": 0.9003, "step": 5870 }, { "epoch": 0.39779117826411003, "grad_norm": 6.798274040222168, "learning_rate": 9.299062221917995e-05, "loss": 0.9585, "step": 5871 }, { "epoch": 0.3978589335320821, "grad_norm": 6.267078876495361, "learning_rate": 9.298925320008214e-05, "loss": 0.7971, "step": 5872 }, { "epoch": 0.3979266888000542, "grad_norm": 7.0529069900512695, "learning_rate": 9.298788418098432e-05, "loss": 0.9384, "step": 5873 }, { "epoch": 0.3979944440680263, "grad_norm": 6.686244964599609, "learning_rate": 9.298651516188652e-05, "loss": 0.8589, "step": 5874 }, { "epoch": 0.3980621993359984, "grad_norm": 7.230234622955322, "learning_rate": 9.29851461427887e-05, "loss": 0.8638, "step": 5875 }, { "epoch": 0.39812995460397044, "grad_norm": 8.14789867401123, "learning_rate": 9.298377712369088e-05, "loss": 0.7231, "step": 5876 }, { "epoch": 0.39819770987194253, "grad_norm": 6.5517497062683105, "learning_rate": 9.298240810459307e-05, "loss": 1.0435, "step": 5877 }, { "epoch": 0.3982654651399146, "grad_norm": 7.095836639404297, "learning_rate": 9.298103908549525e-05, "loss": 0.7551, "step": 5878 }, { "epoch": 0.3983332204078867, "grad_norm": 6.932202339172363, "learning_rate": 9.297967006639743e-05, "loss": 0.9075, "step": 5879 }, { "epoch": 0.3984009756758588, "grad_norm": 8.045002937316895, "learning_rate": 9.297830104729961e-05, "loss": 0.9787, "step": 5880 }, { "epoch": 0.3984687309438309, "grad_norm": 7.1430511474609375, "learning_rate": 9.297693202820181e-05, "loss": 0.8549, "step": 5881 }, { "epoch": 0.398536486211803, "grad_norm": 5.857006549835205, "learning_rate": 9.297556300910399e-05, "loss": 0.7573, "step": 5882 }, { "epoch": 0.3986042414797751, "grad_norm": 6.143594741821289, "learning_rate": 9.297419399000617e-05, "loss": 0.747, "step": 5883 }, { "epoch": 0.3986719967477471, "grad_norm": 7.4967169761657715, "learning_rate": 9.297282497090835e-05, "loss": 1.0366, "step": 5884 }, { "epoch": 0.3987397520157192, "grad_norm": 6.6401166915893555, "learning_rate": 9.297145595181054e-05, "loss": 0.9889, "step": 5885 }, { "epoch": 0.3988075072836913, "grad_norm": 5.577928066253662, "learning_rate": 9.297008693271272e-05, "loss": 0.6917, "step": 5886 }, { "epoch": 0.3988752625516634, "grad_norm": 5.9933061599731445, "learning_rate": 9.29687179136149e-05, "loss": 0.9545, "step": 5887 }, { "epoch": 0.3989430178196355, "grad_norm": 6.353165149688721, "learning_rate": 9.296734889451708e-05, "loss": 0.7512, "step": 5888 }, { "epoch": 0.3990107730876076, "grad_norm": 6.181026458740234, "learning_rate": 9.296597987541926e-05, "loss": 0.767, "step": 5889 }, { "epoch": 0.39907852835557966, "grad_norm": 6.579110622406006, "learning_rate": 9.296461085632146e-05, "loss": 1.0604, "step": 5890 }, { "epoch": 0.39914628362355176, "grad_norm": 6.501206398010254, "learning_rate": 9.296324183722364e-05, "loss": 0.9821, "step": 5891 }, { "epoch": 0.3992140388915238, "grad_norm": 8.253642082214355, "learning_rate": 9.296187281812582e-05, "loss": 0.8827, "step": 5892 }, { "epoch": 0.3992817941594959, "grad_norm": 7.590344429016113, "learning_rate": 9.2960503799028e-05, "loss": 0.9614, "step": 5893 }, { "epoch": 0.399349549427468, "grad_norm": 6.755953311920166, "learning_rate": 9.295913477993018e-05, "loss": 0.8298, "step": 5894 }, { "epoch": 0.39941730469544007, "grad_norm": 6.040559768676758, "learning_rate": 9.295776576083237e-05, "loss": 0.7182, "step": 5895 }, { "epoch": 0.39948505996341216, "grad_norm": 6.301966667175293, "learning_rate": 9.295639674173455e-05, "loss": 0.8545, "step": 5896 }, { "epoch": 0.39955281523138425, "grad_norm": 5.77929162979126, "learning_rate": 9.295502772263673e-05, "loss": 0.9398, "step": 5897 }, { "epoch": 0.39962057049935634, "grad_norm": 6.633763313293457, "learning_rate": 9.295365870353891e-05, "loss": 0.8914, "step": 5898 }, { "epoch": 0.39968832576732843, "grad_norm": 7.260994911193848, "learning_rate": 9.295228968444111e-05, "loss": 0.7725, "step": 5899 }, { "epoch": 0.39975608103530047, "grad_norm": 8.820511817932129, "learning_rate": 9.295092066534329e-05, "loss": 0.7847, "step": 5900 }, { "epoch": 0.39982383630327256, "grad_norm": 7.335788726806641, "learning_rate": 9.294955164624547e-05, "loss": 0.683, "step": 5901 }, { "epoch": 0.39989159157124465, "grad_norm": 7.959702491760254, "learning_rate": 9.294818262714765e-05, "loss": 0.7598, "step": 5902 }, { "epoch": 0.39995934683921675, "grad_norm": 9.680485725402832, "learning_rate": 9.294681360804983e-05, "loss": 1.0359, "step": 5903 }, { "epoch": 0.40002710210718884, "grad_norm": 6.917464733123779, "learning_rate": 9.294544458895202e-05, "loss": 1.0155, "step": 5904 }, { "epoch": 0.40009485737516093, "grad_norm": 6.221781253814697, "learning_rate": 9.29440755698542e-05, "loss": 0.9645, "step": 5905 }, { "epoch": 0.400162612643133, "grad_norm": 6.778574466705322, "learning_rate": 9.294270655075638e-05, "loss": 0.794, "step": 5906 }, { "epoch": 0.4002303679111051, "grad_norm": 7.8015522956848145, "learning_rate": 9.294133753165856e-05, "loss": 0.9472, "step": 5907 }, { "epoch": 0.40029812317907715, "grad_norm": 5.018773555755615, "learning_rate": 9.293996851256076e-05, "loss": 0.8961, "step": 5908 }, { "epoch": 0.40036587844704924, "grad_norm": 6.184563636779785, "learning_rate": 9.293859949346294e-05, "loss": 0.7988, "step": 5909 }, { "epoch": 0.40043363371502133, "grad_norm": 6.593270778656006, "learning_rate": 9.293723047436512e-05, "loss": 0.8248, "step": 5910 }, { "epoch": 0.4005013889829934, "grad_norm": 7.14009952545166, "learning_rate": 9.29358614552673e-05, "loss": 0.8589, "step": 5911 }, { "epoch": 0.4005691442509655, "grad_norm": 8.044157981872559, "learning_rate": 9.293449243616948e-05, "loss": 0.9397, "step": 5912 }, { "epoch": 0.4006368995189376, "grad_norm": 7.3374247550964355, "learning_rate": 9.293312341707167e-05, "loss": 0.8615, "step": 5913 }, { "epoch": 0.4007046547869097, "grad_norm": 6.510500907897949, "learning_rate": 9.293175439797385e-05, "loss": 0.7151, "step": 5914 }, { "epoch": 0.4007724100548818, "grad_norm": 15.020646095275879, "learning_rate": 9.293038537887603e-05, "loss": 0.9349, "step": 5915 }, { "epoch": 0.4008401653228538, "grad_norm": 6.779942989349365, "learning_rate": 9.292901635977821e-05, "loss": 0.8018, "step": 5916 }, { "epoch": 0.4009079205908259, "grad_norm": 6.3740339279174805, "learning_rate": 9.29276473406804e-05, "loss": 0.7691, "step": 5917 }, { "epoch": 0.400975675858798, "grad_norm": 6.902404308319092, "learning_rate": 9.292627832158259e-05, "loss": 0.7431, "step": 5918 }, { "epoch": 0.4010434311267701, "grad_norm": 7.324024200439453, "learning_rate": 9.292490930248477e-05, "loss": 0.9314, "step": 5919 }, { "epoch": 0.4011111863947422, "grad_norm": 8.297179222106934, "learning_rate": 9.292354028338695e-05, "loss": 1.0072, "step": 5920 }, { "epoch": 0.4011789416627143, "grad_norm": 6.1418914794921875, "learning_rate": 9.292217126428914e-05, "loss": 0.948, "step": 5921 }, { "epoch": 0.4012466969306864, "grad_norm": 8.698518753051758, "learning_rate": 9.292080224519132e-05, "loss": 0.8509, "step": 5922 }, { "epoch": 0.40131445219865847, "grad_norm": 6.628043174743652, "learning_rate": 9.29194332260935e-05, "loss": 0.9388, "step": 5923 }, { "epoch": 0.4013822074666305, "grad_norm": 8.38621711730957, "learning_rate": 9.29180642069957e-05, "loss": 1.0795, "step": 5924 }, { "epoch": 0.4014499627346026, "grad_norm": 7.546327114105225, "learning_rate": 9.291669518789788e-05, "loss": 0.865, "step": 5925 }, { "epoch": 0.4015177180025747, "grad_norm": 7.399687767028809, "learning_rate": 9.291532616880006e-05, "loss": 0.8276, "step": 5926 }, { "epoch": 0.4015854732705468, "grad_norm": 6.921968460083008, "learning_rate": 9.291395714970225e-05, "loss": 0.9275, "step": 5927 }, { "epoch": 0.40165322853851887, "grad_norm": 7.856932640075684, "learning_rate": 9.291258813060443e-05, "loss": 1.1376, "step": 5928 }, { "epoch": 0.40172098380649096, "grad_norm": 7.86414098739624, "learning_rate": 9.291121911150661e-05, "loss": 1.0225, "step": 5929 }, { "epoch": 0.40178873907446305, "grad_norm": 7.028629302978516, "learning_rate": 9.29098500924088e-05, "loss": 1.0157, "step": 5930 }, { "epoch": 0.40185649434243514, "grad_norm": 5.826450347900391, "learning_rate": 9.290848107331099e-05, "loss": 0.8605, "step": 5931 }, { "epoch": 0.40192424961040724, "grad_norm": 6.072175025939941, "learning_rate": 9.290711205421317e-05, "loss": 0.6261, "step": 5932 }, { "epoch": 0.40199200487837927, "grad_norm": 6.829746246337891, "learning_rate": 9.290574303511535e-05, "loss": 0.849, "step": 5933 }, { "epoch": 0.40205976014635136, "grad_norm": 6.809370994567871, "learning_rate": 9.290437401601753e-05, "loss": 0.8659, "step": 5934 }, { "epoch": 0.40212751541432346, "grad_norm": 7.195353984832764, "learning_rate": 9.290300499691971e-05, "loss": 0.9559, "step": 5935 }, { "epoch": 0.40219527068229555, "grad_norm": 8.443836212158203, "learning_rate": 9.29016359778219e-05, "loss": 0.7687, "step": 5936 }, { "epoch": 0.40226302595026764, "grad_norm": 5.932136058807373, "learning_rate": 9.290026695872408e-05, "loss": 0.7125, "step": 5937 }, { "epoch": 0.40233078121823973, "grad_norm": 6.654352188110352, "learning_rate": 9.289889793962626e-05, "loss": 0.9933, "step": 5938 }, { "epoch": 0.4023985364862118, "grad_norm": 7.100243091583252, "learning_rate": 9.289752892052844e-05, "loss": 0.9271, "step": 5939 }, { "epoch": 0.4024662917541839, "grad_norm": 7.78497838973999, "learning_rate": 9.289615990143062e-05, "loss": 0.9358, "step": 5940 }, { "epoch": 0.40253404702215595, "grad_norm": 6.294493198394775, "learning_rate": 9.289479088233282e-05, "loss": 0.8308, "step": 5941 }, { "epoch": 0.40260180229012804, "grad_norm": 6.8807244300842285, "learning_rate": 9.2893421863235e-05, "loss": 0.7728, "step": 5942 }, { "epoch": 0.40266955755810013, "grad_norm": 7.030758857727051, "learning_rate": 9.289205284413718e-05, "loss": 0.8725, "step": 5943 }, { "epoch": 0.4027373128260722, "grad_norm": 6.787613868713379, "learning_rate": 9.289068382503936e-05, "loss": 0.8725, "step": 5944 }, { "epoch": 0.4028050680940443, "grad_norm": 6.4748148918151855, "learning_rate": 9.288931480594155e-05, "loss": 0.9305, "step": 5945 }, { "epoch": 0.4028728233620164, "grad_norm": 5.930908203125, "learning_rate": 9.288794578684373e-05, "loss": 0.814, "step": 5946 }, { "epoch": 0.4029405786299885, "grad_norm": 6.687366008758545, "learning_rate": 9.288657676774591e-05, "loss": 0.7893, "step": 5947 }, { "epoch": 0.4030083338979606, "grad_norm": 9.194374084472656, "learning_rate": 9.28852077486481e-05, "loss": 1.1462, "step": 5948 }, { "epoch": 0.4030760891659326, "grad_norm": 8.393781661987305, "learning_rate": 9.288383872955027e-05, "loss": 0.9375, "step": 5949 }, { "epoch": 0.4031438444339047, "grad_norm": 7.802282810211182, "learning_rate": 9.288246971045247e-05, "loss": 1.1499, "step": 5950 }, { "epoch": 0.4032115997018768, "grad_norm": 7.085485458374023, "learning_rate": 9.288110069135465e-05, "loss": 0.9834, "step": 5951 }, { "epoch": 0.4032793549698489, "grad_norm": 7.905081272125244, "learning_rate": 9.287973167225683e-05, "loss": 1.11, "step": 5952 }, { "epoch": 0.403347110237821, "grad_norm": 5.531884670257568, "learning_rate": 9.287836265315901e-05, "loss": 0.8386, "step": 5953 }, { "epoch": 0.4034148655057931, "grad_norm": 5.959394931793213, "learning_rate": 9.28769936340612e-05, "loss": 0.858, "step": 5954 }, { "epoch": 0.4034826207737652, "grad_norm": 7.020748138427734, "learning_rate": 9.287562461496338e-05, "loss": 0.8708, "step": 5955 }, { "epoch": 0.40355037604173727, "grad_norm": 9.01052474975586, "learning_rate": 9.287425559586556e-05, "loss": 1.0213, "step": 5956 }, { "epoch": 0.4036181313097093, "grad_norm": 7.091532230377197, "learning_rate": 9.287288657676774e-05, "loss": 1.0133, "step": 5957 }, { "epoch": 0.4036858865776814, "grad_norm": 5.7289276123046875, "learning_rate": 9.287151755766992e-05, "loss": 0.6517, "step": 5958 }, { "epoch": 0.4037536418456535, "grad_norm": 5.692935466766357, "learning_rate": 9.287014853857212e-05, "loss": 0.865, "step": 5959 }, { "epoch": 0.4038213971136256, "grad_norm": 7.469212532043457, "learning_rate": 9.28687795194743e-05, "loss": 0.8707, "step": 5960 }, { "epoch": 0.40388915238159767, "grad_norm": 6.172707557678223, "learning_rate": 9.286741050037648e-05, "loss": 0.8217, "step": 5961 }, { "epoch": 0.40395690764956976, "grad_norm": 8.741066932678223, "learning_rate": 9.286604148127866e-05, "loss": 0.8087, "step": 5962 }, { "epoch": 0.40402466291754185, "grad_norm": 6.240641117095947, "learning_rate": 9.286467246218085e-05, "loss": 1.1012, "step": 5963 }, { "epoch": 0.40409241818551395, "grad_norm": 6.801406383514404, "learning_rate": 9.286330344308303e-05, "loss": 0.9335, "step": 5964 }, { "epoch": 0.404160173453486, "grad_norm": 6.114485263824463, "learning_rate": 9.286193442398521e-05, "loss": 0.8987, "step": 5965 }, { "epoch": 0.4042279287214581, "grad_norm": 9.715784072875977, "learning_rate": 9.28605654048874e-05, "loss": 1.0337, "step": 5966 }, { "epoch": 0.40429568398943017, "grad_norm": 10.362801551818848, "learning_rate": 9.285919638578959e-05, "loss": 0.7685, "step": 5967 }, { "epoch": 0.40436343925740226, "grad_norm": 7.567534446716309, "learning_rate": 9.285782736669177e-05, "loss": 0.8397, "step": 5968 }, { "epoch": 0.40443119452537435, "grad_norm": 7.282614231109619, "learning_rate": 9.285645834759395e-05, "loss": 1.0515, "step": 5969 }, { "epoch": 0.40449894979334644, "grad_norm": 6.683037757873535, "learning_rate": 9.285508932849614e-05, "loss": 1.1006, "step": 5970 }, { "epoch": 0.40456670506131853, "grad_norm": 8.437498092651367, "learning_rate": 9.285372030939832e-05, "loss": 1.0656, "step": 5971 }, { "epoch": 0.4046344603292906, "grad_norm": 6.153039932250977, "learning_rate": 9.28523512903005e-05, "loss": 0.8285, "step": 5972 }, { "epoch": 0.40470221559726266, "grad_norm": 6.3334221839904785, "learning_rate": 9.28509822712027e-05, "loss": 0.8968, "step": 5973 }, { "epoch": 0.40476997086523475, "grad_norm": 9.119121551513672, "learning_rate": 9.284961325210488e-05, "loss": 0.8814, "step": 5974 }, { "epoch": 0.40483772613320684, "grad_norm": 7.621852397918701, "learning_rate": 9.284824423300706e-05, "loss": 0.9297, "step": 5975 }, { "epoch": 0.40490548140117893, "grad_norm": 7.415964126586914, "learning_rate": 9.284687521390924e-05, "loss": 0.8597, "step": 5976 }, { "epoch": 0.404973236669151, "grad_norm": 6.424054145812988, "learning_rate": 9.284550619481143e-05, "loss": 0.6792, "step": 5977 }, { "epoch": 0.4050409919371231, "grad_norm": 7.875925540924072, "learning_rate": 9.284413717571361e-05, "loss": 0.7931, "step": 5978 }, { "epoch": 0.4051087472050952, "grad_norm": 5.702389240264893, "learning_rate": 9.284276815661579e-05, "loss": 0.7124, "step": 5979 }, { "epoch": 0.4051765024730673, "grad_norm": 6.58071231842041, "learning_rate": 9.284139913751797e-05, "loss": 0.8965, "step": 5980 }, { "epoch": 0.4052442577410394, "grad_norm": 8.180785179138184, "learning_rate": 9.284003011842015e-05, "loss": 1.0545, "step": 5981 }, { "epoch": 0.40531201300901143, "grad_norm": 7.393392562866211, "learning_rate": 9.283866109932235e-05, "loss": 0.8044, "step": 5982 }, { "epoch": 0.4053797682769835, "grad_norm": 6.009011745452881, "learning_rate": 9.283729208022453e-05, "loss": 0.7688, "step": 5983 }, { "epoch": 0.4054475235449556, "grad_norm": 6.8165388107299805, "learning_rate": 9.283592306112671e-05, "loss": 0.7743, "step": 5984 }, { "epoch": 0.4055152788129277, "grad_norm": 7.851406574249268, "learning_rate": 9.283455404202889e-05, "loss": 0.9081, "step": 5985 }, { "epoch": 0.4055830340808998, "grad_norm": 7.138257026672363, "learning_rate": 9.283318502293108e-05, "loss": 0.9843, "step": 5986 }, { "epoch": 0.4056507893488719, "grad_norm": 7.70706033706665, "learning_rate": 9.283181600383326e-05, "loss": 0.9158, "step": 5987 }, { "epoch": 0.405718544616844, "grad_norm": 7.948725700378418, "learning_rate": 9.283044698473544e-05, "loss": 0.8694, "step": 5988 }, { "epoch": 0.40578629988481607, "grad_norm": 6.4174628257751465, "learning_rate": 9.282907796563762e-05, "loss": 0.8547, "step": 5989 }, { "epoch": 0.4058540551527881, "grad_norm": 7.62558126449585, "learning_rate": 9.28277089465398e-05, "loss": 0.7978, "step": 5990 }, { "epoch": 0.4059218104207602, "grad_norm": 7.624577522277832, "learning_rate": 9.2826339927442e-05, "loss": 1.0346, "step": 5991 }, { "epoch": 0.4059895656887323, "grad_norm": 7.392852783203125, "learning_rate": 9.282497090834418e-05, "loss": 0.8833, "step": 5992 }, { "epoch": 0.4060573209567044, "grad_norm": 7.652538776397705, "learning_rate": 9.282360188924636e-05, "loss": 1.0438, "step": 5993 }, { "epoch": 0.4061250762246765, "grad_norm": 7.050436973571777, "learning_rate": 9.282223287014854e-05, "loss": 0.8291, "step": 5994 }, { "epoch": 0.40619283149264857, "grad_norm": 7.119441032409668, "learning_rate": 9.282086385105072e-05, "loss": 0.6427, "step": 5995 }, { "epoch": 0.40626058676062066, "grad_norm": 6.1112565994262695, "learning_rate": 9.281949483195291e-05, "loss": 0.7737, "step": 5996 }, { "epoch": 0.40632834202859275, "grad_norm": 6.173165321350098, "learning_rate": 9.28181258128551e-05, "loss": 0.8099, "step": 5997 }, { "epoch": 0.4063960972965648, "grad_norm": 6.372697830200195, "learning_rate": 9.281675679375727e-05, "loss": 0.7902, "step": 5998 }, { "epoch": 0.4064638525645369, "grad_norm": 7.213540077209473, "learning_rate": 9.281538777465945e-05, "loss": 0.7273, "step": 5999 }, { "epoch": 0.40653160783250897, "grad_norm": 8.106986999511719, "learning_rate": 9.281401875556165e-05, "loss": 0.7516, "step": 6000 }, { "epoch": 0.40659936310048106, "grad_norm": 8.372703552246094, "learning_rate": 9.281264973646383e-05, "loss": 1.1737, "step": 6001 }, { "epoch": 0.40666711836845315, "grad_norm": 9.540267944335938, "learning_rate": 9.281128071736601e-05, "loss": 1.068, "step": 6002 }, { "epoch": 0.40673487363642524, "grad_norm": 6.878968715667725, "learning_rate": 9.280991169826819e-05, "loss": 0.9329, "step": 6003 }, { "epoch": 0.40680262890439733, "grad_norm": 6.826279163360596, "learning_rate": 9.280854267917037e-05, "loss": 0.8267, "step": 6004 }, { "epoch": 0.4068703841723694, "grad_norm": 7.155866622924805, "learning_rate": 9.280717366007256e-05, "loss": 0.9133, "step": 6005 }, { "epoch": 0.40693813944034146, "grad_norm": 6.883568286895752, "learning_rate": 9.280580464097474e-05, "loss": 0.709, "step": 6006 }, { "epoch": 0.40700589470831355, "grad_norm": 6.944139003753662, "learning_rate": 9.280443562187692e-05, "loss": 0.8137, "step": 6007 }, { "epoch": 0.40707364997628565, "grad_norm": 5.899077892303467, "learning_rate": 9.28030666027791e-05, "loss": 0.733, "step": 6008 }, { "epoch": 0.40714140524425774, "grad_norm": 7.213099956512451, "learning_rate": 9.28016975836813e-05, "loss": 1.1728, "step": 6009 }, { "epoch": 0.40720916051222983, "grad_norm": 7.830915927886963, "learning_rate": 9.280032856458348e-05, "loss": 0.7465, "step": 6010 }, { "epoch": 0.4072769157802019, "grad_norm": 7.609717845916748, "learning_rate": 9.279895954548566e-05, "loss": 1.2036, "step": 6011 }, { "epoch": 0.407344671048174, "grad_norm": 8.978927612304688, "learning_rate": 9.279759052638784e-05, "loss": 0.8903, "step": 6012 }, { "epoch": 0.4074124263161461, "grad_norm": 8.331847190856934, "learning_rate": 9.279622150729003e-05, "loss": 1.0853, "step": 6013 }, { "epoch": 0.40748018158411814, "grad_norm": 6.639584541320801, "learning_rate": 9.279485248819221e-05, "loss": 0.9113, "step": 6014 }, { "epoch": 0.40754793685209023, "grad_norm": 6.87017822265625, "learning_rate": 9.27934834690944e-05, "loss": 0.748, "step": 6015 }, { "epoch": 0.4076156921200623, "grad_norm": 6.675489902496338, "learning_rate": 9.279211444999659e-05, "loss": 0.8002, "step": 6016 }, { "epoch": 0.4076834473880344, "grad_norm": 7.666563034057617, "learning_rate": 9.279074543089877e-05, "loss": 0.9973, "step": 6017 }, { "epoch": 0.4077512026560065, "grad_norm": 7.878670692443848, "learning_rate": 9.278937641180095e-05, "loss": 1.0647, "step": 6018 }, { "epoch": 0.4078189579239786, "grad_norm": 6.263443946838379, "learning_rate": 9.278800739270314e-05, "loss": 0.8827, "step": 6019 }, { "epoch": 0.4078867131919507, "grad_norm": 9.69717788696289, "learning_rate": 9.278663837360532e-05, "loss": 0.728, "step": 6020 }, { "epoch": 0.4079544684599228, "grad_norm": 8.236658096313477, "learning_rate": 9.27852693545075e-05, "loss": 0.78, "step": 6021 }, { "epoch": 0.4080222237278948, "grad_norm": 6.9468512535095215, "learning_rate": 9.278390033540968e-05, "loss": 0.7674, "step": 6022 }, { "epoch": 0.4080899789958669, "grad_norm": 8.783222198486328, "learning_rate": 9.278253131631188e-05, "loss": 0.9062, "step": 6023 }, { "epoch": 0.408157734263839, "grad_norm": 6.449438571929932, "learning_rate": 9.278116229721406e-05, "loss": 0.9683, "step": 6024 }, { "epoch": 0.4082254895318111, "grad_norm": 6.185464382171631, "learning_rate": 9.277979327811624e-05, "loss": 0.8899, "step": 6025 }, { "epoch": 0.4082932447997832, "grad_norm": 6.422550678253174, "learning_rate": 9.277842425901842e-05, "loss": 0.8765, "step": 6026 }, { "epoch": 0.4083610000677553, "grad_norm": 6.8761372566223145, "learning_rate": 9.27770552399206e-05, "loss": 0.7326, "step": 6027 }, { "epoch": 0.40842875533572737, "grad_norm": 5.4224162101745605, "learning_rate": 9.277568622082279e-05, "loss": 0.7882, "step": 6028 }, { "epoch": 0.40849651060369946, "grad_norm": 6.6288275718688965, "learning_rate": 9.277431720172497e-05, "loss": 0.936, "step": 6029 }, { "epoch": 0.4085642658716715, "grad_norm": 7.305326461791992, "learning_rate": 9.277294818262715e-05, "loss": 0.8254, "step": 6030 }, { "epoch": 0.4086320211396436, "grad_norm": 6.697827339172363, "learning_rate": 9.277157916352933e-05, "loss": 0.9365, "step": 6031 }, { "epoch": 0.4086997764076157, "grad_norm": 8.074564933776855, "learning_rate": 9.277021014443153e-05, "loss": 1.0454, "step": 6032 }, { "epoch": 0.40876753167558777, "grad_norm": 7.539030075073242, "learning_rate": 9.276884112533371e-05, "loss": 0.9275, "step": 6033 }, { "epoch": 0.40883528694355986, "grad_norm": 7.1338582038879395, "learning_rate": 9.276747210623589e-05, "loss": 0.8395, "step": 6034 }, { "epoch": 0.40890304221153195, "grad_norm": 8.182660102844238, "learning_rate": 9.276610308713807e-05, "loss": 1.1907, "step": 6035 }, { "epoch": 0.40897079747950404, "grad_norm": 6.1452813148498535, "learning_rate": 9.276473406804025e-05, "loss": 0.6271, "step": 6036 }, { "epoch": 0.40903855274747614, "grad_norm": 7.247335433959961, "learning_rate": 9.276336504894244e-05, "loss": 0.8665, "step": 6037 }, { "epoch": 0.40910630801544823, "grad_norm": 8.27696418762207, "learning_rate": 9.276199602984462e-05, "loss": 0.968, "step": 6038 }, { "epoch": 0.40917406328342026, "grad_norm": 6.888766288757324, "learning_rate": 9.27606270107468e-05, "loss": 0.8212, "step": 6039 }, { "epoch": 0.40924181855139236, "grad_norm": 6.885414123535156, "learning_rate": 9.275925799164898e-05, "loss": 0.8978, "step": 6040 }, { "epoch": 0.40930957381936445, "grad_norm": 7.307199478149414, "learning_rate": 9.275788897255118e-05, "loss": 0.9602, "step": 6041 }, { "epoch": 0.40937732908733654, "grad_norm": 6.921801567077637, "learning_rate": 9.275651995345336e-05, "loss": 0.863, "step": 6042 }, { "epoch": 0.40944508435530863, "grad_norm": 6.910282611846924, "learning_rate": 9.275515093435554e-05, "loss": 0.8481, "step": 6043 }, { "epoch": 0.4095128396232807, "grad_norm": 7.98552131652832, "learning_rate": 9.275378191525772e-05, "loss": 0.9096, "step": 6044 }, { "epoch": 0.4095805948912528, "grad_norm": 9.74826431274414, "learning_rate": 9.27524128961599e-05, "loss": 0.867, "step": 6045 }, { "epoch": 0.4096483501592249, "grad_norm": 10.083016395568848, "learning_rate": 9.275104387706209e-05, "loss": 0.9664, "step": 6046 }, { "epoch": 0.40971610542719694, "grad_norm": 8.351798057556152, "learning_rate": 9.274967485796427e-05, "loss": 0.8164, "step": 6047 }, { "epoch": 0.40978386069516903, "grad_norm": 7.7515106201171875, "learning_rate": 9.274830583886645e-05, "loss": 0.9784, "step": 6048 }, { "epoch": 0.4098516159631411, "grad_norm": 6.339774131774902, "learning_rate": 9.274693681976863e-05, "loss": 0.7614, "step": 6049 }, { "epoch": 0.4099193712311132, "grad_norm": 8.455992698669434, "learning_rate": 9.274556780067081e-05, "loss": 0.989, "step": 6050 }, { "epoch": 0.4099871264990853, "grad_norm": 9.367591857910156, "learning_rate": 9.274419878157301e-05, "loss": 1.0155, "step": 6051 }, { "epoch": 0.4100548817670574, "grad_norm": 8.611092567443848, "learning_rate": 9.274282976247519e-05, "loss": 0.9541, "step": 6052 }, { "epoch": 0.4101226370350295, "grad_norm": 8.239481925964355, "learning_rate": 9.274146074337737e-05, "loss": 0.9511, "step": 6053 }, { "epoch": 0.4101903923030016, "grad_norm": 7.31620979309082, "learning_rate": 9.274009172427955e-05, "loss": 1.0126, "step": 6054 }, { "epoch": 0.4102581475709736, "grad_norm": 6.814750671386719, "learning_rate": 9.273872270518174e-05, "loss": 0.9463, "step": 6055 }, { "epoch": 0.4103259028389457, "grad_norm": 7.000329494476318, "learning_rate": 9.273735368608392e-05, "loss": 1.0664, "step": 6056 }, { "epoch": 0.4103936581069178, "grad_norm": 6.309933662414551, "learning_rate": 9.27359846669861e-05, "loss": 0.8605, "step": 6057 }, { "epoch": 0.4104614133748899, "grad_norm": 8.13158893585205, "learning_rate": 9.273461564788828e-05, "loss": 0.9158, "step": 6058 }, { "epoch": 0.410529168642862, "grad_norm": 8.334741592407227, "learning_rate": 9.273324662879048e-05, "loss": 0.8699, "step": 6059 }, { "epoch": 0.4105969239108341, "grad_norm": 8.019463539123535, "learning_rate": 9.273187760969266e-05, "loss": 1.0366, "step": 6060 }, { "epoch": 0.41066467917880617, "grad_norm": 6.104535102844238, "learning_rate": 9.273050859059484e-05, "loss": 0.8273, "step": 6061 }, { "epoch": 0.41073243444677826, "grad_norm": 8.345130920410156, "learning_rate": 9.272913957149703e-05, "loss": 1.3653, "step": 6062 }, { "epoch": 0.4108001897147503, "grad_norm": 8.028311729431152, "learning_rate": 9.272777055239921e-05, "loss": 1.096, "step": 6063 }, { "epoch": 0.4108679449827224, "grad_norm": 7.079861164093018, "learning_rate": 9.272640153330139e-05, "loss": 1.0157, "step": 6064 }, { "epoch": 0.4109357002506945, "grad_norm": 8.497052192687988, "learning_rate": 9.272503251420359e-05, "loss": 0.9972, "step": 6065 }, { "epoch": 0.41100345551866657, "grad_norm": 8.042581558227539, "learning_rate": 9.272366349510577e-05, "loss": 0.908, "step": 6066 }, { "epoch": 0.41107121078663866, "grad_norm": 6.567187786102295, "learning_rate": 9.272229447600795e-05, "loss": 0.7992, "step": 6067 }, { "epoch": 0.41113896605461075, "grad_norm": 5.78397274017334, "learning_rate": 9.272092545691013e-05, "loss": 0.773, "step": 6068 }, { "epoch": 0.41120672132258285, "grad_norm": 8.116515159606934, "learning_rate": 9.271955643781232e-05, "loss": 0.8101, "step": 6069 }, { "epoch": 0.41127447659055494, "grad_norm": 7.628951549530029, "learning_rate": 9.27181874187145e-05, "loss": 0.9282, "step": 6070 }, { "epoch": 0.411342231858527, "grad_norm": 7.830180644989014, "learning_rate": 9.271681839961668e-05, "loss": 0.8681, "step": 6071 }, { "epoch": 0.41140998712649907, "grad_norm": 7.690285682678223, "learning_rate": 9.271544938051886e-05, "loss": 0.9064, "step": 6072 }, { "epoch": 0.41147774239447116, "grad_norm": 7.585133075714111, "learning_rate": 9.271408036142106e-05, "loss": 0.8305, "step": 6073 }, { "epoch": 0.41154549766244325, "grad_norm": 6.18320894241333, "learning_rate": 9.271271134232324e-05, "loss": 1.0395, "step": 6074 }, { "epoch": 0.41161325293041534, "grad_norm": 8.301353454589844, "learning_rate": 9.271134232322542e-05, "loss": 1.1528, "step": 6075 }, { "epoch": 0.41168100819838743, "grad_norm": 6.261874198913574, "learning_rate": 9.27099733041276e-05, "loss": 0.7802, "step": 6076 }, { "epoch": 0.4117487634663595, "grad_norm": 7.161525726318359, "learning_rate": 9.270860428502978e-05, "loss": 1.1292, "step": 6077 }, { "epoch": 0.4118165187343316, "grad_norm": 6.522838115692139, "learning_rate": 9.270723526593197e-05, "loss": 0.8013, "step": 6078 }, { "epoch": 0.41188427400230365, "grad_norm": 6.919328212738037, "learning_rate": 9.270586624683415e-05, "loss": 0.8172, "step": 6079 }, { "epoch": 0.41195202927027574, "grad_norm": 6.236283302307129, "learning_rate": 9.270449722773633e-05, "loss": 0.9677, "step": 6080 }, { "epoch": 0.41201978453824784, "grad_norm": 5.593216896057129, "learning_rate": 9.270312820863851e-05, "loss": 0.922, "step": 6081 }, { "epoch": 0.4120875398062199, "grad_norm": 8.651509284973145, "learning_rate": 9.27017591895407e-05, "loss": 0.9209, "step": 6082 }, { "epoch": 0.412155295074192, "grad_norm": 6.787774085998535, "learning_rate": 9.270039017044289e-05, "loss": 1.0991, "step": 6083 }, { "epoch": 0.4122230503421641, "grad_norm": 6.774304389953613, "learning_rate": 9.269902115134507e-05, "loss": 0.7265, "step": 6084 }, { "epoch": 0.4122908056101362, "grad_norm": 7.097721099853516, "learning_rate": 9.269765213224725e-05, "loss": 1.0353, "step": 6085 }, { "epoch": 0.4123585608781083, "grad_norm": 8.65166187286377, "learning_rate": 9.269628311314943e-05, "loss": 1.0157, "step": 6086 }, { "epoch": 0.4124263161460804, "grad_norm": 5.778721809387207, "learning_rate": 9.269491409405162e-05, "loss": 0.8381, "step": 6087 }, { "epoch": 0.4124940714140524, "grad_norm": 6.297547340393066, "learning_rate": 9.26935450749538e-05, "loss": 0.9421, "step": 6088 }, { "epoch": 0.4125618266820245, "grad_norm": 7.8641462326049805, "learning_rate": 9.269217605585598e-05, "loss": 0.9987, "step": 6089 }, { "epoch": 0.4126295819499966, "grad_norm": 7.222883701324463, "learning_rate": 9.269080703675816e-05, "loss": 0.7314, "step": 6090 }, { "epoch": 0.4126973372179687, "grad_norm": 7.291220188140869, "learning_rate": 9.268943801766034e-05, "loss": 0.9199, "step": 6091 }, { "epoch": 0.4127650924859408, "grad_norm": 7.00157356262207, "learning_rate": 9.268806899856254e-05, "loss": 0.8965, "step": 6092 }, { "epoch": 0.4128328477539129, "grad_norm": 8.229329109191895, "learning_rate": 9.268669997946472e-05, "loss": 1.0392, "step": 6093 }, { "epoch": 0.41290060302188497, "grad_norm": 6.9192633628845215, "learning_rate": 9.26853309603669e-05, "loss": 0.8987, "step": 6094 }, { "epoch": 0.41296835828985706, "grad_norm": 6.3700761795043945, "learning_rate": 9.268396194126908e-05, "loss": 0.7713, "step": 6095 }, { "epoch": 0.4130361135578291, "grad_norm": 5.654745101928711, "learning_rate": 9.268259292217127e-05, "loss": 0.6733, "step": 6096 }, { "epoch": 0.4131038688258012, "grad_norm": 6.898359298706055, "learning_rate": 9.268122390307345e-05, "loss": 0.9611, "step": 6097 }, { "epoch": 0.4131716240937733, "grad_norm": 6.53093147277832, "learning_rate": 9.267985488397563e-05, "loss": 0.795, "step": 6098 }, { "epoch": 0.4132393793617454, "grad_norm": 8.565315246582031, "learning_rate": 9.267848586487781e-05, "loss": 0.7333, "step": 6099 }, { "epoch": 0.41330713462971747, "grad_norm": 7.8006744384765625, "learning_rate": 9.267711684578e-05, "loss": 0.9579, "step": 6100 }, { "epoch": 0.41337488989768956, "grad_norm": 7.270709037780762, "learning_rate": 9.267574782668219e-05, "loss": 0.9105, "step": 6101 }, { "epoch": 0.41344264516566165, "grad_norm": 7.98935079574585, "learning_rate": 9.267437880758437e-05, "loss": 0.9697, "step": 6102 }, { "epoch": 0.41351040043363374, "grad_norm": 6.606309413909912, "learning_rate": 9.267300978848655e-05, "loss": 0.8649, "step": 6103 }, { "epoch": 0.4135781557016058, "grad_norm": 7.456545352935791, "learning_rate": 9.267164076938873e-05, "loss": 1.0884, "step": 6104 }, { "epoch": 0.41364591096957787, "grad_norm": 6.820968151092529, "learning_rate": 9.267027175029092e-05, "loss": 0.6651, "step": 6105 }, { "epoch": 0.41371366623754996, "grad_norm": 7.569576740264893, "learning_rate": 9.26689027311931e-05, "loss": 1.024, "step": 6106 }, { "epoch": 0.41378142150552205, "grad_norm": 7.982132911682129, "learning_rate": 9.266753371209528e-05, "loss": 0.9111, "step": 6107 }, { "epoch": 0.41384917677349414, "grad_norm": 7.610587120056152, "learning_rate": 9.266616469299748e-05, "loss": 0.9005, "step": 6108 }, { "epoch": 0.41391693204146623, "grad_norm": 6.773017883300781, "learning_rate": 9.266479567389966e-05, "loss": 0.6696, "step": 6109 }, { "epoch": 0.4139846873094383, "grad_norm": 7.702723979949951, "learning_rate": 9.266342665480184e-05, "loss": 0.7951, "step": 6110 }, { "epoch": 0.4140524425774104, "grad_norm": 7.299111843109131, "learning_rate": 9.266205763570403e-05, "loss": 1.0422, "step": 6111 }, { "epoch": 0.41412019784538245, "grad_norm": 5.148745536804199, "learning_rate": 9.266068861660621e-05, "loss": 0.6821, "step": 6112 }, { "epoch": 0.41418795311335455, "grad_norm": 6.768344402313232, "learning_rate": 9.265931959750839e-05, "loss": 0.8119, "step": 6113 }, { "epoch": 0.41425570838132664, "grad_norm": 6.882130146026611, "learning_rate": 9.265795057841057e-05, "loss": 0.9931, "step": 6114 }, { "epoch": 0.41432346364929873, "grad_norm": 7.292698383331299, "learning_rate": 9.265658155931277e-05, "loss": 1.0415, "step": 6115 }, { "epoch": 0.4143912189172708, "grad_norm": 7.241159915924072, "learning_rate": 9.265521254021495e-05, "loss": 0.9122, "step": 6116 }, { "epoch": 0.4144589741852429, "grad_norm": 6.7898736000061035, "learning_rate": 9.265384352111713e-05, "loss": 0.9399, "step": 6117 }, { "epoch": 0.414526729453215, "grad_norm": 6.505312919616699, "learning_rate": 9.265247450201931e-05, "loss": 0.9463, "step": 6118 }, { "epoch": 0.4145944847211871, "grad_norm": 5.761348724365234, "learning_rate": 9.26511054829215e-05, "loss": 0.8365, "step": 6119 }, { "epoch": 0.41466223998915913, "grad_norm": 6.377706527709961, "learning_rate": 9.264973646382368e-05, "loss": 0.7451, "step": 6120 }, { "epoch": 0.4147299952571312, "grad_norm": 8.465597152709961, "learning_rate": 9.264836744472586e-05, "loss": 0.8582, "step": 6121 }, { "epoch": 0.4147977505251033, "grad_norm": 6.3482770919799805, "learning_rate": 9.264699842562804e-05, "loss": 0.8291, "step": 6122 }, { "epoch": 0.4148655057930754, "grad_norm": 7.984723091125488, "learning_rate": 9.264562940653022e-05, "loss": 0.9641, "step": 6123 }, { "epoch": 0.4149332610610475, "grad_norm": 6.695097923278809, "learning_rate": 9.264426038743242e-05, "loss": 0.7661, "step": 6124 }, { "epoch": 0.4150010163290196, "grad_norm": 6.403726100921631, "learning_rate": 9.26428913683346e-05, "loss": 0.6342, "step": 6125 }, { "epoch": 0.4150687715969917, "grad_norm": 5.906423091888428, "learning_rate": 9.264152234923678e-05, "loss": 0.9588, "step": 6126 }, { "epoch": 0.4151365268649638, "grad_norm": 6.482539653778076, "learning_rate": 9.264015333013896e-05, "loss": 1.0244, "step": 6127 }, { "epoch": 0.4152042821329358, "grad_norm": 7.783926486968994, "learning_rate": 9.263878431104114e-05, "loss": 0.8611, "step": 6128 }, { "epoch": 0.4152720374009079, "grad_norm": 8.377721786499023, "learning_rate": 9.263741529194333e-05, "loss": 0.9757, "step": 6129 }, { "epoch": 0.41533979266888, "grad_norm": 8.719101905822754, "learning_rate": 9.263604627284551e-05, "loss": 1.0766, "step": 6130 }, { "epoch": 0.4154075479368521, "grad_norm": 6.0139641761779785, "learning_rate": 9.263467725374769e-05, "loss": 0.8036, "step": 6131 }, { "epoch": 0.4154753032048242, "grad_norm": 8.683677673339844, "learning_rate": 9.263330823464987e-05, "loss": 0.9476, "step": 6132 }, { "epoch": 0.41554305847279627, "grad_norm": 7.566380977630615, "learning_rate": 9.263193921555207e-05, "loss": 1.0983, "step": 6133 }, { "epoch": 0.41561081374076836, "grad_norm": 6.716690540313721, "learning_rate": 9.263057019645425e-05, "loss": 0.8803, "step": 6134 }, { "epoch": 0.41567856900874045, "grad_norm": 6.123441696166992, "learning_rate": 9.262920117735643e-05, "loss": 0.7158, "step": 6135 }, { "epoch": 0.4157463242767125, "grad_norm": 8.015522956848145, "learning_rate": 9.262783215825861e-05, "loss": 1.0269, "step": 6136 }, { "epoch": 0.4158140795446846, "grad_norm": 5.564431667327881, "learning_rate": 9.262646313916079e-05, "loss": 0.8309, "step": 6137 }, { "epoch": 0.41588183481265667, "grad_norm": 7.946048736572266, "learning_rate": 9.262509412006298e-05, "loss": 0.9334, "step": 6138 }, { "epoch": 0.41594959008062876, "grad_norm": 6.741854667663574, "learning_rate": 9.262372510096516e-05, "loss": 0.6461, "step": 6139 }, { "epoch": 0.41601734534860085, "grad_norm": 8.229652404785156, "learning_rate": 9.262235608186734e-05, "loss": 0.9046, "step": 6140 }, { "epoch": 0.41608510061657294, "grad_norm": 5.590304851531982, "learning_rate": 9.262098706276952e-05, "loss": 0.6979, "step": 6141 }, { "epoch": 0.41615285588454504, "grad_norm": 6.79884672164917, "learning_rate": 9.261961804367172e-05, "loss": 0.853, "step": 6142 }, { "epoch": 0.41622061115251713, "grad_norm": 7.175544738769531, "learning_rate": 9.26182490245739e-05, "loss": 0.9265, "step": 6143 }, { "epoch": 0.4162883664204892, "grad_norm": 8.839093208312988, "learning_rate": 9.261688000547608e-05, "loss": 0.9541, "step": 6144 }, { "epoch": 0.41635612168846126, "grad_norm": 6.818619728088379, "learning_rate": 9.261551098637826e-05, "loss": 0.6284, "step": 6145 }, { "epoch": 0.41642387695643335, "grad_norm": 7.31305456161499, "learning_rate": 9.261414196728044e-05, "loss": 0.797, "step": 6146 }, { "epoch": 0.41649163222440544, "grad_norm": 8.566871643066406, "learning_rate": 9.261277294818263e-05, "loss": 1.0893, "step": 6147 }, { "epoch": 0.41655938749237753, "grad_norm": 6.028139114379883, "learning_rate": 9.261140392908481e-05, "loss": 0.781, "step": 6148 }, { "epoch": 0.4166271427603496, "grad_norm": 7.383317947387695, "learning_rate": 9.261003490998699e-05, "loss": 0.5846, "step": 6149 }, { "epoch": 0.4166948980283217, "grad_norm": 6.392228126525879, "learning_rate": 9.260866589088917e-05, "loss": 0.9403, "step": 6150 }, { "epoch": 0.4167626532962938, "grad_norm": 6.1830644607543945, "learning_rate": 9.260729687179137e-05, "loss": 0.9502, "step": 6151 }, { "epoch": 0.4168304085642659, "grad_norm": 6.851447582244873, "learning_rate": 9.260592785269355e-05, "loss": 0.8042, "step": 6152 }, { "epoch": 0.41689816383223793, "grad_norm": 5.995123386383057, "learning_rate": 9.260455883359573e-05, "loss": 0.8875, "step": 6153 }, { "epoch": 0.41696591910021, "grad_norm": 6.834797382354736, "learning_rate": 9.260318981449791e-05, "loss": 0.8652, "step": 6154 }, { "epoch": 0.4170336743681821, "grad_norm": 6.0200371742248535, "learning_rate": 9.26018207954001e-05, "loss": 0.9198, "step": 6155 }, { "epoch": 0.4171014296361542, "grad_norm": 7.138192176818848, "learning_rate": 9.260045177630228e-05, "loss": 0.8284, "step": 6156 }, { "epoch": 0.4171691849041263, "grad_norm": 7.5531229972839355, "learning_rate": 9.259908275720446e-05, "loss": 0.7188, "step": 6157 }, { "epoch": 0.4172369401720984, "grad_norm": 8.347415924072266, "learning_rate": 9.259771373810666e-05, "loss": 1.1853, "step": 6158 }, { "epoch": 0.4173046954400705, "grad_norm": 8.188237190246582, "learning_rate": 9.259634471900884e-05, "loss": 0.7712, "step": 6159 }, { "epoch": 0.4173724507080426, "grad_norm": 7.238736629486084, "learning_rate": 9.259497569991102e-05, "loss": 0.7731, "step": 6160 }, { "epoch": 0.4174402059760146, "grad_norm": 8.175471305847168, "learning_rate": 9.259360668081321e-05, "loss": 1.0769, "step": 6161 }, { "epoch": 0.4175079612439867, "grad_norm": 6.97186803817749, "learning_rate": 9.259223766171539e-05, "loss": 0.7469, "step": 6162 }, { "epoch": 0.4175757165119588, "grad_norm": 5.761664390563965, "learning_rate": 9.259086864261757e-05, "loss": 0.8716, "step": 6163 }, { "epoch": 0.4176434717799309, "grad_norm": 8.567249298095703, "learning_rate": 9.258949962351975e-05, "loss": 0.8376, "step": 6164 }, { "epoch": 0.417711227047903, "grad_norm": 5.827561378479004, "learning_rate": 9.258813060442195e-05, "loss": 0.8468, "step": 6165 }, { "epoch": 0.41777898231587507, "grad_norm": 7.336645603179932, "learning_rate": 9.258676158532413e-05, "loss": 0.8529, "step": 6166 }, { "epoch": 0.41784673758384716, "grad_norm": 7.711108207702637, "learning_rate": 9.258539256622631e-05, "loss": 0.8802, "step": 6167 }, { "epoch": 0.41791449285181925, "grad_norm": 8.625036239624023, "learning_rate": 9.258402354712849e-05, "loss": 1.0263, "step": 6168 }, { "epoch": 0.4179822481197913, "grad_norm": 7.005527019500732, "learning_rate": 9.258265452803067e-05, "loss": 0.9728, "step": 6169 }, { "epoch": 0.4180500033877634, "grad_norm": 6.906123161315918, "learning_rate": 9.258128550893286e-05, "loss": 0.8958, "step": 6170 }, { "epoch": 0.41811775865573547, "grad_norm": 7.147536754608154, "learning_rate": 9.257991648983504e-05, "loss": 1.1466, "step": 6171 }, { "epoch": 0.41818551392370756, "grad_norm": 5.8256001472473145, "learning_rate": 9.257854747073722e-05, "loss": 0.7189, "step": 6172 }, { "epoch": 0.41825326919167966, "grad_norm": 5.144516944885254, "learning_rate": 9.25771784516394e-05, "loss": 0.6368, "step": 6173 }, { "epoch": 0.41832102445965175, "grad_norm": 7.161872863769531, "learning_rate": 9.25758094325416e-05, "loss": 1.0074, "step": 6174 }, { "epoch": 0.41838877972762384, "grad_norm": 9.098782539367676, "learning_rate": 9.257444041344378e-05, "loss": 0.9261, "step": 6175 }, { "epoch": 0.41845653499559593, "grad_norm": 7.566091060638428, "learning_rate": 9.257307139434596e-05, "loss": 0.8845, "step": 6176 }, { "epoch": 0.41852429026356797, "grad_norm": 8.618456840515137, "learning_rate": 9.257170237524814e-05, "loss": 0.8643, "step": 6177 }, { "epoch": 0.41859204553154006, "grad_norm": 8.870187759399414, "learning_rate": 9.257033335615032e-05, "loss": 1.1915, "step": 6178 }, { "epoch": 0.41865980079951215, "grad_norm": 6.494687557220459, "learning_rate": 9.256896433705251e-05, "loss": 0.6523, "step": 6179 }, { "epoch": 0.41872755606748424, "grad_norm": 7.417816638946533, "learning_rate": 9.256759531795469e-05, "loss": 0.9454, "step": 6180 }, { "epoch": 0.41879531133545633, "grad_norm": 11.310504913330078, "learning_rate": 9.256622629885687e-05, "loss": 1.2984, "step": 6181 }, { "epoch": 0.4188630666034284, "grad_norm": 6.3585615158081055, "learning_rate": 9.256485727975905e-05, "loss": 0.8379, "step": 6182 }, { "epoch": 0.4189308218714005, "grad_norm": 8.134123802185059, "learning_rate": 9.256348826066123e-05, "loss": 0.9962, "step": 6183 }, { "epoch": 0.4189985771393726, "grad_norm": 5.962007999420166, "learning_rate": 9.256211924156343e-05, "loss": 0.8923, "step": 6184 }, { "epoch": 0.41906633240734464, "grad_norm": 6.423482894897461, "learning_rate": 9.256075022246561e-05, "loss": 0.7991, "step": 6185 }, { "epoch": 0.41913408767531674, "grad_norm": 6.406213283538818, "learning_rate": 9.255938120336779e-05, "loss": 0.9454, "step": 6186 }, { "epoch": 0.4192018429432888, "grad_norm": 4.926299571990967, "learning_rate": 9.255801218426997e-05, "loss": 0.6442, "step": 6187 }, { "epoch": 0.4192695982112609, "grad_norm": 6.160318374633789, "learning_rate": 9.255664316517216e-05, "loss": 0.8575, "step": 6188 }, { "epoch": 0.419337353479233, "grad_norm": 9.17479133605957, "learning_rate": 9.255527414607434e-05, "loss": 1.2344, "step": 6189 }, { "epoch": 0.4194051087472051, "grad_norm": 8.95871639251709, "learning_rate": 9.255390512697652e-05, "loss": 0.9193, "step": 6190 }, { "epoch": 0.4194728640151772, "grad_norm": 5.636984825134277, "learning_rate": 9.25525361078787e-05, "loss": 0.725, "step": 6191 }, { "epoch": 0.4195406192831493, "grad_norm": 6.044233798980713, "learning_rate": 9.255116708878088e-05, "loss": 0.7526, "step": 6192 }, { "epoch": 0.4196083745511214, "grad_norm": 6.230378150939941, "learning_rate": 9.254979806968308e-05, "loss": 0.7923, "step": 6193 }, { "epoch": 0.4196761298190934, "grad_norm": 8.391528129577637, "learning_rate": 9.254842905058526e-05, "loss": 1.0368, "step": 6194 }, { "epoch": 0.4197438850870655, "grad_norm": 7.150004863739014, "learning_rate": 9.254706003148744e-05, "loss": 0.9087, "step": 6195 }, { "epoch": 0.4198116403550376, "grad_norm": 8.44096565246582, "learning_rate": 9.254569101238962e-05, "loss": 0.9511, "step": 6196 }, { "epoch": 0.4198793956230097, "grad_norm": 7.8519768714904785, "learning_rate": 9.254432199329181e-05, "loss": 1.0565, "step": 6197 }, { "epoch": 0.4199471508909818, "grad_norm": 6.873769760131836, "learning_rate": 9.254295297419399e-05, "loss": 0.9016, "step": 6198 }, { "epoch": 0.42001490615895387, "grad_norm": 6.1226396560668945, "learning_rate": 9.254158395509617e-05, "loss": 0.8439, "step": 6199 }, { "epoch": 0.42008266142692596, "grad_norm": 6.68569803237915, "learning_rate": 9.254021493599835e-05, "loss": 0.9836, "step": 6200 }, { "epoch": 0.42015041669489805, "grad_norm": 5.907567501068115, "learning_rate": 9.253884591690055e-05, "loss": 0.8047, "step": 6201 }, { "epoch": 0.4202181719628701, "grad_norm": 5.849989891052246, "learning_rate": 9.253747689780273e-05, "loss": 0.8791, "step": 6202 }, { "epoch": 0.4202859272308422, "grad_norm": 5.872654914855957, "learning_rate": 9.253610787870491e-05, "loss": 0.8368, "step": 6203 }, { "epoch": 0.4203536824988143, "grad_norm": 5.799046039581299, "learning_rate": 9.25347388596071e-05, "loss": 0.8215, "step": 6204 }, { "epoch": 0.42042143776678637, "grad_norm": 5.331169605255127, "learning_rate": 9.253336984050928e-05, "loss": 0.9533, "step": 6205 }, { "epoch": 0.42048919303475846, "grad_norm": 6.436175346374512, "learning_rate": 9.253200082141146e-05, "loss": 0.7356, "step": 6206 }, { "epoch": 0.42055694830273055, "grad_norm": 6.290867328643799, "learning_rate": 9.253063180231366e-05, "loss": 0.7771, "step": 6207 }, { "epoch": 0.42062470357070264, "grad_norm": 8.476110458374023, "learning_rate": 9.252926278321584e-05, "loss": 1.0357, "step": 6208 }, { "epoch": 0.42069245883867473, "grad_norm": 8.561487197875977, "learning_rate": 9.252789376411802e-05, "loss": 0.974, "step": 6209 }, { "epoch": 0.42076021410664677, "grad_norm": 7.24920654296875, "learning_rate": 9.25265247450202e-05, "loss": 1.2393, "step": 6210 }, { "epoch": 0.42082796937461886, "grad_norm": 7.135931015014648, "learning_rate": 9.252515572592239e-05, "loss": 1.0491, "step": 6211 }, { "epoch": 0.42089572464259095, "grad_norm": 8.867389678955078, "learning_rate": 9.252378670682457e-05, "loss": 0.836, "step": 6212 }, { "epoch": 0.42096347991056304, "grad_norm": 6.738379955291748, "learning_rate": 9.252241768772675e-05, "loss": 0.7829, "step": 6213 }, { "epoch": 0.42103123517853513, "grad_norm": 7.226996898651123, "learning_rate": 9.252104866862893e-05, "loss": 0.8716, "step": 6214 }, { "epoch": 0.4210989904465072, "grad_norm": 6.236578941345215, "learning_rate": 9.251967964953111e-05, "loss": 0.62, "step": 6215 }, { "epoch": 0.4211667457144793, "grad_norm": 7.801733016967773, "learning_rate": 9.25183106304333e-05, "loss": 1.1107, "step": 6216 }, { "epoch": 0.4212345009824514, "grad_norm": 6.062146186828613, "learning_rate": 9.251694161133549e-05, "loss": 0.8655, "step": 6217 }, { "epoch": 0.42130225625042345, "grad_norm": 5.408603668212891, "learning_rate": 9.251557259223767e-05, "loss": 0.7054, "step": 6218 }, { "epoch": 0.42137001151839554, "grad_norm": 9.531839370727539, "learning_rate": 9.251420357313985e-05, "loss": 0.9609, "step": 6219 }, { "epoch": 0.42143776678636763, "grad_norm": 6.054145336151123, "learning_rate": 9.251283455404204e-05, "loss": 0.9311, "step": 6220 }, { "epoch": 0.4215055220543397, "grad_norm": 6.372133731842041, "learning_rate": 9.251146553494422e-05, "loss": 0.8797, "step": 6221 }, { "epoch": 0.4215732773223118, "grad_norm": 6.394374847412109, "learning_rate": 9.25100965158464e-05, "loss": 0.7603, "step": 6222 }, { "epoch": 0.4216410325902839, "grad_norm": 7.188971519470215, "learning_rate": 9.250872749674858e-05, "loss": 0.5915, "step": 6223 }, { "epoch": 0.421708787858256, "grad_norm": 7.44260311126709, "learning_rate": 9.250735847765076e-05, "loss": 0.8215, "step": 6224 }, { "epoch": 0.4217765431262281, "grad_norm": 7.43394660949707, "learning_rate": 9.250598945855296e-05, "loss": 0.891, "step": 6225 }, { "epoch": 0.4218442983942001, "grad_norm": 6.9161200523376465, "learning_rate": 9.250462043945514e-05, "loss": 0.8795, "step": 6226 }, { "epoch": 0.4219120536621722, "grad_norm": 6.955477714538574, "learning_rate": 9.250325142035732e-05, "loss": 0.9781, "step": 6227 }, { "epoch": 0.4219798089301443, "grad_norm": 7.632521629333496, "learning_rate": 9.25018824012595e-05, "loss": 0.7374, "step": 6228 }, { "epoch": 0.4220475641981164, "grad_norm": 7.344869613647461, "learning_rate": 9.250051338216169e-05, "loss": 0.9506, "step": 6229 }, { "epoch": 0.4221153194660885, "grad_norm": 7.199647426605225, "learning_rate": 9.249914436306387e-05, "loss": 0.9225, "step": 6230 }, { "epoch": 0.4221830747340606, "grad_norm": 5.884180068969727, "learning_rate": 9.249777534396605e-05, "loss": 0.729, "step": 6231 }, { "epoch": 0.4222508300020327, "grad_norm": 6.82388973236084, "learning_rate": 9.249640632486823e-05, "loss": 1.0978, "step": 6232 }, { "epoch": 0.42231858527000476, "grad_norm": 8.883796691894531, "learning_rate": 9.249503730577041e-05, "loss": 1.1025, "step": 6233 }, { "epoch": 0.4223863405379768, "grad_norm": 6.1692938804626465, "learning_rate": 9.24936682866726e-05, "loss": 0.6734, "step": 6234 }, { "epoch": 0.4224540958059489, "grad_norm": 8.827327728271484, "learning_rate": 9.249229926757479e-05, "loss": 1.1097, "step": 6235 }, { "epoch": 0.422521851073921, "grad_norm": 6.649320602416992, "learning_rate": 9.249093024847697e-05, "loss": 0.9978, "step": 6236 }, { "epoch": 0.4225896063418931, "grad_norm": 8.04487419128418, "learning_rate": 9.248956122937915e-05, "loss": 0.9293, "step": 6237 }, { "epoch": 0.42265736160986517, "grad_norm": 7.031024932861328, "learning_rate": 9.248819221028133e-05, "loss": 0.7474, "step": 6238 }, { "epoch": 0.42272511687783726, "grad_norm": 4.858736038208008, "learning_rate": 9.248682319118352e-05, "loss": 0.6429, "step": 6239 }, { "epoch": 0.42279287214580935, "grad_norm": 6.075960159301758, "learning_rate": 9.24854541720857e-05, "loss": 0.7595, "step": 6240 }, { "epoch": 0.42286062741378144, "grad_norm": 11.653654098510742, "learning_rate": 9.248408515298788e-05, "loss": 0.7962, "step": 6241 }, { "epoch": 0.4229283826817535, "grad_norm": 6.255251884460449, "learning_rate": 9.248271613389006e-05, "loss": 0.9027, "step": 6242 }, { "epoch": 0.42299613794972557, "grad_norm": 6.908040523529053, "learning_rate": 9.248134711479226e-05, "loss": 0.936, "step": 6243 }, { "epoch": 0.42306389321769766, "grad_norm": 7.853124141693115, "learning_rate": 9.247997809569444e-05, "loss": 1.0966, "step": 6244 }, { "epoch": 0.42313164848566975, "grad_norm": 7.661264896392822, "learning_rate": 9.247860907659662e-05, "loss": 0.8407, "step": 6245 }, { "epoch": 0.42319940375364185, "grad_norm": 6.52161169052124, "learning_rate": 9.24772400574988e-05, "loss": 0.862, "step": 6246 }, { "epoch": 0.42326715902161394, "grad_norm": 6.456472873687744, "learning_rate": 9.247587103840099e-05, "loss": 0.713, "step": 6247 }, { "epoch": 0.42333491428958603, "grad_norm": 6.655475616455078, "learning_rate": 9.247450201930317e-05, "loss": 0.7422, "step": 6248 }, { "epoch": 0.4234026695575581, "grad_norm": 6.448397636413574, "learning_rate": 9.247313300020535e-05, "loss": 0.8645, "step": 6249 }, { "epoch": 0.4234704248255302, "grad_norm": 5.964814186096191, "learning_rate": 9.247176398110755e-05, "loss": 0.6173, "step": 6250 }, { "epoch": 0.42353818009350225, "grad_norm": 6.125659465789795, "learning_rate": 9.247039496200973e-05, "loss": 0.6951, "step": 6251 }, { "epoch": 0.42360593536147434, "grad_norm": 6.881291389465332, "learning_rate": 9.246902594291191e-05, "loss": 0.7525, "step": 6252 }, { "epoch": 0.42367369062944643, "grad_norm": 6.647464752197266, "learning_rate": 9.24676569238141e-05, "loss": 0.6757, "step": 6253 }, { "epoch": 0.4237414458974185, "grad_norm": 6.530306816101074, "learning_rate": 9.246628790471628e-05, "loss": 1.1478, "step": 6254 }, { "epoch": 0.4238092011653906, "grad_norm": 7.5514984130859375, "learning_rate": 9.246491888561846e-05, "loss": 0.868, "step": 6255 }, { "epoch": 0.4238769564333627, "grad_norm": 10.405878067016602, "learning_rate": 9.246354986652064e-05, "loss": 1.0607, "step": 6256 }, { "epoch": 0.4239447117013348, "grad_norm": 6.7608418464660645, "learning_rate": 9.246218084742284e-05, "loss": 0.9173, "step": 6257 }, { "epoch": 0.4240124669693069, "grad_norm": 7.583088397979736, "learning_rate": 9.246081182832502e-05, "loss": 0.9091, "step": 6258 }, { "epoch": 0.4240802222372789, "grad_norm": 7.009425163269043, "learning_rate": 9.24594428092272e-05, "loss": 0.9509, "step": 6259 }, { "epoch": 0.424147977505251, "grad_norm": 10.783225059509277, "learning_rate": 9.245807379012938e-05, "loss": 0.9033, "step": 6260 }, { "epoch": 0.4242157327732231, "grad_norm": 6.1244611740112305, "learning_rate": 9.245670477103156e-05, "loss": 0.7285, "step": 6261 }, { "epoch": 0.4242834880411952, "grad_norm": 6.983814716339111, "learning_rate": 9.245533575193375e-05, "loss": 0.9463, "step": 6262 }, { "epoch": 0.4243512433091673, "grad_norm": 6.019556522369385, "learning_rate": 9.245396673283593e-05, "loss": 0.8072, "step": 6263 }, { "epoch": 0.4244189985771394, "grad_norm": 7.2982001304626465, "learning_rate": 9.245259771373811e-05, "loss": 0.8905, "step": 6264 }, { "epoch": 0.4244867538451115, "grad_norm": 7.231256008148193, "learning_rate": 9.245122869464029e-05, "loss": 0.9114, "step": 6265 }, { "epoch": 0.42455450911308357, "grad_norm": 6.597518444061279, "learning_rate": 9.244985967554249e-05, "loss": 1.0431, "step": 6266 }, { "epoch": 0.4246222643810556, "grad_norm": 8.651244163513184, "learning_rate": 9.244849065644467e-05, "loss": 1.1346, "step": 6267 }, { "epoch": 0.4246900196490277, "grad_norm": 9.052881240844727, "learning_rate": 9.244712163734685e-05, "loss": 0.7779, "step": 6268 }, { "epoch": 0.4247577749169998, "grad_norm": 6.886295318603516, "learning_rate": 9.244575261824903e-05, "loss": 0.9643, "step": 6269 }, { "epoch": 0.4248255301849719, "grad_norm": 6.241147518157959, "learning_rate": 9.244438359915121e-05, "loss": 0.7539, "step": 6270 }, { "epoch": 0.42489328545294397, "grad_norm": 7.910638332366943, "learning_rate": 9.24430145800534e-05, "loss": 0.9967, "step": 6271 }, { "epoch": 0.42496104072091606, "grad_norm": 7.068695545196533, "learning_rate": 9.244164556095558e-05, "loss": 0.9531, "step": 6272 }, { "epoch": 0.42502879598888815, "grad_norm": 7.7406907081604, "learning_rate": 9.244027654185776e-05, "loss": 0.8593, "step": 6273 }, { "epoch": 0.42509655125686024, "grad_norm": 6.548360824584961, "learning_rate": 9.243890752275994e-05, "loss": 0.7806, "step": 6274 }, { "epoch": 0.4251643065248323, "grad_norm": 6.20359468460083, "learning_rate": 9.243753850366214e-05, "loss": 0.846, "step": 6275 }, { "epoch": 0.42523206179280437, "grad_norm": 6.6235270500183105, "learning_rate": 9.243616948456432e-05, "loss": 0.806, "step": 6276 }, { "epoch": 0.42529981706077646, "grad_norm": 7.216398239135742, "learning_rate": 9.24348004654665e-05, "loss": 1.0497, "step": 6277 }, { "epoch": 0.42536757232874856, "grad_norm": 6.946768283843994, "learning_rate": 9.243343144636868e-05, "loss": 0.7862, "step": 6278 }, { "epoch": 0.42543532759672065, "grad_norm": 6.441595554351807, "learning_rate": 9.243206242727086e-05, "loss": 0.8663, "step": 6279 }, { "epoch": 0.42550308286469274, "grad_norm": 8.521356582641602, "learning_rate": 9.243069340817305e-05, "loss": 1.3689, "step": 6280 }, { "epoch": 0.42557083813266483, "grad_norm": 7.311733722686768, "learning_rate": 9.242932438907523e-05, "loss": 0.8788, "step": 6281 }, { "epoch": 0.4256385934006369, "grad_norm": 7.157277584075928, "learning_rate": 9.242795536997741e-05, "loss": 0.8245, "step": 6282 }, { "epoch": 0.42570634866860896, "grad_norm": 5.332032680511475, "learning_rate": 9.242658635087959e-05, "loss": 0.7333, "step": 6283 }, { "epoch": 0.42577410393658105, "grad_norm": 6.090252876281738, "learning_rate": 9.242521733178179e-05, "loss": 0.7084, "step": 6284 }, { "epoch": 0.42584185920455314, "grad_norm": 6.971512317657471, "learning_rate": 9.242384831268397e-05, "loss": 0.7226, "step": 6285 }, { "epoch": 0.42590961447252523, "grad_norm": 5.37277364730835, "learning_rate": 9.242247929358615e-05, "loss": 0.701, "step": 6286 }, { "epoch": 0.4259773697404973, "grad_norm": 6.7667622566223145, "learning_rate": 9.242111027448833e-05, "loss": 0.9389, "step": 6287 }, { "epoch": 0.4260451250084694, "grad_norm": 6.8329596519470215, "learning_rate": 9.241974125539051e-05, "loss": 0.8574, "step": 6288 }, { "epoch": 0.4261128802764415, "grad_norm": 6.124345779418945, "learning_rate": 9.24183722362927e-05, "loss": 0.9497, "step": 6289 }, { "epoch": 0.4261806355444136, "grad_norm": 6.190674304962158, "learning_rate": 9.241700321719488e-05, "loss": 0.8933, "step": 6290 }, { "epoch": 0.42624839081238564, "grad_norm": 6.881906986236572, "learning_rate": 9.241563419809706e-05, "loss": 1.0343, "step": 6291 }, { "epoch": 0.4263161460803577, "grad_norm": 6.8810648918151855, "learning_rate": 9.241426517899924e-05, "loss": 1.1141, "step": 6292 }, { "epoch": 0.4263839013483298, "grad_norm": 7.524305820465088, "learning_rate": 9.241289615990144e-05, "loss": 0.9136, "step": 6293 }, { "epoch": 0.4264516566163019, "grad_norm": 5.732180595397949, "learning_rate": 9.241152714080362e-05, "loss": 0.8808, "step": 6294 }, { "epoch": 0.426519411884274, "grad_norm": 5.485534191131592, "learning_rate": 9.24101581217058e-05, "loss": 0.8755, "step": 6295 }, { "epoch": 0.4265871671522461, "grad_norm": 7.75483512878418, "learning_rate": 9.240878910260799e-05, "loss": 0.6602, "step": 6296 }, { "epoch": 0.4266549224202182, "grad_norm": 5.6334075927734375, "learning_rate": 9.240742008351017e-05, "loss": 0.6075, "step": 6297 }, { "epoch": 0.4267226776881903, "grad_norm": 6.316882133483887, "learning_rate": 9.240605106441235e-05, "loss": 0.8607, "step": 6298 }, { "epoch": 0.42679043295616237, "grad_norm": 7.0021257400512695, "learning_rate": 9.240468204531455e-05, "loss": 0.8648, "step": 6299 }, { "epoch": 0.4268581882241344, "grad_norm": 5.8421125411987305, "learning_rate": 9.240331302621673e-05, "loss": 0.7887, "step": 6300 }, { "epoch": 0.4269259434921065, "grad_norm": 8.877615928649902, "learning_rate": 9.24019440071189e-05, "loss": 1.3777, "step": 6301 }, { "epoch": 0.4269936987600786, "grad_norm": 7.440095901489258, "learning_rate": 9.240057498802109e-05, "loss": 1.0015, "step": 6302 }, { "epoch": 0.4270614540280507, "grad_norm": 6.460360527038574, "learning_rate": 9.239920596892328e-05, "loss": 0.6861, "step": 6303 }, { "epoch": 0.42712920929602277, "grad_norm": 9.731608390808105, "learning_rate": 9.239783694982546e-05, "loss": 0.8601, "step": 6304 }, { "epoch": 0.42719696456399486, "grad_norm": 7.891725540161133, "learning_rate": 9.239646793072764e-05, "loss": 0.7478, "step": 6305 }, { "epoch": 0.42726471983196695, "grad_norm": 6.742074966430664, "learning_rate": 9.239509891162982e-05, "loss": 0.7774, "step": 6306 }, { "epoch": 0.42733247509993905, "grad_norm": 7.515460014343262, "learning_rate": 9.239372989253202e-05, "loss": 0.9524, "step": 6307 }, { "epoch": 0.4274002303679111, "grad_norm": 6.278934478759766, "learning_rate": 9.23923608734342e-05, "loss": 0.9216, "step": 6308 }, { "epoch": 0.4274679856358832, "grad_norm": 6.528146266937256, "learning_rate": 9.239099185433638e-05, "loss": 0.9959, "step": 6309 }, { "epoch": 0.42753574090385527, "grad_norm": 5.953476905822754, "learning_rate": 9.238962283523856e-05, "loss": 1.0062, "step": 6310 }, { "epoch": 0.42760349617182736, "grad_norm": 7.488530158996582, "learning_rate": 9.238825381614074e-05, "loss": 0.8633, "step": 6311 }, { "epoch": 0.42767125143979945, "grad_norm": 6.974276542663574, "learning_rate": 9.238688479704293e-05, "loss": 1.0849, "step": 6312 }, { "epoch": 0.42773900670777154, "grad_norm": 7.560183048248291, "learning_rate": 9.238551577794511e-05, "loss": 0.9238, "step": 6313 }, { "epoch": 0.42780676197574363, "grad_norm": 6.859951019287109, "learning_rate": 9.238414675884729e-05, "loss": 0.7021, "step": 6314 }, { "epoch": 0.4278745172437157, "grad_norm": 8.095294952392578, "learning_rate": 9.238277773974947e-05, "loss": 0.9374, "step": 6315 }, { "epoch": 0.42794227251168776, "grad_norm": 6.5458221435546875, "learning_rate": 9.238140872065165e-05, "loss": 0.7089, "step": 6316 }, { "epoch": 0.42801002777965985, "grad_norm": 6.9012274742126465, "learning_rate": 9.238003970155385e-05, "loss": 1.0967, "step": 6317 }, { "epoch": 0.42807778304763194, "grad_norm": 7.991293430328369, "learning_rate": 9.237867068245603e-05, "loss": 0.8149, "step": 6318 }, { "epoch": 0.42814553831560404, "grad_norm": 6.636972904205322, "learning_rate": 9.23773016633582e-05, "loss": 0.9311, "step": 6319 }, { "epoch": 0.4282132935835761, "grad_norm": 7.938980579376221, "learning_rate": 9.237593264426039e-05, "loss": 1.021, "step": 6320 }, { "epoch": 0.4282810488515482, "grad_norm": 6.600100994110107, "learning_rate": 9.237456362516258e-05, "loss": 0.9375, "step": 6321 }, { "epoch": 0.4283488041195203, "grad_norm": 8.848981857299805, "learning_rate": 9.237319460606476e-05, "loss": 0.7755, "step": 6322 }, { "epoch": 0.4284165593874924, "grad_norm": 8.488492012023926, "learning_rate": 9.237182558696694e-05, "loss": 0.8267, "step": 6323 }, { "epoch": 0.42848431465546444, "grad_norm": 5.880466938018799, "learning_rate": 9.237045656786912e-05, "loss": 0.7597, "step": 6324 }, { "epoch": 0.42855206992343653, "grad_norm": 6.318797588348389, "learning_rate": 9.23690875487713e-05, "loss": 1.0001, "step": 6325 }, { "epoch": 0.4286198251914086, "grad_norm": 6.730221271514893, "learning_rate": 9.23677185296735e-05, "loss": 0.9811, "step": 6326 }, { "epoch": 0.4286875804593807, "grad_norm": 6.913969993591309, "learning_rate": 9.236634951057568e-05, "loss": 0.8782, "step": 6327 }, { "epoch": 0.4287553357273528, "grad_norm": 7.533535480499268, "learning_rate": 9.236498049147786e-05, "loss": 0.652, "step": 6328 }, { "epoch": 0.4288230909953249, "grad_norm": 7.687053680419922, "learning_rate": 9.236361147238004e-05, "loss": 0.9665, "step": 6329 }, { "epoch": 0.428890846263297, "grad_norm": 7.747707843780518, "learning_rate": 9.236224245328223e-05, "loss": 1.1481, "step": 6330 }, { "epoch": 0.4289586015312691, "grad_norm": 7.709572792053223, "learning_rate": 9.236087343418441e-05, "loss": 0.9191, "step": 6331 }, { "epoch": 0.4290263567992411, "grad_norm": 8.473289489746094, "learning_rate": 9.235950441508659e-05, "loss": 1.0646, "step": 6332 }, { "epoch": 0.4290941120672132, "grad_norm": 6.672016620635986, "learning_rate": 9.235813539598877e-05, "loss": 0.7865, "step": 6333 }, { "epoch": 0.4291618673351853, "grad_norm": 6.022176265716553, "learning_rate": 9.235676637689095e-05, "loss": 0.8593, "step": 6334 }, { "epoch": 0.4292296226031574, "grad_norm": 6.375059604644775, "learning_rate": 9.235539735779315e-05, "loss": 0.8754, "step": 6335 }, { "epoch": 0.4292973778711295, "grad_norm": 6.617783546447754, "learning_rate": 9.235402833869533e-05, "loss": 1.022, "step": 6336 }, { "epoch": 0.4293651331391016, "grad_norm": 5.790512561798096, "learning_rate": 9.235265931959751e-05, "loss": 0.8903, "step": 6337 }, { "epoch": 0.42943288840707367, "grad_norm": 6.528816223144531, "learning_rate": 9.235129030049969e-05, "loss": 0.8999, "step": 6338 }, { "epoch": 0.42950064367504576, "grad_norm": 7.407886505126953, "learning_rate": 9.234992128140188e-05, "loss": 0.6748, "step": 6339 }, { "epoch": 0.4295683989430178, "grad_norm": 6.933940887451172, "learning_rate": 9.234855226230406e-05, "loss": 1.0334, "step": 6340 }, { "epoch": 0.4296361542109899, "grad_norm": 6.4698686599731445, "learning_rate": 9.234718324320624e-05, "loss": 0.8778, "step": 6341 }, { "epoch": 0.429703909478962, "grad_norm": 7.047656059265137, "learning_rate": 9.234581422410844e-05, "loss": 0.9406, "step": 6342 }, { "epoch": 0.42977166474693407, "grad_norm": 6.107243061065674, "learning_rate": 9.234444520501062e-05, "loss": 0.716, "step": 6343 }, { "epoch": 0.42983942001490616, "grad_norm": 7.644023895263672, "learning_rate": 9.23430761859128e-05, "loss": 0.9632, "step": 6344 }, { "epoch": 0.42990717528287825, "grad_norm": 8.450486183166504, "learning_rate": 9.234170716681499e-05, "loss": 0.9608, "step": 6345 }, { "epoch": 0.42997493055085034, "grad_norm": 6.914335250854492, "learning_rate": 9.234033814771717e-05, "loss": 0.9473, "step": 6346 }, { "epoch": 0.43004268581882243, "grad_norm": 7.677610874176025, "learning_rate": 9.233896912861935e-05, "loss": 0.7403, "step": 6347 }, { "epoch": 0.4301104410867945, "grad_norm": 8.03879165649414, "learning_rate": 9.233760010952153e-05, "loss": 1.1334, "step": 6348 }, { "epoch": 0.43017819635476656, "grad_norm": 8.243664741516113, "learning_rate": 9.233623109042373e-05, "loss": 0.8745, "step": 6349 }, { "epoch": 0.43024595162273865, "grad_norm": 5.417998313903809, "learning_rate": 9.23348620713259e-05, "loss": 0.8243, "step": 6350 }, { "epoch": 0.43031370689071075, "grad_norm": 9.148211479187012, "learning_rate": 9.233349305222809e-05, "loss": 1.0485, "step": 6351 }, { "epoch": 0.43038146215868284, "grad_norm": 5.762056827545166, "learning_rate": 9.233212403313027e-05, "loss": 0.8528, "step": 6352 }, { "epoch": 0.43044921742665493, "grad_norm": 7.960264682769775, "learning_rate": 9.233075501403246e-05, "loss": 1.1308, "step": 6353 }, { "epoch": 0.430516972694627, "grad_norm": 5.9596710205078125, "learning_rate": 9.232938599493464e-05, "loss": 0.8385, "step": 6354 }, { "epoch": 0.4305847279625991, "grad_norm": 7.483585357666016, "learning_rate": 9.232801697583682e-05, "loss": 0.8117, "step": 6355 }, { "epoch": 0.4306524832305712, "grad_norm": 11.998135566711426, "learning_rate": 9.2326647956739e-05, "loss": 0.8077, "step": 6356 }, { "epoch": 0.43072023849854324, "grad_norm": 5.418194770812988, "learning_rate": 9.232527893764118e-05, "loss": 0.6575, "step": 6357 }, { "epoch": 0.43078799376651533, "grad_norm": 7.568809509277344, "learning_rate": 9.232390991854338e-05, "loss": 0.9036, "step": 6358 }, { "epoch": 0.4308557490344874, "grad_norm": 6.613016128540039, "learning_rate": 9.232254089944556e-05, "loss": 0.9918, "step": 6359 }, { "epoch": 0.4309235043024595, "grad_norm": 7.7656683921813965, "learning_rate": 9.232117188034774e-05, "loss": 1.1026, "step": 6360 }, { "epoch": 0.4309912595704316, "grad_norm": 6.714265823364258, "learning_rate": 9.231980286124992e-05, "loss": 0.8636, "step": 6361 }, { "epoch": 0.4310590148384037, "grad_norm": 6.468122482299805, "learning_rate": 9.231843384215211e-05, "loss": 0.7608, "step": 6362 }, { "epoch": 0.4311267701063758, "grad_norm": 7.882842540740967, "learning_rate": 9.231706482305429e-05, "loss": 0.8926, "step": 6363 }, { "epoch": 0.4311945253743479, "grad_norm": 6.002429485321045, "learning_rate": 9.231569580395647e-05, "loss": 0.8137, "step": 6364 }, { "epoch": 0.4312622806423199, "grad_norm": 6.818765163421631, "learning_rate": 9.231432678485865e-05, "loss": 0.8249, "step": 6365 }, { "epoch": 0.431330035910292, "grad_norm": 8.535319328308105, "learning_rate": 9.231295776576083e-05, "loss": 0.888, "step": 6366 }, { "epoch": 0.4313977911782641, "grad_norm": 7.823378086090088, "learning_rate": 9.231158874666303e-05, "loss": 0.8847, "step": 6367 }, { "epoch": 0.4314655464462362, "grad_norm": 7.392731189727783, "learning_rate": 9.23102197275652e-05, "loss": 0.7595, "step": 6368 }, { "epoch": 0.4315333017142083, "grad_norm": 5.637443542480469, "learning_rate": 9.230885070846739e-05, "loss": 0.6636, "step": 6369 }, { "epoch": 0.4316010569821804, "grad_norm": 8.26915454864502, "learning_rate": 9.230748168936957e-05, "loss": 0.7704, "step": 6370 }, { "epoch": 0.43166881225015247, "grad_norm": 8.045785903930664, "learning_rate": 9.230611267027175e-05, "loss": 0.8427, "step": 6371 }, { "epoch": 0.43173656751812456, "grad_norm": 8.981038093566895, "learning_rate": 9.230474365117394e-05, "loss": 0.9997, "step": 6372 }, { "epoch": 0.4318043227860966, "grad_norm": 7.137066841125488, "learning_rate": 9.230337463207612e-05, "loss": 0.7654, "step": 6373 }, { "epoch": 0.4318720780540687, "grad_norm": 6.7576165199279785, "learning_rate": 9.23020056129783e-05, "loss": 0.8766, "step": 6374 }, { "epoch": 0.4319398333220408, "grad_norm": 6.381602764129639, "learning_rate": 9.230063659388048e-05, "loss": 0.9203, "step": 6375 }, { "epoch": 0.43200758859001287, "grad_norm": 6.969717502593994, "learning_rate": 9.229926757478268e-05, "loss": 0.9507, "step": 6376 }, { "epoch": 0.43207534385798496, "grad_norm": 5.76108455657959, "learning_rate": 9.229789855568486e-05, "loss": 0.8533, "step": 6377 }, { "epoch": 0.43214309912595705, "grad_norm": 6.446774005889893, "learning_rate": 9.229652953658704e-05, "loss": 0.9094, "step": 6378 }, { "epoch": 0.43221085439392914, "grad_norm": 9.543696403503418, "learning_rate": 9.229516051748922e-05, "loss": 0.8955, "step": 6379 }, { "epoch": 0.43227860966190124, "grad_norm": 8.095965385437012, "learning_rate": 9.22937914983914e-05, "loss": 0.633, "step": 6380 }, { "epoch": 0.4323463649298733, "grad_norm": 9.561396598815918, "learning_rate": 9.229242247929359e-05, "loss": 0.9367, "step": 6381 }, { "epoch": 0.43241412019784536, "grad_norm": 7.348874092102051, "learning_rate": 9.229105346019577e-05, "loss": 0.7585, "step": 6382 }, { "epoch": 0.43248187546581746, "grad_norm": 5.866179466247559, "learning_rate": 9.228968444109795e-05, "loss": 0.8046, "step": 6383 }, { "epoch": 0.43254963073378955, "grad_norm": 7.945270538330078, "learning_rate": 9.228831542200013e-05, "loss": 0.8599, "step": 6384 }, { "epoch": 0.43261738600176164, "grad_norm": 6.365564346313477, "learning_rate": 9.228694640290233e-05, "loss": 0.8213, "step": 6385 }, { "epoch": 0.43268514126973373, "grad_norm": 7.802513599395752, "learning_rate": 9.22855773838045e-05, "loss": 0.7779, "step": 6386 }, { "epoch": 0.4327528965377058, "grad_norm": 7.740975379943848, "learning_rate": 9.228420836470669e-05, "loss": 0.9564, "step": 6387 }, { "epoch": 0.4328206518056779, "grad_norm": 8.962937355041504, "learning_rate": 9.228283934560887e-05, "loss": 0.7704, "step": 6388 }, { "epoch": 0.43288840707364995, "grad_norm": 9.266582489013672, "learning_rate": 9.228147032651106e-05, "loss": 1.055, "step": 6389 }, { "epoch": 0.43295616234162204, "grad_norm": 8.01395320892334, "learning_rate": 9.228010130741324e-05, "loss": 1.0197, "step": 6390 }, { "epoch": 0.43302391760959413, "grad_norm": 6.83071231842041, "learning_rate": 9.227873228831543e-05, "loss": 0.8489, "step": 6391 }, { "epoch": 0.4330916728775662, "grad_norm": 7.923128604888916, "learning_rate": 9.227736326921762e-05, "loss": 0.8601, "step": 6392 }, { "epoch": 0.4331594281455383, "grad_norm": 6.395363807678223, "learning_rate": 9.22759942501198e-05, "loss": 0.8167, "step": 6393 }, { "epoch": 0.4332271834135104, "grad_norm": 5.6835408210754395, "learning_rate": 9.227462523102198e-05, "loss": 0.895, "step": 6394 }, { "epoch": 0.4332949386814825, "grad_norm": 7.308006286621094, "learning_rate": 9.227325621192417e-05, "loss": 0.985, "step": 6395 }, { "epoch": 0.4333626939494546, "grad_norm": 6.754067897796631, "learning_rate": 9.227188719282635e-05, "loss": 0.9144, "step": 6396 }, { "epoch": 0.4334304492174266, "grad_norm": 7.917202949523926, "learning_rate": 9.227051817372853e-05, "loss": 0.8683, "step": 6397 }, { "epoch": 0.4334982044853987, "grad_norm": 7.502418041229248, "learning_rate": 9.226914915463071e-05, "loss": 1.0849, "step": 6398 }, { "epoch": 0.4335659597533708, "grad_norm": 6.86057186126709, "learning_rate": 9.22677801355329e-05, "loss": 0.9286, "step": 6399 }, { "epoch": 0.4336337150213429, "grad_norm": 8.595507621765137, "learning_rate": 9.226641111643509e-05, "loss": 0.9766, "step": 6400 }, { "epoch": 0.433701470289315, "grad_norm": 6.231963157653809, "learning_rate": 9.226504209733727e-05, "loss": 0.8714, "step": 6401 }, { "epoch": 0.4337692255572871, "grad_norm": 7.1258625984191895, "learning_rate": 9.226367307823945e-05, "loss": 1.1432, "step": 6402 }, { "epoch": 0.4338369808252592, "grad_norm": 6.715908050537109, "learning_rate": 9.226230405914163e-05, "loss": 0.8271, "step": 6403 }, { "epoch": 0.43390473609323127, "grad_norm": 7.551729679107666, "learning_rate": 9.226093504004382e-05, "loss": 0.8919, "step": 6404 }, { "epoch": 0.43397249136120336, "grad_norm": 7.136280059814453, "learning_rate": 9.2259566020946e-05, "loss": 1.0586, "step": 6405 }, { "epoch": 0.4340402466291754, "grad_norm": 5.612763404846191, "learning_rate": 9.225819700184818e-05, "loss": 0.7163, "step": 6406 }, { "epoch": 0.4341080018971475, "grad_norm": 7.445765972137451, "learning_rate": 9.225682798275036e-05, "loss": 1.0488, "step": 6407 }, { "epoch": 0.4341757571651196, "grad_norm": 7.239571571350098, "learning_rate": 9.225545896365255e-05, "loss": 0.9174, "step": 6408 }, { "epoch": 0.43424351243309167, "grad_norm": 7.86649751663208, "learning_rate": 9.225408994455474e-05, "loss": 0.9312, "step": 6409 }, { "epoch": 0.43431126770106376, "grad_norm": 7.049384117126465, "learning_rate": 9.225272092545692e-05, "loss": 0.8739, "step": 6410 }, { "epoch": 0.43437902296903586, "grad_norm": 6.824424743652344, "learning_rate": 9.22513519063591e-05, "loss": 0.9335, "step": 6411 }, { "epoch": 0.43444677823700795, "grad_norm": 6.616923809051514, "learning_rate": 9.224998288726128e-05, "loss": 0.7854, "step": 6412 }, { "epoch": 0.43451453350498004, "grad_norm": 6.989858150482178, "learning_rate": 9.224861386816347e-05, "loss": 0.915, "step": 6413 }, { "epoch": 0.4345822887729521, "grad_norm": 7.5784406661987305, "learning_rate": 9.224724484906565e-05, "loss": 0.9316, "step": 6414 }, { "epoch": 0.43465004404092417, "grad_norm": 5.958365440368652, "learning_rate": 9.224587582996783e-05, "loss": 0.9103, "step": 6415 }, { "epoch": 0.43471779930889626, "grad_norm": 6.811526775360107, "learning_rate": 9.224450681087001e-05, "loss": 0.8778, "step": 6416 }, { "epoch": 0.43478555457686835, "grad_norm": 6.9535722732543945, "learning_rate": 9.22431377917722e-05, "loss": 0.9407, "step": 6417 }, { "epoch": 0.43485330984484044, "grad_norm": 7.412068843841553, "learning_rate": 9.224176877267439e-05, "loss": 0.7526, "step": 6418 }, { "epoch": 0.43492106511281253, "grad_norm": 6.990227699279785, "learning_rate": 9.224039975357657e-05, "loss": 0.8432, "step": 6419 }, { "epoch": 0.4349888203807846, "grad_norm": 7.477065563201904, "learning_rate": 9.223903073447875e-05, "loss": 0.9579, "step": 6420 }, { "epoch": 0.4350565756487567, "grad_norm": 6.9343581199646, "learning_rate": 9.223766171538093e-05, "loss": 0.7967, "step": 6421 }, { "epoch": 0.43512433091672875, "grad_norm": 7.360040664672852, "learning_rate": 9.223629269628312e-05, "loss": 0.807, "step": 6422 }, { "epoch": 0.43519208618470084, "grad_norm": 6.708899974822998, "learning_rate": 9.22349236771853e-05, "loss": 0.723, "step": 6423 }, { "epoch": 0.43525984145267294, "grad_norm": 7.195553302764893, "learning_rate": 9.223355465808748e-05, "loss": 1.0543, "step": 6424 }, { "epoch": 0.435327596720645, "grad_norm": 6.833982467651367, "learning_rate": 9.223218563898966e-05, "loss": 0.7874, "step": 6425 }, { "epoch": 0.4353953519886171, "grad_norm": 7.278003692626953, "learning_rate": 9.223081661989184e-05, "loss": 1.0377, "step": 6426 }, { "epoch": 0.4354631072565892, "grad_norm": 6.5219407081604, "learning_rate": 9.222944760079404e-05, "loss": 0.8975, "step": 6427 }, { "epoch": 0.4355308625245613, "grad_norm": 15.948746681213379, "learning_rate": 9.222807858169622e-05, "loss": 1.1554, "step": 6428 }, { "epoch": 0.4355986177925334, "grad_norm": 8.12380599975586, "learning_rate": 9.22267095625984e-05, "loss": 0.8078, "step": 6429 }, { "epoch": 0.43566637306050543, "grad_norm": 6.6606340408325195, "learning_rate": 9.222534054350058e-05, "loss": 1.065, "step": 6430 }, { "epoch": 0.4357341283284775, "grad_norm": 8.079832077026367, "learning_rate": 9.222397152440277e-05, "loss": 0.9253, "step": 6431 }, { "epoch": 0.4358018835964496, "grad_norm": 6.005470275878906, "learning_rate": 9.222260250530495e-05, "loss": 0.8853, "step": 6432 }, { "epoch": 0.4358696388644217, "grad_norm": 8.552530288696289, "learning_rate": 9.222123348620713e-05, "loss": 0.9477, "step": 6433 }, { "epoch": 0.4359373941323938, "grad_norm": 6.014112949371338, "learning_rate": 9.221986446710931e-05, "loss": 0.9249, "step": 6434 }, { "epoch": 0.4360051494003659, "grad_norm": 8.349777221679688, "learning_rate": 9.22184954480115e-05, "loss": 0.9278, "step": 6435 }, { "epoch": 0.436072904668338, "grad_norm": 7.474494934082031, "learning_rate": 9.221712642891369e-05, "loss": 1.1279, "step": 6436 }, { "epoch": 0.43614065993631007, "grad_norm": 7.27971076965332, "learning_rate": 9.221575740981587e-05, "loss": 0.8476, "step": 6437 }, { "epoch": 0.4362084152042821, "grad_norm": 7.819595813751221, "learning_rate": 9.221438839071806e-05, "loss": 0.828, "step": 6438 }, { "epoch": 0.4362761704722542, "grad_norm": 8.474798202514648, "learning_rate": 9.221301937162024e-05, "loss": 1.0775, "step": 6439 }, { "epoch": 0.4363439257402263, "grad_norm": 7.175133228302002, "learning_rate": 9.221165035252242e-05, "loss": 0.8889, "step": 6440 }, { "epoch": 0.4364116810081984, "grad_norm": 5.881869316101074, "learning_rate": 9.221028133342461e-05, "loss": 0.7435, "step": 6441 }, { "epoch": 0.4364794362761705, "grad_norm": 8.366180419921875, "learning_rate": 9.22089123143268e-05, "loss": 1.1461, "step": 6442 }, { "epoch": 0.43654719154414257, "grad_norm": 7.311695098876953, "learning_rate": 9.220754329522898e-05, "loss": 0.7601, "step": 6443 }, { "epoch": 0.43661494681211466, "grad_norm": 6.886138439178467, "learning_rate": 9.220617427613116e-05, "loss": 0.9432, "step": 6444 }, { "epoch": 0.43668270208008675, "grad_norm": 10.014945983886719, "learning_rate": 9.220480525703335e-05, "loss": 0.8914, "step": 6445 }, { "epoch": 0.4367504573480588, "grad_norm": 6.025598526000977, "learning_rate": 9.220343623793553e-05, "loss": 0.8601, "step": 6446 }, { "epoch": 0.4368182126160309, "grad_norm": 6.072788715362549, "learning_rate": 9.220206721883771e-05, "loss": 0.7345, "step": 6447 }, { "epoch": 0.43688596788400297, "grad_norm": 8.272831916809082, "learning_rate": 9.220069819973989e-05, "loss": 0.7427, "step": 6448 }, { "epoch": 0.43695372315197506, "grad_norm": 6.721330642700195, "learning_rate": 9.219932918064207e-05, "loss": 0.8294, "step": 6449 }, { "epoch": 0.43702147841994715, "grad_norm": 7.108659267425537, "learning_rate": 9.219796016154426e-05, "loss": 0.7635, "step": 6450 }, { "epoch": 0.43708923368791924, "grad_norm": 11.399370193481445, "learning_rate": 9.219659114244645e-05, "loss": 1.0645, "step": 6451 }, { "epoch": 0.43715698895589133, "grad_norm": 7.976221561431885, "learning_rate": 9.219522212334863e-05, "loss": 0.9437, "step": 6452 }, { "epoch": 0.4372247442238634, "grad_norm": 8.320938110351562, "learning_rate": 9.21938531042508e-05, "loss": 0.8029, "step": 6453 }, { "epoch": 0.4372924994918355, "grad_norm": 6.146340370178223, "learning_rate": 9.2192484085153e-05, "loss": 0.7872, "step": 6454 }, { "epoch": 0.43736025475980755, "grad_norm": 6.408365726470947, "learning_rate": 9.219111506605518e-05, "loss": 0.5922, "step": 6455 }, { "epoch": 0.43742801002777965, "grad_norm": 7.011279106140137, "learning_rate": 9.218974604695736e-05, "loss": 0.9152, "step": 6456 }, { "epoch": 0.43749576529575174, "grad_norm": 6.819314002990723, "learning_rate": 9.218837702785954e-05, "loss": 1.002, "step": 6457 }, { "epoch": 0.43756352056372383, "grad_norm": 10.63784122467041, "learning_rate": 9.218700800876172e-05, "loss": 1.0873, "step": 6458 }, { "epoch": 0.4376312758316959, "grad_norm": 6.964840888977051, "learning_rate": 9.218563898966391e-05, "loss": 0.6972, "step": 6459 }, { "epoch": 0.437699031099668, "grad_norm": 8.421072959899902, "learning_rate": 9.21842699705661e-05, "loss": 1.0543, "step": 6460 }, { "epoch": 0.4377667863676401, "grad_norm": 5.883727073669434, "learning_rate": 9.218290095146828e-05, "loss": 0.8138, "step": 6461 }, { "epoch": 0.4378345416356122, "grad_norm": 8.054207801818848, "learning_rate": 9.218153193237046e-05, "loss": 0.8885, "step": 6462 }, { "epoch": 0.43790229690358423, "grad_norm": 7.546504974365234, "learning_rate": 9.218016291327265e-05, "loss": 1.2157, "step": 6463 }, { "epoch": 0.4379700521715563, "grad_norm": 10.255965232849121, "learning_rate": 9.217879389417483e-05, "loss": 1.0082, "step": 6464 }, { "epoch": 0.4380378074395284, "grad_norm": 6.057038307189941, "learning_rate": 9.217742487507701e-05, "loss": 0.8386, "step": 6465 }, { "epoch": 0.4381055627075005, "grad_norm": 8.343341827392578, "learning_rate": 9.217605585597919e-05, "loss": 0.7051, "step": 6466 }, { "epoch": 0.4381733179754726, "grad_norm": 8.163117408752441, "learning_rate": 9.217468683688137e-05, "loss": 1.1442, "step": 6467 }, { "epoch": 0.4382410732434447, "grad_norm": 6.132575035095215, "learning_rate": 9.217331781778357e-05, "loss": 0.7034, "step": 6468 }, { "epoch": 0.4383088285114168, "grad_norm": 6.2581658363342285, "learning_rate": 9.217194879868575e-05, "loss": 0.919, "step": 6469 }, { "epoch": 0.4383765837793889, "grad_norm": 6.228326797485352, "learning_rate": 9.217057977958793e-05, "loss": 0.7785, "step": 6470 }, { "epoch": 0.4384443390473609, "grad_norm": 5.782427787780762, "learning_rate": 9.21692107604901e-05, "loss": 0.9185, "step": 6471 }, { "epoch": 0.438512094315333, "grad_norm": 5.726394176483154, "learning_rate": 9.21678417413923e-05, "loss": 0.593, "step": 6472 }, { "epoch": 0.4385798495833051, "grad_norm": 7.62056303024292, "learning_rate": 9.216647272229448e-05, "loss": 0.8947, "step": 6473 }, { "epoch": 0.4386476048512772, "grad_norm": 7.467504024505615, "learning_rate": 9.216510370319666e-05, "loss": 0.8738, "step": 6474 }, { "epoch": 0.4387153601192493, "grad_norm": 6.0644989013671875, "learning_rate": 9.216373468409884e-05, "loss": 0.7715, "step": 6475 }, { "epoch": 0.43878311538722137, "grad_norm": 7.380848407745361, "learning_rate": 9.216236566500102e-05, "loss": 0.8055, "step": 6476 }, { "epoch": 0.43885087065519346, "grad_norm": 7.162757873535156, "learning_rate": 9.216099664590322e-05, "loss": 0.8826, "step": 6477 }, { "epoch": 0.43891862592316555, "grad_norm": 8.315613746643066, "learning_rate": 9.21596276268054e-05, "loss": 1.1239, "step": 6478 }, { "epoch": 0.4389863811911376, "grad_norm": 6.754839897155762, "learning_rate": 9.215825860770758e-05, "loss": 0.8331, "step": 6479 }, { "epoch": 0.4390541364591097, "grad_norm": 5.4843831062316895, "learning_rate": 9.215688958860976e-05, "loss": 0.8134, "step": 6480 }, { "epoch": 0.43912189172708177, "grad_norm": 7.7780656814575195, "learning_rate": 9.215552056951195e-05, "loss": 1.0216, "step": 6481 }, { "epoch": 0.43918964699505386, "grad_norm": 7.779257297515869, "learning_rate": 9.215415155041413e-05, "loss": 0.91, "step": 6482 }, { "epoch": 0.43925740226302595, "grad_norm": 7.926174640655518, "learning_rate": 9.215278253131631e-05, "loss": 0.9635, "step": 6483 }, { "epoch": 0.43932515753099805, "grad_norm": 5.595751762390137, "learning_rate": 9.21514135122185e-05, "loss": 0.9092, "step": 6484 }, { "epoch": 0.43939291279897014, "grad_norm": 8.04053020477295, "learning_rate": 9.215004449312069e-05, "loss": 0.6872, "step": 6485 }, { "epoch": 0.43946066806694223, "grad_norm": 12.462115287780762, "learning_rate": 9.214867547402287e-05, "loss": 1.2741, "step": 6486 }, { "epoch": 0.43952842333491426, "grad_norm": 6.239933490753174, "learning_rate": 9.214730645492506e-05, "loss": 0.7711, "step": 6487 }, { "epoch": 0.43959617860288636, "grad_norm": 7.584579944610596, "learning_rate": 9.214593743582724e-05, "loss": 0.8666, "step": 6488 }, { "epoch": 0.43966393387085845, "grad_norm": 7.129024505615234, "learning_rate": 9.214456841672942e-05, "loss": 0.8664, "step": 6489 }, { "epoch": 0.43973168913883054, "grad_norm": 7.35465145111084, "learning_rate": 9.21431993976316e-05, "loss": 1.2072, "step": 6490 }, { "epoch": 0.43979944440680263, "grad_norm": 7.911463737487793, "learning_rate": 9.21418303785338e-05, "loss": 0.8019, "step": 6491 }, { "epoch": 0.4398671996747747, "grad_norm": 7.9478349685668945, "learning_rate": 9.214046135943597e-05, "loss": 0.8945, "step": 6492 }, { "epoch": 0.4399349549427468, "grad_norm": 7.73642635345459, "learning_rate": 9.213909234033815e-05, "loss": 1.2203, "step": 6493 }, { "epoch": 0.4400027102107189, "grad_norm": 8.306556701660156, "learning_rate": 9.213772332124034e-05, "loss": 0.8548, "step": 6494 }, { "epoch": 0.44007046547869094, "grad_norm": 7.496469497680664, "learning_rate": 9.213635430214253e-05, "loss": 1.1485, "step": 6495 }, { "epoch": 0.44013822074666303, "grad_norm": 6.734534740447998, "learning_rate": 9.213498528304471e-05, "loss": 0.8758, "step": 6496 }, { "epoch": 0.4402059760146351, "grad_norm": 6.782371997833252, "learning_rate": 9.213361626394689e-05, "loss": 0.5879, "step": 6497 }, { "epoch": 0.4402737312826072, "grad_norm": 6.044846057891846, "learning_rate": 9.213224724484907e-05, "loss": 1.0055, "step": 6498 }, { "epoch": 0.4403414865505793, "grad_norm": 7.979533672332764, "learning_rate": 9.213087822575125e-05, "loss": 0.8265, "step": 6499 }, { "epoch": 0.4404092418185514, "grad_norm": 8.245573997497559, "learning_rate": 9.212950920665344e-05, "loss": 0.5549, "step": 6500 }, { "epoch": 0.4404769970865235, "grad_norm": 6.346557140350342, "learning_rate": 9.212814018755562e-05, "loss": 0.857, "step": 6501 }, { "epoch": 0.4405447523544956, "grad_norm": 6.079600811004639, "learning_rate": 9.21267711684578e-05, "loss": 0.9127, "step": 6502 }, { "epoch": 0.4406125076224676, "grad_norm": 9.402070045471191, "learning_rate": 9.212540214935999e-05, "loss": 0.8104, "step": 6503 }, { "epoch": 0.4406802628904397, "grad_norm": 8.062244415283203, "learning_rate": 9.212403313026217e-05, "loss": 1.195, "step": 6504 }, { "epoch": 0.4407480181584118, "grad_norm": 8.54050064086914, "learning_rate": 9.212266411116436e-05, "loss": 0.854, "step": 6505 }, { "epoch": 0.4408157734263839, "grad_norm": 6.040203094482422, "learning_rate": 9.212129509206654e-05, "loss": 0.7028, "step": 6506 }, { "epoch": 0.440883528694356, "grad_norm": 7.689701080322266, "learning_rate": 9.211992607296872e-05, "loss": 0.9999, "step": 6507 }, { "epoch": 0.4409512839623281, "grad_norm": 7.860703468322754, "learning_rate": 9.21185570538709e-05, "loss": 1.0413, "step": 6508 }, { "epoch": 0.44101903923030017, "grad_norm": 7.390681266784668, "learning_rate": 9.21171880347731e-05, "loss": 1.0337, "step": 6509 }, { "epoch": 0.44108679449827226, "grad_norm": 6.800081253051758, "learning_rate": 9.211581901567527e-05, "loss": 0.9613, "step": 6510 }, { "epoch": 0.44115454976624435, "grad_norm": 5.59462833404541, "learning_rate": 9.211444999657746e-05, "loss": 0.7001, "step": 6511 }, { "epoch": 0.4412223050342164, "grad_norm": 8.300586700439453, "learning_rate": 9.211308097747964e-05, "loss": 1.0087, "step": 6512 }, { "epoch": 0.4412900603021885, "grad_norm": 6.590997695922852, "learning_rate": 9.211171195838182e-05, "loss": 0.8233, "step": 6513 }, { "epoch": 0.44135781557016057, "grad_norm": 8.995779991149902, "learning_rate": 9.211034293928401e-05, "loss": 0.9385, "step": 6514 }, { "epoch": 0.44142557083813266, "grad_norm": 6.689282417297363, "learning_rate": 9.210897392018619e-05, "loss": 0.9455, "step": 6515 }, { "epoch": 0.44149332610610476, "grad_norm": 6.494836330413818, "learning_rate": 9.210760490108837e-05, "loss": 0.9009, "step": 6516 }, { "epoch": 0.44156108137407685, "grad_norm": 7.502823352813721, "learning_rate": 9.210623588199055e-05, "loss": 0.7972, "step": 6517 }, { "epoch": 0.44162883664204894, "grad_norm": 6.561639785766602, "learning_rate": 9.210486686289274e-05, "loss": 0.7988, "step": 6518 }, { "epoch": 0.44169659191002103, "grad_norm": 6.1523847579956055, "learning_rate": 9.210349784379493e-05, "loss": 0.9561, "step": 6519 }, { "epoch": 0.44176434717799307, "grad_norm": 6.792056560516357, "learning_rate": 9.21021288246971e-05, "loss": 0.7623, "step": 6520 }, { "epoch": 0.44183210244596516, "grad_norm": 6.477465629577637, "learning_rate": 9.210075980559929e-05, "loss": 0.6875, "step": 6521 }, { "epoch": 0.44189985771393725, "grad_norm": 7.468865394592285, "learning_rate": 9.209939078650147e-05, "loss": 0.8727, "step": 6522 }, { "epoch": 0.44196761298190934, "grad_norm": 5.351073265075684, "learning_rate": 9.209802176740366e-05, "loss": 0.8114, "step": 6523 }, { "epoch": 0.44203536824988143, "grad_norm": 7.701816558837891, "learning_rate": 9.209665274830584e-05, "loss": 1.1438, "step": 6524 }, { "epoch": 0.4421031235178535, "grad_norm": 6.072995662689209, "learning_rate": 9.209528372920802e-05, "loss": 0.9237, "step": 6525 }, { "epoch": 0.4421708787858256, "grad_norm": 7.1209716796875, "learning_rate": 9.20939147101102e-05, "loss": 0.8445, "step": 6526 }, { "epoch": 0.4422386340537977, "grad_norm": 7.518238067626953, "learning_rate": 9.20925456910124e-05, "loss": 1.0969, "step": 6527 }, { "epoch": 0.44230638932176974, "grad_norm": 5.822054862976074, "learning_rate": 9.209117667191458e-05, "loss": 0.843, "step": 6528 }, { "epoch": 0.44237414458974184, "grad_norm": 6.8769612312316895, "learning_rate": 9.208980765281676e-05, "loss": 0.9661, "step": 6529 }, { "epoch": 0.4424418998577139, "grad_norm": 6.266312122344971, "learning_rate": 9.208843863371895e-05, "loss": 0.7924, "step": 6530 }, { "epoch": 0.442509655125686, "grad_norm": 6.57719612121582, "learning_rate": 9.208706961462113e-05, "loss": 0.7929, "step": 6531 }, { "epoch": 0.4425774103936581, "grad_norm": 6.038028240203857, "learning_rate": 9.208570059552331e-05, "loss": 0.7683, "step": 6532 }, { "epoch": 0.4426451656616302, "grad_norm": 7.042256832122803, "learning_rate": 9.20843315764255e-05, "loss": 0.6646, "step": 6533 }, { "epoch": 0.4427129209296023, "grad_norm": 7.218042850494385, "learning_rate": 9.208296255732768e-05, "loss": 0.9532, "step": 6534 }, { "epoch": 0.4427806761975744, "grad_norm": 8.851286888122559, "learning_rate": 9.208159353822986e-05, "loss": 0.819, "step": 6535 }, { "epoch": 0.4428484314655464, "grad_norm": 5.37313985824585, "learning_rate": 9.208022451913205e-05, "loss": 0.6068, "step": 6536 }, { "epoch": 0.4429161867335185, "grad_norm": 6.643946647644043, "learning_rate": 9.207885550003424e-05, "loss": 1.1088, "step": 6537 }, { "epoch": 0.4429839420014906, "grad_norm": 7.155415058135986, "learning_rate": 9.207748648093642e-05, "loss": 0.9693, "step": 6538 }, { "epoch": 0.4430516972694627, "grad_norm": 7.293460369110107, "learning_rate": 9.20761174618386e-05, "loss": 1.0701, "step": 6539 }, { "epoch": 0.4431194525374348, "grad_norm": 6.664429664611816, "learning_rate": 9.207474844274078e-05, "loss": 0.6425, "step": 6540 }, { "epoch": 0.4431872078054069, "grad_norm": 5.979015350341797, "learning_rate": 9.207337942364297e-05, "loss": 0.6666, "step": 6541 }, { "epoch": 0.44325496307337897, "grad_norm": 5.959255695343018, "learning_rate": 9.207201040454515e-05, "loss": 0.6914, "step": 6542 }, { "epoch": 0.44332271834135106, "grad_norm": 7.694621562957764, "learning_rate": 9.207064138544733e-05, "loss": 0.8844, "step": 6543 }, { "epoch": 0.4433904736093231, "grad_norm": 7.128264904022217, "learning_rate": 9.206927236634951e-05, "loss": 0.6571, "step": 6544 }, { "epoch": 0.4434582288772952, "grad_norm": 7.676843166351318, "learning_rate": 9.20679033472517e-05, "loss": 0.8944, "step": 6545 }, { "epoch": 0.4435259841452673, "grad_norm": 8.858941078186035, "learning_rate": 9.206653432815389e-05, "loss": 0.8057, "step": 6546 }, { "epoch": 0.4435937394132394, "grad_norm": 7.0214033126831055, "learning_rate": 9.206516530905607e-05, "loss": 0.7815, "step": 6547 }, { "epoch": 0.44366149468121147, "grad_norm": 5.209787845611572, "learning_rate": 9.206379628995825e-05, "loss": 0.8764, "step": 6548 }, { "epoch": 0.44372924994918356, "grad_norm": 7.076624870300293, "learning_rate": 9.206242727086043e-05, "loss": 0.8881, "step": 6549 }, { "epoch": 0.44379700521715565, "grad_norm": 7.659173965454102, "learning_rate": 9.206105825176262e-05, "loss": 0.8945, "step": 6550 }, { "epoch": 0.44386476048512774, "grad_norm": 6.2118611335754395, "learning_rate": 9.20596892326648e-05, "loss": 0.8022, "step": 6551 }, { "epoch": 0.4439325157530998, "grad_norm": 6.649642467498779, "learning_rate": 9.205832021356698e-05, "loss": 0.9765, "step": 6552 }, { "epoch": 0.44400027102107187, "grad_norm": 6.0131683349609375, "learning_rate": 9.205695119446917e-05, "loss": 0.6928, "step": 6553 }, { "epoch": 0.44406802628904396, "grad_norm": 6.295266151428223, "learning_rate": 9.205558217537135e-05, "loss": 0.9785, "step": 6554 }, { "epoch": 0.44413578155701605, "grad_norm": 6.524127006530762, "learning_rate": 9.205421315627354e-05, "loss": 0.8065, "step": 6555 }, { "epoch": 0.44420353682498814, "grad_norm": 7.478303909301758, "learning_rate": 9.205284413717572e-05, "loss": 0.9533, "step": 6556 }, { "epoch": 0.44427129209296023, "grad_norm": 7.92426061630249, "learning_rate": 9.20514751180779e-05, "loss": 1.1578, "step": 6557 }, { "epoch": 0.4443390473609323, "grad_norm": 7.532670497894287, "learning_rate": 9.205010609898008e-05, "loss": 0.9318, "step": 6558 }, { "epoch": 0.4444068026289044, "grad_norm": 8.458464622497559, "learning_rate": 9.204873707988226e-05, "loss": 1.087, "step": 6559 }, { "epoch": 0.4444745578968765, "grad_norm": 5.044344425201416, "learning_rate": 9.204736806078445e-05, "loss": 0.7836, "step": 6560 }, { "epoch": 0.44454231316484855, "grad_norm": 6.682579517364502, "learning_rate": 9.204599904168663e-05, "loss": 0.8407, "step": 6561 }, { "epoch": 0.44461006843282064, "grad_norm": 6.948788166046143, "learning_rate": 9.204463002258882e-05, "loss": 0.72, "step": 6562 }, { "epoch": 0.44467782370079273, "grad_norm": 7.333683967590332, "learning_rate": 9.2043261003491e-05, "loss": 0.934, "step": 6563 }, { "epoch": 0.4447455789687648, "grad_norm": 7.648477077484131, "learning_rate": 9.204189198439319e-05, "loss": 0.8992, "step": 6564 }, { "epoch": 0.4448133342367369, "grad_norm": 6.203789710998535, "learning_rate": 9.204052296529537e-05, "loss": 1.011, "step": 6565 }, { "epoch": 0.444881089504709, "grad_norm": 7.739291667938232, "learning_rate": 9.203915394619755e-05, "loss": 0.8695, "step": 6566 }, { "epoch": 0.4449488447726811, "grad_norm": 6.642831325531006, "learning_rate": 9.203778492709973e-05, "loss": 0.666, "step": 6567 }, { "epoch": 0.4450166000406532, "grad_norm": 7.760629653930664, "learning_rate": 9.203641590800191e-05, "loss": 1.1022, "step": 6568 }, { "epoch": 0.4450843553086252, "grad_norm": 6.180607318878174, "learning_rate": 9.20350468889041e-05, "loss": 0.8599, "step": 6569 }, { "epoch": 0.4451521105765973, "grad_norm": 6.284255504608154, "learning_rate": 9.203367786980629e-05, "loss": 0.7226, "step": 6570 }, { "epoch": 0.4452198658445694, "grad_norm": 5.078493595123291, "learning_rate": 9.203230885070847e-05, "loss": 0.8089, "step": 6571 }, { "epoch": 0.4452876211125415, "grad_norm": 6.533141136169434, "learning_rate": 9.203093983161065e-05, "loss": 0.8426, "step": 6572 }, { "epoch": 0.4453553763805136, "grad_norm": 6.589559555053711, "learning_rate": 9.202957081251284e-05, "loss": 1.0924, "step": 6573 }, { "epoch": 0.4454231316484857, "grad_norm": 5.858087539672852, "learning_rate": 9.202820179341502e-05, "loss": 0.8007, "step": 6574 }, { "epoch": 0.4454908869164578, "grad_norm": 6.8898491859436035, "learning_rate": 9.20268327743172e-05, "loss": 0.7622, "step": 6575 }, { "epoch": 0.44555864218442987, "grad_norm": 5.419859409332275, "learning_rate": 9.20254637552194e-05, "loss": 0.711, "step": 6576 }, { "epoch": 0.4456263974524019, "grad_norm": 7.31076192855835, "learning_rate": 9.202409473612157e-05, "loss": 1.0181, "step": 6577 }, { "epoch": 0.445694152720374, "grad_norm": 8.525489807128906, "learning_rate": 9.202272571702375e-05, "loss": 1.0324, "step": 6578 }, { "epoch": 0.4457619079883461, "grad_norm": 6.309749603271484, "learning_rate": 9.202135669792595e-05, "loss": 1.1596, "step": 6579 }, { "epoch": 0.4458296632563182, "grad_norm": 5.667306423187256, "learning_rate": 9.201998767882813e-05, "loss": 0.8091, "step": 6580 }, { "epoch": 0.44589741852429027, "grad_norm": 6.930799961090088, "learning_rate": 9.201861865973031e-05, "loss": 0.915, "step": 6581 }, { "epoch": 0.44596517379226236, "grad_norm": 8.236886024475098, "learning_rate": 9.201724964063249e-05, "loss": 0.8973, "step": 6582 }, { "epoch": 0.44603292906023445, "grad_norm": 8.804180145263672, "learning_rate": 9.201588062153468e-05, "loss": 0.9985, "step": 6583 }, { "epoch": 0.44610068432820654, "grad_norm": 6.966750621795654, "learning_rate": 9.201451160243686e-05, "loss": 0.9065, "step": 6584 }, { "epoch": 0.4461684395961786, "grad_norm": 6.3808698654174805, "learning_rate": 9.201314258333904e-05, "loss": 0.8519, "step": 6585 }, { "epoch": 0.44623619486415067, "grad_norm": 6.380456447601318, "learning_rate": 9.201177356424122e-05, "loss": 0.6365, "step": 6586 }, { "epoch": 0.44630395013212276, "grad_norm": 6.893950939178467, "learning_rate": 9.201040454514342e-05, "loss": 0.8017, "step": 6587 }, { "epoch": 0.44637170540009485, "grad_norm": 6.246983051300049, "learning_rate": 9.20090355260456e-05, "loss": 0.8914, "step": 6588 }, { "epoch": 0.44643946066806695, "grad_norm": 5.012611389160156, "learning_rate": 9.200766650694778e-05, "loss": 0.7421, "step": 6589 }, { "epoch": 0.44650721593603904, "grad_norm": 6.981064319610596, "learning_rate": 9.200629748784996e-05, "loss": 0.9045, "step": 6590 }, { "epoch": 0.44657497120401113, "grad_norm": 6.434818744659424, "learning_rate": 9.200492846875214e-05, "loss": 1.0952, "step": 6591 }, { "epoch": 0.4466427264719832, "grad_norm": 7.2661027908325195, "learning_rate": 9.200355944965433e-05, "loss": 0.8943, "step": 6592 }, { "epoch": 0.44671048173995526, "grad_norm": 6.347513675689697, "learning_rate": 9.200219043055651e-05, "loss": 0.7958, "step": 6593 }, { "epoch": 0.44677823700792735, "grad_norm": 6.98016881942749, "learning_rate": 9.20008214114587e-05, "loss": 0.7197, "step": 6594 }, { "epoch": 0.44684599227589944, "grad_norm": 5.671807765960693, "learning_rate": 9.199945239236087e-05, "loss": 0.7577, "step": 6595 }, { "epoch": 0.44691374754387153, "grad_norm": 8.79588794708252, "learning_rate": 9.199808337326307e-05, "loss": 0.9273, "step": 6596 }, { "epoch": 0.4469815028118436, "grad_norm": 7.276648998260498, "learning_rate": 9.199671435416525e-05, "loss": 0.8705, "step": 6597 }, { "epoch": 0.4470492580798157, "grad_norm": 8.435094833374023, "learning_rate": 9.199534533506743e-05, "loss": 0.9946, "step": 6598 }, { "epoch": 0.4471170133477878, "grad_norm": 6.148091793060303, "learning_rate": 9.199397631596961e-05, "loss": 0.9541, "step": 6599 }, { "epoch": 0.4471847686157599, "grad_norm": 8.867514610290527, "learning_rate": 9.199260729687179e-05, "loss": 1.029, "step": 6600 }, { "epoch": 0.44725252388373193, "grad_norm": 7.480152606964111, "learning_rate": 9.199123827777398e-05, "loss": 0.8819, "step": 6601 }, { "epoch": 0.447320279151704, "grad_norm": 7.422079086303711, "learning_rate": 9.198986925867616e-05, "loss": 0.9998, "step": 6602 }, { "epoch": 0.4473880344196761, "grad_norm": 6.239773750305176, "learning_rate": 9.198850023957834e-05, "loss": 0.9129, "step": 6603 }, { "epoch": 0.4474557896876482, "grad_norm": 6.59074592590332, "learning_rate": 9.198713122048053e-05, "loss": 1.1373, "step": 6604 }, { "epoch": 0.4475235449556203, "grad_norm": 6.096022129058838, "learning_rate": 9.198576220138272e-05, "loss": 0.8916, "step": 6605 }, { "epoch": 0.4475913002235924, "grad_norm": 5.775374889373779, "learning_rate": 9.19843931822849e-05, "loss": 0.8679, "step": 6606 }, { "epoch": 0.4476590554915645, "grad_norm": 6.836724758148193, "learning_rate": 9.198302416318708e-05, "loss": 0.8205, "step": 6607 }, { "epoch": 0.4477268107595366, "grad_norm": 6.876745700836182, "learning_rate": 9.198165514408926e-05, "loss": 0.9564, "step": 6608 }, { "epoch": 0.4477945660275086, "grad_norm": 6.725889205932617, "learning_rate": 9.198028612499144e-05, "loss": 0.7783, "step": 6609 }, { "epoch": 0.4478623212954807, "grad_norm": 6.312922477722168, "learning_rate": 9.197891710589363e-05, "loss": 0.8358, "step": 6610 }, { "epoch": 0.4479300765634528, "grad_norm": 5.7454071044921875, "learning_rate": 9.197754808679581e-05, "loss": 0.7174, "step": 6611 }, { "epoch": 0.4479978318314249, "grad_norm": 9.245071411132812, "learning_rate": 9.1976179067698e-05, "loss": 0.8224, "step": 6612 }, { "epoch": 0.448065587099397, "grad_norm": 7.2646307945251465, "learning_rate": 9.197481004860018e-05, "loss": 0.7895, "step": 6613 }, { "epoch": 0.44813334236736907, "grad_norm": 6.799703598022461, "learning_rate": 9.197344102950236e-05, "loss": 0.9685, "step": 6614 }, { "epoch": 0.44820109763534116, "grad_norm": 6.408104419708252, "learning_rate": 9.197207201040455e-05, "loss": 0.9136, "step": 6615 }, { "epoch": 0.44826885290331325, "grad_norm": 7.4527363777160645, "learning_rate": 9.197070299130673e-05, "loss": 0.9364, "step": 6616 }, { "epoch": 0.44833660817128534, "grad_norm": 6.287598609924316, "learning_rate": 9.196933397220891e-05, "loss": 0.8987, "step": 6617 }, { "epoch": 0.4484043634392574, "grad_norm": 7.270476341247559, "learning_rate": 9.196796495311109e-05, "loss": 1.0113, "step": 6618 }, { "epoch": 0.4484721187072295, "grad_norm": 8.098075866699219, "learning_rate": 9.196659593401328e-05, "loss": 0.7957, "step": 6619 }, { "epoch": 0.44853987397520156, "grad_norm": 6.268966197967529, "learning_rate": 9.196522691491546e-05, "loss": 0.7363, "step": 6620 }, { "epoch": 0.44860762924317366, "grad_norm": 8.81617259979248, "learning_rate": 9.196385789581765e-05, "loss": 1.1112, "step": 6621 }, { "epoch": 0.44867538451114575, "grad_norm": 7.593179225921631, "learning_rate": 9.196248887671984e-05, "loss": 0.7988, "step": 6622 }, { "epoch": 0.44874313977911784, "grad_norm": 7.044666290283203, "learning_rate": 9.196111985762202e-05, "loss": 0.832, "step": 6623 }, { "epoch": 0.44881089504708993, "grad_norm": 5.546746730804443, "learning_rate": 9.19597508385242e-05, "loss": 0.7273, "step": 6624 }, { "epoch": 0.448878650315062, "grad_norm": 7.123478412628174, "learning_rate": 9.19583818194264e-05, "loss": 0.9006, "step": 6625 }, { "epoch": 0.44894640558303406, "grad_norm": 9.073535919189453, "learning_rate": 9.195701280032857e-05, "loss": 0.8702, "step": 6626 }, { "epoch": 0.44901416085100615, "grad_norm": 7.320106506347656, "learning_rate": 9.195564378123075e-05, "loss": 0.7704, "step": 6627 }, { "epoch": 0.44908191611897824, "grad_norm": 7.6163177490234375, "learning_rate": 9.195427476213295e-05, "loss": 1.1432, "step": 6628 }, { "epoch": 0.44914967138695033, "grad_norm": 6.271851062774658, "learning_rate": 9.195290574303513e-05, "loss": 0.9378, "step": 6629 }, { "epoch": 0.4492174266549224, "grad_norm": 6.142657279968262, "learning_rate": 9.195153672393731e-05, "loss": 0.9356, "step": 6630 }, { "epoch": 0.4492851819228945, "grad_norm": 6.669857025146484, "learning_rate": 9.195016770483949e-05, "loss": 0.5939, "step": 6631 }, { "epoch": 0.4493529371908666, "grad_norm": 7.524440288543701, "learning_rate": 9.194879868574167e-05, "loss": 0.7756, "step": 6632 }, { "epoch": 0.4494206924588387, "grad_norm": 7.47619104385376, "learning_rate": 9.194742966664386e-05, "loss": 0.7881, "step": 6633 }, { "epoch": 0.44948844772681074, "grad_norm": 5.466754913330078, "learning_rate": 9.194606064754604e-05, "loss": 0.7653, "step": 6634 }, { "epoch": 0.4495562029947828, "grad_norm": 6.149755954742432, "learning_rate": 9.194469162844822e-05, "loss": 0.8208, "step": 6635 }, { "epoch": 0.4496239582627549, "grad_norm": 6.091528415679932, "learning_rate": 9.19433226093504e-05, "loss": 1.0214, "step": 6636 }, { "epoch": 0.449691713530727, "grad_norm": 5.599562168121338, "learning_rate": 9.194195359025258e-05, "loss": 1.0719, "step": 6637 }, { "epoch": 0.4497594687986991, "grad_norm": 7.242834091186523, "learning_rate": 9.194058457115478e-05, "loss": 1.0185, "step": 6638 }, { "epoch": 0.4498272240666712, "grad_norm": 6.523613452911377, "learning_rate": 9.193921555205696e-05, "loss": 0.9699, "step": 6639 }, { "epoch": 0.4498949793346433, "grad_norm": 6.815830230712891, "learning_rate": 9.193784653295914e-05, "loss": 0.814, "step": 6640 }, { "epoch": 0.4499627346026154, "grad_norm": 5.4793524742126465, "learning_rate": 9.193647751386132e-05, "loss": 0.724, "step": 6641 }, { "epoch": 0.4500304898705874, "grad_norm": 7.303586959838867, "learning_rate": 9.193510849476351e-05, "loss": 0.9991, "step": 6642 }, { "epoch": 0.4500982451385595, "grad_norm": 7.781108856201172, "learning_rate": 9.19337394756657e-05, "loss": 0.925, "step": 6643 }, { "epoch": 0.4501660004065316, "grad_norm": 5.924014568328857, "learning_rate": 9.193237045656787e-05, "loss": 0.8656, "step": 6644 }, { "epoch": 0.4502337556745037, "grad_norm": 8.77978801727295, "learning_rate": 9.193100143747005e-05, "loss": 0.6839, "step": 6645 }, { "epoch": 0.4503015109424758, "grad_norm": 6.725009441375732, "learning_rate": 9.192963241837223e-05, "loss": 1.09, "step": 6646 }, { "epoch": 0.45036926621044787, "grad_norm": 5.697434902191162, "learning_rate": 9.192826339927443e-05, "loss": 0.6516, "step": 6647 }, { "epoch": 0.45043702147841996, "grad_norm": 7.783257007598877, "learning_rate": 9.192689438017661e-05, "loss": 1.1097, "step": 6648 }, { "epoch": 0.45050477674639205, "grad_norm": 7.0213541984558105, "learning_rate": 9.192552536107879e-05, "loss": 1.0617, "step": 6649 }, { "epoch": 0.4505725320143641, "grad_norm": 9.152633666992188, "learning_rate": 9.192415634198097e-05, "loss": 0.9626, "step": 6650 }, { "epoch": 0.4506402872823362, "grad_norm": 8.690450668334961, "learning_rate": 9.192278732288316e-05, "loss": 0.7741, "step": 6651 }, { "epoch": 0.4507080425503083, "grad_norm": 6.784976482391357, "learning_rate": 9.192141830378534e-05, "loss": 1.092, "step": 6652 }, { "epoch": 0.45077579781828037, "grad_norm": 6.294474124908447, "learning_rate": 9.192004928468752e-05, "loss": 0.9322, "step": 6653 }, { "epoch": 0.45084355308625246, "grad_norm": 5.054771423339844, "learning_rate": 9.19186802655897e-05, "loss": 0.7711, "step": 6654 }, { "epoch": 0.45091130835422455, "grad_norm": 7.640350341796875, "learning_rate": 9.191731124649189e-05, "loss": 0.8006, "step": 6655 }, { "epoch": 0.45097906362219664, "grad_norm": 6.9720001220703125, "learning_rate": 9.191594222739408e-05, "loss": 0.7912, "step": 6656 }, { "epoch": 0.45104681889016873, "grad_norm": 7.61221981048584, "learning_rate": 9.191457320829626e-05, "loss": 0.8612, "step": 6657 }, { "epoch": 0.45111457415814077, "grad_norm": 7.7177958488464355, "learning_rate": 9.191320418919844e-05, "loss": 1.0009, "step": 6658 }, { "epoch": 0.45118232942611286, "grad_norm": 6.217735290527344, "learning_rate": 9.191183517010062e-05, "loss": 0.8091, "step": 6659 }, { "epoch": 0.45125008469408495, "grad_norm": 8.2019681930542, "learning_rate": 9.191046615100281e-05, "loss": 1.055, "step": 6660 }, { "epoch": 0.45131783996205704, "grad_norm": 5.853175640106201, "learning_rate": 9.1909097131905e-05, "loss": 1.1045, "step": 6661 }, { "epoch": 0.45138559523002914, "grad_norm": 6.276338577270508, "learning_rate": 9.190772811280717e-05, "loss": 0.9436, "step": 6662 }, { "epoch": 0.4514533504980012, "grad_norm": 11.320722579956055, "learning_rate": 9.190635909370935e-05, "loss": 1.1028, "step": 6663 }, { "epoch": 0.4515211057659733, "grad_norm": 6.311827659606934, "learning_rate": 9.190499007461154e-05, "loss": 0.9232, "step": 6664 }, { "epoch": 0.4515888610339454, "grad_norm": 8.186714172363281, "learning_rate": 9.190362105551373e-05, "loss": 0.7087, "step": 6665 }, { "epoch": 0.4516566163019175, "grad_norm": 6.127712249755859, "learning_rate": 9.190225203641591e-05, "loss": 1.0268, "step": 6666 }, { "epoch": 0.45172437156988954, "grad_norm": 6.840565204620361, "learning_rate": 9.190088301731809e-05, "loss": 0.9955, "step": 6667 }, { "epoch": 0.45179212683786163, "grad_norm": 6.302945137023926, "learning_rate": 9.189951399822027e-05, "loss": 0.9892, "step": 6668 }, { "epoch": 0.4518598821058337, "grad_norm": 9.08460521697998, "learning_rate": 9.189814497912246e-05, "loss": 0.7988, "step": 6669 }, { "epoch": 0.4519276373738058, "grad_norm": 7.497137069702148, "learning_rate": 9.189677596002464e-05, "loss": 0.8358, "step": 6670 }, { "epoch": 0.4519953926417779, "grad_norm": 6.515604496002197, "learning_rate": 9.189540694092682e-05, "loss": 0.7724, "step": 6671 }, { "epoch": 0.45206314790975, "grad_norm": 6.092652320861816, "learning_rate": 9.189403792182902e-05, "loss": 0.7308, "step": 6672 }, { "epoch": 0.4521309031777221, "grad_norm": 8.810023307800293, "learning_rate": 9.18926689027312e-05, "loss": 0.7249, "step": 6673 }, { "epoch": 0.4521986584456942, "grad_norm": 8.328235626220703, "learning_rate": 9.189129988363338e-05, "loss": 0.8115, "step": 6674 }, { "epoch": 0.4522664137136662, "grad_norm": 7.251335620880127, "learning_rate": 9.188993086453557e-05, "loss": 0.9843, "step": 6675 }, { "epoch": 0.4523341689816383, "grad_norm": 8.03663444519043, "learning_rate": 9.188856184543775e-05, "loss": 0.9925, "step": 6676 }, { "epoch": 0.4524019242496104, "grad_norm": 9.7393798828125, "learning_rate": 9.188719282633993e-05, "loss": 0.7575, "step": 6677 }, { "epoch": 0.4524696795175825, "grad_norm": 6.343038558959961, "learning_rate": 9.188582380724211e-05, "loss": 1.0638, "step": 6678 }, { "epoch": 0.4525374347855546, "grad_norm": 7.530363082885742, "learning_rate": 9.188445478814431e-05, "loss": 1.0212, "step": 6679 }, { "epoch": 0.4526051900535267, "grad_norm": 8.741231918334961, "learning_rate": 9.188308576904649e-05, "loss": 1.1723, "step": 6680 }, { "epoch": 0.45267294532149877, "grad_norm": 9.057670593261719, "learning_rate": 9.188171674994867e-05, "loss": 1.0396, "step": 6681 }, { "epoch": 0.45274070058947086, "grad_norm": 6.638017177581787, "learning_rate": 9.188034773085085e-05, "loss": 0.9465, "step": 6682 }, { "epoch": 0.4528084558574429, "grad_norm": 7.643139839172363, "learning_rate": 9.187897871175304e-05, "loss": 0.9346, "step": 6683 }, { "epoch": 0.452876211125415, "grad_norm": 7.176743984222412, "learning_rate": 9.187760969265522e-05, "loss": 1.036, "step": 6684 }, { "epoch": 0.4529439663933871, "grad_norm": 7.506284713745117, "learning_rate": 9.18762406735574e-05, "loss": 0.9487, "step": 6685 }, { "epoch": 0.45301172166135917, "grad_norm": 6.865815162658691, "learning_rate": 9.187487165445958e-05, "loss": 0.8211, "step": 6686 }, { "epoch": 0.45307947692933126, "grad_norm": 6.889796733856201, "learning_rate": 9.187350263536176e-05, "loss": 0.7901, "step": 6687 }, { "epoch": 0.45314723219730335, "grad_norm": 7.016167163848877, "learning_rate": 9.187213361626396e-05, "loss": 0.7699, "step": 6688 }, { "epoch": 0.45321498746527544, "grad_norm": 8.25091552734375, "learning_rate": 9.187076459716614e-05, "loss": 0.9778, "step": 6689 }, { "epoch": 0.45328274273324753, "grad_norm": 7.092465877532959, "learning_rate": 9.186939557806832e-05, "loss": 1.2544, "step": 6690 }, { "epoch": 0.45335049800121957, "grad_norm": 6.402629375457764, "learning_rate": 9.18680265589705e-05, "loss": 0.9225, "step": 6691 }, { "epoch": 0.45341825326919166, "grad_norm": 7.404470920562744, "learning_rate": 9.186665753987268e-05, "loss": 0.7975, "step": 6692 }, { "epoch": 0.45348600853716375, "grad_norm": 5.976271629333496, "learning_rate": 9.186528852077487e-05, "loss": 0.8868, "step": 6693 }, { "epoch": 0.45355376380513585, "grad_norm": 7.843896389007568, "learning_rate": 9.186391950167705e-05, "loss": 0.8034, "step": 6694 }, { "epoch": 0.45362151907310794, "grad_norm": 7.217418193817139, "learning_rate": 9.186255048257923e-05, "loss": 0.6763, "step": 6695 }, { "epoch": 0.45368927434108003, "grad_norm": 6.091420650482178, "learning_rate": 9.186118146348141e-05, "loss": 0.6565, "step": 6696 }, { "epoch": 0.4537570296090521, "grad_norm": 8.277983665466309, "learning_rate": 9.185981244438361e-05, "loss": 0.7589, "step": 6697 }, { "epoch": 0.4538247848770242, "grad_norm": 9.001012802124023, "learning_rate": 9.185844342528579e-05, "loss": 0.8664, "step": 6698 }, { "epoch": 0.45389254014499625, "grad_norm": 6.039417266845703, "learning_rate": 9.185707440618797e-05, "loss": 0.809, "step": 6699 }, { "epoch": 0.45396029541296834, "grad_norm": 6.930225849151611, "learning_rate": 9.185570538709015e-05, "loss": 0.7567, "step": 6700 }, { "epoch": 0.45402805068094043, "grad_norm": 7.135040760040283, "learning_rate": 9.185433636799233e-05, "loss": 0.7669, "step": 6701 }, { "epoch": 0.4540958059489125, "grad_norm": 7.994284152984619, "learning_rate": 9.185296734889452e-05, "loss": 0.7671, "step": 6702 }, { "epoch": 0.4541635612168846, "grad_norm": 6.146820068359375, "learning_rate": 9.18515983297967e-05, "loss": 0.8975, "step": 6703 }, { "epoch": 0.4542313164848567, "grad_norm": 6.619115352630615, "learning_rate": 9.185022931069888e-05, "loss": 0.9972, "step": 6704 }, { "epoch": 0.4542990717528288, "grad_norm": 5.762700080871582, "learning_rate": 9.184886029160106e-05, "loss": 0.6798, "step": 6705 }, { "epoch": 0.4543668270208009, "grad_norm": 7.270383358001709, "learning_rate": 9.184749127250326e-05, "loss": 0.8676, "step": 6706 }, { "epoch": 0.4544345822887729, "grad_norm": 6.667981147766113, "learning_rate": 9.184612225340544e-05, "loss": 0.7988, "step": 6707 }, { "epoch": 0.454502337556745, "grad_norm": 5.513166904449463, "learning_rate": 9.184475323430762e-05, "loss": 0.868, "step": 6708 }, { "epoch": 0.4545700928247171, "grad_norm": 6.376199245452881, "learning_rate": 9.18433842152098e-05, "loss": 0.7859, "step": 6709 }, { "epoch": 0.4546378480926892, "grad_norm": 6.444526672363281, "learning_rate": 9.184201519611198e-05, "loss": 0.7793, "step": 6710 }, { "epoch": 0.4547056033606613, "grad_norm": 8.83858871459961, "learning_rate": 9.184064617701417e-05, "loss": 0.9903, "step": 6711 }, { "epoch": 0.4547733586286334, "grad_norm": 6.07218599319458, "learning_rate": 9.183927715791635e-05, "loss": 0.8349, "step": 6712 }, { "epoch": 0.4548411138966055, "grad_norm": 6.652568340301514, "learning_rate": 9.183790813881853e-05, "loss": 0.8982, "step": 6713 }, { "epoch": 0.45490886916457757, "grad_norm": 7.582399845123291, "learning_rate": 9.183653911972071e-05, "loss": 0.9833, "step": 6714 }, { "epoch": 0.4549766244325496, "grad_norm": 8.411558151245117, "learning_rate": 9.183517010062291e-05, "loss": 0.9433, "step": 6715 }, { "epoch": 0.4550443797005217, "grad_norm": 7.420217037200928, "learning_rate": 9.183380108152509e-05, "loss": 0.9508, "step": 6716 }, { "epoch": 0.4551121349684938, "grad_norm": 7.064800262451172, "learning_rate": 9.183243206242727e-05, "loss": 0.9414, "step": 6717 }, { "epoch": 0.4551798902364659, "grad_norm": 8.102936744689941, "learning_rate": 9.183106304332946e-05, "loss": 0.9061, "step": 6718 }, { "epoch": 0.45524764550443797, "grad_norm": 8.261469841003418, "learning_rate": 9.182969402423164e-05, "loss": 1.2411, "step": 6719 }, { "epoch": 0.45531540077241006, "grad_norm": 7.5003204345703125, "learning_rate": 9.182832500513382e-05, "loss": 0.9296, "step": 6720 }, { "epoch": 0.45538315604038215, "grad_norm": 7.358273029327393, "learning_rate": 9.182695598603602e-05, "loss": 0.8109, "step": 6721 }, { "epoch": 0.45545091130835424, "grad_norm": 7.0917768478393555, "learning_rate": 9.18255869669382e-05, "loss": 0.9517, "step": 6722 }, { "epoch": 0.45551866657632634, "grad_norm": 6.559359550476074, "learning_rate": 9.182421794784038e-05, "loss": 0.8381, "step": 6723 }, { "epoch": 0.4555864218442984, "grad_norm": 6.368768215179443, "learning_rate": 9.182284892874256e-05, "loss": 0.9104, "step": 6724 }, { "epoch": 0.45565417711227046, "grad_norm": 6.576778411865234, "learning_rate": 9.182147990964475e-05, "loss": 0.9116, "step": 6725 }, { "epoch": 0.45572193238024256, "grad_norm": 6.608447551727295, "learning_rate": 9.182011089054693e-05, "loss": 0.9371, "step": 6726 }, { "epoch": 0.45578968764821465, "grad_norm": 6.38028621673584, "learning_rate": 9.181874187144911e-05, "loss": 0.9607, "step": 6727 }, { "epoch": 0.45585744291618674, "grad_norm": 8.349663734436035, "learning_rate": 9.18173728523513e-05, "loss": 0.8739, "step": 6728 }, { "epoch": 0.45592519818415883, "grad_norm": 7.229827880859375, "learning_rate": 9.181600383325349e-05, "loss": 0.6525, "step": 6729 }, { "epoch": 0.4559929534521309, "grad_norm": 7.870299816131592, "learning_rate": 9.181463481415567e-05, "loss": 0.9938, "step": 6730 }, { "epoch": 0.456060708720103, "grad_norm": 7.6586503982543945, "learning_rate": 9.181326579505785e-05, "loss": 0.9489, "step": 6731 }, { "epoch": 0.45612846398807505, "grad_norm": 8.91909122467041, "learning_rate": 9.181189677596003e-05, "loss": 0.9642, "step": 6732 }, { "epoch": 0.45619621925604714, "grad_norm": 8.341778755187988, "learning_rate": 9.181052775686221e-05, "loss": 1.0161, "step": 6733 }, { "epoch": 0.45626397452401923, "grad_norm": 7.269313812255859, "learning_rate": 9.18091587377644e-05, "loss": 0.8535, "step": 6734 }, { "epoch": 0.4563317297919913, "grad_norm": 6.635743141174316, "learning_rate": 9.180778971866658e-05, "loss": 0.6519, "step": 6735 }, { "epoch": 0.4563994850599634, "grad_norm": 7.213603973388672, "learning_rate": 9.180642069956876e-05, "loss": 0.8314, "step": 6736 }, { "epoch": 0.4564672403279355, "grad_norm": 6.491673469543457, "learning_rate": 9.180505168047094e-05, "loss": 0.8691, "step": 6737 }, { "epoch": 0.4565349955959076, "grad_norm": 6.087094783782959, "learning_rate": 9.180368266137314e-05, "loss": 0.8732, "step": 6738 }, { "epoch": 0.4566027508638797, "grad_norm": 6.0057172775268555, "learning_rate": 9.180231364227532e-05, "loss": 0.74, "step": 6739 }, { "epoch": 0.45667050613185173, "grad_norm": 6.74835729598999, "learning_rate": 9.18009446231775e-05, "loss": 0.7628, "step": 6740 }, { "epoch": 0.4567382613998238, "grad_norm": 5.433539867401123, "learning_rate": 9.179957560407968e-05, "loss": 0.6438, "step": 6741 }, { "epoch": 0.4568060166677959, "grad_norm": 7.143089771270752, "learning_rate": 9.179820658498186e-05, "loss": 0.807, "step": 6742 }, { "epoch": 0.456873771935768, "grad_norm": 6.345632076263428, "learning_rate": 9.179683756588405e-05, "loss": 0.8652, "step": 6743 }, { "epoch": 0.4569415272037401, "grad_norm": 6.4180908203125, "learning_rate": 9.179546854678623e-05, "loss": 0.8225, "step": 6744 }, { "epoch": 0.4570092824717122, "grad_norm": 7.375923156738281, "learning_rate": 9.179409952768841e-05, "loss": 0.7922, "step": 6745 }, { "epoch": 0.4570770377396843, "grad_norm": 5.134999752044678, "learning_rate": 9.17927305085906e-05, "loss": 0.8943, "step": 6746 }, { "epoch": 0.45714479300765637, "grad_norm": 8.040843963623047, "learning_rate": 9.179136148949277e-05, "loss": 0.9971, "step": 6747 }, { "epoch": 0.4572125482756284, "grad_norm": 5.302629470825195, "learning_rate": 9.178999247039497e-05, "loss": 0.6811, "step": 6748 }, { "epoch": 0.4572803035436005, "grad_norm": 6.630457878112793, "learning_rate": 9.178862345129715e-05, "loss": 0.7929, "step": 6749 }, { "epoch": 0.4573480588115726, "grad_norm": 5.959926605224609, "learning_rate": 9.178725443219933e-05, "loss": 0.7941, "step": 6750 }, { "epoch": 0.4574158140795447, "grad_norm": 7.855923175811768, "learning_rate": 9.178588541310151e-05, "loss": 0.911, "step": 6751 }, { "epoch": 0.45748356934751677, "grad_norm": 7.775002479553223, "learning_rate": 9.17845163940037e-05, "loss": 0.8315, "step": 6752 }, { "epoch": 0.45755132461548886, "grad_norm": 7.259592056274414, "learning_rate": 9.178314737490588e-05, "loss": 0.8383, "step": 6753 }, { "epoch": 0.45761907988346096, "grad_norm": 8.843462944030762, "learning_rate": 9.178177835580806e-05, "loss": 1.1005, "step": 6754 }, { "epoch": 0.45768683515143305, "grad_norm": 6.322036266326904, "learning_rate": 9.178040933671024e-05, "loss": 0.8553, "step": 6755 }, { "epoch": 0.4577545904194051, "grad_norm": 6.644796848297119, "learning_rate": 9.177904031761242e-05, "loss": 1.0933, "step": 6756 }, { "epoch": 0.4578223456873772, "grad_norm": 7.463156700134277, "learning_rate": 9.177767129851462e-05, "loss": 0.9584, "step": 6757 }, { "epoch": 0.45789010095534927, "grad_norm": 5.70056676864624, "learning_rate": 9.17763022794168e-05, "loss": 0.6945, "step": 6758 }, { "epoch": 0.45795785622332136, "grad_norm": 6.347555637359619, "learning_rate": 9.177493326031898e-05, "loss": 0.9143, "step": 6759 }, { "epoch": 0.45802561149129345, "grad_norm": 6.00151252746582, "learning_rate": 9.177356424122116e-05, "loss": 0.8139, "step": 6760 }, { "epoch": 0.45809336675926554, "grad_norm": 8.02507495880127, "learning_rate": 9.177219522212335e-05, "loss": 0.8941, "step": 6761 }, { "epoch": 0.45816112202723763, "grad_norm": 7.0266876220703125, "learning_rate": 9.177082620302553e-05, "loss": 1.0009, "step": 6762 }, { "epoch": 0.4582288772952097, "grad_norm": 5.483405113220215, "learning_rate": 9.176945718392771e-05, "loss": 0.8113, "step": 6763 }, { "epoch": 0.45829663256318176, "grad_norm": 5.661553382873535, "learning_rate": 9.176808816482991e-05, "loss": 0.7524, "step": 6764 }, { "epoch": 0.45836438783115385, "grad_norm": 7.485569953918457, "learning_rate": 9.176671914573209e-05, "loss": 0.8712, "step": 6765 }, { "epoch": 0.45843214309912594, "grad_norm": 8.262595176696777, "learning_rate": 9.176535012663427e-05, "loss": 0.7777, "step": 6766 }, { "epoch": 0.45849989836709804, "grad_norm": 6.955083847045898, "learning_rate": 9.176398110753646e-05, "loss": 0.8515, "step": 6767 }, { "epoch": 0.4585676536350701, "grad_norm": 5.7011871337890625, "learning_rate": 9.176261208843864e-05, "loss": 0.7977, "step": 6768 }, { "epoch": 0.4586354089030422, "grad_norm": 7.344122886657715, "learning_rate": 9.176124306934082e-05, "loss": 0.877, "step": 6769 }, { "epoch": 0.4587031641710143, "grad_norm": 6.595543384552002, "learning_rate": 9.1759874050243e-05, "loss": 0.7739, "step": 6770 }, { "epoch": 0.4587709194389864, "grad_norm": 6.087022304534912, "learning_rate": 9.17585050311452e-05, "loss": 0.9127, "step": 6771 }, { "epoch": 0.4588386747069585, "grad_norm": 6.13311243057251, "learning_rate": 9.175713601204738e-05, "loss": 0.8803, "step": 6772 }, { "epoch": 0.45890642997493053, "grad_norm": 5.866177558898926, "learning_rate": 9.175576699294956e-05, "loss": 0.9859, "step": 6773 }, { "epoch": 0.4589741852429026, "grad_norm": 7.406826972961426, "learning_rate": 9.175439797385174e-05, "loss": 1.2433, "step": 6774 }, { "epoch": 0.4590419405108747, "grad_norm": 5.886981964111328, "learning_rate": 9.175302895475393e-05, "loss": 0.9994, "step": 6775 }, { "epoch": 0.4591096957788468, "grad_norm": 6.240331172943115, "learning_rate": 9.175165993565611e-05, "loss": 0.6988, "step": 6776 }, { "epoch": 0.4591774510468189, "grad_norm": 6.331019878387451, "learning_rate": 9.175029091655829e-05, "loss": 0.9625, "step": 6777 }, { "epoch": 0.459245206314791, "grad_norm": 6.082772731781006, "learning_rate": 9.174892189746047e-05, "loss": 0.6876, "step": 6778 }, { "epoch": 0.4593129615827631, "grad_norm": 9.173615455627441, "learning_rate": 9.174755287836265e-05, "loss": 0.9572, "step": 6779 }, { "epoch": 0.45938071685073517, "grad_norm": 7.658091068267822, "learning_rate": 9.174618385926485e-05, "loss": 1.0724, "step": 6780 }, { "epoch": 0.4594484721187072, "grad_norm": 5.414113521575928, "learning_rate": 9.174481484016703e-05, "loss": 0.963, "step": 6781 }, { "epoch": 0.4595162273866793, "grad_norm": 8.290900230407715, "learning_rate": 9.174344582106921e-05, "loss": 0.8012, "step": 6782 }, { "epoch": 0.4595839826546514, "grad_norm": 5.778069019317627, "learning_rate": 9.174207680197139e-05, "loss": 0.9199, "step": 6783 }, { "epoch": 0.4596517379226235, "grad_norm": 7.42002010345459, "learning_rate": 9.174070778287358e-05, "loss": 1.022, "step": 6784 }, { "epoch": 0.4597194931905956, "grad_norm": 5.714671611785889, "learning_rate": 9.173933876377576e-05, "loss": 0.8194, "step": 6785 }, { "epoch": 0.45978724845856767, "grad_norm": 6.420266628265381, "learning_rate": 9.173796974467794e-05, "loss": 0.8879, "step": 6786 }, { "epoch": 0.45985500372653976, "grad_norm": 7.046072006225586, "learning_rate": 9.173660072558012e-05, "loss": 0.7672, "step": 6787 }, { "epoch": 0.45992275899451185, "grad_norm": 8.797179222106934, "learning_rate": 9.17352317064823e-05, "loss": 0.7714, "step": 6788 }, { "epoch": 0.4599905142624839, "grad_norm": 6.534037113189697, "learning_rate": 9.17338626873845e-05, "loss": 0.8629, "step": 6789 }, { "epoch": 0.460058269530456, "grad_norm": 7.647995948791504, "learning_rate": 9.173249366828668e-05, "loss": 0.8143, "step": 6790 }, { "epoch": 0.46012602479842807, "grad_norm": 5.4971418380737305, "learning_rate": 9.173112464918886e-05, "loss": 0.7946, "step": 6791 }, { "epoch": 0.46019378006640016, "grad_norm": 6.304740905761719, "learning_rate": 9.172975563009104e-05, "loss": 0.9233, "step": 6792 }, { "epoch": 0.46026153533437225, "grad_norm": 7.571120262145996, "learning_rate": 9.172838661099323e-05, "loss": 0.9919, "step": 6793 }, { "epoch": 0.46032929060234434, "grad_norm": 6.336854457855225, "learning_rate": 9.172701759189541e-05, "loss": 0.7232, "step": 6794 }, { "epoch": 0.46039704587031643, "grad_norm": 6.926676273345947, "learning_rate": 9.17256485727976e-05, "loss": 0.758, "step": 6795 }, { "epoch": 0.4604648011382885, "grad_norm": 5.480643272399902, "learning_rate": 9.172427955369977e-05, "loss": 0.5871, "step": 6796 }, { "epoch": 0.46053255640626056, "grad_norm": 8.286799430847168, "learning_rate": 9.172291053460195e-05, "loss": 0.8574, "step": 6797 }, { "epoch": 0.46060031167423265, "grad_norm": 8.043119430541992, "learning_rate": 9.172154151550415e-05, "loss": 0.6363, "step": 6798 }, { "epoch": 0.46066806694220475, "grad_norm": 8.308424949645996, "learning_rate": 9.172017249640633e-05, "loss": 0.9203, "step": 6799 }, { "epoch": 0.46073582221017684, "grad_norm": 7.131639003753662, "learning_rate": 9.171880347730851e-05, "loss": 0.685, "step": 6800 }, { "epoch": 0.46080357747814893, "grad_norm": 8.397472381591797, "learning_rate": 9.171743445821069e-05, "loss": 0.9637, "step": 6801 }, { "epoch": 0.460871332746121, "grad_norm": 6.666365146636963, "learning_rate": 9.171606543911287e-05, "loss": 1.0735, "step": 6802 }, { "epoch": 0.4609390880140931, "grad_norm": 11.102825164794922, "learning_rate": 9.171469642001506e-05, "loss": 0.8958, "step": 6803 }, { "epoch": 0.4610068432820652, "grad_norm": 7.294391632080078, "learning_rate": 9.171332740091724e-05, "loss": 1.0354, "step": 6804 }, { "epoch": 0.46107459855003724, "grad_norm": 5.9984517097473145, "learning_rate": 9.171195838181942e-05, "loss": 0.605, "step": 6805 }, { "epoch": 0.46114235381800933, "grad_norm": 6.209224224090576, "learning_rate": 9.17105893627216e-05, "loss": 0.8715, "step": 6806 }, { "epoch": 0.4612101090859814, "grad_norm": 5.404207229614258, "learning_rate": 9.17092203436238e-05, "loss": 0.6694, "step": 6807 }, { "epoch": 0.4612778643539535, "grad_norm": 6.675217151641846, "learning_rate": 9.170785132452598e-05, "loss": 0.7288, "step": 6808 }, { "epoch": 0.4613456196219256, "grad_norm": 8.444995880126953, "learning_rate": 9.170648230542816e-05, "loss": 1.0756, "step": 6809 }, { "epoch": 0.4614133748898977, "grad_norm": 8.167703628540039, "learning_rate": 9.170511328633035e-05, "loss": 1.1605, "step": 6810 }, { "epoch": 0.4614811301578698, "grad_norm": 7.55181360244751, "learning_rate": 9.170374426723253e-05, "loss": 0.7805, "step": 6811 }, { "epoch": 0.4615488854258419, "grad_norm": 6.380015850067139, "learning_rate": 9.170237524813471e-05, "loss": 0.6584, "step": 6812 }, { "epoch": 0.4616166406938139, "grad_norm": 6.2454376220703125, "learning_rate": 9.170100622903691e-05, "loss": 0.8212, "step": 6813 }, { "epoch": 0.461684395961786, "grad_norm": 7.530882358551025, "learning_rate": 9.169963720993909e-05, "loss": 0.8926, "step": 6814 }, { "epoch": 0.4617521512297581, "grad_norm": 6.3245158195495605, "learning_rate": 9.169826819084127e-05, "loss": 0.8672, "step": 6815 }, { "epoch": 0.4618199064977302, "grad_norm": 5.822272300720215, "learning_rate": 9.169689917174346e-05, "loss": 0.7972, "step": 6816 }, { "epoch": 0.4618876617657023, "grad_norm": 8.160684585571289, "learning_rate": 9.169553015264564e-05, "loss": 1.0076, "step": 6817 }, { "epoch": 0.4619554170336744, "grad_norm": 6.745192050933838, "learning_rate": 9.169416113354782e-05, "loss": 0.8187, "step": 6818 }, { "epoch": 0.46202317230164647, "grad_norm": 8.530631065368652, "learning_rate": 9.169279211445e-05, "loss": 0.7655, "step": 6819 }, { "epoch": 0.46209092756961856, "grad_norm": 7.213817596435547, "learning_rate": 9.169142309535218e-05, "loss": 0.8737, "step": 6820 }, { "epoch": 0.4621586828375906, "grad_norm": 6.5060577392578125, "learning_rate": 9.169005407625438e-05, "loss": 0.815, "step": 6821 }, { "epoch": 0.4622264381055627, "grad_norm": 6.782070636749268, "learning_rate": 9.168868505715656e-05, "loss": 0.792, "step": 6822 }, { "epoch": 0.4622941933735348, "grad_norm": 6.130987644195557, "learning_rate": 9.168731603805874e-05, "loss": 0.6743, "step": 6823 }, { "epoch": 0.46236194864150687, "grad_norm": 8.178153991699219, "learning_rate": 9.168594701896092e-05, "loss": 1.0958, "step": 6824 }, { "epoch": 0.46242970390947896, "grad_norm": 8.360578536987305, "learning_rate": 9.16845779998631e-05, "loss": 0.9709, "step": 6825 }, { "epoch": 0.46249745917745105, "grad_norm": 7.507253170013428, "learning_rate": 9.168320898076529e-05, "loss": 0.9384, "step": 6826 }, { "epoch": 0.46256521444542315, "grad_norm": 8.285658836364746, "learning_rate": 9.168183996166747e-05, "loss": 0.8913, "step": 6827 }, { "epoch": 0.46263296971339524, "grad_norm": 7.765392780303955, "learning_rate": 9.168047094256965e-05, "loss": 0.8754, "step": 6828 }, { "epoch": 0.46270072498136733, "grad_norm": 7.6219892501831055, "learning_rate": 9.167910192347183e-05, "loss": 0.7401, "step": 6829 }, { "epoch": 0.46276848024933936, "grad_norm": 9.139801979064941, "learning_rate": 9.167773290437403e-05, "loss": 0.8068, "step": 6830 }, { "epoch": 0.46283623551731146, "grad_norm": 7.655275821685791, "learning_rate": 9.167636388527621e-05, "loss": 0.8314, "step": 6831 }, { "epoch": 0.46290399078528355, "grad_norm": 6.699294090270996, "learning_rate": 9.167499486617839e-05, "loss": 0.8665, "step": 6832 }, { "epoch": 0.46297174605325564, "grad_norm": 8.436169624328613, "learning_rate": 9.167362584708057e-05, "loss": 1.3026, "step": 6833 }, { "epoch": 0.46303950132122773, "grad_norm": 5.195062160491943, "learning_rate": 9.167225682798275e-05, "loss": 0.7468, "step": 6834 }, { "epoch": 0.4631072565891998, "grad_norm": 5.665080547332764, "learning_rate": 9.167088780888494e-05, "loss": 0.7357, "step": 6835 }, { "epoch": 0.4631750118571719, "grad_norm": 6.243655681610107, "learning_rate": 9.166951878978712e-05, "loss": 0.7705, "step": 6836 }, { "epoch": 0.463242767125144, "grad_norm": 7.49260950088501, "learning_rate": 9.16681497706893e-05, "loss": 0.8061, "step": 6837 }, { "epoch": 0.46331052239311604, "grad_norm": 7.076335430145264, "learning_rate": 9.166678075159148e-05, "loss": 0.8307, "step": 6838 }, { "epoch": 0.46337827766108813, "grad_norm": 6.332518100738525, "learning_rate": 9.166541173249368e-05, "loss": 0.7431, "step": 6839 }, { "epoch": 0.4634460329290602, "grad_norm": 6.261449813842773, "learning_rate": 9.166404271339586e-05, "loss": 0.8817, "step": 6840 }, { "epoch": 0.4635137881970323, "grad_norm": 7.467769622802734, "learning_rate": 9.166267369429804e-05, "loss": 0.9473, "step": 6841 }, { "epoch": 0.4635815434650044, "grad_norm": 5.55718469619751, "learning_rate": 9.166130467520022e-05, "loss": 0.7726, "step": 6842 }, { "epoch": 0.4636492987329765, "grad_norm": 7.012959003448486, "learning_rate": 9.16599356561024e-05, "loss": 0.9274, "step": 6843 }, { "epoch": 0.4637170540009486, "grad_norm": 6.344860076904297, "learning_rate": 9.165856663700459e-05, "loss": 1.0617, "step": 6844 }, { "epoch": 0.4637848092689207, "grad_norm": 6.024655342102051, "learning_rate": 9.165719761790677e-05, "loss": 0.8224, "step": 6845 }, { "epoch": 0.4638525645368927, "grad_norm": 6.97758674621582, "learning_rate": 9.165582859880895e-05, "loss": 0.8344, "step": 6846 }, { "epoch": 0.4639203198048648, "grad_norm": 6.874304294586182, "learning_rate": 9.165445957971113e-05, "loss": 0.8698, "step": 6847 }, { "epoch": 0.4639880750728369, "grad_norm": 8.15816593170166, "learning_rate": 9.165309056061331e-05, "loss": 1.0457, "step": 6848 }, { "epoch": 0.464055830340809, "grad_norm": 7.304631233215332, "learning_rate": 9.165172154151551e-05, "loss": 0.9027, "step": 6849 }, { "epoch": 0.4641235856087811, "grad_norm": 7.377762317657471, "learning_rate": 9.165035252241769e-05, "loss": 0.6635, "step": 6850 }, { "epoch": 0.4641913408767532, "grad_norm": 6.051864147186279, "learning_rate": 9.164898350331987e-05, "loss": 0.925, "step": 6851 }, { "epoch": 0.46425909614472527, "grad_norm": 6.560476779937744, "learning_rate": 9.164761448422205e-05, "loss": 0.7674, "step": 6852 }, { "epoch": 0.46432685141269736, "grad_norm": 6.828582763671875, "learning_rate": 9.164624546512424e-05, "loss": 0.8159, "step": 6853 }, { "epoch": 0.4643946066806694, "grad_norm": 6.463281631469727, "learning_rate": 9.164487644602642e-05, "loss": 0.6261, "step": 6854 }, { "epoch": 0.4644623619486415, "grad_norm": 6.3371734619140625, "learning_rate": 9.16435074269286e-05, "loss": 0.6489, "step": 6855 }, { "epoch": 0.4645301172166136, "grad_norm": 6.052369117736816, "learning_rate": 9.16421384078308e-05, "loss": 0.6899, "step": 6856 }, { "epoch": 0.46459787248458567, "grad_norm": 6.464377403259277, "learning_rate": 9.164076938873298e-05, "loss": 0.7603, "step": 6857 }, { "epoch": 0.46466562775255776, "grad_norm": 5.9912567138671875, "learning_rate": 9.163940036963516e-05, "loss": 0.7188, "step": 6858 }, { "epoch": 0.46473338302052986, "grad_norm": 8.469727516174316, "learning_rate": 9.163803135053735e-05, "loss": 0.9991, "step": 6859 }, { "epoch": 0.46480113828850195, "grad_norm": 6.261715888977051, "learning_rate": 9.163666233143953e-05, "loss": 0.7429, "step": 6860 }, { "epoch": 0.46486889355647404, "grad_norm": 7.389404296875, "learning_rate": 9.163529331234171e-05, "loss": 0.6294, "step": 6861 }, { "epoch": 0.4649366488244461, "grad_norm": 6.988171100616455, "learning_rate": 9.16339242932439e-05, "loss": 0.8705, "step": 6862 }, { "epoch": 0.46500440409241817, "grad_norm": 6.285641193389893, "learning_rate": 9.163255527414609e-05, "loss": 0.7026, "step": 6863 }, { "epoch": 0.46507215936039026, "grad_norm": 8.77840518951416, "learning_rate": 9.163118625504827e-05, "loss": 0.8766, "step": 6864 }, { "epoch": 0.46513991462836235, "grad_norm": 6.199909210205078, "learning_rate": 9.162981723595045e-05, "loss": 0.7304, "step": 6865 }, { "epoch": 0.46520766989633444, "grad_norm": 6.947317600250244, "learning_rate": 9.162844821685263e-05, "loss": 0.8649, "step": 6866 }, { "epoch": 0.46527542516430653, "grad_norm": 8.396434783935547, "learning_rate": 9.162707919775482e-05, "loss": 1.2161, "step": 6867 }, { "epoch": 0.4653431804322786, "grad_norm": 5.935467720031738, "learning_rate": 9.1625710178657e-05, "loss": 0.8499, "step": 6868 }, { "epoch": 0.4654109357002507, "grad_norm": 6.258296966552734, "learning_rate": 9.162434115955918e-05, "loss": 0.9541, "step": 6869 }, { "epoch": 0.46547869096822275, "grad_norm": 6.29650354385376, "learning_rate": 9.162297214046136e-05, "loss": 0.7774, "step": 6870 }, { "epoch": 0.46554644623619484, "grad_norm": 5.948836803436279, "learning_rate": 9.162160312136356e-05, "loss": 0.7579, "step": 6871 }, { "epoch": 0.46561420150416694, "grad_norm": 5.719659805297852, "learning_rate": 9.162023410226574e-05, "loss": 0.7482, "step": 6872 }, { "epoch": 0.465681956772139, "grad_norm": 6.145468235015869, "learning_rate": 9.161886508316792e-05, "loss": 0.8816, "step": 6873 }, { "epoch": 0.4657497120401111, "grad_norm": 6.33701753616333, "learning_rate": 9.16174960640701e-05, "loss": 0.7617, "step": 6874 }, { "epoch": 0.4658174673080832, "grad_norm": 6.143844127655029, "learning_rate": 9.161612704497228e-05, "loss": 0.9695, "step": 6875 }, { "epoch": 0.4658852225760553, "grad_norm": 6.316319942474365, "learning_rate": 9.161475802587447e-05, "loss": 0.8837, "step": 6876 }, { "epoch": 0.4659529778440274, "grad_norm": 6.68782377243042, "learning_rate": 9.161338900677665e-05, "loss": 0.7324, "step": 6877 }, { "epoch": 0.4660207331119995, "grad_norm": 6.216282844543457, "learning_rate": 9.161201998767883e-05, "loss": 0.7787, "step": 6878 }, { "epoch": 0.4660884883799715, "grad_norm": 6.8673176765441895, "learning_rate": 9.161065096858101e-05, "loss": 1.1118, "step": 6879 }, { "epoch": 0.4661562436479436, "grad_norm": 6.71323823928833, "learning_rate": 9.16092819494832e-05, "loss": 0.9176, "step": 6880 }, { "epoch": 0.4662239989159157, "grad_norm": 8.589679718017578, "learning_rate": 9.160791293038539e-05, "loss": 0.9758, "step": 6881 }, { "epoch": 0.4662917541838878, "grad_norm": 6.692760467529297, "learning_rate": 9.160654391128757e-05, "loss": 0.8867, "step": 6882 }, { "epoch": 0.4663595094518599, "grad_norm": 8.462479591369629, "learning_rate": 9.160517489218975e-05, "loss": 0.9738, "step": 6883 }, { "epoch": 0.466427264719832, "grad_norm": 6.068343162536621, "learning_rate": 9.160380587309193e-05, "loss": 0.6652, "step": 6884 }, { "epoch": 0.46649501998780407, "grad_norm": 6.04793643951416, "learning_rate": 9.160243685399412e-05, "loss": 0.8575, "step": 6885 }, { "epoch": 0.46656277525577616, "grad_norm": 7.8360514640808105, "learning_rate": 9.16010678348963e-05, "loss": 0.8488, "step": 6886 }, { "epoch": 0.4666305305237482, "grad_norm": 6.028532981872559, "learning_rate": 9.159969881579848e-05, "loss": 0.8841, "step": 6887 }, { "epoch": 0.4666982857917203, "grad_norm": 7.673785209655762, "learning_rate": 9.159832979670066e-05, "loss": 0.9035, "step": 6888 }, { "epoch": 0.4667660410596924, "grad_norm": 7.042590618133545, "learning_rate": 9.159696077760284e-05, "loss": 0.8812, "step": 6889 }, { "epoch": 0.4668337963276645, "grad_norm": 6.562109470367432, "learning_rate": 9.159559175850504e-05, "loss": 0.7179, "step": 6890 }, { "epoch": 0.46690155159563657, "grad_norm": 6.083657264709473, "learning_rate": 9.159422273940722e-05, "loss": 0.7322, "step": 6891 }, { "epoch": 0.46696930686360866, "grad_norm": 7.408811569213867, "learning_rate": 9.15928537203094e-05, "loss": 0.9291, "step": 6892 }, { "epoch": 0.46703706213158075, "grad_norm": 6.70425271987915, "learning_rate": 9.159148470121158e-05, "loss": 0.9796, "step": 6893 }, { "epoch": 0.46710481739955284, "grad_norm": 7.486738681793213, "learning_rate": 9.159011568211377e-05, "loss": 1.128, "step": 6894 }, { "epoch": 0.4671725726675249, "grad_norm": 6.747304916381836, "learning_rate": 9.158874666301595e-05, "loss": 0.8432, "step": 6895 }, { "epoch": 0.46724032793549697, "grad_norm": 7.494656085968018, "learning_rate": 9.158737764391813e-05, "loss": 0.9587, "step": 6896 }, { "epoch": 0.46730808320346906, "grad_norm": 5.556826114654541, "learning_rate": 9.158600862482031e-05, "loss": 0.6565, "step": 6897 }, { "epoch": 0.46737583847144115, "grad_norm": 6.550345420837402, "learning_rate": 9.15846396057225e-05, "loss": 0.7854, "step": 6898 }, { "epoch": 0.46744359373941324, "grad_norm": 7.0627899169921875, "learning_rate": 9.158327058662469e-05, "loss": 0.9204, "step": 6899 }, { "epoch": 0.46751134900738534, "grad_norm": 7.1070661544799805, "learning_rate": 9.158190156752687e-05, "loss": 0.9357, "step": 6900 }, { "epoch": 0.4675791042753574, "grad_norm": 7.313487529754639, "learning_rate": 9.158053254842905e-05, "loss": 0.7561, "step": 6901 }, { "epoch": 0.4676468595433295, "grad_norm": 6.839418888092041, "learning_rate": 9.157916352933123e-05, "loss": 0.9551, "step": 6902 }, { "epoch": 0.46771461481130155, "grad_norm": 6.881319999694824, "learning_rate": 9.157779451023342e-05, "loss": 0.8799, "step": 6903 }, { "epoch": 0.46778237007927365, "grad_norm": 7.470109462738037, "learning_rate": 9.15764254911356e-05, "loss": 0.806, "step": 6904 }, { "epoch": 0.46785012534724574, "grad_norm": 7.056912899017334, "learning_rate": 9.157505647203778e-05, "loss": 0.7905, "step": 6905 }, { "epoch": 0.46791788061521783, "grad_norm": 6.295861721038818, "learning_rate": 9.157368745293998e-05, "loss": 0.8256, "step": 6906 }, { "epoch": 0.4679856358831899, "grad_norm": 5.80570650100708, "learning_rate": 9.157231843384216e-05, "loss": 0.7747, "step": 6907 }, { "epoch": 0.468053391151162, "grad_norm": 7.0094218254089355, "learning_rate": 9.157094941474435e-05, "loss": 0.9504, "step": 6908 }, { "epoch": 0.4681211464191341, "grad_norm": 5.334716320037842, "learning_rate": 9.156958039564653e-05, "loss": 0.7608, "step": 6909 }, { "epoch": 0.4681889016871062, "grad_norm": 8.058252334594727, "learning_rate": 9.156821137654871e-05, "loss": 0.889, "step": 6910 }, { "epoch": 0.46825665695507823, "grad_norm": 7.769287586212158, "learning_rate": 9.156684235745089e-05, "loss": 1.0391, "step": 6911 }, { "epoch": 0.4683244122230503, "grad_norm": 5.96143102645874, "learning_rate": 9.156547333835307e-05, "loss": 0.8793, "step": 6912 }, { "epoch": 0.4683921674910224, "grad_norm": 5.6451005935668945, "learning_rate": 9.156410431925527e-05, "loss": 0.7804, "step": 6913 }, { "epoch": 0.4684599227589945, "grad_norm": 7.539672374725342, "learning_rate": 9.156273530015745e-05, "loss": 0.8137, "step": 6914 }, { "epoch": 0.4685276780269666, "grad_norm": 7.447227954864502, "learning_rate": 9.156136628105963e-05, "loss": 1.0692, "step": 6915 }, { "epoch": 0.4685954332949387, "grad_norm": 8.100126266479492, "learning_rate": 9.155999726196181e-05, "loss": 0.8124, "step": 6916 }, { "epoch": 0.4686631885629108, "grad_norm": 8.560744285583496, "learning_rate": 9.1558628242864e-05, "loss": 0.8659, "step": 6917 }, { "epoch": 0.4687309438308829, "grad_norm": 7.135112285614014, "learning_rate": 9.155725922376618e-05, "loss": 0.9009, "step": 6918 }, { "epoch": 0.4687986990988549, "grad_norm": 7.749111175537109, "learning_rate": 9.155589020466836e-05, "loss": 0.9209, "step": 6919 }, { "epoch": 0.468866454366827, "grad_norm": 6.616466999053955, "learning_rate": 9.155452118557054e-05, "loss": 0.9283, "step": 6920 }, { "epoch": 0.4689342096347991, "grad_norm": 6.961619853973389, "learning_rate": 9.155315216647272e-05, "loss": 0.8142, "step": 6921 }, { "epoch": 0.4690019649027712, "grad_norm": 7.225759506225586, "learning_rate": 9.155178314737492e-05, "loss": 0.7626, "step": 6922 }, { "epoch": 0.4690697201707433, "grad_norm": 6.0465922355651855, "learning_rate": 9.15504141282771e-05, "loss": 0.8281, "step": 6923 }, { "epoch": 0.46913747543871537, "grad_norm": 7.507081985473633, "learning_rate": 9.154904510917928e-05, "loss": 1.1642, "step": 6924 }, { "epoch": 0.46920523070668746, "grad_norm": 6.281520843505859, "learning_rate": 9.154767609008146e-05, "loss": 0.8694, "step": 6925 }, { "epoch": 0.46927298597465955, "grad_norm": 6.0980658531188965, "learning_rate": 9.154630707098365e-05, "loss": 0.6983, "step": 6926 }, { "epoch": 0.4693407412426316, "grad_norm": 5.842291355133057, "learning_rate": 9.154493805188583e-05, "loss": 0.8392, "step": 6927 }, { "epoch": 0.4694084965106037, "grad_norm": 7.486947536468506, "learning_rate": 9.154356903278801e-05, "loss": 0.8363, "step": 6928 }, { "epoch": 0.46947625177857577, "grad_norm": 7.279699802398682, "learning_rate": 9.154220001369019e-05, "loss": 0.9697, "step": 6929 }, { "epoch": 0.46954400704654786, "grad_norm": 6.720832347869873, "learning_rate": 9.154083099459237e-05, "loss": 1.0377, "step": 6930 }, { "epoch": 0.46961176231451995, "grad_norm": 9.792084693908691, "learning_rate": 9.153946197549457e-05, "loss": 1.1697, "step": 6931 }, { "epoch": 0.46967951758249205, "grad_norm": 6.329649925231934, "learning_rate": 9.153809295639675e-05, "loss": 0.8825, "step": 6932 }, { "epoch": 0.46974727285046414, "grad_norm": 5.9268269538879395, "learning_rate": 9.153672393729893e-05, "loss": 0.9157, "step": 6933 }, { "epoch": 0.46981502811843623, "grad_norm": 7.7385430335998535, "learning_rate": 9.153535491820111e-05, "loss": 0.7662, "step": 6934 }, { "epoch": 0.4698827833864083, "grad_norm": 6.896132946014404, "learning_rate": 9.153398589910329e-05, "loss": 1.004, "step": 6935 }, { "epoch": 0.46995053865438036, "grad_norm": 6.822011470794678, "learning_rate": 9.153261688000548e-05, "loss": 1.0163, "step": 6936 }, { "epoch": 0.47001829392235245, "grad_norm": 6.4482574462890625, "learning_rate": 9.153124786090766e-05, "loss": 0.8293, "step": 6937 }, { "epoch": 0.47008604919032454, "grad_norm": 6.855703353881836, "learning_rate": 9.152987884180984e-05, "loss": 0.9737, "step": 6938 }, { "epoch": 0.47015380445829663, "grad_norm": 6.508902549743652, "learning_rate": 9.152850982271202e-05, "loss": 1.0892, "step": 6939 }, { "epoch": 0.4702215597262687, "grad_norm": 5.452862739562988, "learning_rate": 9.152714080361422e-05, "loss": 0.803, "step": 6940 }, { "epoch": 0.4702893149942408, "grad_norm": 5.583015441894531, "learning_rate": 9.15257717845164e-05, "loss": 0.6624, "step": 6941 }, { "epoch": 0.4703570702622129, "grad_norm": 7.870943069458008, "learning_rate": 9.152440276541858e-05, "loss": 0.929, "step": 6942 }, { "epoch": 0.470424825530185, "grad_norm": 7.723261833190918, "learning_rate": 9.152303374632076e-05, "loss": 0.8467, "step": 6943 }, { "epoch": 0.47049258079815703, "grad_norm": 8.4433012008667, "learning_rate": 9.152166472722294e-05, "loss": 0.9935, "step": 6944 }, { "epoch": 0.4705603360661291, "grad_norm": 7.142673015594482, "learning_rate": 9.152029570812513e-05, "loss": 0.9821, "step": 6945 }, { "epoch": 0.4706280913341012, "grad_norm": 6.800427436828613, "learning_rate": 9.151892668902731e-05, "loss": 0.7329, "step": 6946 }, { "epoch": 0.4706958466020733, "grad_norm": 6.462594985961914, "learning_rate": 9.151755766992949e-05, "loss": 0.6495, "step": 6947 }, { "epoch": 0.4707636018700454, "grad_norm": 6.2622294425964355, "learning_rate": 9.151618865083167e-05, "loss": 1.0438, "step": 6948 }, { "epoch": 0.4708313571380175, "grad_norm": 7.76660680770874, "learning_rate": 9.151481963173387e-05, "loss": 0.8855, "step": 6949 }, { "epoch": 0.4708991124059896, "grad_norm": 4.535599231719971, "learning_rate": 9.151345061263605e-05, "loss": 0.6823, "step": 6950 }, { "epoch": 0.4709668676739617, "grad_norm": 5.862152099609375, "learning_rate": 9.151208159353823e-05, "loss": 0.7322, "step": 6951 }, { "epoch": 0.4710346229419337, "grad_norm": 6.145107746124268, "learning_rate": 9.151071257444042e-05, "loss": 0.8258, "step": 6952 }, { "epoch": 0.4711023782099058, "grad_norm": 5.889834880828857, "learning_rate": 9.15093435553426e-05, "loss": 0.8009, "step": 6953 }, { "epoch": 0.4711701334778779, "grad_norm": 8.594234466552734, "learning_rate": 9.150797453624478e-05, "loss": 0.8349, "step": 6954 }, { "epoch": 0.47123788874585, "grad_norm": 7.7569379806518555, "learning_rate": 9.150660551714698e-05, "loss": 0.6711, "step": 6955 }, { "epoch": 0.4713056440138221, "grad_norm": 7.4088544845581055, "learning_rate": 9.150523649804916e-05, "loss": 0.8936, "step": 6956 }, { "epoch": 0.47137339928179417, "grad_norm": 6.610263347625732, "learning_rate": 9.150386747895134e-05, "loss": 0.9786, "step": 6957 }, { "epoch": 0.47144115454976626, "grad_norm": 6.46956729888916, "learning_rate": 9.150249845985352e-05, "loss": 0.6781, "step": 6958 }, { "epoch": 0.47150890981773835, "grad_norm": 6.152948379516602, "learning_rate": 9.150112944075571e-05, "loss": 0.8375, "step": 6959 }, { "epoch": 0.4715766650857104, "grad_norm": 6.96013879776001, "learning_rate": 9.149976042165789e-05, "loss": 0.8629, "step": 6960 }, { "epoch": 0.4716444203536825, "grad_norm": 6.767139434814453, "learning_rate": 9.149839140256007e-05, "loss": 0.8043, "step": 6961 }, { "epoch": 0.4717121756216546, "grad_norm": 7.95654296875, "learning_rate": 9.149702238346225e-05, "loss": 1.0179, "step": 6962 }, { "epoch": 0.47177993088962666, "grad_norm": 8.155875205993652, "learning_rate": 9.149565336436445e-05, "loss": 1.0663, "step": 6963 }, { "epoch": 0.47184768615759876, "grad_norm": 7.903263092041016, "learning_rate": 9.149428434526663e-05, "loss": 1.0214, "step": 6964 }, { "epoch": 0.47191544142557085, "grad_norm": 7.510760307312012, "learning_rate": 9.149291532616881e-05, "loss": 0.9942, "step": 6965 }, { "epoch": 0.47198319669354294, "grad_norm": 5.814423084259033, "learning_rate": 9.149154630707099e-05, "loss": 0.7677, "step": 6966 }, { "epoch": 0.47205095196151503, "grad_norm": 8.29617977142334, "learning_rate": 9.149017728797317e-05, "loss": 0.8268, "step": 6967 }, { "epoch": 0.47211870722948707, "grad_norm": 7.393543720245361, "learning_rate": 9.148880826887536e-05, "loss": 0.9067, "step": 6968 }, { "epoch": 0.47218646249745916, "grad_norm": 7.611250400543213, "learning_rate": 9.148743924977754e-05, "loss": 0.8908, "step": 6969 }, { "epoch": 0.47225421776543125, "grad_norm": 9.330535888671875, "learning_rate": 9.148607023067972e-05, "loss": 0.6368, "step": 6970 }, { "epoch": 0.47232197303340334, "grad_norm": 7.130900859832764, "learning_rate": 9.14847012115819e-05, "loss": 0.8157, "step": 6971 }, { "epoch": 0.47238972830137543, "grad_norm": 6.899352550506592, "learning_rate": 9.14833321924841e-05, "loss": 0.7058, "step": 6972 }, { "epoch": 0.4724574835693475, "grad_norm": 7.850022792816162, "learning_rate": 9.148196317338628e-05, "loss": 0.9622, "step": 6973 }, { "epoch": 0.4725252388373196, "grad_norm": 8.331214904785156, "learning_rate": 9.148059415428846e-05, "loss": 0.9366, "step": 6974 }, { "epoch": 0.4725929941052917, "grad_norm": 6.377279758453369, "learning_rate": 9.147922513519064e-05, "loss": 0.8701, "step": 6975 }, { "epoch": 0.47266074937326374, "grad_norm": 6.650668621063232, "learning_rate": 9.147785611609282e-05, "loss": 0.8066, "step": 6976 }, { "epoch": 0.47272850464123584, "grad_norm": 7.406231880187988, "learning_rate": 9.147648709699501e-05, "loss": 0.8833, "step": 6977 }, { "epoch": 0.4727962599092079, "grad_norm": 6.31017541885376, "learning_rate": 9.147511807789719e-05, "loss": 0.8001, "step": 6978 }, { "epoch": 0.47286401517718, "grad_norm": 6.4131927490234375, "learning_rate": 9.147374905879937e-05, "loss": 0.77, "step": 6979 }, { "epoch": 0.4729317704451521, "grad_norm": 9.2667236328125, "learning_rate": 9.147238003970155e-05, "loss": 0.9891, "step": 6980 }, { "epoch": 0.4729995257131242, "grad_norm": 7.107274055480957, "learning_rate": 9.147101102060373e-05, "loss": 1.0087, "step": 6981 }, { "epoch": 0.4730672809810963, "grad_norm": 8.102210998535156, "learning_rate": 9.146964200150593e-05, "loss": 0.7827, "step": 6982 }, { "epoch": 0.4731350362490684, "grad_norm": 6.7619099617004395, "learning_rate": 9.146827298240811e-05, "loss": 0.7432, "step": 6983 }, { "epoch": 0.4732027915170405, "grad_norm": 7.551081657409668, "learning_rate": 9.146690396331029e-05, "loss": 0.6776, "step": 6984 }, { "epoch": 0.4732705467850125, "grad_norm": 6.923361301422119, "learning_rate": 9.146553494421247e-05, "loss": 0.962, "step": 6985 }, { "epoch": 0.4733383020529846, "grad_norm": 6.971046447753906, "learning_rate": 9.146416592511466e-05, "loss": 0.9481, "step": 6986 }, { "epoch": 0.4734060573209567, "grad_norm": 7.046440601348877, "learning_rate": 9.146279690601684e-05, "loss": 1.0244, "step": 6987 }, { "epoch": 0.4734738125889288, "grad_norm": 6.281298637390137, "learning_rate": 9.146142788691902e-05, "loss": 0.8104, "step": 6988 }, { "epoch": 0.4735415678569009, "grad_norm": 10.22514820098877, "learning_rate": 9.14600588678212e-05, "loss": 1.1109, "step": 6989 }, { "epoch": 0.47360932312487297, "grad_norm": 6.466033458709717, "learning_rate": 9.145868984872338e-05, "loss": 0.9219, "step": 6990 }, { "epoch": 0.47367707839284506, "grad_norm": 7.837368011474609, "learning_rate": 9.145732082962558e-05, "loss": 1.0033, "step": 6991 }, { "epoch": 0.47374483366081716, "grad_norm": 7.501054286956787, "learning_rate": 9.145595181052776e-05, "loss": 0.919, "step": 6992 }, { "epoch": 0.4738125889287892, "grad_norm": 6.243696212768555, "learning_rate": 9.145458279142994e-05, "loss": 0.7736, "step": 6993 }, { "epoch": 0.4738803441967613, "grad_norm": 7.519147872924805, "learning_rate": 9.145321377233212e-05, "loss": 0.8014, "step": 6994 }, { "epoch": 0.4739480994647334, "grad_norm": 5.793912887573242, "learning_rate": 9.145184475323431e-05, "loss": 0.7508, "step": 6995 }, { "epoch": 0.47401585473270547, "grad_norm": 7.050177097320557, "learning_rate": 9.145047573413649e-05, "loss": 0.8174, "step": 6996 }, { "epoch": 0.47408361000067756, "grad_norm": 5.979698657989502, "learning_rate": 9.144910671503867e-05, "loss": 0.9812, "step": 6997 }, { "epoch": 0.47415136526864965, "grad_norm": 7.254084587097168, "learning_rate": 9.144773769594087e-05, "loss": 0.8015, "step": 6998 }, { "epoch": 0.47421912053662174, "grad_norm": 8.168086051940918, "learning_rate": 9.144636867684305e-05, "loss": 0.8107, "step": 6999 }, { "epoch": 0.47428687580459383, "grad_norm": 6.693539619445801, "learning_rate": 9.144499965774523e-05, "loss": 0.7048, "step": 7000 }, { "epoch": 0.47435463107256587, "grad_norm": 7.216420650482178, "learning_rate": 9.144363063864742e-05, "loss": 0.7434, "step": 7001 }, { "epoch": 0.47442238634053796, "grad_norm": 8.014084815979004, "learning_rate": 9.14422616195496e-05, "loss": 1.1213, "step": 7002 }, { "epoch": 0.47449014160851005, "grad_norm": 6.1906938552856445, "learning_rate": 9.144089260045178e-05, "loss": 0.8188, "step": 7003 }, { "epoch": 0.47455789687648214, "grad_norm": 8.70464038848877, "learning_rate": 9.143952358135398e-05, "loss": 1.0273, "step": 7004 }, { "epoch": 0.47462565214445424, "grad_norm": 5.303441524505615, "learning_rate": 9.143815456225616e-05, "loss": 0.6782, "step": 7005 }, { "epoch": 0.4746934074124263, "grad_norm": 8.176512718200684, "learning_rate": 9.143678554315834e-05, "loss": 0.7853, "step": 7006 }, { "epoch": 0.4747611626803984, "grad_norm": 6.074409008026123, "learning_rate": 9.143541652406052e-05, "loss": 0.9088, "step": 7007 }, { "epoch": 0.4748289179483705, "grad_norm": 7.32485294342041, "learning_rate": 9.14340475049627e-05, "loss": 1.0558, "step": 7008 }, { "epoch": 0.47489667321634255, "grad_norm": 5.4144463539123535, "learning_rate": 9.143267848586489e-05, "loss": 0.9064, "step": 7009 }, { "epoch": 0.47496442848431464, "grad_norm": 7.717291355133057, "learning_rate": 9.143130946676707e-05, "loss": 0.9129, "step": 7010 }, { "epoch": 0.47503218375228673, "grad_norm": 7.902177333831787, "learning_rate": 9.142994044766925e-05, "loss": 1.1144, "step": 7011 }, { "epoch": 0.4750999390202588, "grad_norm": 6.94300651550293, "learning_rate": 9.142857142857143e-05, "loss": 0.7982, "step": 7012 }, { "epoch": 0.4751676942882309, "grad_norm": 7.08957052230835, "learning_rate": 9.142720240947361e-05, "loss": 1.009, "step": 7013 }, { "epoch": 0.475235449556203, "grad_norm": 5.148087024688721, "learning_rate": 9.14258333903758e-05, "loss": 0.7443, "step": 7014 }, { "epoch": 0.4753032048241751, "grad_norm": 6.155871868133545, "learning_rate": 9.142446437127799e-05, "loss": 0.8135, "step": 7015 }, { "epoch": 0.4753709600921472, "grad_norm": 6.569172382354736, "learning_rate": 9.142309535218017e-05, "loss": 0.7684, "step": 7016 }, { "epoch": 0.4754387153601192, "grad_norm": 9.110980033874512, "learning_rate": 9.142172633308235e-05, "loss": 1.0382, "step": 7017 }, { "epoch": 0.4755064706280913, "grad_norm": 6.317762851715088, "learning_rate": 9.142035731398454e-05, "loss": 0.7206, "step": 7018 }, { "epoch": 0.4755742258960634, "grad_norm": 7.054732799530029, "learning_rate": 9.141898829488672e-05, "loss": 0.7442, "step": 7019 }, { "epoch": 0.4756419811640355, "grad_norm": 8.904619216918945, "learning_rate": 9.14176192757889e-05, "loss": 1.1686, "step": 7020 }, { "epoch": 0.4757097364320076, "grad_norm": 7.206265926361084, "learning_rate": 9.141625025669108e-05, "loss": 0.8675, "step": 7021 }, { "epoch": 0.4757774916999797, "grad_norm": 7.7408318519592285, "learning_rate": 9.141488123759326e-05, "loss": 0.9132, "step": 7022 }, { "epoch": 0.4758452469679518, "grad_norm": 5.807632923126221, "learning_rate": 9.141351221849546e-05, "loss": 0.8356, "step": 7023 }, { "epoch": 0.47591300223592387, "grad_norm": 8.07009506225586, "learning_rate": 9.141214319939764e-05, "loss": 1.1522, "step": 7024 }, { "epoch": 0.4759807575038959, "grad_norm": 7.409401893615723, "learning_rate": 9.141077418029982e-05, "loss": 0.7972, "step": 7025 }, { "epoch": 0.476048512771868, "grad_norm": 7.14201545715332, "learning_rate": 9.1409405161202e-05, "loss": 0.9284, "step": 7026 }, { "epoch": 0.4761162680398401, "grad_norm": 6.279862880706787, "learning_rate": 9.140803614210419e-05, "loss": 0.7995, "step": 7027 }, { "epoch": 0.4761840233078122, "grad_norm": 6.493180274963379, "learning_rate": 9.140666712300637e-05, "loss": 0.8848, "step": 7028 }, { "epoch": 0.47625177857578427, "grad_norm": 8.528377532958984, "learning_rate": 9.140529810390855e-05, "loss": 0.9142, "step": 7029 }, { "epoch": 0.47631953384375636, "grad_norm": 6.636556625366211, "learning_rate": 9.140392908481073e-05, "loss": 0.6841, "step": 7030 }, { "epoch": 0.47638728911172845, "grad_norm": 7.340085983276367, "learning_rate": 9.140256006571291e-05, "loss": 1.0198, "step": 7031 }, { "epoch": 0.47645504437970054, "grad_norm": 7.510453224182129, "learning_rate": 9.14011910466151e-05, "loss": 0.8581, "step": 7032 }, { "epoch": 0.4765227996476726, "grad_norm": 5.910886287689209, "learning_rate": 9.139982202751729e-05, "loss": 0.7947, "step": 7033 }, { "epoch": 0.47659055491564467, "grad_norm": 6.257016181945801, "learning_rate": 9.139845300841947e-05, "loss": 0.8759, "step": 7034 }, { "epoch": 0.47665831018361676, "grad_norm": 7.840462684631348, "learning_rate": 9.139708398932165e-05, "loss": 1.1975, "step": 7035 }, { "epoch": 0.47672606545158885, "grad_norm": 9.107316970825195, "learning_rate": 9.139571497022383e-05, "loss": 0.8321, "step": 7036 }, { "epoch": 0.47679382071956095, "grad_norm": 5.630661964416504, "learning_rate": 9.139434595112602e-05, "loss": 0.7302, "step": 7037 }, { "epoch": 0.47686157598753304, "grad_norm": 6.390323162078857, "learning_rate": 9.13929769320282e-05, "loss": 0.862, "step": 7038 }, { "epoch": 0.47692933125550513, "grad_norm": 8.85464096069336, "learning_rate": 9.139160791293038e-05, "loss": 0.9283, "step": 7039 }, { "epoch": 0.4769970865234772, "grad_norm": 6.312126159667969, "learning_rate": 9.139023889383256e-05, "loss": 0.933, "step": 7040 }, { "epoch": 0.4770648417914493, "grad_norm": 6.027670383453369, "learning_rate": 9.138886987473476e-05, "loss": 0.8221, "step": 7041 }, { "epoch": 0.47713259705942135, "grad_norm": 8.296350479125977, "learning_rate": 9.138750085563694e-05, "loss": 0.8621, "step": 7042 }, { "epoch": 0.47720035232739344, "grad_norm": 8.337299346923828, "learning_rate": 9.138613183653912e-05, "loss": 0.9126, "step": 7043 }, { "epoch": 0.47726810759536553, "grad_norm": 7.545529842376709, "learning_rate": 9.138476281744131e-05, "loss": 1.1327, "step": 7044 }, { "epoch": 0.4773358628633376, "grad_norm": 5.35225248336792, "learning_rate": 9.138339379834349e-05, "loss": 0.7311, "step": 7045 }, { "epoch": 0.4774036181313097, "grad_norm": 7.592402458190918, "learning_rate": 9.138202477924567e-05, "loss": 0.9959, "step": 7046 }, { "epoch": 0.4774713733992818, "grad_norm": 8.010600090026855, "learning_rate": 9.138065576014787e-05, "loss": 0.7788, "step": 7047 }, { "epoch": 0.4775391286672539, "grad_norm": 7.170941352844238, "learning_rate": 9.137928674105005e-05, "loss": 0.9218, "step": 7048 }, { "epoch": 0.477606883935226, "grad_norm": 6.852916240692139, "learning_rate": 9.137791772195223e-05, "loss": 0.7614, "step": 7049 }, { "epoch": 0.477674639203198, "grad_norm": 8.727351188659668, "learning_rate": 9.137654870285442e-05, "loss": 0.8449, "step": 7050 }, { "epoch": 0.4777423944711701, "grad_norm": 6.188234329223633, "learning_rate": 9.13751796837566e-05, "loss": 0.7001, "step": 7051 }, { "epoch": 0.4778101497391422, "grad_norm": 6.4075398445129395, "learning_rate": 9.137381066465878e-05, "loss": 0.9363, "step": 7052 }, { "epoch": 0.4778779050071143, "grad_norm": 7.744530200958252, "learning_rate": 9.137244164556096e-05, "loss": 0.9253, "step": 7053 }, { "epoch": 0.4779456602750864, "grad_norm": 6.745645999908447, "learning_rate": 9.137107262646314e-05, "loss": 0.9649, "step": 7054 }, { "epoch": 0.4780134155430585, "grad_norm": 8.560202598571777, "learning_rate": 9.136970360736534e-05, "loss": 1.2453, "step": 7055 }, { "epoch": 0.4780811708110306, "grad_norm": 6.941448211669922, "learning_rate": 9.136833458826752e-05, "loss": 0.8577, "step": 7056 }, { "epoch": 0.47814892607900267, "grad_norm": 6.4724626541137695, "learning_rate": 9.13669655691697e-05, "loss": 0.9304, "step": 7057 }, { "epoch": 0.4782166813469747, "grad_norm": 6.535644054412842, "learning_rate": 9.136559655007188e-05, "loss": 0.8323, "step": 7058 }, { "epoch": 0.4782844366149468, "grad_norm": 6.4093217849731445, "learning_rate": 9.136422753097407e-05, "loss": 0.9816, "step": 7059 }, { "epoch": 0.4783521918829189, "grad_norm": 6.645406246185303, "learning_rate": 9.136285851187625e-05, "loss": 0.9507, "step": 7060 }, { "epoch": 0.478419947150891, "grad_norm": 8.383099555969238, "learning_rate": 9.136148949277843e-05, "loss": 1.0339, "step": 7061 }, { "epoch": 0.47848770241886307, "grad_norm": 6.430543422698975, "learning_rate": 9.136012047368061e-05, "loss": 0.755, "step": 7062 }, { "epoch": 0.47855545768683516, "grad_norm": 6.5899176597595215, "learning_rate": 9.135875145458279e-05, "loss": 1.0841, "step": 7063 }, { "epoch": 0.47862321295480725, "grad_norm": 9.340789794921875, "learning_rate": 9.135738243548499e-05, "loss": 0.7539, "step": 7064 }, { "epoch": 0.47869096822277934, "grad_norm": 7.300266742706299, "learning_rate": 9.135601341638717e-05, "loss": 0.9049, "step": 7065 }, { "epoch": 0.4787587234907514, "grad_norm": 6.495701313018799, "learning_rate": 9.135464439728935e-05, "loss": 0.9168, "step": 7066 }, { "epoch": 0.4788264787587235, "grad_norm": 8.14792537689209, "learning_rate": 9.135327537819153e-05, "loss": 0.7678, "step": 7067 }, { "epoch": 0.47889423402669556, "grad_norm": 5.638490200042725, "learning_rate": 9.135190635909371e-05, "loss": 0.8914, "step": 7068 }, { "epoch": 0.47896198929466766, "grad_norm": 7.043061256408691, "learning_rate": 9.13505373399959e-05, "loss": 0.9575, "step": 7069 }, { "epoch": 0.47902974456263975, "grad_norm": 7.867429256439209, "learning_rate": 9.134916832089808e-05, "loss": 0.9887, "step": 7070 }, { "epoch": 0.47909749983061184, "grad_norm": 5.737957000732422, "learning_rate": 9.134779930180026e-05, "loss": 0.8605, "step": 7071 }, { "epoch": 0.47916525509858393, "grad_norm": 9.436585426330566, "learning_rate": 9.134643028270244e-05, "loss": 1.0482, "step": 7072 }, { "epoch": 0.479233010366556, "grad_norm": 6.28998327255249, "learning_rate": 9.134506126360464e-05, "loss": 1.0235, "step": 7073 }, { "epoch": 0.47930076563452806, "grad_norm": 5.121227741241455, "learning_rate": 9.134369224450682e-05, "loss": 0.8735, "step": 7074 }, { "epoch": 0.47936852090250015, "grad_norm": 5.818413734436035, "learning_rate": 9.1342323225409e-05, "loss": 0.738, "step": 7075 }, { "epoch": 0.47943627617047224, "grad_norm": 6.716359615325928, "learning_rate": 9.134095420631118e-05, "loss": 0.8825, "step": 7076 }, { "epoch": 0.47950403143844433, "grad_norm": 5.944983959197998, "learning_rate": 9.133958518721336e-05, "loss": 0.9145, "step": 7077 }, { "epoch": 0.4795717867064164, "grad_norm": 6.65972375869751, "learning_rate": 9.133821616811555e-05, "loss": 0.7746, "step": 7078 }, { "epoch": 0.4796395419743885, "grad_norm": 7.595485210418701, "learning_rate": 9.133684714901773e-05, "loss": 0.817, "step": 7079 }, { "epoch": 0.4797072972423606, "grad_norm": 5.7008280754089355, "learning_rate": 9.133547812991991e-05, "loss": 0.7996, "step": 7080 }, { "epoch": 0.4797750525103327, "grad_norm": 6.7002668380737305, "learning_rate": 9.133410911082209e-05, "loss": 1.0676, "step": 7081 }, { "epoch": 0.47984280777830474, "grad_norm": 8.43227481842041, "learning_rate": 9.133274009172429e-05, "loss": 0.9844, "step": 7082 }, { "epoch": 0.47991056304627683, "grad_norm": 5.9677653312683105, "learning_rate": 9.133137107262647e-05, "loss": 0.9158, "step": 7083 }, { "epoch": 0.4799783183142489, "grad_norm": 7.774659156799316, "learning_rate": 9.133000205352865e-05, "loss": 0.6829, "step": 7084 }, { "epoch": 0.480046073582221, "grad_norm": 7.344854354858398, "learning_rate": 9.132863303443083e-05, "loss": 0.8579, "step": 7085 }, { "epoch": 0.4801138288501931, "grad_norm": 6.4308624267578125, "learning_rate": 9.132726401533301e-05, "loss": 0.936, "step": 7086 }, { "epoch": 0.4801815841181652, "grad_norm": 7.600352764129639, "learning_rate": 9.13258949962352e-05, "loss": 0.8561, "step": 7087 }, { "epoch": 0.4802493393861373, "grad_norm": 9.475528717041016, "learning_rate": 9.132452597713738e-05, "loss": 0.9684, "step": 7088 }, { "epoch": 0.4803170946541094, "grad_norm": 5.927639007568359, "learning_rate": 9.132315695803956e-05, "loss": 0.7472, "step": 7089 }, { "epoch": 0.48038484992208147, "grad_norm": 7.304515838623047, "learning_rate": 9.132178793894176e-05, "loss": 0.8983, "step": 7090 }, { "epoch": 0.4804526051900535, "grad_norm": 7.6303391456604, "learning_rate": 9.132041891984394e-05, "loss": 0.864, "step": 7091 }, { "epoch": 0.4805203604580256, "grad_norm": 6.243314266204834, "learning_rate": 9.131904990074612e-05, "loss": 0.8448, "step": 7092 }, { "epoch": 0.4805881157259977, "grad_norm": 6.947970867156982, "learning_rate": 9.131768088164831e-05, "loss": 0.7853, "step": 7093 }, { "epoch": 0.4806558709939698, "grad_norm": 5.687379837036133, "learning_rate": 9.131631186255049e-05, "loss": 0.838, "step": 7094 }, { "epoch": 0.48072362626194187, "grad_norm": 6.477471351623535, "learning_rate": 9.131494284345267e-05, "loss": 0.9849, "step": 7095 }, { "epoch": 0.48079138152991396, "grad_norm": 6.526174068450928, "learning_rate": 9.131357382435487e-05, "loss": 0.9269, "step": 7096 }, { "epoch": 0.48085913679788606, "grad_norm": 7.404792308807373, "learning_rate": 9.131220480525705e-05, "loss": 0.8257, "step": 7097 }, { "epoch": 0.48092689206585815, "grad_norm": 7.290988445281982, "learning_rate": 9.131083578615923e-05, "loss": 1.0275, "step": 7098 }, { "epoch": 0.4809946473338302, "grad_norm": 7.744154930114746, "learning_rate": 9.13094667670614e-05, "loss": 0.9692, "step": 7099 }, { "epoch": 0.4810624026018023, "grad_norm": 5.7665252685546875, "learning_rate": 9.130809774796359e-05, "loss": 0.6751, "step": 7100 }, { "epoch": 0.48113015786977437, "grad_norm": 6.921631813049316, "learning_rate": 9.130672872886578e-05, "loss": 0.8771, "step": 7101 }, { "epoch": 0.48119791313774646, "grad_norm": 6.097098350524902, "learning_rate": 9.130535970976796e-05, "loss": 0.737, "step": 7102 }, { "epoch": 0.48126566840571855, "grad_norm": 7.192615032196045, "learning_rate": 9.130399069067014e-05, "loss": 0.9283, "step": 7103 }, { "epoch": 0.48133342367369064, "grad_norm": 6.464249610900879, "learning_rate": 9.130262167157232e-05, "loss": 1.0467, "step": 7104 }, { "epoch": 0.48140117894166273, "grad_norm": 4.616031646728516, "learning_rate": 9.130125265247452e-05, "loss": 0.5657, "step": 7105 }, { "epoch": 0.4814689342096348, "grad_norm": 6.356307029724121, "learning_rate": 9.12998836333767e-05, "loss": 0.722, "step": 7106 }, { "epoch": 0.48153668947760686, "grad_norm": 6.398674011230469, "learning_rate": 9.129851461427888e-05, "loss": 1.066, "step": 7107 }, { "epoch": 0.48160444474557895, "grad_norm": 7.015667915344238, "learning_rate": 9.129714559518106e-05, "loss": 0.8681, "step": 7108 }, { "epoch": 0.48167220001355104, "grad_norm": 8.212510108947754, "learning_rate": 9.129577657608324e-05, "loss": 0.8158, "step": 7109 }, { "epoch": 0.48173995528152314, "grad_norm": 8.409659385681152, "learning_rate": 9.129440755698543e-05, "loss": 0.7391, "step": 7110 }, { "epoch": 0.4818077105494952, "grad_norm": 7.42771053314209, "learning_rate": 9.129303853788761e-05, "loss": 0.9876, "step": 7111 }, { "epoch": 0.4818754658174673, "grad_norm": 7.816039562225342, "learning_rate": 9.129166951878979e-05, "loss": 0.9601, "step": 7112 }, { "epoch": 0.4819432210854394, "grad_norm": 8.761859893798828, "learning_rate": 9.129030049969197e-05, "loss": 0.6283, "step": 7113 }, { "epoch": 0.4820109763534115, "grad_norm": 6.981362342834473, "learning_rate": 9.128893148059415e-05, "loss": 0.9901, "step": 7114 }, { "epoch": 0.48207873162138354, "grad_norm": 7.718024253845215, "learning_rate": 9.128756246149635e-05, "loss": 0.8318, "step": 7115 }, { "epoch": 0.48214648688935563, "grad_norm": 8.191770553588867, "learning_rate": 9.128619344239853e-05, "loss": 0.9108, "step": 7116 }, { "epoch": 0.4822142421573277, "grad_norm": 8.095576286315918, "learning_rate": 9.12848244233007e-05, "loss": 0.8404, "step": 7117 }, { "epoch": 0.4822819974252998, "grad_norm": 6.66763162612915, "learning_rate": 9.128345540420289e-05, "loss": 0.6538, "step": 7118 }, { "epoch": 0.4823497526932719, "grad_norm": 7.253853797912598, "learning_rate": 9.128208638510508e-05, "loss": 0.8893, "step": 7119 }, { "epoch": 0.482417507961244, "grad_norm": 8.162705421447754, "learning_rate": 9.128071736600726e-05, "loss": 0.9602, "step": 7120 }, { "epoch": 0.4824852632292161, "grad_norm": 7.548867225646973, "learning_rate": 9.127934834690944e-05, "loss": 0.8701, "step": 7121 }, { "epoch": 0.4825530184971882, "grad_norm": 5.984834671020508, "learning_rate": 9.127797932781162e-05, "loss": 1.0416, "step": 7122 }, { "epoch": 0.4826207737651602, "grad_norm": 8.716156005859375, "learning_rate": 9.12766103087138e-05, "loss": 1.016, "step": 7123 }, { "epoch": 0.4826885290331323, "grad_norm": 6.2188873291015625, "learning_rate": 9.1275241289616e-05, "loss": 0.7147, "step": 7124 }, { "epoch": 0.4827562843011044, "grad_norm": 10.026150703430176, "learning_rate": 9.127387227051818e-05, "loss": 0.9221, "step": 7125 }, { "epoch": 0.4828240395690765, "grad_norm": 7.320329666137695, "learning_rate": 9.127250325142036e-05, "loss": 0.8486, "step": 7126 }, { "epoch": 0.4828917948370486, "grad_norm": 6.134348392486572, "learning_rate": 9.127113423232254e-05, "loss": 0.7383, "step": 7127 }, { "epoch": 0.4829595501050207, "grad_norm": 7.2880778312683105, "learning_rate": 9.126976521322473e-05, "loss": 1.059, "step": 7128 }, { "epoch": 0.48302730537299277, "grad_norm": 6.100679874420166, "learning_rate": 9.126839619412691e-05, "loss": 0.8673, "step": 7129 }, { "epoch": 0.48309506064096486, "grad_norm": 6.899023056030273, "learning_rate": 9.126702717502909e-05, "loss": 1.1572, "step": 7130 }, { "epoch": 0.4831628159089369, "grad_norm": 6.187694549560547, "learning_rate": 9.126565815593127e-05, "loss": 0.9584, "step": 7131 }, { "epoch": 0.483230571176909, "grad_norm": 7.093903064727783, "learning_rate": 9.126428913683345e-05, "loss": 0.969, "step": 7132 }, { "epoch": 0.4832983264448811, "grad_norm": 6.003389835357666, "learning_rate": 9.126292011773565e-05, "loss": 0.7638, "step": 7133 }, { "epoch": 0.48336608171285317, "grad_norm": 5.557130336761475, "learning_rate": 9.126155109863783e-05, "loss": 0.8529, "step": 7134 }, { "epoch": 0.48343383698082526, "grad_norm": 6.029399871826172, "learning_rate": 9.126018207954001e-05, "loss": 0.6359, "step": 7135 }, { "epoch": 0.48350159224879735, "grad_norm": 6.123723030090332, "learning_rate": 9.12588130604422e-05, "loss": 0.9462, "step": 7136 }, { "epoch": 0.48356934751676944, "grad_norm": 6.213245868682861, "learning_rate": 9.125744404134438e-05, "loss": 0.8322, "step": 7137 }, { "epoch": 0.48363710278474153, "grad_norm": 7.509876251220703, "learning_rate": 9.125607502224656e-05, "loss": 1.0556, "step": 7138 }, { "epoch": 0.48370485805271357, "grad_norm": 6.355532646179199, "learning_rate": 9.125470600314876e-05, "loss": 0.9281, "step": 7139 }, { "epoch": 0.48377261332068566, "grad_norm": 7.302781105041504, "learning_rate": 9.125333698405094e-05, "loss": 0.846, "step": 7140 }, { "epoch": 0.48384036858865775, "grad_norm": 7.081716537475586, "learning_rate": 9.125196796495312e-05, "loss": 0.8571, "step": 7141 }, { "epoch": 0.48390812385662985, "grad_norm": 7.652805328369141, "learning_rate": 9.125059894585531e-05, "loss": 0.7986, "step": 7142 }, { "epoch": 0.48397587912460194, "grad_norm": 5.906263828277588, "learning_rate": 9.124922992675749e-05, "loss": 0.8058, "step": 7143 }, { "epoch": 0.48404363439257403, "grad_norm": 7.398087024688721, "learning_rate": 9.124786090765967e-05, "loss": 0.8961, "step": 7144 }, { "epoch": 0.4841113896605461, "grad_norm": 9.616337776184082, "learning_rate": 9.124649188856185e-05, "loss": 0.9403, "step": 7145 }, { "epoch": 0.4841791449285182, "grad_norm": 7.74692440032959, "learning_rate": 9.124512286946403e-05, "loss": 0.7659, "step": 7146 }, { "epoch": 0.4842469001964903, "grad_norm": 6.067378520965576, "learning_rate": 9.124375385036623e-05, "loss": 0.8094, "step": 7147 }, { "epoch": 0.48431465546446234, "grad_norm": 7.943274974822998, "learning_rate": 9.12423848312684e-05, "loss": 0.8103, "step": 7148 }, { "epoch": 0.48438241073243443, "grad_norm": 7.710971355438232, "learning_rate": 9.124101581217059e-05, "loss": 1.0818, "step": 7149 }, { "epoch": 0.4844501660004065, "grad_norm": 6.904791831970215, "learning_rate": 9.123964679307277e-05, "loss": 0.9413, "step": 7150 }, { "epoch": 0.4845179212683786, "grad_norm": 8.46650505065918, "learning_rate": 9.123827777397496e-05, "loss": 1.1661, "step": 7151 }, { "epoch": 0.4845856765363507, "grad_norm": 7.021801471710205, "learning_rate": 9.123690875487714e-05, "loss": 0.7305, "step": 7152 }, { "epoch": 0.4846534318043228, "grad_norm": 7.553677082061768, "learning_rate": 9.123553973577932e-05, "loss": 0.9806, "step": 7153 }, { "epoch": 0.4847211870722949, "grad_norm": 6.204870700836182, "learning_rate": 9.12341707166815e-05, "loss": 0.8433, "step": 7154 }, { "epoch": 0.484788942340267, "grad_norm": 7.81880521774292, "learning_rate": 9.123280169758368e-05, "loss": 1.112, "step": 7155 }, { "epoch": 0.484856697608239, "grad_norm": 7.029433727264404, "learning_rate": 9.123143267848588e-05, "loss": 0.8068, "step": 7156 }, { "epoch": 0.4849244528762111, "grad_norm": 6.795009136199951, "learning_rate": 9.123006365938806e-05, "loss": 0.766, "step": 7157 }, { "epoch": 0.4849922081441832, "grad_norm": 5.924415588378906, "learning_rate": 9.122869464029024e-05, "loss": 0.7914, "step": 7158 }, { "epoch": 0.4850599634121553, "grad_norm": 5.561947345733643, "learning_rate": 9.122732562119242e-05, "loss": 1.1486, "step": 7159 }, { "epoch": 0.4851277186801274, "grad_norm": 7.202826976776123, "learning_rate": 9.122595660209461e-05, "loss": 0.8802, "step": 7160 }, { "epoch": 0.4851954739480995, "grad_norm": 6.435755729675293, "learning_rate": 9.122458758299679e-05, "loss": 0.888, "step": 7161 }, { "epoch": 0.48526322921607157, "grad_norm": 6.197578430175781, "learning_rate": 9.122321856389897e-05, "loss": 0.9139, "step": 7162 }, { "epoch": 0.48533098448404366, "grad_norm": 6.596435070037842, "learning_rate": 9.122184954480115e-05, "loss": 0.7802, "step": 7163 }, { "epoch": 0.4853987397520157, "grad_norm": 7.858447551727295, "learning_rate": 9.122048052570333e-05, "loss": 0.8759, "step": 7164 }, { "epoch": 0.4854664950199878, "grad_norm": 6.002086162567139, "learning_rate": 9.121911150660553e-05, "loss": 0.9621, "step": 7165 }, { "epoch": 0.4855342502879599, "grad_norm": 5.917041301727295, "learning_rate": 9.12177424875077e-05, "loss": 0.8144, "step": 7166 }, { "epoch": 0.48560200555593197, "grad_norm": 6.202271461486816, "learning_rate": 9.121637346840989e-05, "loss": 0.9937, "step": 7167 }, { "epoch": 0.48566976082390406, "grad_norm": 6.635425567626953, "learning_rate": 9.121500444931207e-05, "loss": 0.8103, "step": 7168 }, { "epoch": 0.48573751609187615, "grad_norm": 7.288759708404541, "learning_rate": 9.121363543021425e-05, "loss": 0.8354, "step": 7169 }, { "epoch": 0.48580527135984825, "grad_norm": 6.015995979309082, "learning_rate": 9.121226641111644e-05, "loss": 1.0732, "step": 7170 }, { "epoch": 0.48587302662782034, "grad_norm": 6.693684101104736, "learning_rate": 9.121089739201862e-05, "loss": 0.9792, "step": 7171 }, { "epoch": 0.4859407818957924, "grad_norm": 7.2573561668396, "learning_rate": 9.12095283729208e-05, "loss": 0.8823, "step": 7172 }, { "epoch": 0.48600853716376446, "grad_norm": 6.1881585121154785, "learning_rate": 9.120815935382298e-05, "loss": 0.8397, "step": 7173 }, { "epoch": 0.48607629243173656, "grad_norm": 7.354151725769043, "learning_rate": 9.120679033472518e-05, "loss": 1.0145, "step": 7174 }, { "epoch": 0.48614404769970865, "grad_norm": 6.806859016418457, "learning_rate": 9.120542131562736e-05, "loss": 0.9289, "step": 7175 }, { "epoch": 0.48621180296768074, "grad_norm": 6.098382949829102, "learning_rate": 9.120405229652954e-05, "loss": 0.9819, "step": 7176 }, { "epoch": 0.48627955823565283, "grad_norm": 6.944835186004639, "learning_rate": 9.120268327743172e-05, "loss": 0.8004, "step": 7177 }, { "epoch": 0.4863473135036249, "grad_norm": 7.9429497718811035, "learning_rate": 9.12013142583339e-05, "loss": 0.9473, "step": 7178 }, { "epoch": 0.486415068771597, "grad_norm": 6.831770420074463, "learning_rate": 9.119994523923609e-05, "loss": 0.8866, "step": 7179 }, { "epoch": 0.48648282403956905, "grad_norm": 7.992087364196777, "learning_rate": 9.119857622013827e-05, "loss": 0.6405, "step": 7180 }, { "epoch": 0.48655057930754114, "grad_norm": 7.081966400146484, "learning_rate": 9.119720720104045e-05, "loss": 0.9166, "step": 7181 }, { "epoch": 0.48661833457551323, "grad_norm": 8.257608413696289, "learning_rate": 9.119583818194263e-05, "loss": 0.7556, "step": 7182 }, { "epoch": 0.4866860898434853, "grad_norm": 6.714028835296631, "learning_rate": 9.119446916284483e-05, "loss": 0.784, "step": 7183 }, { "epoch": 0.4867538451114574, "grad_norm": 5.71464204788208, "learning_rate": 9.1193100143747e-05, "loss": 0.815, "step": 7184 }, { "epoch": 0.4868216003794295, "grad_norm": 8.290979385375977, "learning_rate": 9.119173112464919e-05, "loss": 0.822, "step": 7185 }, { "epoch": 0.4868893556474016, "grad_norm": 6.116361141204834, "learning_rate": 9.119036210555138e-05, "loss": 0.7727, "step": 7186 }, { "epoch": 0.4869571109153737, "grad_norm": 5.137567043304443, "learning_rate": 9.118899308645356e-05, "loss": 0.4916, "step": 7187 }, { "epoch": 0.48702486618334573, "grad_norm": 6.987879753112793, "learning_rate": 9.118762406735574e-05, "loss": 1.0662, "step": 7188 }, { "epoch": 0.4870926214513178, "grad_norm": 7.223506927490234, "learning_rate": 9.118625504825794e-05, "loss": 0.7408, "step": 7189 }, { "epoch": 0.4871603767192899, "grad_norm": 7.493766784667969, "learning_rate": 9.118488602916012e-05, "loss": 0.9625, "step": 7190 }, { "epoch": 0.487228131987262, "grad_norm": 7.265352725982666, "learning_rate": 9.11835170100623e-05, "loss": 0.9228, "step": 7191 }, { "epoch": 0.4872958872552341, "grad_norm": 6.04194450378418, "learning_rate": 9.118214799096449e-05, "loss": 0.8081, "step": 7192 }, { "epoch": 0.4873636425232062, "grad_norm": 7.033185958862305, "learning_rate": 9.118077897186667e-05, "loss": 1.0709, "step": 7193 }, { "epoch": 0.4874313977911783, "grad_norm": 5.993305683135986, "learning_rate": 9.117940995276885e-05, "loss": 0.8222, "step": 7194 }, { "epoch": 0.48749915305915037, "grad_norm": 6.695589542388916, "learning_rate": 9.117804093367103e-05, "loss": 0.7981, "step": 7195 }, { "epoch": 0.48756690832712246, "grad_norm": 6.6938157081604, "learning_rate": 9.117667191457321e-05, "loss": 0.8485, "step": 7196 }, { "epoch": 0.4876346635950945, "grad_norm": 6.760074138641357, "learning_rate": 9.11753028954754e-05, "loss": 0.6706, "step": 7197 }, { "epoch": 0.4877024188630666, "grad_norm": 6.397393703460693, "learning_rate": 9.117393387637759e-05, "loss": 0.8247, "step": 7198 }, { "epoch": 0.4877701741310387, "grad_norm": 7.138194561004639, "learning_rate": 9.117256485727977e-05, "loss": 1.0124, "step": 7199 }, { "epoch": 0.4878379293990108, "grad_norm": 6.4646172523498535, "learning_rate": 9.117119583818195e-05, "loss": 1.0531, "step": 7200 }, { "epoch": 0.48790568466698286, "grad_norm": 6.58534574508667, "learning_rate": 9.116982681908413e-05, "loss": 0.962, "step": 7201 }, { "epoch": 0.48797343993495496, "grad_norm": 8.237184524536133, "learning_rate": 9.116845779998632e-05, "loss": 1.0363, "step": 7202 }, { "epoch": 0.48804119520292705, "grad_norm": 9.047645568847656, "learning_rate": 9.11670887808885e-05, "loss": 0.8925, "step": 7203 }, { "epoch": 0.48810895047089914, "grad_norm": 5.87882137298584, "learning_rate": 9.116571976179068e-05, "loss": 0.7843, "step": 7204 }, { "epoch": 0.4881767057388712, "grad_norm": 7.792137622833252, "learning_rate": 9.116435074269286e-05, "loss": 0.6999, "step": 7205 }, { "epoch": 0.48824446100684327, "grad_norm": 6.028510570526123, "learning_rate": 9.116298172359506e-05, "loss": 0.9233, "step": 7206 }, { "epoch": 0.48831221627481536, "grad_norm": 6.219117164611816, "learning_rate": 9.116161270449724e-05, "loss": 0.5807, "step": 7207 }, { "epoch": 0.48837997154278745, "grad_norm": 5.99801778793335, "learning_rate": 9.116024368539942e-05, "loss": 0.9267, "step": 7208 }, { "epoch": 0.48844772681075954, "grad_norm": 7.163285255432129, "learning_rate": 9.11588746663016e-05, "loss": 0.8695, "step": 7209 }, { "epoch": 0.48851548207873163, "grad_norm": 5.209384441375732, "learning_rate": 9.115750564720378e-05, "loss": 0.8395, "step": 7210 }, { "epoch": 0.4885832373467037, "grad_norm": 5.942741394042969, "learning_rate": 9.115613662810597e-05, "loss": 0.8593, "step": 7211 }, { "epoch": 0.4886509926146758, "grad_norm": 6.742303848266602, "learning_rate": 9.115476760900815e-05, "loss": 0.7964, "step": 7212 }, { "epoch": 0.48871874788264785, "grad_norm": 6.951894283294678, "learning_rate": 9.115339858991033e-05, "loss": 0.9285, "step": 7213 }, { "epoch": 0.48878650315061994, "grad_norm": 5.752067565917969, "learning_rate": 9.115202957081251e-05, "loss": 0.6053, "step": 7214 }, { "epoch": 0.48885425841859204, "grad_norm": 6.7097883224487305, "learning_rate": 9.11506605517147e-05, "loss": 0.82, "step": 7215 }, { "epoch": 0.4889220136865641, "grad_norm": 6.396644592285156, "learning_rate": 9.114929153261689e-05, "loss": 0.9438, "step": 7216 }, { "epoch": 0.4889897689545362, "grad_norm": 6.378931999206543, "learning_rate": 9.114792251351907e-05, "loss": 0.9635, "step": 7217 }, { "epoch": 0.4890575242225083, "grad_norm": 5.802820682525635, "learning_rate": 9.114655349442125e-05, "loss": 0.7166, "step": 7218 }, { "epoch": 0.4891252794904804, "grad_norm": 6.493535041809082, "learning_rate": 9.114518447532343e-05, "loss": 0.985, "step": 7219 }, { "epoch": 0.4891930347584525, "grad_norm": 7.591537952423096, "learning_rate": 9.114381545622562e-05, "loss": 0.8348, "step": 7220 }, { "epoch": 0.48926079002642453, "grad_norm": 6.379971027374268, "learning_rate": 9.11424464371278e-05, "loss": 0.9761, "step": 7221 }, { "epoch": 0.4893285452943966, "grad_norm": 9.187169075012207, "learning_rate": 9.114107741802998e-05, "loss": 1.1654, "step": 7222 }, { "epoch": 0.4893963005623687, "grad_norm": 6.582739353179932, "learning_rate": 9.113970839893216e-05, "loss": 0.8921, "step": 7223 }, { "epoch": 0.4894640558303408, "grad_norm": 5.6152544021606445, "learning_rate": 9.113833937983434e-05, "loss": 0.7498, "step": 7224 }, { "epoch": 0.4895318110983129, "grad_norm": 6.960738182067871, "learning_rate": 9.113697036073654e-05, "loss": 0.7443, "step": 7225 }, { "epoch": 0.489599566366285, "grad_norm": 6.515749454498291, "learning_rate": 9.113560134163872e-05, "loss": 0.903, "step": 7226 }, { "epoch": 0.4896673216342571, "grad_norm": 7.327613830566406, "learning_rate": 9.11342323225409e-05, "loss": 0.8484, "step": 7227 }, { "epoch": 0.48973507690222917, "grad_norm": 6.575616359710693, "learning_rate": 9.113286330344308e-05, "loss": 1.001, "step": 7228 }, { "epoch": 0.4898028321702012, "grad_norm": 6.429412364959717, "learning_rate": 9.113149428434527e-05, "loss": 0.8028, "step": 7229 }, { "epoch": 0.4898705874381733, "grad_norm": 6.13348388671875, "learning_rate": 9.113012526524745e-05, "loss": 0.8966, "step": 7230 }, { "epoch": 0.4899383427061454, "grad_norm": 5.246626377105713, "learning_rate": 9.112875624614963e-05, "loss": 0.7548, "step": 7231 }, { "epoch": 0.4900060979741175, "grad_norm": 8.197700500488281, "learning_rate": 9.112738722705183e-05, "loss": 0.9122, "step": 7232 }, { "epoch": 0.4900738532420896, "grad_norm": 7.987671375274658, "learning_rate": 9.1126018207954e-05, "loss": 1.0089, "step": 7233 }, { "epoch": 0.49014160851006167, "grad_norm": 6.8674211502075195, "learning_rate": 9.112464918885619e-05, "loss": 0.7244, "step": 7234 }, { "epoch": 0.49020936377803376, "grad_norm": 9.719727516174316, "learning_rate": 9.112328016975838e-05, "loss": 0.9079, "step": 7235 }, { "epoch": 0.49027711904600585, "grad_norm": 6.982578277587891, "learning_rate": 9.112191115066056e-05, "loss": 0.7876, "step": 7236 }, { "epoch": 0.4903448743139779, "grad_norm": 5.029915809631348, "learning_rate": 9.112054213156274e-05, "loss": 0.7574, "step": 7237 }, { "epoch": 0.49041262958195, "grad_norm": 8.605433464050293, "learning_rate": 9.111917311246493e-05, "loss": 0.9255, "step": 7238 }, { "epoch": 0.49048038484992207, "grad_norm": 7.910008907318115, "learning_rate": 9.111780409336711e-05, "loss": 0.9514, "step": 7239 }, { "epoch": 0.49054814011789416, "grad_norm": 6.401332855224609, "learning_rate": 9.11164350742693e-05, "loss": 0.9243, "step": 7240 }, { "epoch": 0.49061589538586625, "grad_norm": 6.722992420196533, "learning_rate": 9.111506605517148e-05, "loss": 0.7595, "step": 7241 }, { "epoch": 0.49068365065383834, "grad_norm": 6.60951566696167, "learning_rate": 9.111369703607366e-05, "loss": 0.9381, "step": 7242 }, { "epoch": 0.49075140592181044, "grad_norm": 7.443787574768066, "learning_rate": 9.111232801697585e-05, "loss": 1.0037, "step": 7243 }, { "epoch": 0.4908191611897825, "grad_norm": 7.43011999130249, "learning_rate": 9.111095899787803e-05, "loss": 0.8213, "step": 7244 }, { "epoch": 0.49088691645775456, "grad_norm": 8.865147590637207, "learning_rate": 9.110958997878021e-05, "loss": 0.9547, "step": 7245 }, { "epoch": 0.49095467172572665, "grad_norm": 5.83010721206665, "learning_rate": 9.110822095968239e-05, "loss": 0.7253, "step": 7246 }, { "epoch": 0.49102242699369875, "grad_norm": 6.1638946533203125, "learning_rate": 9.110685194058458e-05, "loss": 0.7643, "step": 7247 }, { "epoch": 0.49109018226167084, "grad_norm": 6.534294128417969, "learning_rate": 9.110548292148677e-05, "loss": 0.9102, "step": 7248 }, { "epoch": 0.49115793752964293, "grad_norm": 8.244972229003906, "learning_rate": 9.110411390238895e-05, "loss": 0.9615, "step": 7249 }, { "epoch": 0.491225692797615, "grad_norm": 7.7991461753845215, "learning_rate": 9.110274488329113e-05, "loss": 0.9697, "step": 7250 }, { "epoch": 0.4912934480655871, "grad_norm": 7.489588737487793, "learning_rate": 9.11013758641933e-05, "loss": 0.7339, "step": 7251 }, { "epoch": 0.4913612033335592, "grad_norm": 6.112767696380615, "learning_rate": 9.11000068450955e-05, "loss": 1.1147, "step": 7252 }, { "epoch": 0.4914289586015313, "grad_norm": 6.319901943206787, "learning_rate": 9.109863782599768e-05, "loss": 0.9777, "step": 7253 }, { "epoch": 0.49149671386950333, "grad_norm": 7.175682544708252, "learning_rate": 9.109726880689986e-05, "loss": 0.7403, "step": 7254 }, { "epoch": 0.4915644691374754, "grad_norm": 7.254464149475098, "learning_rate": 9.109589978780204e-05, "loss": 0.8533, "step": 7255 }, { "epoch": 0.4916322244054475, "grad_norm": 6.266726493835449, "learning_rate": 9.109453076870422e-05, "loss": 0.8648, "step": 7256 }, { "epoch": 0.4916999796734196, "grad_norm": 7.290742874145508, "learning_rate": 9.109316174960642e-05, "loss": 0.7274, "step": 7257 }, { "epoch": 0.4917677349413917, "grad_norm": 6.882185459136963, "learning_rate": 9.10917927305086e-05, "loss": 0.9184, "step": 7258 }, { "epoch": 0.4918354902093638, "grad_norm": 6.796192169189453, "learning_rate": 9.109042371141078e-05, "loss": 0.9734, "step": 7259 }, { "epoch": 0.4919032454773359, "grad_norm": 7.461274147033691, "learning_rate": 9.108905469231296e-05, "loss": 0.8931, "step": 7260 }, { "epoch": 0.491971000745308, "grad_norm": 6.522415637969971, "learning_rate": 9.108768567321515e-05, "loss": 0.7529, "step": 7261 }, { "epoch": 0.49203875601328, "grad_norm": 7.108310699462891, "learning_rate": 9.108631665411733e-05, "loss": 0.7551, "step": 7262 }, { "epoch": 0.4921065112812521, "grad_norm": 6.65360164642334, "learning_rate": 9.108494763501951e-05, "loss": 1.1178, "step": 7263 }, { "epoch": 0.4921742665492242, "grad_norm": 8.197813034057617, "learning_rate": 9.108357861592169e-05, "loss": 1.2079, "step": 7264 }, { "epoch": 0.4922420218171963, "grad_norm": 6.898741245269775, "learning_rate": 9.108220959682387e-05, "loss": 0.7838, "step": 7265 }, { "epoch": 0.4923097770851684, "grad_norm": 7.363327980041504, "learning_rate": 9.108084057772607e-05, "loss": 0.763, "step": 7266 }, { "epoch": 0.49237753235314047, "grad_norm": 7.18175745010376, "learning_rate": 9.107947155862825e-05, "loss": 0.7809, "step": 7267 }, { "epoch": 0.49244528762111256, "grad_norm": 5.595573902130127, "learning_rate": 9.107810253953043e-05, "loss": 0.8471, "step": 7268 }, { "epoch": 0.49251304288908465, "grad_norm": 6.333422660827637, "learning_rate": 9.10767335204326e-05, "loss": 0.7616, "step": 7269 }, { "epoch": 0.4925807981570567, "grad_norm": 8.157796859741211, "learning_rate": 9.10753645013348e-05, "loss": 0.8233, "step": 7270 }, { "epoch": 0.4926485534250288, "grad_norm": 6.123366832733154, "learning_rate": 9.107399548223698e-05, "loss": 0.7266, "step": 7271 }, { "epoch": 0.49271630869300087, "grad_norm": 10.229715347290039, "learning_rate": 9.107262646313916e-05, "loss": 0.9589, "step": 7272 }, { "epoch": 0.49278406396097296, "grad_norm": 5.360836505889893, "learning_rate": 9.107125744404134e-05, "loss": 0.7874, "step": 7273 }, { "epoch": 0.49285181922894505, "grad_norm": 6.273800373077393, "learning_rate": 9.106988842494352e-05, "loss": 0.9512, "step": 7274 }, { "epoch": 0.49291957449691715, "grad_norm": 7.902069568634033, "learning_rate": 9.106851940584572e-05, "loss": 0.799, "step": 7275 }, { "epoch": 0.49298732976488924, "grad_norm": 9.35932731628418, "learning_rate": 9.10671503867479e-05, "loss": 0.8973, "step": 7276 }, { "epoch": 0.49305508503286133, "grad_norm": 7.119052410125732, "learning_rate": 9.106578136765008e-05, "loss": 1.2001, "step": 7277 }, { "epoch": 0.49312284030083336, "grad_norm": 6.111217498779297, "learning_rate": 9.106441234855227e-05, "loss": 0.7685, "step": 7278 }, { "epoch": 0.49319059556880546, "grad_norm": 6.093493461608887, "learning_rate": 9.106304332945445e-05, "loss": 0.7097, "step": 7279 }, { "epoch": 0.49325835083677755, "grad_norm": 6.722117900848389, "learning_rate": 9.106167431035663e-05, "loss": 0.805, "step": 7280 }, { "epoch": 0.49332610610474964, "grad_norm": 6.489585876464844, "learning_rate": 9.106030529125882e-05, "loss": 0.9748, "step": 7281 }, { "epoch": 0.49339386137272173, "grad_norm": 6.1473236083984375, "learning_rate": 9.1058936272161e-05, "loss": 0.8299, "step": 7282 }, { "epoch": 0.4934616166406938, "grad_norm": 7.472615718841553, "learning_rate": 9.105756725306319e-05, "loss": 1.0012, "step": 7283 }, { "epoch": 0.4935293719086659, "grad_norm": 7.1405463218688965, "learning_rate": 9.105619823396538e-05, "loss": 0.9959, "step": 7284 }, { "epoch": 0.493597127176638, "grad_norm": 6.21019172668457, "learning_rate": 9.105482921486756e-05, "loss": 0.867, "step": 7285 }, { "epoch": 0.49366488244461004, "grad_norm": 6.836954593658447, "learning_rate": 9.105346019576974e-05, "loss": 0.8058, "step": 7286 }, { "epoch": 0.49373263771258213, "grad_norm": 6.4547319412231445, "learning_rate": 9.105209117667192e-05, "loss": 0.8028, "step": 7287 }, { "epoch": 0.4938003929805542, "grad_norm": 6.335334300994873, "learning_rate": 9.10507221575741e-05, "loss": 0.763, "step": 7288 }, { "epoch": 0.4938681482485263, "grad_norm": 7.21290397644043, "learning_rate": 9.10493531384763e-05, "loss": 0.9477, "step": 7289 }, { "epoch": 0.4939359035164984, "grad_norm": 7.856054782867432, "learning_rate": 9.104798411937847e-05, "loss": 1.1117, "step": 7290 }, { "epoch": 0.4940036587844705, "grad_norm": 6.308975696563721, "learning_rate": 9.104661510028066e-05, "loss": 1.0397, "step": 7291 }, { "epoch": 0.4940714140524426, "grad_norm": 6.111830711364746, "learning_rate": 9.104524608118284e-05, "loss": 0.8082, "step": 7292 }, { "epoch": 0.4941391693204147, "grad_norm": 7.741870403289795, "learning_rate": 9.104387706208503e-05, "loss": 0.804, "step": 7293 }, { "epoch": 0.4942069245883867, "grad_norm": 7.447502613067627, "learning_rate": 9.104250804298721e-05, "loss": 0.9074, "step": 7294 }, { "epoch": 0.4942746798563588, "grad_norm": 4.931535243988037, "learning_rate": 9.104113902388939e-05, "loss": 0.7519, "step": 7295 }, { "epoch": 0.4943424351243309, "grad_norm": 9.830883026123047, "learning_rate": 9.103977000479157e-05, "loss": 0.8659, "step": 7296 }, { "epoch": 0.494410190392303, "grad_norm": 6.182522296905518, "learning_rate": 9.103840098569375e-05, "loss": 0.7021, "step": 7297 }, { "epoch": 0.4944779456602751, "grad_norm": 8.73188591003418, "learning_rate": 9.103703196659594e-05, "loss": 0.8496, "step": 7298 }, { "epoch": 0.4945457009282472, "grad_norm": 7.2238640785217285, "learning_rate": 9.103566294749813e-05, "loss": 1.0778, "step": 7299 }, { "epoch": 0.49461345619621927, "grad_norm": 6.101573944091797, "learning_rate": 9.10342939284003e-05, "loss": 0.9264, "step": 7300 }, { "epoch": 0.49468121146419136, "grad_norm": 7.327548503875732, "learning_rate": 9.103292490930249e-05, "loss": 0.7973, "step": 7301 }, { "epoch": 0.49474896673216345, "grad_norm": 7.1809186935424805, "learning_rate": 9.103155589020467e-05, "loss": 0.9586, "step": 7302 }, { "epoch": 0.4948167220001355, "grad_norm": 5.164478778839111, "learning_rate": 9.103018687110686e-05, "loss": 0.6197, "step": 7303 }, { "epoch": 0.4948844772681076, "grad_norm": 6.387687683105469, "learning_rate": 9.102881785200904e-05, "loss": 0.9509, "step": 7304 }, { "epoch": 0.4949522325360797, "grad_norm": 8.202160835266113, "learning_rate": 9.102744883291122e-05, "loss": 0.8557, "step": 7305 }, { "epoch": 0.49501998780405176, "grad_norm": 6.164126873016357, "learning_rate": 9.10260798138134e-05, "loss": 0.7898, "step": 7306 }, { "epoch": 0.49508774307202386, "grad_norm": 6.448176383972168, "learning_rate": 9.10247107947156e-05, "loss": 0.9094, "step": 7307 }, { "epoch": 0.49515549833999595, "grad_norm": 6.582845687866211, "learning_rate": 9.102334177561778e-05, "loss": 0.8778, "step": 7308 }, { "epoch": 0.49522325360796804, "grad_norm": 7.6885552406311035, "learning_rate": 9.102197275651996e-05, "loss": 1.0229, "step": 7309 }, { "epoch": 0.49529100887594013, "grad_norm": 6.840595722198486, "learning_rate": 9.102060373742214e-05, "loss": 0.972, "step": 7310 }, { "epoch": 0.49535876414391217, "grad_norm": 6.486509323120117, "learning_rate": 9.101923471832432e-05, "loss": 0.8788, "step": 7311 }, { "epoch": 0.49542651941188426, "grad_norm": 9.052862167358398, "learning_rate": 9.101786569922651e-05, "loss": 0.8917, "step": 7312 }, { "epoch": 0.49549427467985635, "grad_norm": 5.528444766998291, "learning_rate": 9.101649668012869e-05, "loss": 0.6167, "step": 7313 }, { "epoch": 0.49556202994782844, "grad_norm": 6.7654032707214355, "learning_rate": 9.101512766103087e-05, "loss": 0.9848, "step": 7314 }, { "epoch": 0.49562978521580053, "grad_norm": 6.247506618499756, "learning_rate": 9.101375864193305e-05, "loss": 0.5868, "step": 7315 }, { "epoch": 0.4956975404837726, "grad_norm": 6.155362129211426, "learning_rate": 9.101238962283525e-05, "loss": 0.8344, "step": 7316 }, { "epoch": 0.4957652957517447, "grad_norm": 8.219099998474121, "learning_rate": 9.101102060373743e-05, "loss": 0.8839, "step": 7317 }, { "epoch": 0.4958330510197168, "grad_norm": 5.146651744842529, "learning_rate": 9.10096515846396e-05, "loss": 0.7872, "step": 7318 }, { "epoch": 0.49590080628768884, "grad_norm": 7.989886283874512, "learning_rate": 9.100828256554179e-05, "loss": 1.0485, "step": 7319 }, { "epoch": 0.49596856155566094, "grad_norm": 7.214312553405762, "learning_rate": 9.100691354644397e-05, "loss": 0.9304, "step": 7320 }, { "epoch": 0.49603631682363303, "grad_norm": 6.856055736541748, "learning_rate": 9.100554452734616e-05, "loss": 0.8934, "step": 7321 }, { "epoch": 0.4961040720916051, "grad_norm": 6.771759986877441, "learning_rate": 9.100417550824834e-05, "loss": 0.9349, "step": 7322 }, { "epoch": 0.4961718273595772, "grad_norm": 8.19810962677002, "learning_rate": 9.100280648915052e-05, "loss": 1.0005, "step": 7323 }, { "epoch": 0.4962395826275493, "grad_norm": 6.946470737457275, "learning_rate": 9.100143747005271e-05, "loss": 0.7505, "step": 7324 }, { "epoch": 0.4963073378955214, "grad_norm": 6.6737518310546875, "learning_rate": 9.10000684509549e-05, "loss": 0.8722, "step": 7325 }, { "epoch": 0.4963750931634935, "grad_norm": 5.637021064758301, "learning_rate": 9.099869943185708e-05, "loss": 0.9587, "step": 7326 }, { "epoch": 0.4964428484314655, "grad_norm": 5.846226692199707, "learning_rate": 9.099733041275927e-05, "loss": 0.7726, "step": 7327 }, { "epoch": 0.4965106036994376, "grad_norm": 7.612580299377441, "learning_rate": 9.099596139366145e-05, "loss": 0.7203, "step": 7328 }, { "epoch": 0.4965783589674097, "grad_norm": 5.490561485290527, "learning_rate": 9.099459237456363e-05, "loss": 0.9457, "step": 7329 }, { "epoch": 0.4966461142353818, "grad_norm": 5.573283672332764, "learning_rate": 9.099322335546582e-05, "loss": 0.8116, "step": 7330 }, { "epoch": 0.4967138695033539, "grad_norm": 7.063168048858643, "learning_rate": 9.0991854336368e-05, "loss": 0.906, "step": 7331 }, { "epoch": 0.496781624771326, "grad_norm": 7.066551685333252, "learning_rate": 9.099048531727018e-05, "loss": 0.8515, "step": 7332 }, { "epoch": 0.49684938003929807, "grad_norm": 6.462795257568359, "learning_rate": 9.098911629817237e-05, "loss": 0.6325, "step": 7333 }, { "epoch": 0.49691713530727016, "grad_norm": 6.59752893447876, "learning_rate": 9.098774727907455e-05, "loss": 0.7356, "step": 7334 }, { "epoch": 0.4969848905752422, "grad_norm": 5.600460052490234, "learning_rate": 9.098637825997674e-05, "loss": 0.6307, "step": 7335 }, { "epoch": 0.4970526458432143, "grad_norm": 6.0045270919799805, "learning_rate": 9.098500924087892e-05, "loss": 1.0137, "step": 7336 }, { "epoch": 0.4971204011111864, "grad_norm": 7.3824143409729, "learning_rate": 9.09836402217811e-05, "loss": 0.8982, "step": 7337 }, { "epoch": 0.4971881563791585, "grad_norm": 6.573738098144531, "learning_rate": 9.098227120268328e-05, "loss": 0.7594, "step": 7338 }, { "epoch": 0.49725591164713057, "grad_norm": 8.101619720458984, "learning_rate": 9.098090218358547e-05, "loss": 1.1994, "step": 7339 }, { "epoch": 0.49732366691510266, "grad_norm": 5.76462459564209, "learning_rate": 9.097953316448765e-05, "loss": 0.7082, "step": 7340 }, { "epoch": 0.49739142218307475, "grad_norm": 6.358243465423584, "learning_rate": 9.097816414538983e-05, "loss": 0.8459, "step": 7341 }, { "epoch": 0.49745917745104684, "grad_norm": 7.145965576171875, "learning_rate": 9.097679512629202e-05, "loss": 0.9953, "step": 7342 }, { "epoch": 0.4975269327190189, "grad_norm": 8.23405933380127, "learning_rate": 9.09754261071942e-05, "loss": 1.1466, "step": 7343 }, { "epoch": 0.49759468798699097, "grad_norm": 6.5327982902526855, "learning_rate": 9.097405708809639e-05, "loss": 1.0226, "step": 7344 }, { "epoch": 0.49766244325496306, "grad_norm": 7.101400375366211, "learning_rate": 9.097268806899857e-05, "loss": 1.0476, "step": 7345 }, { "epoch": 0.49773019852293515, "grad_norm": 7.278493404388428, "learning_rate": 9.097131904990075e-05, "loss": 0.9499, "step": 7346 }, { "epoch": 0.49779795379090724, "grad_norm": 6.826780796051025, "learning_rate": 9.096995003080293e-05, "loss": 0.8371, "step": 7347 }, { "epoch": 0.49786570905887934, "grad_norm": 6.522684097290039, "learning_rate": 9.096858101170512e-05, "loss": 0.9318, "step": 7348 }, { "epoch": 0.4979334643268514, "grad_norm": 6.954566478729248, "learning_rate": 9.09672119926073e-05, "loss": 0.9174, "step": 7349 }, { "epoch": 0.4980012195948235, "grad_norm": 6.442493438720703, "learning_rate": 9.096584297350949e-05, "loss": 0.8619, "step": 7350 }, { "epoch": 0.49806897486279555, "grad_norm": 7.186161041259766, "learning_rate": 9.096447395441167e-05, "loss": 0.661, "step": 7351 }, { "epoch": 0.49813673013076765, "grad_norm": 6.716146945953369, "learning_rate": 9.096310493531385e-05, "loss": 0.9148, "step": 7352 }, { "epoch": 0.49820448539873974, "grad_norm": 5.643620014190674, "learning_rate": 9.096173591621604e-05, "loss": 0.8393, "step": 7353 }, { "epoch": 0.49827224066671183, "grad_norm": 7.107893466949463, "learning_rate": 9.096036689711822e-05, "loss": 0.972, "step": 7354 }, { "epoch": 0.4983399959346839, "grad_norm": 6.967519283294678, "learning_rate": 9.09589978780204e-05, "loss": 0.7033, "step": 7355 }, { "epoch": 0.498407751202656, "grad_norm": 7.291131496429443, "learning_rate": 9.095762885892258e-05, "loss": 0.8853, "step": 7356 }, { "epoch": 0.4984755064706281, "grad_norm": 7.630476474761963, "learning_rate": 9.095625983982476e-05, "loss": 0.8116, "step": 7357 }, { "epoch": 0.4985432617386002, "grad_norm": 6.2367167472839355, "learning_rate": 9.095489082072695e-05, "loss": 0.8344, "step": 7358 }, { "epoch": 0.4986110170065723, "grad_norm": 9.436936378479004, "learning_rate": 9.095352180162914e-05, "loss": 1.043, "step": 7359 }, { "epoch": 0.4986787722745443, "grad_norm": 5.330153942108154, "learning_rate": 9.095215278253132e-05, "loss": 0.7025, "step": 7360 }, { "epoch": 0.4987465275425164, "grad_norm": 5.117184162139893, "learning_rate": 9.09507837634335e-05, "loss": 0.8207, "step": 7361 }, { "epoch": 0.4988142828104885, "grad_norm": 7.965060234069824, "learning_rate": 9.094941474433569e-05, "loss": 0.9344, "step": 7362 }, { "epoch": 0.4988820380784606, "grad_norm": 6.985347747802734, "learning_rate": 9.094804572523787e-05, "loss": 0.8708, "step": 7363 }, { "epoch": 0.4989497933464327, "grad_norm": 8.380836486816406, "learning_rate": 9.094667670614005e-05, "loss": 0.8901, "step": 7364 }, { "epoch": 0.4990175486144048, "grad_norm": 6.336101055145264, "learning_rate": 9.094530768704223e-05, "loss": 0.9709, "step": 7365 }, { "epoch": 0.4990853038823769, "grad_norm": 6.435248851776123, "learning_rate": 9.094393866794441e-05, "loss": 0.8913, "step": 7366 }, { "epoch": 0.49915305915034897, "grad_norm": 7.674434661865234, "learning_rate": 9.09425696488466e-05, "loss": 1.0581, "step": 7367 }, { "epoch": 0.499220814418321, "grad_norm": 7.2185211181640625, "learning_rate": 9.094120062974879e-05, "loss": 0.9202, "step": 7368 }, { "epoch": 0.4992885696862931, "grad_norm": 6.717129230499268, "learning_rate": 9.093983161065097e-05, "loss": 0.8664, "step": 7369 }, { "epoch": 0.4993563249542652, "grad_norm": 6.229526996612549, "learning_rate": 9.093846259155316e-05, "loss": 0.7374, "step": 7370 }, { "epoch": 0.4994240802222373, "grad_norm": 7.177096843719482, "learning_rate": 9.093709357245534e-05, "loss": 1.0426, "step": 7371 }, { "epoch": 0.49949183549020937, "grad_norm": 7.289033889770508, "learning_rate": 9.093572455335752e-05, "loss": 0.8705, "step": 7372 }, { "epoch": 0.49955959075818146, "grad_norm": 8.260116577148438, "learning_rate": 9.093435553425971e-05, "loss": 0.6928, "step": 7373 }, { "epoch": 0.49962734602615355, "grad_norm": 5.730698108673096, "learning_rate": 9.09329865151619e-05, "loss": 0.7852, "step": 7374 }, { "epoch": 0.49969510129412564, "grad_norm": 6.90589714050293, "learning_rate": 9.093161749606407e-05, "loss": 0.9258, "step": 7375 }, { "epoch": 0.4997628565620977, "grad_norm": 5.358353137969971, "learning_rate": 9.093024847696627e-05, "loss": 0.7327, "step": 7376 }, { "epoch": 0.49983061183006977, "grad_norm": 5.4898505210876465, "learning_rate": 9.092887945786845e-05, "loss": 0.7371, "step": 7377 }, { "epoch": 0.49989836709804186, "grad_norm": 7.045073986053467, "learning_rate": 9.092751043877063e-05, "loss": 0.8785, "step": 7378 }, { "epoch": 0.49989836709804186, "eval_loss": 0.8431733846664429, "eval_noise_accuracy": 0.0, "eval_runtime": 1466.1466, "eval_samples_per_second": 3.505, "eval_steps_per_second": 0.22, "eval_wer": 75.89224292121845, "step": 7378 }, { "epoch": 0.49996612236601395, "grad_norm": 6.144540309906006, "learning_rate": 9.092614141967281e-05, "loss": 0.8467, "step": 7379 }, { "epoch": 0.500033877633986, "grad_norm": 5.819301605224609, "learning_rate": 9.0924772400575e-05, "loss": 0.7813, "step": 7380 }, { "epoch": 0.5001016329019581, "grad_norm": 5.759615421295166, "learning_rate": 9.092340338147718e-05, "loss": 0.7834, "step": 7381 }, { "epoch": 0.5001693881699302, "grad_norm": 5.7733917236328125, "learning_rate": 9.092203436237936e-05, "loss": 0.6671, "step": 7382 }, { "epoch": 0.5002371434379023, "grad_norm": 8.171788215637207, "learning_rate": 9.092066534328154e-05, "loss": 0.9039, "step": 7383 }, { "epoch": 0.5003048987058744, "grad_norm": 6.261331081390381, "learning_rate": 9.091929632418373e-05, "loss": 0.9185, "step": 7384 }, { "epoch": 0.5003726539738464, "grad_norm": 4.867089748382568, "learning_rate": 9.091792730508592e-05, "loss": 0.8967, "step": 7385 }, { "epoch": 0.5004404092418185, "grad_norm": 8.540884971618652, "learning_rate": 9.09165582859881e-05, "loss": 1.0576, "step": 7386 }, { "epoch": 0.5005081645097906, "grad_norm": 6.116450786590576, "learning_rate": 9.091518926689028e-05, "loss": 0.8109, "step": 7387 }, { "epoch": 0.5005759197777627, "grad_norm": 5.271210670471191, "learning_rate": 9.091382024779246e-05, "loss": 1.0116, "step": 7388 }, { "epoch": 0.5006436750457348, "grad_norm": 5.851868152618408, "learning_rate": 9.091245122869464e-05, "loss": 0.8285, "step": 7389 }, { "epoch": 0.5007114303137069, "grad_norm": 6.349635124206543, "learning_rate": 9.091108220959683e-05, "loss": 1.0065, "step": 7390 }, { "epoch": 0.500779185581679, "grad_norm": 7.653061389923096, "learning_rate": 9.090971319049901e-05, "loss": 0.8314, "step": 7391 }, { "epoch": 0.5008469408496511, "grad_norm": 7.560361385345459, "learning_rate": 9.09083441714012e-05, "loss": 0.9674, "step": 7392 }, { "epoch": 0.5009146961176232, "grad_norm": 5.517054080963135, "learning_rate": 9.090697515230338e-05, "loss": 0.6423, "step": 7393 }, { "epoch": 0.5009824513855953, "grad_norm": 6.644406795501709, "learning_rate": 9.090560613320557e-05, "loss": 0.7256, "step": 7394 }, { "epoch": 0.5010502066535674, "grad_norm": 6.9502854347229, "learning_rate": 9.090423711410775e-05, "loss": 0.8194, "step": 7395 }, { "epoch": 0.5011179619215393, "grad_norm": 5.900984287261963, "learning_rate": 9.090286809500993e-05, "loss": 1.1089, "step": 7396 }, { "epoch": 0.5011857171895114, "grad_norm": 5.964234352111816, "learning_rate": 9.090149907591211e-05, "loss": 0.9525, "step": 7397 }, { "epoch": 0.5012534724574835, "grad_norm": 7.2671895027160645, "learning_rate": 9.090013005681429e-05, "loss": 0.7658, "step": 7398 }, { "epoch": 0.5013212277254556, "grad_norm": 6.128476619720459, "learning_rate": 9.089876103771648e-05, "loss": 0.8413, "step": 7399 }, { "epoch": 0.5013889829934277, "grad_norm": 5.146761894226074, "learning_rate": 9.089739201861866e-05, "loss": 0.7552, "step": 7400 }, { "epoch": 0.5014567382613998, "grad_norm": 7.736568927764893, "learning_rate": 9.089602299952085e-05, "loss": 0.5941, "step": 7401 }, { "epoch": 0.5015244935293719, "grad_norm": 7.113034725189209, "learning_rate": 9.089465398042303e-05, "loss": 0.8414, "step": 7402 }, { "epoch": 0.501592248797344, "grad_norm": 6.421565055847168, "learning_rate": 9.089328496132522e-05, "loss": 0.7477, "step": 7403 }, { "epoch": 0.5016600040653161, "grad_norm": 6.609166145324707, "learning_rate": 9.08919159422274e-05, "loss": 0.8404, "step": 7404 }, { "epoch": 0.5017277593332882, "grad_norm": 5.771233558654785, "learning_rate": 9.089054692312958e-05, "loss": 0.8446, "step": 7405 }, { "epoch": 0.5017955146012603, "grad_norm": 5.4080119132995605, "learning_rate": 9.088917790403176e-05, "loss": 0.7046, "step": 7406 }, { "epoch": 0.5018632698692324, "grad_norm": 7.0025248527526855, "learning_rate": 9.088780888493394e-05, "loss": 1.0269, "step": 7407 }, { "epoch": 0.5019310251372044, "grad_norm": 6.687203884124756, "learning_rate": 9.088643986583613e-05, "loss": 0.9794, "step": 7408 }, { "epoch": 0.5019987804051765, "grad_norm": 7.627871036529541, "learning_rate": 9.088507084673831e-05, "loss": 1.0068, "step": 7409 }, { "epoch": 0.5020665356731486, "grad_norm": 6.824975490570068, "learning_rate": 9.08837018276405e-05, "loss": 0.9582, "step": 7410 }, { "epoch": 0.5021342909411207, "grad_norm": 5.561855792999268, "learning_rate": 9.088233280854268e-05, "loss": 0.7373, "step": 7411 }, { "epoch": 0.5022020462090928, "grad_norm": 6.751492023468018, "learning_rate": 9.088096378944486e-05, "loss": 1.1699, "step": 7412 }, { "epoch": 0.5022698014770648, "grad_norm": 6.15092658996582, "learning_rate": 9.087959477034705e-05, "loss": 0.656, "step": 7413 }, { "epoch": 0.5023375567450369, "grad_norm": 7.125277042388916, "learning_rate": 9.087822575124923e-05, "loss": 0.8737, "step": 7414 }, { "epoch": 0.502405312013009, "grad_norm": 8.87856674194336, "learning_rate": 9.087685673215141e-05, "loss": 0.9961, "step": 7415 }, { "epoch": 0.5024730672809811, "grad_norm": 5.436285495758057, "learning_rate": 9.087548771305359e-05, "loss": 0.8834, "step": 7416 }, { "epoch": 0.5025408225489532, "grad_norm": 6.28549861907959, "learning_rate": 9.087411869395578e-05, "loss": 0.6354, "step": 7417 }, { "epoch": 0.5026085778169253, "grad_norm": 5.951661109924316, "learning_rate": 9.087274967485797e-05, "loss": 0.8347, "step": 7418 }, { "epoch": 0.5026763330848973, "grad_norm": 5.268624305725098, "learning_rate": 9.087138065576015e-05, "loss": 0.9966, "step": 7419 }, { "epoch": 0.5027440883528694, "grad_norm": 6.974735260009766, "learning_rate": 9.087001163666234e-05, "loss": 0.9685, "step": 7420 }, { "epoch": 0.5028118436208415, "grad_norm": 6.689586639404297, "learning_rate": 9.086864261756452e-05, "loss": 0.8843, "step": 7421 }, { "epoch": 0.5028795988888136, "grad_norm": 6.510265827178955, "learning_rate": 9.086727359846671e-05, "loss": 1.0125, "step": 7422 }, { "epoch": 0.5029473541567857, "grad_norm": 7.039668560028076, "learning_rate": 9.08659045793689e-05, "loss": 0.7289, "step": 7423 }, { "epoch": 0.5030151094247578, "grad_norm": 6.1734442710876465, "learning_rate": 9.086453556027107e-05, "loss": 0.8023, "step": 7424 }, { "epoch": 0.5030828646927299, "grad_norm": 7.895476818084717, "learning_rate": 9.086316654117325e-05, "loss": 1.0043, "step": 7425 }, { "epoch": 0.503150619960702, "grad_norm": 6.599829196929932, "learning_rate": 9.086179752207545e-05, "loss": 0.8537, "step": 7426 }, { "epoch": 0.5032183752286741, "grad_norm": 8.60390567779541, "learning_rate": 9.086042850297763e-05, "loss": 1.1038, "step": 7427 }, { "epoch": 0.5032861304966462, "grad_norm": 6.656140327453613, "learning_rate": 9.085905948387981e-05, "loss": 0.9533, "step": 7428 }, { "epoch": 0.5033538857646181, "grad_norm": 5.768946170806885, "learning_rate": 9.085769046478199e-05, "loss": 0.6271, "step": 7429 }, { "epoch": 0.5034216410325902, "grad_norm": 5.938215732574463, "learning_rate": 9.085632144568417e-05, "loss": 1.0308, "step": 7430 }, { "epoch": 0.5034893963005623, "grad_norm": 7.145301342010498, "learning_rate": 9.085495242658636e-05, "loss": 0.8695, "step": 7431 }, { "epoch": 0.5035571515685344, "grad_norm": 5.975915908813477, "learning_rate": 9.085358340748854e-05, "loss": 0.9071, "step": 7432 }, { "epoch": 0.5036249068365065, "grad_norm": 5.689105033874512, "learning_rate": 9.085221438839072e-05, "loss": 0.9037, "step": 7433 }, { "epoch": 0.5036926621044786, "grad_norm": 6.513401985168457, "learning_rate": 9.08508453692929e-05, "loss": 0.7534, "step": 7434 }, { "epoch": 0.5037604173724507, "grad_norm": 7.220860958099365, "learning_rate": 9.084947635019509e-05, "loss": 1.0833, "step": 7435 }, { "epoch": 0.5038281726404228, "grad_norm": 10.640632629394531, "learning_rate": 9.084810733109728e-05, "loss": 0.9143, "step": 7436 }, { "epoch": 0.5038959279083949, "grad_norm": 6.900107383728027, "learning_rate": 9.084673831199946e-05, "loss": 0.8239, "step": 7437 }, { "epoch": 0.503963683176367, "grad_norm": 6.290066719055176, "learning_rate": 9.084536929290164e-05, "loss": 0.8982, "step": 7438 }, { "epoch": 0.5040314384443391, "grad_norm": 7.073644638061523, "learning_rate": 9.084400027380382e-05, "loss": 1.0932, "step": 7439 }, { "epoch": 0.5040991937123112, "grad_norm": 7.144145965576172, "learning_rate": 9.084263125470601e-05, "loss": 0.7772, "step": 7440 }, { "epoch": 0.5041669489802832, "grad_norm": 5.8309326171875, "learning_rate": 9.08412622356082e-05, "loss": 0.8945, "step": 7441 }, { "epoch": 0.5042347042482553, "grad_norm": 7.0719218254089355, "learning_rate": 9.083989321651037e-05, "loss": 0.7513, "step": 7442 }, { "epoch": 0.5043024595162274, "grad_norm": 7.7847795486450195, "learning_rate": 9.083852419741255e-05, "loss": 0.9553, "step": 7443 }, { "epoch": 0.5043702147841995, "grad_norm": 6.7899169921875, "learning_rate": 9.083715517831474e-05, "loss": 0.7908, "step": 7444 }, { "epoch": 0.5044379700521715, "grad_norm": 7.399930000305176, "learning_rate": 9.083578615921693e-05, "loss": 0.8104, "step": 7445 }, { "epoch": 0.5045057253201436, "grad_norm": 6.694761753082275, "learning_rate": 9.083441714011911e-05, "loss": 0.8481, "step": 7446 }, { "epoch": 0.5045734805881157, "grad_norm": 5.661715030670166, "learning_rate": 9.083304812102129e-05, "loss": 0.7598, "step": 7447 }, { "epoch": 0.5046412358560878, "grad_norm": 6.502758979797363, "learning_rate": 9.083167910192347e-05, "loss": 0.8789, "step": 7448 }, { "epoch": 0.5047089911240599, "grad_norm": 7.428299427032471, "learning_rate": 9.083031008282566e-05, "loss": 1.0323, "step": 7449 }, { "epoch": 0.504776746392032, "grad_norm": 7.600015163421631, "learning_rate": 9.082894106372784e-05, "loss": 0.8985, "step": 7450 }, { "epoch": 0.504844501660004, "grad_norm": 5.523435592651367, "learning_rate": 9.082757204463002e-05, "loss": 0.7891, "step": 7451 }, { "epoch": 0.5049122569279761, "grad_norm": 7.293107509613037, "learning_rate": 9.08262030255322e-05, "loss": 1.0804, "step": 7452 }, { "epoch": 0.5049800121959482, "grad_norm": 5.669400215148926, "learning_rate": 9.082483400643439e-05, "loss": 0.9136, "step": 7453 }, { "epoch": 0.5050477674639203, "grad_norm": 6.409341812133789, "learning_rate": 9.082346498733658e-05, "loss": 1.1784, "step": 7454 }, { "epoch": 0.5051155227318924, "grad_norm": 5.636824607849121, "learning_rate": 9.082209596823876e-05, "loss": 0.7079, "step": 7455 }, { "epoch": 0.5051832779998645, "grad_norm": 6.060736179351807, "learning_rate": 9.082072694914094e-05, "loss": 0.9279, "step": 7456 }, { "epoch": 0.5052510332678366, "grad_norm": 8.116156578063965, "learning_rate": 9.081935793004312e-05, "loss": 0.919, "step": 7457 }, { "epoch": 0.5053187885358087, "grad_norm": 7.159115791320801, "learning_rate": 9.081798891094531e-05, "loss": 0.8237, "step": 7458 }, { "epoch": 0.5053865438037808, "grad_norm": 7.625302314758301, "learning_rate": 9.08166198918475e-05, "loss": 0.8204, "step": 7459 }, { "epoch": 0.5054542990717529, "grad_norm": 7.276190280914307, "learning_rate": 9.081525087274967e-05, "loss": 0.6219, "step": 7460 }, { "epoch": 0.505522054339725, "grad_norm": 8.660572052001953, "learning_rate": 9.081388185365186e-05, "loss": 0.9772, "step": 7461 }, { "epoch": 0.505589809607697, "grad_norm": 8.218971252441406, "learning_rate": 9.081251283455404e-05, "loss": 0.7771, "step": 7462 }, { "epoch": 0.505657564875669, "grad_norm": 5.246251106262207, "learning_rate": 9.081114381545623e-05, "loss": 0.7706, "step": 7463 }, { "epoch": 0.5057253201436411, "grad_norm": 8.52219009399414, "learning_rate": 9.080977479635841e-05, "loss": 0.986, "step": 7464 }, { "epoch": 0.5057930754116132, "grad_norm": 5.994356632232666, "learning_rate": 9.080840577726059e-05, "loss": 0.783, "step": 7465 }, { "epoch": 0.5058608306795853, "grad_norm": 7.397661209106445, "learning_rate": 9.080703675816278e-05, "loss": 0.8338, "step": 7466 }, { "epoch": 0.5059285859475574, "grad_norm": 7.875096797943115, "learning_rate": 9.080566773906496e-05, "loss": 1.0615, "step": 7467 }, { "epoch": 0.5059963412155295, "grad_norm": 5.616501331329346, "learning_rate": 9.080429871996714e-05, "loss": 0.8735, "step": 7468 }, { "epoch": 0.5060640964835016, "grad_norm": 7.229982852935791, "learning_rate": 9.080292970086934e-05, "loss": 0.6812, "step": 7469 }, { "epoch": 0.5061318517514737, "grad_norm": 6.370266437530518, "learning_rate": 9.080156068177152e-05, "loss": 0.999, "step": 7470 }, { "epoch": 0.5061996070194458, "grad_norm": 7.752940654754639, "learning_rate": 9.08001916626737e-05, "loss": 1.0239, "step": 7471 }, { "epoch": 0.5062673622874179, "grad_norm": 6.016890048980713, "learning_rate": 9.079882264357589e-05, "loss": 0.8494, "step": 7472 }, { "epoch": 0.50633511755539, "grad_norm": 5.739022731781006, "learning_rate": 9.079745362447807e-05, "loss": 0.8505, "step": 7473 }, { "epoch": 0.506402872823362, "grad_norm": 5.543849468231201, "learning_rate": 9.079608460538025e-05, "loss": 0.884, "step": 7474 }, { "epoch": 0.5064706280913341, "grad_norm": 6.608273506164551, "learning_rate": 9.079471558628243e-05, "loss": 0.832, "step": 7475 }, { "epoch": 0.5065383833593062, "grad_norm": 5.356375217437744, "learning_rate": 9.079334656718461e-05, "loss": 0.7631, "step": 7476 }, { "epoch": 0.5066061386272783, "grad_norm": 6.19942045211792, "learning_rate": 9.079197754808681e-05, "loss": 0.8971, "step": 7477 }, { "epoch": 0.5066738938952503, "grad_norm": 6.8287506103515625, "learning_rate": 9.079060852898899e-05, "loss": 1.1027, "step": 7478 }, { "epoch": 0.5067416491632224, "grad_norm": 5.98441219329834, "learning_rate": 9.078923950989117e-05, "loss": 0.8476, "step": 7479 }, { "epoch": 0.5068094044311945, "grad_norm": 5.990217208862305, "learning_rate": 9.078787049079335e-05, "loss": 0.7411, "step": 7480 }, { "epoch": 0.5068771596991666, "grad_norm": 7.649387836456299, "learning_rate": 9.078650147169554e-05, "loss": 0.8498, "step": 7481 }, { "epoch": 0.5069449149671387, "grad_norm": 5.848696708679199, "learning_rate": 9.078513245259772e-05, "loss": 0.9977, "step": 7482 }, { "epoch": 0.5070126702351108, "grad_norm": 7.848155498504639, "learning_rate": 9.07837634334999e-05, "loss": 1.0863, "step": 7483 }, { "epoch": 0.5070804255030829, "grad_norm": 6.667200088500977, "learning_rate": 9.078239441440208e-05, "loss": 0.8052, "step": 7484 }, { "epoch": 0.507148180771055, "grad_norm": 7.332635879516602, "learning_rate": 9.078102539530426e-05, "loss": 0.8855, "step": 7485 }, { "epoch": 0.507215936039027, "grad_norm": 8.586542129516602, "learning_rate": 9.077965637620646e-05, "loss": 1.0024, "step": 7486 }, { "epoch": 0.5072836913069991, "grad_norm": 7.780113697052002, "learning_rate": 9.077828735710864e-05, "loss": 0.9129, "step": 7487 }, { "epoch": 0.5073514465749712, "grad_norm": 6.412726879119873, "learning_rate": 9.077691833801082e-05, "loss": 0.6586, "step": 7488 }, { "epoch": 0.5074192018429433, "grad_norm": 7.84084415435791, "learning_rate": 9.0775549318913e-05, "loss": 0.8709, "step": 7489 }, { "epoch": 0.5074869571109154, "grad_norm": 6.463359355926514, "learning_rate": 9.077418029981518e-05, "loss": 0.7824, "step": 7490 }, { "epoch": 0.5075547123788875, "grad_norm": 7.006936550140381, "learning_rate": 9.077281128071737e-05, "loss": 0.8868, "step": 7491 }, { "epoch": 0.5076224676468596, "grad_norm": 6.153975486755371, "learning_rate": 9.077144226161955e-05, "loss": 0.8607, "step": 7492 }, { "epoch": 0.5076902229148317, "grad_norm": 6.791597366333008, "learning_rate": 9.077007324252173e-05, "loss": 0.8782, "step": 7493 }, { "epoch": 0.5077579781828037, "grad_norm": 8.730172157287598, "learning_rate": 9.076870422342391e-05, "loss": 0.9439, "step": 7494 }, { "epoch": 0.5078257334507758, "grad_norm": 8.325760841369629, "learning_rate": 9.076733520432611e-05, "loss": 0.9425, "step": 7495 }, { "epoch": 0.5078934887187478, "grad_norm": 5.549458026885986, "learning_rate": 9.076596618522829e-05, "loss": 0.741, "step": 7496 }, { "epoch": 0.5079612439867199, "grad_norm": 6.16536283493042, "learning_rate": 9.076459716613047e-05, "loss": 0.9809, "step": 7497 }, { "epoch": 0.508028999254692, "grad_norm": 7.393336772918701, "learning_rate": 9.076322814703265e-05, "loss": 0.7392, "step": 7498 }, { "epoch": 0.5080967545226641, "grad_norm": 6.697513580322266, "learning_rate": 9.076185912793483e-05, "loss": 0.9413, "step": 7499 }, { "epoch": 0.5081645097906362, "grad_norm": 5.854069709777832, "learning_rate": 9.076049010883702e-05, "loss": 0.8432, "step": 7500 }, { "epoch": 0.5082322650586083, "grad_norm": 5.70686149597168, "learning_rate": 9.07591210897392e-05, "loss": 0.762, "step": 7501 }, { "epoch": 0.5083000203265804, "grad_norm": 7.012457370758057, "learning_rate": 9.075775207064138e-05, "loss": 0.8047, "step": 7502 }, { "epoch": 0.5083677755945525, "grad_norm": 6.485003471374512, "learning_rate": 9.075638305154357e-05, "loss": 0.6968, "step": 7503 }, { "epoch": 0.5084355308625246, "grad_norm": 6.104341506958008, "learning_rate": 9.075501403244576e-05, "loss": 0.82, "step": 7504 }, { "epoch": 0.5085032861304967, "grad_norm": 7.397383689880371, "learning_rate": 9.075364501334794e-05, "loss": 0.7462, "step": 7505 }, { "epoch": 0.5085710413984688, "grad_norm": 6.942671775817871, "learning_rate": 9.075227599425012e-05, "loss": 0.8829, "step": 7506 }, { "epoch": 0.5086387966664409, "grad_norm": 7.897338390350342, "learning_rate": 9.07509069751523e-05, "loss": 0.8725, "step": 7507 }, { "epoch": 0.5087065519344129, "grad_norm": 6.410269260406494, "learning_rate": 9.074953795605448e-05, "loss": 0.6648, "step": 7508 }, { "epoch": 0.508774307202385, "grad_norm": 7.2909955978393555, "learning_rate": 9.074816893695667e-05, "loss": 0.8097, "step": 7509 }, { "epoch": 0.508842062470357, "grad_norm": 7.618723392486572, "learning_rate": 9.074679991785885e-05, "loss": 1.0666, "step": 7510 }, { "epoch": 0.5089098177383291, "grad_norm": 6.482639789581299, "learning_rate": 9.074543089876103e-05, "loss": 0.7946, "step": 7511 }, { "epoch": 0.5089775730063012, "grad_norm": 8.747861862182617, "learning_rate": 9.074406187966323e-05, "loss": 0.9464, "step": 7512 }, { "epoch": 0.5090453282742733, "grad_norm": 8.655475616455078, "learning_rate": 9.074269286056541e-05, "loss": 1.0564, "step": 7513 }, { "epoch": 0.5091130835422454, "grad_norm": 5.97476053237915, "learning_rate": 9.074132384146759e-05, "loss": 0.8152, "step": 7514 }, { "epoch": 0.5091808388102175, "grad_norm": 7.176423072814941, "learning_rate": 9.073995482236978e-05, "loss": 0.9338, "step": 7515 }, { "epoch": 0.5092485940781896, "grad_norm": 6.205722808837891, "learning_rate": 9.073858580327196e-05, "loss": 0.9404, "step": 7516 }, { "epoch": 0.5093163493461617, "grad_norm": 6.857678413391113, "learning_rate": 9.073721678417414e-05, "loss": 0.7308, "step": 7517 }, { "epoch": 0.5093841046141337, "grad_norm": 5.610182285308838, "learning_rate": 9.073584776507634e-05, "loss": 0.7608, "step": 7518 }, { "epoch": 0.5094518598821058, "grad_norm": 5.618816375732422, "learning_rate": 9.073447874597852e-05, "loss": 0.6947, "step": 7519 }, { "epoch": 0.5095196151500779, "grad_norm": 5.652774810791016, "learning_rate": 9.07331097268807e-05, "loss": 0.6527, "step": 7520 }, { "epoch": 0.50958737041805, "grad_norm": 6.597334861755371, "learning_rate": 9.073174070778288e-05, "loss": 0.8084, "step": 7521 }, { "epoch": 0.5096551256860221, "grad_norm": 6.7087202072143555, "learning_rate": 9.073037168868506e-05, "loss": 0.9075, "step": 7522 }, { "epoch": 0.5097228809539942, "grad_norm": 6.56160306930542, "learning_rate": 9.072900266958725e-05, "loss": 0.9683, "step": 7523 }, { "epoch": 0.5097906362219663, "grad_norm": 7.624117851257324, "learning_rate": 9.072763365048943e-05, "loss": 1.0495, "step": 7524 }, { "epoch": 0.5098583914899384, "grad_norm": 5.690593719482422, "learning_rate": 9.072626463139161e-05, "loss": 1.0348, "step": 7525 }, { "epoch": 0.5099261467579105, "grad_norm": 5.954509258270264, "learning_rate": 9.07248956122938e-05, "loss": 0.8622, "step": 7526 }, { "epoch": 0.5099939020258825, "grad_norm": 6.603368759155273, "learning_rate": 9.072352659319599e-05, "loss": 0.6596, "step": 7527 }, { "epoch": 0.5100616572938546, "grad_norm": 6.074961185455322, "learning_rate": 9.072215757409817e-05, "loss": 1.1204, "step": 7528 }, { "epoch": 0.5101294125618266, "grad_norm": 7.444427967071533, "learning_rate": 9.072078855500035e-05, "loss": 0.8495, "step": 7529 }, { "epoch": 0.5101971678297987, "grad_norm": 6.496705532073975, "learning_rate": 9.071941953590253e-05, "loss": 0.8796, "step": 7530 }, { "epoch": 0.5102649230977708, "grad_norm": 6.413107872009277, "learning_rate": 9.071805051680471e-05, "loss": 0.8713, "step": 7531 }, { "epoch": 0.5103326783657429, "grad_norm": 5.7435221672058105, "learning_rate": 9.07166814977069e-05, "loss": 0.7322, "step": 7532 }, { "epoch": 0.510400433633715, "grad_norm": 5.807244777679443, "learning_rate": 9.071531247860908e-05, "loss": 0.8808, "step": 7533 }, { "epoch": 0.5104681889016871, "grad_norm": 7.4514617919921875, "learning_rate": 9.071394345951126e-05, "loss": 0.9344, "step": 7534 }, { "epoch": 0.5105359441696592, "grad_norm": 8.864927291870117, "learning_rate": 9.071257444041344e-05, "loss": 1.1036, "step": 7535 }, { "epoch": 0.5106036994376313, "grad_norm": 6.26414155960083, "learning_rate": 9.071120542131564e-05, "loss": 0.7743, "step": 7536 }, { "epoch": 0.5106714547056034, "grad_norm": 6.583436012268066, "learning_rate": 9.070983640221782e-05, "loss": 0.8079, "step": 7537 }, { "epoch": 0.5107392099735755, "grad_norm": 6.833841323852539, "learning_rate": 9.070846738312e-05, "loss": 0.9583, "step": 7538 }, { "epoch": 0.5108069652415476, "grad_norm": 7.538537502288818, "learning_rate": 9.070709836402218e-05, "loss": 0.9455, "step": 7539 }, { "epoch": 0.5108747205095197, "grad_norm": 6.710206031799316, "learning_rate": 9.070572934492436e-05, "loss": 0.8598, "step": 7540 }, { "epoch": 0.5109424757774917, "grad_norm": 5.586297035217285, "learning_rate": 9.070436032582655e-05, "loss": 0.7335, "step": 7541 }, { "epoch": 0.5110102310454638, "grad_norm": 6.353386878967285, "learning_rate": 9.070299130672873e-05, "loss": 0.7107, "step": 7542 }, { "epoch": 0.5110779863134358, "grad_norm": 7.016844272613525, "learning_rate": 9.070162228763091e-05, "loss": 0.8202, "step": 7543 }, { "epoch": 0.5111457415814079, "grad_norm": 7.088860988616943, "learning_rate": 9.07002532685331e-05, "loss": 0.7402, "step": 7544 }, { "epoch": 0.51121349684938, "grad_norm": 7.208921432495117, "learning_rate": 9.069888424943527e-05, "loss": 0.8991, "step": 7545 }, { "epoch": 0.5112812521173521, "grad_norm": 7.308844566345215, "learning_rate": 9.069751523033747e-05, "loss": 0.9908, "step": 7546 }, { "epoch": 0.5113490073853242, "grad_norm": 6.0522308349609375, "learning_rate": 9.069614621123965e-05, "loss": 0.788, "step": 7547 }, { "epoch": 0.5114167626532963, "grad_norm": 6.078645706176758, "learning_rate": 9.069477719214183e-05, "loss": 0.7327, "step": 7548 }, { "epoch": 0.5114845179212684, "grad_norm": 7.585491180419922, "learning_rate": 9.069340817304401e-05, "loss": 1.0976, "step": 7549 }, { "epoch": 0.5115522731892405, "grad_norm": 5.420160293579102, "learning_rate": 9.06920391539462e-05, "loss": 0.5896, "step": 7550 }, { "epoch": 0.5116200284572126, "grad_norm": 7.09340763092041, "learning_rate": 9.069067013484838e-05, "loss": 0.8464, "step": 7551 }, { "epoch": 0.5116877837251846, "grad_norm": 5.813265323638916, "learning_rate": 9.068930111575056e-05, "loss": 0.685, "step": 7552 }, { "epoch": 0.5117555389931567, "grad_norm": 8.996179580688477, "learning_rate": 9.068793209665274e-05, "loss": 1.0089, "step": 7553 }, { "epoch": 0.5118232942611288, "grad_norm": 5.605385780334473, "learning_rate": 9.068656307755493e-05, "loss": 0.8688, "step": 7554 }, { "epoch": 0.5118910495291009, "grad_norm": 7.07156229019165, "learning_rate": 9.068519405845712e-05, "loss": 0.7926, "step": 7555 }, { "epoch": 0.511958804797073, "grad_norm": 6.07735538482666, "learning_rate": 9.06838250393593e-05, "loss": 0.7721, "step": 7556 }, { "epoch": 0.5120265600650451, "grad_norm": 8.173517227172852, "learning_rate": 9.068245602026148e-05, "loss": 0.8789, "step": 7557 }, { "epoch": 0.5120943153330172, "grad_norm": 6.656474590301514, "learning_rate": 9.068108700116367e-05, "loss": 0.9221, "step": 7558 }, { "epoch": 0.5121620706009892, "grad_norm": 7.5140061378479, "learning_rate": 9.067971798206585e-05, "loss": 0.7789, "step": 7559 }, { "epoch": 0.5122298258689613, "grad_norm": 6.517942428588867, "learning_rate": 9.067834896296803e-05, "loss": 0.7376, "step": 7560 }, { "epoch": 0.5122975811369334, "grad_norm": 6.736027717590332, "learning_rate": 9.067697994387023e-05, "loss": 0.8051, "step": 7561 }, { "epoch": 0.5123653364049054, "grad_norm": 11.268937110900879, "learning_rate": 9.067561092477241e-05, "loss": 0.7261, "step": 7562 }, { "epoch": 0.5124330916728775, "grad_norm": 6.4452667236328125, "learning_rate": 9.067424190567459e-05, "loss": 0.9289, "step": 7563 }, { "epoch": 0.5125008469408496, "grad_norm": 6.565738201141357, "learning_rate": 9.067287288657678e-05, "loss": 0.8039, "step": 7564 }, { "epoch": 0.5125686022088217, "grad_norm": 5.919821739196777, "learning_rate": 9.067150386747896e-05, "loss": 0.7832, "step": 7565 }, { "epoch": 0.5126363574767938, "grad_norm": 7.885809898376465, "learning_rate": 9.067013484838114e-05, "loss": 0.7081, "step": 7566 }, { "epoch": 0.5127041127447659, "grad_norm": 6.188759803771973, "learning_rate": 9.066876582928332e-05, "loss": 0.9531, "step": 7567 }, { "epoch": 0.512771868012738, "grad_norm": 7.5221452713012695, "learning_rate": 9.06673968101855e-05, "loss": 0.9727, "step": 7568 }, { "epoch": 0.5128396232807101, "grad_norm": 6.461081027984619, "learning_rate": 9.06660277910877e-05, "loss": 0.9477, "step": 7569 }, { "epoch": 0.5129073785486822, "grad_norm": 7.04016637802124, "learning_rate": 9.066465877198988e-05, "loss": 0.7655, "step": 7570 }, { "epoch": 0.5129751338166543, "grad_norm": 6.721169471740723, "learning_rate": 9.066328975289206e-05, "loss": 0.7675, "step": 7571 }, { "epoch": 0.5130428890846264, "grad_norm": 6.552652835845947, "learning_rate": 9.066192073379424e-05, "loss": 0.6938, "step": 7572 }, { "epoch": 0.5131106443525985, "grad_norm": 5.2619452476501465, "learning_rate": 9.066055171469643e-05, "loss": 0.8072, "step": 7573 }, { "epoch": 0.5131783996205705, "grad_norm": 6.426028728485107, "learning_rate": 9.065918269559861e-05, "loss": 0.8989, "step": 7574 }, { "epoch": 0.5132461548885426, "grad_norm": 7.762108325958252, "learning_rate": 9.06578136765008e-05, "loss": 1.0512, "step": 7575 }, { "epoch": 0.5133139101565146, "grad_norm": 5.862361431121826, "learning_rate": 9.065644465740297e-05, "loss": 0.8233, "step": 7576 }, { "epoch": 0.5133816654244867, "grad_norm": 6.885676860809326, "learning_rate": 9.065507563830515e-05, "loss": 0.9328, "step": 7577 }, { "epoch": 0.5134494206924588, "grad_norm": 6.061431884765625, "learning_rate": 9.065370661920735e-05, "loss": 0.6901, "step": 7578 }, { "epoch": 0.5135171759604309, "grad_norm": 5.702830791473389, "learning_rate": 9.065233760010953e-05, "loss": 0.8137, "step": 7579 }, { "epoch": 0.513584931228403, "grad_norm": 7.937398910522461, "learning_rate": 9.065096858101171e-05, "loss": 1.1357, "step": 7580 }, { "epoch": 0.5136526864963751, "grad_norm": 6.353341102600098, "learning_rate": 9.064959956191389e-05, "loss": 0.8848, "step": 7581 }, { "epoch": 0.5137204417643472, "grad_norm": 5.204100608825684, "learning_rate": 9.064823054281608e-05, "loss": 0.6812, "step": 7582 }, { "epoch": 0.5137881970323193, "grad_norm": 6.726477146148682, "learning_rate": 9.064686152371826e-05, "loss": 0.8012, "step": 7583 }, { "epoch": 0.5138559523002914, "grad_norm": 8.397887229919434, "learning_rate": 9.064549250462044e-05, "loss": 0.6674, "step": 7584 }, { "epoch": 0.5139237075682634, "grad_norm": 6.773873329162598, "learning_rate": 9.064412348552262e-05, "loss": 0.7718, "step": 7585 }, { "epoch": 0.5139914628362355, "grad_norm": 5.611907005310059, "learning_rate": 9.06427544664248e-05, "loss": 0.9307, "step": 7586 }, { "epoch": 0.5140592181042076, "grad_norm": 7.3962225914001465, "learning_rate": 9.0641385447327e-05, "loss": 0.8438, "step": 7587 }, { "epoch": 0.5141269733721797, "grad_norm": 7.1288580894470215, "learning_rate": 9.064001642822918e-05, "loss": 0.8185, "step": 7588 }, { "epoch": 0.5141947286401518, "grad_norm": 6.204834461212158, "learning_rate": 9.063864740913136e-05, "loss": 0.8201, "step": 7589 }, { "epoch": 0.5142624839081239, "grad_norm": 6.229215145111084, "learning_rate": 9.063727839003354e-05, "loss": 0.811, "step": 7590 }, { "epoch": 0.514330239176096, "grad_norm": 7.3174285888671875, "learning_rate": 9.063590937093573e-05, "loss": 0.9745, "step": 7591 }, { "epoch": 0.514397994444068, "grad_norm": 7.100773334503174, "learning_rate": 9.063454035183791e-05, "loss": 0.8132, "step": 7592 }, { "epoch": 0.5144657497120401, "grad_norm": 7.439940929412842, "learning_rate": 9.06331713327401e-05, "loss": 1.042, "step": 7593 }, { "epoch": 0.5145335049800122, "grad_norm": 6.337569713592529, "learning_rate": 9.063180231364227e-05, "loss": 0.6381, "step": 7594 }, { "epoch": 0.5146012602479842, "grad_norm": 8.016763687133789, "learning_rate": 9.063043329454445e-05, "loss": 0.944, "step": 7595 }, { "epoch": 0.5146690155159563, "grad_norm": 7.090301036834717, "learning_rate": 9.062906427544665e-05, "loss": 1.0519, "step": 7596 }, { "epoch": 0.5147367707839284, "grad_norm": 6.24937629699707, "learning_rate": 9.062769525634883e-05, "loss": 0.916, "step": 7597 }, { "epoch": 0.5148045260519005, "grad_norm": 6.316051959991455, "learning_rate": 9.062632623725101e-05, "loss": 0.8483, "step": 7598 }, { "epoch": 0.5148722813198726, "grad_norm": 7.601284027099609, "learning_rate": 9.062495721815319e-05, "loss": 0.9532, "step": 7599 }, { "epoch": 0.5149400365878447, "grad_norm": 6.206179618835449, "learning_rate": 9.062358819905537e-05, "loss": 0.8767, "step": 7600 }, { "epoch": 0.5150077918558168, "grad_norm": 7.3104119300842285, "learning_rate": 9.062221917995756e-05, "loss": 0.9337, "step": 7601 }, { "epoch": 0.5150755471237889, "grad_norm": 7.43108606338501, "learning_rate": 9.062085016085974e-05, "loss": 0.9725, "step": 7602 }, { "epoch": 0.515143302391761, "grad_norm": 8.126846313476562, "learning_rate": 9.061948114176192e-05, "loss": 1.0367, "step": 7603 }, { "epoch": 0.5152110576597331, "grad_norm": 6.7592926025390625, "learning_rate": 9.061811212266412e-05, "loss": 0.9557, "step": 7604 }, { "epoch": 0.5152788129277052, "grad_norm": 5.046600341796875, "learning_rate": 9.06167431035663e-05, "loss": 0.601, "step": 7605 }, { "epoch": 0.5153465681956773, "grad_norm": 7.141878604888916, "learning_rate": 9.061537408446848e-05, "loss": 0.8938, "step": 7606 }, { "epoch": 0.5154143234636493, "grad_norm": 8.64307689666748, "learning_rate": 9.061400506537067e-05, "loss": 1.1668, "step": 7607 }, { "epoch": 0.5154820787316213, "grad_norm": 6.312736988067627, "learning_rate": 9.061263604627285e-05, "loss": 0.7902, "step": 7608 }, { "epoch": 0.5155498339995934, "grad_norm": 8.432990074157715, "learning_rate": 9.061126702717503e-05, "loss": 0.9889, "step": 7609 }, { "epoch": 0.5156175892675655, "grad_norm": 9.558629989624023, "learning_rate": 9.060989800807723e-05, "loss": 0.8032, "step": 7610 }, { "epoch": 0.5156853445355376, "grad_norm": 5.524839878082275, "learning_rate": 9.060852898897941e-05, "loss": 0.8536, "step": 7611 }, { "epoch": 0.5157530998035097, "grad_norm": 6.718236923217773, "learning_rate": 9.060715996988159e-05, "loss": 1.1837, "step": 7612 }, { "epoch": 0.5158208550714818, "grad_norm": 4.946925163269043, "learning_rate": 9.060579095078377e-05, "loss": 0.8149, "step": 7613 }, { "epoch": 0.5158886103394539, "grad_norm": 8.684269905090332, "learning_rate": 9.060442193168596e-05, "loss": 1.1112, "step": 7614 }, { "epoch": 0.515956365607426, "grad_norm": 5.708873271942139, "learning_rate": 9.060305291258814e-05, "loss": 0.9813, "step": 7615 }, { "epoch": 0.5160241208753981, "grad_norm": 7.8680419921875, "learning_rate": 9.060168389349032e-05, "loss": 0.8572, "step": 7616 }, { "epoch": 0.5160918761433702, "grad_norm": 7.092006206512451, "learning_rate": 9.06003148743925e-05, "loss": 1.0532, "step": 7617 }, { "epoch": 0.5161596314113422, "grad_norm": 6.504335880279541, "learning_rate": 9.059894585529468e-05, "loss": 0.969, "step": 7618 }, { "epoch": 0.5162273866793143, "grad_norm": 7.514725208282471, "learning_rate": 9.059757683619688e-05, "loss": 0.7966, "step": 7619 }, { "epoch": 0.5162951419472864, "grad_norm": 5.896969318389893, "learning_rate": 9.059620781709906e-05, "loss": 0.6818, "step": 7620 }, { "epoch": 0.5163628972152585, "grad_norm": 11.024290084838867, "learning_rate": 9.059483879800124e-05, "loss": 0.8365, "step": 7621 }, { "epoch": 0.5164306524832306, "grad_norm": 6.390562534332275, "learning_rate": 9.059346977890342e-05, "loss": 0.9039, "step": 7622 }, { "epoch": 0.5164984077512027, "grad_norm": 6.528719425201416, "learning_rate": 9.05921007598056e-05, "loss": 0.8762, "step": 7623 }, { "epoch": 0.5165661630191748, "grad_norm": 6.40498685836792, "learning_rate": 9.059073174070779e-05, "loss": 1.1455, "step": 7624 }, { "epoch": 0.5166339182871468, "grad_norm": 6.250789165496826, "learning_rate": 9.058936272160997e-05, "loss": 0.8012, "step": 7625 }, { "epoch": 0.5167016735551189, "grad_norm": 5.280569076538086, "learning_rate": 9.058799370251215e-05, "loss": 0.5803, "step": 7626 }, { "epoch": 0.516769428823091, "grad_norm": 6.143814563751221, "learning_rate": 9.058662468341433e-05, "loss": 0.8817, "step": 7627 }, { "epoch": 0.516837184091063, "grad_norm": 5.753292560577393, "learning_rate": 9.058525566431653e-05, "loss": 0.6251, "step": 7628 }, { "epoch": 0.5169049393590351, "grad_norm": 7.431488037109375, "learning_rate": 9.058388664521871e-05, "loss": 0.807, "step": 7629 }, { "epoch": 0.5169726946270072, "grad_norm": 7.9407548904418945, "learning_rate": 9.058251762612089e-05, "loss": 0.9708, "step": 7630 }, { "epoch": 0.5170404498949793, "grad_norm": 9.618108749389648, "learning_rate": 9.058114860702307e-05, "loss": 0.769, "step": 7631 }, { "epoch": 0.5171082051629514, "grad_norm": 7.05092716217041, "learning_rate": 9.057977958792525e-05, "loss": 0.8202, "step": 7632 }, { "epoch": 0.5171759604309235, "grad_norm": 8.478489875793457, "learning_rate": 9.057841056882744e-05, "loss": 0.836, "step": 7633 }, { "epoch": 0.5172437156988956, "grad_norm": 9.781081199645996, "learning_rate": 9.057704154972962e-05, "loss": 1.1411, "step": 7634 }, { "epoch": 0.5173114709668677, "grad_norm": 6.536010265350342, "learning_rate": 9.05756725306318e-05, "loss": 1.0199, "step": 7635 }, { "epoch": 0.5173792262348398, "grad_norm": 6.633424758911133, "learning_rate": 9.057430351153398e-05, "loss": 0.8167, "step": 7636 }, { "epoch": 0.5174469815028119, "grad_norm": 8.581771850585938, "learning_rate": 9.057293449243618e-05, "loss": 0.9926, "step": 7637 }, { "epoch": 0.517514736770784, "grad_norm": 5.538379669189453, "learning_rate": 9.057156547333836e-05, "loss": 0.7093, "step": 7638 }, { "epoch": 0.5175824920387561, "grad_norm": 6.037271022796631, "learning_rate": 9.057019645424054e-05, "loss": 0.9901, "step": 7639 }, { "epoch": 0.5176502473067282, "grad_norm": 5.663453578948975, "learning_rate": 9.056882743514272e-05, "loss": 0.929, "step": 7640 }, { "epoch": 0.5177180025747001, "grad_norm": 9.440011978149414, "learning_rate": 9.05674584160449e-05, "loss": 0.6656, "step": 7641 }, { "epoch": 0.5177857578426722, "grad_norm": 7.314742088317871, "learning_rate": 9.056608939694709e-05, "loss": 1.0248, "step": 7642 }, { "epoch": 0.5178535131106443, "grad_norm": 6.524215221405029, "learning_rate": 9.056472037784927e-05, "loss": 0.7533, "step": 7643 }, { "epoch": 0.5179212683786164, "grad_norm": 7.727807998657227, "learning_rate": 9.056335135875145e-05, "loss": 0.8106, "step": 7644 }, { "epoch": 0.5179890236465885, "grad_norm": 6.237912654876709, "learning_rate": 9.056198233965363e-05, "loss": 0.8731, "step": 7645 }, { "epoch": 0.5180567789145606, "grad_norm": 5.7623796463012695, "learning_rate": 9.056061332055583e-05, "loss": 0.7102, "step": 7646 }, { "epoch": 0.5181245341825327, "grad_norm": 7.946072578430176, "learning_rate": 9.055924430145801e-05, "loss": 0.9145, "step": 7647 }, { "epoch": 0.5181922894505048, "grad_norm": 6.882560729980469, "learning_rate": 9.055787528236019e-05, "loss": 0.9543, "step": 7648 }, { "epoch": 0.5182600447184769, "grad_norm": 5.425245761871338, "learning_rate": 9.055650626326237e-05, "loss": 0.5889, "step": 7649 }, { "epoch": 0.518327799986449, "grad_norm": 5.8002142906188965, "learning_rate": 9.055513724416456e-05, "loss": 0.8603, "step": 7650 }, { "epoch": 0.518395555254421, "grad_norm": 6.522377014160156, "learning_rate": 9.055376822506674e-05, "loss": 0.8288, "step": 7651 }, { "epoch": 0.5184633105223931, "grad_norm": 6.6606221199035645, "learning_rate": 9.055239920596892e-05, "loss": 0.8647, "step": 7652 }, { "epoch": 0.5185310657903652, "grad_norm": 7.66829776763916, "learning_rate": 9.055103018687112e-05, "loss": 1.0252, "step": 7653 }, { "epoch": 0.5185988210583373, "grad_norm": 5.306484222412109, "learning_rate": 9.05496611677733e-05, "loss": 0.7053, "step": 7654 }, { "epoch": 0.5186665763263094, "grad_norm": 5.434155464172363, "learning_rate": 9.054829214867548e-05, "loss": 0.7684, "step": 7655 }, { "epoch": 0.5187343315942815, "grad_norm": 6.3002495765686035, "learning_rate": 9.054692312957767e-05, "loss": 0.8797, "step": 7656 }, { "epoch": 0.5188020868622535, "grad_norm": 7.075455665588379, "learning_rate": 9.054555411047985e-05, "loss": 1.0045, "step": 7657 }, { "epoch": 0.5188698421302256, "grad_norm": 8.508553504943848, "learning_rate": 9.054418509138203e-05, "loss": 0.8166, "step": 7658 }, { "epoch": 0.5189375973981977, "grad_norm": 6.156308174133301, "learning_rate": 9.054281607228421e-05, "loss": 0.5456, "step": 7659 }, { "epoch": 0.5190053526661698, "grad_norm": 5.993220806121826, "learning_rate": 9.054144705318641e-05, "loss": 0.8783, "step": 7660 }, { "epoch": 0.5190731079341419, "grad_norm": 7.312931537628174, "learning_rate": 9.054007803408859e-05, "loss": 0.8758, "step": 7661 }, { "epoch": 0.519140863202114, "grad_norm": 6.825846195220947, "learning_rate": 9.053870901499077e-05, "loss": 0.9189, "step": 7662 }, { "epoch": 0.519208618470086, "grad_norm": 6.013767242431641, "learning_rate": 9.053733999589295e-05, "loss": 0.9579, "step": 7663 }, { "epoch": 0.5192763737380581, "grad_norm": 7.817983627319336, "learning_rate": 9.053597097679513e-05, "loss": 0.7481, "step": 7664 }, { "epoch": 0.5193441290060302, "grad_norm": 6.868429183959961, "learning_rate": 9.053460195769732e-05, "loss": 0.9323, "step": 7665 }, { "epoch": 0.5194118842740023, "grad_norm": 8.720475196838379, "learning_rate": 9.05332329385995e-05, "loss": 0.6936, "step": 7666 }, { "epoch": 0.5194796395419744, "grad_norm": 4.687426567077637, "learning_rate": 9.053186391950168e-05, "loss": 0.6088, "step": 7667 }, { "epoch": 0.5195473948099465, "grad_norm": 7.063420295715332, "learning_rate": 9.053049490040386e-05, "loss": 1.0478, "step": 7668 }, { "epoch": 0.5196151500779186, "grad_norm": 6.082928657531738, "learning_rate": 9.052912588130606e-05, "loss": 0.7404, "step": 7669 }, { "epoch": 0.5196829053458907, "grad_norm": 6.03659200668335, "learning_rate": 9.052775686220824e-05, "loss": 0.9515, "step": 7670 }, { "epoch": 0.5197506606138628, "grad_norm": 5.9596333503723145, "learning_rate": 9.052638784311042e-05, "loss": 0.789, "step": 7671 }, { "epoch": 0.5198184158818349, "grad_norm": 6.595233917236328, "learning_rate": 9.05250188240126e-05, "loss": 0.8488, "step": 7672 }, { "epoch": 0.519886171149807, "grad_norm": 5.341801643371582, "learning_rate": 9.052364980491478e-05, "loss": 0.7815, "step": 7673 }, { "epoch": 0.5199539264177789, "grad_norm": 9.91911506652832, "learning_rate": 9.052228078581697e-05, "loss": 0.9121, "step": 7674 }, { "epoch": 0.520021681685751, "grad_norm": 6.1603498458862305, "learning_rate": 9.052091176671915e-05, "loss": 0.9465, "step": 7675 }, { "epoch": 0.5200894369537231, "grad_norm": 7.671917915344238, "learning_rate": 9.051954274762133e-05, "loss": 0.8225, "step": 7676 }, { "epoch": 0.5201571922216952, "grad_norm": 7.148565769195557, "learning_rate": 9.051817372852351e-05, "loss": 0.6911, "step": 7677 }, { "epoch": 0.5202249474896673, "grad_norm": 5.975964546203613, "learning_rate": 9.05168047094257e-05, "loss": 0.6338, "step": 7678 }, { "epoch": 0.5202927027576394, "grad_norm": 6.8202924728393555, "learning_rate": 9.051543569032789e-05, "loss": 0.8961, "step": 7679 }, { "epoch": 0.5203604580256115, "grad_norm": 6.167017459869385, "learning_rate": 9.051406667123007e-05, "loss": 0.7479, "step": 7680 }, { "epoch": 0.5204282132935836, "grad_norm": 6.801616668701172, "learning_rate": 9.051269765213225e-05, "loss": 0.7939, "step": 7681 }, { "epoch": 0.5204959685615557, "grad_norm": 5.709447860717773, "learning_rate": 9.051132863303443e-05, "loss": 0.74, "step": 7682 }, { "epoch": 0.5205637238295278, "grad_norm": 7.522529602050781, "learning_rate": 9.050995961393662e-05, "loss": 0.8341, "step": 7683 }, { "epoch": 0.5206314790974998, "grad_norm": 7.840425491333008, "learning_rate": 9.05085905948388e-05, "loss": 0.6591, "step": 7684 }, { "epoch": 0.5206992343654719, "grad_norm": 5.461009979248047, "learning_rate": 9.050722157574098e-05, "loss": 0.6172, "step": 7685 }, { "epoch": 0.520766989633444, "grad_norm": 6.047004222869873, "learning_rate": 9.050585255664316e-05, "loss": 0.8012, "step": 7686 }, { "epoch": 0.5208347449014161, "grad_norm": 7.2090630531311035, "learning_rate": 9.050448353754534e-05, "loss": 0.9606, "step": 7687 }, { "epoch": 0.5209025001693882, "grad_norm": 6.153731822967529, "learning_rate": 9.050311451844754e-05, "loss": 0.8408, "step": 7688 }, { "epoch": 0.5209702554373603, "grad_norm": 6.858744144439697, "learning_rate": 9.050174549934972e-05, "loss": 0.9427, "step": 7689 }, { "epoch": 0.5210380107053323, "grad_norm": 6.979424476623535, "learning_rate": 9.05003764802519e-05, "loss": 0.7301, "step": 7690 }, { "epoch": 0.5211057659733044, "grad_norm": 6.714144706726074, "learning_rate": 9.049900746115408e-05, "loss": 1.0239, "step": 7691 }, { "epoch": 0.5211735212412765, "grad_norm": 6.382346153259277, "learning_rate": 9.049763844205627e-05, "loss": 0.7648, "step": 7692 }, { "epoch": 0.5212412765092486, "grad_norm": 6.722228050231934, "learning_rate": 9.049626942295845e-05, "loss": 0.5747, "step": 7693 }, { "epoch": 0.5213090317772207, "grad_norm": 8.618867874145508, "learning_rate": 9.049490040386063e-05, "loss": 0.9178, "step": 7694 }, { "epoch": 0.5213767870451927, "grad_norm": 7.99964714050293, "learning_rate": 9.049353138476281e-05, "loss": 0.989, "step": 7695 }, { "epoch": 0.5214445423131648, "grad_norm": 8.504440307617188, "learning_rate": 9.0492162365665e-05, "loss": 0.7935, "step": 7696 }, { "epoch": 0.5215122975811369, "grad_norm": 6.088517665863037, "learning_rate": 9.049079334656719e-05, "loss": 0.894, "step": 7697 }, { "epoch": 0.521580052849109, "grad_norm": 7.038356304168701, "learning_rate": 9.048942432746937e-05, "loss": 0.8796, "step": 7698 }, { "epoch": 0.5216478081170811, "grad_norm": 6.817752361297607, "learning_rate": 9.048805530837155e-05, "loss": 0.8724, "step": 7699 }, { "epoch": 0.5217155633850532, "grad_norm": 6.776190280914307, "learning_rate": 9.048668628927374e-05, "loss": 0.8975, "step": 7700 }, { "epoch": 0.5217833186530253, "grad_norm": 6.522316932678223, "learning_rate": 9.048531727017592e-05, "loss": 0.7861, "step": 7701 }, { "epoch": 0.5218510739209974, "grad_norm": 8.256267547607422, "learning_rate": 9.04839482510781e-05, "loss": 1.1714, "step": 7702 }, { "epoch": 0.5219188291889695, "grad_norm": 6.675931930541992, "learning_rate": 9.04825792319803e-05, "loss": 0.8718, "step": 7703 }, { "epoch": 0.5219865844569416, "grad_norm": 7.407254695892334, "learning_rate": 9.048121021288248e-05, "loss": 1.1478, "step": 7704 }, { "epoch": 0.5220543397249137, "grad_norm": 7.207369327545166, "learning_rate": 9.047984119378466e-05, "loss": 0.9569, "step": 7705 }, { "epoch": 0.5221220949928856, "grad_norm": 5.364320278167725, "learning_rate": 9.047847217468685e-05, "loss": 0.7651, "step": 7706 }, { "epoch": 0.5221898502608577, "grad_norm": 6.586124897003174, "learning_rate": 9.047710315558903e-05, "loss": 0.8322, "step": 7707 }, { "epoch": 0.5222576055288298, "grad_norm": 6.659854412078857, "learning_rate": 9.047573413649121e-05, "loss": 0.8912, "step": 7708 }, { "epoch": 0.5223253607968019, "grad_norm": 7.059124946594238, "learning_rate": 9.047436511739339e-05, "loss": 1.012, "step": 7709 }, { "epoch": 0.522393116064774, "grad_norm": 6.54586124420166, "learning_rate": 9.047299609829557e-05, "loss": 0.9704, "step": 7710 }, { "epoch": 0.5224608713327461, "grad_norm": 5.587647438049316, "learning_rate": 9.047162707919777e-05, "loss": 0.6115, "step": 7711 }, { "epoch": 0.5225286266007182, "grad_norm": 6.951663970947266, "learning_rate": 9.047025806009995e-05, "loss": 0.7751, "step": 7712 }, { "epoch": 0.5225963818686903, "grad_norm": 7.021738052368164, "learning_rate": 9.046888904100213e-05, "loss": 1.1632, "step": 7713 }, { "epoch": 0.5226641371366624, "grad_norm": 6.46588134765625, "learning_rate": 9.046752002190431e-05, "loss": 0.7517, "step": 7714 }, { "epoch": 0.5227318924046345, "grad_norm": 6.638917922973633, "learning_rate": 9.04661510028065e-05, "loss": 1.0171, "step": 7715 }, { "epoch": 0.5227996476726066, "grad_norm": 5.955547332763672, "learning_rate": 9.046478198370868e-05, "loss": 0.9812, "step": 7716 }, { "epoch": 0.5228674029405787, "grad_norm": 6.903220176696777, "learning_rate": 9.046341296461086e-05, "loss": 0.7894, "step": 7717 }, { "epoch": 0.5229351582085507, "grad_norm": 7.656357288360596, "learning_rate": 9.046204394551304e-05, "loss": 1.0162, "step": 7718 }, { "epoch": 0.5230029134765228, "grad_norm": 8.025165557861328, "learning_rate": 9.046067492641522e-05, "loss": 0.9158, "step": 7719 }, { "epoch": 0.5230706687444949, "grad_norm": 7.652121067047119, "learning_rate": 9.045930590731742e-05, "loss": 0.9347, "step": 7720 }, { "epoch": 0.523138424012467, "grad_norm": 7.028977870941162, "learning_rate": 9.04579368882196e-05, "loss": 0.9914, "step": 7721 }, { "epoch": 0.523206179280439, "grad_norm": 6.23792028427124, "learning_rate": 9.045656786912178e-05, "loss": 0.7154, "step": 7722 }, { "epoch": 0.5232739345484111, "grad_norm": 8.56432056427002, "learning_rate": 9.045519885002396e-05, "loss": 0.9627, "step": 7723 }, { "epoch": 0.5233416898163832, "grad_norm": 6.733086109161377, "learning_rate": 9.045382983092615e-05, "loss": 0.8932, "step": 7724 }, { "epoch": 0.5234094450843553, "grad_norm": 6.419559955596924, "learning_rate": 9.045246081182833e-05, "loss": 0.6503, "step": 7725 }, { "epoch": 0.5234772003523274, "grad_norm": 6.498406887054443, "learning_rate": 9.045109179273051e-05, "loss": 1.0333, "step": 7726 }, { "epoch": 0.5235449556202995, "grad_norm": 6.396055698394775, "learning_rate": 9.044972277363269e-05, "loss": 0.7628, "step": 7727 }, { "epoch": 0.5236127108882715, "grad_norm": 5.671381950378418, "learning_rate": 9.044835375453487e-05, "loss": 0.7091, "step": 7728 }, { "epoch": 0.5236804661562436, "grad_norm": 7.36892557144165, "learning_rate": 9.044698473543707e-05, "loss": 0.8571, "step": 7729 }, { "epoch": 0.5237482214242157, "grad_norm": 7.574361324310303, "learning_rate": 9.044561571633925e-05, "loss": 0.9784, "step": 7730 }, { "epoch": 0.5238159766921878, "grad_norm": 7.9222540855407715, "learning_rate": 9.044424669724143e-05, "loss": 0.8634, "step": 7731 }, { "epoch": 0.5238837319601599, "grad_norm": 6.013153076171875, "learning_rate": 9.044287767814361e-05, "loss": 0.9073, "step": 7732 }, { "epoch": 0.523951487228132, "grad_norm": 7.480499744415283, "learning_rate": 9.044150865904579e-05, "loss": 0.9001, "step": 7733 }, { "epoch": 0.5240192424961041, "grad_norm": 5.559460163116455, "learning_rate": 9.044013963994798e-05, "loss": 0.7644, "step": 7734 }, { "epoch": 0.5240869977640762, "grad_norm": 5.924765110015869, "learning_rate": 9.043877062085016e-05, "loss": 0.6714, "step": 7735 }, { "epoch": 0.5241547530320483, "grad_norm": 5.304020404815674, "learning_rate": 9.043740160175234e-05, "loss": 0.6169, "step": 7736 }, { "epoch": 0.5242225083000204, "grad_norm": 6.051050662994385, "learning_rate": 9.043603258265452e-05, "loss": 0.9661, "step": 7737 }, { "epoch": 0.5242902635679925, "grad_norm": 7.776313304901123, "learning_rate": 9.043466356355672e-05, "loss": 0.9922, "step": 7738 }, { "epoch": 0.5243580188359644, "grad_norm": 6.945296287536621, "learning_rate": 9.04332945444589e-05, "loss": 0.8607, "step": 7739 }, { "epoch": 0.5244257741039365, "grad_norm": 7.384977340698242, "learning_rate": 9.043192552536108e-05, "loss": 1.1737, "step": 7740 }, { "epoch": 0.5244935293719086, "grad_norm": 7.252483367919922, "learning_rate": 9.043055650626326e-05, "loss": 0.8508, "step": 7741 }, { "epoch": 0.5245612846398807, "grad_norm": 6.793081283569336, "learning_rate": 9.042918748716544e-05, "loss": 0.8632, "step": 7742 }, { "epoch": 0.5246290399078528, "grad_norm": 6.1907057762146, "learning_rate": 9.042781846806763e-05, "loss": 0.8447, "step": 7743 }, { "epoch": 0.5246967951758249, "grad_norm": 6.956373691558838, "learning_rate": 9.042644944896981e-05, "loss": 0.7821, "step": 7744 }, { "epoch": 0.524764550443797, "grad_norm": 6.5989484786987305, "learning_rate": 9.0425080429872e-05, "loss": 0.9129, "step": 7745 }, { "epoch": 0.5248323057117691, "grad_norm": 6.416126728057861, "learning_rate": 9.042371141077419e-05, "loss": 0.7622, "step": 7746 }, { "epoch": 0.5249000609797412, "grad_norm": 7.08583402633667, "learning_rate": 9.042234239167637e-05, "loss": 0.868, "step": 7747 }, { "epoch": 0.5249678162477133, "grad_norm": 6.688178539276123, "learning_rate": 9.042097337257855e-05, "loss": 0.8158, "step": 7748 }, { "epoch": 0.5250355715156854, "grad_norm": 6.829257488250732, "learning_rate": 9.041960435348074e-05, "loss": 1.0495, "step": 7749 }, { "epoch": 0.5251033267836575, "grad_norm": 9.319602966308594, "learning_rate": 9.041823533438292e-05, "loss": 1.1006, "step": 7750 }, { "epoch": 0.5251710820516295, "grad_norm": 5.709659576416016, "learning_rate": 9.04168663152851e-05, "loss": 0.8396, "step": 7751 }, { "epoch": 0.5252388373196016, "grad_norm": 7.805490493774414, "learning_rate": 9.04154972961873e-05, "loss": 0.8824, "step": 7752 }, { "epoch": 0.5253065925875737, "grad_norm": 6.146732330322266, "learning_rate": 9.041412827708948e-05, "loss": 0.6136, "step": 7753 }, { "epoch": 0.5253743478555458, "grad_norm": 7.149491310119629, "learning_rate": 9.041275925799166e-05, "loss": 0.8545, "step": 7754 }, { "epoch": 0.5254421031235178, "grad_norm": 6.13656759262085, "learning_rate": 9.041139023889384e-05, "loss": 0.6775, "step": 7755 }, { "epoch": 0.5255098583914899, "grad_norm": 7.198244571685791, "learning_rate": 9.041002121979602e-05, "loss": 0.8732, "step": 7756 }, { "epoch": 0.525577613659462, "grad_norm": 7.588368892669678, "learning_rate": 9.040865220069821e-05, "loss": 0.8651, "step": 7757 }, { "epoch": 0.5256453689274341, "grad_norm": 6.695016860961914, "learning_rate": 9.040728318160039e-05, "loss": 0.8592, "step": 7758 }, { "epoch": 0.5257131241954062, "grad_norm": 6.87295389175415, "learning_rate": 9.040591416250257e-05, "loss": 0.8187, "step": 7759 }, { "epoch": 0.5257808794633783, "grad_norm": 6.2048163414001465, "learning_rate": 9.040454514340475e-05, "loss": 0.9454, "step": 7760 }, { "epoch": 0.5258486347313504, "grad_norm": 6.1639933586120605, "learning_rate": 9.040317612430695e-05, "loss": 0.7686, "step": 7761 }, { "epoch": 0.5259163899993224, "grad_norm": 6.537137508392334, "learning_rate": 9.040180710520913e-05, "loss": 0.8203, "step": 7762 }, { "epoch": 0.5259841452672945, "grad_norm": 6.688928127288818, "learning_rate": 9.040043808611131e-05, "loss": 0.7651, "step": 7763 }, { "epoch": 0.5260519005352666, "grad_norm": 7.225207805633545, "learning_rate": 9.039906906701349e-05, "loss": 0.7461, "step": 7764 }, { "epoch": 0.5261196558032387, "grad_norm": 6.173831939697266, "learning_rate": 9.039770004791567e-05, "loss": 0.6654, "step": 7765 }, { "epoch": 0.5261874110712108, "grad_norm": 6.1730146408081055, "learning_rate": 9.039633102881786e-05, "loss": 0.7595, "step": 7766 }, { "epoch": 0.5262551663391829, "grad_norm": 6.485074520111084, "learning_rate": 9.039496200972004e-05, "loss": 0.8252, "step": 7767 }, { "epoch": 0.526322921607155, "grad_norm": 5.83876895904541, "learning_rate": 9.039359299062222e-05, "loss": 0.7358, "step": 7768 }, { "epoch": 0.5263906768751271, "grad_norm": 6.3718953132629395, "learning_rate": 9.03922239715244e-05, "loss": 0.8205, "step": 7769 }, { "epoch": 0.5264584321430992, "grad_norm": 7.796750068664551, "learning_rate": 9.03908549524266e-05, "loss": 1.285, "step": 7770 }, { "epoch": 0.5265261874110712, "grad_norm": 6.802064418792725, "learning_rate": 9.038948593332878e-05, "loss": 0.8769, "step": 7771 }, { "epoch": 0.5265939426790432, "grad_norm": 6.941751956939697, "learning_rate": 9.038811691423096e-05, "loss": 0.8375, "step": 7772 }, { "epoch": 0.5266616979470153, "grad_norm": 4.891567707061768, "learning_rate": 9.038674789513314e-05, "loss": 0.7047, "step": 7773 }, { "epoch": 0.5267294532149874, "grad_norm": 6.294151306152344, "learning_rate": 9.038537887603532e-05, "loss": 0.9026, "step": 7774 }, { "epoch": 0.5267972084829595, "grad_norm": 8.100536346435547, "learning_rate": 9.038400985693751e-05, "loss": 0.912, "step": 7775 }, { "epoch": 0.5268649637509316, "grad_norm": 6.022440433502197, "learning_rate": 9.038264083783969e-05, "loss": 0.9056, "step": 7776 }, { "epoch": 0.5269327190189037, "grad_norm": 5.4183454513549805, "learning_rate": 9.038127181874187e-05, "loss": 0.883, "step": 7777 }, { "epoch": 0.5270004742868758, "grad_norm": 7.119368076324463, "learning_rate": 9.037990279964405e-05, "loss": 0.8019, "step": 7778 }, { "epoch": 0.5270682295548479, "grad_norm": 7.654230117797852, "learning_rate": 9.037853378054625e-05, "loss": 0.9149, "step": 7779 }, { "epoch": 0.52713598482282, "grad_norm": 8.382317543029785, "learning_rate": 9.037716476144843e-05, "loss": 0.8678, "step": 7780 }, { "epoch": 0.5272037400907921, "grad_norm": 6.171057224273682, "learning_rate": 9.037579574235061e-05, "loss": 0.7199, "step": 7781 }, { "epoch": 0.5272714953587642, "grad_norm": 7.584953784942627, "learning_rate": 9.037442672325279e-05, "loss": 1.1612, "step": 7782 }, { "epoch": 0.5273392506267363, "grad_norm": 8.255087852478027, "learning_rate": 9.037305770415497e-05, "loss": 1.0615, "step": 7783 }, { "epoch": 0.5274070058947083, "grad_norm": 5.925463676452637, "learning_rate": 9.037168868505716e-05, "loss": 0.5921, "step": 7784 }, { "epoch": 0.5274747611626804, "grad_norm": 6.816803932189941, "learning_rate": 9.037031966595934e-05, "loss": 0.7954, "step": 7785 }, { "epoch": 0.5275425164306525, "grad_norm": 5.511174201965332, "learning_rate": 9.036895064686152e-05, "loss": 0.6013, "step": 7786 }, { "epoch": 0.5276102716986246, "grad_norm": 8.431069374084473, "learning_rate": 9.03675816277637e-05, "loss": 1.1493, "step": 7787 }, { "epoch": 0.5276780269665966, "grad_norm": 7.5087666511535645, "learning_rate": 9.036621260866588e-05, "loss": 0.9416, "step": 7788 }, { "epoch": 0.5277457822345687, "grad_norm": 6.478548049926758, "learning_rate": 9.036484358956808e-05, "loss": 0.7571, "step": 7789 }, { "epoch": 0.5278135375025408, "grad_norm": 6.319628715515137, "learning_rate": 9.036347457047026e-05, "loss": 1.076, "step": 7790 }, { "epoch": 0.5278812927705129, "grad_norm": 7.512424945831299, "learning_rate": 9.036210555137244e-05, "loss": 1.2705, "step": 7791 }, { "epoch": 0.527949048038485, "grad_norm": 9.076953887939453, "learning_rate": 9.036073653227463e-05, "loss": 0.723, "step": 7792 }, { "epoch": 0.5280168033064571, "grad_norm": 7.088381767272949, "learning_rate": 9.035936751317681e-05, "loss": 0.8198, "step": 7793 }, { "epoch": 0.5280845585744292, "grad_norm": 5.79082727432251, "learning_rate": 9.035799849407899e-05, "loss": 0.6164, "step": 7794 }, { "epoch": 0.5281523138424012, "grad_norm": 5.18831729888916, "learning_rate": 9.035662947498119e-05, "loss": 0.7178, "step": 7795 }, { "epoch": 0.5282200691103733, "grad_norm": 5.686639308929443, "learning_rate": 9.035526045588337e-05, "loss": 0.8515, "step": 7796 }, { "epoch": 0.5282878243783454, "grad_norm": 5.51866340637207, "learning_rate": 9.035389143678555e-05, "loss": 0.7451, "step": 7797 }, { "epoch": 0.5283555796463175, "grad_norm": 5.142022609710693, "learning_rate": 9.035252241768774e-05, "loss": 0.8063, "step": 7798 }, { "epoch": 0.5284233349142896, "grad_norm": 8.204577445983887, "learning_rate": 9.035115339858992e-05, "loss": 1.0189, "step": 7799 }, { "epoch": 0.5284910901822617, "grad_norm": 7.62236213684082, "learning_rate": 9.03497843794921e-05, "loss": 1.0905, "step": 7800 }, { "epoch": 0.5285588454502338, "grad_norm": 7.067042827606201, "learning_rate": 9.034841536039428e-05, "loss": 0.8651, "step": 7801 }, { "epoch": 0.5286266007182059, "grad_norm": 5.675161361694336, "learning_rate": 9.034704634129648e-05, "loss": 0.8717, "step": 7802 }, { "epoch": 0.528694355986178, "grad_norm": 6.024029731750488, "learning_rate": 9.034567732219866e-05, "loss": 0.8326, "step": 7803 }, { "epoch": 0.52876211125415, "grad_norm": 6.060807228088379, "learning_rate": 9.034430830310084e-05, "loss": 0.7887, "step": 7804 }, { "epoch": 0.528829866522122, "grad_norm": 7.034177780151367, "learning_rate": 9.034293928400302e-05, "loss": 0.8416, "step": 7805 }, { "epoch": 0.5288976217900941, "grad_norm": 6.663289546966553, "learning_rate": 9.03415702649052e-05, "loss": 0.8078, "step": 7806 }, { "epoch": 0.5289653770580662, "grad_norm": 5.6733880043029785, "learning_rate": 9.034020124580739e-05, "loss": 0.8106, "step": 7807 }, { "epoch": 0.5290331323260383, "grad_norm": 7.0584940910339355, "learning_rate": 9.033883222670957e-05, "loss": 0.8558, "step": 7808 }, { "epoch": 0.5291008875940104, "grad_norm": 7.715261459350586, "learning_rate": 9.033746320761175e-05, "loss": 0.9663, "step": 7809 }, { "epoch": 0.5291686428619825, "grad_norm": 6.987175464630127, "learning_rate": 9.033609418851393e-05, "loss": 0.8837, "step": 7810 }, { "epoch": 0.5292363981299546, "grad_norm": 6.454022407531738, "learning_rate": 9.033472516941611e-05, "loss": 0.7697, "step": 7811 }, { "epoch": 0.5293041533979267, "grad_norm": 6.083451271057129, "learning_rate": 9.03333561503183e-05, "loss": 0.7759, "step": 7812 }, { "epoch": 0.5293719086658988, "grad_norm": 6.736910343170166, "learning_rate": 9.033198713122049e-05, "loss": 0.7654, "step": 7813 }, { "epoch": 0.5294396639338709, "grad_norm": 7.494657516479492, "learning_rate": 9.033061811212267e-05, "loss": 0.9894, "step": 7814 }, { "epoch": 0.529507419201843, "grad_norm": 5.999954700469971, "learning_rate": 9.032924909302485e-05, "loss": 0.7583, "step": 7815 }, { "epoch": 0.5295751744698151, "grad_norm": 6.741001605987549, "learning_rate": 9.032788007392704e-05, "loss": 0.6704, "step": 7816 }, { "epoch": 0.5296429297377871, "grad_norm": 7.073209762573242, "learning_rate": 9.032651105482922e-05, "loss": 0.8104, "step": 7817 }, { "epoch": 0.5297106850057592, "grad_norm": 6.424438953399658, "learning_rate": 9.03251420357314e-05, "loss": 0.8042, "step": 7818 }, { "epoch": 0.5297784402737313, "grad_norm": 5.467334270477295, "learning_rate": 9.032377301663358e-05, "loss": 0.7108, "step": 7819 }, { "epoch": 0.5298461955417033, "grad_norm": 7.322316646575928, "learning_rate": 9.032240399753576e-05, "loss": 0.9461, "step": 7820 }, { "epoch": 0.5299139508096754, "grad_norm": 6.560715198516846, "learning_rate": 9.032103497843796e-05, "loss": 1.135, "step": 7821 }, { "epoch": 0.5299817060776475, "grad_norm": 6.125954627990723, "learning_rate": 9.031966595934014e-05, "loss": 1.0004, "step": 7822 }, { "epoch": 0.5300494613456196, "grad_norm": 6.26869535446167, "learning_rate": 9.031829694024232e-05, "loss": 0.7594, "step": 7823 }, { "epoch": 0.5301172166135917, "grad_norm": 7.5978193283081055, "learning_rate": 9.03169279211445e-05, "loss": 0.7051, "step": 7824 }, { "epoch": 0.5301849718815638, "grad_norm": 9.408920288085938, "learning_rate": 9.031555890204669e-05, "loss": 0.7884, "step": 7825 }, { "epoch": 0.5302527271495359, "grad_norm": 7.134509086608887, "learning_rate": 9.031418988294887e-05, "loss": 0.844, "step": 7826 }, { "epoch": 0.530320482417508, "grad_norm": 9.650331497192383, "learning_rate": 9.031282086385105e-05, "loss": 0.9636, "step": 7827 }, { "epoch": 0.53038823768548, "grad_norm": 8.131028175354004, "learning_rate": 9.031145184475323e-05, "loss": 1.1321, "step": 7828 }, { "epoch": 0.5304559929534521, "grad_norm": 6.363440036773682, "learning_rate": 9.031008282565541e-05, "loss": 0.7437, "step": 7829 }, { "epoch": 0.5305237482214242, "grad_norm": 6.6181440353393555, "learning_rate": 9.030871380655761e-05, "loss": 1.1998, "step": 7830 }, { "epoch": 0.5305915034893963, "grad_norm": 6.380220413208008, "learning_rate": 9.030734478745979e-05, "loss": 0.8995, "step": 7831 }, { "epoch": 0.5306592587573684, "grad_norm": 7.590890407562256, "learning_rate": 9.030597576836197e-05, "loss": 0.8736, "step": 7832 }, { "epoch": 0.5307270140253405, "grad_norm": 7.453338146209717, "learning_rate": 9.030460674926415e-05, "loss": 0.7418, "step": 7833 }, { "epoch": 0.5307947692933126, "grad_norm": 5.30055570602417, "learning_rate": 9.030323773016634e-05, "loss": 0.7002, "step": 7834 }, { "epoch": 0.5308625245612847, "grad_norm": 7.599035739898682, "learning_rate": 9.030186871106852e-05, "loss": 1.0054, "step": 7835 }, { "epoch": 0.5309302798292568, "grad_norm": 7.773530006408691, "learning_rate": 9.03004996919707e-05, "loss": 0.8603, "step": 7836 }, { "epoch": 0.5309980350972288, "grad_norm": 7.378914833068848, "learning_rate": 9.029913067287288e-05, "loss": 0.8082, "step": 7837 }, { "epoch": 0.5310657903652009, "grad_norm": 5.327150344848633, "learning_rate": 9.029776165377508e-05, "loss": 0.7642, "step": 7838 }, { "epoch": 0.5311335456331729, "grad_norm": 6.01016092300415, "learning_rate": 9.029639263467726e-05, "loss": 1.0466, "step": 7839 }, { "epoch": 0.531201300901145, "grad_norm": 4.763113498687744, "learning_rate": 9.029502361557944e-05, "loss": 0.7451, "step": 7840 }, { "epoch": 0.5312690561691171, "grad_norm": 5.175201416015625, "learning_rate": 9.029365459648163e-05, "loss": 0.6928, "step": 7841 }, { "epoch": 0.5313368114370892, "grad_norm": 9.04288387298584, "learning_rate": 9.029228557738381e-05, "loss": 0.7087, "step": 7842 }, { "epoch": 0.5314045667050613, "grad_norm": 6.132844924926758, "learning_rate": 9.029091655828599e-05, "loss": 0.8231, "step": 7843 }, { "epoch": 0.5314723219730334, "grad_norm": 7.074441909790039, "learning_rate": 9.028954753918819e-05, "loss": 0.7966, "step": 7844 }, { "epoch": 0.5315400772410055, "grad_norm": 7.046668529510498, "learning_rate": 9.028817852009037e-05, "loss": 0.9588, "step": 7845 }, { "epoch": 0.5316078325089776, "grad_norm": 6.8423662185668945, "learning_rate": 9.028680950099255e-05, "loss": 0.9413, "step": 7846 }, { "epoch": 0.5316755877769497, "grad_norm": 5.093846797943115, "learning_rate": 9.028544048189473e-05, "loss": 0.6967, "step": 7847 }, { "epoch": 0.5317433430449218, "grad_norm": 7.116734027862549, "learning_rate": 9.028407146279692e-05, "loss": 0.8084, "step": 7848 }, { "epoch": 0.5318110983128939, "grad_norm": 6.06544828414917, "learning_rate": 9.02827024436991e-05, "loss": 0.8041, "step": 7849 }, { "epoch": 0.531878853580866, "grad_norm": 6.238218307495117, "learning_rate": 9.028133342460128e-05, "loss": 0.9562, "step": 7850 }, { "epoch": 0.531946608848838, "grad_norm": 5.939441680908203, "learning_rate": 9.027996440550346e-05, "loss": 0.8226, "step": 7851 }, { "epoch": 0.5320143641168101, "grad_norm": 5.409734725952148, "learning_rate": 9.027859538640564e-05, "loss": 0.7485, "step": 7852 }, { "epoch": 0.5320821193847821, "grad_norm": 5.72265625, "learning_rate": 9.027722636730784e-05, "loss": 0.691, "step": 7853 }, { "epoch": 0.5321498746527542, "grad_norm": 5.743767261505127, "learning_rate": 9.027585734821002e-05, "loss": 0.8193, "step": 7854 }, { "epoch": 0.5322176299207263, "grad_norm": 7.155900955200195, "learning_rate": 9.02744883291122e-05, "loss": 1.1046, "step": 7855 }, { "epoch": 0.5322853851886984, "grad_norm": 5.2816996574401855, "learning_rate": 9.027311931001438e-05, "loss": 0.5548, "step": 7856 }, { "epoch": 0.5323531404566705, "grad_norm": 8.357234001159668, "learning_rate": 9.027175029091657e-05, "loss": 0.9597, "step": 7857 }, { "epoch": 0.5324208957246426, "grad_norm": 6.937591075897217, "learning_rate": 9.027038127181875e-05, "loss": 1.0591, "step": 7858 }, { "epoch": 0.5324886509926147, "grad_norm": 9.02571964263916, "learning_rate": 9.026901225272093e-05, "loss": 0.9186, "step": 7859 }, { "epoch": 0.5325564062605868, "grad_norm": 6.292126178741455, "learning_rate": 9.026764323362311e-05, "loss": 0.9691, "step": 7860 }, { "epoch": 0.5326241615285588, "grad_norm": 6.714123725891113, "learning_rate": 9.026627421452529e-05, "loss": 0.6972, "step": 7861 }, { "epoch": 0.5326919167965309, "grad_norm": 8.798256874084473, "learning_rate": 9.026490519542749e-05, "loss": 1.237, "step": 7862 }, { "epoch": 0.532759672064503, "grad_norm": 7.367827892303467, "learning_rate": 9.026353617632967e-05, "loss": 0.8725, "step": 7863 }, { "epoch": 0.5328274273324751, "grad_norm": 7.99265193939209, "learning_rate": 9.026216715723185e-05, "loss": 0.7761, "step": 7864 }, { "epoch": 0.5328951826004472, "grad_norm": 6.861504554748535, "learning_rate": 9.026079813813403e-05, "loss": 1.0259, "step": 7865 }, { "epoch": 0.5329629378684193, "grad_norm": 6.661327838897705, "learning_rate": 9.025942911903621e-05, "loss": 0.7245, "step": 7866 }, { "epoch": 0.5330306931363914, "grad_norm": 6.973964691162109, "learning_rate": 9.02580600999384e-05, "loss": 1.0628, "step": 7867 }, { "epoch": 0.5330984484043635, "grad_norm": 6.732471466064453, "learning_rate": 9.025669108084058e-05, "loss": 0.7972, "step": 7868 }, { "epoch": 0.5331662036723355, "grad_norm": 5.995905876159668, "learning_rate": 9.025532206174276e-05, "loss": 0.831, "step": 7869 }, { "epoch": 0.5332339589403076, "grad_norm": 7.870401382446289, "learning_rate": 9.025395304264494e-05, "loss": 0.9793, "step": 7870 }, { "epoch": 0.5333017142082797, "grad_norm": 5.577757835388184, "learning_rate": 9.025258402354714e-05, "loss": 0.7032, "step": 7871 }, { "epoch": 0.5333694694762517, "grad_norm": 7.106391429901123, "learning_rate": 9.025121500444932e-05, "loss": 0.8424, "step": 7872 }, { "epoch": 0.5334372247442238, "grad_norm": 5.373198986053467, "learning_rate": 9.02498459853515e-05, "loss": 0.8357, "step": 7873 }, { "epoch": 0.5335049800121959, "grad_norm": 6.655058860778809, "learning_rate": 9.024847696625368e-05, "loss": 1.0405, "step": 7874 }, { "epoch": 0.533572735280168, "grad_norm": 5.896321773529053, "learning_rate": 9.024710794715586e-05, "loss": 1.0676, "step": 7875 }, { "epoch": 0.5336404905481401, "grad_norm": 7.391895771026611, "learning_rate": 9.024573892805805e-05, "loss": 0.9807, "step": 7876 }, { "epoch": 0.5337082458161122, "grad_norm": 8.472131729125977, "learning_rate": 9.024436990896023e-05, "loss": 1.0328, "step": 7877 }, { "epoch": 0.5337760010840843, "grad_norm": 5.47896671295166, "learning_rate": 9.024300088986241e-05, "loss": 0.6698, "step": 7878 }, { "epoch": 0.5338437563520564, "grad_norm": 7.560204982757568, "learning_rate": 9.024163187076459e-05, "loss": 0.7431, "step": 7879 }, { "epoch": 0.5339115116200285, "grad_norm": 7.789547920227051, "learning_rate": 9.024026285166679e-05, "loss": 0.7852, "step": 7880 }, { "epoch": 0.5339792668880006, "grad_norm": 6.868436336517334, "learning_rate": 9.023889383256897e-05, "loss": 0.8807, "step": 7881 }, { "epoch": 0.5340470221559727, "grad_norm": 5.439123153686523, "learning_rate": 9.023752481347115e-05, "loss": 0.8914, "step": 7882 }, { "epoch": 0.5341147774239448, "grad_norm": 6.825755596160889, "learning_rate": 9.023615579437333e-05, "loss": 0.9643, "step": 7883 }, { "epoch": 0.5341825326919168, "grad_norm": 8.002971649169922, "learning_rate": 9.023478677527552e-05, "loss": 0.8881, "step": 7884 }, { "epoch": 0.5342502879598889, "grad_norm": 6.1144022941589355, "learning_rate": 9.02334177561777e-05, "loss": 0.9887, "step": 7885 }, { "epoch": 0.5343180432278609, "grad_norm": 6.414967060089111, "learning_rate": 9.023204873707988e-05, "loss": 0.8882, "step": 7886 }, { "epoch": 0.534385798495833, "grad_norm": 6.222415447235107, "learning_rate": 9.023067971798208e-05, "loss": 0.7824, "step": 7887 }, { "epoch": 0.5344535537638051, "grad_norm": 6.128655910491943, "learning_rate": 9.022931069888426e-05, "loss": 1.0061, "step": 7888 }, { "epoch": 0.5345213090317772, "grad_norm": 8.61054515838623, "learning_rate": 9.022794167978644e-05, "loss": 0.8921, "step": 7889 }, { "epoch": 0.5345890642997493, "grad_norm": 7.387160301208496, "learning_rate": 9.022657266068863e-05, "loss": 1.0001, "step": 7890 }, { "epoch": 0.5346568195677214, "grad_norm": 5.878726005554199, "learning_rate": 9.022520364159081e-05, "loss": 0.8865, "step": 7891 }, { "epoch": 0.5347245748356935, "grad_norm": 6.011378288269043, "learning_rate": 9.022383462249299e-05, "loss": 0.8961, "step": 7892 }, { "epoch": 0.5347923301036656, "grad_norm": 6.258980751037598, "learning_rate": 9.022246560339517e-05, "loss": 0.8744, "step": 7893 }, { "epoch": 0.5348600853716377, "grad_norm": 8.403583526611328, "learning_rate": 9.022109658429737e-05, "loss": 0.7601, "step": 7894 }, { "epoch": 0.5349278406396097, "grad_norm": 9.266546249389648, "learning_rate": 9.021972756519955e-05, "loss": 1.0684, "step": 7895 }, { "epoch": 0.5349955959075818, "grad_norm": 6.160810470581055, "learning_rate": 9.021835854610173e-05, "loss": 0.6533, "step": 7896 }, { "epoch": 0.5350633511755539, "grad_norm": 6.975162982940674, "learning_rate": 9.02169895270039e-05, "loss": 0.8974, "step": 7897 }, { "epoch": 0.535131106443526, "grad_norm": 6.557673931121826, "learning_rate": 9.021562050790609e-05, "loss": 0.8291, "step": 7898 }, { "epoch": 0.5351988617114981, "grad_norm": 6.393524169921875, "learning_rate": 9.021425148880828e-05, "loss": 0.8898, "step": 7899 }, { "epoch": 0.5352666169794702, "grad_norm": 6.089913368225098, "learning_rate": 9.021288246971046e-05, "loss": 0.9246, "step": 7900 }, { "epoch": 0.5353343722474423, "grad_norm": 5.542110919952393, "learning_rate": 9.021151345061264e-05, "loss": 0.7785, "step": 7901 }, { "epoch": 0.5354021275154143, "grad_norm": 6.648521900177002, "learning_rate": 9.021014443151482e-05, "loss": 0.7992, "step": 7902 }, { "epoch": 0.5354698827833864, "grad_norm": 5.972752571105957, "learning_rate": 9.020877541241702e-05, "loss": 0.8436, "step": 7903 }, { "epoch": 0.5355376380513585, "grad_norm": 8.538671493530273, "learning_rate": 9.02074063933192e-05, "loss": 0.7983, "step": 7904 }, { "epoch": 0.5356053933193305, "grad_norm": 6.8005690574646, "learning_rate": 9.020603737422138e-05, "loss": 0.6566, "step": 7905 }, { "epoch": 0.5356731485873026, "grad_norm": 9.595316886901855, "learning_rate": 9.020466835512356e-05, "loss": 0.9392, "step": 7906 }, { "epoch": 0.5357409038552747, "grad_norm": 6.57702112197876, "learning_rate": 9.020329933602574e-05, "loss": 0.8176, "step": 7907 }, { "epoch": 0.5358086591232468, "grad_norm": 6.209599018096924, "learning_rate": 9.020193031692793e-05, "loss": 0.8261, "step": 7908 }, { "epoch": 0.5358764143912189, "grad_norm": 8.036966323852539, "learning_rate": 9.020056129783011e-05, "loss": 0.8524, "step": 7909 }, { "epoch": 0.535944169659191, "grad_norm": 7.813173770904541, "learning_rate": 9.019919227873229e-05, "loss": 1.0166, "step": 7910 }, { "epoch": 0.5360119249271631, "grad_norm": 6.065460205078125, "learning_rate": 9.019782325963447e-05, "loss": 0.9869, "step": 7911 }, { "epoch": 0.5360796801951352, "grad_norm": 5.9636993408203125, "learning_rate": 9.019645424053667e-05, "loss": 0.8227, "step": 7912 }, { "epoch": 0.5361474354631073, "grad_norm": 7.270611763000488, "learning_rate": 9.019508522143885e-05, "loss": 0.7653, "step": 7913 }, { "epoch": 0.5362151907310794, "grad_norm": 6.054271221160889, "learning_rate": 9.019371620234103e-05, "loss": 0.6753, "step": 7914 }, { "epoch": 0.5362829459990515, "grad_norm": 6.736015796661377, "learning_rate": 9.019234718324321e-05, "loss": 0.868, "step": 7915 }, { "epoch": 0.5363507012670236, "grad_norm": 6.971399307250977, "learning_rate": 9.019097816414539e-05, "loss": 0.9097, "step": 7916 }, { "epoch": 0.5364184565349956, "grad_norm": 7.001399040222168, "learning_rate": 9.018960914504758e-05, "loss": 0.9245, "step": 7917 }, { "epoch": 0.5364862118029676, "grad_norm": 6.465878009796143, "learning_rate": 9.018824012594976e-05, "loss": 0.8308, "step": 7918 }, { "epoch": 0.5365539670709397, "grad_norm": 6.622878551483154, "learning_rate": 9.018687110685194e-05, "loss": 0.6466, "step": 7919 }, { "epoch": 0.5366217223389118, "grad_norm": 7.167222023010254, "learning_rate": 9.018550208775412e-05, "loss": 0.9521, "step": 7920 }, { "epoch": 0.5366894776068839, "grad_norm": 7.362137794494629, "learning_rate": 9.01841330686563e-05, "loss": 0.7561, "step": 7921 }, { "epoch": 0.536757232874856, "grad_norm": 6.314590930938721, "learning_rate": 9.01827640495585e-05, "loss": 0.8892, "step": 7922 }, { "epoch": 0.5368249881428281, "grad_norm": 5.427424430847168, "learning_rate": 9.018139503046068e-05, "loss": 0.7648, "step": 7923 }, { "epoch": 0.5368927434108002, "grad_norm": 8.388633728027344, "learning_rate": 9.018002601136286e-05, "loss": 1.0141, "step": 7924 }, { "epoch": 0.5369604986787723, "grad_norm": 5.852472305297852, "learning_rate": 9.017865699226504e-05, "loss": 0.7744, "step": 7925 }, { "epoch": 0.5370282539467444, "grad_norm": 6.68207311630249, "learning_rate": 9.017728797316723e-05, "loss": 0.9003, "step": 7926 }, { "epoch": 0.5370960092147165, "grad_norm": 6.475498199462891, "learning_rate": 9.017591895406941e-05, "loss": 1.0294, "step": 7927 }, { "epoch": 0.5371637644826885, "grad_norm": 8.690805435180664, "learning_rate": 9.017454993497159e-05, "loss": 1.0175, "step": 7928 }, { "epoch": 0.5372315197506606, "grad_norm": 5.874716758728027, "learning_rate": 9.017318091587377e-05, "loss": 0.7761, "step": 7929 }, { "epoch": 0.5372992750186327, "grad_norm": 5.937534332275391, "learning_rate": 9.017181189677595e-05, "loss": 0.6782, "step": 7930 }, { "epoch": 0.5373670302866048, "grad_norm": 6.882140159606934, "learning_rate": 9.017044287767815e-05, "loss": 0.8815, "step": 7931 }, { "epoch": 0.5374347855545769, "grad_norm": 5.27649450302124, "learning_rate": 9.016907385858033e-05, "loss": 0.6816, "step": 7932 }, { "epoch": 0.537502540822549, "grad_norm": 5.861900329589844, "learning_rate": 9.016770483948251e-05, "loss": 0.7224, "step": 7933 }, { "epoch": 0.537570296090521, "grad_norm": 5.196268558502197, "learning_rate": 9.01663358203847e-05, "loss": 0.7526, "step": 7934 }, { "epoch": 0.5376380513584931, "grad_norm": 7.050784111022949, "learning_rate": 9.016496680128688e-05, "loss": 0.7451, "step": 7935 }, { "epoch": 0.5377058066264652, "grad_norm": 6.952356338500977, "learning_rate": 9.016359778218906e-05, "loss": 0.8074, "step": 7936 }, { "epoch": 0.5377735618944373, "grad_norm": 6.38202428817749, "learning_rate": 9.016222876309126e-05, "loss": 0.8738, "step": 7937 }, { "epoch": 0.5378413171624093, "grad_norm": 7.252553462982178, "learning_rate": 9.016085974399344e-05, "loss": 1.001, "step": 7938 }, { "epoch": 0.5379090724303814, "grad_norm": 7.240688323974609, "learning_rate": 9.015949072489562e-05, "loss": 0.818, "step": 7939 }, { "epoch": 0.5379768276983535, "grad_norm": 8.255992889404297, "learning_rate": 9.015812170579781e-05, "loss": 0.9443, "step": 7940 }, { "epoch": 0.5380445829663256, "grad_norm": 8.154399871826172, "learning_rate": 9.015675268669999e-05, "loss": 1.2508, "step": 7941 }, { "epoch": 0.5381123382342977, "grad_norm": 6.9914069175720215, "learning_rate": 9.015538366760217e-05, "loss": 0.8552, "step": 7942 }, { "epoch": 0.5381800935022698, "grad_norm": 6.447218894958496, "learning_rate": 9.015401464850435e-05, "loss": 0.9249, "step": 7943 }, { "epoch": 0.5382478487702419, "grad_norm": 6.867689609527588, "learning_rate": 9.015264562940653e-05, "loss": 1.0145, "step": 7944 }, { "epoch": 0.538315604038214, "grad_norm": 5.5277419090271, "learning_rate": 9.015127661030873e-05, "loss": 0.5239, "step": 7945 }, { "epoch": 0.5383833593061861, "grad_norm": 6.092846393585205, "learning_rate": 9.01499075912109e-05, "loss": 0.7933, "step": 7946 }, { "epoch": 0.5384511145741582, "grad_norm": 5.636915683746338, "learning_rate": 9.014853857211309e-05, "loss": 1.0177, "step": 7947 }, { "epoch": 0.5385188698421303, "grad_norm": 6.031810760498047, "learning_rate": 9.014716955301527e-05, "loss": 0.7189, "step": 7948 }, { "epoch": 0.5385866251101024, "grad_norm": 6.822633266448975, "learning_rate": 9.014580053391746e-05, "loss": 0.8511, "step": 7949 }, { "epoch": 0.5386543803780744, "grad_norm": 6.105456352233887, "learning_rate": 9.014443151481964e-05, "loss": 0.8941, "step": 7950 }, { "epoch": 0.5387221356460464, "grad_norm": 6.250092029571533, "learning_rate": 9.014306249572182e-05, "loss": 0.7968, "step": 7951 }, { "epoch": 0.5387898909140185, "grad_norm": 7.53422212600708, "learning_rate": 9.0141693476624e-05, "loss": 0.7581, "step": 7952 }, { "epoch": 0.5388576461819906, "grad_norm": 5.800909042358398, "learning_rate": 9.014032445752618e-05, "loss": 0.5946, "step": 7953 }, { "epoch": 0.5389254014499627, "grad_norm": 6.202663898468018, "learning_rate": 9.013895543842838e-05, "loss": 0.9158, "step": 7954 }, { "epoch": 0.5389931567179348, "grad_norm": 6.153894424438477, "learning_rate": 9.013758641933056e-05, "loss": 0.921, "step": 7955 }, { "epoch": 0.5390609119859069, "grad_norm": 7.0276384353637695, "learning_rate": 9.013621740023274e-05, "loss": 0.8353, "step": 7956 }, { "epoch": 0.539128667253879, "grad_norm": 7.378089427947998, "learning_rate": 9.013484838113492e-05, "loss": 0.7336, "step": 7957 }, { "epoch": 0.5391964225218511, "grad_norm": 7.097971439361572, "learning_rate": 9.013347936203711e-05, "loss": 0.7911, "step": 7958 }, { "epoch": 0.5392641777898232, "grad_norm": 6.771259307861328, "learning_rate": 9.013211034293929e-05, "loss": 0.8515, "step": 7959 }, { "epoch": 0.5393319330577953, "grad_norm": 6.949087142944336, "learning_rate": 9.013074132384147e-05, "loss": 1.0146, "step": 7960 }, { "epoch": 0.5393996883257673, "grad_norm": 7.584465026855469, "learning_rate": 9.012937230474365e-05, "loss": 0.681, "step": 7961 }, { "epoch": 0.5394674435937394, "grad_norm": 7.227475643157959, "learning_rate": 9.012800328564583e-05, "loss": 1.085, "step": 7962 }, { "epoch": 0.5395351988617115, "grad_norm": 5.863607406616211, "learning_rate": 9.012663426654803e-05, "loss": 0.7184, "step": 7963 }, { "epoch": 0.5396029541296836, "grad_norm": 6.440633296966553, "learning_rate": 9.01252652474502e-05, "loss": 0.7896, "step": 7964 }, { "epoch": 0.5396707093976557, "grad_norm": 6.37113094329834, "learning_rate": 9.012389622835239e-05, "loss": 0.8235, "step": 7965 }, { "epoch": 0.5397384646656278, "grad_norm": 7.00465202331543, "learning_rate": 9.012252720925457e-05, "loss": 0.9816, "step": 7966 }, { "epoch": 0.5398062199335998, "grad_norm": 6.948329925537109, "learning_rate": 9.012115819015676e-05, "loss": 0.721, "step": 7967 }, { "epoch": 0.5398739752015719, "grad_norm": 7.836682319641113, "learning_rate": 9.011978917105894e-05, "loss": 0.8239, "step": 7968 }, { "epoch": 0.539941730469544, "grad_norm": 7.849715232849121, "learning_rate": 9.011842015196112e-05, "loss": 0.9537, "step": 7969 }, { "epoch": 0.5400094857375161, "grad_norm": 6.9721221923828125, "learning_rate": 9.01170511328633e-05, "loss": 0.8685, "step": 7970 }, { "epoch": 0.5400772410054882, "grad_norm": 6.895398139953613, "learning_rate": 9.011568211376548e-05, "loss": 0.7817, "step": 7971 }, { "epoch": 0.5401449962734602, "grad_norm": 6.040884017944336, "learning_rate": 9.011431309466768e-05, "loss": 0.7531, "step": 7972 }, { "epoch": 0.5402127515414323, "grad_norm": 8.733409881591797, "learning_rate": 9.011294407556986e-05, "loss": 1.1737, "step": 7973 }, { "epoch": 0.5402805068094044, "grad_norm": 7.4223761558532715, "learning_rate": 9.011157505647204e-05, "loss": 0.7954, "step": 7974 }, { "epoch": 0.5403482620773765, "grad_norm": 6.471921443939209, "learning_rate": 9.011020603737422e-05, "loss": 0.8795, "step": 7975 }, { "epoch": 0.5404160173453486, "grad_norm": 7.537938594818115, "learning_rate": 9.01088370182764e-05, "loss": 0.888, "step": 7976 }, { "epoch": 0.5404837726133207, "grad_norm": 7.756222724914551, "learning_rate": 9.010746799917859e-05, "loss": 1.0905, "step": 7977 }, { "epoch": 0.5405515278812928, "grad_norm": 6.659689426422119, "learning_rate": 9.010609898008077e-05, "loss": 0.7667, "step": 7978 }, { "epoch": 0.5406192831492649, "grad_norm": 9.589966773986816, "learning_rate": 9.010472996098295e-05, "loss": 0.9853, "step": 7979 }, { "epoch": 0.540687038417237, "grad_norm": 7.351951599121094, "learning_rate": 9.010336094188515e-05, "loss": 0.7348, "step": 7980 }, { "epoch": 0.5407547936852091, "grad_norm": 7.74218225479126, "learning_rate": 9.010199192278733e-05, "loss": 1.1712, "step": 7981 }, { "epoch": 0.5408225489531812, "grad_norm": 6.658298015594482, "learning_rate": 9.01006229036895e-05, "loss": 0.8334, "step": 7982 }, { "epoch": 0.5408903042211531, "grad_norm": 6.736339092254639, "learning_rate": 9.00992538845917e-05, "loss": 0.9479, "step": 7983 }, { "epoch": 0.5409580594891252, "grad_norm": 6.63740873336792, "learning_rate": 9.009788486549388e-05, "loss": 0.9086, "step": 7984 }, { "epoch": 0.5410258147570973, "grad_norm": 6.095336437225342, "learning_rate": 9.009651584639606e-05, "loss": 0.8407, "step": 7985 }, { "epoch": 0.5410935700250694, "grad_norm": 5.067421913146973, "learning_rate": 9.009514682729826e-05, "loss": 0.8147, "step": 7986 }, { "epoch": 0.5411613252930415, "grad_norm": 5.876379013061523, "learning_rate": 9.009377780820044e-05, "loss": 0.8639, "step": 7987 }, { "epoch": 0.5412290805610136, "grad_norm": 6.525248050689697, "learning_rate": 9.009240878910262e-05, "loss": 0.7483, "step": 7988 }, { "epoch": 0.5412968358289857, "grad_norm": 7.615539073944092, "learning_rate": 9.00910397700048e-05, "loss": 0.8863, "step": 7989 }, { "epoch": 0.5413645910969578, "grad_norm": 7.767553329467773, "learning_rate": 9.008967075090699e-05, "loss": 0.8501, "step": 7990 }, { "epoch": 0.5414323463649299, "grad_norm": 7.159237384796143, "learning_rate": 9.008830173180917e-05, "loss": 0.8759, "step": 7991 }, { "epoch": 0.541500101632902, "grad_norm": 6.989110469818115, "learning_rate": 9.008693271271135e-05, "loss": 0.9121, "step": 7992 }, { "epoch": 0.5415678569008741, "grad_norm": 6.767205238342285, "learning_rate": 9.008556369361353e-05, "loss": 0.6788, "step": 7993 }, { "epoch": 0.5416356121688461, "grad_norm": 5.92645788192749, "learning_rate": 9.008419467451571e-05, "loss": 0.6165, "step": 7994 }, { "epoch": 0.5417033674368182, "grad_norm": 5.536478042602539, "learning_rate": 9.00828256554179e-05, "loss": 0.6475, "step": 7995 }, { "epoch": 0.5417711227047903, "grad_norm": 6.186464786529541, "learning_rate": 9.008145663632009e-05, "loss": 0.6399, "step": 7996 }, { "epoch": 0.5418388779727624, "grad_norm": 6.986947536468506, "learning_rate": 9.008008761722227e-05, "loss": 0.8797, "step": 7997 }, { "epoch": 0.5419066332407345, "grad_norm": 7.718267917633057, "learning_rate": 9.007871859812445e-05, "loss": 0.8077, "step": 7998 }, { "epoch": 0.5419743885087066, "grad_norm": 6.4064483642578125, "learning_rate": 9.007734957902663e-05, "loss": 0.7077, "step": 7999 }, { "epoch": 0.5420421437766786, "grad_norm": 7.1472320556640625, "learning_rate": 9.007598055992882e-05, "loss": 0.8007, "step": 8000 }, { "epoch": 0.5421098990446507, "grad_norm": 7.518289566040039, "learning_rate": 9.0074611540831e-05, "loss": 0.8132, "step": 8001 }, { "epoch": 0.5421776543126228, "grad_norm": 6.509357929229736, "learning_rate": 9.007324252173318e-05, "loss": 0.8157, "step": 8002 }, { "epoch": 0.5422454095805949, "grad_norm": 6.129663467407227, "learning_rate": 9.007187350263536e-05, "loss": 1.0118, "step": 8003 }, { "epoch": 0.542313164848567, "grad_norm": 7.274816513061523, "learning_rate": 9.007050448353756e-05, "loss": 1.0167, "step": 8004 }, { "epoch": 0.542380920116539, "grad_norm": 6.708817005157471, "learning_rate": 9.006913546443974e-05, "loss": 0.8317, "step": 8005 }, { "epoch": 0.5424486753845111, "grad_norm": 6.974154949188232, "learning_rate": 9.006776644534192e-05, "loss": 0.6758, "step": 8006 }, { "epoch": 0.5425164306524832, "grad_norm": 6.915993690490723, "learning_rate": 9.00663974262441e-05, "loss": 0.8113, "step": 8007 }, { "epoch": 0.5425841859204553, "grad_norm": 7.1193623542785645, "learning_rate": 9.006502840714628e-05, "loss": 0.9428, "step": 8008 }, { "epoch": 0.5426519411884274, "grad_norm": 6.0862298011779785, "learning_rate": 9.006365938804847e-05, "loss": 0.8424, "step": 8009 }, { "epoch": 0.5427196964563995, "grad_norm": 7.726379871368408, "learning_rate": 9.006229036895065e-05, "loss": 1.0722, "step": 8010 }, { "epoch": 0.5427874517243716, "grad_norm": 7.901097774505615, "learning_rate": 9.006092134985283e-05, "loss": 0.9023, "step": 8011 }, { "epoch": 0.5428552069923437, "grad_norm": 6.264953136444092, "learning_rate": 9.005955233075501e-05, "loss": 0.967, "step": 8012 }, { "epoch": 0.5429229622603158, "grad_norm": 7.827919960021973, "learning_rate": 9.00581833116572e-05, "loss": 0.8026, "step": 8013 }, { "epoch": 0.5429907175282879, "grad_norm": 5.923975944519043, "learning_rate": 9.005681429255939e-05, "loss": 0.8619, "step": 8014 }, { "epoch": 0.54305847279626, "grad_norm": 8.078742027282715, "learning_rate": 9.005544527346157e-05, "loss": 1.0838, "step": 8015 }, { "epoch": 0.5431262280642319, "grad_norm": 8.241996765136719, "learning_rate": 9.005407625436375e-05, "loss": 0.8315, "step": 8016 }, { "epoch": 0.543193983332204, "grad_norm": 8.346336364746094, "learning_rate": 9.005270723526593e-05, "loss": 0.809, "step": 8017 }, { "epoch": 0.5432617386001761, "grad_norm": 8.163125991821289, "learning_rate": 9.005133821616812e-05, "loss": 0.9445, "step": 8018 }, { "epoch": 0.5433294938681482, "grad_norm": 6.224919319152832, "learning_rate": 9.00499691970703e-05, "loss": 0.8583, "step": 8019 }, { "epoch": 0.5433972491361203, "grad_norm": 6.901029586791992, "learning_rate": 9.004860017797248e-05, "loss": 0.8486, "step": 8020 }, { "epoch": 0.5434650044040924, "grad_norm": 5.692861080169678, "learning_rate": 9.004723115887466e-05, "loss": 0.7233, "step": 8021 }, { "epoch": 0.5435327596720645, "grad_norm": 7.3206939697265625, "learning_rate": 9.004586213977684e-05, "loss": 0.9268, "step": 8022 }, { "epoch": 0.5436005149400366, "grad_norm": 5.781581401824951, "learning_rate": 9.004449312067904e-05, "loss": 0.8536, "step": 8023 }, { "epoch": 0.5436682702080087, "grad_norm": 6.006667137145996, "learning_rate": 9.004312410158122e-05, "loss": 0.6872, "step": 8024 }, { "epoch": 0.5437360254759808, "grad_norm": 6.082067012786865, "learning_rate": 9.00417550824834e-05, "loss": 1.0156, "step": 8025 }, { "epoch": 0.5438037807439529, "grad_norm": 9.1718111038208, "learning_rate": 9.004038606338559e-05, "loss": 1.1392, "step": 8026 }, { "epoch": 0.543871536011925, "grad_norm": 8.803008079528809, "learning_rate": 9.003901704428777e-05, "loss": 1.1443, "step": 8027 }, { "epoch": 0.543939291279897, "grad_norm": 6.125607013702393, "learning_rate": 9.003764802518995e-05, "loss": 0.8334, "step": 8028 }, { "epoch": 0.5440070465478691, "grad_norm": 5.974092960357666, "learning_rate": 9.003627900609215e-05, "loss": 0.7561, "step": 8029 }, { "epoch": 0.5440748018158412, "grad_norm": 6.347378730773926, "learning_rate": 9.003490998699433e-05, "loss": 0.7946, "step": 8030 }, { "epoch": 0.5441425570838133, "grad_norm": 6.224796295166016, "learning_rate": 9.00335409678965e-05, "loss": 0.7501, "step": 8031 }, { "epoch": 0.5442103123517853, "grad_norm": 9.026642799377441, "learning_rate": 9.00321719487987e-05, "loss": 0.8056, "step": 8032 }, { "epoch": 0.5442780676197574, "grad_norm": 6.823796272277832, "learning_rate": 9.003080292970088e-05, "loss": 0.7378, "step": 8033 }, { "epoch": 0.5443458228877295, "grad_norm": 6.37065315246582, "learning_rate": 9.002943391060306e-05, "loss": 1.0857, "step": 8034 }, { "epoch": 0.5444135781557016, "grad_norm": 6.7236151695251465, "learning_rate": 9.002806489150524e-05, "loss": 0.7145, "step": 8035 }, { "epoch": 0.5444813334236737, "grad_norm": 8.093012809753418, "learning_rate": 9.002669587240743e-05, "loss": 1.1783, "step": 8036 }, { "epoch": 0.5445490886916458, "grad_norm": 6.60814094543457, "learning_rate": 9.002532685330962e-05, "loss": 0.7929, "step": 8037 }, { "epoch": 0.5446168439596178, "grad_norm": 7.279522895812988, "learning_rate": 9.00239578342118e-05, "loss": 0.6758, "step": 8038 }, { "epoch": 0.5446845992275899, "grad_norm": 5.578268051147461, "learning_rate": 9.002258881511398e-05, "loss": 0.7601, "step": 8039 }, { "epoch": 0.544752354495562, "grad_norm": 4.871660232543945, "learning_rate": 9.002121979601616e-05, "loss": 0.635, "step": 8040 }, { "epoch": 0.5448201097635341, "grad_norm": 6.011699199676514, "learning_rate": 9.001985077691835e-05, "loss": 0.8716, "step": 8041 }, { "epoch": 0.5448878650315062, "grad_norm": 5.491373062133789, "learning_rate": 9.001848175782053e-05, "loss": 0.9069, "step": 8042 }, { "epoch": 0.5449556202994783, "grad_norm": 7.352384567260742, "learning_rate": 9.001711273872271e-05, "loss": 0.9996, "step": 8043 }, { "epoch": 0.5450233755674504, "grad_norm": 5.829630374908447, "learning_rate": 9.001574371962489e-05, "loss": 0.9563, "step": 8044 }, { "epoch": 0.5450911308354225, "grad_norm": 6.887197017669678, "learning_rate": 9.001437470052708e-05, "loss": 1.0043, "step": 8045 }, { "epoch": 0.5451588861033946, "grad_norm": 7.507176399230957, "learning_rate": 9.001300568142927e-05, "loss": 1.1256, "step": 8046 }, { "epoch": 0.5452266413713667, "grad_norm": 5.741685390472412, "learning_rate": 9.001163666233145e-05, "loss": 0.7904, "step": 8047 }, { "epoch": 0.5452943966393388, "grad_norm": 6.044425964355469, "learning_rate": 9.001026764323363e-05, "loss": 0.8707, "step": 8048 }, { "epoch": 0.5453621519073107, "grad_norm": 7.574776649475098, "learning_rate": 9.00088986241358e-05, "loss": 1.1286, "step": 8049 }, { "epoch": 0.5454299071752828, "grad_norm": 6.032942771911621, "learning_rate": 9.0007529605038e-05, "loss": 0.7711, "step": 8050 }, { "epoch": 0.5454976624432549, "grad_norm": 9.648497581481934, "learning_rate": 9.000616058594018e-05, "loss": 0.8222, "step": 8051 }, { "epoch": 0.545565417711227, "grad_norm": 6.6689839363098145, "learning_rate": 9.000479156684236e-05, "loss": 0.7225, "step": 8052 }, { "epoch": 0.5456331729791991, "grad_norm": 4.215627193450928, "learning_rate": 9.000342254774454e-05, "loss": 0.6903, "step": 8053 }, { "epoch": 0.5457009282471712, "grad_norm": 6.513290882110596, "learning_rate": 9.000205352864672e-05, "loss": 0.9443, "step": 8054 }, { "epoch": 0.5457686835151433, "grad_norm": 5.759246826171875, "learning_rate": 9.000068450954892e-05, "loss": 0.7979, "step": 8055 }, { "epoch": 0.5458364387831154, "grad_norm": 5.937248229980469, "learning_rate": 8.99993154904511e-05, "loss": 0.8381, "step": 8056 }, { "epoch": 0.5459041940510875, "grad_norm": 8.01611328125, "learning_rate": 8.999794647135328e-05, "loss": 0.9463, "step": 8057 }, { "epoch": 0.5459719493190596, "grad_norm": 7.086623191833496, "learning_rate": 8.999657745225546e-05, "loss": 0.711, "step": 8058 }, { "epoch": 0.5460397045870317, "grad_norm": 8.19179916381836, "learning_rate": 8.999520843315765e-05, "loss": 0.7565, "step": 8059 }, { "epoch": 0.5461074598550038, "grad_norm": 7.0273518562316895, "learning_rate": 8.999383941405983e-05, "loss": 0.9285, "step": 8060 }, { "epoch": 0.5461752151229758, "grad_norm": 7.5481743812561035, "learning_rate": 8.999247039496201e-05, "loss": 0.9347, "step": 8061 }, { "epoch": 0.5462429703909479, "grad_norm": 7.882439613342285, "learning_rate": 8.999110137586419e-05, "loss": 0.8584, "step": 8062 }, { "epoch": 0.54631072565892, "grad_norm": 7.260676860809326, "learning_rate": 8.998973235676637e-05, "loss": 0.7111, "step": 8063 }, { "epoch": 0.5463784809268921, "grad_norm": 6.826397895812988, "learning_rate": 8.998836333766857e-05, "loss": 0.6173, "step": 8064 }, { "epoch": 0.5464462361948641, "grad_norm": 7.557692527770996, "learning_rate": 8.998699431857075e-05, "loss": 0.9096, "step": 8065 }, { "epoch": 0.5465139914628362, "grad_norm": 5.558192253112793, "learning_rate": 8.998562529947293e-05, "loss": 0.8556, "step": 8066 }, { "epoch": 0.5465817467308083, "grad_norm": 7.186037540435791, "learning_rate": 8.99842562803751e-05, "loss": 1.0854, "step": 8067 }, { "epoch": 0.5466495019987804, "grad_norm": 6.516674518585205, "learning_rate": 8.99828872612773e-05, "loss": 0.7311, "step": 8068 }, { "epoch": 0.5467172572667525, "grad_norm": 7.974714279174805, "learning_rate": 8.998151824217948e-05, "loss": 0.9699, "step": 8069 }, { "epoch": 0.5467850125347246, "grad_norm": 6.709014892578125, "learning_rate": 8.998014922308166e-05, "loss": 0.8903, "step": 8070 }, { "epoch": 0.5468527678026966, "grad_norm": 5.7826361656188965, "learning_rate": 8.997878020398384e-05, "loss": 0.7266, "step": 8071 }, { "epoch": 0.5469205230706687, "grad_norm": 6.743555068969727, "learning_rate": 8.997741118488604e-05, "loss": 0.9665, "step": 8072 }, { "epoch": 0.5469882783386408, "grad_norm": 6.872563362121582, "learning_rate": 8.997604216578822e-05, "loss": 0.8867, "step": 8073 }, { "epoch": 0.5470560336066129, "grad_norm": 7.1092658042907715, "learning_rate": 8.99746731466904e-05, "loss": 1.1457, "step": 8074 }, { "epoch": 0.547123788874585, "grad_norm": 5.996377944946289, "learning_rate": 8.997330412759259e-05, "loss": 0.784, "step": 8075 }, { "epoch": 0.5471915441425571, "grad_norm": 6.767289638519287, "learning_rate": 8.997193510849477e-05, "loss": 0.9997, "step": 8076 }, { "epoch": 0.5472592994105292, "grad_norm": 8.10904598236084, "learning_rate": 8.997056608939695e-05, "loss": 0.9479, "step": 8077 }, { "epoch": 0.5473270546785013, "grad_norm": 7.407421588897705, "learning_rate": 8.996919707029914e-05, "loss": 0.6885, "step": 8078 }, { "epoch": 0.5473948099464734, "grad_norm": 9.024150848388672, "learning_rate": 8.996782805120132e-05, "loss": 0.8968, "step": 8079 }, { "epoch": 0.5474625652144455, "grad_norm": 5.622673511505127, "learning_rate": 8.99664590321035e-05, "loss": 0.6612, "step": 8080 }, { "epoch": 0.5475303204824175, "grad_norm": 6.339783191680908, "learning_rate": 8.996509001300569e-05, "loss": 1.0033, "step": 8081 }, { "epoch": 0.5475980757503895, "grad_norm": 6.384222984313965, "learning_rate": 8.996372099390788e-05, "loss": 0.9403, "step": 8082 }, { "epoch": 0.5476658310183616, "grad_norm": 4.613900661468506, "learning_rate": 8.996235197481006e-05, "loss": 0.6112, "step": 8083 }, { "epoch": 0.5477335862863337, "grad_norm": 8.393567085266113, "learning_rate": 8.996098295571224e-05, "loss": 1.0299, "step": 8084 }, { "epoch": 0.5478013415543058, "grad_norm": 6.199436664581299, "learning_rate": 8.995961393661442e-05, "loss": 0.7737, "step": 8085 }, { "epoch": 0.5478690968222779, "grad_norm": 8.227348327636719, "learning_rate": 8.99582449175166e-05, "loss": 0.9148, "step": 8086 }, { "epoch": 0.54793685209025, "grad_norm": 4.897773265838623, "learning_rate": 8.99568758984188e-05, "loss": 0.7832, "step": 8087 }, { "epoch": 0.5480046073582221, "grad_norm": 5.995389938354492, "learning_rate": 8.995550687932098e-05, "loss": 0.7117, "step": 8088 }, { "epoch": 0.5480723626261942, "grad_norm": 6.70106840133667, "learning_rate": 8.995413786022316e-05, "loss": 0.6605, "step": 8089 }, { "epoch": 0.5481401178941663, "grad_norm": 6.50885534286499, "learning_rate": 8.995276884112534e-05, "loss": 0.8888, "step": 8090 }, { "epoch": 0.5482078731621384, "grad_norm": 6.068159580230713, "learning_rate": 8.995139982202753e-05, "loss": 0.6167, "step": 8091 }, { "epoch": 0.5482756284301105, "grad_norm": 7.8484578132629395, "learning_rate": 8.995003080292971e-05, "loss": 1.0058, "step": 8092 }, { "epoch": 0.5483433836980826, "grad_norm": 8.712899208068848, "learning_rate": 8.994866178383189e-05, "loss": 1.0509, "step": 8093 }, { "epoch": 0.5484111389660546, "grad_norm": 5.627629280090332, "learning_rate": 8.994729276473407e-05, "loss": 0.6542, "step": 8094 }, { "epoch": 0.5484788942340267, "grad_norm": 9.192571640014648, "learning_rate": 8.994592374563625e-05, "loss": 1.072, "step": 8095 }, { "epoch": 0.5485466495019988, "grad_norm": 7.169675350189209, "learning_rate": 8.994455472653844e-05, "loss": 0.8983, "step": 8096 }, { "epoch": 0.5486144047699709, "grad_norm": 5.486343860626221, "learning_rate": 8.994318570744063e-05, "loss": 0.8792, "step": 8097 }, { "epoch": 0.5486821600379429, "grad_norm": 7.902504920959473, "learning_rate": 8.99418166883428e-05, "loss": 0.7665, "step": 8098 }, { "epoch": 0.548749915305915, "grad_norm": 6.8819122314453125, "learning_rate": 8.994044766924499e-05, "loss": 0.7169, "step": 8099 }, { "epoch": 0.5488176705738871, "grad_norm": 8.217923164367676, "learning_rate": 8.993907865014718e-05, "loss": 0.7619, "step": 8100 }, { "epoch": 0.5488854258418592, "grad_norm": 6.089666366577148, "learning_rate": 8.993770963104936e-05, "loss": 0.6823, "step": 8101 }, { "epoch": 0.5489531811098313, "grad_norm": 5.951727390289307, "learning_rate": 8.993634061195154e-05, "loss": 0.9311, "step": 8102 }, { "epoch": 0.5490209363778034, "grad_norm": 5.070009231567383, "learning_rate": 8.993497159285372e-05, "loss": 0.7265, "step": 8103 }, { "epoch": 0.5490886916457755, "grad_norm": 6.631993293762207, "learning_rate": 8.99336025737559e-05, "loss": 0.8403, "step": 8104 }, { "epoch": 0.5491564469137475, "grad_norm": 6.938666820526123, "learning_rate": 8.99322335546581e-05, "loss": 1.0324, "step": 8105 }, { "epoch": 0.5492242021817196, "grad_norm": 6.868338584899902, "learning_rate": 8.993086453556028e-05, "loss": 0.8591, "step": 8106 }, { "epoch": 0.5492919574496917, "grad_norm": 6.081043720245361, "learning_rate": 8.992949551646246e-05, "loss": 0.9657, "step": 8107 }, { "epoch": 0.5493597127176638, "grad_norm": 6.885746955871582, "learning_rate": 8.992812649736464e-05, "loss": 0.7396, "step": 8108 }, { "epoch": 0.5494274679856359, "grad_norm": 7.478306770324707, "learning_rate": 8.992675747826682e-05, "loss": 0.8213, "step": 8109 }, { "epoch": 0.549495223253608, "grad_norm": 6.534050941467285, "learning_rate": 8.992538845916901e-05, "loss": 0.928, "step": 8110 }, { "epoch": 0.5495629785215801, "grad_norm": 6.312312602996826, "learning_rate": 8.992401944007119e-05, "loss": 0.8283, "step": 8111 }, { "epoch": 0.5496307337895522, "grad_norm": 6.73023796081543, "learning_rate": 8.992265042097337e-05, "loss": 0.9155, "step": 8112 }, { "epoch": 0.5496984890575243, "grad_norm": 6.13808012008667, "learning_rate": 8.992128140187555e-05, "loss": 0.9905, "step": 8113 }, { "epoch": 0.5497662443254963, "grad_norm": 5.021573066711426, "learning_rate": 8.991991238277775e-05, "loss": 0.6841, "step": 8114 }, { "epoch": 0.5498339995934683, "grad_norm": 6.291537284851074, "learning_rate": 8.991854336367993e-05, "loss": 1.1789, "step": 8115 }, { "epoch": 0.5499017548614404, "grad_norm": 5.784443378448486, "learning_rate": 8.99171743445821e-05, "loss": 0.7338, "step": 8116 }, { "epoch": 0.5499695101294125, "grad_norm": 5.844326496124268, "learning_rate": 8.991580532548429e-05, "loss": 0.7649, "step": 8117 }, { "epoch": 0.5500372653973846, "grad_norm": 6.961515426635742, "learning_rate": 8.991443630638648e-05, "loss": 0.8765, "step": 8118 }, { "epoch": 0.5501050206653567, "grad_norm": 6.371798515319824, "learning_rate": 8.991306728728866e-05, "loss": 0.7896, "step": 8119 }, { "epoch": 0.5501727759333288, "grad_norm": 8.220330238342285, "learning_rate": 8.991169826819084e-05, "loss": 0.9147, "step": 8120 }, { "epoch": 0.5502405312013009, "grad_norm": 15.172338485717773, "learning_rate": 8.991032924909303e-05, "loss": 0.798, "step": 8121 }, { "epoch": 0.550308286469273, "grad_norm": 7.026042938232422, "learning_rate": 8.990896022999522e-05, "loss": 0.9536, "step": 8122 }, { "epoch": 0.5503760417372451, "grad_norm": 7.6364336013793945, "learning_rate": 8.99075912108974e-05, "loss": 0.8633, "step": 8123 }, { "epoch": 0.5504437970052172, "grad_norm": 7.09596586227417, "learning_rate": 8.990622219179959e-05, "loss": 0.7266, "step": 8124 }, { "epoch": 0.5505115522731893, "grad_norm": 6.502638816833496, "learning_rate": 8.990485317270177e-05, "loss": 1.0156, "step": 8125 }, { "epoch": 0.5505793075411614, "grad_norm": 6.83535623550415, "learning_rate": 8.990348415360395e-05, "loss": 0.7604, "step": 8126 }, { "epoch": 0.5506470628091334, "grad_norm": 8.162100791931152, "learning_rate": 8.990211513450613e-05, "loss": 0.9476, "step": 8127 }, { "epoch": 0.5507148180771055, "grad_norm": 7.7978410720825195, "learning_rate": 8.990074611540832e-05, "loss": 0.9341, "step": 8128 }, { "epoch": 0.5507825733450776, "grad_norm": 7.659994602203369, "learning_rate": 8.98993770963105e-05, "loss": 0.9403, "step": 8129 }, { "epoch": 0.5508503286130496, "grad_norm": 7.724880218505859, "learning_rate": 8.989800807721268e-05, "loss": 0.8563, "step": 8130 }, { "epoch": 0.5509180838810217, "grad_norm": 8.337576866149902, "learning_rate": 8.989663905811487e-05, "loss": 0.874, "step": 8131 }, { "epoch": 0.5509858391489938, "grad_norm": 6.336655616760254, "learning_rate": 8.989527003901705e-05, "loss": 0.6825, "step": 8132 }, { "epoch": 0.5510535944169659, "grad_norm": 8.086697578430176, "learning_rate": 8.989390101991924e-05, "loss": 1.1419, "step": 8133 }, { "epoch": 0.551121349684938, "grad_norm": 6.572722434997559, "learning_rate": 8.989253200082142e-05, "loss": 0.9213, "step": 8134 }, { "epoch": 0.5511891049529101, "grad_norm": 6.7894487380981445, "learning_rate": 8.98911629817236e-05, "loss": 1.0189, "step": 8135 }, { "epoch": 0.5512568602208822, "grad_norm": 8.123970985412598, "learning_rate": 8.988979396262578e-05, "loss": 1.1131, "step": 8136 }, { "epoch": 0.5513246154888543, "grad_norm": 7.39744758605957, "learning_rate": 8.988842494352797e-05, "loss": 0.7286, "step": 8137 }, { "epoch": 0.5513923707568263, "grad_norm": 6.353941440582275, "learning_rate": 8.988705592443015e-05, "loss": 0.7356, "step": 8138 }, { "epoch": 0.5514601260247984, "grad_norm": 10.126983642578125, "learning_rate": 8.988568690533234e-05, "loss": 0.8082, "step": 8139 }, { "epoch": 0.5515278812927705, "grad_norm": 7.518803596496582, "learning_rate": 8.988431788623452e-05, "loss": 0.801, "step": 8140 }, { "epoch": 0.5515956365607426, "grad_norm": 6.769824981689453, "learning_rate": 8.98829488671367e-05, "loss": 0.7756, "step": 8141 }, { "epoch": 0.5516633918287147, "grad_norm": 6.841986656188965, "learning_rate": 8.988157984803889e-05, "loss": 0.6801, "step": 8142 }, { "epoch": 0.5517311470966868, "grad_norm": 6.92106294631958, "learning_rate": 8.988021082894107e-05, "loss": 0.9085, "step": 8143 }, { "epoch": 0.5517989023646589, "grad_norm": 5.728677272796631, "learning_rate": 8.987884180984325e-05, "loss": 0.8921, "step": 8144 }, { "epoch": 0.551866657632631, "grad_norm": 6.752305507659912, "learning_rate": 8.987747279074543e-05, "loss": 0.7462, "step": 8145 }, { "epoch": 0.551934412900603, "grad_norm": 7.200686454772949, "learning_rate": 8.987610377164762e-05, "loss": 1.3344, "step": 8146 }, { "epoch": 0.5520021681685751, "grad_norm": 5.663136959075928, "learning_rate": 8.98747347525498e-05, "loss": 0.8558, "step": 8147 }, { "epoch": 0.5520699234365471, "grad_norm": 5.89145565032959, "learning_rate": 8.987336573345199e-05, "loss": 0.8393, "step": 8148 }, { "epoch": 0.5521376787045192, "grad_norm": 6.481657028198242, "learning_rate": 8.987199671435417e-05, "loss": 1.0019, "step": 8149 }, { "epoch": 0.5522054339724913, "grad_norm": 6.341575622558594, "learning_rate": 8.987062769525635e-05, "loss": 0.84, "step": 8150 }, { "epoch": 0.5522731892404634, "grad_norm": 8.580132484436035, "learning_rate": 8.986925867615854e-05, "loss": 0.9604, "step": 8151 }, { "epoch": 0.5523409445084355, "grad_norm": 6.118538856506348, "learning_rate": 8.986788965706072e-05, "loss": 0.8404, "step": 8152 }, { "epoch": 0.5524086997764076, "grad_norm": 6.263945579528809, "learning_rate": 8.98665206379629e-05, "loss": 0.93, "step": 8153 }, { "epoch": 0.5524764550443797, "grad_norm": 6.55873441696167, "learning_rate": 8.986515161886508e-05, "loss": 0.8502, "step": 8154 }, { "epoch": 0.5525442103123518, "grad_norm": 6.372352600097656, "learning_rate": 8.986378259976726e-05, "loss": 1.0641, "step": 8155 }, { "epoch": 0.5526119655803239, "grad_norm": 7.0687665939331055, "learning_rate": 8.986241358066946e-05, "loss": 0.8543, "step": 8156 }, { "epoch": 0.552679720848296, "grad_norm": 8.265472412109375, "learning_rate": 8.986104456157164e-05, "loss": 0.9062, "step": 8157 }, { "epoch": 0.5527474761162681, "grad_norm": 6.169217586517334, "learning_rate": 8.985967554247382e-05, "loss": 0.7173, "step": 8158 }, { "epoch": 0.5528152313842402, "grad_norm": 5.441207408905029, "learning_rate": 8.9858306523376e-05, "loss": 0.6463, "step": 8159 }, { "epoch": 0.5528829866522122, "grad_norm": 7.118704795837402, "learning_rate": 8.985693750427819e-05, "loss": 0.8944, "step": 8160 }, { "epoch": 0.5529507419201843, "grad_norm": 6.042327404022217, "learning_rate": 8.985556848518037e-05, "loss": 1.081, "step": 8161 }, { "epoch": 0.5530184971881564, "grad_norm": 7.080348014831543, "learning_rate": 8.985419946608255e-05, "loss": 0.6969, "step": 8162 }, { "epoch": 0.5530862524561284, "grad_norm": 6.585824489593506, "learning_rate": 8.985283044698473e-05, "loss": 0.817, "step": 8163 }, { "epoch": 0.5531540077241005, "grad_norm": 7.307035446166992, "learning_rate": 8.985146142788692e-05, "loss": 0.9677, "step": 8164 }, { "epoch": 0.5532217629920726, "grad_norm": 7.577461242675781, "learning_rate": 8.98500924087891e-05, "loss": 0.9558, "step": 8165 }, { "epoch": 0.5532895182600447, "grad_norm": 8.000713348388672, "learning_rate": 8.984872338969129e-05, "loss": 0.8819, "step": 8166 }, { "epoch": 0.5533572735280168, "grad_norm": 8.770408630371094, "learning_rate": 8.984735437059348e-05, "loss": 1.0097, "step": 8167 }, { "epoch": 0.5534250287959889, "grad_norm": 6.20897102355957, "learning_rate": 8.984598535149566e-05, "loss": 0.936, "step": 8168 }, { "epoch": 0.553492784063961, "grad_norm": 6.364378929138184, "learning_rate": 8.984461633239784e-05, "loss": 0.8158, "step": 8169 }, { "epoch": 0.553560539331933, "grad_norm": 6.148647308349609, "learning_rate": 8.984324731330003e-05, "loss": 0.8356, "step": 8170 }, { "epoch": 0.5536282945999051, "grad_norm": 6.537686347961426, "learning_rate": 8.984187829420221e-05, "loss": 0.9588, "step": 8171 }, { "epoch": 0.5536960498678772, "grad_norm": 5.661696910858154, "learning_rate": 8.98405092751044e-05, "loss": 0.8554, "step": 8172 }, { "epoch": 0.5537638051358493, "grad_norm": 5.869355201721191, "learning_rate": 8.983914025600658e-05, "loss": 0.7676, "step": 8173 }, { "epoch": 0.5538315604038214, "grad_norm": 5.434372425079346, "learning_rate": 8.983777123690877e-05, "loss": 0.7861, "step": 8174 }, { "epoch": 0.5538993156717935, "grad_norm": 7.260756015777588, "learning_rate": 8.983640221781095e-05, "loss": 0.793, "step": 8175 }, { "epoch": 0.5539670709397656, "grad_norm": 7.556422710418701, "learning_rate": 8.983503319871313e-05, "loss": 0.9004, "step": 8176 }, { "epoch": 0.5540348262077377, "grad_norm": 5.610649585723877, "learning_rate": 8.983366417961531e-05, "loss": 0.6254, "step": 8177 }, { "epoch": 0.5541025814757098, "grad_norm": 8.736811637878418, "learning_rate": 8.98322951605175e-05, "loss": 1.028, "step": 8178 }, { "epoch": 0.5541703367436818, "grad_norm": 6.036482334136963, "learning_rate": 8.983092614141968e-05, "loss": 0.805, "step": 8179 }, { "epoch": 0.5542380920116539, "grad_norm": 7.7951250076293945, "learning_rate": 8.982955712232186e-05, "loss": 0.8463, "step": 8180 }, { "epoch": 0.554305847279626, "grad_norm": 5.3911871910095215, "learning_rate": 8.982818810322404e-05, "loss": 0.6961, "step": 8181 }, { "epoch": 0.554373602547598, "grad_norm": 7.813619613647461, "learning_rate": 8.982681908412623e-05, "loss": 0.7671, "step": 8182 }, { "epoch": 0.5544413578155701, "grad_norm": 6.328850269317627, "learning_rate": 8.982545006502842e-05, "loss": 0.7286, "step": 8183 }, { "epoch": 0.5545091130835422, "grad_norm": 7.214839935302734, "learning_rate": 8.98240810459306e-05, "loss": 0.8523, "step": 8184 }, { "epoch": 0.5545768683515143, "grad_norm": 5.996763229370117, "learning_rate": 8.982271202683278e-05, "loss": 0.8575, "step": 8185 }, { "epoch": 0.5546446236194864, "grad_norm": 8.032330513000488, "learning_rate": 8.982134300773496e-05, "loss": 0.9224, "step": 8186 }, { "epoch": 0.5547123788874585, "grad_norm": 7.919235706329346, "learning_rate": 8.981997398863714e-05, "loss": 0.9237, "step": 8187 }, { "epoch": 0.5547801341554306, "grad_norm": 7.324405670166016, "learning_rate": 8.981860496953933e-05, "loss": 0.904, "step": 8188 }, { "epoch": 0.5548478894234027, "grad_norm": 7.116741180419922, "learning_rate": 8.981723595044151e-05, "loss": 1.078, "step": 8189 }, { "epoch": 0.5549156446913748, "grad_norm": 6.27305269241333, "learning_rate": 8.98158669313437e-05, "loss": 0.7974, "step": 8190 }, { "epoch": 0.5549833999593469, "grad_norm": 6.298983097076416, "learning_rate": 8.981449791224588e-05, "loss": 0.8742, "step": 8191 }, { "epoch": 0.555051155227319, "grad_norm": 5.965509414672852, "learning_rate": 8.981312889314807e-05, "loss": 0.7601, "step": 8192 }, { "epoch": 0.555118910495291, "grad_norm": 6.754343509674072, "learning_rate": 8.981175987405025e-05, "loss": 1.0106, "step": 8193 }, { "epoch": 0.5551866657632631, "grad_norm": 4.9216790199279785, "learning_rate": 8.981039085495243e-05, "loss": 0.7036, "step": 8194 }, { "epoch": 0.5552544210312351, "grad_norm": 5.694727897644043, "learning_rate": 8.980902183585461e-05, "loss": 0.6663, "step": 8195 }, { "epoch": 0.5553221762992072, "grad_norm": 7.885236740112305, "learning_rate": 8.980765281675679e-05, "loss": 0.9234, "step": 8196 }, { "epoch": 0.5553899315671793, "grad_norm": 9.071615219116211, "learning_rate": 8.980628379765898e-05, "loss": 0.8357, "step": 8197 }, { "epoch": 0.5554576868351514, "grad_norm": 5.474040508270264, "learning_rate": 8.980491477856116e-05, "loss": 0.6571, "step": 8198 }, { "epoch": 0.5555254421031235, "grad_norm": 6.519590854644775, "learning_rate": 8.980354575946335e-05, "loss": 0.7547, "step": 8199 }, { "epoch": 0.5555931973710956, "grad_norm": 6.605935096740723, "learning_rate": 8.980217674036553e-05, "loss": 0.8487, "step": 8200 }, { "epoch": 0.5556609526390677, "grad_norm": 7.937171459197998, "learning_rate": 8.980080772126772e-05, "loss": 1.0935, "step": 8201 }, { "epoch": 0.5557287079070398, "grad_norm": 8.487578392028809, "learning_rate": 8.97994387021699e-05, "loss": 1.2286, "step": 8202 }, { "epoch": 0.5557964631750119, "grad_norm": 5.452174186706543, "learning_rate": 8.979806968307208e-05, "loss": 0.8182, "step": 8203 }, { "epoch": 0.555864218442984, "grad_norm": 7.257054805755615, "learning_rate": 8.979670066397426e-05, "loss": 1.0002, "step": 8204 }, { "epoch": 0.555931973710956, "grad_norm": 6.426529407501221, "learning_rate": 8.979533164487644e-05, "loss": 0.8226, "step": 8205 }, { "epoch": 0.5559997289789281, "grad_norm": 7.804161548614502, "learning_rate": 8.979396262577863e-05, "loss": 1.0396, "step": 8206 }, { "epoch": 0.5560674842469002, "grad_norm": 5.7507004737854, "learning_rate": 8.979259360668082e-05, "loss": 0.856, "step": 8207 }, { "epoch": 0.5561352395148723, "grad_norm": 5.974365234375, "learning_rate": 8.9791224587583e-05, "loss": 0.7983, "step": 8208 }, { "epoch": 0.5562029947828444, "grad_norm": 8.598740577697754, "learning_rate": 8.978985556848518e-05, "loss": 0.9326, "step": 8209 }, { "epoch": 0.5562707500508165, "grad_norm": 6.554419994354248, "learning_rate": 8.978848654938736e-05, "loss": 0.8367, "step": 8210 }, { "epoch": 0.5563385053187886, "grad_norm": 5.485220432281494, "learning_rate": 8.978711753028955e-05, "loss": 0.8231, "step": 8211 }, { "epoch": 0.5564062605867606, "grad_norm": 7.286705493927002, "learning_rate": 8.978574851119173e-05, "loss": 1.0673, "step": 8212 }, { "epoch": 0.5564740158547327, "grad_norm": 7.718635559082031, "learning_rate": 8.978437949209391e-05, "loss": 1.1467, "step": 8213 }, { "epoch": 0.5565417711227048, "grad_norm": 7.21420431137085, "learning_rate": 8.97830104729961e-05, "loss": 0.7332, "step": 8214 }, { "epoch": 0.5566095263906768, "grad_norm": 7.05917501449585, "learning_rate": 8.978164145389828e-05, "loss": 0.8301, "step": 8215 }, { "epoch": 0.5566772816586489, "grad_norm": 7.492615699768066, "learning_rate": 8.978027243480047e-05, "loss": 0.7282, "step": 8216 }, { "epoch": 0.556745036926621, "grad_norm": 5.647882461547852, "learning_rate": 8.977890341570266e-05, "loss": 0.9054, "step": 8217 }, { "epoch": 0.5568127921945931, "grad_norm": 7.022364616394043, "learning_rate": 8.977753439660484e-05, "loss": 0.7759, "step": 8218 }, { "epoch": 0.5568805474625652, "grad_norm": 7.175726890563965, "learning_rate": 8.977616537750702e-05, "loss": 0.8165, "step": 8219 }, { "epoch": 0.5569483027305373, "grad_norm": 5.680481433868408, "learning_rate": 8.977479635840921e-05, "loss": 0.8801, "step": 8220 }, { "epoch": 0.5570160579985094, "grad_norm": 6.144750118255615, "learning_rate": 8.97734273393114e-05, "loss": 0.8273, "step": 8221 }, { "epoch": 0.5570838132664815, "grad_norm": 6.248444080352783, "learning_rate": 8.977205832021357e-05, "loss": 1.1512, "step": 8222 }, { "epoch": 0.5571515685344536, "grad_norm": 5.904661655426025, "learning_rate": 8.977068930111575e-05, "loss": 0.8319, "step": 8223 }, { "epoch": 0.5572193238024257, "grad_norm": 5.726762771606445, "learning_rate": 8.976932028201795e-05, "loss": 0.7412, "step": 8224 }, { "epoch": 0.5572870790703978, "grad_norm": 6.143310070037842, "learning_rate": 8.976795126292013e-05, "loss": 1.0178, "step": 8225 }, { "epoch": 0.5573548343383699, "grad_norm": 5.4348907470703125, "learning_rate": 8.976658224382231e-05, "loss": 0.7297, "step": 8226 }, { "epoch": 0.557422589606342, "grad_norm": 6.981692314147949, "learning_rate": 8.976521322472449e-05, "loss": 0.8807, "step": 8227 }, { "epoch": 0.5574903448743139, "grad_norm": 7.862269878387451, "learning_rate": 8.976384420562667e-05, "loss": 1.0944, "step": 8228 }, { "epoch": 0.557558100142286, "grad_norm": 6.2533345222473145, "learning_rate": 8.976247518652886e-05, "loss": 0.8815, "step": 8229 }, { "epoch": 0.5576258554102581, "grad_norm": 5.445539951324463, "learning_rate": 8.976110616743104e-05, "loss": 0.7853, "step": 8230 }, { "epoch": 0.5576936106782302, "grad_norm": 5.220200538635254, "learning_rate": 8.975973714833322e-05, "loss": 0.6072, "step": 8231 }, { "epoch": 0.5577613659462023, "grad_norm": 7.320289611816406, "learning_rate": 8.97583681292354e-05, "loss": 0.7023, "step": 8232 }, { "epoch": 0.5578291212141744, "grad_norm": 6.331526756286621, "learning_rate": 8.97569991101376e-05, "loss": 0.8804, "step": 8233 }, { "epoch": 0.5578968764821465, "grad_norm": 7.493820667266846, "learning_rate": 8.975563009103978e-05, "loss": 0.8167, "step": 8234 }, { "epoch": 0.5579646317501186, "grad_norm": 5.947346210479736, "learning_rate": 8.975426107194196e-05, "loss": 0.6117, "step": 8235 }, { "epoch": 0.5580323870180907, "grad_norm": 6.589901447296143, "learning_rate": 8.975289205284414e-05, "loss": 0.8721, "step": 8236 }, { "epoch": 0.5581001422860628, "grad_norm": 5.9715423583984375, "learning_rate": 8.975152303374632e-05, "loss": 0.8644, "step": 8237 }, { "epoch": 0.5581678975540348, "grad_norm": 8.454349517822266, "learning_rate": 8.975015401464851e-05, "loss": 0.89, "step": 8238 }, { "epoch": 0.5582356528220069, "grad_norm": 6.455380916595459, "learning_rate": 8.97487849955507e-05, "loss": 0.8405, "step": 8239 }, { "epoch": 0.558303408089979, "grad_norm": 6.558000564575195, "learning_rate": 8.974741597645287e-05, "loss": 0.8561, "step": 8240 }, { "epoch": 0.5583711633579511, "grad_norm": 6.80634880065918, "learning_rate": 8.974604695735506e-05, "loss": 0.8508, "step": 8241 }, { "epoch": 0.5584389186259232, "grad_norm": 5.507918357849121, "learning_rate": 8.974467793825724e-05, "loss": 0.9459, "step": 8242 }, { "epoch": 0.5585066738938953, "grad_norm": 6.2580037117004395, "learning_rate": 8.974330891915943e-05, "loss": 0.919, "step": 8243 }, { "epoch": 0.5585744291618673, "grad_norm": 7.050917148590088, "learning_rate": 8.974193990006161e-05, "loss": 0.6281, "step": 8244 }, { "epoch": 0.5586421844298394, "grad_norm": 6.926290512084961, "learning_rate": 8.974057088096379e-05, "loss": 0.9378, "step": 8245 }, { "epoch": 0.5587099396978115, "grad_norm": 7.242706298828125, "learning_rate": 8.973920186186597e-05, "loss": 0.9724, "step": 8246 }, { "epoch": 0.5587776949657836, "grad_norm": 6.239006519317627, "learning_rate": 8.973783284276816e-05, "loss": 1.1008, "step": 8247 }, { "epoch": 0.5588454502337556, "grad_norm": 5.4303998947143555, "learning_rate": 8.973646382367034e-05, "loss": 0.8486, "step": 8248 }, { "epoch": 0.5589132055017277, "grad_norm": 7.883191108703613, "learning_rate": 8.973509480457252e-05, "loss": 1.1543, "step": 8249 }, { "epoch": 0.5589809607696998, "grad_norm": 6.10230016708374, "learning_rate": 8.97337257854747e-05, "loss": 0.8662, "step": 8250 }, { "epoch": 0.5590487160376719, "grad_norm": 7.152645587921143, "learning_rate": 8.973235676637689e-05, "loss": 0.7332, "step": 8251 }, { "epoch": 0.559116471305644, "grad_norm": 5.797628879547119, "learning_rate": 8.973098774727908e-05, "loss": 0.7454, "step": 8252 }, { "epoch": 0.5591842265736161, "grad_norm": 6.342854976654053, "learning_rate": 8.972961872818126e-05, "loss": 0.6993, "step": 8253 }, { "epoch": 0.5592519818415882, "grad_norm": 8.492813110351562, "learning_rate": 8.972824970908344e-05, "loss": 0.8986, "step": 8254 }, { "epoch": 0.5593197371095603, "grad_norm": 5.740788459777832, "learning_rate": 8.972688068998562e-05, "loss": 0.8702, "step": 8255 }, { "epoch": 0.5593874923775324, "grad_norm": 5.833809852600098, "learning_rate": 8.972551167088781e-05, "loss": 0.9314, "step": 8256 }, { "epoch": 0.5594552476455045, "grad_norm": 7.672467231750488, "learning_rate": 8.972414265179e-05, "loss": 0.7654, "step": 8257 }, { "epoch": 0.5595230029134766, "grad_norm": 6.214809417724609, "learning_rate": 8.972277363269218e-05, "loss": 0.8391, "step": 8258 }, { "epoch": 0.5595907581814487, "grad_norm": 6.621498107910156, "learning_rate": 8.972140461359436e-05, "loss": 0.8521, "step": 8259 }, { "epoch": 0.5596585134494207, "grad_norm": 6.759031295776367, "learning_rate": 8.972003559449655e-05, "loss": 1.0709, "step": 8260 }, { "epoch": 0.5597262687173927, "grad_norm": 6.506051540374756, "learning_rate": 8.971866657539873e-05, "loss": 1.1408, "step": 8261 }, { "epoch": 0.5597940239853648, "grad_norm": 5.346510410308838, "learning_rate": 8.971729755630091e-05, "loss": 0.7054, "step": 8262 }, { "epoch": 0.5598617792533369, "grad_norm": 7.418613910675049, "learning_rate": 8.97159285372031e-05, "loss": 0.976, "step": 8263 }, { "epoch": 0.559929534521309, "grad_norm": 5.501760482788086, "learning_rate": 8.971455951810528e-05, "loss": 0.7454, "step": 8264 }, { "epoch": 0.5599972897892811, "grad_norm": 6.592947959899902, "learning_rate": 8.971319049900746e-05, "loss": 0.6618, "step": 8265 }, { "epoch": 0.5600650450572532, "grad_norm": 7.502736568450928, "learning_rate": 8.971182147990966e-05, "loss": 0.8033, "step": 8266 }, { "epoch": 0.5601328003252253, "grad_norm": 7.374648094177246, "learning_rate": 8.971045246081184e-05, "loss": 0.8319, "step": 8267 }, { "epoch": 0.5602005555931974, "grad_norm": 7.346577167510986, "learning_rate": 8.970908344171402e-05, "loss": 0.9729, "step": 8268 }, { "epoch": 0.5602683108611695, "grad_norm": 5.724850654602051, "learning_rate": 8.97077144226162e-05, "loss": 0.8124, "step": 8269 }, { "epoch": 0.5603360661291416, "grad_norm": 7.397567272186279, "learning_rate": 8.97063454035184e-05, "loss": 1.2057, "step": 8270 }, { "epoch": 0.5604038213971136, "grad_norm": 8.235245704650879, "learning_rate": 8.970497638442057e-05, "loss": 0.7024, "step": 8271 }, { "epoch": 0.5604715766650857, "grad_norm": 6.514690399169922, "learning_rate": 8.970360736532275e-05, "loss": 0.7149, "step": 8272 }, { "epoch": 0.5605393319330578, "grad_norm": 7.889196872711182, "learning_rate": 8.970223834622493e-05, "loss": 0.664, "step": 8273 }, { "epoch": 0.5606070872010299, "grad_norm": 6.1832475662231445, "learning_rate": 8.970086932712711e-05, "loss": 0.8841, "step": 8274 }, { "epoch": 0.560674842469002, "grad_norm": 5.73254919052124, "learning_rate": 8.969950030802931e-05, "loss": 0.8285, "step": 8275 }, { "epoch": 0.5607425977369741, "grad_norm": 7.358850955963135, "learning_rate": 8.969813128893149e-05, "loss": 0.8508, "step": 8276 }, { "epoch": 0.5608103530049461, "grad_norm": 7.491328716278076, "learning_rate": 8.969676226983367e-05, "loss": 1.0391, "step": 8277 }, { "epoch": 0.5608781082729182, "grad_norm": 6.1154375076293945, "learning_rate": 8.969539325073585e-05, "loss": 0.8254, "step": 8278 }, { "epoch": 0.5609458635408903, "grad_norm": 7.245144367218018, "learning_rate": 8.969402423163804e-05, "loss": 0.9224, "step": 8279 }, { "epoch": 0.5610136188088624, "grad_norm": 7.001935958862305, "learning_rate": 8.969265521254022e-05, "loss": 0.6674, "step": 8280 }, { "epoch": 0.5610813740768344, "grad_norm": 7.420647144317627, "learning_rate": 8.96912861934424e-05, "loss": 0.9413, "step": 8281 }, { "epoch": 0.5611491293448065, "grad_norm": 8.350207328796387, "learning_rate": 8.968991717434458e-05, "loss": 1.0078, "step": 8282 }, { "epoch": 0.5612168846127786, "grad_norm": 7.219613075256348, "learning_rate": 8.968854815524676e-05, "loss": 0.9071, "step": 8283 }, { "epoch": 0.5612846398807507, "grad_norm": 6.428452014923096, "learning_rate": 8.968717913614896e-05, "loss": 1.0902, "step": 8284 }, { "epoch": 0.5613523951487228, "grad_norm": 6.86848258972168, "learning_rate": 8.968581011705114e-05, "loss": 0.967, "step": 8285 }, { "epoch": 0.5614201504166949, "grad_norm": 6.413376808166504, "learning_rate": 8.968444109795332e-05, "loss": 0.7595, "step": 8286 }, { "epoch": 0.561487905684667, "grad_norm": 7.2994866371154785, "learning_rate": 8.96830720788555e-05, "loss": 1.176, "step": 8287 }, { "epoch": 0.5615556609526391, "grad_norm": 6.439824104309082, "learning_rate": 8.968170305975768e-05, "loss": 0.8067, "step": 8288 }, { "epoch": 0.5616234162206112, "grad_norm": 7.186101913452148, "learning_rate": 8.968033404065987e-05, "loss": 0.6857, "step": 8289 }, { "epoch": 0.5616911714885833, "grad_norm": 7.133284091949463, "learning_rate": 8.967896502156205e-05, "loss": 0.9404, "step": 8290 }, { "epoch": 0.5617589267565554, "grad_norm": 5.822772979736328, "learning_rate": 8.967759600246423e-05, "loss": 0.8007, "step": 8291 }, { "epoch": 0.5618266820245275, "grad_norm": 6.281779766082764, "learning_rate": 8.967622698336642e-05, "loss": 0.8698, "step": 8292 }, { "epoch": 0.5618944372924994, "grad_norm": 6.691267490386963, "learning_rate": 8.967485796426861e-05, "loss": 0.7307, "step": 8293 }, { "epoch": 0.5619621925604715, "grad_norm": 8.061805725097656, "learning_rate": 8.967348894517079e-05, "loss": 0.9223, "step": 8294 }, { "epoch": 0.5620299478284436, "grad_norm": 6.480556488037109, "learning_rate": 8.967211992607297e-05, "loss": 0.6982, "step": 8295 }, { "epoch": 0.5620977030964157, "grad_norm": 6.673006534576416, "learning_rate": 8.967075090697515e-05, "loss": 0.9059, "step": 8296 }, { "epoch": 0.5621654583643878, "grad_norm": 5.882975101470947, "learning_rate": 8.966938188787733e-05, "loss": 0.7252, "step": 8297 }, { "epoch": 0.5622332136323599, "grad_norm": 7.131857872009277, "learning_rate": 8.966801286877952e-05, "loss": 0.8641, "step": 8298 }, { "epoch": 0.562300968900332, "grad_norm": 6.564770698547363, "learning_rate": 8.96666438496817e-05, "loss": 0.8122, "step": 8299 }, { "epoch": 0.5623687241683041, "grad_norm": 5.747824192047119, "learning_rate": 8.966527483058388e-05, "loss": 0.8257, "step": 8300 }, { "epoch": 0.5624364794362762, "grad_norm": 5.892056465148926, "learning_rate": 8.966390581148607e-05, "loss": 0.5754, "step": 8301 }, { "epoch": 0.5625042347042483, "grad_norm": 7.563606262207031, "learning_rate": 8.966253679238826e-05, "loss": 0.7932, "step": 8302 }, { "epoch": 0.5625719899722204, "grad_norm": 6.839925289154053, "learning_rate": 8.966116777329044e-05, "loss": 0.8489, "step": 8303 }, { "epoch": 0.5626397452401924, "grad_norm": 7.593649387359619, "learning_rate": 8.965979875419262e-05, "loss": 0.7083, "step": 8304 }, { "epoch": 0.5627075005081645, "grad_norm": 6.513439178466797, "learning_rate": 8.96584297350948e-05, "loss": 0.8906, "step": 8305 }, { "epoch": 0.5627752557761366, "grad_norm": 7.1831889152526855, "learning_rate": 8.9657060715997e-05, "loss": 1.1598, "step": 8306 }, { "epoch": 0.5628430110441087, "grad_norm": 5.392392158508301, "learning_rate": 8.965569169689917e-05, "loss": 0.7419, "step": 8307 }, { "epoch": 0.5629107663120808, "grad_norm": 5.79964017868042, "learning_rate": 8.965432267780135e-05, "loss": 0.5992, "step": 8308 }, { "epoch": 0.5629785215800529, "grad_norm": 6.701503276824951, "learning_rate": 8.965295365870355e-05, "loss": 0.9259, "step": 8309 }, { "epoch": 0.5630462768480249, "grad_norm": 7.373070240020752, "learning_rate": 8.965158463960573e-05, "loss": 0.7898, "step": 8310 }, { "epoch": 0.563114032115997, "grad_norm": 5.849572658538818, "learning_rate": 8.965021562050791e-05, "loss": 0.6339, "step": 8311 }, { "epoch": 0.5631817873839691, "grad_norm": 7.531026840209961, "learning_rate": 8.96488466014101e-05, "loss": 0.9883, "step": 8312 }, { "epoch": 0.5632495426519412, "grad_norm": 6.910517692565918, "learning_rate": 8.964747758231228e-05, "loss": 0.8582, "step": 8313 }, { "epoch": 0.5633172979199133, "grad_norm": 7.760013103485107, "learning_rate": 8.964610856321446e-05, "loss": 0.8696, "step": 8314 }, { "epoch": 0.5633850531878853, "grad_norm": 5.592878341674805, "learning_rate": 8.964473954411664e-05, "loss": 0.7325, "step": 8315 }, { "epoch": 0.5634528084558574, "grad_norm": 6.491446018218994, "learning_rate": 8.964337052501884e-05, "loss": 0.7607, "step": 8316 }, { "epoch": 0.5635205637238295, "grad_norm": 5.725716590881348, "learning_rate": 8.964200150592102e-05, "loss": 0.928, "step": 8317 }, { "epoch": 0.5635883189918016, "grad_norm": 6.139345169067383, "learning_rate": 8.96406324868232e-05, "loss": 0.9763, "step": 8318 }, { "epoch": 0.5636560742597737, "grad_norm": 7.040091514587402, "learning_rate": 8.963926346772538e-05, "loss": 0.7395, "step": 8319 }, { "epoch": 0.5637238295277458, "grad_norm": 5.59568977355957, "learning_rate": 8.963789444862756e-05, "loss": 0.7304, "step": 8320 }, { "epoch": 0.5637915847957179, "grad_norm": 5.827798366546631, "learning_rate": 8.963652542952975e-05, "loss": 0.7063, "step": 8321 }, { "epoch": 0.56385934006369, "grad_norm": 7.218785285949707, "learning_rate": 8.963515641043193e-05, "loss": 0.8324, "step": 8322 }, { "epoch": 0.5639270953316621, "grad_norm": 6.695993423461914, "learning_rate": 8.963378739133411e-05, "loss": 0.7374, "step": 8323 }, { "epoch": 0.5639948505996342, "grad_norm": 7.842550754547119, "learning_rate": 8.96324183722363e-05, "loss": 1.1097, "step": 8324 }, { "epoch": 0.5640626058676063, "grad_norm": 10.275467872619629, "learning_rate": 8.963104935313849e-05, "loss": 0.974, "step": 8325 }, { "epoch": 0.5641303611355782, "grad_norm": 5.597927093505859, "learning_rate": 8.962968033404067e-05, "loss": 0.8399, "step": 8326 }, { "epoch": 0.5641981164035503, "grad_norm": 8.49342155456543, "learning_rate": 8.962831131494285e-05, "loss": 0.8277, "step": 8327 }, { "epoch": 0.5642658716715224, "grad_norm": 5.775453567504883, "learning_rate": 8.962694229584503e-05, "loss": 0.7712, "step": 8328 }, { "epoch": 0.5643336269394945, "grad_norm": 6.063767910003662, "learning_rate": 8.962557327674721e-05, "loss": 0.6816, "step": 8329 }, { "epoch": 0.5644013822074666, "grad_norm": 6.82351541519165, "learning_rate": 8.96242042576494e-05, "loss": 0.7759, "step": 8330 }, { "epoch": 0.5644691374754387, "grad_norm": 7.734725475311279, "learning_rate": 8.962283523855158e-05, "loss": 0.856, "step": 8331 }, { "epoch": 0.5645368927434108, "grad_norm": 9.442476272583008, "learning_rate": 8.962146621945376e-05, "loss": 0.9022, "step": 8332 }, { "epoch": 0.5646046480113829, "grad_norm": 4.6692352294921875, "learning_rate": 8.962009720035594e-05, "loss": 0.689, "step": 8333 }, { "epoch": 0.564672403279355, "grad_norm": 7.143623352050781, "learning_rate": 8.961872818125814e-05, "loss": 0.7562, "step": 8334 }, { "epoch": 0.5647401585473271, "grad_norm": 6.053460597991943, "learning_rate": 8.961735916216032e-05, "loss": 0.6182, "step": 8335 }, { "epoch": 0.5648079138152992, "grad_norm": 6.414344310760498, "learning_rate": 8.96159901430625e-05, "loss": 1.0768, "step": 8336 }, { "epoch": 0.5648756690832712, "grad_norm": 5.544490814208984, "learning_rate": 8.961462112396468e-05, "loss": 0.722, "step": 8337 }, { "epoch": 0.5649434243512433, "grad_norm": 7.075659275054932, "learning_rate": 8.961325210486686e-05, "loss": 1.0114, "step": 8338 }, { "epoch": 0.5650111796192154, "grad_norm": 6.1844024658203125, "learning_rate": 8.961188308576905e-05, "loss": 0.7534, "step": 8339 }, { "epoch": 0.5650789348871875, "grad_norm": 7.791917324066162, "learning_rate": 8.961051406667123e-05, "loss": 0.9825, "step": 8340 }, { "epoch": 0.5651466901551596, "grad_norm": 6.58551025390625, "learning_rate": 8.960914504757341e-05, "loss": 0.7715, "step": 8341 }, { "epoch": 0.5652144454231316, "grad_norm": 6.225470066070557, "learning_rate": 8.96077760284756e-05, "loss": 0.7685, "step": 8342 }, { "epoch": 0.5652822006911037, "grad_norm": 10.40294361114502, "learning_rate": 8.960640700937778e-05, "loss": 0.9553, "step": 8343 }, { "epoch": 0.5653499559590758, "grad_norm": 5.97885274887085, "learning_rate": 8.960503799027997e-05, "loss": 0.7282, "step": 8344 }, { "epoch": 0.5654177112270479, "grad_norm": 7.777407646179199, "learning_rate": 8.960366897118215e-05, "loss": 0.859, "step": 8345 }, { "epoch": 0.56548546649502, "grad_norm": 6.5945892333984375, "learning_rate": 8.960229995208433e-05, "loss": 1.0263, "step": 8346 }, { "epoch": 0.565553221762992, "grad_norm": 5.678567409515381, "learning_rate": 8.960093093298651e-05, "loss": 0.6956, "step": 8347 }, { "epoch": 0.5656209770309641, "grad_norm": 6.3759846687316895, "learning_rate": 8.95995619138887e-05, "loss": 0.8164, "step": 8348 }, { "epoch": 0.5656887322989362, "grad_norm": 6.627008438110352, "learning_rate": 8.959819289479088e-05, "loss": 0.9505, "step": 8349 }, { "epoch": 0.5657564875669083, "grad_norm": 8.078243255615234, "learning_rate": 8.959682387569306e-05, "loss": 0.9061, "step": 8350 }, { "epoch": 0.5658242428348804, "grad_norm": 5.019404888153076, "learning_rate": 8.959545485659524e-05, "loss": 0.7382, "step": 8351 }, { "epoch": 0.5658919981028525, "grad_norm": 8.406132698059082, "learning_rate": 8.959408583749744e-05, "loss": 1.0107, "step": 8352 }, { "epoch": 0.5659597533708246, "grad_norm": 6.881223678588867, "learning_rate": 8.959271681839962e-05, "loss": 0.7417, "step": 8353 }, { "epoch": 0.5660275086387967, "grad_norm": 5.801870822906494, "learning_rate": 8.95913477993018e-05, "loss": 0.7736, "step": 8354 }, { "epoch": 0.5660952639067688, "grad_norm": 5.533797740936279, "learning_rate": 8.9589978780204e-05, "loss": 0.9576, "step": 8355 }, { "epoch": 0.5661630191747409, "grad_norm": 5.76112174987793, "learning_rate": 8.958860976110617e-05, "loss": 0.8774, "step": 8356 }, { "epoch": 0.566230774442713, "grad_norm": 6.424200534820557, "learning_rate": 8.958724074200835e-05, "loss": 0.9104, "step": 8357 }, { "epoch": 0.566298529710685, "grad_norm": 7.279561519622803, "learning_rate": 8.958587172291055e-05, "loss": 0.8566, "step": 8358 }, { "epoch": 0.566366284978657, "grad_norm": 7.023104667663574, "learning_rate": 8.958450270381273e-05, "loss": 0.785, "step": 8359 }, { "epoch": 0.5664340402466291, "grad_norm": 5.087419033050537, "learning_rate": 8.958313368471491e-05, "loss": 0.7379, "step": 8360 }, { "epoch": 0.5665017955146012, "grad_norm": 6.156972885131836, "learning_rate": 8.958176466561709e-05, "loss": 0.9361, "step": 8361 }, { "epoch": 0.5665695507825733, "grad_norm": 6.432037830352783, "learning_rate": 8.958039564651928e-05, "loss": 0.804, "step": 8362 }, { "epoch": 0.5666373060505454, "grad_norm": 6.592261791229248, "learning_rate": 8.957902662742146e-05, "loss": 0.8668, "step": 8363 }, { "epoch": 0.5667050613185175, "grad_norm": 6.8097944259643555, "learning_rate": 8.957765760832364e-05, "loss": 0.8852, "step": 8364 }, { "epoch": 0.5667728165864896, "grad_norm": 6.4310078620910645, "learning_rate": 8.957628858922582e-05, "loss": 0.9113, "step": 8365 }, { "epoch": 0.5668405718544617, "grad_norm": 4.992232322692871, "learning_rate": 8.957491957012802e-05, "loss": 0.7373, "step": 8366 }, { "epoch": 0.5669083271224338, "grad_norm": 6.666651725769043, "learning_rate": 8.95735505510302e-05, "loss": 0.9169, "step": 8367 }, { "epoch": 0.5669760823904059, "grad_norm": 6.658245086669922, "learning_rate": 8.957218153193238e-05, "loss": 0.7536, "step": 8368 }, { "epoch": 0.567043837658378, "grad_norm": 7.354119300842285, "learning_rate": 8.957081251283456e-05, "loss": 0.9655, "step": 8369 }, { "epoch": 0.56711159292635, "grad_norm": 7.857820510864258, "learning_rate": 8.956944349373674e-05, "loss": 1.0672, "step": 8370 }, { "epoch": 0.5671793481943221, "grad_norm": 9.102689743041992, "learning_rate": 8.956807447463893e-05, "loss": 1.1671, "step": 8371 }, { "epoch": 0.5672471034622942, "grad_norm": 7.795739650726318, "learning_rate": 8.956670545554111e-05, "loss": 0.8685, "step": 8372 }, { "epoch": 0.5673148587302663, "grad_norm": 9.154378890991211, "learning_rate": 8.95653364364433e-05, "loss": 0.9891, "step": 8373 }, { "epoch": 0.5673826139982384, "grad_norm": 5.6434645652771, "learning_rate": 8.956396741734547e-05, "loss": 0.7481, "step": 8374 }, { "epoch": 0.5674503692662104, "grad_norm": 4.90270471572876, "learning_rate": 8.956259839824765e-05, "loss": 0.5697, "step": 8375 }, { "epoch": 0.5675181245341825, "grad_norm": 6.498236656188965, "learning_rate": 8.956122937914985e-05, "loss": 0.8037, "step": 8376 }, { "epoch": 0.5675858798021546, "grad_norm": 6.723139762878418, "learning_rate": 8.955986036005203e-05, "loss": 0.7044, "step": 8377 }, { "epoch": 0.5676536350701267, "grad_norm": 6.958408832550049, "learning_rate": 8.955849134095421e-05, "loss": 1.2551, "step": 8378 }, { "epoch": 0.5677213903380988, "grad_norm": 5.749763488769531, "learning_rate": 8.955712232185639e-05, "loss": 0.6988, "step": 8379 }, { "epoch": 0.5677891456060709, "grad_norm": 8.176393508911133, "learning_rate": 8.955575330275858e-05, "loss": 0.9854, "step": 8380 }, { "epoch": 0.567856900874043, "grad_norm": 6.546833038330078, "learning_rate": 8.955438428366076e-05, "loss": 0.6731, "step": 8381 }, { "epoch": 0.567924656142015, "grad_norm": 5.418013095855713, "learning_rate": 8.955301526456294e-05, "loss": 0.6531, "step": 8382 }, { "epoch": 0.5679924114099871, "grad_norm": 5.538142681121826, "learning_rate": 8.955164624546512e-05, "loss": 0.6456, "step": 8383 }, { "epoch": 0.5680601666779592, "grad_norm": 7.09976053237915, "learning_rate": 8.95502772263673e-05, "loss": 0.9614, "step": 8384 }, { "epoch": 0.5681279219459313, "grad_norm": 6.105874061584473, "learning_rate": 8.95489082072695e-05, "loss": 0.6241, "step": 8385 }, { "epoch": 0.5681956772139034, "grad_norm": 6.9980998039245605, "learning_rate": 8.954753918817168e-05, "loss": 0.9669, "step": 8386 }, { "epoch": 0.5682634324818755, "grad_norm": 6.8302764892578125, "learning_rate": 8.954617016907386e-05, "loss": 0.6397, "step": 8387 }, { "epoch": 0.5683311877498476, "grad_norm": 7.777739524841309, "learning_rate": 8.954480114997604e-05, "loss": 1.0691, "step": 8388 }, { "epoch": 0.5683989430178197, "grad_norm": 7.322915554046631, "learning_rate": 8.954343213087823e-05, "loss": 0.9179, "step": 8389 }, { "epoch": 0.5684666982857918, "grad_norm": 8.225046157836914, "learning_rate": 8.954206311178041e-05, "loss": 1.1727, "step": 8390 }, { "epoch": 0.5685344535537638, "grad_norm": 5.573197364807129, "learning_rate": 8.95406940926826e-05, "loss": 0.9111, "step": 8391 }, { "epoch": 0.5686022088217358, "grad_norm": 5.810042381286621, "learning_rate": 8.953932507358477e-05, "loss": 0.7115, "step": 8392 }, { "epoch": 0.5686699640897079, "grad_norm": 7.172820091247559, "learning_rate": 8.953795605448695e-05, "loss": 1.0404, "step": 8393 }, { "epoch": 0.56873771935768, "grad_norm": 8.901079177856445, "learning_rate": 8.953658703538915e-05, "loss": 1.164, "step": 8394 }, { "epoch": 0.5688054746256521, "grad_norm": 5.692160129547119, "learning_rate": 8.953521801629133e-05, "loss": 0.7094, "step": 8395 }, { "epoch": 0.5688732298936242, "grad_norm": 5.794686317443848, "learning_rate": 8.953384899719351e-05, "loss": 0.7625, "step": 8396 }, { "epoch": 0.5689409851615963, "grad_norm": 6.330542087554932, "learning_rate": 8.953247997809569e-05, "loss": 0.8826, "step": 8397 }, { "epoch": 0.5690087404295684, "grad_norm": 6.6873698234558105, "learning_rate": 8.953111095899788e-05, "loss": 0.8109, "step": 8398 }, { "epoch": 0.5690764956975405, "grad_norm": 5.331307411193848, "learning_rate": 8.952974193990006e-05, "loss": 0.6919, "step": 8399 }, { "epoch": 0.5691442509655126, "grad_norm": 6.942624568939209, "learning_rate": 8.952837292080224e-05, "loss": 0.9336, "step": 8400 }, { "epoch": 0.5692120062334847, "grad_norm": 5.526673793792725, "learning_rate": 8.952700390170444e-05, "loss": 0.6552, "step": 8401 }, { "epoch": 0.5692797615014568, "grad_norm": 6.674069404602051, "learning_rate": 8.952563488260662e-05, "loss": 0.7824, "step": 8402 }, { "epoch": 0.5693475167694289, "grad_norm": 7.103878498077393, "learning_rate": 8.95242658635088e-05, "loss": 0.7983, "step": 8403 }, { "epoch": 0.5694152720374009, "grad_norm": 5.963317394256592, "learning_rate": 8.952289684441099e-05, "loss": 0.7804, "step": 8404 }, { "epoch": 0.569483027305373, "grad_norm": 5.7701215744018555, "learning_rate": 8.952152782531317e-05, "loss": 0.6684, "step": 8405 }, { "epoch": 0.5695507825733451, "grad_norm": 6.809589862823486, "learning_rate": 8.952015880621535e-05, "loss": 0.8358, "step": 8406 }, { "epoch": 0.5696185378413171, "grad_norm": 5.772964954376221, "learning_rate": 8.951878978711753e-05, "loss": 0.8283, "step": 8407 }, { "epoch": 0.5696862931092892, "grad_norm": 5.902827262878418, "learning_rate": 8.951742076801973e-05, "loss": 0.9207, "step": 8408 }, { "epoch": 0.5697540483772613, "grad_norm": 6.312051296234131, "learning_rate": 8.951605174892191e-05, "loss": 0.8424, "step": 8409 }, { "epoch": 0.5698218036452334, "grad_norm": 6.687371730804443, "learning_rate": 8.951468272982409e-05, "loss": 0.9563, "step": 8410 }, { "epoch": 0.5698895589132055, "grad_norm": 5.801156520843506, "learning_rate": 8.951331371072627e-05, "loss": 0.898, "step": 8411 }, { "epoch": 0.5699573141811776, "grad_norm": 5.955209255218506, "learning_rate": 8.951194469162846e-05, "loss": 0.9062, "step": 8412 }, { "epoch": 0.5700250694491497, "grad_norm": 6.845731258392334, "learning_rate": 8.951057567253064e-05, "loss": 0.7345, "step": 8413 }, { "epoch": 0.5700928247171217, "grad_norm": 7.945831298828125, "learning_rate": 8.950920665343282e-05, "loss": 1.1156, "step": 8414 }, { "epoch": 0.5701605799850938, "grad_norm": 6.423880100250244, "learning_rate": 8.9507837634335e-05, "loss": 0.886, "step": 8415 }, { "epoch": 0.5702283352530659, "grad_norm": 6.089376449584961, "learning_rate": 8.950646861523718e-05, "loss": 0.7217, "step": 8416 }, { "epoch": 0.570296090521038, "grad_norm": 7.061302661895752, "learning_rate": 8.950509959613938e-05, "loss": 1.0641, "step": 8417 }, { "epoch": 0.5703638457890101, "grad_norm": 7.304849147796631, "learning_rate": 8.950373057704156e-05, "loss": 0.8099, "step": 8418 }, { "epoch": 0.5704316010569822, "grad_norm": 7.116214752197266, "learning_rate": 8.950236155794374e-05, "loss": 0.7974, "step": 8419 }, { "epoch": 0.5704993563249543, "grad_norm": 6.840792655944824, "learning_rate": 8.950099253884592e-05, "loss": 0.7558, "step": 8420 }, { "epoch": 0.5705671115929264, "grad_norm": 5.458795547485352, "learning_rate": 8.94996235197481e-05, "loss": 0.7173, "step": 8421 }, { "epoch": 0.5706348668608985, "grad_norm": 7.809031963348389, "learning_rate": 8.949825450065029e-05, "loss": 0.7751, "step": 8422 }, { "epoch": 0.5707026221288706, "grad_norm": 7.147863388061523, "learning_rate": 8.949688548155247e-05, "loss": 0.7824, "step": 8423 }, { "epoch": 0.5707703773968426, "grad_norm": 7.230483055114746, "learning_rate": 8.949551646245465e-05, "loss": 1.0771, "step": 8424 }, { "epoch": 0.5708381326648146, "grad_norm": 7.998863697052002, "learning_rate": 8.949414744335683e-05, "loss": 0.8166, "step": 8425 }, { "epoch": 0.5709058879327867, "grad_norm": 6.230087757110596, "learning_rate": 8.949277842425903e-05, "loss": 0.8565, "step": 8426 }, { "epoch": 0.5709736432007588, "grad_norm": 5.557027816772461, "learning_rate": 8.949140940516121e-05, "loss": 0.9794, "step": 8427 }, { "epoch": 0.5710413984687309, "grad_norm": 6.321481704711914, "learning_rate": 8.949004038606339e-05, "loss": 0.8254, "step": 8428 }, { "epoch": 0.571109153736703, "grad_norm": 4.4708428382873535, "learning_rate": 8.948867136696557e-05, "loss": 0.8051, "step": 8429 }, { "epoch": 0.5711769090046751, "grad_norm": 6.521273612976074, "learning_rate": 8.948730234786775e-05, "loss": 0.6903, "step": 8430 }, { "epoch": 0.5712446642726472, "grad_norm": 7.250330448150635, "learning_rate": 8.948593332876994e-05, "loss": 1.0477, "step": 8431 }, { "epoch": 0.5713124195406193, "grad_norm": 6.6420207023620605, "learning_rate": 8.948456430967212e-05, "loss": 0.7598, "step": 8432 }, { "epoch": 0.5713801748085914, "grad_norm": 5.907214164733887, "learning_rate": 8.94831952905743e-05, "loss": 0.8957, "step": 8433 }, { "epoch": 0.5714479300765635, "grad_norm": 7.927393913269043, "learning_rate": 8.948182627147648e-05, "loss": 0.814, "step": 8434 }, { "epoch": 0.5715156853445356, "grad_norm": 7.30061674118042, "learning_rate": 8.948045725237868e-05, "loss": 1.0433, "step": 8435 }, { "epoch": 0.5715834406125077, "grad_norm": 6.409942150115967, "learning_rate": 8.947908823328086e-05, "loss": 1.1921, "step": 8436 }, { "epoch": 0.5716511958804797, "grad_norm": 6.589274883270264, "learning_rate": 8.947771921418304e-05, "loss": 0.9055, "step": 8437 }, { "epoch": 0.5717189511484518, "grad_norm": 6.010043144226074, "learning_rate": 8.947635019508522e-05, "loss": 0.8581, "step": 8438 }, { "epoch": 0.5717867064164239, "grad_norm": 5.75832462310791, "learning_rate": 8.94749811759874e-05, "loss": 0.7936, "step": 8439 }, { "epoch": 0.5718544616843959, "grad_norm": 5.600092887878418, "learning_rate": 8.94736121568896e-05, "loss": 0.6994, "step": 8440 }, { "epoch": 0.571922216952368, "grad_norm": 5.774161338806152, "learning_rate": 8.947224313779177e-05, "loss": 0.8647, "step": 8441 }, { "epoch": 0.5719899722203401, "grad_norm": 6.7266950607299805, "learning_rate": 8.947087411869395e-05, "loss": 1.0754, "step": 8442 }, { "epoch": 0.5720577274883122, "grad_norm": 7.049251556396484, "learning_rate": 8.946950509959613e-05, "loss": 0.7269, "step": 8443 }, { "epoch": 0.5721254827562843, "grad_norm": 7.4380879402160645, "learning_rate": 8.946813608049833e-05, "loss": 0.8803, "step": 8444 }, { "epoch": 0.5721932380242564, "grad_norm": 6.558119297027588, "learning_rate": 8.946676706140051e-05, "loss": 0.774, "step": 8445 }, { "epoch": 0.5722609932922285, "grad_norm": 6.586884021759033, "learning_rate": 8.946539804230269e-05, "loss": 0.8607, "step": 8446 }, { "epoch": 0.5723287485602006, "grad_norm": 7.708719253540039, "learning_rate": 8.946402902320487e-05, "loss": 0.9618, "step": 8447 }, { "epoch": 0.5723965038281726, "grad_norm": 7.740994930267334, "learning_rate": 8.946266000410706e-05, "loss": 0.6727, "step": 8448 }, { "epoch": 0.5724642590961447, "grad_norm": 6.2427287101745605, "learning_rate": 8.946129098500924e-05, "loss": 0.8622, "step": 8449 }, { "epoch": 0.5725320143641168, "grad_norm": 8.826958656311035, "learning_rate": 8.945992196591142e-05, "loss": 0.8872, "step": 8450 }, { "epoch": 0.5725997696320889, "grad_norm": 7.137535572052002, "learning_rate": 8.945855294681362e-05, "loss": 0.7906, "step": 8451 }, { "epoch": 0.572667524900061, "grad_norm": 6.379401206970215, "learning_rate": 8.94571839277158e-05, "loss": 0.9356, "step": 8452 }, { "epoch": 0.5727352801680331, "grad_norm": 6.800835609436035, "learning_rate": 8.945581490861798e-05, "loss": 0.7702, "step": 8453 }, { "epoch": 0.5728030354360052, "grad_norm": 6.04707670211792, "learning_rate": 8.945444588952017e-05, "loss": 0.7877, "step": 8454 }, { "epoch": 0.5728707907039773, "grad_norm": 6.483813285827637, "learning_rate": 8.945307687042235e-05, "loss": 1.0264, "step": 8455 }, { "epoch": 0.5729385459719493, "grad_norm": 7.314120292663574, "learning_rate": 8.945170785132453e-05, "loss": 1.2052, "step": 8456 }, { "epoch": 0.5730063012399214, "grad_norm": 8.42261791229248, "learning_rate": 8.945033883222671e-05, "loss": 0.963, "step": 8457 }, { "epoch": 0.5730740565078934, "grad_norm": 5.949848175048828, "learning_rate": 8.944896981312891e-05, "loss": 0.7765, "step": 8458 }, { "epoch": 0.5731418117758655, "grad_norm": 8.215453147888184, "learning_rate": 8.944760079403109e-05, "loss": 0.9526, "step": 8459 }, { "epoch": 0.5732095670438376, "grad_norm": 7.214929580688477, "learning_rate": 8.944623177493327e-05, "loss": 0.9869, "step": 8460 }, { "epoch": 0.5732773223118097, "grad_norm": 7.484433174133301, "learning_rate": 8.944486275583545e-05, "loss": 0.9432, "step": 8461 }, { "epoch": 0.5733450775797818, "grad_norm": 6.356048107147217, "learning_rate": 8.944349373673763e-05, "loss": 0.8457, "step": 8462 }, { "epoch": 0.5734128328477539, "grad_norm": 6.3201117515563965, "learning_rate": 8.944212471763982e-05, "loss": 0.8232, "step": 8463 }, { "epoch": 0.573480588115726, "grad_norm": 7.538288593292236, "learning_rate": 8.9440755698542e-05, "loss": 0.655, "step": 8464 }, { "epoch": 0.5735483433836981, "grad_norm": 6.674449920654297, "learning_rate": 8.943938667944418e-05, "loss": 0.7804, "step": 8465 }, { "epoch": 0.5736160986516702, "grad_norm": 6.578172206878662, "learning_rate": 8.943801766034636e-05, "loss": 0.7174, "step": 8466 }, { "epoch": 0.5736838539196423, "grad_norm": 5.858245372772217, "learning_rate": 8.943664864124856e-05, "loss": 0.7236, "step": 8467 }, { "epoch": 0.5737516091876144, "grad_norm": 5.510379791259766, "learning_rate": 8.943527962215074e-05, "loss": 0.7767, "step": 8468 }, { "epoch": 0.5738193644555865, "grad_norm": 6.925917148590088, "learning_rate": 8.943391060305292e-05, "loss": 1.0637, "step": 8469 }, { "epoch": 0.5738871197235585, "grad_norm": 5.841418266296387, "learning_rate": 8.94325415839551e-05, "loss": 0.7703, "step": 8470 }, { "epoch": 0.5739548749915306, "grad_norm": 5.818962097167969, "learning_rate": 8.943117256485728e-05, "loss": 0.8358, "step": 8471 }, { "epoch": 0.5740226302595027, "grad_norm": 4.567956924438477, "learning_rate": 8.942980354575947e-05, "loss": 0.7853, "step": 8472 }, { "epoch": 0.5740903855274747, "grad_norm": 7.374995231628418, "learning_rate": 8.942843452666165e-05, "loss": 0.7623, "step": 8473 }, { "epoch": 0.5741581407954468, "grad_norm": 6.090158939361572, "learning_rate": 8.942706550756383e-05, "loss": 0.617, "step": 8474 }, { "epoch": 0.5742258960634189, "grad_norm": 7.125964641571045, "learning_rate": 8.942569648846601e-05, "loss": 0.9063, "step": 8475 }, { "epoch": 0.574293651331391, "grad_norm": 6.784455299377441, "learning_rate": 8.94243274693682e-05, "loss": 0.8758, "step": 8476 }, { "epoch": 0.5743614065993631, "grad_norm": 6.138723373413086, "learning_rate": 8.942295845027039e-05, "loss": 0.9269, "step": 8477 }, { "epoch": 0.5744291618673352, "grad_norm": 7.604799747467041, "learning_rate": 8.942158943117257e-05, "loss": 0.9721, "step": 8478 }, { "epoch": 0.5744969171353073, "grad_norm": 7.989227771759033, "learning_rate": 8.942022041207475e-05, "loss": 0.9722, "step": 8479 }, { "epoch": 0.5745646724032794, "grad_norm": 5.422422885894775, "learning_rate": 8.941885139297693e-05, "loss": 0.6328, "step": 8480 }, { "epoch": 0.5746324276712514, "grad_norm": 5.384946346282959, "learning_rate": 8.941748237387912e-05, "loss": 0.677, "step": 8481 }, { "epoch": 0.5747001829392235, "grad_norm": 6.695030212402344, "learning_rate": 8.94161133547813e-05, "loss": 0.8051, "step": 8482 }, { "epoch": 0.5747679382071956, "grad_norm": 6.037063121795654, "learning_rate": 8.941474433568348e-05, "loss": 0.7381, "step": 8483 }, { "epoch": 0.5748356934751677, "grad_norm": 5.64218807220459, "learning_rate": 8.941337531658566e-05, "loss": 0.943, "step": 8484 }, { "epoch": 0.5749034487431398, "grad_norm": 7.324346542358398, "learning_rate": 8.941200629748784e-05, "loss": 0.8886, "step": 8485 }, { "epoch": 0.5749712040111119, "grad_norm": 7.247791767120361, "learning_rate": 8.941063727839004e-05, "loss": 0.7939, "step": 8486 }, { "epoch": 0.575038959279084, "grad_norm": 6.780027866363525, "learning_rate": 8.940926825929222e-05, "loss": 0.9357, "step": 8487 }, { "epoch": 0.5751067145470561, "grad_norm": 6.983544826507568, "learning_rate": 8.94078992401944e-05, "loss": 0.73, "step": 8488 }, { "epoch": 0.5751744698150281, "grad_norm": 5.811152458190918, "learning_rate": 8.940653022109658e-05, "loss": 0.875, "step": 8489 }, { "epoch": 0.5752422250830002, "grad_norm": 5.372483253479004, "learning_rate": 8.940516120199877e-05, "loss": 0.8868, "step": 8490 }, { "epoch": 0.5753099803509722, "grad_norm": 7.48350191116333, "learning_rate": 8.940379218290095e-05, "loss": 1.0441, "step": 8491 }, { "epoch": 0.5753777356189443, "grad_norm": 4.902670383453369, "learning_rate": 8.940242316380313e-05, "loss": 0.6726, "step": 8492 }, { "epoch": 0.5754454908869164, "grad_norm": 5.900030136108398, "learning_rate": 8.940105414470531e-05, "loss": 0.799, "step": 8493 }, { "epoch": 0.5755132461548885, "grad_norm": 7.37599515914917, "learning_rate": 8.939968512560751e-05, "loss": 1.0577, "step": 8494 }, { "epoch": 0.5755810014228606, "grad_norm": 5.871433734893799, "learning_rate": 8.939831610650969e-05, "loss": 0.9335, "step": 8495 }, { "epoch": 0.5756487566908327, "grad_norm": 7.249385833740234, "learning_rate": 8.939694708741187e-05, "loss": 0.8244, "step": 8496 }, { "epoch": 0.5757165119588048, "grad_norm": 7.102729320526123, "learning_rate": 8.939557806831406e-05, "loss": 0.7431, "step": 8497 }, { "epoch": 0.5757842672267769, "grad_norm": 6.558697700500488, "learning_rate": 8.939420904921624e-05, "loss": 0.7765, "step": 8498 }, { "epoch": 0.575852022494749, "grad_norm": 6.82787561416626, "learning_rate": 8.939284003011842e-05, "loss": 0.9449, "step": 8499 }, { "epoch": 0.5759197777627211, "grad_norm": 7.055886268615723, "learning_rate": 8.939147101102062e-05, "loss": 0.8397, "step": 8500 }, { "epoch": 0.5759875330306932, "grad_norm": 7.750473976135254, "learning_rate": 8.93901019919228e-05, "loss": 0.8001, "step": 8501 }, { "epoch": 0.5760552882986653, "grad_norm": 9.581280708312988, "learning_rate": 8.938873297282498e-05, "loss": 0.7048, "step": 8502 }, { "epoch": 0.5761230435666373, "grad_norm": 6.592195510864258, "learning_rate": 8.938736395372716e-05, "loss": 0.9294, "step": 8503 }, { "epoch": 0.5761907988346094, "grad_norm": 5.0412797927856445, "learning_rate": 8.938599493462935e-05, "loss": 0.6723, "step": 8504 }, { "epoch": 0.5762585541025814, "grad_norm": 6.099496364593506, "learning_rate": 8.938462591553153e-05, "loss": 0.8038, "step": 8505 }, { "epoch": 0.5763263093705535, "grad_norm": 6.402875900268555, "learning_rate": 8.938325689643371e-05, "loss": 0.7956, "step": 8506 }, { "epoch": 0.5763940646385256, "grad_norm": 6.937565326690674, "learning_rate": 8.938188787733589e-05, "loss": 0.9354, "step": 8507 }, { "epoch": 0.5764618199064977, "grad_norm": 7.218740940093994, "learning_rate": 8.938051885823807e-05, "loss": 0.8022, "step": 8508 }, { "epoch": 0.5765295751744698, "grad_norm": 7.368991851806641, "learning_rate": 8.937914983914027e-05, "loss": 1.0043, "step": 8509 }, { "epoch": 0.5765973304424419, "grad_norm": 5.916724681854248, "learning_rate": 8.937778082004245e-05, "loss": 0.7391, "step": 8510 }, { "epoch": 0.576665085710414, "grad_norm": 6.9001007080078125, "learning_rate": 8.937641180094463e-05, "loss": 0.9798, "step": 8511 }, { "epoch": 0.5767328409783861, "grad_norm": 6.652282238006592, "learning_rate": 8.937504278184681e-05, "loss": 0.9313, "step": 8512 }, { "epoch": 0.5768005962463582, "grad_norm": 7.920009613037109, "learning_rate": 8.9373673762749e-05, "loss": 0.8018, "step": 8513 }, { "epoch": 0.5768683515143302, "grad_norm": 7.468683242797852, "learning_rate": 8.937230474365118e-05, "loss": 0.8666, "step": 8514 }, { "epoch": 0.5769361067823023, "grad_norm": 5.6963653564453125, "learning_rate": 8.937093572455336e-05, "loss": 0.8645, "step": 8515 }, { "epoch": 0.5770038620502744, "grad_norm": 9.493772506713867, "learning_rate": 8.936956670545554e-05, "loss": 0.7124, "step": 8516 }, { "epoch": 0.5770716173182465, "grad_norm": 6.918614864349365, "learning_rate": 8.936819768635772e-05, "loss": 0.7712, "step": 8517 }, { "epoch": 0.5771393725862186, "grad_norm": 5.827054977416992, "learning_rate": 8.936682866725992e-05, "loss": 0.7484, "step": 8518 }, { "epoch": 0.5772071278541907, "grad_norm": 4.4998626708984375, "learning_rate": 8.93654596481621e-05, "loss": 0.7199, "step": 8519 }, { "epoch": 0.5772748831221628, "grad_norm": 5.738202095031738, "learning_rate": 8.936409062906428e-05, "loss": 0.9027, "step": 8520 }, { "epoch": 0.5773426383901349, "grad_norm": 6.771662712097168, "learning_rate": 8.936272160996646e-05, "loss": 0.7006, "step": 8521 }, { "epoch": 0.5774103936581069, "grad_norm": 6.145763397216797, "learning_rate": 8.936135259086865e-05, "loss": 0.927, "step": 8522 }, { "epoch": 0.577478148926079, "grad_norm": 6.981403827667236, "learning_rate": 8.935998357177083e-05, "loss": 0.8556, "step": 8523 }, { "epoch": 0.577545904194051, "grad_norm": 6.167423725128174, "learning_rate": 8.935861455267301e-05, "loss": 0.643, "step": 8524 }, { "epoch": 0.5776136594620231, "grad_norm": 6.135037899017334, "learning_rate": 8.93572455335752e-05, "loss": 0.8866, "step": 8525 }, { "epoch": 0.5776814147299952, "grad_norm": 5.348565101623535, "learning_rate": 8.935587651447737e-05, "loss": 0.7559, "step": 8526 }, { "epoch": 0.5777491699979673, "grad_norm": 5.95448637008667, "learning_rate": 8.935450749537957e-05, "loss": 0.6934, "step": 8527 }, { "epoch": 0.5778169252659394, "grad_norm": 6.3629961013793945, "learning_rate": 8.935313847628175e-05, "loss": 0.7634, "step": 8528 }, { "epoch": 0.5778846805339115, "grad_norm": 6.271450996398926, "learning_rate": 8.935176945718393e-05, "loss": 0.8269, "step": 8529 }, { "epoch": 0.5779524358018836, "grad_norm": 5.41464900970459, "learning_rate": 8.935040043808611e-05, "loss": 0.5833, "step": 8530 }, { "epoch": 0.5780201910698557, "grad_norm": 10.096611022949219, "learning_rate": 8.934903141898829e-05, "loss": 0.7474, "step": 8531 }, { "epoch": 0.5780879463378278, "grad_norm": 7.8773393630981445, "learning_rate": 8.934766239989048e-05, "loss": 0.894, "step": 8532 }, { "epoch": 0.5781557016057999, "grad_norm": 6.695224761962891, "learning_rate": 8.934629338079266e-05, "loss": 0.8072, "step": 8533 }, { "epoch": 0.578223456873772, "grad_norm": 6.079283237457275, "learning_rate": 8.934492436169484e-05, "loss": 0.6978, "step": 8534 }, { "epoch": 0.5782912121417441, "grad_norm": 6.053205490112305, "learning_rate": 8.934355534259702e-05, "loss": 0.8264, "step": 8535 }, { "epoch": 0.5783589674097162, "grad_norm": 6.483332633972168, "learning_rate": 8.934218632349922e-05, "loss": 0.9318, "step": 8536 }, { "epoch": 0.5784267226776882, "grad_norm": 7.496954441070557, "learning_rate": 8.93408173044014e-05, "loss": 0.69, "step": 8537 }, { "epoch": 0.5784944779456602, "grad_norm": 6.839014530181885, "learning_rate": 8.933944828530358e-05, "loss": 0.8462, "step": 8538 }, { "epoch": 0.5785622332136323, "grad_norm": 7.050039291381836, "learning_rate": 8.933807926620576e-05, "loss": 1.0005, "step": 8539 }, { "epoch": 0.5786299884816044, "grad_norm": 6.284921169281006, "learning_rate": 8.933671024710795e-05, "loss": 0.6977, "step": 8540 }, { "epoch": 0.5786977437495765, "grad_norm": 6.051140308380127, "learning_rate": 8.933534122801013e-05, "loss": 0.9931, "step": 8541 }, { "epoch": 0.5787654990175486, "grad_norm": 6.233377933502197, "learning_rate": 8.933397220891231e-05, "loss": 0.6643, "step": 8542 }, { "epoch": 0.5788332542855207, "grad_norm": 5.1774678230285645, "learning_rate": 8.933260318981451e-05, "loss": 0.8994, "step": 8543 }, { "epoch": 0.5789010095534928, "grad_norm": 5.453426837921143, "learning_rate": 8.933123417071669e-05, "loss": 0.9404, "step": 8544 }, { "epoch": 0.5789687648214649, "grad_norm": 6.1316609382629395, "learning_rate": 8.932986515161887e-05, "loss": 0.8306, "step": 8545 }, { "epoch": 0.579036520089437, "grad_norm": 7.448431968688965, "learning_rate": 8.932849613252106e-05, "loss": 0.8259, "step": 8546 }, { "epoch": 0.579104275357409, "grad_norm": 8.094486236572266, "learning_rate": 8.932712711342324e-05, "loss": 1.011, "step": 8547 }, { "epoch": 0.5791720306253811, "grad_norm": 4.893822193145752, "learning_rate": 8.932575809432542e-05, "loss": 0.8489, "step": 8548 }, { "epoch": 0.5792397858933532, "grad_norm": 5.577503204345703, "learning_rate": 8.93243890752276e-05, "loss": 0.8933, "step": 8549 }, { "epoch": 0.5793075411613253, "grad_norm": 5.874213218688965, "learning_rate": 8.93230200561298e-05, "loss": 1.0447, "step": 8550 }, { "epoch": 0.5793752964292974, "grad_norm": 7.764857769012451, "learning_rate": 8.932165103703198e-05, "loss": 1.0503, "step": 8551 }, { "epoch": 0.5794430516972695, "grad_norm": 6.291406631469727, "learning_rate": 8.932028201793416e-05, "loss": 0.7368, "step": 8552 }, { "epoch": 0.5795108069652416, "grad_norm": 6.035033226013184, "learning_rate": 8.931891299883634e-05, "loss": 0.7683, "step": 8553 }, { "epoch": 0.5795785622332136, "grad_norm": 4.60941743850708, "learning_rate": 8.931754397973853e-05, "loss": 0.8216, "step": 8554 }, { "epoch": 0.5796463175011857, "grad_norm": 5.524953365325928, "learning_rate": 8.931617496064071e-05, "loss": 0.8958, "step": 8555 }, { "epoch": 0.5797140727691578, "grad_norm": 5.544130802154541, "learning_rate": 8.931480594154289e-05, "loss": 0.7621, "step": 8556 }, { "epoch": 0.5797818280371299, "grad_norm": 5.973321437835693, "learning_rate": 8.931343692244507e-05, "loss": 0.8805, "step": 8557 }, { "epoch": 0.579849583305102, "grad_norm": 5.566673755645752, "learning_rate": 8.931206790334725e-05, "loss": 0.7309, "step": 8558 }, { "epoch": 0.579917338573074, "grad_norm": 5.564174652099609, "learning_rate": 8.931069888424945e-05, "loss": 0.7333, "step": 8559 }, { "epoch": 0.5799850938410461, "grad_norm": 5.655849933624268, "learning_rate": 8.930932986515163e-05, "loss": 0.7378, "step": 8560 }, { "epoch": 0.5800528491090182, "grad_norm": 6.19892692565918, "learning_rate": 8.930796084605381e-05, "loss": 0.5912, "step": 8561 }, { "epoch": 0.5801206043769903, "grad_norm": 6.6580095291137695, "learning_rate": 8.930659182695599e-05, "loss": 0.7054, "step": 8562 }, { "epoch": 0.5801883596449624, "grad_norm": 5.171754837036133, "learning_rate": 8.930522280785817e-05, "loss": 0.7478, "step": 8563 }, { "epoch": 0.5802561149129345, "grad_norm": 6.493400573730469, "learning_rate": 8.930385378876036e-05, "loss": 0.7751, "step": 8564 }, { "epoch": 0.5803238701809066, "grad_norm": 7.6895833015441895, "learning_rate": 8.930248476966254e-05, "loss": 0.776, "step": 8565 }, { "epoch": 0.5803916254488787, "grad_norm": 6.17990255355835, "learning_rate": 8.930111575056472e-05, "loss": 0.6127, "step": 8566 }, { "epoch": 0.5804593807168508, "grad_norm": 7.613852024078369, "learning_rate": 8.92997467314669e-05, "loss": 0.901, "step": 8567 }, { "epoch": 0.5805271359848229, "grad_norm": 6.891161918640137, "learning_rate": 8.92983777123691e-05, "loss": 0.8535, "step": 8568 }, { "epoch": 0.580594891252795, "grad_norm": 8.584257125854492, "learning_rate": 8.929700869327128e-05, "loss": 0.8043, "step": 8569 }, { "epoch": 0.580662646520767, "grad_norm": 6.883901119232178, "learning_rate": 8.929563967417346e-05, "loss": 1.0738, "step": 8570 }, { "epoch": 0.580730401788739, "grad_norm": 6.844675064086914, "learning_rate": 8.929427065507564e-05, "loss": 0.868, "step": 8571 }, { "epoch": 0.5807981570567111, "grad_norm": 8.33270263671875, "learning_rate": 8.929290163597782e-05, "loss": 1.2373, "step": 8572 }, { "epoch": 0.5808659123246832, "grad_norm": 7.0670037269592285, "learning_rate": 8.929153261688001e-05, "loss": 0.875, "step": 8573 }, { "epoch": 0.5809336675926553, "grad_norm": 6.652496337890625, "learning_rate": 8.929016359778219e-05, "loss": 0.9693, "step": 8574 }, { "epoch": 0.5810014228606274, "grad_norm": 6.9150390625, "learning_rate": 8.928879457868437e-05, "loss": 1.0438, "step": 8575 }, { "epoch": 0.5810691781285995, "grad_norm": 7.5465192794799805, "learning_rate": 8.928742555958655e-05, "loss": 0.7079, "step": 8576 }, { "epoch": 0.5811369333965716, "grad_norm": 5.88304328918457, "learning_rate": 8.928605654048875e-05, "loss": 0.8251, "step": 8577 }, { "epoch": 0.5812046886645437, "grad_norm": 6.625080108642578, "learning_rate": 8.928468752139093e-05, "loss": 0.833, "step": 8578 }, { "epoch": 0.5812724439325158, "grad_norm": 5.93492317199707, "learning_rate": 8.928331850229311e-05, "loss": 0.8211, "step": 8579 }, { "epoch": 0.5813401992004879, "grad_norm": 5.411419868469238, "learning_rate": 8.928194948319529e-05, "loss": 0.8365, "step": 8580 }, { "epoch": 0.5814079544684599, "grad_norm": 6.126975059509277, "learning_rate": 8.928058046409747e-05, "loss": 0.8646, "step": 8581 }, { "epoch": 0.581475709736432, "grad_norm": 6.076915264129639, "learning_rate": 8.927921144499966e-05, "loss": 0.8304, "step": 8582 }, { "epoch": 0.5815434650044041, "grad_norm": 7.348392009735107, "learning_rate": 8.927784242590184e-05, "loss": 0.8467, "step": 8583 }, { "epoch": 0.5816112202723762, "grad_norm": 5.092489242553711, "learning_rate": 8.927647340680402e-05, "loss": 0.7371, "step": 8584 }, { "epoch": 0.5816789755403483, "grad_norm": 5.680901527404785, "learning_rate": 8.92751043877062e-05, "loss": 0.7388, "step": 8585 }, { "epoch": 0.5817467308083204, "grad_norm": 7.501118183135986, "learning_rate": 8.92737353686084e-05, "loss": 0.6264, "step": 8586 }, { "epoch": 0.5818144860762924, "grad_norm": 6.8912529945373535, "learning_rate": 8.927236634951058e-05, "loss": 0.8985, "step": 8587 }, { "epoch": 0.5818822413442645, "grad_norm": 6.703160285949707, "learning_rate": 8.927099733041276e-05, "loss": 1.1611, "step": 8588 }, { "epoch": 0.5819499966122366, "grad_norm": 5.655977725982666, "learning_rate": 8.926962831131495e-05, "loss": 0.7641, "step": 8589 }, { "epoch": 0.5820177518802087, "grad_norm": 7.246835708618164, "learning_rate": 8.926825929221713e-05, "loss": 0.97, "step": 8590 }, { "epoch": 0.5820855071481807, "grad_norm": 7.098568916320801, "learning_rate": 8.926689027311931e-05, "loss": 0.9531, "step": 8591 }, { "epoch": 0.5821532624161528, "grad_norm": 5.854083061218262, "learning_rate": 8.92655212540215e-05, "loss": 0.5604, "step": 8592 }, { "epoch": 0.5822210176841249, "grad_norm": 9.047096252441406, "learning_rate": 8.926415223492369e-05, "loss": 0.9453, "step": 8593 }, { "epoch": 0.582288772952097, "grad_norm": 6.015660762786865, "learning_rate": 8.926278321582587e-05, "loss": 0.8029, "step": 8594 }, { "epoch": 0.5823565282200691, "grad_norm": 5.913661479949951, "learning_rate": 8.926141419672805e-05, "loss": 0.6898, "step": 8595 }, { "epoch": 0.5824242834880412, "grad_norm": 5.473297595977783, "learning_rate": 8.926004517763024e-05, "loss": 0.9829, "step": 8596 }, { "epoch": 0.5824920387560133, "grad_norm": 5.53269624710083, "learning_rate": 8.925867615853242e-05, "loss": 0.7763, "step": 8597 }, { "epoch": 0.5825597940239854, "grad_norm": 6.618366241455078, "learning_rate": 8.92573071394346e-05, "loss": 1.1884, "step": 8598 }, { "epoch": 0.5826275492919575, "grad_norm": 6.628344535827637, "learning_rate": 8.925593812033678e-05, "loss": 0.7971, "step": 8599 }, { "epoch": 0.5826953045599296, "grad_norm": 6.690967082977295, "learning_rate": 8.925456910123898e-05, "loss": 0.837, "step": 8600 }, { "epoch": 0.5827630598279017, "grad_norm": 6.788214683532715, "learning_rate": 8.925320008214116e-05, "loss": 0.6239, "step": 8601 }, { "epoch": 0.5828308150958738, "grad_norm": 6.47343111038208, "learning_rate": 8.925183106304334e-05, "loss": 0.8656, "step": 8602 }, { "epoch": 0.5828985703638457, "grad_norm": 7.423914909362793, "learning_rate": 8.925046204394552e-05, "loss": 0.9741, "step": 8603 }, { "epoch": 0.5829663256318178, "grad_norm": 6.18986701965332, "learning_rate": 8.92490930248477e-05, "loss": 0.8261, "step": 8604 }, { "epoch": 0.5830340808997899, "grad_norm": 6.904397010803223, "learning_rate": 8.924772400574989e-05, "loss": 0.6875, "step": 8605 }, { "epoch": 0.583101836167762, "grad_norm": 6.054782867431641, "learning_rate": 8.924635498665207e-05, "loss": 0.7726, "step": 8606 }, { "epoch": 0.5831695914357341, "grad_norm": 7.563791751861572, "learning_rate": 8.924498596755425e-05, "loss": 1.038, "step": 8607 }, { "epoch": 0.5832373467037062, "grad_norm": 7.547990322113037, "learning_rate": 8.924361694845643e-05, "loss": 0.8108, "step": 8608 }, { "epoch": 0.5833051019716783, "grad_norm": 6.109129905700684, "learning_rate": 8.924224792935861e-05, "loss": 0.9512, "step": 8609 }, { "epoch": 0.5833728572396504, "grad_norm": 6.7443528175354, "learning_rate": 8.924087891026081e-05, "loss": 1.0178, "step": 8610 }, { "epoch": 0.5834406125076225, "grad_norm": 4.522619247436523, "learning_rate": 8.923950989116299e-05, "loss": 0.7338, "step": 8611 }, { "epoch": 0.5835083677755946, "grad_norm": 6.25988245010376, "learning_rate": 8.923814087206517e-05, "loss": 0.9412, "step": 8612 }, { "epoch": 0.5835761230435667, "grad_norm": 8.428157806396484, "learning_rate": 8.923677185296735e-05, "loss": 0.9129, "step": 8613 }, { "epoch": 0.5836438783115387, "grad_norm": 6.854711055755615, "learning_rate": 8.923540283386954e-05, "loss": 0.9614, "step": 8614 }, { "epoch": 0.5837116335795108, "grad_norm": 5.909132957458496, "learning_rate": 8.923403381477172e-05, "loss": 1.0066, "step": 8615 }, { "epoch": 0.5837793888474829, "grad_norm": 6.080239295959473, "learning_rate": 8.92326647956739e-05, "loss": 0.8778, "step": 8616 }, { "epoch": 0.583847144115455, "grad_norm": 5.880582809448242, "learning_rate": 8.923129577657608e-05, "loss": 0.674, "step": 8617 }, { "epoch": 0.5839148993834271, "grad_norm": 5.32850980758667, "learning_rate": 8.922992675747826e-05, "loss": 0.7366, "step": 8618 }, { "epoch": 0.5839826546513991, "grad_norm": 5.818620204925537, "learning_rate": 8.922855773838046e-05, "loss": 0.8723, "step": 8619 }, { "epoch": 0.5840504099193712, "grad_norm": 7.105060577392578, "learning_rate": 8.922718871928264e-05, "loss": 0.9449, "step": 8620 }, { "epoch": 0.5841181651873433, "grad_norm": 7.047363758087158, "learning_rate": 8.922581970018482e-05, "loss": 1.0505, "step": 8621 }, { "epoch": 0.5841859204553154, "grad_norm": 7.657389163970947, "learning_rate": 8.9224450681087e-05, "loss": 1.0046, "step": 8622 }, { "epoch": 0.5842536757232875, "grad_norm": 6.699533462524414, "learning_rate": 8.922308166198919e-05, "loss": 0.9382, "step": 8623 }, { "epoch": 0.5843214309912595, "grad_norm": 7.720858573913574, "learning_rate": 8.922171264289137e-05, "loss": 0.732, "step": 8624 }, { "epoch": 0.5843891862592316, "grad_norm": 5.816531658172607, "learning_rate": 8.922034362379355e-05, "loss": 0.7825, "step": 8625 }, { "epoch": 0.5844569415272037, "grad_norm": 5.647473335266113, "learning_rate": 8.921897460469573e-05, "loss": 0.8234, "step": 8626 }, { "epoch": 0.5845246967951758, "grad_norm": 6.133992671966553, "learning_rate": 8.921760558559791e-05, "loss": 0.6883, "step": 8627 }, { "epoch": 0.5845924520631479, "grad_norm": 6.076829433441162, "learning_rate": 8.921623656650011e-05, "loss": 0.6453, "step": 8628 }, { "epoch": 0.58466020733112, "grad_norm": 6.3974385261535645, "learning_rate": 8.921486754740229e-05, "loss": 0.7473, "step": 8629 }, { "epoch": 0.5847279625990921, "grad_norm": 8.298005104064941, "learning_rate": 8.921349852830447e-05, "loss": 0.563, "step": 8630 }, { "epoch": 0.5847957178670642, "grad_norm": 6.789668083190918, "learning_rate": 8.921212950920665e-05, "loss": 0.8418, "step": 8631 }, { "epoch": 0.5848634731350363, "grad_norm": 5.68512487411499, "learning_rate": 8.921076049010884e-05, "loss": 0.7862, "step": 8632 }, { "epoch": 0.5849312284030084, "grad_norm": 7.587048053741455, "learning_rate": 8.920939147101102e-05, "loss": 0.6798, "step": 8633 }, { "epoch": 0.5849989836709805, "grad_norm": 6.540048122406006, "learning_rate": 8.92080224519132e-05, "loss": 0.9243, "step": 8634 }, { "epoch": 0.5850667389389526, "grad_norm": 6.688183784484863, "learning_rate": 8.92066534328154e-05, "loss": 0.7537, "step": 8635 }, { "epoch": 0.5851344942069245, "grad_norm": 6.010653972625732, "learning_rate": 8.920528441371758e-05, "loss": 0.962, "step": 8636 }, { "epoch": 0.5852022494748966, "grad_norm": 6.197324752807617, "learning_rate": 8.920391539461976e-05, "loss": 0.8345, "step": 8637 }, { "epoch": 0.5852700047428687, "grad_norm": 6.193318843841553, "learning_rate": 8.920254637552195e-05, "loss": 0.77, "step": 8638 }, { "epoch": 0.5853377600108408, "grad_norm": 7.584078788757324, "learning_rate": 8.920117735642413e-05, "loss": 0.9704, "step": 8639 }, { "epoch": 0.5854055152788129, "grad_norm": 9.150726318359375, "learning_rate": 8.919980833732631e-05, "loss": 0.9343, "step": 8640 }, { "epoch": 0.585473270546785, "grad_norm": 5.945910930633545, "learning_rate": 8.919843931822849e-05, "loss": 0.8009, "step": 8641 }, { "epoch": 0.5855410258147571, "grad_norm": 6.68170690536499, "learning_rate": 8.919707029913069e-05, "loss": 0.7701, "step": 8642 }, { "epoch": 0.5856087810827292, "grad_norm": 5.520997047424316, "learning_rate": 8.919570128003287e-05, "loss": 0.8759, "step": 8643 }, { "epoch": 0.5856765363507013, "grad_norm": 6.285208702087402, "learning_rate": 8.919433226093505e-05, "loss": 0.9845, "step": 8644 }, { "epoch": 0.5857442916186734, "grad_norm": 5.63783597946167, "learning_rate": 8.919296324183723e-05, "loss": 0.7603, "step": 8645 }, { "epoch": 0.5858120468866455, "grad_norm": 6.416496753692627, "learning_rate": 8.919159422273942e-05, "loss": 0.6493, "step": 8646 }, { "epoch": 0.5858798021546175, "grad_norm": 6.023496627807617, "learning_rate": 8.91902252036416e-05, "loss": 0.7045, "step": 8647 }, { "epoch": 0.5859475574225896, "grad_norm": 6.045313835144043, "learning_rate": 8.918885618454378e-05, "loss": 0.8136, "step": 8648 }, { "epoch": 0.5860153126905617, "grad_norm": 5.985274314880371, "learning_rate": 8.918748716544596e-05, "loss": 0.6254, "step": 8649 }, { "epoch": 0.5860830679585338, "grad_norm": 6.4496965408325195, "learning_rate": 8.918611814634814e-05, "loss": 0.8043, "step": 8650 }, { "epoch": 0.5861508232265059, "grad_norm": 6.74000883102417, "learning_rate": 8.918474912725034e-05, "loss": 0.7568, "step": 8651 }, { "epoch": 0.5862185784944779, "grad_norm": 6.625565052032471, "learning_rate": 8.918338010815252e-05, "loss": 0.8329, "step": 8652 }, { "epoch": 0.58628633376245, "grad_norm": 8.42115592956543, "learning_rate": 8.91820110890547e-05, "loss": 0.8818, "step": 8653 }, { "epoch": 0.5863540890304221, "grad_norm": 5.852290153503418, "learning_rate": 8.918064206995688e-05, "loss": 0.7521, "step": 8654 }, { "epoch": 0.5864218442983942, "grad_norm": 5.760341644287109, "learning_rate": 8.917927305085907e-05, "loss": 0.7142, "step": 8655 }, { "epoch": 0.5864895995663663, "grad_norm": 6.094460964202881, "learning_rate": 8.917790403176125e-05, "loss": 0.7863, "step": 8656 }, { "epoch": 0.5865573548343384, "grad_norm": 7.665438652038574, "learning_rate": 8.917653501266343e-05, "loss": 0.7998, "step": 8657 }, { "epoch": 0.5866251101023104, "grad_norm": 8.002902030944824, "learning_rate": 8.917516599356561e-05, "loss": 0.6918, "step": 8658 }, { "epoch": 0.5866928653702825, "grad_norm": 8.93215274810791, "learning_rate": 8.917379697446779e-05, "loss": 1.044, "step": 8659 }, { "epoch": 0.5867606206382546, "grad_norm": 6.267491340637207, "learning_rate": 8.917242795536999e-05, "loss": 0.9676, "step": 8660 }, { "epoch": 0.5868283759062267, "grad_norm": 8.059505462646484, "learning_rate": 8.917105893627217e-05, "loss": 0.6688, "step": 8661 }, { "epoch": 0.5868961311741988, "grad_norm": 9.9888277053833, "learning_rate": 8.916968991717435e-05, "loss": 1.0911, "step": 8662 }, { "epoch": 0.5869638864421709, "grad_norm": 5.3233232498168945, "learning_rate": 8.916832089807653e-05, "loss": 0.6354, "step": 8663 }, { "epoch": 0.587031641710143, "grad_norm": 6.694457054138184, "learning_rate": 8.916695187897871e-05, "loss": 0.8184, "step": 8664 }, { "epoch": 0.5870993969781151, "grad_norm": 6.7131547927856445, "learning_rate": 8.91655828598809e-05, "loss": 0.9447, "step": 8665 }, { "epoch": 0.5871671522460872, "grad_norm": 6.680534362792969, "learning_rate": 8.916421384078308e-05, "loss": 0.7022, "step": 8666 }, { "epoch": 0.5872349075140593, "grad_norm": 5.335456371307373, "learning_rate": 8.916284482168526e-05, "loss": 0.806, "step": 8667 }, { "epoch": 0.5873026627820312, "grad_norm": 7.5574235916137695, "learning_rate": 8.916147580258744e-05, "loss": 0.7042, "step": 8668 }, { "epoch": 0.5873704180500033, "grad_norm": 6.056331634521484, "learning_rate": 8.916010678348964e-05, "loss": 0.7419, "step": 8669 }, { "epoch": 0.5874381733179754, "grad_norm": 6.732639312744141, "learning_rate": 8.915873776439182e-05, "loss": 0.7788, "step": 8670 }, { "epoch": 0.5875059285859475, "grad_norm": 5.853601932525635, "learning_rate": 8.9157368745294e-05, "loss": 0.7423, "step": 8671 }, { "epoch": 0.5875736838539196, "grad_norm": 5.647787570953369, "learning_rate": 8.915599972619618e-05, "loss": 0.6611, "step": 8672 }, { "epoch": 0.5876414391218917, "grad_norm": 5.323878288269043, "learning_rate": 8.915463070709836e-05, "loss": 0.7538, "step": 8673 }, { "epoch": 0.5877091943898638, "grad_norm": 7.562190055847168, "learning_rate": 8.915326168800055e-05, "loss": 0.865, "step": 8674 }, { "epoch": 0.5877769496578359, "grad_norm": 5.60343599319458, "learning_rate": 8.915189266890273e-05, "loss": 0.8023, "step": 8675 }, { "epoch": 0.587844704925808, "grad_norm": 6.189206123352051, "learning_rate": 8.915052364980491e-05, "loss": 0.8698, "step": 8676 }, { "epoch": 0.5879124601937801, "grad_norm": 6.227395534515381, "learning_rate": 8.914915463070709e-05, "loss": 0.9052, "step": 8677 }, { "epoch": 0.5879802154617522, "grad_norm": 5.004229545593262, "learning_rate": 8.914778561160929e-05, "loss": 0.6775, "step": 8678 }, { "epoch": 0.5880479707297243, "grad_norm": 8.418085098266602, "learning_rate": 8.914641659251147e-05, "loss": 1.1209, "step": 8679 }, { "epoch": 0.5881157259976963, "grad_norm": 7.0277204513549805, "learning_rate": 8.914504757341365e-05, "loss": 0.9533, "step": 8680 }, { "epoch": 0.5881834812656684, "grad_norm": 6.450404644012451, "learning_rate": 8.914367855431584e-05, "loss": 0.9277, "step": 8681 }, { "epoch": 0.5882512365336405, "grad_norm": 6.549577236175537, "learning_rate": 8.914230953521802e-05, "loss": 0.7418, "step": 8682 }, { "epoch": 0.5883189918016126, "grad_norm": 7.030699729919434, "learning_rate": 8.91409405161202e-05, "loss": 0.8336, "step": 8683 }, { "epoch": 0.5883867470695847, "grad_norm": 6.665583610534668, "learning_rate": 8.91395714970224e-05, "loss": 0.8458, "step": 8684 }, { "epoch": 0.5884545023375567, "grad_norm": 7.172003269195557, "learning_rate": 8.913820247792458e-05, "loss": 0.9778, "step": 8685 }, { "epoch": 0.5885222576055288, "grad_norm": 5.771976947784424, "learning_rate": 8.913683345882676e-05, "loss": 0.6404, "step": 8686 }, { "epoch": 0.5885900128735009, "grad_norm": 6.679132461547852, "learning_rate": 8.913546443972895e-05, "loss": 0.8758, "step": 8687 }, { "epoch": 0.588657768141473, "grad_norm": 6.341593265533447, "learning_rate": 8.913409542063113e-05, "loss": 0.9231, "step": 8688 }, { "epoch": 0.5887255234094451, "grad_norm": 4.834532260894775, "learning_rate": 8.913272640153331e-05, "loss": 0.6709, "step": 8689 }, { "epoch": 0.5887932786774172, "grad_norm": 6.240522861480713, "learning_rate": 8.913135738243549e-05, "loss": 0.923, "step": 8690 }, { "epoch": 0.5888610339453892, "grad_norm": 6.935693264007568, "learning_rate": 8.912998836333767e-05, "loss": 0.7663, "step": 8691 }, { "epoch": 0.5889287892133613, "grad_norm": 6.702020168304443, "learning_rate": 8.912861934423987e-05, "loss": 0.937, "step": 8692 }, { "epoch": 0.5889965444813334, "grad_norm": 6.361667156219482, "learning_rate": 8.912725032514205e-05, "loss": 0.7574, "step": 8693 }, { "epoch": 0.5890642997493055, "grad_norm": 7.599695682525635, "learning_rate": 8.912588130604423e-05, "loss": 0.8366, "step": 8694 }, { "epoch": 0.5891320550172776, "grad_norm": 7.101802349090576, "learning_rate": 8.912451228694641e-05, "loss": 0.9409, "step": 8695 }, { "epoch": 0.5891998102852497, "grad_norm": 7.674192905426025, "learning_rate": 8.912314326784859e-05, "loss": 1.0846, "step": 8696 }, { "epoch": 0.5892675655532218, "grad_norm": 6.223476886749268, "learning_rate": 8.912177424875078e-05, "loss": 0.9597, "step": 8697 }, { "epoch": 0.5893353208211939, "grad_norm": 5.847829341888428, "learning_rate": 8.912040522965296e-05, "loss": 0.8017, "step": 8698 }, { "epoch": 0.589403076089166, "grad_norm": 6.459780216217041, "learning_rate": 8.911903621055514e-05, "loss": 0.877, "step": 8699 }, { "epoch": 0.5894708313571381, "grad_norm": 5.9923481941223145, "learning_rate": 8.911766719145732e-05, "loss": 0.9193, "step": 8700 }, { "epoch": 0.58953858662511, "grad_norm": 5.799968242645264, "learning_rate": 8.911629817235952e-05, "loss": 0.7449, "step": 8701 }, { "epoch": 0.5896063418930821, "grad_norm": 6.3980393409729, "learning_rate": 8.91149291532617e-05, "loss": 0.9118, "step": 8702 }, { "epoch": 0.5896740971610542, "grad_norm": 6.184665203094482, "learning_rate": 8.911356013416388e-05, "loss": 0.6857, "step": 8703 }, { "epoch": 0.5897418524290263, "grad_norm": 7.494194030761719, "learning_rate": 8.911219111506606e-05, "loss": 1.0483, "step": 8704 }, { "epoch": 0.5898096076969984, "grad_norm": 5.360753059387207, "learning_rate": 8.911082209596824e-05, "loss": 0.7064, "step": 8705 }, { "epoch": 0.5898773629649705, "grad_norm": 6.393972873687744, "learning_rate": 8.910945307687043e-05, "loss": 0.9095, "step": 8706 }, { "epoch": 0.5899451182329426, "grad_norm": 8.099264144897461, "learning_rate": 8.910808405777261e-05, "loss": 1.0004, "step": 8707 }, { "epoch": 0.5900128735009147, "grad_norm": 5.654821872711182, "learning_rate": 8.910671503867479e-05, "loss": 0.753, "step": 8708 }, { "epoch": 0.5900806287688868, "grad_norm": 7.046943187713623, "learning_rate": 8.910534601957697e-05, "loss": 1.012, "step": 8709 }, { "epoch": 0.5901483840368589, "grad_norm": 6.534369945526123, "learning_rate": 8.910397700047917e-05, "loss": 0.8296, "step": 8710 }, { "epoch": 0.590216139304831, "grad_norm": 5.612612724304199, "learning_rate": 8.910260798138135e-05, "loss": 0.7516, "step": 8711 }, { "epoch": 0.5902838945728031, "grad_norm": 5.311154365539551, "learning_rate": 8.910123896228353e-05, "loss": 0.7594, "step": 8712 }, { "epoch": 0.5903516498407752, "grad_norm": 6.404855251312256, "learning_rate": 8.909986994318571e-05, "loss": 0.7252, "step": 8713 }, { "epoch": 0.5904194051087472, "grad_norm": 7.446944236755371, "learning_rate": 8.909850092408789e-05, "loss": 0.9048, "step": 8714 }, { "epoch": 0.5904871603767193, "grad_norm": 5.755975246429443, "learning_rate": 8.909713190499008e-05, "loss": 0.6617, "step": 8715 }, { "epoch": 0.5905549156446914, "grad_norm": 6.966080188751221, "learning_rate": 8.909576288589226e-05, "loss": 0.5794, "step": 8716 }, { "epoch": 0.5906226709126634, "grad_norm": 6.13206672668457, "learning_rate": 8.909439386679444e-05, "loss": 0.8356, "step": 8717 }, { "epoch": 0.5906904261806355, "grad_norm": 7.370462417602539, "learning_rate": 8.909302484769662e-05, "loss": 0.7215, "step": 8718 }, { "epoch": 0.5907581814486076, "grad_norm": 7.727412223815918, "learning_rate": 8.90916558285988e-05, "loss": 0.7943, "step": 8719 }, { "epoch": 0.5908259367165797, "grad_norm": 7.801811218261719, "learning_rate": 8.9090286809501e-05, "loss": 0.7986, "step": 8720 }, { "epoch": 0.5908936919845518, "grad_norm": 8.468879699707031, "learning_rate": 8.908891779040318e-05, "loss": 0.9178, "step": 8721 }, { "epoch": 0.5909614472525239, "grad_norm": 5.8401665687561035, "learning_rate": 8.908754877130536e-05, "loss": 0.9042, "step": 8722 }, { "epoch": 0.591029202520496, "grad_norm": 6.750467777252197, "learning_rate": 8.908617975220754e-05, "loss": 0.7983, "step": 8723 }, { "epoch": 0.591096957788468, "grad_norm": 5.489363193511963, "learning_rate": 8.908481073310973e-05, "loss": 0.6839, "step": 8724 }, { "epoch": 0.5911647130564401, "grad_norm": 8.433201789855957, "learning_rate": 8.908344171401191e-05, "loss": 0.9413, "step": 8725 }, { "epoch": 0.5912324683244122, "grad_norm": 6.133078575134277, "learning_rate": 8.908207269491409e-05, "loss": 1.0624, "step": 8726 }, { "epoch": 0.5913002235923843, "grad_norm": 7.1547698974609375, "learning_rate": 8.908070367581627e-05, "loss": 0.7873, "step": 8727 }, { "epoch": 0.5913679788603564, "grad_norm": 6.1702494621276855, "learning_rate": 8.907933465671847e-05, "loss": 0.737, "step": 8728 }, { "epoch": 0.5914357341283285, "grad_norm": 8.64965534210205, "learning_rate": 8.907796563762065e-05, "loss": 0.7949, "step": 8729 }, { "epoch": 0.5915034893963006, "grad_norm": 6.769810676574707, "learning_rate": 8.907659661852283e-05, "loss": 0.9665, "step": 8730 }, { "epoch": 0.5915712446642727, "grad_norm": 6.564850807189941, "learning_rate": 8.907522759942502e-05, "loss": 0.658, "step": 8731 }, { "epoch": 0.5916389999322448, "grad_norm": 5.231021404266357, "learning_rate": 8.90738585803272e-05, "loss": 0.9782, "step": 8732 }, { "epoch": 0.5917067552002169, "grad_norm": 6.880924701690674, "learning_rate": 8.907248956122938e-05, "loss": 0.9236, "step": 8733 }, { "epoch": 0.5917745104681889, "grad_norm": 7.0143585205078125, "learning_rate": 8.907112054213158e-05, "loss": 0.8532, "step": 8734 }, { "epoch": 0.5918422657361609, "grad_norm": 6.451882839202881, "learning_rate": 8.906975152303376e-05, "loss": 0.7571, "step": 8735 }, { "epoch": 0.591910021004133, "grad_norm": 7.905577182769775, "learning_rate": 8.906838250393594e-05, "loss": 0.7599, "step": 8736 }, { "epoch": 0.5919777762721051, "grad_norm": 6.122454643249512, "learning_rate": 8.906701348483812e-05, "loss": 0.9153, "step": 8737 }, { "epoch": 0.5920455315400772, "grad_norm": 7.811397552490234, "learning_rate": 8.906564446574031e-05, "loss": 0.906, "step": 8738 }, { "epoch": 0.5921132868080493, "grad_norm": 5.695565223693848, "learning_rate": 8.906427544664249e-05, "loss": 0.7452, "step": 8739 }, { "epoch": 0.5921810420760214, "grad_norm": 6.3696393966674805, "learning_rate": 8.906290642754467e-05, "loss": 0.8223, "step": 8740 }, { "epoch": 0.5922487973439935, "grad_norm": 6.49605655670166, "learning_rate": 8.906153740844685e-05, "loss": 0.9092, "step": 8741 }, { "epoch": 0.5923165526119656, "grad_norm": 5.290238380432129, "learning_rate": 8.906016838934903e-05, "loss": 0.7335, "step": 8742 }, { "epoch": 0.5923843078799377, "grad_norm": 6.913309097290039, "learning_rate": 8.905879937025123e-05, "loss": 0.7756, "step": 8743 }, { "epoch": 0.5924520631479098, "grad_norm": 5.938857555389404, "learning_rate": 8.90574303511534e-05, "loss": 0.758, "step": 8744 }, { "epoch": 0.5925198184158819, "grad_norm": 7.276566982269287, "learning_rate": 8.905606133205559e-05, "loss": 0.8429, "step": 8745 }, { "epoch": 0.592587573683854, "grad_norm": 7.6287522315979, "learning_rate": 8.905469231295777e-05, "loss": 1.0939, "step": 8746 }, { "epoch": 0.592655328951826, "grad_norm": 7.293666362762451, "learning_rate": 8.905332329385996e-05, "loss": 0.8264, "step": 8747 }, { "epoch": 0.5927230842197981, "grad_norm": 5.522965908050537, "learning_rate": 8.905195427476214e-05, "loss": 0.6578, "step": 8748 }, { "epoch": 0.5927908394877702, "grad_norm": 6.8883466720581055, "learning_rate": 8.905058525566432e-05, "loss": 0.9412, "step": 8749 }, { "epoch": 0.5928585947557422, "grad_norm": 6.682039737701416, "learning_rate": 8.90492162365665e-05, "loss": 0.8375, "step": 8750 }, { "epoch": 0.5929263500237143, "grad_norm": 9.619691848754883, "learning_rate": 8.904784721746868e-05, "loss": 0.9461, "step": 8751 }, { "epoch": 0.5929941052916864, "grad_norm": 6.880954265594482, "learning_rate": 8.904647819837088e-05, "loss": 0.895, "step": 8752 }, { "epoch": 0.5930618605596585, "grad_norm": 5.3324761390686035, "learning_rate": 8.904510917927306e-05, "loss": 0.7613, "step": 8753 }, { "epoch": 0.5931296158276306, "grad_norm": 7.377603054046631, "learning_rate": 8.904374016017524e-05, "loss": 0.8565, "step": 8754 }, { "epoch": 0.5931973710956027, "grad_norm": 6.537837982177734, "learning_rate": 8.904237114107742e-05, "loss": 0.9888, "step": 8755 }, { "epoch": 0.5932651263635748, "grad_norm": 6.159317493438721, "learning_rate": 8.904100212197961e-05, "loss": 1.0156, "step": 8756 }, { "epoch": 0.5933328816315468, "grad_norm": 10.275593757629395, "learning_rate": 8.903963310288179e-05, "loss": 0.8343, "step": 8757 }, { "epoch": 0.5934006368995189, "grad_norm": 6.211344242095947, "learning_rate": 8.903826408378397e-05, "loss": 0.8728, "step": 8758 }, { "epoch": 0.593468392167491, "grad_norm": 8.628105163574219, "learning_rate": 8.903689506468615e-05, "loss": 0.8468, "step": 8759 }, { "epoch": 0.5935361474354631, "grad_norm": 5.519963264465332, "learning_rate": 8.903552604558833e-05, "loss": 0.7188, "step": 8760 }, { "epoch": 0.5936039027034352, "grad_norm": 6.6959991455078125, "learning_rate": 8.903415702649053e-05, "loss": 1.1405, "step": 8761 }, { "epoch": 0.5936716579714073, "grad_norm": 5.843451499938965, "learning_rate": 8.90327880073927e-05, "loss": 0.7847, "step": 8762 }, { "epoch": 0.5937394132393794, "grad_norm": 5.644291400909424, "learning_rate": 8.903141898829489e-05, "loss": 0.6311, "step": 8763 }, { "epoch": 0.5938071685073515, "grad_norm": 6.7555952072143555, "learning_rate": 8.903004996919707e-05, "loss": 1.0387, "step": 8764 }, { "epoch": 0.5938749237753236, "grad_norm": 6.006453990936279, "learning_rate": 8.902868095009926e-05, "loss": 0.6646, "step": 8765 }, { "epoch": 0.5939426790432956, "grad_norm": 7.705087184906006, "learning_rate": 8.902731193100144e-05, "loss": 1.1749, "step": 8766 }, { "epoch": 0.5940104343112677, "grad_norm": 6.157181739807129, "learning_rate": 8.902594291190362e-05, "loss": 0.979, "step": 8767 }, { "epoch": 0.5940781895792397, "grad_norm": 6.080206394195557, "learning_rate": 8.90245738928058e-05, "loss": 0.9294, "step": 8768 }, { "epoch": 0.5941459448472118, "grad_norm": 5.215951919555664, "learning_rate": 8.902320487370798e-05, "loss": 0.7154, "step": 8769 }, { "epoch": 0.5942137001151839, "grad_norm": 6.383238792419434, "learning_rate": 8.902183585461018e-05, "loss": 0.9136, "step": 8770 }, { "epoch": 0.594281455383156, "grad_norm": 5.297086238861084, "learning_rate": 8.902046683551236e-05, "loss": 0.7147, "step": 8771 }, { "epoch": 0.5943492106511281, "grad_norm": 6.686932563781738, "learning_rate": 8.901909781641454e-05, "loss": 1.0018, "step": 8772 }, { "epoch": 0.5944169659191002, "grad_norm": 5.988333702087402, "learning_rate": 8.901772879731672e-05, "loss": 0.7809, "step": 8773 }, { "epoch": 0.5944847211870723, "grad_norm": 6.060636043548584, "learning_rate": 8.901635977821891e-05, "loss": 0.6055, "step": 8774 }, { "epoch": 0.5945524764550444, "grad_norm": 6.24500036239624, "learning_rate": 8.901499075912109e-05, "loss": 0.8885, "step": 8775 }, { "epoch": 0.5946202317230165, "grad_norm": 6.189664363861084, "learning_rate": 8.901362174002327e-05, "loss": 0.8793, "step": 8776 }, { "epoch": 0.5946879869909886, "grad_norm": 5.860182762145996, "learning_rate": 8.901225272092547e-05, "loss": 0.9083, "step": 8777 }, { "epoch": 0.5947557422589607, "grad_norm": 10.454379081726074, "learning_rate": 8.901088370182765e-05, "loss": 0.6481, "step": 8778 }, { "epoch": 0.5948234975269328, "grad_norm": 6.585578918457031, "learning_rate": 8.900951468272983e-05, "loss": 1.0381, "step": 8779 }, { "epoch": 0.5948912527949048, "grad_norm": 7.335190296173096, "learning_rate": 8.900814566363202e-05, "loss": 0.947, "step": 8780 }, { "epoch": 0.5949590080628769, "grad_norm": 6.560307502746582, "learning_rate": 8.90067766445342e-05, "loss": 0.951, "step": 8781 }, { "epoch": 0.595026763330849, "grad_norm": 5.926759243011475, "learning_rate": 8.900540762543638e-05, "loss": 0.9487, "step": 8782 }, { "epoch": 0.595094518598821, "grad_norm": 5.300271034240723, "learning_rate": 8.900403860633856e-05, "loss": 0.7232, "step": 8783 }, { "epoch": 0.5951622738667931, "grad_norm": 6.828729629516602, "learning_rate": 8.900266958724076e-05, "loss": 0.9124, "step": 8784 }, { "epoch": 0.5952300291347652, "grad_norm": 5.51554012298584, "learning_rate": 8.900130056814294e-05, "loss": 0.7698, "step": 8785 }, { "epoch": 0.5952977844027373, "grad_norm": 6.492809295654297, "learning_rate": 8.899993154904512e-05, "loss": 0.7388, "step": 8786 }, { "epoch": 0.5953655396707094, "grad_norm": 6.4316792488098145, "learning_rate": 8.89985625299473e-05, "loss": 0.7843, "step": 8787 }, { "epoch": 0.5954332949386815, "grad_norm": 5.711614608764648, "learning_rate": 8.899719351084949e-05, "loss": 0.8458, "step": 8788 }, { "epoch": 0.5955010502066536, "grad_norm": 7.605274677276611, "learning_rate": 8.899582449175167e-05, "loss": 0.8907, "step": 8789 }, { "epoch": 0.5955688054746257, "grad_norm": 6.734317302703857, "learning_rate": 8.899445547265385e-05, "loss": 0.8063, "step": 8790 }, { "epoch": 0.5956365607425977, "grad_norm": 10.887665748596191, "learning_rate": 8.899308645355603e-05, "loss": 0.861, "step": 8791 }, { "epoch": 0.5957043160105698, "grad_norm": 7.033245086669922, "learning_rate": 8.899171743445821e-05, "loss": 1.0345, "step": 8792 }, { "epoch": 0.5957720712785419, "grad_norm": 5.743894577026367, "learning_rate": 8.89903484153604e-05, "loss": 0.7492, "step": 8793 }, { "epoch": 0.595839826546514, "grad_norm": 5.664433479309082, "learning_rate": 8.898897939626259e-05, "loss": 0.9206, "step": 8794 }, { "epoch": 0.5959075818144861, "grad_norm": 5.304537773132324, "learning_rate": 8.898761037716477e-05, "loss": 0.7669, "step": 8795 }, { "epoch": 0.5959753370824582, "grad_norm": 6.449788570404053, "learning_rate": 8.898624135806695e-05, "loss": 0.9168, "step": 8796 }, { "epoch": 0.5960430923504303, "grad_norm": 7.1110429763793945, "learning_rate": 8.898487233896913e-05, "loss": 0.8633, "step": 8797 }, { "epoch": 0.5961108476184024, "grad_norm": 8.217866897583008, "learning_rate": 8.898350331987132e-05, "loss": 0.9472, "step": 8798 }, { "epoch": 0.5961786028863744, "grad_norm": 6.968807697296143, "learning_rate": 8.89821343007735e-05, "loss": 0.7977, "step": 8799 }, { "epoch": 0.5962463581543465, "grad_norm": 6.342806339263916, "learning_rate": 8.898076528167568e-05, "loss": 0.8461, "step": 8800 }, { "epoch": 0.5963141134223185, "grad_norm": 6.918409824371338, "learning_rate": 8.897939626257786e-05, "loss": 0.9768, "step": 8801 }, { "epoch": 0.5963818686902906, "grad_norm": 6.3519978523254395, "learning_rate": 8.897802724348006e-05, "loss": 0.794, "step": 8802 }, { "epoch": 0.5964496239582627, "grad_norm": 9.268524169921875, "learning_rate": 8.897665822438224e-05, "loss": 1.0708, "step": 8803 }, { "epoch": 0.5965173792262348, "grad_norm": 6.6414008140563965, "learning_rate": 8.897528920528442e-05, "loss": 0.9317, "step": 8804 }, { "epoch": 0.5965851344942069, "grad_norm": 5.683966159820557, "learning_rate": 8.89739201861866e-05, "loss": 0.8018, "step": 8805 }, { "epoch": 0.596652889762179, "grad_norm": 8.505208015441895, "learning_rate": 8.897255116708878e-05, "loss": 0.8207, "step": 8806 }, { "epoch": 0.5967206450301511, "grad_norm": 6.509139060974121, "learning_rate": 8.897118214799097e-05, "loss": 0.8822, "step": 8807 }, { "epoch": 0.5967884002981232, "grad_norm": 4.886582851409912, "learning_rate": 8.896981312889315e-05, "loss": 1.1176, "step": 8808 }, { "epoch": 0.5968561555660953, "grad_norm": 6.053840637207031, "learning_rate": 8.896844410979533e-05, "loss": 0.6305, "step": 8809 }, { "epoch": 0.5969239108340674, "grad_norm": 6.561148166656494, "learning_rate": 8.896707509069751e-05, "loss": 1.0339, "step": 8810 }, { "epoch": 0.5969916661020395, "grad_norm": 7.02574348449707, "learning_rate": 8.89657060715997e-05, "loss": 0.8793, "step": 8811 }, { "epoch": 0.5970594213700116, "grad_norm": 6.380439758300781, "learning_rate": 8.896433705250189e-05, "loss": 0.9474, "step": 8812 }, { "epoch": 0.5971271766379836, "grad_norm": 6.902020454406738, "learning_rate": 8.896296803340407e-05, "loss": 0.8541, "step": 8813 }, { "epoch": 0.5971949319059557, "grad_norm": 5.170351505279541, "learning_rate": 8.896159901430625e-05, "loss": 0.6224, "step": 8814 }, { "epoch": 0.5972626871739277, "grad_norm": 6.399029731750488, "learning_rate": 8.896022999520843e-05, "loss": 0.7733, "step": 8815 }, { "epoch": 0.5973304424418998, "grad_norm": 7.200798988342285, "learning_rate": 8.895886097611062e-05, "loss": 0.9942, "step": 8816 }, { "epoch": 0.5973981977098719, "grad_norm": 5.057744979858398, "learning_rate": 8.89574919570128e-05, "loss": 0.8113, "step": 8817 }, { "epoch": 0.597465952977844, "grad_norm": 6.267950534820557, "learning_rate": 8.895612293791498e-05, "loss": 1.1343, "step": 8818 }, { "epoch": 0.5975337082458161, "grad_norm": 5.712194919586182, "learning_rate": 8.895475391881716e-05, "loss": 0.6812, "step": 8819 }, { "epoch": 0.5976014635137882, "grad_norm": 6.567288875579834, "learning_rate": 8.895338489971936e-05, "loss": 0.8451, "step": 8820 }, { "epoch": 0.5976692187817603, "grad_norm": 6.08546781539917, "learning_rate": 8.895201588062154e-05, "loss": 0.7353, "step": 8821 }, { "epoch": 0.5977369740497324, "grad_norm": 6.859540939331055, "learning_rate": 8.895064686152372e-05, "loss": 0.6795, "step": 8822 }, { "epoch": 0.5978047293177045, "grad_norm": 5.679804801940918, "learning_rate": 8.894927784242591e-05, "loss": 0.7286, "step": 8823 }, { "epoch": 0.5978724845856765, "grad_norm": 7.603654384613037, "learning_rate": 8.894790882332809e-05, "loss": 0.87, "step": 8824 }, { "epoch": 0.5979402398536486, "grad_norm": 6.440685272216797, "learning_rate": 8.894653980423027e-05, "loss": 1.043, "step": 8825 }, { "epoch": 0.5980079951216207, "grad_norm": 6.420576572418213, "learning_rate": 8.894517078513247e-05, "loss": 0.6191, "step": 8826 }, { "epoch": 0.5980757503895928, "grad_norm": 6.01546573638916, "learning_rate": 8.894380176603465e-05, "loss": 0.9581, "step": 8827 }, { "epoch": 0.5981435056575649, "grad_norm": 6.9343767166137695, "learning_rate": 8.894243274693683e-05, "loss": 0.9531, "step": 8828 }, { "epoch": 0.598211260925537, "grad_norm": 6.511411666870117, "learning_rate": 8.8941063727839e-05, "loss": 0.6982, "step": 8829 }, { "epoch": 0.5982790161935091, "grad_norm": 5.96348762512207, "learning_rate": 8.89396947087412e-05, "loss": 0.6761, "step": 8830 }, { "epoch": 0.5983467714614811, "grad_norm": 5.571112155914307, "learning_rate": 8.893832568964338e-05, "loss": 0.7856, "step": 8831 }, { "epoch": 0.5984145267294532, "grad_norm": 7.437000751495361, "learning_rate": 8.893695667054556e-05, "loss": 0.8045, "step": 8832 }, { "epoch": 0.5984822819974253, "grad_norm": 6.710272789001465, "learning_rate": 8.893558765144774e-05, "loss": 0.725, "step": 8833 }, { "epoch": 0.5985500372653973, "grad_norm": 8.211858749389648, "learning_rate": 8.893421863234994e-05, "loss": 1.0648, "step": 8834 }, { "epoch": 0.5986177925333694, "grad_norm": 6.0621018409729, "learning_rate": 8.893284961325212e-05, "loss": 0.8461, "step": 8835 }, { "epoch": 0.5986855478013415, "grad_norm": 6.834799766540527, "learning_rate": 8.89314805941543e-05, "loss": 0.9162, "step": 8836 }, { "epoch": 0.5987533030693136, "grad_norm": 7.666240215301514, "learning_rate": 8.893011157505648e-05, "loss": 0.8657, "step": 8837 }, { "epoch": 0.5988210583372857, "grad_norm": 5.868284225463867, "learning_rate": 8.892874255595866e-05, "loss": 0.78, "step": 8838 }, { "epoch": 0.5988888136052578, "grad_norm": 8.84867000579834, "learning_rate": 8.892737353686085e-05, "loss": 1.0317, "step": 8839 }, { "epoch": 0.5989565688732299, "grad_norm": 7.292559623718262, "learning_rate": 8.892600451776303e-05, "loss": 0.9336, "step": 8840 }, { "epoch": 0.599024324141202, "grad_norm": 8.978638648986816, "learning_rate": 8.892463549866521e-05, "loss": 0.7901, "step": 8841 }, { "epoch": 0.5990920794091741, "grad_norm": 6.713160037994385, "learning_rate": 8.892326647956739e-05, "loss": 0.9241, "step": 8842 }, { "epoch": 0.5991598346771462, "grad_norm": 7.525753021240234, "learning_rate": 8.892189746046959e-05, "loss": 1.0159, "step": 8843 }, { "epoch": 0.5992275899451183, "grad_norm": 5.174670696258545, "learning_rate": 8.892052844137177e-05, "loss": 0.6338, "step": 8844 }, { "epoch": 0.5992953452130904, "grad_norm": 6.392926216125488, "learning_rate": 8.891915942227395e-05, "loss": 0.9449, "step": 8845 }, { "epoch": 0.5993631004810624, "grad_norm": 6.458075523376465, "learning_rate": 8.891779040317613e-05, "loss": 0.8109, "step": 8846 }, { "epoch": 0.5994308557490345, "grad_norm": 6.092816352844238, "learning_rate": 8.89164213840783e-05, "loss": 0.6486, "step": 8847 }, { "epoch": 0.5994986110170065, "grad_norm": 5.358661651611328, "learning_rate": 8.89150523649805e-05, "loss": 0.6597, "step": 8848 }, { "epoch": 0.5995663662849786, "grad_norm": 7.168787956237793, "learning_rate": 8.891368334588268e-05, "loss": 0.9348, "step": 8849 }, { "epoch": 0.5996341215529507, "grad_norm": 8.961309432983398, "learning_rate": 8.891231432678486e-05, "loss": 1.078, "step": 8850 }, { "epoch": 0.5997018768209228, "grad_norm": 6.025523662567139, "learning_rate": 8.891094530768704e-05, "loss": 0.8771, "step": 8851 }, { "epoch": 0.5997696320888949, "grad_norm": 6.479866981506348, "learning_rate": 8.890957628858922e-05, "loss": 0.6807, "step": 8852 }, { "epoch": 0.599837387356867, "grad_norm": 7.786321640014648, "learning_rate": 8.890820726949142e-05, "loss": 1.0007, "step": 8853 }, { "epoch": 0.5999051426248391, "grad_norm": 6.334638595581055, "learning_rate": 8.89068382503936e-05, "loss": 0.9056, "step": 8854 }, { "epoch": 0.5999728978928112, "grad_norm": 6.959164619445801, "learning_rate": 8.890546923129578e-05, "loss": 0.9682, "step": 8855 }, { "epoch": 0.6000406531607833, "grad_norm": 7.86105489730835, "learning_rate": 8.890410021219796e-05, "loss": 0.7696, "step": 8856 }, { "epoch": 0.6001084084287553, "grad_norm": 9.848732948303223, "learning_rate": 8.890273119310015e-05, "loss": 0.8796, "step": 8857 }, { "epoch": 0.6001761636967274, "grad_norm": 6.406124591827393, "learning_rate": 8.890136217400233e-05, "loss": 0.9671, "step": 8858 }, { "epoch": 0.6002439189646995, "grad_norm": 6.441462993621826, "learning_rate": 8.889999315490451e-05, "loss": 0.851, "step": 8859 }, { "epoch": 0.6003116742326716, "grad_norm": 7.711560249328613, "learning_rate": 8.889862413580669e-05, "loss": 1.0492, "step": 8860 }, { "epoch": 0.6003794295006437, "grad_norm": 7.2431960105896, "learning_rate": 8.889725511670887e-05, "loss": 0.9887, "step": 8861 }, { "epoch": 0.6004471847686158, "grad_norm": 7.861832618713379, "learning_rate": 8.889588609761107e-05, "loss": 1.0759, "step": 8862 }, { "epoch": 0.6005149400365879, "grad_norm": 6.643199920654297, "learning_rate": 8.889451707851325e-05, "loss": 0.7989, "step": 8863 }, { "epoch": 0.6005826953045599, "grad_norm": 5.903257369995117, "learning_rate": 8.889314805941543e-05, "loss": 0.6606, "step": 8864 }, { "epoch": 0.600650450572532, "grad_norm": 6.016655445098877, "learning_rate": 8.889177904031761e-05, "loss": 0.6489, "step": 8865 }, { "epoch": 0.6007182058405041, "grad_norm": 7.733530521392822, "learning_rate": 8.88904100212198e-05, "loss": 1.0653, "step": 8866 }, { "epoch": 0.6007859611084762, "grad_norm": 6.951436996459961, "learning_rate": 8.888904100212198e-05, "loss": 0.821, "step": 8867 }, { "epoch": 0.6008537163764482, "grad_norm": 5.853886604309082, "learning_rate": 8.888767198302416e-05, "loss": 0.7154, "step": 8868 }, { "epoch": 0.6009214716444203, "grad_norm": 7.300787925720215, "learning_rate": 8.888630296392636e-05, "loss": 1.0176, "step": 8869 }, { "epoch": 0.6009892269123924, "grad_norm": 6.771045684814453, "learning_rate": 8.888493394482854e-05, "loss": 0.7652, "step": 8870 }, { "epoch": 0.6010569821803645, "grad_norm": 6.345951557159424, "learning_rate": 8.888356492573072e-05, "loss": 1.0231, "step": 8871 }, { "epoch": 0.6011247374483366, "grad_norm": 6.479809284210205, "learning_rate": 8.888219590663291e-05, "loss": 0.7587, "step": 8872 }, { "epoch": 0.6011924927163087, "grad_norm": 7.4716796875, "learning_rate": 8.888082688753509e-05, "loss": 0.8283, "step": 8873 }, { "epoch": 0.6012602479842808, "grad_norm": 7.2003068923950195, "learning_rate": 8.887945786843727e-05, "loss": 0.8622, "step": 8874 }, { "epoch": 0.6013280032522529, "grad_norm": 5.774078845977783, "learning_rate": 8.887808884933945e-05, "loss": 0.7783, "step": 8875 }, { "epoch": 0.601395758520225, "grad_norm": 5.885690689086914, "learning_rate": 8.887671983024164e-05, "loss": 0.984, "step": 8876 }, { "epoch": 0.6014635137881971, "grad_norm": 6.9094038009643555, "learning_rate": 8.887535081114383e-05, "loss": 0.8093, "step": 8877 }, { "epoch": 0.6015312690561692, "grad_norm": 7.623313903808594, "learning_rate": 8.8873981792046e-05, "loss": 0.8604, "step": 8878 }, { "epoch": 0.6015990243241413, "grad_norm": 4.915633201599121, "learning_rate": 8.887261277294819e-05, "loss": 0.6538, "step": 8879 }, { "epoch": 0.6016667795921132, "grad_norm": 6.35188627243042, "learning_rate": 8.887124375385038e-05, "loss": 0.7546, "step": 8880 }, { "epoch": 0.6017345348600853, "grad_norm": 6.262838840484619, "learning_rate": 8.886987473475256e-05, "loss": 0.7816, "step": 8881 }, { "epoch": 0.6018022901280574, "grad_norm": 6.2654032707214355, "learning_rate": 8.886850571565474e-05, "loss": 1.0446, "step": 8882 }, { "epoch": 0.6018700453960295, "grad_norm": 6.389410972595215, "learning_rate": 8.886713669655692e-05, "loss": 0.9104, "step": 8883 }, { "epoch": 0.6019378006640016, "grad_norm": 6.192864894866943, "learning_rate": 8.88657676774591e-05, "loss": 0.8855, "step": 8884 }, { "epoch": 0.6020055559319737, "grad_norm": 6.384714126586914, "learning_rate": 8.88643986583613e-05, "loss": 0.7535, "step": 8885 }, { "epoch": 0.6020733111999458, "grad_norm": 6.433627605438232, "learning_rate": 8.886302963926348e-05, "loss": 0.6994, "step": 8886 }, { "epoch": 0.6021410664679179, "grad_norm": 6.539730072021484, "learning_rate": 8.886166062016566e-05, "loss": 1.0176, "step": 8887 }, { "epoch": 0.60220882173589, "grad_norm": 7.459704875946045, "learning_rate": 8.886029160106784e-05, "loss": 0.8599, "step": 8888 }, { "epoch": 0.6022765770038621, "grad_norm": 7.512004375457764, "learning_rate": 8.885892258197003e-05, "loss": 0.7238, "step": 8889 }, { "epoch": 0.6023443322718341, "grad_norm": 7.773438453674316, "learning_rate": 8.885755356287221e-05, "loss": 0.72, "step": 8890 }, { "epoch": 0.6024120875398062, "grad_norm": 5.3314948081970215, "learning_rate": 8.885618454377439e-05, "loss": 0.9285, "step": 8891 }, { "epoch": 0.6024798428077783, "grad_norm": 5.834819793701172, "learning_rate": 8.885481552467657e-05, "loss": 0.9032, "step": 8892 }, { "epoch": 0.6025475980757504, "grad_norm": 6.342952251434326, "learning_rate": 8.885344650557875e-05, "loss": 0.8987, "step": 8893 }, { "epoch": 0.6026153533437225, "grad_norm": 5.579010963439941, "learning_rate": 8.885207748648095e-05, "loss": 0.9275, "step": 8894 }, { "epoch": 0.6026831086116946, "grad_norm": 5.604851722717285, "learning_rate": 8.885070846738313e-05, "loss": 0.8163, "step": 8895 }, { "epoch": 0.6027508638796667, "grad_norm": 6.057446479797363, "learning_rate": 8.88493394482853e-05, "loss": 0.8466, "step": 8896 }, { "epoch": 0.6028186191476387, "grad_norm": 8.311997413635254, "learning_rate": 8.884797042918749e-05, "loss": 0.8493, "step": 8897 }, { "epoch": 0.6028863744156108, "grad_norm": 6.672399044036865, "learning_rate": 8.884660141008968e-05, "loss": 0.7638, "step": 8898 }, { "epoch": 0.6029541296835829, "grad_norm": 7.031404495239258, "learning_rate": 8.884523239099186e-05, "loss": 0.7692, "step": 8899 }, { "epoch": 0.603021884951555, "grad_norm": 6.296429634094238, "learning_rate": 8.884386337189404e-05, "loss": 0.9525, "step": 8900 }, { "epoch": 0.603089640219527, "grad_norm": 6.698690891265869, "learning_rate": 8.884249435279622e-05, "loss": 1.0234, "step": 8901 }, { "epoch": 0.6031573954874991, "grad_norm": 5.885977268218994, "learning_rate": 8.88411253336984e-05, "loss": 0.6533, "step": 8902 }, { "epoch": 0.6032251507554712, "grad_norm": 7.2855072021484375, "learning_rate": 8.88397563146006e-05, "loss": 0.7969, "step": 8903 }, { "epoch": 0.6032929060234433, "grad_norm": 5.964268207550049, "learning_rate": 8.883838729550278e-05, "loss": 0.9055, "step": 8904 }, { "epoch": 0.6033606612914154, "grad_norm": 8.085535049438477, "learning_rate": 8.883701827640496e-05, "loss": 0.9988, "step": 8905 }, { "epoch": 0.6034284165593875, "grad_norm": 6.891632080078125, "learning_rate": 8.883564925730714e-05, "loss": 1.0141, "step": 8906 }, { "epoch": 0.6034961718273596, "grad_norm": 5.324792861938477, "learning_rate": 8.883428023820932e-05, "loss": 0.7212, "step": 8907 }, { "epoch": 0.6035639270953317, "grad_norm": 5.735985279083252, "learning_rate": 8.883291121911151e-05, "loss": 0.9523, "step": 8908 }, { "epoch": 0.6036316823633038, "grad_norm": 7.501296043395996, "learning_rate": 8.883154220001369e-05, "loss": 0.938, "step": 8909 }, { "epoch": 0.6036994376312759, "grad_norm": 6.442415714263916, "learning_rate": 8.883017318091587e-05, "loss": 0.5799, "step": 8910 }, { "epoch": 0.603767192899248, "grad_norm": 6.753159999847412, "learning_rate": 8.882880416181805e-05, "loss": 0.6011, "step": 8911 }, { "epoch": 0.60383494816722, "grad_norm": 7.805209636688232, "learning_rate": 8.882743514272025e-05, "loss": 1.1798, "step": 8912 }, { "epoch": 0.603902703435192, "grad_norm": 5.123414993286133, "learning_rate": 8.882606612362243e-05, "loss": 0.7891, "step": 8913 }, { "epoch": 0.6039704587031641, "grad_norm": 6.106377601623535, "learning_rate": 8.88246971045246e-05, "loss": 0.8042, "step": 8914 }, { "epoch": 0.6040382139711362, "grad_norm": 6.874667167663574, "learning_rate": 8.88233280854268e-05, "loss": 0.8167, "step": 8915 }, { "epoch": 0.6041059692391083, "grad_norm": 5.306685447692871, "learning_rate": 8.882195906632898e-05, "loss": 0.6487, "step": 8916 }, { "epoch": 0.6041737245070804, "grad_norm": 6.309480667114258, "learning_rate": 8.882059004723116e-05, "loss": 0.6866, "step": 8917 }, { "epoch": 0.6042414797750525, "grad_norm": 7.042599201202393, "learning_rate": 8.881922102813335e-05, "loss": 0.8205, "step": 8918 }, { "epoch": 0.6043092350430246, "grad_norm": 5.572051048278809, "learning_rate": 8.881785200903554e-05, "loss": 0.8188, "step": 8919 }, { "epoch": 0.6043769903109967, "grad_norm": 5.881922245025635, "learning_rate": 8.881648298993772e-05, "loss": 0.8344, "step": 8920 }, { "epoch": 0.6044447455789688, "grad_norm": 6.141275882720947, "learning_rate": 8.881511397083991e-05, "loss": 0.7084, "step": 8921 }, { "epoch": 0.6045125008469409, "grad_norm": 6.847328186035156, "learning_rate": 8.881374495174209e-05, "loss": 0.9248, "step": 8922 }, { "epoch": 0.604580256114913, "grad_norm": 8.392770767211914, "learning_rate": 8.881237593264427e-05, "loss": 1.0319, "step": 8923 }, { "epoch": 0.604648011382885, "grad_norm": 6.491360664367676, "learning_rate": 8.881100691354645e-05, "loss": 0.8098, "step": 8924 }, { "epoch": 0.6047157666508571, "grad_norm": 7.355408191680908, "learning_rate": 8.880963789444863e-05, "loss": 0.9513, "step": 8925 }, { "epoch": 0.6047835219188292, "grad_norm": 5.667849063873291, "learning_rate": 8.880826887535082e-05, "loss": 0.6251, "step": 8926 }, { "epoch": 0.6048512771868013, "grad_norm": 7.0746636390686035, "learning_rate": 8.8806899856253e-05, "loss": 0.9789, "step": 8927 }, { "epoch": 0.6049190324547734, "grad_norm": 8.575697898864746, "learning_rate": 8.880553083715519e-05, "loss": 0.6239, "step": 8928 }, { "epoch": 0.6049867877227454, "grad_norm": 5.967097759246826, "learning_rate": 8.880416181805737e-05, "loss": 0.7224, "step": 8929 }, { "epoch": 0.6050545429907175, "grad_norm": 7.468807697296143, "learning_rate": 8.880279279895955e-05, "loss": 0.663, "step": 8930 }, { "epoch": 0.6051222982586896, "grad_norm": 7.503567218780518, "learning_rate": 8.880142377986174e-05, "loss": 1.0067, "step": 8931 }, { "epoch": 0.6051900535266617, "grad_norm": 5.722857475280762, "learning_rate": 8.880005476076392e-05, "loss": 0.6875, "step": 8932 }, { "epoch": 0.6052578087946338, "grad_norm": 5.868055820465088, "learning_rate": 8.87986857416661e-05, "loss": 0.9231, "step": 8933 }, { "epoch": 0.6053255640626058, "grad_norm": 6.367112636566162, "learning_rate": 8.879731672256828e-05, "loss": 0.7726, "step": 8934 }, { "epoch": 0.6053933193305779, "grad_norm": 5.738692760467529, "learning_rate": 8.879594770347047e-05, "loss": 0.6841, "step": 8935 }, { "epoch": 0.60546107459855, "grad_norm": 6.935656547546387, "learning_rate": 8.879457868437266e-05, "loss": 0.9394, "step": 8936 }, { "epoch": 0.6055288298665221, "grad_norm": 5.599362373352051, "learning_rate": 8.879320966527484e-05, "loss": 0.741, "step": 8937 }, { "epoch": 0.6055965851344942, "grad_norm": 6.967918395996094, "learning_rate": 8.879184064617702e-05, "loss": 0.7936, "step": 8938 }, { "epoch": 0.6056643404024663, "grad_norm": 7.082143306732178, "learning_rate": 8.87904716270792e-05, "loss": 0.9448, "step": 8939 }, { "epoch": 0.6057320956704384, "grad_norm": 7.133969783782959, "learning_rate": 8.878910260798139e-05, "loss": 1.0402, "step": 8940 }, { "epoch": 0.6057998509384105, "grad_norm": 6.228809356689453, "learning_rate": 8.878773358888357e-05, "loss": 0.9436, "step": 8941 }, { "epoch": 0.6058676062063826, "grad_norm": 7.93583345413208, "learning_rate": 8.878636456978575e-05, "loss": 0.8431, "step": 8942 }, { "epoch": 0.6059353614743547, "grad_norm": 6.536636829376221, "learning_rate": 8.878499555068793e-05, "loss": 0.6499, "step": 8943 }, { "epoch": 0.6060031167423268, "grad_norm": 6.2305145263671875, "learning_rate": 8.878362653159012e-05, "loss": 0.8509, "step": 8944 }, { "epoch": 0.6060708720102989, "grad_norm": 6.194677829742432, "learning_rate": 8.87822575124923e-05, "loss": 0.8863, "step": 8945 }, { "epoch": 0.6061386272782708, "grad_norm": 6.955784797668457, "learning_rate": 8.878088849339449e-05, "loss": 0.669, "step": 8946 }, { "epoch": 0.6062063825462429, "grad_norm": 6.535221099853516, "learning_rate": 8.877951947429667e-05, "loss": 0.9381, "step": 8947 }, { "epoch": 0.606274137814215, "grad_norm": 6.1568803787231445, "learning_rate": 8.877815045519885e-05, "loss": 0.8616, "step": 8948 }, { "epoch": 0.6063418930821871, "grad_norm": 6.541617393493652, "learning_rate": 8.877678143610104e-05, "loss": 0.8262, "step": 8949 }, { "epoch": 0.6064096483501592, "grad_norm": 6.519277572631836, "learning_rate": 8.877541241700322e-05, "loss": 0.6887, "step": 8950 }, { "epoch": 0.6064774036181313, "grad_norm": 5.049574375152588, "learning_rate": 8.87740433979054e-05, "loss": 0.5653, "step": 8951 }, { "epoch": 0.6065451588861034, "grad_norm": 5.1171698570251465, "learning_rate": 8.877267437880758e-05, "loss": 0.9257, "step": 8952 }, { "epoch": 0.6066129141540755, "grad_norm": 5.691270351409912, "learning_rate": 8.877130535970978e-05, "loss": 0.7008, "step": 8953 }, { "epoch": 0.6066806694220476, "grad_norm": 6.948482990264893, "learning_rate": 8.876993634061196e-05, "loss": 1.0571, "step": 8954 }, { "epoch": 0.6067484246900197, "grad_norm": 7.013044357299805, "learning_rate": 8.876856732151414e-05, "loss": 0.901, "step": 8955 }, { "epoch": 0.6068161799579918, "grad_norm": 6.465219974517822, "learning_rate": 8.876719830241632e-05, "loss": 0.8805, "step": 8956 }, { "epoch": 0.6068839352259638, "grad_norm": 6.187651634216309, "learning_rate": 8.87658292833185e-05, "loss": 0.9569, "step": 8957 }, { "epoch": 0.6069516904939359, "grad_norm": 7.414092063903809, "learning_rate": 8.876446026422069e-05, "loss": 0.8365, "step": 8958 }, { "epoch": 0.607019445761908, "grad_norm": 6.181675910949707, "learning_rate": 8.876309124512287e-05, "loss": 0.8135, "step": 8959 }, { "epoch": 0.6070872010298801, "grad_norm": 5.536195278167725, "learning_rate": 8.876172222602505e-05, "loss": 0.8226, "step": 8960 }, { "epoch": 0.6071549562978522, "grad_norm": 6.445333480834961, "learning_rate": 8.876035320692723e-05, "loss": 0.851, "step": 8961 }, { "epoch": 0.6072227115658242, "grad_norm": 7.526693820953369, "learning_rate": 8.875898418782943e-05, "loss": 0.806, "step": 8962 }, { "epoch": 0.6072904668337963, "grad_norm": 6.761377334594727, "learning_rate": 8.87576151687316e-05, "loss": 0.7834, "step": 8963 }, { "epoch": 0.6073582221017684, "grad_norm": 6.632135391235352, "learning_rate": 8.875624614963379e-05, "loss": 0.8597, "step": 8964 }, { "epoch": 0.6074259773697405, "grad_norm": 6.839773178100586, "learning_rate": 8.875487713053598e-05, "loss": 0.7056, "step": 8965 }, { "epoch": 0.6074937326377126, "grad_norm": 6.412302017211914, "learning_rate": 8.875350811143816e-05, "loss": 0.8321, "step": 8966 }, { "epoch": 0.6075614879056846, "grad_norm": 10.579608917236328, "learning_rate": 8.875213909234034e-05, "loss": 1.0393, "step": 8967 }, { "epoch": 0.6076292431736567, "grad_norm": 7.9641265869140625, "learning_rate": 8.875077007324253e-05, "loss": 0.8742, "step": 8968 }, { "epoch": 0.6076969984416288, "grad_norm": 5.490036487579346, "learning_rate": 8.874940105414471e-05, "loss": 0.9735, "step": 8969 }, { "epoch": 0.6077647537096009, "grad_norm": 6.595509052276611, "learning_rate": 8.87480320350469e-05, "loss": 0.9047, "step": 8970 }, { "epoch": 0.607832508977573, "grad_norm": 5.147292137145996, "learning_rate": 8.874666301594908e-05, "loss": 0.6643, "step": 8971 }, { "epoch": 0.6079002642455451, "grad_norm": 6.266574859619141, "learning_rate": 8.874529399685127e-05, "loss": 0.832, "step": 8972 }, { "epoch": 0.6079680195135172, "grad_norm": 5.882715225219727, "learning_rate": 8.874392497775345e-05, "loss": 0.9908, "step": 8973 }, { "epoch": 0.6080357747814893, "grad_norm": 7.928326606750488, "learning_rate": 8.874255595865563e-05, "loss": 1.0295, "step": 8974 }, { "epoch": 0.6081035300494614, "grad_norm": 8.489121437072754, "learning_rate": 8.874118693955781e-05, "loss": 0.9872, "step": 8975 }, { "epoch": 0.6081712853174335, "grad_norm": 5.946715354919434, "learning_rate": 8.873981792046e-05, "loss": 1.0244, "step": 8976 }, { "epoch": 0.6082390405854056, "grad_norm": 6.594414234161377, "learning_rate": 8.873844890136218e-05, "loss": 0.9772, "step": 8977 }, { "epoch": 0.6083067958533775, "grad_norm": 5.421601295471191, "learning_rate": 8.873707988226436e-05, "loss": 0.8735, "step": 8978 }, { "epoch": 0.6083745511213496, "grad_norm": 6.623301029205322, "learning_rate": 8.873571086316655e-05, "loss": 0.9308, "step": 8979 }, { "epoch": 0.6084423063893217, "grad_norm": 7.260282516479492, "learning_rate": 8.873434184406873e-05, "loss": 0.8352, "step": 8980 }, { "epoch": 0.6085100616572938, "grad_norm": 5.427891254425049, "learning_rate": 8.873297282497092e-05, "loss": 0.5448, "step": 8981 }, { "epoch": 0.6085778169252659, "grad_norm": 6.445272922515869, "learning_rate": 8.87316038058731e-05, "loss": 0.7464, "step": 8982 }, { "epoch": 0.608645572193238, "grad_norm": 5.404473304748535, "learning_rate": 8.873023478677528e-05, "loss": 0.8883, "step": 8983 }, { "epoch": 0.6087133274612101, "grad_norm": 5.924696445465088, "learning_rate": 8.872886576767746e-05, "loss": 0.9852, "step": 8984 }, { "epoch": 0.6087810827291822, "grad_norm": 7.119851112365723, "learning_rate": 8.872749674857964e-05, "loss": 0.8267, "step": 8985 }, { "epoch": 0.6088488379971543, "grad_norm": 6.306662559509277, "learning_rate": 8.872612772948183e-05, "loss": 0.8916, "step": 8986 }, { "epoch": 0.6089165932651264, "grad_norm": 6.999206066131592, "learning_rate": 8.872475871038402e-05, "loss": 0.9852, "step": 8987 }, { "epoch": 0.6089843485330985, "grad_norm": 7.93400239944458, "learning_rate": 8.87233896912862e-05, "loss": 0.7336, "step": 8988 }, { "epoch": 0.6090521038010706, "grad_norm": 6.08065938949585, "learning_rate": 8.872202067218838e-05, "loss": 0.8714, "step": 8989 }, { "epoch": 0.6091198590690426, "grad_norm": 8.639623641967773, "learning_rate": 8.872065165309057e-05, "loss": 0.7973, "step": 8990 }, { "epoch": 0.6091876143370147, "grad_norm": 6.663565158843994, "learning_rate": 8.871928263399275e-05, "loss": 0.7789, "step": 8991 }, { "epoch": 0.6092553696049868, "grad_norm": 5.1584153175354, "learning_rate": 8.871791361489493e-05, "loss": 0.7933, "step": 8992 }, { "epoch": 0.6093231248729589, "grad_norm": 5.59836483001709, "learning_rate": 8.871654459579711e-05, "loss": 0.731, "step": 8993 }, { "epoch": 0.609390880140931, "grad_norm": 6.6651482582092285, "learning_rate": 8.871517557669929e-05, "loss": 0.624, "step": 8994 }, { "epoch": 0.609458635408903, "grad_norm": 5.002302169799805, "learning_rate": 8.871380655760148e-05, "loss": 0.6816, "step": 8995 }, { "epoch": 0.6095263906768751, "grad_norm": 7.68015718460083, "learning_rate": 8.871243753850367e-05, "loss": 0.7353, "step": 8996 }, { "epoch": 0.6095941459448472, "grad_norm": 6.140766143798828, "learning_rate": 8.871106851940585e-05, "loss": 0.7172, "step": 8997 }, { "epoch": 0.6096619012128193, "grad_norm": 6.370824813842773, "learning_rate": 8.870969950030803e-05, "loss": 0.7664, "step": 8998 }, { "epoch": 0.6097296564807914, "grad_norm": 6.050932884216309, "learning_rate": 8.870833048121022e-05, "loss": 0.8182, "step": 8999 }, { "epoch": 0.6097974117487635, "grad_norm": 6.733234882354736, "learning_rate": 8.87069614621124e-05, "loss": 0.8736, "step": 9000 }, { "epoch": 0.6098651670167355, "grad_norm": 5.3225417137146, "learning_rate": 8.870559244301458e-05, "loss": 0.737, "step": 9001 }, { "epoch": 0.6099329222847076, "grad_norm": 5.601250171661377, "learning_rate": 8.870422342391676e-05, "loss": 0.7802, "step": 9002 }, { "epoch": 0.6100006775526797, "grad_norm": 4.972486972808838, "learning_rate": 8.870285440481894e-05, "loss": 0.6247, "step": 9003 }, { "epoch": 0.6100684328206518, "grad_norm": 6.177496433258057, "learning_rate": 8.870148538572114e-05, "loss": 0.8745, "step": 9004 }, { "epoch": 0.6101361880886239, "grad_norm": 5.5521321296691895, "learning_rate": 8.870011636662332e-05, "loss": 0.7281, "step": 9005 }, { "epoch": 0.610203943356596, "grad_norm": 5.7942705154418945, "learning_rate": 8.86987473475255e-05, "loss": 0.8116, "step": 9006 }, { "epoch": 0.6102716986245681, "grad_norm": 6.355384349822998, "learning_rate": 8.869737832842768e-05, "loss": 0.9577, "step": 9007 }, { "epoch": 0.6103394538925402, "grad_norm": 9.047319412231445, "learning_rate": 8.869600930932987e-05, "loss": 0.7913, "step": 9008 }, { "epoch": 0.6104072091605123, "grad_norm": 6.6220855712890625, "learning_rate": 8.869464029023205e-05, "loss": 0.8176, "step": 9009 }, { "epoch": 0.6104749644284844, "grad_norm": 7.567444324493408, "learning_rate": 8.869327127113423e-05, "loss": 0.9745, "step": 9010 }, { "epoch": 0.6105427196964563, "grad_norm": 7.12336540222168, "learning_rate": 8.869190225203642e-05, "loss": 0.7816, "step": 9011 }, { "epoch": 0.6106104749644284, "grad_norm": 5.867663860321045, "learning_rate": 8.86905332329386e-05, "loss": 0.8209, "step": 9012 }, { "epoch": 0.6106782302324005, "grad_norm": 6.362299919128418, "learning_rate": 8.868916421384079e-05, "loss": 0.8131, "step": 9013 }, { "epoch": 0.6107459855003726, "grad_norm": 6.171093463897705, "learning_rate": 8.868779519474298e-05, "loss": 0.783, "step": 9014 }, { "epoch": 0.6108137407683447, "grad_norm": 5.57711124420166, "learning_rate": 8.868642617564516e-05, "loss": 0.8022, "step": 9015 }, { "epoch": 0.6108814960363168, "grad_norm": 6.855584144592285, "learning_rate": 8.868505715654734e-05, "loss": 0.8162, "step": 9016 }, { "epoch": 0.6109492513042889, "grad_norm": 5.897914886474609, "learning_rate": 8.868368813744952e-05, "loss": 0.7343, "step": 9017 }, { "epoch": 0.611017006572261, "grad_norm": 5.253844261169434, "learning_rate": 8.868231911835171e-05, "loss": 0.7384, "step": 9018 }, { "epoch": 0.6110847618402331, "grad_norm": 5.258492469787598, "learning_rate": 8.86809500992539e-05, "loss": 0.7803, "step": 9019 }, { "epoch": 0.6111525171082052, "grad_norm": 5.407173156738281, "learning_rate": 8.867958108015607e-05, "loss": 0.7416, "step": 9020 }, { "epoch": 0.6112202723761773, "grad_norm": 7.5746283531188965, "learning_rate": 8.867821206105826e-05, "loss": 0.8327, "step": 9021 }, { "epoch": 0.6112880276441494, "grad_norm": 6.094844818115234, "learning_rate": 8.867684304196045e-05, "loss": 0.76, "step": 9022 }, { "epoch": 0.6113557829121214, "grad_norm": 6.436967849731445, "learning_rate": 8.867547402286263e-05, "loss": 0.8966, "step": 9023 }, { "epoch": 0.6114235381800935, "grad_norm": 4.939077377319336, "learning_rate": 8.867410500376481e-05, "loss": 0.8912, "step": 9024 }, { "epoch": 0.6114912934480656, "grad_norm": 6.9223127365112305, "learning_rate": 8.867273598466699e-05, "loss": 0.9135, "step": 9025 }, { "epoch": 0.6115590487160377, "grad_norm": 5.3313517570495605, "learning_rate": 8.867136696556917e-05, "loss": 0.7065, "step": 9026 }, { "epoch": 0.6116268039840097, "grad_norm": 5.880809783935547, "learning_rate": 8.866999794647136e-05, "loss": 0.8402, "step": 9027 }, { "epoch": 0.6116945592519818, "grad_norm": 5.272537708282471, "learning_rate": 8.866862892737354e-05, "loss": 0.6471, "step": 9028 }, { "epoch": 0.6117623145199539, "grad_norm": 4.6608147621154785, "learning_rate": 8.866725990827572e-05, "loss": 0.663, "step": 9029 }, { "epoch": 0.611830069787926, "grad_norm": 7.082324981689453, "learning_rate": 8.86658908891779e-05, "loss": 0.7523, "step": 9030 }, { "epoch": 0.6118978250558981, "grad_norm": 5.356766223907471, "learning_rate": 8.86645218700801e-05, "loss": 0.8098, "step": 9031 }, { "epoch": 0.6119655803238702, "grad_norm": 4.575824737548828, "learning_rate": 8.866315285098228e-05, "loss": 0.7311, "step": 9032 }, { "epoch": 0.6120333355918423, "grad_norm": 6.3496012687683105, "learning_rate": 8.866178383188446e-05, "loss": 0.9489, "step": 9033 }, { "epoch": 0.6121010908598143, "grad_norm": 5.932507038116455, "learning_rate": 8.866041481278664e-05, "loss": 0.7362, "step": 9034 }, { "epoch": 0.6121688461277864, "grad_norm": 6.566854000091553, "learning_rate": 8.865904579368882e-05, "loss": 0.9414, "step": 9035 }, { "epoch": 0.6122366013957585, "grad_norm": 6.274519920349121, "learning_rate": 8.865767677459101e-05, "loss": 0.9277, "step": 9036 }, { "epoch": 0.6123043566637306, "grad_norm": 6.512722492218018, "learning_rate": 8.86563077554932e-05, "loss": 0.8095, "step": 9037 }, { "epoch": 0.6123721119317027, "grad_norm": 5.9804558753967285, "learning_rate": 8.865493873639538e-05, "loss": 0.7803, "step": 9038 }, { "epoch": 0.6124398671996748, "grad_norm": 7.561446189880371, "learning_rate": 8.865356971729756e-05, "loss": 0.8143, "step": 9039 }, { "epoch": 0.6125076224676469, "grad_norm": 6.611248016357422, "learning_rate": 8.865220069819974e-05, "loss": 0.813, "step": 9040 }, { "epoch": 0.612575377735619, "grad_norm": 6.7652764320373535, "learning_rate": 8.865083167910193e-05, "loss": 0.8242, "step": 9041 }, { "epoch": 0.6126431330035911, "grad_norm": 4.964203834533691, "learning_rate": 8.864946266000411e-05, "loss": 0.6155, "step": 9042 }, { "epoch": 0.6127108882715631, "grad_norm": 5.260312080383301, "learning_rate": 8.864809364090629e-05, "loss": 0.694, "step": 9043 }, { "epoch": 0.6127786435395352, "grad_norm": 6.220287799835205, "learning_rate": 8.864672462180847e-05, "loss": 0.7783, "step": 9044 }, { "epoch": 0.6128463988075072, "grad_norm": 6.5388078689575195, "learning_rate": 8.864535560271066e-05, "loss": 0.6654, "step": 9045 }, { "epoch": 0.6129141540754793, "grad_norm": 6.570968151092529, "learning_rate": 8.864398658361284e-05, "loss": 1.15, "step": 9046 }, { "epoch": 0.6129819093434514, "grad_norm": 5.285862445831299, "learning_rate": 8.864261756451503e-05, "loss": 0.5951, "step": 9047 }, { "epoch": 0.6130496646114235, "grad_norm": 6.997344017028809, "learning_rate": 8.86412485454172e-05, "loss": 0.8675, "step": 9048 }, { "epoch": 0.6131174198793956, "grad_norm": 7.796441555023193, "learning_rate": 8.863987952631939e-05, "loss": 0.7794, "step": 9049 }, { "epoch": 0.6131851751473677, "grad_norm": 5.723931789398193, "learning_rate": 8.863851050722158e-05, "loss": 0.7727, "step": 9050 }, { "epoch": 0.6132529304153398, "grad_norm": 6.294017791748047, "learning_rate": 8.863714148812376e-05, "loss": 0.9198, "step": 9051 }, { "epoch": 0.6133206856833119, "grad_norm": 7.23032808303833, "learning_rate": 8.863577246902594e-05, "loss": 0.9892, "step": 9052 }, { "epoch": 0.613388440951284, "grad_norm": 5.533211708068848, "learning_rate": 8.863440344992812e-05, "loss": 0.7416, "step": 9053 }, { "epoch": 0.6134561962192561, "grad_norm": 5.181722164154053, "learning_rate": 8.863303443083031e-05, "loss": 0.7471, "step": 9054 }, { "epoch": 0.6135239514872282, "grad_norm": 5.428781986236572, "learning_rate": 8.86316654117325e-05, "loss": 0.8347, "step": 9055 }, { "epoch": 0.6135917067552003, "grad_norm": 7.403633117675781, "learning_rate": 8.863029639263468e-05, "loss": 0.9809, "step": 9056 }, { "epoch": 0.6136594620231723, "grad_norm": 6.061591625213623, "learning_rate": 8.862892737353687e-05, "loss": 0.7752, "step": 9057 }, { "epoch": 0.6137272172911444, "grad_norm": 6.5160393714904785, "learning_rate": 8.862755835443905e-05, "loss": 0.8982, "step": 9058 }, { "epoch": 0.6137949725591165, "grad_norm": 6.234467506408691, "learning_rate": 8.862618933534123e-05, "loss": 0.7428, "step": 9059 }, { "epoch": 0.6138627278270885, "grad_norm": 7.634365081787109, "learning_rate": 8.862482031624342e-05, "loss": 0.8113, "step": 9060 }, { "epoch": 0.6139304830950606, "grad_norm": 6.873602867126465, "learning_rate": 8.86234512971456e-05, "loss": 0.8789, "step": 9061 }, { "epoch": 0.6139982383630327, "grad_norm": 8.263740539550781, "learning_rate": 8.862208227804778e-05, "loss": 1.0911, "step": 9062 }, { "epoch": 0.6140659936310048, "grad_norm": 7.048012733459473, "learning_rate": 8.862071325894996e-05, "loss": 0.7947, "step": 9063 }, { "epoch": 0.6141337488989769, "grad_norm": 6.902647018432617, "learning_rate": 8.861934423985216e-05, "loss": 0.9391, "step": 9064 }, { "epoch": 0.614201504166949, "grad_norm": 7.542623043060303, "learning_rate": 8.861797522075434e-05, "loss": 0.8601, "step": 9065 }, { "epoch": 0.6142692594349211, "grad_norm": 6.404265403747559, "learning_rate": 8.861660620165652e-05, "loss": 0.7602, "step": 9066 }, { "epoch": 0.6143370147028931, "grad_norm": 5.523179054260254, "learning_rate": 8.86152371825587e-05, "loss": 0.7821, "step": 9067 }, { "epoch": 0.6144047699708652, "grad_norm": 5.909379005432129, "learning_rate": 8.86138681634609e-05, "loss": 0.788, "step": 9068 }, { "epoch": 0.6144725252388373, "grad_norm": 6.69690465927124, "learning_rate": 8.861249914436307e-05, "loss": 0.9958, "step": 9069 }, { "epoch": 0.6145402805068094, "grad_norm": 4.888934135437012, "learning_rate": 8.861113012526525e-05, "loss": 0.6594, "step": 9070 }, { "epoch": 0.6146080357747815, "grad_norm": 6.2297892570495605, "learning_rate": 8.860976110616743e-05, "loss": 0.7989, "step": 9071 }, { "epoch": 0.6146757910427536, "grad_norm": 6.807466506958008, "learning_rate": 8.860839208706962e-05, "loss": 0.9594, "step": 9072 }, { "epoch": 0.6147435463107257, "grad_norm": 5.524564266204834, "learning_rate": 8.860702306797181e-05, "loss": 0.9957, "step": 9073 }, { "epoch": 0.6148113015786978, "grad_norm": 6.862370014190674, "learning_rate": 8.860565404887399e-05, "loss": 0.8455, "step": 9074 }, { "epoch": 0.6148790568466699, "grad_norm": 6.885018825531006, "learning_rate": 8.860428502977617e-05, "loss": 0.8901, "step": 9075 }, { "epoch": 0.6149468121146419, "grad_norm": 5.2585344314575195, "learning_rate": 8.860291601067835e-05, "loss": 0.6791, "step": 9076 }, { "epoch": 0.615014567382614, "grad_norm": 6.821567058563232, "learning_rate": 8.860154699158054e-05, "loss": 1.1323, "step": 9077 }, { "epoch": 0.615082322650586, "grad_norm": 5.113526344299316, "learning_rate": 8.860017797248272e-05, "loss": 0.8949, "step": 9078 }, { "epoch": 0.6151500779185581, "grad_norm": 6.890782356262207, "learning_rate": 8.85988089533849e-05, "loss": 0.9682, "step": 9079 }, { "epoch": 0.6152178331865302, "grad_norm": 6.634467124938965, "learning_rate": 8.859743993428708e-05, "loss": 0.8697, "step": 9080 }, { "epoch": 0.6152855884545023, "grad_norm": 8.704490661621094, "learning_rate": 8.859607091518927e-05, "loss": 1.0182, "step": 9081 }, { "epoch": 0.6153533437224744, "grad_norm": 6.126344680786133, "learning_rate": 8.859470189609146e-05, "loss": 0.8234, "step": 9082 }, { "epoch": 0.6154210989904465, "grad_norm": 4.622169017791748, "learning_rate": 8.859333287699364e-05, "loss": 0.5793, "step": 9083 }, { "epoch": 0.6154888542584186, "grad_norm": 6.331384181976318, "learning_rate": 8.859196385789582e-05, "loss": 0.7669, "step": 9084 }, { "epoch": 0.6155566095263907, "grad_norm": 5.072127819061279, "learning_rate": 8.8590594838798e-05, "loss": 0.8827, "step": 9085 }, { "epoch": 0.6156243647943628, "grad_norm": 5.524892330169678, "learning_rate": 8.85892258197002e-05, "loss": 0.5116, "step": 9086 }, { "epoch": 0.6156921200623349, "grad_norm": 7.506863594055176, "learning_rate": 8.858785680060237e-05, "loss": 1.1547, "step": 9087 }, { "epoch": 0.615759875330307, "grad_norm": 5.211267471313477, "learning_rate": 8.858648778150455e-05, "loss": 0.7001, "step": 9088 }, { "epoch": 0.615827630598279, "grad_norm": 5.154802322387695, "learning_rate": 8.858511876240674e-05, "loss": 0.6498, "step": 9089 }, { "epoch": 0.6158953858662511, "grad_norm": 6.631251335144043, "learning_rate": 8.858374974330892e-05, "loss": 0.7026, "step": 9090 }, { "epoch": 0.6159631411342232, "grad_norm": 6.677391529083252, "learning_rate": 8.858238072421111e-05, "loss": 0.8582, "step": 9091 }, { "epoch": 0.6160308964021952, "grad_norm": 5.569202423095703, "learning_rate": 8.858101170511329e-05, "loss": 0.7886, "step": 9092 }, { "epoch": 0.6160986516701673, "grad_norm": 6.454385280609131, "learning_rate": 8.857964268601547e-05, "loss": 0.7088, "step": 9093 }, { "epoch": 0.6161664069381394, "grad_norm": 6.360875606536865, "learning_rate": 8.857827366691765e-05, "loss": 0.7079, "step": 9094 }, { "epoch": 0.6162341622061115, "grad_norm": 6.538075923919678, "learning_rate": 8.857690464781983e-05, "loss": 0.851, "step": 9095 }, { "epoch": 0.6163019174740836, "grad_norm": 6.111706256866455, "learning_rate": 8.857553562872202e-05, "loss": 0.6732, "step": 9096 }, { "epoch": 0.6163696727420557, "grad_norm": 6.8173956871032715, "learning_rate": 8.85741666096242e-05, "loss": 0.8164, "step": 9097 }, { "epoch": 0.6164374280100278, "grad_norm": 6.984659194946289, "learning_rate": 8.857279759052639e-05, "loss": 0.7083, "step": 9098 }, { "epoch": 0.6165051832779999, "grad_norm": 5.256351947784424, "learning_rate": 8.857142857142857e-05, "loss": 0.7592, "step": 9099 }, { "epoch": 0.616572938545972, "grad_norm": 8.32116985321045, "learning_rate": 8.857005955233076e-05, "loss": 1.041, "step": 9100 }, { "epoch": 0.616640693813944, "grad_norm": 5.583303928375244, "learning_rate": 8.856869053323294e-05, "loss": 0.9433, "step": 9101 }, { "epoch": 0.6167084490819161, "grad_norm": 8.839009284973145, "learning_rate": 8.856732151413512e-05, "loss": 1.1276, "step": 9102 }, { "epoch": 0.6167762043498882, "grad_norm": 6.0189528465271, "learning_rate": 8.856595249503731e-05, "loss": 0.8986, "step": 9103 }, { "epoch": 0.6168439596178603, "grad_norm": 6.6191205978393555, "learning_rate": 8.85645834759395e-05, "loss": 0.8594, "step": 9104 }, { "epoch": 0.6169117148858324, "grad_norm": 7.318183898925781, "learning_rate": 8.856321445684167e-05, "loss": 0.936, "step": 9105 }, { "epoch": 0.6169794701538045, "grad_norm": 5.551211357116699, "learning_rate": 8.856184543774387e-05, "loss": 0.8986, "step": 9106 }, { "epoch": 0.6170472254217766, "grad_norm": 6.483643054962158, "learning_rate": 8.856047641864605e-05, "loss": 0.8394, "step": 9107 }, { "epoch": 0.6171149806897487, "grad_norm": 6.838339805603027, "learning_rate": 8.855910739954823e-05, "loss": 0.8305, "step": 9108 }, { "epoch": 0.6171827359577207, "grad_norm": 6.734610557556152, "learning_rate": 8.855773838045042e-05, "loss": 0.8967, "step": 9109 }, { "epoch": 0.6172504912256928, "grad_norm": 5.749598503112793, "learning_rate": 8.85563693613526e-05, "loss": 0.8226, "step": 9110 }, { "epoch": 0.6173182464936648, "grad_norm": 5.9449872970581055, "learning_rate": 8.855500034225478e-05, "loss": 0.7528, "step": 9111 }, { "epoch": 0.6173860017616369, "grad_norm": 7.123237133026123, "learning_rate": 8.855363132315696e-05, "loss": 0.8002, "step": 9112 }, { "epoch": 0.617453757029609, "grad_norm": 6.046530723571777, "learning_rate": 8.855226230405914e-05, "loss": 0.7788, "step": 9113 }, { "epoch": 0.6175215122975811, "grad_norm": 5.466145992279053, "learning_rate": 8.855089328496134e-05, "loss": 0.6303, "step": 9114 }, { "epoch": 0.6175892675655532, "grad_norm": 6.014889717102051, "learning_rate": 8.854952426586352e-05, "loss": 0.9445, "step": 9115 }, { "epoch": 0.6176570228335253, "grad_norm": 6.305761814117432, "learning_rate": 8.85481552467657e-05, "loss": 0.7314, "step": 9116 }, { "epoch": 0.6177247781014974, "grad_norm": 7.205751895904541, "learning_rate": 8.854678622766788e-05, "loss": 0.7635, "step": 9117 }, { "epoch": 0.6177925333694695, "grad_norm": 7.25909948348999, "learning_rate": 8.854541720857006e-05, "loss": 0.8655, "step": 9118 }, { "epoch": 0.6178602886374416, "grad_norm": 6.141407489776611, "learning_rate": 8.854404818947225e-05, "loss": 0.7744, "step": 9119 }, { "epoch": 0.6179280439054137, "grad_norm": 7.026790142059326, "learning_rate": 8.854267917037443e-05, "loss": 0.6679, "step": 9120 }, { "epoch": 0.6179957991733858, "grad_norm": 6.5543437004089355, "learning_rate": 8.854131015127661e-05, "loss": 0.7657, "step": 9121 }, { "epoch": 0.6180635544413579, "grad_norm": 6.620877742767334, "learning_rate": 8.85399411321788e-05, "loss": 0.7058, "step": 9122 }, { "epoch": 0.61813130970933, "grad_norm": 6.877584457397461, "learning_rate": 8.853857211308099e-05, "loss": 0.8799, "step": 9123 }, { "epoch": 0.618199064977302, "grad_norm": 6.836280822753906, "learning_rate": 8.853720309398317e-05, "loss": 0.7901, "step": 9124 }, { "epoch": 0.618266820245274, "grad_norm": 5.966172695159912, "learning_rate": 8.853583407488535e-05, "loss": 0.6202, "step": 9125 }, { "epoch": 0.6183345755132461, "grad_norm": 6.592352867126465, "learning_rate": 8.853446505578753e-05, "loss": 0.8176, "step": 9126 }, { "epoch": 0.6184023307812182, "grad_norm": 7.010197639465332, "learning_rate": 8.853309603668971e-05, "loss": 0.8645, "step": 9127 }, { "epoch": 0.6184700860491903, "grad_norm": 6.962997913360596, "learning_rate": 8.85317270175919e-05, "loss": 0.7481, "step": 9128 }, { "epoch": 0.6185378413171624, "grad_norm": 6.8080244064331055, "learning_rate": 8.853035799849408e-05, "loss": 0.8811, "step": 9129 }, { "epoch": 0.6186055965851345, "grad_norm": 5.936764240264893, "learning_rate": 8.852898897939626e-05, "loss": 0.6795, "step": 9130 }, { "epoch": 0.6186733518531066, "grad_norm": 6.132038116455078, "learning_rate": 8.852761996029844e-05, "loss": 0.9207, "step": 9131 }, { "epoch": 0.6187411071210787, "grad_norm": 6.451957702636719, "learning_rate": 8.852625094120064e-05, "loss": 0.7098, "step": 9132 }, { "epoch": 0.6188088623890508, "grad_norm": 6.429556369781494, "learning_rate": 8.852488192210282e-05, "loss": 0.8215, "step": 9133 }, { "epoch": 0.6188766176570228, "grad_norm": 5.414734363555908, "learning_rate": 8.8523512903005e-05, "loss": 0.7205, "step": 9134 }, { "epoch": 0.6189443729249949, "grad_norm": 7.48563289642334, "learning_rate": 8.852214388390718e-05, "loss": 0.9738, "step": 9135 }, { "epoch": 0.619012128192967, "grad_norm": 7.014744758605957, "learning_rate": 8.852077486480936e-05, "loss": 0.7494, "step": 9136 }, { "epoch": 0.6190798834609391, "grad_norm": 9.400522232055664, "learning_rate": 8.851940584571155e-05, "loss": 0.8805, "step": 9137 }, { "epoch": 0.6191476387289112, "grad_norm": 5.838070869445801, "learning_rate": 8.851803682661373e-05, "loss": 0.7817, "step": 9138 }, { "epoch": 0.6192153939968833, "grad_norm": 6.750514984130859, "learning_rate": 8.851666780751591e-05, "loss": 0.8173, "step": 9139 }, { "epoch": 0.6192831492648554, "grad_norm": 8.359286308288574, "learning_rate": 8.85152987884181e-05, "loss": 0.8902, "step": 9140 }, { "epoch": 0.6193509045328274, "grad_norm": 4.75096321105957, "learning_rate": 8.851392976932029e-05, "loss": 0.6834, "step": 9141 }, { "epoch": 0.6194186598007995, "grad_norm": 6.427238464355469, "learning_rate": 8.851256075022247e-05, "loss": 0.7946, "step": 9142 }, { "epoch": 0.6194864150687716, "grad_norm": 6.961672782897949, "learning_rate": 8.851119173112465e-05, "loss": 0.6882, "step": 9143 }, { "epoch": 0.6195541703367436, "grad_norm": 7.266171455383301, "learning_rate": 8.850982271202683e-05, "loss": 0.8905, "step": 9144 }, { "epoch": 0.6196219256047157, "grad_norm": 6.961668491363525, "learning_rate": 8.850845369292901e-05, "loss": 0.7874, "step": 9145 }, { "epoch": 0.6196896808726878, "grad_norm": 6.123990535736084, "learning_rate": 8.85070846738312e-05, "loss": 0.7126, "step": 9146 }, { "epoch": 0.6197574361406599, "grad_norm": 7.221274375915527, "learning_rate": 8.850571565473338e-05, "loss": 0.8443, "step": 9147 }, { "epoch": 0.619825191408632, "grad_norm": 6.273942947387695, "learning_rate": 8.850434663563556e-05, "loss": 0.8795, "step": 9148 }, { "epoch": 0.6198929466766041, "grad_norm": 6.505329608917236, "learning_rate": 8.850297761653776e-05, "loss": 0.845, "step": 9149 }, { "epoch": 0.6199607019445762, "grad_norm": 6.682892322540283, "learning_rate": 8.850160859743994e-05, "loss": 0.8908, "step": 9150 }, { "epoch": 0.6200284572125483, "grad_norm": 6.669549942016602, "learning_rate": 8.850023957834212e-05, "loss": 0.8027, "step": 9151 }, { "epoch": 0.6200962124805204, "grad_norm": 4.51635217666626, "learning_rate": 8.849887055924431e-05, "loss": 0.6746, "step": 9152 }, { "epoch": 0.6201639677484925, "grad_norm": 7.118916988372803, "learning_rate": 8.84975015401465e-05, "loss": 1.1932, "step": 9153 }, { "epoch": 0.6202317230164646, "grad_norm": 5.470358371734619, "learning_rate": 8.849613252104867e-05, "loss": 0.9683, "step": 9154 }, { "epoch": 0.6202994782844367, "grad_norm": 5.723430633544922, "learning_rate": 8.849476350195087e-05, "loss": 0.9483, "step": 9155 }, { "epoch": 0.6203672335524087, "grad_norm": 6.195518493652344, "learning_rate": 8.849339448285305e-05, "loss": 0.6756, "step": 9156 }, { "epoch": 0.6204349888203808, "grad_norm": 5.686561584472656, "learning_rate": 8.849202546375523e-05, "loss": 0.6946, "step": 9157 }, { "epoch": 0.6205027440883528, "grad_norm": 6.569991588592529, "learning_rate": 8.849065644465741e-05, "loss": 0.8295, "step": 9158 }, { "epoch": 0.6205704993563249, "grad_norm": 7.306698799133301, "learning_rate": 8.848928742555959e-05, "loss": 1.0515, "step": 9159 }, { "epoch": 0.620638254624297, "grad_norm": 7.042558670043945, "learning_rate": 8.848791840646178e-05, "loss": 1.0191, "step": 9160 }, { "epoch": 0.6207060098922691, "grad_norm": 6.552324295043945, "learning_rate": 8.848654938736396e-05, "loss": 0.7424, "step": 9161 }, { "epoch": 0.6207737651602412, "grad_norm": 7.646967887878418, "learning_rate": 8.848518036826614e-05, "loss": 0.8457, "step": 9162 }, { "epoch": 0.6208415204282133, "grad_norm": 6.561422348022461, "learning_rate": 8.848381134916832e-05, "loss": 0.7605, "step": 9163 }, { "epoch": 0.6209092756961854, "grad_norm": 7.425536155700684, "learning_rate": 8.848244233007052e-05, "loss": 0.8042, "step": 9164 }, { "epoch": 0.6209770309641575, "grad_norm": 5.6792521476745605, "learning_rate": 8.84810733109727e-05, "loss": 0.8775, "step": 9165 }, { "epoch": 0.6210447862321296, "grad_norm": 6.988245010375977, "learning_rate": 8.847970429187488e-05, "loss": 0.7295, "step": 9166 }, { "epoch": 0.6211125415001016, "grad_norm": 4.709329605102539, "learning_rate": 8.847833527277706e-05, "loss": 0.6508, "step": 9167 }, { "epoch": 0.6211802967680737, "grad_norm": 7.601302623748779, "learning_rate": 8.847696625367924e-05, "loss": 0.6558, "step": 9168 }, { "epoch": 0.6212480520360458, "grad_norm": 6.5276007652282715, "learning_rate": 8.847559723458143e-05, "loss": 0.6851, "step": 9169 }, { "epoch": 0.6213158073040179, "grad_norm": 6.256960391998291, "learning_rate": 8.847422821548361e-05, "loss": 0.9257, "step": 9170 }, { "epoch": 0.62138356257199, "grad_norm": 7.133554935455322, "learning_rate": 8.84728591963858e-05, "loss": 1.0145, "step": 9171 }, { "epoch": 0.6214513178399621, "grad_norm": 7.465946674346924, "learning_rate": 8.847149017728797e-05, "loss": 1.1211, "step": 9172 }, { "epoch": 0.6215190731079342, "grad_norm": 7.24479341506958, "learning_rate": 8.847012115819015e-05, "loss": 0.787, "step": 9173 }, { "epoch": 0.6215868283759062, "grad_norm": 6.052404403686523, "learning_rate": 8.846875213909235e-05, "loss": 0.7718, "step": 9174 }, { "epoch": 0.6216545836438783, "grad_norm": 7.27697229385376, "learning_rate": 8.846738311999453e-05, "loss": 0.8102, "step": 9175 }, { "epoch": 0.6217223389118504, "grad_norm": 6.269348621368408, "learning_rate": 8.846601410089671e-05, "loss": 0.9089, "step": 9176 }, { "epoch": 0.6217900941798225, "grad_norm": 5.706981658935547, "learning_rate": 8.846464508179889e-05, "loss": 0.8213, "step": 9177 }, { "epoch": 0.6218578494477945, "grad_norm": 7.0556817054748535, "learning_rate": 8.846327606270108e-05, "loss": 0.696, "step": 9178 }, { "epoch": 0.6219256047157666, "grad_norm": 5.418951034545898, "learning_rate": 8.846190704360326e-05, "loss": 0.6344, "step": 9179 }, { "epoch": 0.6219933599837387, "grad_norm": 6.63913106918335, "learning_rate": 8.846053802450544e-05, "loss": 0.8222, "step": 9180 }, { "epoch": 0.6220611152517108, "grad_norm": 6.560824394226074, "learning_rate": 8.845916900540762e-05, "loss": 0.7443, "step": 9181 }, { "epoch": 0.6221288705196829, "grad_norm": 6.946655750274658, "learning_rate": 8.84577999863098e-05, "loss": 1.0413, "step": 9182 }, { "epoch": 0.622196625787655, "grad_norm": 5.037294864654541, "learning_rate": 8.8456430967212e-05, "loss": 0.7489, "step": 9183 }, { "epoch": 0.6222643810556271, "grad_norm": 6.396673202514648, "learning_rate": 8.845506194811418e-05, "loss": 0.768, "step": 9184 }, { "epoch": 0.6223321363235992, "grad_norm": 6.354964733123779, "learning_rate": 8.845369292901636e-05, "loss": 1.0378, "step": 9185 }, { "epoch": 0.6223998915915713, "grad_norm": 7.494623184204102, "learning_rate": 8.845232390991854e-05, "loss": 0.763, "step": 9186 }, { "epoch": 0.6224676468595434, "grad_norm": 6.109148025512695, "learning_rate": 8.845095489082073e-05, "loss": 0.7609, "step": 9187 }, { "epoch": 0.6225354021275155, "grad_norm": 7.039491653442383, "learning_rate": 8.844958587172291e-05, "loss": 0.7999, "step": 9188 }, { "epoch": 0.6226031573954875, "grad_norm": 5.7005486488342285, "learning_rate": 8.84482168526251e-05, "loss": 0.7632, "step": 9189 }, { "epoch": 0.6226709126634595, "grad_norm": 6.816334247589111, "learning_rate": 8.844684783352727e-05, "loss": 0.6813, "step": 9190 }, { "epoch": 0.6227386679314316, "grad_norm": 7.970419883728027, "learning_rate": 8.844547881442946e-05, "loss": 0.5728, "step": 9191 }, { "epoch": 0.6228064231994037, "grad_norm": 6.688904285430908, "learning_rate": 8.844410979533165e-05, "loss": 0.7974, "step": 9192 }, { "epoch": 0.6228741784673758, "grad_norm": 6.076619625091553, "learning_rate": 8.844274077623383e-05, "loss": 0.7217, "step": 9193 }, { "epoch": 0.6229419337353479, "grad_norm": 7.09970760345459, "learning_rate": 8.844137175713601e-05, "loss": 0.8479, "step": 9194 }, { "epoch": 0.62300968900332, "grad_norm": 7.291125297546387, "learning_rate": 8.84400027380382e-05, "loss": 0.9391, "step": 9195 }, { "epoch": 0.6230774442712921, "grad_norm": 6.2532148361206055, "learning_rate": 8.843863371894038e-05, "loss": 0.9657, "step": 9196 }, { "epoch": 0.6231451995392642, "grad_norm": 6.567989349365234, "learning_rate": 8.843726469984256e-05, "loss": 0.767, "step": 9197 }, { "epoch": 0.6232129548072363, "grad_norm": 5.543100357055664, "learning_rate": 8.843589568074476e-05, "loss": 0.7537, "step": 9198 }, { "epoch": 0.6232807100752084, "grad_norm": 7.001931190490723, "learning_rate": 8.843452666164694e-05, "loss": 0.8386, "step": 9199 }, { "epoch": 0.6233484653431804, "grad_norm": 6.852741718292236, "learning_rate": 8.843315764254912e-05, "loss": 0.9314, "step": 9200 }, { "epoch": 0.6234162206111525, "grad_norm": 6.225865364074707, "learning_rate": 8.843178862345131e-05, "loss": 0.8302, "step": 9201 }, { "epoch": 0.6234839758791246, "grad_norm": 7.8117594718933105, "learning_rate": 8.843041960435349e-05, "loss": 0.9156, "step": 9202 }, { "epoch": 0.6235517311470967, "grad_norm": 7.585949420928955, "learning_rate": 8.842905058525567e-05, "loss": 0.9901, "step": 9203 }, { "epoch": 0.6236194864150688, "grad_norm": 5.7599945068359375, "learning_rate": 8.842768156615785e-05, "loss": 0.8697, "step": 9204 }, { "epoch": 0.6236872416830409, "grad_norm": 6.114898204803467, "learning_rate": 8.842631254706003e-05, "loss": 0.917, "step": 9205 }, { "epoch": 0.623754996951013, "grad_norm": 6.57565450668335, "learning_rate": 8.842494352796223e-05, "loss": 0.9872, "step": 9206 }, { "epoch": 0.623822752218985, "grad_norm": 6.324807643890381, "learning_rate": 8.842357450886441e-05, "loss": 0.804, "step": 9207 }, { "epoch": 0.6238905074869571, "grad_norm": 7.262860298156738, "learning_rate": 8.842220548976659e-05, "loss": 0.8051, "step": 9208 }, { "epoch": 0.6239582627549292, "grad_norm": 6.4210968017578125, "learning_rate": 8.842083647066877e-05, "loss": 0.9117, "step": 9209 }, { "epoch": 0.6240260180229013, "grad_norm": 5.8516154289245605, "learning_rate": 8.841946745157096e-05, "loss": 0.7292, "step": 9210 }, { "epoch": 0.6240937732908733, "grad_norm": 7.249476909637451, "learning_rate": 8.841809843247314e-05, "loss": 0.7149, "step": 9211 }, { "epoch": 0.6241615285588454, "grad_norm": 6.276209831237793, "learning_rate": 8.841672941337532e-05, "loss": 0.8576, "step": 9212 }, { "epoch": 0.6242292838268175, "grad_norm": 7.875953674316406, "learning_rate": 8.84153603942775e-05, "loss": 0.8345, "step": 9213 }, { "epoch": 0.6242970390947896, "grad_norm": 6.010235786437988, "learning_rate": 8.841399137517968e-05, "loss": 0.8376, "step": 9214 }, { "epoch": 0.6243647943627617, "grad_norm": 6.451040744781494, "learning_rate": 8.841262235608188e-05, "loss": 0.8943, "step": 9215 }, { "epoch": 0.6244325496307338, "grad_norm": 7.024655342102051, "learning_rate": 8.841125333698406e-05, "loss": 0.634, "step": 9216 }, { "epoch": 0.6245003048987059, "grad_norm": 7.169036865234375, "learning_rate": 8.840988431788624e-05, "loss": 0.8139, "step": 9217 }, { "epoch": 0.624568060166678, "grad_norm": 5.163486003875732, "learning_rate": 8.840851529878842e-05, "loss": 0.847, "step": 9218 }, { "epoch": 0.6246358154346501, "grad_norm": 6.5144429206848145, "learning_rate": 8.840714627969061e-05, "loss": 0.8632, "step": 9219 }, { "epoch": 0.6247035707026222, "grad_norm": 8.250146865844727, "learning_rate": 8.84057772605928e-05, "loss": 1.1546, "step": 9220 }, { "epoch": 0.6247713259705943, "grad_norm": 5.385178565979004, "learning_rate": 8.840440824149497e-05, "loss": 1.2391, "step": 9221 }, { "epoch": 0.6248390812385664, "grad_norm": 6.536712169647217, "learning_rate": 8.840303922239715e-05, "loss": 0.8697, "step": 9222 }, { "epoch": 0.6249068365065383, "grad_norm": 5.161468982696533, "learning_rate": 8.840167020329933e-05, "loss": 0.6399, "step": 9223 }, { "epoch": 0.6249745917745104, "grad_norm": 6.684597969055176, "learning_rate": 8.840030118420153e-05, "loss": 0.8301, "step": 9224 }, { "epoch": 0.6250423470424825, "grad_norm": 7.047337055206299, "learning_rate": 8.839893216510371e-05, "loss": 1.0555, "step": 9225 }, { "epoch": 0.6251101023104546, "grad_norm": 6.454337120056152, "learning_rate": 8.839756314600589e-05, "loss": 0.7768, "step": 9226 }, { "epoch": 0.6251778575784267, "grad_norm": 5.521293640136719, "learning_rate": 8.839619412690807e-05, "loss": 0.8204, "step": 9227 }, { "epoch": 0.6252456128463988, "grad_norm": 7.6041364669799805, "learning_rate": 8.839482510781025e-05, "loss": 1.0063, "step": 9228 }, { "epoch": 0.6253133681143709, "grad_norm": 6.339493751525879, "learning_rate": 8.839345608871244e-05, "loss": 0.6371, "step": 9229 }, { "epoch": 0.625381123382343, "grad_norm": 6.844937324523926, "learning_rate": 8.839208706961462e-05, "loss": 0.7956, "step": 9230 }, { "epoch": 0.6254488786503151, "grad_norm": 5.3728461265563965, "learning_rate": 8.83907180505168e-05, "loss": 0.765, "step": 9231 }, { "epoch": 0.6255166339182872, "grad_norm": 9.041521072387695, "learning_rate": 8.838934903141898e-05, "loss": 0.6906, "step": 9232 }, { "epoch": 0.6255843891862592, "grad_norm": 7.101466178894043, "learning_rate": 8.838798001232118e-05, "loss": 1.174, "step": 9233 }, { "epoch": 0.6256521444542313, "grad_norm": 7.049058437347412, "learning_rate": 8.838661099322336e-05, "loss": 0.7884, "step": 9234 }, { "epoch": 0.6257198997222034, "grad_norm": 6.012722969055176, "learning_rate": 8.838524197412554e-05, "loss": 0.7543, "step": 9235 }, { "epoch": 0.6257876549901755, "grad_norm": 8.046865463256836, "learning_rate": 8.838387295502772e-05, "loss": 0.7745, "step": 9236 }, { "epoch": 0.6258554102581476, "grad_norm": 6.681391716003418, "learning_rate": 8.83825039359299e-05, "loss": 0.8218, "step": 9237 }, { "epoch": 0.6259231655261197, "grad_norm": 8.432860374450684, "learning_rate": 8.83811349168321e-05, "loss": 0.84, "step": 9238 }, { "epoch": 0.6259909207940917, "grad_norm": 7.283944606781006, "learning_rate": 8.837976589773427e-05, "loss": 0.7066, "step": 9239 }, { "epoch": 0.6260586760620638, "grad_norm": 6.028378009796143, "learning_rate": 8.837839687863645e-05, "loss": 0.7066, "step": 9240 }, { "epoch": 0.6261264313300359, "grad_norm": 5.715835094451904, "learning_rate": 8.837702785953863e-05, "loss": 0.9115, "step": 9241 }, { "epoch": 0.626194186598008, "grad_norm": 5.851448059082031, "learning_rate": 8.837565884044083e-05, "loss": 0.7464, "step": 9242 }, { "epoch": 0.62626194186598, "grad_norm": 7.112005710601807, "learning_rate": 8.837428982134301e-05, "loss": 0.8635, "step": 9243 }, { "epoch": 0.6263296971339521, "grad_norm": 6.428948879241943, "learning_rate": 8.837292080224519e-05, "loss": 0.9229, "step": 9244 }, { "epoch": 0.6263974524019242, "grad_norm": 5.358401298522949, "learning_rate": 8.837155178314738e-05, "loss": 0.5362, "step": 9245 }, { "epoch": 0.6264652076698963, "grad_norm": 5.889663219451904, "learning_rate": 8.837018276404956e-05, "loss": 0.995, "step": 9246 }, { "epoch": 0.6265329629378684, "grad_norm": 5.585958480834961, "learning_rate": 8.836881374495174e-05, "loss": 0.8504, "step": 9247 }, { "epoch": 0.6266007182058405, "grad_norm": 6.147828102111816, "learning_rate": 8.836744472585394e-05, "loss": 0.8975, "step": 9248 }, { "epoch": 0.6266684734738126, "grad_norm": 6.951436996459961, "learning_rate": 8.836607570675612e-05, "loss": 1.063, "step": 9249 }, { "epoch": 0.6267362287417847, "grad_norm": 6.059296131134033, "learning_rate": 8.83647066876583e-05, "loss": 0.815, "step": 9250 }, { "epoch": 0.6268039840097568, "grad_norm": 8.177648544311523, "learning_rate": 8.836333766856048e-05, "loss": 0.9035, "step": 9251 }, { "epoch": 0.6268717392777289, "grad_norm": 5.385120868682861, "learning_rate": 8.836196864946267e-05, "loss": 0.5322, "step": 9252 }, { "epoch": 0.626939494545701, "grad_norm": 9.279441833496094, "learning_rate": 8.836059963036485e-05, "loss": 0.8954, "step": 9253 }, { "epoch": 0.6270072498136731, "grad_norm": 6.143721103668213, "learning_rate": 8.835923061126703e-05, "loss": 0.7225, "step": 9254 }, { "epoch": 0.627075005081645, "grad_norm": 7.277685642242432, "learning_rate": 8.835786159216921e-05, "loss": 0.769, "step": 9255 }, { "epoch": 0.6271427603496171, "grad_norm": 6.348362445831299, "learning_rate": 8.835649257307141e-05, "loss": 0.8076, "step": 9256 }, { "epoch": 0.6272105156175892, "grad_norm": 5.717894077301025, "learning_rate": 8.835512355397359e-05, "loss": 0.5646, "step": 9257 }, { "epoch": 0.6272782708855613, "grad_norm": 5.959596157073975, "learning_rate": 8.835375453487577e-05, "loss": 0.8393, "step": 9258 }, { "epoch": 0.6273460261535334, "grad_norm": 6.298025131225586, "learning_rate": 8.835238551577795e-05, "loss": 0.6772, "step": 9259 }, { "epoch": 0.6274137814215055, "grad_norm": 8.076531410217285, "learning_rate": 8.835101649668013e-05, "loss": 1.2792, "step": 9260 }, { "epoch": 0.6274815366894776, "grad_norm": 7.445847034454346, "learning_rate": 8.834964747758232e-05, "loss": 0.8439, "step": 9261 }, { "epoch": 0.6275492919574497, "grad_norm": 8.871906280517578, "learning_rate": 8.83482784584845e-05, "loss": 0.7631, "step": 9262 }, { "epoch": 0.6276170472254218, "grad_norm": 5.552763938903809, "learning_rate": 8.834690943938668e-05, "loss": 0.7293, "step": 9263 }, { "epoch": 0.6276848024933939, "grad_norm": 6.359529495239258, "learning_rate": 8.834554042028886e-05, "loss": 0.8246, "step": 9264 }, { "epoch": 0.627752557761366, "grad_norm": 6.481078147888184, "learning_rate": 8.834417140119106e-05, "loss": 0.8931, "step": 9265 }, { "epoch": 0.627820313029338, "grad_norm": 6.024886608123779, "learning_rate": 8.834280238209324e-05, "loss": 0.6816, "step": 9266 }, { "epoch": 0.6278880682973101, "grad_norm": 6.775660514831543, "learning_rate": 8.834143336299542e-05, "loss": 0.9066, "step": 9267 }, { "epoch": 0.6279558235652822, "grad_norm": 6.696643829345703, "learning_rate": 8.83400643438976e-05, "loss": 0.7173, "step": 9268 }, { "epoch": 0.6280235788332543, "grad_norm": 7.639727592468262, "learning_rate": 8.833869532479978e-05, "loss": 0.772, "step": 9269 }, { "epoch": 0.6280913341012264, "grad_norm": 7.364283084869385, "learning_rate": 8.833732630570197e-05, "loss": 0.8966, "step": 9270 }, { "epoch": 0.6281590893691985, "grad_norm": 6.8663482666015625, "learning_rate": 8.833595728660415e-05, "loss": 0.9459, "step": 9271 }, { "epoch": 0.6282268446371705, "grad_norm": 7.687761306762695, "learning_rate": 8.833458826750633e-05, "loss": 1.0611, "step": 9272 }, { "epoch": 0.6282945999051426, "grad_norm": 6.051063060760498, "learning_rate": 8.833321924840851e-05, "loss": 0.6693, "step": 9273 }, { "epoch": 0.6283623551731147, "grad_norm": 6.312000274658203, "learning_rate": 8.833185022931071e-05, "loss": 0.6403, "step": 9274 }, { "epoch": 0.6284301104410868, "grad_norm": 6.270723819732666, "learning_rate": 8.833048121021289e-05, "loss": 0.9183, "step": 9275 }, { "epoch": 0.6284978657090589, "grad_norm": 6.80443811416626, "learning_rate": 8.832911219111507e-05, "loss": 0.6951, "step": 9276 }, { "epoch": 0.628565620977031, "grad_norm": 6.09138822555542, "learning_rate": 8.832774317201725e-05, "loss": 0.8251, "step": 9277 }, { "epoch": 0.628633376245003, "grad_norm": 5.778343200683594, "learning_rate": 8.832637415291943e-05, "loss": 0.5598, "step": 9278 }, { "epoch": 0.6287011315129751, "grad_norm": 6.845189571380615, "learning_rate": 8.832500513382162e-05, "loss": 0.9961, "step": 9279 }, { "epoch": 0.6287688867809472, "grad_norm": 5.6591081619262695, "learning_rate": 8.83236361147238e-05, "loss": 0.9207, "step": 9280 }, { "epoch": 0.6288366420489193, "grad_norm": 6.356192588806152, "learning_rate": 8.832226709562598e-05, "loss": 0.9117, "step": 9281 }, { "epoch": 0.6289043973168914, "grad_norm": 6.646953582763672, "learning_rate": 8.832089807652816e-05, "loss": 0.8016, "step": 9282 }, { "epoch": 0.6289721525848635, "grad_norm": 7.553640365600586, "learning_rate": 8.831952905743034e-05, "loss": 0.872, "step": 9283 }, { "epoch": 0.6290399078528356, "grad_norm": 6.47942590713501, "learning_rate": 8.831816003833254e-05, "loss": 0.7643, "step": 9284 }, { "epoch": 0.6291076631208077, "grad_norm": 4.924723148345947, "learning_rate": 8.831679101923472e-05, "loss": 0.673, "step": 9285 }, { "epoch": 0.6291754183887798, "grad_norm": 6.718869686126709, "learning_rate": 8.83154220001369e-05, "loss": 0.9365, "step": 9286 }, { "epoch": 0.6292431736567519, "grad_norm": 6.06726598739624, "learning_rate": 8.831405298103908e-05, "loss": 0.7183, "step": 9287 }, { "epoch": 0.6293109289247238, "grad_norm": 6.576848030090332, "learning_rate": 8.831268396194127e-05, "loss": 0.8866, "step": 9288 }, { "epoch": 0.6293786841926959, "grad_norm": 7.97581148147583, "learning_rate": 8.831131494284345e-05, "loss": 0.8479, "step": 9289 }, { "epoch": 0.629446439460668, "grad_norm": 5.632781028747559, "learning_rate": 8.830994592374563e-05, "loss": 0.7944, "step": 9290 }, { "epoch": 0.6295141947286401, "grad_norm": 7.232397556304932, "learning_rate": 8.830857690464783e-05, "loss": 0.9172, "step": 9291 }, { "epoch": 0.6295819499966122, "grad_norm": 7.6181464195251465, "learning_rate": 8.830720788555001e-05, "loss": 0.6232, "step": 9292 }, { "epoch": 0.6296497052645843, "grad_norm": 7.363900661468506, "learning_rate": 8.830583886645219e-05, "loss": 0.9468, "step": 9293 }, { "epoch": 0.6297174605325564, "grad_norm": 6.03270149230957, "learning_rate": 8.830446984735438e-05, "loss": 0.9073, "step": 9294 }, { "epoch": 0.6297852158005285, "grad_norm": 6.522334098815918, "learning_rate": 8.830310082825656e-05, "loss": 0.7139, "step": 9295 }, { "epoch": 0.6298529710685006, "grad_norm": 7.849470615386963, "learning_rate": 8.830173180915874e-05, "loss": 1.0359, "step": 9296 }, { "epoch": 0.6299207263364727, "grad_norm": 7.876535892486572, "learning_rate": 8.830036279006094e-05, "loss": 0.8647, "step": 9297 }, { "epoch": 0.6299884816044448, "grad_norm": 5.959194183349609, "learning_rate": 8.829899377096312e-05, "loss": 0.9567, "step": 9298 }, { "epoch": 0.6300562368724169, "grad_norm": 5.821471691131592, "learning_rate": 8.82976247518653e-05, "loss": 0.8456, "step": 9299 }, { "epoch": 0.6301239921403889, "grad_norm": 5.807693004608154, "learning_rate": 8.829625573276748e-05, "loss": 0.8929, "step": 9300 }, { "epoch": 0.630191747408361, "grad_norm": 6.666048049926758, "learning_rate": 8.829488671366966e-05, "loss": 0.891, "step": 9301 }, { "epoch": 0.6302595026763331, "grad_norm": 6.061259746551514, "learning_rate": 8.829351769457185e-05, "loss": 0.7318, "step": 9302 }, { "epoch": 0.6303272579443052, "grad_norm": 6.72253942489624, "learning_rate": 8.829214867547403e-05, "loss": 0.9176, "step": 9303 }, { "epoch": 0.6303950132122772, "grad_norm": 6.540244102478027, "learning_rate": 8.829077965637621e-05, "loss": 0.7941, "step": 9304 }, { "epoch": 0.6304627684802493, "grad_norm": 6.3027262687683105, "learning_rate": 8.82894106372784e-05, "loss": 0.7615, "step": 9305 }, { "epoch": 0.6305305237482214, "grad_norm": 5.5668745040893555, "learning_rate": 8.828804161818057e-05, "loss": 0.7446, "step": 9306 }, { "epoch": 0.6305982790161935, "grad_norm": 6.234310150146484, "learning_rate": 8.828667259908277e-05, "loss": 0.8601, "step": 9307 }, { "epoch": 0.6306660342841656, "grad_norm": 6.879257678985596, "learning_rate": 8.828530357998495e-05, "loss": 0.8312, "step": 9308 }, { "epoch": 0.6307337895521377, "grad_norm": 5.684000015258789, "learning_rate": 8.828393456088713e-05, "loss": 0.8129, "step": 9309 }, { "epoch": 0.6308015448201097, "grad_norm": 6.523510932922363, "learning_rate": 8.828256554178931e-05, "loss": 1.0274, "step": 9310 }, { "epoch": 0.6308693000880818, "grad_norm": 6.060532569885254, "learning_rate": 8.82811965226915e-05, "loss": 0.6513, "step": 9311 }, { "epoch": 0.6309370553560539, "grad_norm": 6.2253828048706055, "learning_rate": 8.827982750359368e-05, "loss": 0.7539, "step": 9312 }, { "epoch": 0.631004810624026, "grad_norm": 5.188565254211426, "learning_rate": 8.827845848449586e-05, "loss": 0.7904, "step": 9313 }, { "epoch": 0.6310725658919981, "grad_norm": 6.081325054168701, "learning_rate": 8.827708946539804e-05, "loss": 0.8399, "step": 9314 }, { "epoch": 0.6311403211599702, "grad_norm": 5.515079975128174, "learning_rate": 8.827572044630022e-05, "loss": 0.7415, "step": 9315 }, { "epoch": 0.6312080764279423, "grad_norm": 6.149160385131836, "learning_rate": 8.827435142720242e-05, "loss": 0.6537, "step": 9316 }, { "epoch": 0.6312758316959144, "grad_norm": 8.075578689575195, "learning_rate": 8.82729824081046e-05, "loss": 0.9744, "step": 9317 }, { "epoch": 0.6313435869638865, "grad_norm": 5.570530414581299, "learning_rate": 8.827161338900678e-05, "loss": 0.6468, "step": 9318 }, { "epoch": 0.6314113422318586, "grad_norm": 7.706857204437256, "learning_rate": 8.827024436990896e-05, "loss": 1.0308, "step": 9319 }, { "epoch": 0.6314790974998307, "grad_norm": 8.566661834716797, "learning_rate": 8.826887535081115e-05, "loss": 0.6278, "step": 9320 }, { "epoch": 0.6315468527678026, "grad_norm": 6.264912128448486, "learning_rate": 8.826750633171333e-05, "loss": 0.9, "step": 9321 }, { "epoch": 0.6316146080357747, "grad_norm": 6.289927005767822, "learning_rate": 8.826613731261551e-05, "loss": 0.788, "step": 9322 }, { "epoch": 0.6316823633037468, "grad_norm": 6.285764217376709, "learning_rate": 8.82647682935177e-05, "loss": 0.7928, "step": 9323 }, { "epoch": 0.6317501185717189, "grad_norm": 6.555515766143799, "learning_rate": 8.826339927441987e-05, "loss": 0.7345, "step": 9324 }, { "epoch": 0.631817873839691, "grad_norm": 7.84982967376709, "learning_rate": 8.826203025532207e-05, "loss": 0.917, "step": 9325 }, { "epoch": 0.6318856291076631, "grad_norm": 5.471888065338135, "learning_rate": 8.826066123622425e-05, "loss": 0.7449, "step": 9326 }, { "epoch": 0.6319533843756352, "grad_norm": 7.806143283843994, "learning_rate": 8.825929221712643e-05, "loss": 0.8559, "step": 9327 }, { "epoch": 0.6320211396436073, "grad_norm": 6.160640716552734, "learning_rate": 8.825792319802861e-05, "loss": 0.7819, "step": 9328 }, { "epoch": 0.6320888949115794, "grad_norm": 6.968262195587158, "learning_rate": 8.825655417893079e-05, "loss": 1.1055, "step": 9329 }, { "epoch": 0.6321566501795515, "grad_norm": 5.455170154571533, "learning_rate": 8.825518515983298e-05, "loss": 0.7852, "step": 9330 }, { "epoch": 0.6322244054475236, "grad_norm": 7.499693870544434, "learning_rate": 8.825381614073516e-05, "loss": 0.8827, "step": 9331 }, { "epoch": 0.6322921607154957, "grad_norm": 6.363000869750977, "learning_rate": 8.825244712163734e-05, "loss": 0.9469, "step": 9332 }, { "epoch": 0.6323599159834677, "grad_norm": 6.631052017211914, "learning_rate": 8.825107810253952e-05, "loss": 0.8891, "step": 9333 }, { "epoch": 0.6324276712514398, "grad_norm": 5.2757697105407715, "learning_rate": 8.824970908344172e-05, "loss": 0.8287, "step": 9334 }, { "epoch": 0.6324954265194119, "grad_norm": 8.268365859985352, "learning_rate": 8.82483400643439e-05, "loss": 0.9812, "step": 9335 }, { "epoch": 0.632563181787384, "grad_norm": 6.276435852050781, "learning_rate": 8.824697104524608e-05, "loss": 0.9827, "step": 9336 }, { "epoch": 0.632630937055356, "grad_norm": 5.93610954284668, "learning_rate": 8.824560202614827e-05, "loss": 0.8656, "step": 9337 }, { "epoch": 0.6326986923233281, "grad_norm": 6.036667346954346, "learning_rate": 8.824423300705045e-05, "loss": 0.9812, "step": 9338 }, { "epoch": 0.6327664475913002, "grad_norm": 5.460302829742432, "learning_rate": 8.824286398795263e-05, "loss": 0.7286, "step": 9339 }, { "epoch": 0.6328342028592723, "grad_norm": 6.362276554107666, "learning_rate": 8.824149496885483e-05, "loss": 0.8244, "step": 9340 }, { "epoch": 0.6329019581272444, "grad_norm": 7.416135311126709, "learning_rate": 8.824012594975701e-05, "loss": 0.7997, "step": 9341 }, { "epoch": 0.6329697133952165, "grad_norm": 7.711816787719727, "learning_rate": 8.823875693065919e-05, "loss": 1.0054, "step": 9342 }, { "epoch": 0.6330374686631886, "grad_norm": 5.800533771514893, "learning_rate": 8.823738791156138e-05, "loss": 1.1502, "step": 9343 }, { "epoch": 0.6331052239311606, "grad_norm": 5.00458288192749, "learning_rate": 8.823601889246356e-05, "loss": 0.6607, "step": 9344 }, { "epoch": 0.6331729791991327, "grad_norm": 6.7192277908325195, "learning_rate": 8.823464987336574e-05, "loss": 1.0327, "step": 9345 }, { "epoch": 0.6332407344671048, "grad_norm": 6.815017223358154, "learning_rate": 8.823328085426792e-05, "loss": 0.7579, "step": 9346 }, { "epoch": 0.6333084897350769, "grad_norm": 7.114835262298584, "learning_rate": 8.82319118351701e-05, "loss": 1.0215, "step": 9347 }, { "epoch": 0.633376245003049, "grad_norm": 4.819394111633301, "learning_rate": 8.82305428160723e-05, "loss": 0.6611, "step": 9348 }, { "epoch": 0.6334440002710211, "grad_norm": 7.824159145355225, "learning_rate": 8.822917379697448e-05, "loss": 1.0557, "step": 9349 }, { "epoch": 0.6335117555389932, "grad_norm": 6.462610721588135, "learning_rate": 8.822780477787666e-05, "loss": 1.1399, "step": 9350 }, { "epoch": 0.6335795108069653, "grad_norm": 5.1685709953308105, "learning_rate": 8.822643575877884e-05, "loss": 0.7156, "step": 9351 }, { "epoch": 0.6336472660749374, "grad_norm": 6.346011638641357, "learning_rate": 8.822506673968103e-05, "loss": 0.8654, "step": 9352 }, { "epoch": 0.6337150213429094, "grad_norm": 5.18143367767334, "learning_rate": 8.822369772058321e-05, "loss": 0.6909, "step": 9353 }, { "epoch": 0.6337827766108814, "grad_norm": 5.92888069152832, "learning_rate": 8.822232870148539e-05, "loss": 0.7467, "step": 9354 }, { "epoch": 0.6338505318788535, "grad_norm": 7.556412696838379, "learning_rate": 8.822095968238757e-05, "loss": 0.9099, "step": 9355 }, { "epoch": 0.6339182871468256, "grad_norm": 5.749037742614746, "learning_rate": 8.821959066328975e-05, "loss": 0.7791, "step": 9356 }, { "epoch": 0.6339860424147977, "grad_norm": 6.511964321136475, "learning_rate": 8.821822164419195e-05, "loss": 0.879, "step": 9357 }, { "epoch": 0.6340537976827698, "grad_norm": 7.070270538330078, "learning_rate": 8.821685262509413e-05, "loss": 0.8151, "step": 9358 }, { "epoch": 0.6341215529507419, "grad_norm": 7.1327409744262695, "learning_rate": 8.821548360599631e-05, "loss": 0.9491, "step": 9359 }, { "epoch": 0.634189308218714, "grad_norm": 5.8238911628723145, "learning_rate": 8.821411458689849e-05, "loss": 0.6894, "step": 9360 }, { "epoch": 0.6342570634866861, "grad_norm": 5.967693328857422, "learning_rate": 8.821274556780067e-05, "loss": 0.7576, "step": 9361 }, { "epoch": 0.6343248187546582, "grad_norm": 6.3497395515441895, "learning_rate": 8.821137654870286e-05, "loss": 0.9133, "step": 9362 }, { "epoch": 0.6343925740226303, "grad_norm": 5.200571060180664, "learning_rate": 8.821000752960504e-05, "loss": 0.6492, "step": 9363 }, { "epoch": 0.6344603292906024, "grad_norm": 6.661485195159912, "learning_rate": 8.820863851050722e-05, "loss": 0.7363, "step": 9364 }, { "epoch": 0.6345280845585745, "grad_norm": 5.2447733879089355, "learning_rate": 8.82072694914094e-05, "loss": 0.7672, "step": 9365 }, { "epoch": 0.6345958398265465, "grad_norm": 6.811657905578613, "learning_rate": 8.82059004723116e-05, "loss": 0.9903, "step": 9366 }, { "epoch": 0.6346635950945186, "grad_norm": 5.834871768951416, "learning_rate": 8.820453145321378e-05, "loss": 0.697, "step": 9367 }, { "epoch": 0.6347313503624907, "grad_norm": 6.931889057159424, "learning_rate": 8.820316243411596e-05, "loss": 0.8039, "step": 9368 }, { "epoch": 0.6347991056304628, "grad_norm": 5.947389602661133, "learning_rate": 8.820179341501814e-05, "loss": 0.7339, "step": 9369 }, { "epoch": 0.6348668608984348, "grad_norm": 6.914769649505615, "learning_rate": 8.820042439592032e-05, "loss": 0.7366, "step": 9370 }, { "epoch": 0.6349346161664069, "grad_norm": 7.74104118347168, "learning_rate": 8.819905537682251e-05, "loss": 1.1219, "step": 9371 }, { "epoch": 0.635002371434379, "grad_norm": 6.544697284698486, "learning_rate": 8.819768635772469e-05, "loss": 0.7795, "step": 9372 }, { "epoch": 0.6350701267023511, "grad_norm": 6.4533772468566895, "learning_rate": 8.819631733862687e-05, "loss": 0.8343, "step": 9373 }, { "epoch": 0.6351378819703232, "grad_norm": 6.726015090942383, "learning_rate": 8.819494831952905e-05, "loss": 0.6306, "step": 9374 }, { "epoch": 0.6352056372382953, "grad_norm": 6.396018981933594, "learning_rate": 8.819357930043125e-05, "loss": 0.7632, "step": 9375 }, { "epoch": 0.6352733925062674, "grad_norm": 7.739027500152588, "learning_rate": 8.819221028133343e-05, "loss": 1.2215, "step": 9376 }, { "epoch": 0.6353411477742394, "grad_norm": 6.827197551727295, "learning_rate": 8.819084126223561e-05, "loss": 1.0954, "step": 9377 }, { "epoch": 0.6354089030422115, "grad_norm": 6.967258930206299, "learning_rate": 8.818947224313779e-05, "loss": 1.1225, "step": 9378 }, { "epoch": 0.6354766583101836, "grad_norm": 7.872466564178467, "learning_rate": 8.818810322403997e-05, "loss": 0.8114, "step": 9379 }, { "epoch": 0.6355444135781557, "grad_norm": 6.985890865325928, "learning_rate": 8.818673420494216e-05, "loss": 0.8177, "step": 9380 }, { "epoch": 0.6356121688461278, "grad_norm": 7.022130966186523, "learning_rate": 8.818536518584434e-05, "loss": 0.7718, "step": 9381 }, { "epoch": 0.6356799241140999, "grad_norm": 8.023907661437988, "learning_rate": 8.818399616674652e-05, "loss": 0.9181, "step": 9382 }, { "epoch": 0.635747679382072, "grad_norm": 7.373246669769287, "learning_rate": 8.818262714764872e-05, "loss": 0.8733, "step": 9383 }, { "epoch": 0.6358154346500441, "grad_norm": 5.75626802444458, "learning_rate": 8.81812581285509e-05, "loss": 0.6196, "step": 9384 }, { "epoch": 0.6358831899180162, "grad_norm": 7.449457168579102, "learning_rate": 8.817988910945308e-05, "loss": 1.1738, "step": 9385 }, { "epoch": 0.6359509451859882, "grad_norm": 6.01223087310791, "learning_rate": 8.817852009035527e-05, "loss": 0.6577, "step": 9386 }, { "epoch": 0.6360187004539603, "grad_norm": 5.752939701080322, "learning_rate": 8.817715107125745e-05, "loss": 0.8448, "step": 9387 }, { "epoch": 0.6360864557219323, "grad_norm": 6.657622337341309, "learning_rate": 8.817578205215963e-05, "loss": 0.8274, "step": 9388 }, { "epoch": 0.6361542109899044, "grad_norm": 8.236910820007324, "learning_rate": 8.817441303306183e-05, "loss": 1.0927, "step": 9389 }, { "epoch": 0.6362219662578765, "grad_norm": 6.963682651519775, "learning_rate": 8.817304401396401e-05, "loss": 1.0313, "step": 9390 }, { "epoch": 0.6362897215258486, "grad_norm": 7.404387950897217, "learning_rate": 8.817167499486619e-05, "loss": 1.0147, "step": 9391 }, { "epoch": 0.6363574767938207, "grad_norm": 6.6976470947265625, "learning_rate": 8.817030597576837e-05, "loss": 0.8769, "step": 9392 }, { "epoch": 0.6364252320617928, "grad_norm": 5.77000617980957, "learning_rate": 8.816893695667055e-05, "loss": 0.8143, "step": 9393 }, { "epoch": 0.6364929873297649, "grad_norm": 5.952037334442139, "learning_rate": 8.816756793757274e-05, "loss": 0.8243, "step": 9394 }, { "epoch": 0.636560742597737, "grad_norm": 7.398003578186035, "learning_rate": 8.816619891847492e-05, "loss": 1.0528, "step": 9395 }, { "epoch": 0.6366284978657091, "grad_norm": 7.800195693969727, "learning_rate": 8.81648298993771e-05, "loss": 0.8146, "step": 9396 }, { "epoch": 0.6366962531336812, "grad_norm": 5.836297035217285, "learning_rate": 8.816346088027928e-05, "loss": 0.6748, "step": 9397 }, { "epoch": 0.6367640084016533, "grad_norm": 8.607617378234863, "learning_rate": 8.816209186118148e-05, "loss": 1.1096, "step": 9398 }, { "epoch": 0.6368317636696254, "grad_norm": 5.9134392738342285, "learning_rate": 8.816072284208366e-05, "loss": 0.8241, "step": 9399 }, { "epoch": 0.6368995189375974, "grad_norm": 6.997758865356445, "learning_rate": 8.815935382298584e-05, "loss": 0.842, "step": 9400 }, { "epoch": 0.6369672742055695, "grad_norm": 6.37071418762207, "learning_rate": 8.815798480388802e-05, "loss": 0.8297, "step": 9401 }, { "epoch": 0.6370350294735415, "grad_norm": 5.971574306488037, "learning_rate": 8.81566157847902e-05, "loss": 0.7567, "step": 9402 }, { "epoch": 0.6371027847415136, "grad_norm": 7.232084274291992, "learning_rate": 8.815524676569239e-05, "loss": 0.9095, "step": 9403 }, { "epoch": 0.6371705400094857, "grad_norm": 4.349423885345459, "learning_rate": 8.815387774659457e-05, "loss": 0.568, "step": 9404 }, { "epoch": 0.6372382952774578, "grad_norm": 5.8268208503723145, "learning_rate": 8.815250872749675e-05, "loss": 0.9203, "step": 9405 }, { "epoch": 0.6373060505454299, "grad_norm": 7.490527153015137, "learning_rate": 8.815113970839893e-05, "loss": 1.1386, "step": 9406 }, { "epoch": 0.637373805813402, "grad_norm": 7.359919548034668, "learning_rate": 8.814977068930113e-05, "loss": 1.1407, "step": 9407 }, { "epoch": 0.6374415610813741, "grad_norm": 8.83666706085205, "learning_rate": 8.814840167020331e-05, "loss": 0.7869, "step": 9408 }, { "epoch": 0.6375093163493462, "grad_norm": 8.88762378692627, "learning_rate": 8.814703265110549e-05, "loss": 1.1761, "step": 9409 }, { "epoch": 0.6375770716173182, "grad_norm": 5.899496078491211, "learning_rate": 8.814566363200767e-05, "loss": 0.8142, "step": 9410 }, { "epoch": 0.6376448268852903, "grad_norm": 5.661779880523682, "learning_rate": 8.814429461290985e-05, "loss": 0.7744, "step": 9411 }, { "epoch": 0.6377125821532624, "grad_norm": 6.715488910675049, "learning_rate": 8.814292559381204e-05, "loss": 0.7039, "step": 9412 }, { "epoch": 0.6377803374212345, "grad_norm": 6.482609272003174, "learning_rate": 8.814155657471422e-05, "loss": 0.6712, "step": 9413 }, { "epoch": 0.6378480926892066, "grad_norm": 6.715174198150635, "learning_rate": 8.81401875556164e-05, "loss": 0.899, "step": 9414 }, { "epoch": 0.6379158479571787, "grad_norm": 5.657138824462891, "learning_rate": 8.813881853651858e-05, "loss": 0.7513, "step": 9415 }, { "epoch": 0.6379836032251508, "grad_norm": 5.905570983886719, "learning_rate": 8.813744951742076e-05, "loss": 0.8074, "step": 9416 }, { "epoch": 0.6380513584931229, "grad_norm": 7.106529235839844, "learning_rate": 8.813608049832296e-05, "loss": 0.8908, "step": 9417 }, { "epoch": 0.638119113761095, "grad_norm": 6.900413990020752, "learning_rate": 8.813471147922514e-05, "loss": 0.8776, "step": 9418 }, { "epoch": 0.638186869029067, "grad_norm": 6.651158809661865, "learning_rate": 8.813334246012732e-05, "loss": 0.8336, "step": 9419 }, { "epoch": 0.638254624297039, "grad_norm": 8.57502269744873, "learning_rate": 8.81319734410295e-05, "loss": 1.0396, "step": 9420 }, { "epoch": 0.6383223795650111, "grad_norm": 4.871134281158447, "learning_rate": 8.813060442193169e-05, "loss": 0.7517, "step": 9421 }, { "epoch": 0.6383901348329832, "grad_norm": 10.170848846435547, "learning_rate": 8.812923540283387e-05, "loss": 0.9677, "step": 9422 }, { "epoch": 0.6384578901009553, "grad_norm": 6.10742712020874, "learning_rate": 8.812786638373605e-05, "loss": 0.9884, "step": 9423 }, { "epoch": 0.6385256453689274, "grad_norm": 5.653887748718262, "learning_rate": 8.812649736463823e-05, "loss": 0.7325, "step": 9424 }, { "epoch": 0.6385934006368995, "grad_norm": 6.501628398895264, "learning_rate": 8.812512834554041e-05, "loss": 0.8378, "step": 9425 }, { "epoch": 0.6386611559048716, "grad_norm": 5.696453094482422, "learning_rate": 8.812375932644261e-05, "loss": 0.787, "step": 9426 }, { "epoch": 0.6387289111728437, "grad_norm": 6.385643482208252, "learning_rate": 8.812239030734479e-05, "loss": 0.7208, "step": 9427 }, { "epoch": 0.6387966664408158, "grad_norm": 8.655128479003906, "learning_rate": 8.812102128824697e-05, "loss": 0.5881, "step": 9428 }, { "epoch": 0.6388644217087879, "grad_norm": 5.63440465927124, "learning_rate": 8.811965226914916e-05, "loss": 0.8304, "step": 9429 }, { "epoch": 0.63893217697676, "grad_norm": 7.352899074554443, "learning_rate": 8.811828325005134e-05, "loss": 0.9048, "step": 9430 }, { "epoch": 0.6389999322447321, "grad_norm": 6.372758865356445, "learning_rate": 8.811691423095352e-05, "loss": 0.8811, "step": 9431 }, { "epoch": 0.6390676875127042, "grad_norm": 8.284529685974121, "learning_rate": 8.811554521185572e-05, "loss": 1.0255, "step": 9432 }, { "epoch": 0.6391354427806762, "grad_norm": 4.690229892730713, "learning_rate": 8.81141761927579e-05, "loss": 0.6671, "step": 9433 }, { "epoch": 0.6392031980486483, "grad_norm": 5.906257629394531, "learning_rate": 8.811280717366008e-05, "loss": 1.0164, "step": 9434 }, { "epoch": 0.6392709533166203, "grad_norm": 5.012555122375488, "learning_rate": 8.811143815456227e-05, "loss": 0.5985, "step": 9435 }, { "epoch": 0.6393387085845924, "grad_norm": 9.228821754455566, "learning_rate": 8.811006913546445e-05, "loss": 0.9541, "step": 9436 }, { "epoch": 0.6394064638525645, "grad_norm": 6.516229152679443, "learning_rate": 8.810870011636663e-05, "loss": 1.0687, "step": 9437 }, { "epoch": 0.6394742191205366, "grad_norm": 6.953182220458984, "learning_rate": 8.810733109726881e-05, "loss": 0.6515, "step": 9438 }, { "epoch": 0.6395419743885087, "grad_norm": 5.848710536956787, "learning_rate": 8.810596207817099e-05, "loss": 0.9173, "step": 9439 }, { "epoch": 0.6396097296564808, "grad_norm": 6.093873023986816, "learning_rate": 8.810459305907319e-05, "loss": 0.7612, "step": 9440 }, { "epoch": 0.6396774849244529, "grad_norm": 6.799055099487305, "learning_rate": 8.810322403997537e-05, "loss": 0.9219, "step": 9441 }, { "epoch": 0.639745240192425, "grad_norm": 6.2249436378479, "learning_rate": 8.810185502087755e-05, "loss": 0.8459, "step": 9442 }, { "epoch": 0.639812995460397, "grad_norm": 9.087257385253906, "learning_rate": 8.810048600177973e-05, "loss": 0.7746, "step": 9443 }, { "epoch": 0.6398807507283691, "grad_norm": 5.2963433265686035, "learning_rate": 8.809911698268192e-05, "loss": 1.0891, "step": 9444 }, { "epoch": 0.6399485059963412, "grad_norm": 6.330840110778809, "learning_rate": 8.80977479635841e-05, "loss": 0.7877, "step": 9445 }, { "epoch": 0.6400162612643133, "grad_norm": 6.288755416870117, "learning_rate": 8.809637894448628e-05, "loss": 0.8538, "step": 9446 }, { "epoch": 0.6400840165322854, "grad_norm": 6.01615571975708, "learning_rate": 8.809500992538846e-05, "loss": 0.8595, "step": 9447 }, { "epoch": 0.6401517718002575, "grad_norm": 5.402866840362549, "learning_rate": 8.809364090629064e-05, "loss": 0.7067, "step": 9448 }, { "epoch": 0.6402195270682296, "grad_norm": 5.39243221282959, "learning_rate": 8.809227188719284e-05, "loss": 0.7097, "step": 9449 }, { "epoch": 0.6402872823362017, "grad_norm": 5.97304630279541, "learning_rate": 8.809090286809502e-05, "loss": 0.8014, "step": 9450 }, { "epoch": 0.6403550376041737, "grad_norm": 9.370304107666016, "learning_rate": 8.80895338489972e-05, "loss": 0.8021, "step": 9451 }, { "epoch": 0.6404227928721458, "grad_norm": 5.073042869567871, "learning_rate": 8.808816482989938e-05, "loss": 0.7241, "step": 9452 }, { "epoch": 0.6404905481401179, "grad_norm": 6.402749061584473, "learning_rate": 8.808679581080157e-05, "loss": 0.733, "step": 9453 }, { "epoch": 0.64055830340809, "grad_norm": 5.919348239898682, "learning_rate": 8.808542679170375e-05, "loss": 1.0033, "step": 9454 }, { "epoch": 0.640626058676062, "grad_norm": 5.996345520019531, "learning_rate": 8.808405777260593e-05, "loss": 0.5867, "step": 9455 }, { "epoch": 0.6406938139440341, "grad_norm": 6.826650619506836, "learning_rate": 8.808268875350811e-05, "loss": 1.1324, "step": 9456 }, { "epoch": 0.6407615692120062, "grad_norm": 5.9293999671936035, "learning_rate": 8.808131973441029e-05, "loss": 0.7688, "step": 9457 }, { "epoch": 0.6408293244799783, "grad_norm": 7.4283833503723145, "learning_rate": 8.807995071531249e-05, "loss": 1.0136, "step": 9458 }, { "epoch": 0.6408970797479504, "grad_norm": 6.848118782043457, "learning_rate": 8.807858169621467e-05, "loss": 0.8612, "step": 9459 }, { "epoch": 0.6409648350159225, "grad_norm": 5.884200096130371, "learning_rate": 8.807721267711685e-05, "loss": 0.7734, "step": 9460 }, { "epoch": 0.6410325902838946, "grad_norm": 6.456984996795654, "learning_rate": 8.807584365801903e-05, "loss": 0.899, "step": 9461 }, { "epoch": 0.6411003455518667, "grad_norm": 6.896454334259033, "learning_rate": 8.807447463892121e-05, "loss": 0.9756, "step": 9462 }, { "epoch": 0.6411681008198388, "grad_norm": 5.393690586090088, "learning_rate": 8.80731056198234e-05, "loss": 0.5639, "step": 9463 }, { "epoch": 0.6412358560878109, "grad_norm": 5.285966873168945, "learning_rate": 8.807173660072558e-05, "loss": 0.6522, "step": 9464 }, { "epoch": 0.641303611355783, "grad_norm": 5.970489501953125, "learning_rate": 8.807036758162776e-05, "loss": 0.6489, "step": 9465 }, { "epoch": 0.641371366623755, "grad_norm": 6.003796100616455, "learning_rate": 8.806899856252994e-05, "loss": 0.8582, "step": 9466 }, { "epoch": 0.641439121891727, "grad_norm": 5.609601974487305, "learning_rate": 8.806762954343214e-05, "loss": 0.6631, "step": 9467 }, { "epoch": 0.6415068771596991, "grad_norm": 6.617072582244873, "learning_rate": 8.806626052433432e-05, "loss": 0.8057, "step": 9468 }, { "epoch": 0.6415746324276712, "grad_norm": 5.329599857330322, "learning_rate": 8.80648915052365e-05, "loss": 0.7941, "step": 9469 }, { "epoch": 0.6416423876956433, "grad_norm": 6.617512226104736, "learning_rate": 8.806352248613868e-05, "loss": 0.7594, "step": 9470 }, { "epoch": 0.6417101429636154, "grad_norm": 5.257902145385742, "learning_rate": 8.806215346704086e-05, "loss": 0.8737, "step": 9471 }, { "epoch": 0.6417778982315875, "grad_norm": 8.781728744506836, "learning_rate": 8.806078444794305e-05, "loss": 0.9662, "step": 9472 }, { "epoch": 0.6418456534995596, "grad_norm": 5.8004889488220215, "learning_rate": 8.805941542884523e-05, "loss": 0.8442, "step": 9473 }, { "epoch": 0.6419134087675317, "grad_norm": 7.527797698974609, "learning_rate": 8.805804640974741e-05, "loss": 0.8849, "step": 9474 }, { "epoch": 0.6419811640355038, "grad_norm": 6.8633952140808105, "learning_rate": 8.80566773906496e-05, "loss": 0.9317, "step": 9475 }, { "epoch": 0.6420489193034759, "grad_norm": 5.778538703918457, "learning_rate": 8.805530837155179e-05, "loss": 0.7787, "step": 9476 }, { "epoch": 0.6421166745714479, "grad_norm": 5.698131084442139, "learning_rate": 8.805393935245397e-05, "loss": 0.7335, "step": 9477 }, { "epoch": 0.64218442983942, "grad_norm": 7.117033958435059, "learning_rate": 8.805257033335615e-05, "loss": 0.9524, "step": 9478 }, { "epoch": 0.6422521851073921, "grad_norm": 7.394477367401123, "learning_rate": 8.805120131425834e-05, "loss": 0.8617, "step": 9479 }, { "epoch": 0.6423199403753642, "grad_norm": 6.299468517303467, "learning_rate": 8.804983229516052e-05, "loss": 0.7959, "step": 9480 }, { "epoch": 0.6423876956433363, "grad_norm": 5.769999027252197, "learning_rate": 8.80484632760627e-05, "loss": 0.8217, "step": 9481 }, { "epoch": 0.6424554509113084, "grad_norm": 6.29069709777832, "learning_rate": 8.80470942569649e-05, "loss": 0.8485, "step": 9482 }, { "epoch": 0.6425232061792805, "grad_norm": 6.773715496063232, "learning_rate": 8.804572523786708e-05, "loss": 0.9625, "step": 9483 }, { "epoch": 0.6425909614472525, "grad_norm": 5.882883071899414, "learning_rate": 8.804435621876926e-05, "loss": 0.977, "step": 9484 }, { "epoch": 0.6426587167152246, "grad_norm": 5.250417709350586, "learning_rate": 8.804298719967145e-05, "loss": 0.844, "step": 9485 }, { "epoch": 0.6427264719831967, "grad_norm": 6.4407267570495605, "learning_rate": 8.804161818057363e-05, "loss": 0.749, "step": 9486 }, { "epoch": 0.6427942272511687, "grad_norm": 5.549936294555664, "learning_rate": 8.804024916147581e-05, "loss": 0.6762, "step": 9487 }, { "epoch": 0.6428619825191408, "grad_norm": 4.843740463256836, "learning_rate": 8.803888014237799e-05, "loss": 0.6318, "step": 9488 }, { "epoch": 0.6429297377871129, "grad_norm": 8.922931671142578, "learning_rate": 8.803751112328017e-05, "loss": 0.943, "step": 9489 }, { "epoch": 0.642997493055085, "grad_norm": 5.983587265014648, "learning_rate": 8.803614210418237e-05, "loss": 0.6995, "step": 9490 }, { "epoch": 0.6430652483230571, "grad_norm": 10.325361251831055, "learning_rate": 8.803477308508455e-05, "loss": 0.6907, "step": 9491 }, { "epoch": 0.6431330035910292, "grad_norm": 7.284884452819824, "learning_rate": 8.803340406598673e-05, "loss": 0.8517, "step": 9492 }, { "epoch": 0.6432007588590013, "grad_norm": 5.540932655334473, "learning_rate": 8.803203504688891e-05, "loss": 0.6415, "step": 9493 }, { "epoch": 0.6432685141269734, "grad_norm": 8.60664176940918, "learning_rate": 8.803066602779109e-05, "loss": 0.8818, "step": 9494 }, { "epoch": 0.6433362693949455, "grad_norm": 5.478827476501465, "learning_rate": 8.802929700869328e-05, "loss": 0.5589, "step": 9495 }, { "epoch": 0.6434040246629176, "grad_norm": 6.415865421295166, "learning_rate": 8.802792798959546e-05, "loss": 0.748, "step": 9496 }, { "epoch": 0.6434717799308897, "grad_norm": 7.106724262237549, "learning_rate": 8.802655897049764e-05, "loss": 1.0591, "step": 9497 }, { "epoch": 0.6435395351988618, "grad_norm": 7.584676742553711, "learning_rate": 8.802518995139982e-05, "loss": 0.924, "step": 9498 }, { "epoch": 0.6436072904668338, "grad_norm": 7.438377380371094, "learning_rate": 8.802382093230202e-05, "loss": 1.0451, "step": 9499 }, { "epoch": 0.6436750457348058, "grad_norm": 6.775513648986816, "learning_rate": 8.80224519132042e-05, "loss": 0.8273, "step": 9500 }, { "epoch": 0.6437428010027779, "grad_norm": 8.242133140563965, "learning_rate": 8.802108289410638e-05, "loss": 1.0045, "step": 9501 }, { "epoch": 0.64381055627075, "grad_norm": 6.122509956359863, "learning_rate": 8.801971387500856e-05, "loss": 0.8026, "step": 9502 }, { "epoch": 0.6438783115387221, "grad_norm": 5.884267330169678, "learning_rate": 8.801834485591074e-05, "loss": 0.7935, "step": 9503 }, { "epoch": 0.6439460668066942, "grad_norm": 5.653980731964111, "learning_rate": 8.801697583681293e-05, "loss": 0.7223, "step": 9504 }, { "epoch": 0.6440138220746663, "grad_norm": 9.43459701538086, "learning_rate": 8.801560681771511e-05, "loss": 0.7211, "step": 9505 }, { "epoch": 0.6440815773426384, "grad_norm": 6.32805061340332, "learning_rate": 8.801423779861729e-05, "loss": 0.5007, "step": 9506 }, { "epoch": 0.6441493326106105, "grad_norm": 5.424881458282471, "learning_rate": 8.801286877951947e-05, "loss": 0.8556, "step": 9507 }, { "epoch": 0.6442170878785826, "grad_norm": 6.115161418914795, "learning_rate": 8.801149976042167e-05, "loss": 1.068, "step": 9508 }, { "epoch": 0.6442848431465547, "grad_norm": 6.05783748626709, "learning_rate": 8.801013074132385e-05, "loss": 0.7372, "step": 9509 }, { "epoch": 0.6443525984145267, "grad_norm": 5.965503692626953, "learning_rate": 8.800876172222603e-05, "loss": 0.7439, "step": 9510 }, { "epoch": 0.6444203536824988, "grad_norm": 6.0100483894348145, "learning_rate": 8.800739270312821e-05, "loss": 0.8335, "step": 9511 }, { "epoch": 0.6444881089504709, "grad_norm": 4.494331359863281, "learning_rate": 8.800602368403039e-05, "loss": 0.7585, "step": 9512 }, { "epoch": 0.644555864218443, "grad_norm": 6.9478631019592285, "learning_rate": 8.800465466493258e-05, "loss": 0.9518, "step": 9513 }, { "epoch": 0.6446236194864151, "grad_norm": 6.849409103393555, "learning_rate": 8.800328564583476e-05, "loss": 0.9584, "step": 9514 }, { "epoch": 0.6446913747543872, "grad_norm": 5.837069034576416, "learning_rate": 8.800191662673694e-05, "loss": 0.7976, "step": 9515 }, { "epoch": 0.6447591300223592, "grad_norm": 8.214330673217773, "learning_rate": 8.800054760763912e-05, "loss": 0.6255, "step": 9516 }, { "epoch": 0.6448268852903313, "grad_norm": 5.174075126647949, "learning_rate": 8.79991785885413e-05, "loss": 0.5772, "step": 9517 }, { "epoch": 0.6448946405583034, "grad_norm": 5.130014896392822, "learning_rate": 8.79978095694435e-05, "loss": 0.8923, "step": 9518 }, { "epoch": 0.6449623958262755, "grad_norm": 6.434813499450684, "learning_rate": 8.799644055034568e-05, "loss": 0.9681, "step": 9519 }, { "epoch": 0.6450301510942476, "grad_norm": 6.135867118835449, "learning_rate": 8.799507153124786e-05, "loss": 0.7337, "step": 9520 }, { "epoch": 0.6450979063622196, "grad_norm": 5.493929386138916, "learning_rate": 8.799370251215004e-05, "loss": 0.6834, "step": 9521 }, { "epoch": 0.6451656616301917, "grad_norm": 7.558812618255615, "learning_rate": 8.799233349305223e-05, "loss": 1.0415, "step": 9522 }, { "epoch": 0.6452334168981638, "grad_norm": 6.610668659210205, "learning_rate": 8.799096447395441e-05, "loss": 0.6631, "step": 9523 }, { "epoch": 0.6453011721661359, "grad_norm": 6.513849258422852, "learning_rate": 8.798959545485659e-05, "loss": 0.7852, "step": 9524 }, { "epoch": 0.645368927434108, "grad_norm": 5.489187717437744, "learning_rate": 8.798822643575879e-05, "loss": 0.8824, "step": 9525 }, { "epoch": 0.6454366827020801, "grad_norm": 6.717770576477051, "learning_rate": 8.798685741666097e-05, "loss": 0.8239, "step": 9526 }, { "epoch": 0.6455044379700522, "grad_norm": 5.649533271789551, "learning_rate": 8.798548839756315e-05, "loss": 0.7458, "step": 9527 }, { "epoch": 0.6455721932380243, "grad_norm": 4.854784965515137, "learning_rate": 8.798411937846534e-05, "loss": 0.6289, "step": 9528 }, { "epoch": 0.6456399485059964, "grad_norm": 7.124615669250488, "learning_rate": 8.798275035936752e-05, "loss": 0.8035, "step": 9529 }, { "epoch": 0.6457077037739685, "grad_norm": 7.39643669128418, "learning_rate": 8.79813813402697e-05, "loss": 0.776, "step": 9530 }, { "epoch": 0.6457754590419406, "grad_norm": 5.295919418334961, "learning_rate": 8.79800123211719e-05, "loss": 0.8542, "step": 9531 }, { "epoch": 0.6458432143099126, "grad_norm": 6.795876502990723, "learning_rate": 8.797864330207408e-05, "loss": 0.8777, "step": 9532 }, { "epoch": 0.6459109695778846, "grad_norm": 6.2083001136779785, "learning_rate": 8.797727428297626e-05, "loss": 1.0176, "step": 9533 }, { "epoch": 0.6459787248458567, "grad_norm": 7.269214153289795, "learning_rate": 8.797590526387844e-05, "loss": 1.0724, "step": 9534 }, { "epoch": 0.6460464801138288, "grad_norm": 5.762153625488281, "learning_rate": 8.797453624478062e-05, "loss": 0.7247, "step": 9535 }, { "epoch": 0.6461142353818009, "grad_norm": 5.321116924285889, "learning_rate": 8.797316722568281e-05, "loss": 0.6601, "step": 9536 }, { "epoch": 0.646181990649773, "grad_norm": 5.3908586502075195, "learning_rate": 8.797179820658499e-05, "loss": 0.5765, "step": 9537 }, { "epoch": 0.6462497459177451, "grad_norm": 6.827112197875977, "learning_rate": 8.797042918748717e-05, "loss": 0.8915, "step": 9538 }, { "epoch": 0.6463175011857172, "grad_norm": 6.380746841430664, "learning_rate": 8.796906016838935e-05, "loss": 0.8016, "step": 9539 }, { "epoch": 0.6463852564536893, "grad_norm": 6.202441215515137, "learning_rate": 8.796769114929155e-05, "loss": 0.8524, "step": 9540 }, { "epoch": 0.6464530117216614, "grad_norm": 7.0272955894470215, "learning_rate": 8.796632213019373e-05, "loss": 0.7491, "step": 9541 }, { "epoch": 0.6465207669896335, "grad_norm": 5.641077995300293, "learning_rate": 8.79649531110959e-05, "loss": 0.6814, "step": 9542 }, { "epoch": 0.6465885222576055, "grad_norm": 5.427177429199219, "learning_rate": 8.796358409199809e-05, "loss": 0.6764, "step": 9543 }, { "epoch": 0.6466562775255776, "grad_norm": 5.034342288970947, "learning_rate": 8.796221507290027e-05, "loss": 0.7099, "step": 9544 }, { "epoch": 0.6467240327935497, "grad_norm": 7.89331579208374, "learning_rate": 8.796084605380246e-05, "loss": 0.7996, "step": 9545 }, { "epoch": 0.6467917880615218, "grad_norm": 7.4129767417907715, "learning_rate": 8.795947703470464e-05, "loss": 0.6799, "step": 9546 }, { "epoch": 0.6468595433294939, "grad_norm": 5.55999755859375, "learning_rate": 8.795810801560682e-05, "loss": 0.858, "step": 9547 }, { "epoch": 0.646927298597466, "grad_norm": 5.925971031188965, "learning_rate": 8.7956738996509e-05, "loss": 0.7046, "step": 9548 }, { "epoch": 0.646995053865438, "grad_norm": 5.659457206726074, "learning_rate": 8.795536997741118e-05, "loss": 0.8139, "step": 9549 }, { "epoch": 0.6470628091334101, "grad_norm": 6.554767608642578, "learning_rate": 8.795400095831338e-05, "loss": 1.0621, "step": 9550 }, { "epoch": 0.6471305644013822, "grad_norm": 5.473086357116699, "learning_rate": 8.795263193921556e-05, "loss": 0.8589, "step": 9551 }, { "epoch": 0.6471983196693543, "grad_norm": 5.166335582733154, "learning_rate": 8.795126292011774e-05, "loss": 0.7418, "step": 9552 }, { "epoch": 0.6472660749373264, "grad_norm": 10.33619213104248, "learning_rate": 8.794989390101992e-05, "loss": 0.7379, "step": 9553 }, { "epoch": 0.6473338302052984, "grad_norm": 8.167377471923828, "learning_rate": 8.794852488192211e-05, "loss": 1.0573, "step": 9554 }, { "epoch": 0.6474015854732705, "grad_norm": 9.861980438232422, "learning_rate": 8.794715586282429e-05, "loss": 0.9429, "step": 9555 }, { "epoch": 0.6474693407412426, "grad_norm": 6.154029369354248, "learning_rate": 8.794578684372647e-05, "loss": 0.9017, "step": 9556 }, { "epoch": 0.6475370960092147, "grad_norm": 5.818716526031494, "learning_rate": 8.794441782462865e-05, "loss": 0.8584, "step": 9557 }, { "epoch": 0.6476048512771868, "grad_norm": 7.98982048034668, "learning_rate": 8.794304880553083e-05, "loss": 0.8423, "step": 9558 }, { "epoch": 0.6476726065451589, "grad_norm": 6.663909435272217, "learning_rate": 8.794167978643303e-05, "loss": 1.0512, "step": 9559 }, { "epoch": 0.647740361813131, "grad_norm": 4.941400051116943, "learning_rate": 8.794031076733521e-05, "loss": 0.6979, "step": 9560 }, { "epoch": 0.6478081170811031, "grad_norm": 6.668032169342041, "learning_rate": 8.793894174823739e-05, "loss": 0.9286, "step": 9561 }, { "epoch": 0.6478758723490752, "grad_norm": 6.0007100105285645, "learning_rate": 8.793757272913957e-05, "loss": 1.0924, "step": 9562 }, { "epoch": 0.6479436276170473, "grad_norm": 5.80806303024292, "learning_rate": 8.793620371004176e-05, "loss": 0.8201, "step": 9563 }, { "epoch": 0.6480113828850194, "grad_norm": 5.381270408630371, "learning_rate": 8.793483469094394e-05, "loss": 0.813, "step": 9564 }, { "epoch": 0.6480791381529913, "grad_norm": 6.4966721534729, "learning_rate": 8.793346567184612e-05, "loss": 1.0333, "step": 9565 }, { "epoch": 0.6481468934209634, "grad_norm": 6.251739978790283, "learning_rate": 8.79320966527483e-05, "loss": 0.8608, "step": 9566 }, { "epoch": 0.6482146486889355, "grad_norm": 6.237257957458496, "learning_rate": 8.793072763365048e-05, "loss": 0.713, "step": 9567 }, { "epoch": 0.6482824039569076, "grad_norm": 5.686729907989502, "learning_rate": 8.792935861455268e-05, "loss": 0.7128, "step": 9568 }, { "epoch": 0.6483501592248797, "grad_norm": 7.520707130432129, "learning_rate": 8.792798959545486e-05, "loss": 0.8688, "step": 9569 }, { "epoch": 0.6484179144928518, "grad_norm": 4.83900785446167, "learning_rate": 8.792662057635704e-05, "loss": 0.5782, "step": 9570 }, { "epoch": 0.6484856697608239, "grad_norm": 8.14783763885498, "learning_rate": 8.792525155725923e-05, "loss": 0.5705, "step": 9571 }, { "epoch": 0.648553425028796, "grad_norm": 5.105820655822754, "learning_rate": 8.792388253816141e-05, "loss": 0.7871, "step": 9572 }, { "epoch": 0.6486211802967681, "grad_norm": 5.665055274963379, "learning_rate": 8.792251351906359e-05, "loss": 0.8448, "step": 9573 }, { "epoch": 0.6486889355647402, "grad_norm": 7.109708309173584, "learning_rate": 8.792114449996579e-05, "loss": 0.6738, "step": 9574 }, { "epoch": 0.6487566908327123, "grad_norm": 6.253261089324951, "learning_rate": 8.791977548086797e-05, "loss": 0.6266, "step": 9575 }, { "epoch": 0.6488244461006843, "grad_norm": 7.64295768737793, "learning_rate": 8.791840646177015e-05, "loss": 1.0729, "step": 9576 }, { "epoch": 0.6488922013686564, "grad_norm": 4.579455375671387, "learning_rate": 8.791703744267234e-05, "loss": 0.7472, "step": 9577 }, { "epoch": 0.6489599566366285, "grad_norm": 6.2152791023254395, "learning_rate": 8.791566842357452e-05, "loss": 0.8086, "step": 9578 }, { "epoch": 0.6490277119046006, "grad_norm": 6.039999008178711, "learning_rate": 8.79142994044767e-05, "loss": 0.6148, "step": 9579 }, { "epoch": 0.6490954671725727, "grad_norm": 6.458342552185059, "learning_rate": 8.791293038537888e-05, "loss": 0.9861, "step": 9580 }, { "epoch": 0.6491632224405448, "grad_norm": 7.025355815887451, "learning_rate": 8.791156136628106e-05, "loss": 0.9073, "step": 9581 }, { "epoch": 0.6492309777085168, "grad_norm": 5.3977484703063965, "learning_rate": 8.791019234718326e-05, "loss": 0.7138, "step": 9582 }, { "epoch": 0.6492987329764889, "grad_norm": 5.696099281311035, "learning_rate": 8.790882332808544e-05, "loss": 0.952, "step": 9583 }, { "epoch": 0.649366488244461, "grad_norm": 5.6307501792907715, "learning_rate": 8.790745430898762e-05, "loss": 0.8517, "step": 9584 }, { "epoch": 0.6494342435124331, "grad_norm": 8.366070747375488, "learning_rate": 8.79060852898898e-05, "loss": 0.9537, "step": 9585 }, { "epoch": 0.6495019987804052, "grad_norm": 5.901167392730713, "learning_rate": 8.790471627079199e-05, "loss": 0.8325, "step": 9586 }, { "epoch": 0.6495697540483772, "grad_norm": 11.905413627624512, "learning_rate": 8.790334725169417e-05, "loss": 0.8988, "step": 9587 }, { "epoch": 0.6496375093163493, "grad_norm": 7.553168773651123, "learning_rate": 8.790197823259635e-05, "loss": 0.8318, "step": 9588 }, { "epoch": 0.6497052645843214, "grad_norm": 6.661622047424316, "learning_rate": 8.790060921349853e-05, "loss": 0.7276, "step": 9589 }, { "epoch": 0.6497730198522935, "grad_norm": 5.579341411590576, "learning_rate": 8.789924019440071e-05, "loss": 0.8388, "step": 9590 }, { "epoch": 0.6498407751202656, "grad_norm": 6.08758020401001, "learning_rate": 8.78978711753029e-05, "loss": 0.9075, "step": 9591 }, { "epoch": 0.6499085303882377, "grad_norm": 7.589080810546875, "learning_rate": 8.789650215620509e-05, "loss": 0.7852, "step": 9592 }, { "epoch": 0.6499762856562098, "grad_norm": 6.479293346405029, "learning_rate": 8.789513313710727e-05, "loss": 0.768, "step": 9593 }, { "epoch": 0.6500440409241819, "grad_norm": 5.796462535858154, "learning_rate": 8.789376411800945e-05, "loss": 0.7431, "step": 9594 }, { "epoch": 0.650111796192154, "grad_norm": 5.195106506347656, "learning_rate": 8.789239509891163e-05, "loss": 0.9473, "step": 9595 }, { "epoch": 0.6501795514601261, "grad_norm": 5.687699317932129, "learning_rate": 8.789102607981382e-05, "loss": 0.8332, "step": 9596 }, { "epoch": 0.6502473067280982, "grad_norm": 5.329159259796143, "learning_rate": 8.7889657060716e-05, "loss": 0.8068, "step": 9597 }, { "epoch": 0.6503150619960701, "grad_norm": 7.383645057678223, "learning_rate": 8.788828804161818e-05, "loss": 0.9984, "step": 9598 }, { "epoch": 0.6503828172640422, "grad_norm": 7.364968776702881, "learning_rate": 8.788691902252036e-05, "loss": 0.7574, "step": 9599 }, { "epoch": 0.6504505725320143, "grad_norm": 6.2040886878967285, "learning_rate": 8.788555000342256e-05, "loss": 0.6552, "step": 9600 }, { "epoch": 0.6505183277999864, "grad_norm": 10.075439453125, "learning_rate": 8.788418098432474e-05, "loss": 0.659, "step": 9601 }, { "epoch": 0.6505860830679585, "grad_norm": 6.742908954620361, "learning_rate": 8.788281196522692e-05, "loss": 0.6485, "step": 9602 }, { "epoch": 0.6506538383359306, "grad_norm": 6.678378582000732, "learning_rate": 8.78814429461291e-05, "loss": 0.7113, "step": 9603 }, { "epoch": 0.6507215936039027, "grad_norm": 5.988241195678711, "learning_rate": 8.788007392703128e-05, "loss": 0.8417, "step": 9604 }, { "epoch": 0.6507893488718748, "grad_norm": 6.3953704833984375, "learning_rate": 8.787870490793347e-05, "loss": 0.8448, "step": 9605 }, { "epoch": 0.6508571041398469, "grad_norm": 11.006570816040039, "learning_rate": 8.787733588883565e-05, "loss": 0.9258, "step": 9606 }, { "epoch": 0.650924859407819, "grad_norm": 7.086367607116699, "learning_rate": 8.787596686973783e-05, "loss": 0.86, "step": 9607 }, { "epoch": 0.6509926146757911, "grad_norm": 5.534137725830078, "learning_rate": 8.787459785064001e-05, "loss": 0.7984, "step": 9608 }, { "epoch": 0.6510603699437632, "grad_norm": 6.710955619812012, "learning_rate": 8.78732288315422e-05, "loss": 0.9852, "step": 9609 }, { "epoch": 0.6511281252117352, "grad_norm": 7.151968479156494, "learning_rate": 8.787185981244439e-05, "loss": 0.7383, "step": 9610 }, { "epoch": 0.6511958804797073, "grad_norm": 6.063338279724121, "learning_rate": 8.787049079334657e-05, "loss": 0.8971, "step": 9611 }, { "epoch": 0.6512636357476794, "grad_norm": 7.058009147644043, "learning_rate": 8.786912177424875e-05, "loss": 0.9314, "step": 9612 }, { "epoch": 0.6513313910156515, "grad_norm": 8.007342338562012, "learning_rate": 8.786775275515093e-05, "loss": 0.6878, "step": 9613 }, { "epoch": 0.6513991462836235, "grad_norm": 6.176419734954834, "learning_rate": 8.786638373605312e-05, "loss": 0.8863, "step": 9614 }, { "epoch": 0.6514669015515956, "grad_norm": 5.785898685455322, "learning_rate": 8.78650147169553e-05, "loss": 1.0512, "step": 9615 }, { "epoch": 0.6515346568195677, "grad_norm": 5.100857734680176, "learning_rate": 8.786364569785748e-05, "loss": 0.6969, "step": 9616 }, { "epoch": 0.6516024120875398, "grad_norm": 6.292816638946533, "learning_rate": 8.786227667875968e-05, "loss": 0.8185, "step": 9617 }, { "epoch": 0.6516701673555119, "grad_norm": 6.242623805999756, "learning_rate": 8.786090765966186e-05, "loss": 0.6833, "step": 9618 }, { "epoch": 0.651737922623484, "grad_norm": 7.2665791511535645, "learning_rate": 8.785953864056404e-05, "loss": 1.0487, "step": 9619 }, { "epoch": 0.651805677891456, "grad_norm": 5.973618507385254, "learning_rate": 8.785816962146623e-05, "loss": 0.8992, "step": 9620 }, { "epoch": 0.6518734331594281, "grad_norm": 8.300751686096191, "learning_rate": 8.785680060236841e-05, "loss": 0.9334, "step": 9621 }, { "epoch": 0.6519411884274002, "grad_norm": 5.687087535858154, "learning_rate": 8.785543158327059e-05, "loss": 0.8131, "step": 9622 }, { "epoch": 0.6520089436953723, "grad_norm": 5.419602870941162, "learning_rate": 8.785406256417279e-05, "loss": 0.6697, "step": 9623 }, { "epoch": 0.6520766989633444, "grad_norm": 7.112249851226807, "learning_rate": 8.785269354507497e-05, "loss": 0.8477, "step": 9624 }, { "epoch": 0.6521444542313165, "grad_norm": 5.755582809448242, "learning_rate": 8.785132452597715e-05, "loss": 0.6766, "step": 9625 }, { "epoch": 0.6522122094992886, "grad_norm": 5.415201187133789, "learning_rate": 8.784995550687933e-05, "loss": 0.6871, "step": 9626 }, { "epoch": 0.6522799647672607, "grad_norm": 5.576985836029053, "learning_rate": 8.78485864877815e-05, "loss": 0.9001, "step": 9627 }, { "epoch": 0.6523477200352328, "grad_norm": 5.954935073852539, "learning_rate": 8.78472174686837e-05, "loss": 0.6694, "step": 9628 }, { "epoch": 0.6524154753032049, "grad_norm": 5.59356164932251, "learning_rate": 8.784584844958588e-05, "loss": 0.74, "step": 9629 }, { "epoch": 0.652483230571177, "grad_norm": 5.1607346534729, "learning_rate": 8.784447943048806e-05, "loss": 0.7869, "step": 9630 }, { "epoch": 0.652550985839149, "grad_norm": 6.959099769592285, "learning_rate": 8.784311041139024e-05, "loss": 0.8096, "step": 9631 }, { "epoch": 0.652618741107121, "grad_norm": 6.869060516357422, "learning_rate": 8.784174139229244e-05, "loss": 0.9917, "step": 9632 }, { "epoch": 0.6526864963750931, "grad_norm": 6.426820278167725, "learning_rate": 8.784037237319462e-05, "loss": 0.718, "step": 9633 }, { "epoch": 0.6527542516430652, "grad_norm": 5.208813667297363, "learning_rate": 8.78390033540968e-05, "loss": 0.651, "step": 9634 }, { "epoch": 0.6528220069110373, "grad_norm": 6.748283863067627, "learning_rate": 8.783763433499898e-05, "loss": 1.0808, "step": 9635 }, { "epoch": 0.6528897621790094, "grad_norm": 6.567190647125244, "learning_rate": 8.783626531590116e-05, "loss": 0.9373, "step": 9636 }, { "epoch": 0.6529575174469815, "grad_norm": 5.621391773223877, "learning_rate": 8.783489629680335e-05, "loss": 0.9382, "step": 9637 }, { "epoch": 0.6530252727149536, "grad_norm": 7.807063102722168, "learning_rate": 8.783352727770553e-05, "loss": 0.8237, "step": 9638 }, { "epoch": 0.6530930279829257, "grad_norm": 6.320357322692871, "learning_rate": 8.783215825860771e-05, "loss": 0.8245, "step": 9639 }, { "epoch": 0.6531607832508978, "grad_norm": 6.225609302520752, "learning_rate": 8.783078923950989e-05, "loss": 0.932, "step": 9640 }, { "epoch": 0.6532285385188699, "grad_norm": 5.9908366203308105, "learning_rate": 8.782942022041209e-05, "loss": 0.5793, "step": 9641 }, { "epoch": 0.653296293786842, "grad_norm": 8.215045928955078, "learning_rate": 8.782805120131427e-05, "loss": 0.7771, "step": 9642 }, { "epoch": 0.653364049054814, "grad_norm": 5.830068588256836, "learning_rate": 8.782668218221645e-05, "loss": 0.8499, "step": 9643 }, { "epoch": 0.6534318043227861, "grad_norm": 5.979434013366699, "learning_rate": 8.782531316311863e-05, "loss": 0.7335, "step": 9644 }, { "epoch": 0.6534995595907582, "grad_norm": 6.369264125823975, "learning_rate": 8.782394414402081e-05, "loss": 0.994, "step": 9645 }, { "epoch": 0.6535673148587303, "grad_norm": 6.8189287185668945, "learning_rate": 8.7822575124923e-05, "loss": 0.7789, "step": 9646 }, { "epoch": 0.6536350701267023, "grad_norm": 5.990640163421631, "learning_rate": 8.782120610582518e-05, "loss": 0.8369, "step": 9647 }, { "epoch": 0.6537028253946744, "grad_norm": 6.319588661193848, "learning_rate": 8.781983708672736e-05, "loss": 0.9394, "step": 9648 }, { "epoch": 0.6537705806626465, "grad_norm": 5.965325355529785, "learning_rate": 8.781846806762954e-05, "loss": 1.0256, "step": 9649 }, { "epoch": 0.6538383359306186, "grad_norm": 6.193709373474121, "learning_rate": 8.781709904853172e-05, "loss": 0.7261, "step": 9650 }, { "epoch": 0.6539060911985907, "grad_norm": 6.463015079498291, "learning_rate": 8.781573002943392e-05, "loss": 0.6503, "step": 9651 }, { "epoch": 0.6539738464665628, "grad_norm": 5.9288554191589355, "learning_rate": 8.78143610103361e-05, "loss": 0.7345, "step": 9652 }, { "epoch": 0.6540416017345348, "grad_norm": 6.894443035125732, "learning_rate": 8.781299199123828e-05, "loss": 0.7115, "step": 9653 }, { "epoch": 0.6541093570025069, "grad_norm": 5.954974174499512, "learning_rate": 8.781162297214046e-05, "loss": 0.8184, "step": 9654 }, { "epoch": 0.654177112270479, "grad_norm": 7.200540065765381, "learning_rate": 8.781025395304265e-05, "loss": 0.7661, "step": 9655 }, { "epoch": 0.6542448675384511, "grad_norm": 5.560801982879639, "learning_rate": 8.780888493394483e-05, "loss": 0.6097, "step": 9656 }, { "epoch": 0.6543126228064232, "grad_norm": 9.168293952941895, "learning_rate": 8.780751591484701e-05, "loss": 1.0032, "step": 9657 }, { "epoch": 0.6543803780743953, "grad_norm": 6.599915027618408, "learning_rate": 8.780614689574919e-05, "loss": 0.9359, "step": 9658 }, { "epoch": 0.6544481333423674, "grad_norm": 6.528720855712891, "learning_rate": 8.780477787665137e-05, "loss": 0.8039, "step": 9659 }, { "epoch": 0.6545158886103395, "grad_norm": 7.300596714019775, "learning_rate": 8.780340885755357e-05, "loss": 0.823, "step": 9660 }, { "epoch": 0.6545836438783116, "grad_norm": 6.100801467895508, "learning_rate": 8.780203983845575e-05, "loss": 0.7662, "step": 9661 }, { "epoch": 0.6546513991462837, "grad_norm": 6.72039270401001, "learning_rate": 8.780067081935793e-05, "loss": 0.8888, "step": 9662 }, { "epoch": 0.6547191544142557, "grad_norm": 5.6597065925598145, "learning_rate": 8.779930180026012e-05, "loss": 0.7939, "step": 9663 }, { "epoch": 0.6547869096822277, "grad_norm": 5.955264568328857, "learning_rate": 8.77979327811623e-05, "loss": 0.8051, "step": 9664 }, { "epoch": 0.6548546649501998, "grad_norm": 6.064132213592529, "learning_rate": 8.779656376206448e-05, "loss": 0.6377, "step": 9665 }, { "epoch": 0.6549224202181719, "grad_norm": 6.713923931121826, "learning_rate": 8.779519474296668e-05, "loss": 0.9388, "step": 9666 }, { "epoch": 0.654990175486144, "grad_norm": 8.160643577575684, "learning_rate": 8.779382572386886e-05, "loss": 1.0358, "step": 9667 }, { "epoch": 0.6550579307541161, "grad_norm": 7.116758346557617, "learning_rate": 8.779245670477104e-05, "loss": 0.9486, "step": 9668 }, { "epoch": 0.6551256860220882, "grad_norm": 9.230103492736816, "learning_rate": 8.779108768567323e-05, "loss": 0.5998, "step": 9669 }, { "epoch": 0.6551934412900603, "grad_norm": 7.513571739196777, "learning_rate": 8.778971866657541e-05, "loss": 1.0098, "step": 9670 }, { "epoch": 0.6552611965580324, "grad_norm": 6.080422878265381, "learning_rate": 8.778834964747759e-05, "loss": 0.7787, "step": 9671 }, { "epoch": 0.6553289518260045, "grad_norm": 6.582731246948242, "learning_rate": 8.778698062837977e-05, "loss": 0.9038, "step": 9672 }, { "epoch": 0.6553967070939766, "grad_norm": 6.8090620040893555, "learning_rate": 8.778561160928196e-05, "loss": 0.7586, "step": 9673 }, { "epoch": 0.6554644623619487, "grad_norm": 8.279650688171387, "learning_rate": 8.778424259018415e-05, "loss": 0.8105, "step": 9674 }, { "epoch": 0.6555322176299208, "grad_norm": 5.152581691741943, "learning_rate": 8.778287357108633e-05, "loss": 0.7084, "step": 9675 }, { "epoch": 0.6555999728978928, "grad_norm": 6.42199182510376, "learning_rate": 8.77815045519885e-05, "loss": 0.7351, "step": 9676 }, { "epoch": 0.6556677281658649, "grad_norm": 6.543702125549316, "learning_rate": 8.778013553289069e-05, "loss": 0.8382, "step": 9677 }, { "epoch": 0.655735483433837, "grad_norm": 4.9357171058654785, "learning_rate": 8.777876651379288e-05, "loss": 0.8202, "step": 9678 }, { "epoch": 0.655803238701809, "grad_norm": 5.525670051574707, "learning_rate": 8.777739749469506e-05, "loss": 0.7993, "step": 9679 }, { "epoch": 0.6558709939697811, "grad_norm": 6.35006046295166, "learning_rate": 8.777602847559724e-05, "loss": 0.925, "step": 9680 }, { "epoch": 0.6559387492377532, "grad_norm": 6.477513790130615, "learning_rate": 8.777465945649942e-05, "loss": 0.8741, "step": 9681 }, { "epoch": 0.6560065045057253, "grad_norm": 6.017436981201172, "learning_rate": 8.77732904374016e-05, "loss": 0.8689, "step": 9682 }, { "epoch": 0.6560742597736974, "grad_norm": 5.3170599937438965, "learning_rate": 8.77719214183038e-05, "loss": 0.6794, "step": 9683 }, { "epoch": 0.6561420150416695, "grad_norm": 7.529482841491699, "learning_rate": 8.777055239920598e-05, "loss": 1.0313, "step": 9684 }, { "epoch": 0.6562097703096416, "grad_norm": 5.189818859100342, "learning_rate": 8.776918338010816e-05, "loss": 0.7463, "step": 9685 }, { "epoch": 0.6562775255776137, "grad_norm": 6.473649024963379, "learning_rate": 8.776781436101034e-05, "loss": 0.9591, "step": 9686 }, { "epoch": 0.6563452808455857, "grad_norm": 7.400620937347412, "learning_rate": 8.776644534191253e-05, "loss": 0.8153, "step": 9687 }, { "epoch": 0.6564130361135578, "grad_norm": 5.564664840698242, "learning_rate": 8.776507632281471e-05, "loss": 0.8807, "step": 9688 }, { "epoch": 0.6564807913815299, "grad_norm": 6.968201160430908, "learning_rate": 8.776370730371689e-05, "loss": 0.8327, "step": 9689 }, { "epoch": 0.656548546649502, "grad_norm": 8.013174057006836, "learning_rate": 8.776233828461907e-05, "loss": 0.8123, "step": 9690 }, { "epoch": 0.6566163019174741, "grad_norm": 6.545875072479248, "learning_rate": 8.776096926552125e-05, "loss": 0.7223, "step": 9691 }, { "epoch": 0.6566840571854462, "grad_norm": 6.459381580352783, "learning_rate": 8.775960024642345e-05, "loss": 1.0484, "step": 9692 }, { "epoch": 0.6567518124534183, "grad_norm": 6.893752098083496, "learning_rate": 8.775823122732563e-05, "loss": 0.841, "step": 9693 }, { "epoch": 0.6568195677213904, "grad_norm": 6.36741828918457, "learning_rate": 8.77568622082278e-05, "loss": 0.7719, "step": 9694 }, { "epoch": 0.6568873229893625, "grad_norm": 6.700324058532715, "learning_rate": 8.775549318912999e-05, "loss": 0.8612, "step": 9695 }, { "epoch": 0.6569550782573345, "grad_norm": 5.423570156097412, "learning_rate": 8.775412417003218e-05, "loss": 0.7947, "step": 9696 }, { "epoch": 0.6570228335253065, "grad_norm": 6.15387487411499, "learning_rate": 8.775275515093436e-05, "loss": 0.7978, "step": 9697 }, { "epoch": 0.6570905887932786, "grad_norm": 6.407439708709717, "learning_rate": 8.775138613183654e-05, "loss": 0.8016, "step": 9698 }, { "epoch": 0.6571583440612507, "grad_norm": 5.311725616455078, "learning_rate": 8.775001711273872e-05, "loss": 0.564, "step": 9699 }, { "epoch": 0.6572260993292228, "grad_norm": 8.463066101074219, "learning_rate": 8.77486480936409e-05, "loss": 0.9351, "step": 9700 }, { "epoch": 0.6572938545971949, "grad_norm": 6.503357410430908, "learning_rate": 8.77472790745431e-05, "loss": 0.8545, "step": 9701 }, { "epoch": 0.657361609865167, "grad_norm": 4.668941020965576, "learning_rate": 8.774591005544528e-05, "loss": 0.5757, "step": 9702 }, { "epoch": 0.6574293651331391, "grad_norm": 7.31153678894043, "learning_rate": 8.774454103634746e-05, "loss": 0.8225, "step": 9703 }, { "epoch": 0.6574971204011112, "grad_norm": 7.220789432525635, "learning_rate": 8.774317201724964e-05, "loss": 1.2222, "step": 9704 }, { "epoch": 0.6575648756690833, "grad_norm": 8.469976425170898, "learning_rate": 8.774180299815182e-05, "loss": 0.8927, "step": 9705 }, { "epoch": 0.6576326309370554, "grad_norm": 5.122034072875977, "learning_rate": 8.774043397905401e-05, "loss": 0.6429, "step": 9706 }, { "epoch": 0.6577003862050275, "grad_norm": 7.948295593261719, "learning_rate": 8.773906495995619e-05, "loss": 0.8037, "step": 9707 }, { "epoch": 0.6577681414729996, "grad_norm": 8.193120956420898, "learning_rate": 8.773769594085837e-05, "loss": 0.8298, "step": 9708 }, { "epoch": 0.6578358967409716, "grad_norm": 5.10132360458374, "learning_rate": 8.773632692176057e-05, "loss": 0.6058, "step": 9709 }, { "epoch": 0.6579036520089437, "grad_norm": 7.845650672912598, "learning_rate": 8.773495790266275e-05, "loss": 0.9913, "step": 9710 }, { "epoch": 0.6579714072769158, "grad_norm": 6.442379951477051, "learning_rate": 8.773358888356493e-05, "loss": 0.7329, "step": 9711 }, { "epoch": 0.6580391625448878, "grad_norm": 6.904378414154053, "learning_rate": 8.773221986446712e-05, "loss": 0.9929, "step": 9712 }, { "epoch": 0.6581069178128599, "grad_norm": 7.309523105621338, "learning_rate": 8.77308508453693e-05, "loss": 0.8333, "step": 9713 }, { "epoch": 0.658174673080832, "grad_norm": 5.351863861083984, "learning_rate": 8.772948182627148e-05, "loss": 0.718, "step": 9714 }, { "epoch": 0.6582424283488041, "grad_norm": 6.481664180755615, "learning_rate": 8.772811280717367e-05, "loss": 0.7649, "step": 9715 }, { "epoch": 0.6583101836167762, "grad_norm": 6.753706932067871, "learning_rate": 8.772674378807586e-05, "loss": 0.6513, "step": 9716 }, { "epoch": 0.6583779388847483, "grad_norm": 6.8181047439575195, "learning_rate": 8.772537476897804e-05, "loss": 0.8805, "step": 9717 }, { "epoch": 0.6584456941527204, "grad_norm": 6.623274803161621, "learning_rate": 8.772400574988022e-05, "loss": 1.0795, "step": 9718 }, { "epoch": 0.6585134494206925, "grad_norm": 5.954155445098877, "learning_rate": 8.772263673078241e-05, "loss": 0.7594, "step": 9719 }, { "epoch": 0.6585812046886645, "grad_norm": 5.537624835968018, "learning_rate": 8.772126771168459e-05, "loss": 0.7049, "step": 9720 }, { "epoch": 0.6586489599566366, "grad_norm": 6.186052322387695, "learning_rate": 8.771989869258677e-05, "loss": 0.8639, "step": 9721 }, { "epoch": 0.6587167152246087, "grad_norm": 6.996501445770264, "learning_rate": 8.771852967348895e-05, "loss": 0.9438, "step": 9722 }, { "epoch": 0.6587844704925808, "grad_norm": 6.926022052764893, "learning_rate": 8.771716065439113e-05, "loss": 0.7466, "step": 9723 }, { "epoch": 0.6588522257605529, "grad_norm": 5.302411079406738, "learning_rate": 8.771579163529332e-05, "loss": 0.7562, "step": 9724 }, { "epoch": 0.658919981028525, "grad_norm": 6.900167465209961, "learning_rate": 8.77144226161955e-05, "loss": 0.8841, "step": 9725 }, { "epoch": 0.6589877362964971, "grad_norm": 5.357388019561768, "learning_rate": 8.771305359709769e-05, "loss": 0.7033, "step": 9726 }, { "epoch": 0.6590554915644692, "grad_norm": 5.690728187561035, "learning_rate": 8.771168457799987e-05, "loss": 0.7668, "step": 9727 }, { "epoch": 0.6591232468324412, "grad_norm": 6.126123428344727, "learning_rate": 8.771031555890206e-05, "loss": 0.7644, "step": 9728 }, { "epoch": 0.6591910021004133, "grad_norm": 6.951844692230225, "learning_rate": 8.770894653980424e-05, "loss": 0.6453, "step": 9729 }, { "epoch": 0.6592587573683854, "grad_norm": 7.219118118286133, "learning_rate": 8.770757752070642e-05, "loss": 0.7606, "step": 9730 }, { "epoch": 0.6593265126363574, "grad_norm": 7.441622734069824, "learning_rate": 8.77062085016086e-05, "loss": 1.0002, "step": 9731 }, { "epoch": 0.6593942679043295, "grad_norm": 7.127715110778809, "learning_rate": 8.770483948251078e-05, "loss": 0.9236, "step": 9732 }, { "epoch": 0.6594620231723016, "grad_norm": 5.041599750518799, "learning_rate": 8.770347046341298e-05, "loss": 0.5938, "step": 9733 }, { "epoch": 0.6595297784402737, "grad_norm": 5.044239044189453, "learning_rate": 8.770210144431516e-05, "loss": 0.7437, "step": 9734 }, { "epoch": 0.6595975337082458, "grad_norm": 6.069377422332764, "learning_rate": 8.770073242521734e-05, "loss": 0.6742, "step": 9735 }, { "epoch": 0.6596652889762179, "grad_norm": 7.166933059692383, "learning_rate": 8.769936340611952e-05, "loss": 0.9085, "step": 9736 }, { "epoch": 0.65973304424419, "grad_norm": 6.837136268615723, "learning_rate": 8.76979943870217e-05, "loss": 0.7626, "step": 9737 }, { "epoch": 0.6598007995121621, "grad_norm": 5.012059211730957, "learning_rate": 8.769662536792389e-05, "loss": 0.6234, "step": 9738 }, { "epoch": 0.6598685547801342, "grad_norm": 5.913621425628662, "learning_rate": 8.769525634882607e-05, "loss": 0.9147, "step": 9739 }, { "epoch": 0.6599363100481063, "grad_norm": 6.618444919586182, "learning_rate": 8.769388732972825e-05, "loss": 1.0521, "step": 9740 }, { "epoch": 0.6600040653160784, "grad_norm": 6.666975975036621, "learning_rate": 8.769251831063043e-05, "loss": 0.8245, "step": 9741 }, { "epoch": 0.6600718205840505, "grad_norm": 6.840112686157227, "learning_rate": 8.769114929153263e-05, "loss": 0.8726, "step": 9742 }, { "epoch": 0.6601395758520225, "grad_norm": 7.093020915985107, "learning_rate": 8.76897802724348e-05, "loss": 0.9649, "step": 9743 }, { "epoch": 0.6602073311199946, "grad_norm": 6.4766845703125, "learning_rate": 8.768841125333699e-05, "loss": 0.7608, "step": 9744 }, { "epoch": 0.6602750863879666, "grad_norm": 6.574507236480713, "learning_rate": 8.768704223423917e-05, "loss": 0.9587, "step": 9745 }, { "epoch": 0.6603428416559387, "grad_norm": 6.037952423095703, "learning_rate": 8.768567321514135e-05, "loss": 0.8102, "step": 9746 }, { "epoch": 0.6604105969239108, "grad_norm": 5.707187175750732, "learning_rate": 8.768430419604354e-05, "loss": 0.7906, "step": 9747 }, { "epoch": 0.6604783521918829, "grad_norm": 5.039308547973633, "learning_rate": 8.768293517694572e-05, "loss": 0.7443, "step": 9748 }, { "epoch": 0.660546107459855, "grad_norm": 6.081298351287842, "learning_rate": 8.76815661578479e-05, "loss": 0.8813, "step": 9749 }, { "epoch": 0.6606138627278271, "grad_norm": 6.228826522827148, "learning_rate": 8.768019713875008e-05, "loss": 0.755, "step": 9750 }, { "epoch": 0.6606816179957992, "grad_norm": 6.391602993011475, "learning_rate": 8.767882811965228e-05, "loss": 1.0464, "step": 9751 }, { "epoch": 0.6607493732637713, "grad_norm": 6.1509318351745605, "learning_rate": 8.767745910055446e-05, "loss": 0.8464, "step": 9752 }, { "epoch": 0.6608171285317433, "grad_norm": 7.630395412445068, "learning_rate": 8.767609008145664e-05, "loss": 0.7216, "step": 9753 }, { "epoch": 0.6608848837997154, "grad_norm": 6.150453090667725, "learning_rate": 8.767472106235882e-05, "loss": 0.747, "step": 9754 }, { "epoch": 0.6609526390676875, "grad_norm": 6.866214752197266, "learning_rate": 8.7673352043261e-05, "loss": 0.7093, "step": 9755 }, { "epoch": 0.6610203943356596, "grad_norm": 5.30288553237915, "learning_rate": 8.767198302416319e-05, "loss": 0.8519, "step": 9756 }, { "epoch": 0.6610881496036317, "grad_norm": 7.960119724273682, "learning_rate": 8.767061400506537e-05, "loss": 1.0947, "step": 9757 }, { "epoch": 0.6611559048716038, "grad_norm": 6.809545993804932, "learning_rate": 8.766924498596755e-05, "loss": 0.7943, "step": 9758 }, { "epoch": 0.6612236601395759, "grad_norm": 7.028867721557617, "learning_rate": 8.766787596686975e-05, "loss": 0.8684, "step": 9759 }, { "epoch": 0.661291415407548, "grad_norm": 6.686776161193848, "learning_rate": 8.766650694777193e-05, "loss": 0.7796, "step": 9760 }, { "epoch": 0.66135917067552, "grad_norm": 5.486933708190918, "learning_rate": 8.76651379286741e-05, "loss": 0.5741, "step": 9761 }, { "epoch": 0.6614269259434921, "grad_norm": 6.500797748565674, "learning_rate": 8.76637689095763e-05, "loss": 1.0022, "step": 9762 }, { "epoch": 0.6614946812114642, "grad_norm": 8.424103736877441, "learning_rate": 8.766239989047848e-05, "loss": 0.9727, "step": 9763 }, { "epoch": 0.6615624364794362, "grad_norm": 5.002140998840332, "learning_rate": 8.766103087138066e-05, "loss": 0.5935, "step": 9764 }, { "epoch": 0.6616301917474083, "grad_norm": 6.721834659576416, "learning_rate": 8.765966185228285e-05, "loss": 0.7682, "step": 9765 }, { "epoch": 0.6616979470153804, "grad_norm": 6.448259353637695, "learning_rate": 8.765829283318503e-05, "loss": 0.6641, "step": 9766 }, { "epoch": 0.6617657022833525, "grad_norm": 5.544651985168457, "learning_rate": 8.765692381408722e-05, "loss": 0.6952, "step": 9767 }, { "epoch": 0.6618334575513246, "grad_norm": 7.139290809631348, "learning_rate": 8.76555547949894e-05, "loss": 0.8574, "step": 9768 }, { "epoch": 0.6619012128192967, "grad_norm": 7.421456813812256, "learning_rate": 8.765418577589158e-05, "loss": 0.9057, "step": 9769 }, { "epoch": 0.6619689680872688, "grad_norm": 6.075616359710693, "learning_rate": 8.765281675679377e-05, "loss": 0.7174, "step": 9770 }, { "epoch": 0.6620367233552409, "grad_norm": 5.6527323722839355, "learning_rate": 8.765144773769595e-05, "loss": 0.6707, "step": 9771 }, { "epoch": 0.662104478623213, "grad_norm": 5.773962020874023, "learning_rate": 8.765007871859813e-05, "loss": 0.714, "step": 9772 }, { "epoch": 0.6621722338911851, "grad_norm": 5.672632694244385, "learning_rate": 8.764870969950031e-05, "loss": 0.6529, "step": 9773 }, { "epoch": 0.6622399891591572, "grad_norm": 7.087960243225098, "learning_rate": 8.76473406804025e-05, "loss": 0.8998, "step": 9774 }, { "epoch": 0.6623077444271293, "grad_norm": 6.828940391540527, "learning_rate": 8.764597166130468e-05, "loss": 0.9055, "step": 9775 }, { "epoch": 0.6623754996951013, "grad_norm": 5.858781814575195, "learning_rate": 8.764460264220687e-05, "loss": 0.7853, "step": 9776 }, { "epoch": 0.6624432549630733, "grad_norm": 6.570882797241211, "learning_rate": 8.764323362310905e-05, "loss": 1.1202, "step": 9777 }, { "epoch": 0.6625110102310454, "grad_norm": 6.794251918792725, "learning_rate": 8.764186460401123e-05, "loss": 0.7795, "step": 9778 }, { "epoch": 0.6625787654990175, "grad_norm": 5.775513648986816, "learning_rate": 8.764049558491342e-05, "loss": 0.8422, "step": 9779 }, { "epoch": 0.6626465207669896, "grad_norm": 5.90095329284668, "learning_rate": 8.76391265658156e-05, "loss": 0.7624, "step": 9780 }, { "epoch": 0.6627142760349617, "grad_norm": 6.769818305969238, "learning_rate": 8.763775754671778e-05, "loss": 0.7761, "step": 9781 }, { "epoch": 0.6627820313029338, "grad_norm": 5.702734470367432, "learning_rate": 8.763638852761996e-05, "loss": 0.6399, "step": 9782 }, { "epoch": 0.6628497865709059, "grad_norm": 8.020594596862793, "learning_rate": 8.763501950852214e-05, "loss": 0.8082, "step": 9783 }, { "epoch": 0.662917541838878, "grad_norm": 6.043667316436768, "learning_rate": 8.763365048942434e-05, "loss": 0.69, "step": 9784 }, { "epoch": 0.6629852971068501, "grad_norm": 6.844956874847412, "learning_rate": 8.763228147032652e-05, "loss": 0.7028, "step": 9785 }, { "epoch": 0.6630530523748221, "grad_norm": 6.554644584655762, "learning_rate": 8.76309124512287e-05, "loss": 1.0118, "step": 9786 }, { "epoch": 0.6631208076427942, "grad_norm": 6.549968242645264, "learning_rate": 8.762954343213088e-05, "loss": 0.8027, "step": 9787 }, { "epoch": 0.6631885629107663, "grad_norm": 5.934162616729736, "learning_rate": 8.762817441303307e-05, "loss": 0.5964, "step": 9788 }, { "epoch": 0.6632563181787384, "grad_norm": 6.02857780456543, "learning_rate": 8.762680539393525e-05, "loss": 0.997, "step": 9789 }, { "epoch": 0.6633240734467105, "grad_norm": 7.619875907897949, "learning_rate": 8.762543637483743e-05, "loss": 1.0416, "step": 9790 }, { "epoch": 0.6633918287146826, "grad_norm": 7.058054447174072, "learning_rate": 8.762406735573961e-05, "loss": 0.7885, "step": 9791 }, { "epoch": 0.6634595839826547, "grad_norm": 7.8069353103637695, "learning_rate": 8.762269833664179e-05, "loss": 0.8077, "step": 9792 }, { "epoch": 0.6635273392506268, "grad_norm": 6.246554851531982, "learning_rate": 8.762132931754399e-05, "loss": 0.8138, "step": 9793 }, { "epoch": 0.6635950945185988, "grad_norm": 7.489177227020264, "learning_rate": 8.761996029844617e-05, "loss": 0.8638, "step": 9794 }, { "epoch": 0.6636628497865709, "grad_norm": 7.148414134979248, "learning_rate": 8.761859127934835e-05, "loss": 0.7998, "step": 9795 }, { "epoch": 0.663730605054543, "grad_norm": 6.611279010772705, "learning_rate": 8.761722226025053e-05, "loss": 0.6272, "step": 9796 }, { "epoch": 0.663798360322515, "grad_norm": 7.739448070526123, "learning_rate": 8.761585324115272e-05, "loss": 0.7987, "step": 9797 }, { "epoch": 0.6638661155904871, "grad_norm": 4.832591533660889, "learning_rate": 8.76144842220549e-05, "loss": 0.5663, "step": 9798 }, { "epoch": 0.6639338708584592, "grad_norm": 4.939646244049072, "learning_rate": 8.761311520295708e-05, "loss": 0.5951, "step": 9799 }, { "epoch": 0.6640016261264313, "grad_norm": 6.326164245605469, "learning_rate": 8.761174618385926e-05, "loss": 0.9925, "step": 9800 }, { "epoch": 0.6640693813944034, "grad_norm": 5.790089130401611, "learning_rate": 8.761037716476144e-05, "loss": 1.0175, "step": 9801 }, { "epoch": 0.6641371366623755, "grad_norm": 6.139710903167725, "learning_rate": 8.760900814566364e-05, "loss": 0.7322, "step": 9802 }, { "epoch": 0.6642048919303476, "grad_norm": 6.73042106628418, "learning_rate": 8.760763912656582e-05, "loss": 0.7969, "step": 9803 }, { "epoch": 0.6642726471983197, "grad_norm": 6.9577765464782715, "learning_rate": 8.7606270107468e-05, "loss": 0.7294, "step": 9804 }, { "epoch": 0.6643404024662918, "grad_norm": 7.609181880950928, "learning_rate": 8.760490108837019e-05, "loss": 0.9833, "step": 9805 }, { "epoch": 0.6644081577342639, "grad_norm": 5.679005146026611, "learning_rate": 8.760353206927237e-05, "loss": 0.8292, "step": 9806 }, { "epoch": 0.664475913002236, "grad_norm": 6.903763771057129, "learning_rate": 8.760216305017455e-05, "loss": 0.8298, "step": 9807 }, { "epoch": 0.664543668270208, "grad_norm": 5.705727577209473, "learning_rate": 8.760079403107674e-05, "loss": 0.8319, "step": 9808 }, { "epoch": 0.6646114235381801, "grad_norm": 5.971374988555908, "learning_rate": 8.759942501197892e-05, "loss": 0.8034, "step": 9809 }, { "epoch": 0.6646791788061521, "grad_norm": 4.7771806716918945, "learning_rate": 8.75980559928811e-05, "loss": 0.8539, "step": 9810 }, { "epoch": 0.6647469340741242, "grad_norm": 6.891615867614746, "learning_rate": 8.75966869737833e-05, "loss": 0.667, "step": 9811 }, { "epoch": 0.6648146893420963, "grad_norm": 6.467392444610596, "learning_rate": 8.759531795468548e-05, "loss": 0.8197, "step": 9812 }, { "epoch": 0.6648824446100684, "grad_norm": 5.87471342086792, "learning_rate": 8.759394893558766e-05, "loss": 0.9105, "step": 9813 }, { "epoch": 0.6649501998780405, "grad_norm": 5.994086265563965, "learning_rate": 8.759257991648984e-05, "loss": 0.8333, "step": 9814 }, { "epoch": 0.6650179551460126, "grad_norm": 6.536230564117432, "learning_rate": 8.759121089739202e-05, "loss": 1.0726, "step": 9815 }, { "epoch": 0.6650857104139847, "grad_norm": 7.012213230133057, "learning_rate": 8.758984187829421e-05, "loss": 1.0442, "step": 9816 }, { "epoch": 0.6651534656819568, "grad_norm": 6.069978713989258, "learning_rate": 8.75884728591964e-05, "loss": 0.6364, "step": 9817 }, { "epoch": 0.6652212209499289, "grad_norm": 5.200802803039551, "learning_rate": 8.758710384009858e-05, "loss": 0.6563, "step": 9818 }, { "epoch": 0.665288976217901, "grad_norm": 6.931329727172852, "learning_rate": 8.758573482100076e-05, "loss": 0.8925, "step": 9819 }, { "epoch": 0.665356731485873, "grad_norm": 6.1655731201171875, "learning_rate": 8.758436580190295e-05, "loss": 0.7658, "step": 9820 }, { "epoch": 0.6654244867538451, "grad_norm": 6.437480926513672, "learning_rate": 8.758299678280513e-05, "loss": 0.8279, "step": 9821 }, { "epoch": 0.6654922420218172, "grad_norm": 5.941578388214111, "learning_rate": 8.758162776370731e-05, "loss": 0.7095, "step": 9822 }, { "epoch": 0.6655599972897893, "grad_norm": 6.083083152770996, "learning_rate": 8.758025874460949e-05, "loss": 0.9792, "step": 9823 }, { "epoch": 0.6656277525577614, "grad_norm": 5.512528419494629, "learning_rate": 8.757888972551167e-05, "loss": 0.8771, "step": 9824 }, { "epoch": 0.6656955078257335, "grad_norm": 5.142529487609863, "learning_rate": 8.757752070641386e-05, "loss": 0.7935, "step": 9825 }, { "epoch": 0.6657632630937055, "grad_norm": 5.460729122161865, "learning_rate": 8.757615168731604e-05, "loss": 0.9105, "step": 9826 }, { "epoch": 0.6658310183616776, "grad_norm": 8.098207473754883, "learning_rate": 8.757478266821823e-05, "loss": 0.8831, "step": 9827 }, { "epoch": 0.6658987736296497, "grad_norm": 5.392515659332275, "learning_rate": 8.75734136491204e-05, "loss": 0.8327, "step": 9828 }, { "epoch": 0.6659665288976218, "grad_norm": 4.9883294105529785, "learning_rate": 8.75720446300226e-05, "loss": 0.7598, "step": 9829 }, { "epoch": 0.6660342841655938, "grad_norm": 5.96735954284668, "learning_rate": 8.757067561092478e-05, "loss": 0.9694, "step": 9830 }, { "epoch": 0.6661020394335659, "grad_norm": 6.818420886993408, "learning_rate": 8.756930659182696e-05, "loss": 0.9571, "step": 9831 }, { "epoch": 0.666169794701538, "grad_norm": 6.297762870788574, "learning_rate": 8.756793757272914e-05, "loss": 0.6912, "step": 9832 }, { "epoch": 0.6662375499695101, "grad_norm": 6.834626197814941, "learning_rate": 8.756656855363132e-05, "loss": 0.8697, "step": 9833 }, { "epoch": 0.6663053052374822, "grad_norm": 6.614404678344727, "learning_rate": 8.756519953453351e-05, "loss": 0.9481, "step": 9834 }, { "epoch": 0.6663730605054543, "grad_norm": 6.524447441101074, "learning_rate": 8.75638305154357e-05, "loss": 0.7346, "step": 9835 }, { "epoch": 0.6664408157734264, "grad_norm": 7.389026641845703, "learning_rate": 8.756246149633788e-05, "loss": 1.0821, "step": 9836 }, { "epoch": 0.6665085710413985, "grad_norm": 5.654766082763672, "learning_rate": 8.756109247724006e-05, "loss": 0.8859, "step": 9837 }, { "epoch": 0.6665763263093706, "grad_norm": 6.345546722412109, "learning_rate": 8.755972345814224e-05, "loss": 0.8386, "step": 9838 }, { "epoch": 0.6666440815773427, "grad_norm": 5.268699645996094, "learning_rate": 8.755835443904443e-05, "loss": 0.6937, "step": 9839 }, { "epoch": 0.6667118368453148, "grad_norm": 5.590981960296631, "learning_rate": 8.755698541994661e-05, "loss": 0.7624, "step": 9840 }, { "epoch": 0.6667795921132869, "grad_norm": 7.142122268676758, "learning_rate": 8.755561640084879e-05, "loss": 0.807, "step": 9841 }, { "epoch": 0.666847347381259, "grad_norm": 7.878788948059082, "learning_rate": 8.755424738175097e-05, "loss": 0.6766, "step": 9842 }, { "epoch": 0.6669151026492309, "grad_norm": 7.469075679779053, "learning_rate": 8.755287836265316e-05, "loss": 0.7736, "step": 9843 }, { "epoch": 0.666982857917203, "grad_norm": 6.271559715270996, "learning_rate": 8.755150934355535e-05, "loss": 1.1519, "step": 9844 }, { "epoch": 0.6670506131851751, "grad_norm": 5.424012660980225, "learning_rate": 8.755014032445753e-05, "loss": 0.6141, "step": 9845 }, { "epoch": 0.6671183684531472, "grad_norm": 6.098863124847412, "learning_rate": 8.75487713053597e-05, "loss": 0.8144, "step": 9846 }, { "epoch": 0.6671861237211193, "grad_norm": 7.099893569946289, "learning_rate": 8.754740228626189e-05, "loss": 0.897, "step": 9847 }, { "epoch": 0.6672538789890914, "grad_norm": 4.874231338500977, "learning_rate": 8.754603326716408e-05, "loss": 0.7885, "step": 9848 }, { "epoch": 0.6673216342570635, "grad_norm": 7.58192777633667, "learning_rate": 8.754466424806626e-05, "loss": 0.7326, "step": 9849 }, { "epoch": 0.6673893895250356, "grad_norm": 5.8943047523498535, "learning_rate": 8.754329522896844e-05, "loss": 0.8714, "step": 9850 }, { "epoch": 0.6674571447930077, "grad_norm": 7.8243865966796875, "learning_rate": 8.754192620987063e-05, "loss": 0.749, "step": 9851 }, { "epoch": 0.6675249000609798, "grad_norm": 6.121888160705566, "learning_rate": 8.754055719077282e-05, "loss": 0.7823, "step": 9852 }, { "epoch": 0.6675926553289518, "grad_norm": 8.671948432922363, "learning_rate": 8.7539188171675e-05, "loss": 0.7592, "step": 9853 }, { "epoch": 0.6676604105969239, "grad_norm": 7.32505989074707, "learning_rate": 8.753781915257719e-05, "loss": 0.816, "step": 9854 }, { "epoch": 0.667728165864896, "grad_norm": 5.141097545623779, "learning_rate": 8.753645013347937e-05, "loss": 0.913, "step": 9855 }, { "epoch": 0.6677959211328681, "grad_norm": 6.519028186798096, "learning_rate": 8.753508111438155e-05, "loss": 0.8542, "step": 9856 }, { "epoch": 0.6678636764008402, "grad_norm": 5.298619747161865, "learning_rate": 8.753371209528374e-05, "loss": 0.8773, "step": 9857 }, { "epoch": 0.6679314316688123, "grad_norm": 4.540358066558838, "learning_rate": 8.753234307618592e-05, "loss": 0.5887, "step": 9858 }, { "epoch": 0.6679991869367843, "grad_norm": 7.409801483154297, "learning_rate": 8.75309740570881e-05, "loss": 0.7, "step": 9859 }, { "epoch": 0.6680669422047564, "grad_norm": 7.78483772277832, "learning_rate": 8.752960503799028e-05, "loss": 0.9825, "step": 9860 }, { "epoch": 0.6681346974727285, "grad_norm": 4.964968204498291, "learning_rate": 8.752823601889248e-05, "loss": 0.8111, "step": 9861 }, { "epoch": 0.6682024527407006, "grad_norm": 6.426743984222412, "learning_rate": 8.752686699979466e-05, "loss": 1.0148, "step": 9862 }, { "epoch": 0.6682702080086727, "grad_norm": 5.606266975402832, "learning_rate": 8.752549798069684e-05, "loss": 0.7848, "step": 9863 }, { "epoch": 0.6683379632766447, "grad_norm": 6.663419723510742, "learning_rate": 8.752412896159902e-05, "loss": 0.5602, "step": 9864 }, { "epoch": 0.6684057185446168, "grad_norm": 5.719392776489258, "learning_rate": 8.75227599425012e-05, "loss": 0.6434, "step": 9865 }, { "epoch": 0.6684734738125889, "grad_norm": 6.0621747970581055, "learning_rate": 8.75213909234034e-05, "loss": 0.7172, "step": 9866 }, { "epoch": 0.668541229080561, "grad_norm": 7.197140216827393, "learning_rate": 8.752002190430557e-05, "loss": 0.9543, "step": 9867 }, { "epoch": 0.6686089843485331, "grad_norm": 7.329906940460205, "learning_rate": 8.751865288520775e-05, "loss": 0.8873, "step": 9868 }, { "epoch": 0.6686767396165052, "grad_norm": 8.936700820922852, "learning_rate": 8.751728386610994e-05, "loss": 0.8918, "step": 9869 }, { "epoch": 0.6687444948844773, "grad_norm": 5.39838171005249, "learning_rate": 8.751591484701212e-05, "loss": 1.0986, "step": 9870 }, { "epoch": 0.6688122501524494, "grad_norm": 7.213238716125488, "learning_rate": 8.751454582791431e-05, "loss": 0.9257, "step": 9871 }, { "epoch": 0.6688800054204215, "grad_norm": 7.646907806396484, "learning_rate": 8.751317680881649e-05, "loss": 0.8563, "step": 9872 }, { "epoch": 0.6689477606883936, "grad_norm": 6.506889343261719, "learning_rate": 8.751180778971867e-05, "loss": 0.8348, "step": 9873 }, { "epoch": 0.6690155159563657, "grad_norm": 8.101831436157227, "learning_rate": 8.751043877062085e-05, "loss": 0.8127, "step": 9874 }, { "epoch": 0.6690832712243376, "grad_norm": 7.347453594207764, "learning_rate": 8.750906975152304e-05, "loss": 0.768, "step": 9875 }, { "epoch": 0.6691510264923097, "grad_norm": 6.852962970733643, "learning_rate": 8.750770073242522e-05, "loss": 0.8699, "step": 9876 }, { "epoch": 0.6692187817602818, "grad_norm": 5.527287006378174, "learning_rate": 8.75063317133274e-05, "loss": 0.6164, "step": 9877 }, { "epoch": 0.6692865370282539, "grad_norm": 6.611326217651367, "learning_rate": 8.750496269422959e-05, "loss": 0.715, "step": 9878 }, { "epoch": 0.669354292296226, "grad_norm": 6.488336086273193, "learning_rate": 8.750359367513177e-05, "loss": 0.9815, "step": 9879 }, { "epoch": 0.6694220475641981, "grad_norm": 8.057877540588379, "learning_rate": 8.750222465603396e-05, "loss": 0.9369, "step": 9880 }, { "epoch": 0.6694898028321702, "grad_norm": 7.809643745422363, "learning_rate": 8.750085563693614e-05, "loss": 0.7953, "step": 9881 }, { "epoch": 0.6695575581001423, "grad_norm": 5.484494209289551, "learning_rate": 8.749948661783832e-05, "loss": 0.8527, "step": 9882 }, { "epoch": 0.6696253133681144, "grad_norm": 8.870565414428711, "learning_rate": 8.74981175987405e-05, "loss": 0.7632, "step": 9883 }, { "epoch": 0.6696930686360865, "grad_norm": 5.734538555145264, "learning_rate": 8.74967485796427e-05, "loss": 0.9434, "step": 9884 }, { "epoch": 0.6697608239040586, "grad_norm": 6.731936454772949, "learning_rate": 8.749537956054487e-05, "loss": 0.9988, "step": 9885 }, { "epoch": 0.6698285791720306, "grad_norm": 6.063337326049805, "learning_rate": 8.749401054144706e-05, "loss": 0.6882, "step": 9886 }, { "epoch": 0.6698963344400027, "grad_norm": 6.489291667938232, "learning_rate": 8.749264152234924e-05, "loss": 0.8978, "step": 9887 }, { "epoch": 0.6699640897079748, "grad_norm": 6.419568061828613, "learning_rate": 8.749127250325142e-05, "loss": 0.84, "step": 9888 }, { "epoch": 0.6700318449759469, "grad_norm": 7.085015773773193, "learning_rate": 8.748990348415361e-05, "loss": 1.0001, "step": 9889 }, { "epoch": 0.670099600243919, "grad_norm": 5.279534339904785, "learning_rate": 8.748853446505579e-05, "loss": 0.613, "step": 9890 }, { "epoch": 0.670167355511891, "grad_norm": 7.26226282119751, "learning_rate": 8.748716544595797e-05, "loss": 0.7144, "step": 9891 }, { "epoch": 0.6702351107798631, "grad_norm": 5.066533088684082, "learning_rate": 8.748579642686015e-05, "loss": 0.8349, "step": 9892 }, { "epoch": 0.6703028660478352, "grad_norm": 5.847897052764893, "learning_rate": 8.748442740776233e-05, "loss": 0.778, "step": 9893 }, { "epoch": 0.6703706213158073, "grad_norm": 7.026902198791504, "learning_rate": 8.748305838866452e-05, "loss": 0.7131, "step": 9894 }, { "epoch": 0.6704383765837794, "grad_norm": 9.44549560546875, "learning_rate": 8.74816893695667e-05, "loss": 0.7093, "step": 9895 }, { "epoch": 0.6705061318517515, "grad_norm": 5.592066764831543, "learning_rate": 8.748032035046889e-05, "loss": 0.7979, "step": 9896 }, { "epoch": 0.6705738871197235, "grad_norm": 7.530812740325928, "learning_rate": 8.747895133137108e-05, "loss": 0.7899, "step": 9897 }, { "epoch": 0.6706416423876956, "grad_norm": 7.456013202667236, "learning_rate": 8.747758231227326e-05, "loss": 0.9772, "step": 9898 }, { "epoch": 0.6707093976556677, "grad_norm": 7.631428241729736, "learning_rate": 8.747621329317544e-05, "loss": 0.9485, "step": 9899 }, { "epoch": 0.6707771529236398, "grad_norm": 5.919754981994629, "learning_rate": 8.747484427407763e-05, "loss": 0.8696, "step": 9900 }, { "epoch": 0.6708449081916119, "grad_norm": 6.412426948547363, "learning_rate": 8.747347525497981e-05, "loss": 0.8179, "step": 9901 }, { "epoch": 0.670912663459584, "grad_norm": 5.753444671630859, "learning_rate": 8.7472106235882e-05, "loss": 0.7817, "step": 9902 }, { "epoch": 0.6709804187275561, "grad_norm": 6.622903347015381, "learning_rate": 8.747073721678419e-05, "loss": 0.8184, "step": 9903 }, { "epoch": 0.6710481739955282, "grad_norm": 8.03353500366211, "learning_rate": 8.746936819768637e-05, "loss": 0.9982, "step": 9904 }, { "epoch": 0.6711159292635003, "grad_norm": 7.467156410217285, "learning_rate": 8.746799917858855e-05, "loss": 0.6797, "step": 9905 }, { "epoch": 0.6711836845314724, "grad_norm": 5.091195106506348, "learning_rate": 8.746663015949073e-05, "loss": 0.6776, "step": 9906 }, { "epoch": 0.6712514397994445, "grad_norm": 6.8875627517700195, "learning_rate": 8.746526114039292e-05, "loss": 0.926, "step": 9907 }, { "epoch": 0.6713191950674164, "grad_norm": 6.183489799499512, "learning_rate": 8.74638921212951e-05, "loss": 0.9849, "step": 9908 }, { "epoch": 0.6713869503353885, "grad_norm": 6.648115634918213, "learning_rate": 8.746252310219728e-05, "loss": 0.789, "step": 9909 }, { "epoch": 0.6714547056033606, "grad_norm": 5.949080944061279, "learning_rate": 8.746115408309946e-05, "loss": 0.6008, "step": 9910 }, { "epoch": 0.6715224608713327, "grad_norm": 6.88314151763916, "learning_rate": 8.745978506400164e-05, "loss": 0.758, "step": 9911 }, { "epoch": 0.6715902161393048, "grad_norm": 7.288704872131348, "learning_rate": 8.745841604490384e-05, "loss": 0.8002, "step": 9912 }, { "epoch": 0.6716579714072769, "grad_norm": 6.439271926879883, "learning_rate": 8.745704702580602e-05, "loss": 0.9295, "step": 9913 }, { "epoch": 0.671725726675249, "grad_norm": 6.891064643859863, "learning_rate": 8.74556780067082e-05, "loss": 0.7798, "step": 9914 }, { "epoch": 0.6717934819432211, "grad_norm": 7.882637977600098, "learning_rate": 8.745430898761038e-05, "loss": 1.0254, "step": 9915 }, { "epoch": 0.6718612372111932, "grad_norm": 6.752426624298096, "learning_rate": 8.745293996851256e-05, "loss": 0.7995, "step": 9916 }, { "epoch": 0.6719289924791653, "grad_norm": 5.643410682678223, "learning_rate": 8.745157094941475e-05, "loss": 0.7354, "step": 9917 }, { "epoch": 0.6719967477471374, "grad_norm": 7.521575927734375, "learning_rate": 8.745020193031693e-05, "loss": 0.7256, "step": 9918 }, { "epoch": 0.6720645030151094, "grad_norm": 7.460186004638672, "learning_rate": 8.744883291121911e-05, "loss": 1.063, "step": 9919 }, { "epoch": 0.6721322582830815, "grad_norm": 4.6668477058410645, "learning_rate": 8.74474638921213e-05, "loss": 0.6914, "step": 9920 }, { "epoch": 0.6722000135510536, "grad_norm": 7.0220160484313965, "learning_rate": 8.744609487302349e-05, "loss": 0.7925, "step": 9921 }, { "epoch": 0.6722677688190257, "grad_norm": 7.594117641448975, "learning_rate": 8.744472585392567e-05, "loss": 0.9337, "step": 9922 }, { "epoch": 0.6723355240869978, "grad_norm": 7.195069313049316, "learning_rate": 8.744335683482785e-05, "loss": 0.9651, "step": 9923 }, { "epoch": 0.6724032793549698, "grad_norm": 5.2893853187561035, "learning_rate": 8.744198781573003e-05, "loss": 0.7974, "step": 9924 }, { "epoch": 0.6724710346229419, "grad_norm": 7.59480619430542, "learning_rate": 8.744061879663221e-05, "loss": 0.9182, "step": 9925 }, { "epoch": 0.672538789890914, "grad_norm": 5.582269668579102, "learning_rate": 8.74392497775344e-05, "loss": 0.8509, "step": 9926 }, { "epoch": 0.6726065451588861, "grad_norm": 6.992897033691406, "learning_rate": 8.743788075843658e-05, "loss": 0.8779, "step": 9927 }, { "epoch": 0.6726743004268582, "grad_norm": 7.686391353607178, "learning_rate": 8.743651173933876e-05, "loss": 1.0611, "step": 9928 }, { "epoch": 0.6727420556948303, "grad_norm": 6.090497016906738, "learning_rate": 8.743514272024095e-05, "loss": 0.9857, "step": 9929 }, { "epoch": 0.6728098109628023, "grad_norm": 7.6027936935424805, "learning_rate": 8.743377370114314e-05, "loss": 1.0939, "step": 9930 }, { "epoch": 0.6728775662307744, "grad_norm": 6.2990593910217285, "learning_rate": 8.743240468204532e-05, "loss": 0.8225, "step": 9931 }, { "epoch": 0.6729453214987465, "grad_norm": 6.154484272003174, "learning_rate": 8.74310356629475e-05, "loss": 1.2153, "step": 9932 }, { "epoch": 0.6730130767667186, "grad_norm": 5.471065044403076, "learning_rate": 8.742966664384968e-05, "loss": 0.9146, "step": 9933 }, { "epoch": 0.6730808320346907, "grad_norm": 7.006470680236816, "learning_rate": 8.742829762475186e-05, "loss": 0.8704, "step": 9934 }, { "epoch": 0.6731485873026628, "grad_norm": 6.060577869415283, "learning_rate": 8.742692860565405e-05, "loss": 1.0097, "step": 9935 }, { "epoch": 0.6732163425706349, "grad_norm": 6.05491304397583, "learning_rate": 8.742555958655623e-05, "loss": 0.7488, "step": 9936 }, { "epoch": 0.673284097838607, "grad_norm": 4.715060710906982, "learning_rate": 8.742419056745842e-05, "loss": 0.6577, "step": 9937 }, { "epoch": 0.6733518531065791, "grad_norm": 5.644432544708252, "learning_rate": 8.74228215483606e-05, "loss": 0.6954, "step": 9938 }, { "epoch": 0.6734196083745512, "grad_norm": 6.377723217010498, "learning_rate": 8.742145252926279e-05, "loss": 0.7948, "step": 9939 }, { "epoch": 0.6734873636425232, "grad_norm": 4.606221675872803, "learning_rate": 8.742008351016497e-05, "loss": 0.8975, "step": 9940 }, { "epoch": 0.6735551189104952, "grad_norm": 7.170515537261963, "learning_rate": 8.741871449106715e-05, "loss": 0.8839, "step": 9941 }, { "epoch": 0.6736228741784673, "grad_norm": 5.995856761932373, "learning_rate": 8.741734547196933e-05, "loss": 0.8133, "step": 9942 }, { "epoch": 0.6736906294464394, "grad_norm": 5.5055036544799805, "learning_rate": 8.741597645287152e-05, "loss": 0.5468, "step": 9943 }, { "epoch": 0.6737583847144115, "grad_norm": 8.358444213867188, "learning_rate": 8.74146074337737e-05, "loss": 0.9229, "step": 9944 }, { "epoch": 0.6738261399823836, "grad_norm": 6.350693225860596, "learning_rate": 8.741323841467588e-05, "loss": 0.9045, "step": 9945 }, { "epoch": 0.6738938952503557, "grad_norm": 8.909754753112793, "learning_rate": 8.741186939557808e-05, "loss": 0.8389, "step": 9946 }, { "epoch": 0.6739616505183278, "grad_norm": 6.272004127502441, "learning_rate": 8.741050037648026e-05, "loss": 0.8733, "step": 9947 }, { "epoch": 0.6740294057862999, "grad_norm": 7.538585662841797, "learning_rate": 8.740913135738244e-05, "loss": 0.7856, "step": 9948 }, { "epoch": 0.674097161054272, "grad_norm": 8.257597923278809, "learning_rate": 8.740776233828463e-05, "loss": 0.9473, "step": 9949 }, { "epoch": 0.6741649163222441, "grad_norm": 8.649487495422363, "learning_rate": 8.740639331918681e-05, "loss": 0.874, "step": 9950 }, { "epoch": 0.6742326715902162, "grad_norm": 7.1884765625, "learning_rate": 8.7405024300089e-05, "loss": 0.8108, "step": 9951 }, { "epoch": 0.6743004268581883, "grad_norm": 6.152512550354004, "learning_rate": 8.740365528099117e-05, "loss": 1.0727, "step": 9952 }, { "epoch": 0.6743681821261603, "grad_norm": 6.419736385345459, "learning_rate": 8.740228626189337e-05, "loss": 0.6842, "step": 9953 }, { "epoch": 0.6744359373941324, "grad_norm": 7.59826135635376, "learning_rate": 8.740091724279555e-05, "loss": 0.9312, "step": 9954 }, { "epoch": 0.6745036926621045, "grad_norm": 6.706602573394775, "learning_rate": 8.739954822369773e-05, "loss": 0.9033, "step": 9955 }, { "epoch": 0.6745714479300766, "grad_norm": 6.8712992668151855, "learning_rate": 8.739817920459991e-05, "loss": 0.8354, "step": 9956 }, { "epoch": 0.6746392031980486, "grad_norm": 4.86088228225708, "learning_rate": 8.739681018550209e-05, "loss": 0.7645, "step": 9957 }, { "epoch": 0.6747069584660207, "grad_norm": 4.672707557678223, "learning_rate": 8.739544116640428e-05, "loss": 0.5008, "step": 9958 }, { "epoch": 0.6747747137339928, "grad_norm": 7.2205634117126465, "learning_rate": 8.739407214730646e-05, "loss": 0.6439, "step": 9959 }, { "epoch": 0.6748424690019649, "grad_norm": 7.193275451660156, "learning_rate": 8.739270312820864e-05, "loss": 0.6493, "step": 9960 }, { "epoch": 0.674910224269937, "grad_norm": 7.060335159301758, "learning_rate": 8.739133410911082e-05, "loss": 1.0086, "step": 9961 }, { "epoch": 0.6749779795379091, "grad_norm": 8.672296524047852, "learning_rate": 8.738996509001302e-05, "loss": 0.8854, "step": 9962 }, { "epoch": 0.6750457348058811, "grad_norm": 5.923908710479736, "learning_rate": 8.73885960709152e-05, "loss": 0.7789, "step": 9963 }, { "epoch": 0.6751134900738532, "grad_norm": 5.861504554748535, "learning_rate": 8.738722705181738e-05, "loss": 0.7219, "step": 9964 }, { "epoch": 0.6751812453418253, "grad_norm": 8.418654441833496, "learning_rate": 8.738585803271956e-05, "loss": 0.7766, "step": 9965 }, { "epoch": 0.6752490006097974, "grad_norm": 6.130760669708252, "learning_rate": 8.738448901362174e-05, "loss": 0.6711, "step": 9966 }, { "epoch": 0.6753167558777695, "grad_norm": 6.2231526374816895, "learning_rate": 8.738311999452393e-05, "loss": 0.764, "step": 9967 }, { "epoch": 0.6753845111457416, "grad_norm": 6.3183417320251465, "learning_rate": 8.738175097542611e-05, "loss": 0.6871, "step": 9968 }, { "epoch": 0.6754522664137137, "grad_norm": 7.590784549713135, "learning_rate": 8.73803819563283e-05, "loss": 0.8666, "step": 9969 }, { "epoch": 0.6755200216816858, "grad_norm": 5.8160600662231445, "learning_rate": 8.737901293723047e-05, "loss": 1.0468, "step": 9970 }, { "epoch": 0.6755877769496579, "grad_norm": 7.236125469207764, "learning_rate": 8.737764391813266e-05, "loss": 0.6594, "step": 9971 }, { "epoch": 0.67565553221763, "grad_norm": 7.984396934509277, "learning_rate": 8.737627489903485e-05, "loss": 0.8579, "step": 9972 }, { "epoch": 0.675723287485602, "grad_norm": 6.760086536407471, "learning_rate": 8.737490587993703e-05, "loss": 0.6263, "step": 9973 }, { "epoch": 0.675791042753574, "grad_norm": 6.506689071655273, "learning_rate": 8.737353686083921e-05, "loss": 0.8551, "step": 9974 }, { "epoch": 0.6758587980215461, "grad_norm": 6.718931674957275, "learning_rate": 8.737216784174139e-05, "loss": 0.8464, "step": 9975 }, { "epoch": 0.6759265532895182, "grad_norm": 7.459061145782471, "learning_rate": 8.737079882264358e-05, "loss": 0.8845, "step": 9976 }, { "epoch": 0.6759943085574903, "grad_norm": 6.885556221008301, "learning_rate": 8.736942980354576e-05, "loss": 0.963, "step": 9977 }, { "epoch": 0.6760620638254624, "grad_norm": 6.1340861320495605, "learning_rate": 8.736806078444794e-05, "loss": 0.8624, "step": 9978 }, { "epoch": 0.6761298190934345, "grad_norm": 6.667585372924805, "learning_rate": 8.736669176535012e-05, "loss": 0.9265, "step": 9979 }, { "epoch": 0.6761975743614066, "grad_norm": 5.923494338989258, "learning_rate": 8.73653227462523e-05, "loss": 0.9644, "step": 9980 }, { "epoch": 0.6762653296293787, "grad_norm": 6.134291648864746, "learning_rate": 8.73639537271545e-05, "loss": 0.821, "step": 9981 }, { "epoch": 0.6763330848973508, "grad_norm": 8.043211936950684, "learning_rate": 8.736258470805668e-05, "loss": 1.1772, "step": 9982 }, { "epoch": 0.6764008401653229, "grad_norm": 5.358396053314209, "learning_rate": 8.736121568895886e-05, "loss": 0.8436, "step": 9983 }, { "epoch": 0.676468595433295, "grad_norm": 6.659102916717529, "learning_rate": 8.735984666986104e-05, "loss": 0.9908, "step": 9984 }, { "epoch": 0.676536350701267, "grad_norm": 5.888321876525879, "learning_rate": 8.735847765076323e-05, "loss": 0.8068, "step": 9985 }, { "epoch": 0.6766041059692391, "grad_norm": 6.159606456756592, "learning_rate": 8.735710863166541e-05, "loss": 0.7639, "step": 9986 }, { "epoch": 0.6766718612372112, "grad_norm": 6.044656276702881, "learning_rate": 8.73557396125676e-05, "loss": 1.0384, "step": 9987 }, { "epoch": 0.6767396165051833, "grad_norm": 6.538012504577637, "learning_rate": 8.735437059346978e-05, "loss": 0.9542, "step": 9988 }, { "epoch": 0.6768073717731553, "grad_norm": 6.6876301765441895, "learning_rate": 8.735300157437196e-05, "loss": 0.8471, "step": 9989 }, { "epoch": 0.6768751270411274, "grad_norm": 7.488297462463379, "learning_rate": 8.735163255527415e-05, "loss": 0.797, "step": 9990 }, { "epoch": 0.6769428823090995, "grad_norm": 5.536472320556641, "learning_rate": 8.735026353617633e-05, "loss": 0.7013, "step": 9991 }, { "epoch": 0.6770106375770716, "grad_norm": 6.263519763946533, "learning_rate": 8.734889451707851e-05, "loss": 0.991, "step": 9992 }, { "epoch": 0.6770783928450437, "grad_norm": 5.862089157104492, "learning_rate": 8.73475254979807e-05, "loss": 0.7879, "step": 9993 }, { "epoch": 0.6771461481130158, "grad_norm": 4.833024501800537, "learning_rate": 8.734615647888288e-05, "loss": 0.7024, "step": 9994 }, { "epoch": 0.6772139033809879, "grad_norm": 6.159411430358887, "learning_rate": 8.734478745978506e-05, "loss": 0.7301, "step": 9995 }, { "epoch": 0.67728165864896, "grad_norm": 5.821317672729492, "learning_rate": 8.734341844068726e-05, "loss": 0.7049, "step": 9996 }, { "epoch": 0.677349413916932, "grad_norm": 6.7138872146606445, "learning_rate": 8.734204942158944e-05, "loss": 0.9978, "step": 9997 }, { "epoch": 0.6774171691849041, "grad_norm": 6.190992832183838, "learning_rate": 8.734068040249162e-05, "loss": 0.8829, "step": 9998 }, { "epoch": 0.6774849244528762, "grad_norm": 6.912006378173828, "learning_rate": 8.733931138339381e-05, "loss": 0.7127, "step": 9999 }, { "epoch": 0.6775526797208483, "grad_norm": 7.386782169342041, "learning_rate": 8.7337942364296e-05, "loss": 0.7233, "step": 10000 }, { "epoch": 0.6776204349888204, "grad_norm": 6.710719585418701, "learning_rate": 8.733657334519817e-05, "loss": 0.8965, "step": 10001 }, { "epoch": 0.6776881902567925, "grad_norm": 5.841215133666992, "learning_rate": 8.733520432610035e-05, "loss": 1.0521, "step": 10002 }, { "epoch": 0.6777559455247646, "grad_norm": 7.529298782348633, "learning_rate": 8.733383530700253e-05, "loss": 0.9908, "step": 10003 }, { "epoch": 0.6778237007927367, "grad_norm": 6.102165699005127, "learning_rate": 8.733246628790473e-05, "loss": 0.7755, "step": 10004 }, { "epoch": 0.6778914560607088, "grad_norm": 5.879965782165527, "learning_rate": 8.733109726880691e-05, "loss": 0.6404, "step": 10005 }, { "epoch": 0.6779592113286808, "grad_norm": 5.979146480560303, "learning_rate": 8.732972824970909e-05, "loss": 0.6719, "step": 10006 }, { "epoch": 0.6780269665966528, "grad_norm": 6.233981609344482, "learning_rate": 8.732835923061127e-05, "loss": 0.7691, "step": 10007 }, { "epoch": 0.6780947218646249, "grad_norm": 5.87860107421875, "learning_rate": 8.732699021151346e-05, "loss": 0.611, "step": 10008 }, { "epoch": 0.678162477132597, "grad_norm": 5.905231952667236, "learning_rate": 8.732562119241564e-05, "loss": 0.8869, "step": 10009 }, { "epoch": 0.6782302324005691, "grad_norm": 5.827250003814697, "learning_rate": 8.732425217331782e-05, "loss": 0.8758, "step": 10010 }, { "epoch": 0.6782979876685412, "grad_norm": 6.833671569824219, "learning_rate": 8.732288315422e-05, "loss": 0.653, "step": 10011 }, { "epoch": 0.6783657429365133, "grad_norm": 6.360918045043945, "learning_rate": 8.732151413512218e-05, "loss": 0.8103, "step": 10012 }, { "epoch": 0.6784334982044854, "grad_norm": 5.429161548614502, "learning_rate": 8.732014511602438e-05, "loss": 0.742, "step": 10013 }, { "epoch": 0.6785012534724575, "grad_norm": 7.001778602600098, "learning_rate": 8.731877609692656e-05, "loss": 0.7378, "step": 10014 }, { "epoch": 0.6785690087404296, "grad_norm": 6.182924270629883, "learning_rate": 8.731740707782874e-05, "loss": 0.9072, "step": 10015 }, { "epoch": 0.6786367640084017, "grad_norm": 6.020389080047607, "learning_rate": 8.731603805873092e-05, "loss": 0.7117, "step": 10016 }, { "epoch": 0.6787045192763738, "grad_norm": 6.754500865936279, "learning_rate": 8.731466903963311e-05, "loss": 1.0786, "step": 10017 }, { "epoch": 0.6787722745443459, "grad_norm": 6.181823253631592, "learning_rate": 8.73133000205353e-05, "loss": 0.9011, "step": 10018 }, { "epoch": 0.678840029812318, "grad_norm": 5.791422367095947, "learning_rate": 8.731193100143747e-05, "loss": 0.6102, "step": 10019 }, { "epoch": 0.67890778508029, "grad_norm": 6.784679412841797, "learning_rate": 8.731056198233965e-05, "loss": 0.7262, "step": 10020 }, { "epoch": 0.6789755403482621, "grad_norm": 6.07905387878418, "learning_rate": 8.730919296324183e-05, "loss": 1.0617, "step": 10021 }, { "epoch": 0.6790432956162341, "grad_norm": 6.993971347808838, "learning_rate": 8.730782394414403e-05, "loss": 0.9212, "step": 10022 }, { "epoch": 0.6791110508842062, "grad_norm": 6.363950729370117, "learning_rate": 8.730645492504621e-05, "loss": 1.0471, "step": 10023 }, { "epoch": 0.6791788061521783, "grad_norm": 7.276193618774414, "learning_rate": 8.730508590594839e-05, "loss": 0.8397, "step": 10024 }, { "epoch": 0.6792465614201504, "grad_norm": 6.6545491218566895, "learning_rate": 8.730371688685057e-05, "loss": 0.6311, "step": 10025 }, { "epoch": 0.6793143166881225, "grad_norm": 4.888131141662598, "learning_rate": 8.730234786775275e-05, "loss": 0.5807, "step": 10026 }, { "epoch": 0.6793820719560946, "grad_norm": 5.920746326446533, "learning_rate": 8.730097884865494e-05, "loss": 0.624, "step": 10027 }, { "epoch": 0.6794498272240667, "grad_norm": 5.996628761291504, "learning_rate": 8.729960982955712e-05, "loss": 0.831, "step": 10028 }, { "epoch": 0.6795175824920388, "grad_norm": 7.457335472106934, "learning_rate": 8.72982408104593e-05, "loss": 0.7734, "step": 10029 }, { "epoch": 0.6795853377600108, "grad_norm": 5.239184856414795, "learning_rate": 8.729687179136148e-05, "loss": 0.6022, "step": 10030 }, { "epoch": 0.6796530930279829, "grad_norm": 8.256555557250977, "learning_rate": 8.729550277226368e-05, "loss": 1.265, "step": 10031 }, { "epoch": 0.679720848295955, "grad_norm": 5.370911121368408, "learning_rate": 8.729413375316586e-05, "loss": 0.847, "step": 10032 }, { "epoch": 0.6797886035639271, "grad_norm": 6.657277584075928, "learning_rate": 8.729276473406804e-05, "loss": 1.0096, "step": 10033 }, { "epoch": 0.6798563588318992, "grad_norm": 7.417891502380371, "learning_rate": 8.729139571497022e-05, "loss": 0.8401, "step": 10034 }, { "epoch": 0.6799241140998713, "grad_norm": 6.959234237670898, "learning_rate": 8.72900266958724e-05, "loss": 0.8181, "step": 10035 }, { "epoch": 0.6799918693678434, "grad_norm": 6.10018253326416, "learning_rate": 8.72886576767746e-05, "loss": 0.7132, "step": 10036 }, { "epoch": 0.6800596246358155, "grad_norm": 6.869701385498047, "learning_rate": 8.728728865767677e-05, "loss": 0.6435, "step": 10037 }, { "epoch": 0.6801273799037875, "grad_norm": 6.735001087188721, "learning_rate": 8.728591963857895e-05, "loss": 0.628, "step": 10038 }, { "epoch": 0.6801951351717596, "grad_norm": 5.84961462020874, "learning_rate": 8.728455061948115e-05, "loss": 0.7503, "step": 10039 }, { "epoch": 0.6802628904397316, "grad_norm": 6.914517879486084, "learning_rate": 8.728318160038333e-05, "loss": 0.8171, "step": 10040 }, { "epoch": 0.6803306457077037, "grad_norm": 8.97480297088623, "learning_rate": 8.728181258128551e-05, "loss": 0.9635, "step": 10041 }, { "epoch": 0.6803984009756758, "grad_norm": 5.28425407409668, "learning_rate": 8.72804435621877e-05, "loss": 0.656, "step": 10042 }, { "epoch": 0.6804661562436479, "grad_norm": 6.638722896575928, "learning_rate": 8.727907454308988e-05, "loss": 0.797, "step": 10043 }, { "epoch": 0.68053391151162, "grad_norm": 9.306808471679688, "learning_rate": 8.727770552399206e-05, "loss": 1.0276, "step": 10044 }, { "epoch": 0.6806016667795921, "grad_norm": 5.549346446990967, "learning_rate": 8.727633650489426e-05, "loss": 0.7556, "step": 10045 }, { "epoch": 0.6806694220475642, "grad_norm": 9.099546432495117, "learning_rate": 8.727496748579644e-05, "loss": 0.9813, "step": 10046 }, { "epoch": 0.6807371773155363, "grad_norm": 6.115594863891602, "learning_rate": 8.727359846669862e-05, "loss": 0.7509, "step": 10047 }, { "epoch": 0.6808049325835084, "grad_norm": 6.244608402252197, "learning_rate": 8.72722294476008e-05, "loss": 0.8274, "step": 10048 }, { "epoch": 0.6808726878514805, "grad_norm": 5.933996677398682, "learning_rate": 8.727086042850298e-05, "loss": 0.9218, "step": 10049 }, { "epoch": 0.6809404431194526, "grad_norm": 6.216856002807617, "learning_rate": 8.726949140940517e-05, "loss": 0.8434, "step": 10050 }, { "epoch": 0.6810081983874247, "grad_norm": 8.29095458984375, "learning_rate": 8.726812239030735e-05, "loss": 0.9376, "step": 10051 }, { "epoch": 0.6810759536553967, "grad_norm": 6.203293323516846, "learning_rate": 8.726675337120953e-05, "loss": 0.865, "step": 10052 }, { "epoch": 0.6811437089233688, "grad_norm": 7.393670082092285, "learning_rate": 8.726538435211171e-05, "loss": 0.9512, "step": 10053 }, { "epoch": 0.6812114641913409, "grad_norm": 5.229176044464111, "learning_rate": 8.726401533301391e-05, "loss": 0.6983, "step": 10054 }, { "epoch": 0.6812792194593129, "grad_norm": 6.016887187957764, "learning_rate": 8.726264631391609e-05, "loss": 0.8935, "step": 10055 }, { "epoch": 0.681346974727285, "grad_norm": 8.189292907714844, "learning_rate": 8.726127729481827e-05, "loss": 0.9643, "step": 10056 }, { "epoch": 0.6814147299952571, "grad_norm": 7.380198001861572, "learning_rate": 8.725990827572045e-05, "loss": 0.9616, "step": 10057 }, { "epoch": 0.6814824852632292, "grad_norm": 5.0216546058654785, "learning_rate": 8.725853925662263e-05, "loss": 0.9492, "step": 10058 }, { "epoch": 0.6815502405312013, "grad_norm": 4.686762809753418, "learning_rate": 8.725717023752482e-05, "loss": 0.7308, "step": 10059 }, { "epoch": 0.6816179957991734, "grad_norm": 7.183378219604492, "learning_rate": 8.7255801218427e-05, "loss": 0.7267, "step": 10060 }, { "epoch": 0.6816857510671455, "grad_norm": 6.899569988250732, "learning_rate": 8.725443219932918e-05, "loss": 1.2609, "step": 10061 }, { "epoch": 0.6817535063351176, "grad_norm": 6.386919021606445, "learning_rate": 8.725306318023136e-05, "loss": 0.8907, "step": 10062 }, { "epoch": 0.6818212616030896, "grad_norm": 6.532101631164551, "learning_rate": 8.725169416113356e-05, "loss": 0.8595, "step": 10063 }, { "epoch": 0.6818890168710617, "grad_norm": 6.360471725463867, "learning_rate": 8.725032514203574e-05, "loss": 0.8667, "step": 10064 }, { "epoch": 0.6819567721390338, "grad_norm": 6.6123480796813965, "learning_rate": 8.724895612293792e-05, "loss": 0.9812, "step": 10065 }, { "epoch": 0.6820245274070059, "grad_norm": 7.219352722167969, "learning_rate": 8.72475871038401e-05, "loss": 0.8871, "step": 10066 }, { "epoch": 0.682092282674978, "grad_norm": 5.686796188354492, "learning_rate": 8.724621808474228e-05, "loss": 0.6971, "step": 10067 }, { "epoch": 0.6821600379429501, "grad_norm": 5.568495273590088, "learning_rate": 8.724484906564447e-05, "loss": 0.6986, "step": 10068 }, { "epoch": 0.6822277932109222, "grad_norm": 6.997779369354248, "learning_rate": 8.724348004654665e-05, "loss": 1.0181, "step": 10069 }, { "epoch": 0.6822955484788943, "grad_norm": 6.177464962005615, "learning_rate": 8.724211102744883e-05, "loss": 0.8675, "step": 10070 }, { "epoch": 0.6823633037468663, "grad_norm": 5.105398654937744, "learning_rate": 8.724074200835101e-05, "loss": 0.7752, "step": 10071 }, { "epoch": 0.6824310590148384, "grad_norm": 5.883255481719971, "learning_rate": 8.723937298925321e-05, "loss": 0.8301, "step": 10072 }, { "epoch": 0.6824988142828105, "grad_norm": 5.932136058807373, "learning_rate": 8.723800397015539e-05, "loss": 0.7768, "step": 10073 }, { "epoch": 0.6825665695507825, "grad_norm": 6.31164026260376, "learning_rate": 8.723663495105757e-05, "loss": 1.0803, "step": 10074 }, { "epoch": 0.6826343248187546, "grad_norm": 4.6186089515686035, "learning_rate": 8.723526593195975e-05, "loss": 0.7403, "step": 10075 }, { "epoch": 0.6827020800867267, "grad_norm": 6.847713470458984, "learning_rate": 8.723389691286193e-05, "loss": 0.8813, "step": 10076 }, { "epoch": 0.6827698353546988, "grad_norm": 6.186002254486084, "learning_rate": 8.723252789376412e-05, "loss": 0.7373, "step": 10077 }, { "epoch": 0.6828375906226709, "grad_norm": 5.436232089996338, "learning_rate": 8.72311588746663e-05, "loss": 0.8996, "step": 10078 }, { "epoch": 0.682905345890643, "grad_norm": 7.013981819152832, "learning_rate": 8.722978985556848e-05, "loss": 0.6978, "step": 10079 }, { "epoch": 0.6829731011586151, "grad_norm": 6.762590408325195, "learning_rate": 8.722842083647066e-05, "loss": 0.8004, "step": 10080 }, { "epoch": 0.6830408564265872, "grad_norm": 6.398361682891846, "learning_rate": 8.722705181737284e-05, "loss": 0.8928, "step": 10081 }, { "epoch": 0.6831086116945593, "grad_norm": 5.5384111404418945, "learning_rate": 8.722568279827504e-05, "loss": 0.6761, "step": 10082 }, { "epoch": 0.6831763669625314, "grad_norm": 6.1554179191589355, "learning_rate": 8.722431377917722e-05, "loss": 0.8894, "step": 10083 }, { "epoch": 0.6832441222305035, "grad_norm": 7.378477096557617, "learning_rate": 8.72229447600794e-05, "loss": 0.776, "step": 10084 }, { "epoch": 0.6833118774984756, "grad_norm": 5.925946235656738, "learning_rate": 8.72215757409816e-05, "loss": 0.978, "step": 10085 }, { "epoch": 0.6833796327664476, "grad_norm": 9.17393684387207, "learning_rate": 8.722020672188377e-05, "loss": 0.9067, "step": 10086 }, { "epoch": 0.6834473880344196, "grad_norm": 5.253411293029785, "learning_rate": 8.721883770278595e-05, "loss": 0.822, "step": 10087 }, { "epoch": 0.6835151433023917, "grad_norm": 5.1418046951293945, "learning_rate": 8.721746868368815e-05, "loss": 0.7038, "step": 10088 }, { "epoch": 0.6835828985703638, "grad_norm": 6.0158281326293945, "learning_rate": 8.721609966459033e-05, "loss": 1.0841, "step": 10089 }, { "epoch": 0.6836506538383359, "grad_norm": 5.677688121795654, "learning_rate": 8.721473064549251e-05, "loss": 0.7498, "step": 10090 }, { "epoch": 0.683718409106308, "grad_norm": 7.7817254066467285, "learning_rate": 8.72133616263947e-05, "loss": 0.5788, "step": 10091 }, { "epoch": 0.6837861643742801, "grad_norm": 5.363152503967285, "learning_rate": 8.721199260729688e-05, "loss": 0.7424, "step": 10092 }, { "epoch": 0.6838539196422522, "grad_norm": 6.539010524749756, "learning_rate": 8.721062358819906e-05, "loss": 0.8402, "step": 10093 }, { "epoch": 0.6839216749102243, "grad_norm": 5.907912254333496, "learning_rate": 8.720925456910124e-05, "loss": 0.9475, "step": 10094 }, { "epoch": 0.6839894301781964, "grad_norm": 6.317841529846191, "learning_rate": 8.720788555000344e-05, "loss": 0.7352, "step": 10095 }, { "epoch": 0.6840571854461684, "grad_norm": 6.071649074554443, "learning_rate": 8.720651653090562e-05, "loss": 0.7539, "step": 10096 }, { "epoch": 0.6841249407141405, "grad_norm": 7.052052974700928, "learning_rate": 8.72051475118078e-05, "loss": 0.788, "step": 10097 }, { "epoch": 0.6841926959821126, "grad_norm": 5.975690841674805, "learning_rate": 8.720377849270998e-05, "loss": 0.8239, "step": 10098 }, { "epoch": 0.6842604512500847, "grad_norm": 5.96523904800415, "learning_rate": 8.720240947361216e-05, "loss": 0.9525, "step": 10099 }, { "epoch": 0.6843282065180568, "grad_norm": 6.296563148498535, "learning_rate": 8.720104045451435e-05, "loss": 0.691, "step": 10100 }, { "epoch": 0.6843959617860289, "grad_norm": 5.338788986206055, "learning_rate": 8.719967143541653e-05, "loss": 0.7297, "step": 10101 }, { "epoch": 0.684463717054001, "grad_norm": 6.04310417175293, "learning_rate": 8.719830241631871e-05, "loss": 0.8976, "step": 10102 }, { "epoch": 0.684531472321973, "grad_norm": 7.260922431945801, "learning_rate": 8.71969333972209e-05, "loss": 0.804, "step": 10103 }, { "epoch": 0.6845992275899451, "grad_norm": 7.797060489654541, "learning_rate": 8.719556437812307e-05, "loss": 1.1672, "step": 10104 }, { "epoch": 0.6846669828579172, "grad_norm": 4.863615989685059, "learning_rate": 8.719419535902527e-05, "loss": 0.7793, "step": 10105 }, { "epoch": 0.6847347381258893, "grad_norm": 6.105317115783691, "learning_rate": 8.719282633992745e-05, "loss": 0.7928, "step": 10106 }, { "epoch": 0.6848024933938613, "grad_norm": 5.737043380737305, "learning_rate": 8.719145732082963e-05, "loss": 0.627, "step": 10107 }, { "epoch": 0.6848702486618334, "grad_norm": 5.305082321166992, "learning_rate": 8.719008830173181e-05, "loss": 0.6404, "step": 10108 }, { "epoch": 0.6849380039298055, "grad_norm": 6.310640335083008, "learning_rate": 8.7188719282634e-05, "loss": 1.0716, "step": 10109 }, { "epoch": 0.6850057591977776, "grad_norm": 6.286160469055176, "learning_rate": 8.718735026353618e-05, "loss": 0.8632, "step": 10110 }, { "epoch": 0.6850735144657497, "grad_norm": 6.600961208343506, "learning_rate": 8.718598124443836e-05, "loss": 0.7053, "step": 10111 }, { "epoch": 0.6851412697337218, "grad_norm": 7.745927333831787, "learning_rate": 8.718461222534054e-05, "loss": 1.3105, "step": 10112 }, { "epoch": 0.6852090250016939, "grad_norm": 7.073805332183838, "learning_rate": 8.718324320624272e-05, "loss": 1.0302, "step": 10113 }, { "epoch": 0.685276780269666, "grad_norm": 6.142061233520508, "learning_rate": 8.718187418714492e-05, "loss": 0.7433, "step": 10114 }, { "epoch": 0.6853445355376381, "grad_norm": 6.279247760772705, "learning_rate": 8.71805051680471e-05, "loss": 0.9076, "step": 10115 }, { "epoch": 0.6854122908056102, "grad_norm": 6.235780239105225, "learning_rate": 8.717913614894928e-05, "loss": 1.0124, "step": 10116 }, { "epoch": 0.6854800460735823, "grad_norm": 7.218427658081055, "learning_rate": 8.717776712985146e-05, "loss": 0.8519, "step": 10117 }, { "epoch": 0.6855478013415544, "grad_norm": 5.481386661529541, "learning_rate": 8.717639811075365e-05, "loss": 0.6997, "step": 10118 }, { "epoch": 0.6856155566095264, "grad_norm": 6.176963806152344, "learning_rate": 8.717502909165583e-05, "loss": 0.7735, "step": 10119 }, { "epoch": 0.6856833118774984, "grad_norm": 5.452063083648682, "learning_rate": 8.717366007255801e-05, "loss": 0.6307, "step": 10120 }, { "epoch": 0.6857510671454705, "grad_norm": 8.103320121765137, "learning_rate": 8.71722910534602e-05, "loss": 0.794, "step": 10121 }, { "epoch": 0.6858188224134426, "grad_norm": 7.873292922973633, "learning_rate": 8.717092203436237e-05, "loss": 0.8941, "step": 10122 }, { "epoch": 0.6858865776814147, "grad_norm": 6.97474479675293, "learning_rate": 8.716955301526457e-05, "loss": 0.627, "step": 10123 }, { "epoch": 0.6859543329493868, "grad_norm": 7.643387794494629, "learning_rate": 8.716818399616675e-05, "loss": 0.8326, "step": 10124 }, { "epoch": 0.6860220882173589, "grad_norm": 8.497008323669434, "learning_rate": 8.716681497706893e-05, "loss": 0.959, "step": 10125 }, { "epoch": 0.686089843485331, "grad_norm": 6.50831413269043, "learning_rate": 8.716544595797111e-05, "loss": 0.6411, "step": 10126 }, { "epoch": 0.6861575987533031, "grad_norm": 7.354664325714111, "learning_rate": 8.71640769388733e-05, "loss": 0.9987, "step": 10127 }, { "epoch": 0.6862253540212752, "grad_norm": 7.188365936279297, "learning_rate": 8.716270791977548e-05, "loss": 0.9559, "step": 10128 }, { "epoch": 0.6862931092892472, "grad_norm": 6.465545177459717, "learning_rate": 8.716133890067766e-05, "loss": 0.9607, "step": 10129 }, { "epoch": 0.6863608645572193, "grad_norm": 5.860640048980713, "learning_rate": 8.715996988157984e-05, "loss": 1.0831, "step": 10130 }, { "epoch": 0.6864286198251914, "grad_norm": 5.8733296394348145, "learning_rate": 8.715860086248204e-05, "loss": 0.94, "step": 10131 }, { "epoch": 0.6864963750931635, "grad_norm": 6.498860836029053, "learning_rate": 8.715723184338422e-05, "loss": 0.9037, "step": 10132 }, { "epoch": 0.6865641303611356, "grad_norm": 6.163070201873779, "learning_rate": 8.71558628242864e-05, "loss": 0.7879, "step": 10133 }, { "epoch": 0.6866318856291077, "grad_norm": 5.94657564163208, "learning_rate": 8.715449380518859e-05, "loss": 0.8599, "step": 10134 }, { "epoch": 0.6866996408970798, "grad_norm": 6.023927211761475, "learning_rate": 8.715312478609077e-05, "loss": 0.7475, "step": 10135 }, { "epoch": 0.6867673961650518, "grad_norm": 6.270586967468262, "learning_rate": 8.715175576699295e-05, "loss": 0.754, "step": 10136 }, { "epoch": 0.6868351514330239, "grad_norm": 6.190057277679443, "learning_rate": 8.715038674789515e-05, "loss": 0.6548, "step": 10137 }, { "epoch": 0.686902906700996, "grad_norm": 5.793675899505615, "learning_rate": 8.714901772879733e-05, "loss": 0.5634, "step": 10138 }, { "epoch": 0.686970661968968, "grad_norm": 6.0643310546875, "learning_rate": 8.714764870969951e-05, "loss": 1.0238, "step": 10139 }, { "epoch": 0.6870384172369401, "grad_norm": 5.128119945526123, "learning_rate": 8.714627969060169e-05, "loss": 0.6071, "step": 10140 }, { "epoch": 0.6871061725049122, "grad_norm": 6.435021877288818, "learning_rate": 8.714491067150388e-05, "loss": 0.7108, "step": 10141 }, { "epoch": 0.6871739277728843, "grad_norm": 7.8082122802734375, "learning_rate": 8.714354165240606e-05, "loss": 1.0643, "step": 10142 }, { "epoch": 0.6872416830408564, "grad_norm": 8.007335662841797, "learning_rate": 8.714217263330824e-05, "loss": 0.962, "step": 10143 }, { "epoch": 0.6873094383088285, "grad_norm": 6.111318588256836, "learning_rate": 8.714080361421042e-05, "loss": 0.8697, "step": 10144 }, { "epoch": 0.6873771935768006, "grad_norm": 5.64456844329834, "learning_rate": 8.71394345951126e-05, "loss": 1.128, "step": 10145 }, { "epoch": 0.6874449488447727, "grad_norm": 7.482577323913574, "learning_rate": 8.71380655760148e-05, "loss": 0.7496, "step": 10146 }, { "epoch": 0.6875127041127448, "grad_norm": 5.837367534637451, "learning_rate": 8.713669655691698e-05, "loss": 0.7851, "step": 10147 }, { "epoch": 0.6875804593807169, "grad_norm": 5.765466213226318, "learning_rate": 8.713532753781916e-05, "loss": 0.718, "step": 10148 }, { "epoch": 0.687648214648689, "grad_norm": 7.491219520568848, "learning_rate": 8.713395851872134e-05, "loss": 1.0697, "step": 10149 }, { "epoch": 0.6877159699166611, "grad_norm": 6.724207401275635, "learning_rate": 8.713258949962353e-05, "loss": 0.7963, "step": 10150 }, { "epoch": 0.6877837251846332, "grad_norm": 6.340019226074219, "learning_rate": 8.713122048052571e-05, "loss": 1.0246, "step": 10151 }, { "epoch": 0.6878514804526051, "grad_norm": 4.966742038726807, "learning_rate": 8.712985146142789e-05, "loss": 0.9185, "step": 10152 }, { "epoch": 0.6879192357205772, "grad_norm": 6.308014869689941, "learning_rate": 8.712848244233007e-05, "loss": 0.9262, "step": 10153 }, { "epoch": 0.6879869909885493, "grad_norm": 7.576474189758301, "learning_rate": 8.712711342323225e-05, "loss": 0.7432, "step": 10154 }, { "epoch": 0.6880547462565214, "grad_norm": 5.722362041473389, "learning_rate": 8.712574440413445e-05, "loss": 0.76, "step": 10155 }, { "epoch": 0.6881225015244935, "grad_norm": 4.6947197914123535, "learning_rate": 8.712437538503663e-05, "loss": 0.7429, "step": 10156 }, { "epoch": 0.6881902567924656, "grad_norm": 6.494797229766846, "learning_rate": 8.712300636593881e-05, "loss": 0.9554, "step": 10157 }, { "epoch": 0.6882580120604377, "grad_norm": 7.0943403244018555, "learning_rate": 8.712163734684099e-05, "loss": 0.9698, "step": 10158 }, { "epoch": 0.6883257673284098, "grad_norm": 5.764694690704346, "learning_rate": 8.712026832774317e-05, "loss": 0.5992, "step": 10159 }, { "epoch": 0.6883935225963819, "grad_norm": 8.111281394958496, "learning_rate": 8.711889930864536e-05, "loss": 0.9348, "step": 10160 }, { "epoch": 0.688461277864354, "grad_norm": 6.08704948425293, "learning_rate": 8.711753028954754e-05, "loss": 0.7621, "step": 10161 }, { "epoch": 0.688529033132326, "grad_norm": 7.329418659210205, "learning_rate": 8.711616127044972e-05, "loss": 0.8404, "step": 10162 }, { "epoch": 0.6885967884002981, "grad_norm": 5.368319988250732, "learning_rate": 8.71147922513519e-05, "loss": 0.6699, "step": 10163 }, { "epoch": 0.6886645436682702, "grad_norm": 4.94074821472168, "learning_rate": 8.71134232322541e-05, "loss": 0.7722, "step": 10164 }, { "epoch": 0.6887322989362423, "grad_norm": 7.2699408531188965, "learning_rate": 8.711205421315628e-05, "loss": 0.9826, "step": 10165 }, { "epoch": 0.6888000542042144, "grad_norm": 7.965369701385498, "learning_rate": 8.711068519405846e-05, "loss": 1.0658, "step": 10166 }, { "epoch": 0.6888678094721865, "grad_norm": 6.148140907287598, "learning_rate": 8.710931617496064e-05, "loss": 0.7169, "step": 10167 }, { "epoch": 0.6889355647401586, "grad_norm": 7.394513130187988, "learning_rate": 8.710794715586282e-05, "loss": 1.0415, "step": 10168 }, { "epoch": 0.6890033200081306, "grad_norm": 6.450402736663818, "learning_rate": 8.710657813676501e-05, "loss": 0.8588, "step": 10169 }, { "epoch": 0.6890710752761027, "grad_norm": 7.909549236297607, "learning_rate": 8.71052091176672e-05, "loss": 0.8389, "step": 10170 }, { "epoch": 0.6891388305440748, "grad_norm": 6.7112603187561035, "learning_rate": 8.710384009856937e-05, "loss": 0.8337, "step": 10171 }, { "epoch": 0.6892065858120469, "grad_norm": 5.394613265991211, "learning_rate": 8.710247107947155e-05, "loss": 0.6975, "step": 10172 }, { "epoch": 0.689274341080019, "grad_norm": 7.6131911277771, "learning_rate": 8.710110206037375e-05, "loss": 0.9802, "step": 10173 }, { "epoch": 0.689342096347991, "grad_norm": 5.598437786102295, "learning_rate": 8.709973304127593e-05, "loss": 0.6925, "step": 10174 }, { "epoch": 0.6894098516159631, "grad_norm": 7.252137660980225, "learning_rate": 8.709836402217811e-05, "loss": 0.9988, "step": 10175 }, { "epoch": 0.6894776068839352, "grad_norm": 5.04716682434082, "learning_rate": 8.709699500308029e-05, "loss": 0.6327, "step": 10176 }, { "epoch": 0.6895453621519073, "grad_norm": 5.729875564575195, "learning_rate": 8.709562598398248e-05, "loss": 0.7374, "step": 10177 }, { "epoch": 0.6896131174198794, "grad_norm": 6.3525166511535645, "learning_rate": 8.709425696488466e-05, "loss": 0.9111, "step": 10178 }, { "epoch": 0.6896808726878515, "grad_norm": 8.86587905883789, "learning_rate": 8.709288794578684e-05, "loss": 0.6059, "step": 10179 }, { "epoch": 0.6897486279558236, "grad_norm": 6.739536762237549, "learning_rate": 8.709151892668904e-05, "loss": 0.6525, "step": 10180 }, { "epoch": 0.6898163832237957, "grad_norm": 6.119062900543213, "learning_rate": 8.709014990759122e-05, "loss": 0.8521, "step": 10181 }, { "epoch": 0.6898841384917678, "grad_norm": 6.746237754821777, "learning_rate": 8.70887808884934e-05, "loss": 0.7897, "step": 10182 }, { "epoch": 0.6899518937597399, "grad_norm": 6.002828121185303, "learning_rate": 8.708741186939559e-05, "loss": 0.7147, "step": 10183 }, { "epoch": 0.690019649027712, "grad_norm": 7.710058212280273, "learning_rate": 8.708604285029777e-05, "loss": 0.6674, "step": 10184 }, { "epoch": 0.6900874042956839, "grad_norm": 7.451826572418213, "learning_rate": 8.708467383119995e-05, "loss": 0.8059, "step": 10185 }, { "epoch": 0.690155159563656, "grad_norm": 7.2232465744018555, "learning_rate": 8.708330481210213e-05, "loss": 0.8831, "step": 10186 }, { "epoch": 0.6902229148316281, "grad_norm": 6.742265701293945, "learning_rate": 8.708193579300433e-05, "loss": 0.851, "step": 10187 }, { "epoch": 0.6902906700996002, "grad_norm": 6.47482967376709, "learning_rate": 8.708056677390651e-05, "loss": 0.7428, "step": 10188 }, { "epoch": 0.6903584253675723, "grad_norm": 6.393016815185547, "learning_rate": 8.707919775480869e-05, "loss": 0.9357, "step": 10189 }, { "epoch": 0.6904261806355444, "grad_norm": 5.898126602172852, "learning_rate": 8.707782873571087e-05, "loss": 0.7807, "step": 10190 }, { "epoch": 0.6904939359035165, "grad_norm": 7.400199890136719, "learning_rate": 8.707645971661305e-05, "loss": 0.9482, "step": 10191 }, { "epoch": 0.6905616911714886, "grad_norm": 8.556063652038574, "learning_rate": 8.707509069751524e-05, "loss": 0.7055, "step": 10192 }, { "epoch": 0.6906294464394607, "grad_norm": 7.6027936935424805, "learning_rate": 8.707372167841742e-05, "loss": 0.9158, "step": 10193 }, { "epoch": 0.6906972017074328, "grad_norm": 6.305631637573242, "learning_rate": 8.70723526593196e-05, "loss": 0.8744, "step": 10194 }, { "epoch": 0.6907649569754049, "grad_norm": 5.370072841644287, "learning_rate": 8.707098364022178e-05, "loss": 0.7291, "step": 10195 }, { "epoch": 0.690832712243377, "grad_norm": 6.723821640014648, "learning_rate": 8.706961462112398e-05, "loss": 0.9333, "step": 10196 }, { "epoch": 0.690900467511349, "grad_norm": 6.01531982421875, "learning_rate": 8.706824560202616e-05, "loss": 0.6833, "step": 10197 }, { "epoch": 0.6909682227793211, "grad_norm": 7.747717380523682, "learning_rate": 8.706687658292834e-05, "loss": 1.0669, "step": 10198 }, { "epoch": 0.6910359780472932, "grad_norm": 6.549305438995361, "learning_rate": 8.706550756383052e-05, "loss": 0.6726, "step": 10199 }, { "epoch": 0.6911037333152653, "grad_norm": 5.983778476715088, "learning_rate": 8.70641385447327e-05, "loss": 0.7065, "step": 10200 }, { "epoch": 0.6911714885832373, "grad_norm": 6.709543704986572, "learning_rate": 8.706276952563489e-05, "loss": 0.7668, "step": 10201 }, { "epoch": 0.6912392438512094, "grad_norm": 6.432425498962402, "learning_rate": 8.706140050653707e-05, "loss": 0.8235, "step": 10202 }, { "epoch": 0.6913069991191815, "grad_norm": 6.770932197570801, "learning_rate": 8.706003148743925e-05, "loss": 0.9328, "step": 10203 }, { "epoch": 0.6913747543871536, "grad_norm": 6.075129508972168, "learning_rate": 8.705866246834143e-05, "loss": 0.6302, "step": 10204 }, { "epoch": 0.6914425096551257, "grad_norm": 5.651463985443115, "learning_rate": 8.705729344924363e-05, "loss": 0.8457, "step": 10205 }, { "epoch": 0.6915102649230978, "grad_norm": 4.9870524406433105, "learning_rate": 8.705592443014581e-05, "loss": 0.685, "step": 10206 }, { "epoch": 0.6915780201910698, "grad_norm": 5.86956787109375, "learning_rate": 8.705455541104799e-05, "loss": 0.9293, "step": 10207 }, { "epoch": 0.6916457754590419, "grad_norm": 6.015864849090576, "learning_rate": 8.705318639195017e-05, "loss": 0.7802, "step": 10208 }, { "epoch": 0.691713530727014, "grad_norm": 6.092733383178711, "learning_rate": 8.705181737285235e-05, "loss": 0.9689, "step": 10209 }, { "epoch": 0.6917812859949861, "grad_norm": 5.461453437805176, "learning_rate": 8.705044835375454e-05, "loss": 0.8396, "step": 10210 }, { "epoch": 0.6918490412629582, "grad_norm": 5.914142608642578, "learning_rate": 8.704907933465672e-05, "loss": 0.7936, "step": 10211 }, { "epoch": 0.6919167965309303, "grad_norm": 7.034205436706543, "learning_rate": 8.70477103155589e-05, "loss": 0.9324, "step": 10212 }, { "epoch": 0.6919845517989024, "grad_norm": 5.621762275695801, "learning_rate": 8.704634129646108e-05, "loss": 0.7763, "step": 10213 }, { "epoch": 0.6920523070668745, "grad_norm": 6.53078031539917, "learning_rate": 8.704497227736326e-05, "loss": 0.9933, "step": 10214 }, { "epoch": 0.6921200623348466, "grad_norm": 6.093494415283203, "learning_rate": 8.704360325826546e-05, "loss": 0.8164, "step": 10215 }, { "epoch": 0.6921878176028187, "grad_norm": 5.476284503936768, "learning_rate": 8.704223423916764e-05, "loss": 0.8249, "step": 10216 }, { "epoch": 0.6922555728707908, "grad_norm": 8.038809776306152, "learning_rate": 8.704086522006982e-05, "loss": 1.1032, "step": 10217 }, { "epoch": 0.6923233281387627, "grad_norm": 6.303304672241211, "learning_rate": 8.7039496200972e-05, "loss": 0.8769, "step": 10218 }, { "epoch": 0.6923910834067348, "grad_norm": 5.814499855041504, "learning_rate": 8.703812718187419e-05, "loss": 0.9121, "step": 10219 }, { "epoch": 0.6924588386747069, "grad_norm": 6.704540729522705, "learning_rate": 8.703675816277637e-05, "loss": 1.005, "step": 10220 }, { "epoch": 0.692526593942679, "grad_norm": 5.562582969665527, "learning_rate": 8.703538914367855e-05, "loss": 0.7873, "step": 10221 }, { "epoch": 0.6925943492106511, "grad_norm": 6.583497524261475, "learning_rate": 8.703402012458073e-05, "loss": 0.9385, "step": 10222 }, { "epoch": 0.6926621044786232, "grad_norm": 6.376049518585205, "learning_rate": 8.703265110548291e-05, "loss": 0.9021, "step": 10223 }, { "epoch": 0.6927298597465953, "grad_norm": 5.302101135253906, "learning_rate": 8.703128208638511e-05, "loss": 0.917, "step": 10224 }, { "epoch": 0.6927976150145674, "grad_norm": 7.336282730102539, "learning_rate": 8.702991306728729e-05, "loss": 0.6929, "step": 10225 }, { "epoch": 0.6928653702825395, "grad_norm": 6.04905366897583, "learning_rate": 8.702854404818948e-05, "loss": 0.7451, "step": 10226 }, { "epoch": 0.6929331255505116, "grad_norm": 5.474672317504883, "learning_rate": 8.702717502909166e-05, "loss": 0.7867, "step": 10227 }, { "epoch": 0.6930008808184837, "grad_norm": 5.713932037353516, "learning_rate": 8.702580600999384e-05, "loss": 0.7569, "step": 10228 }, { "epoch": 0.6930686360864557, "grad_norm": 7.172578811645508, "learning_rate": 8.702443699089604e-05, "loss": 0.7756, "step": 10229 }, { "epoch": 0.6931363913544278, "grad_norm": 5.1826019287109375, "learning_rate": 8.702306797179822e-05, "loss": 0.8408, "step": 10230 }, { "epoch": 0.6932041466223999, "grad_norm": 8.113396644592285, "learning_rate": 8.70216989527004e-05, "loss": 0.8555, "step": 10231 }, { "epoch": 0.693271901890372, "grad_norm": 5.396690845489502, "learning_rate": 8.702032993360258e-05, "loss": 0.8125, "step": 10232 }, { "epoch": 0.6933396571583441, "grad_norm": 5.951786518096924, "learning_rate": 8.701896091450477e-05, "loss": 0.8391, "step": 10233 }, { "epoch": 0.6934074124263161, "grad_norm": 7.0163774490356445, "learning_rate": 8.701759189540695e-05, "loss": 0.853, "step": 10234 }, { "epoch": 0.6934751676942882, "grad_norm": 7.563843727111816, "learning_rate": 8.701622287630913e-05, "loss": 0.9711, "step": 10235 }, { "epoch": 0.6935429229622603, "grad_norm": 5.8124284744262695, "learning_rate": 8.701485385721131e-05, "loss": 0.7018, "step": 10236 }, { "epoch": 0.6936106782302324, "grad_norm": 6.22074556350708, "learning_rate": 8.701348483811349e-05, "loss": 0.7756, "step": 10237 }, { "epoch": 0.6936784334982045, "grad_norm": 5.601717472076416, "learning_rate": 8.701211581901569e-05, "loss": 0.7832, "step": 10238 }, { "epoch": 0.6937461887661766, "grad_norm": 7.209207534790039, "learning_rate": 8.701074679991787e-05, "loss": 0.7866, "step": 10239 }, { "epoch": 0.6938139440341486, "grad_norm": 5.176044940948486, "learning_rate": 8.700937778082005e-05, "loss": 0.6875, "step": 10240 }, { "epoch": 0.6938816993021207, "grad_norm": 6.441755771636963, "learning_rate": 8.700800876172223e-05, "loss": 0.6563, "step": 10241 }, { "epoch": 0.6939494545700928, "grad_norm": 5.935150146484375, "learning_rate": 8.700663974262442e-05, "loss": 0.881, "step": 10242 }, { "epoch": 0.6940172098380649, "grad_norm": 6.082694053649902, "learning_rate": 8.70052707235266e-05, "loss": 0.7059, "step": 10243 }, { "epoch": 0.694084965106037, "grad_norm": 7.9285383224487305, "learning_rate": 8.700390170442878e-05, "loss": 0.8529, "step": 10244 }, { "epoch": 0.6941527203740091, "grad_norm": 6.027041435241699, "learning_rate": 8.700253268533096e-05, "loss": 0.7919, "step": 10245 }, { "epoch": 0.6942204756419812, "grad_norm": 6.956554889678955, "learning_rate": 8.700116366623314e-05, "loss": 0.8655, "step": 10246 }, { "epoch": 0.6942882309099533, "grad_norm": 6.508672714233398, "learning_rate": 8.699979464713534e-05, "loss": 0.7306, "step": 10247 }, { "epoch": 0.6943559861779254, "grad_norm": 6.117376804351807, "learning_rate": 8.699842562803752e-05, "loss": 0.7016, "step": 10248 }, { "epoch": 0.6944237414458975, "grad_norm": 5.537294864654541, "learning_rate": 8.69970566089397e-05, "loss": 0.8452, "step": 10249 }, { "epoch": 0.6944914967138694, "grad_norm": 6.4718241691589355, "learning_rate": 8.699568758984188e-05, "loss": 0.9693, "step": 10250 }, { "epoch": 0.6945592519818415, "grad_norm": 6.249986171722412, "learning_rate": 8.699431857074407e-05, "loss": 0.8, "step": 10251 }, { "epoch": 0.6946270072498136, "grad_norm": 5.435842037200928, "learning_rate": 8.699294955164625e-05, "loss": 0.684, "step": 10252 }, { "epoch": 0.6946947625177857, "grad_norm": 7.15748405456543, "learning_rate": 8.699158053254843e-05, "loss": 0.9773, "step": 10253 }, { "epoch": 0.6947625177857578, "grad_norm": 6.881677150726318, "learning_rate": 8.699021151345061e-05, "loss": 0.8775, "step": 10254 }, { "epoch": 0.6948302730537299, "grad_norm": 4.51616096496582, "learning_rate": 8.69888424943528e-05, "loss": 0.615, "step": 10255 }, { "epoch": 0.694898028321702, "grad_norm": 6.824566841125488, "learning_rate": 8.698747347525499e-05, "loss": 0.7369, "step": 10256 }, { "epoch": 0.6949657835896741, "grad_norm": 5.30488395690918, "learning_rate": 8.698610445615717e-05, "loss": 0.7246, "step": 10257 }, { "epoch": 0.6950335388576462, "grad_norm": 7.59017276763916, "learning_rate": 8.698473543705935e-05, "loss": 0.6782, "step": 10258 }, { "epoch": 0.6951012941256183, "grad_norm": 7.0920867919921875, "learning_rate": 8.698336641796153e-05, "loss": 0.8515, "step": 10259 }, { "epoch": 0.6951690493935904, "grad_norm": 6.791457653045654, "learning_rate": 8.698199739886372e-05, "loss": 0.761, "step": 10260 }, { "epoch": 0.6952368046615625, "grad_norm": 5.619175910949707, "learning_rate": 8.69806283797659e-05, "loss": 1.08, "step": 10261 }, { "epoch": 0.6953045599295345, "grad_norm": 6.623924732208252, "learning_rate": 8.697925936066808e-05, "loss": 0.8813, "step": 10262 }, { "epoch": 0.6953723151975066, "grad_norm": 5.582348823547363, "learning_rate": 8.697789034157026e-05, "loss": 0.8114, "step": 10263 }, { "epoch": 0.6954400704654787, "grad_norm": 6.643357753753662, "learning_rate": 8.697652132247244e-05, "loss": 0.8595, "step": 10264 }, { "epoch": 0.6955078257334508, "grad_norm": 5.2722296714782715, "learning_rate": 8.697515230337464e-05, "loss": 0.757, "step": 10265 }, { "epoch": 0.6955755810014229, "grad_norm": 6.885534286499023, "learning_rate": 8.697378328427682e-05, "loss": 0.9969, "step": 10266 }, { "epoch": 0.6956433362693949, "grad_norm": 7.001368522644043, "learning_rate": 8.6972414265179e-05, "loss": 0.9588, "step": 10267 }, { "epoch": 0.695711091537367, "grad_norm": 7.451569557189941, "learning_rate": 8.697104524608118e-05, "loss": 0.7535, "step": 10268 }, { "epoch": 0.6957788468053391, "grad_norm": 7.196292877197266, "learning_rate": 8.696967622698336e-05, "loss": 1.1084, "step": 10269 }, { "epoch": 0.6958466020733112, "grad_norm": 7.165349006652832, "learning_rate": 8.696830720788555e-05, "loss": 1.0226, "step": 10270 }, { "epoch": 0.6959143573412833, "grad_norm": 6.453275680541992, "learning_rate": 8.696693818878773e-05, "loss": 0.6691, "step": 10271 }, { "epoch": 0.6959821126092554, "grad_norm": 7.4177069664001465, "learning_rate": 8.696556916968991e-05, "loss": 0.6886, "step": 10272 }, { "epoch": 0.6960498678772274, "grad_norm": 5.137477874755859, "learning_rate": 8.696420015059211e-05, "loss": 0.9635, "step": 10273 }, { "epoch": 0.6961176231451995, "grad_norm": 7.988467693328857, "learning_rate": 8.696283113149429e-05, "loss": 0.7967, "step": 10274 }, { "epoch": 0.6961853784131716, "grad_norm": 7.050410747528076, "learning_rate": 8.696146211239647e-05, "loss": 1.0407, "step": 10275 }, { "epoch": 0.6962531336811437, "grad_norm": 6.711641311645508, "learning_rate": 8.696009309329866e-05, "loss": 0.8607, "step": 10276 }, { "epoch": 0.6963208889491158, "grad_norm": 4.641478061676025, "learning_rate": 8.695872407420084e-05, "loss": 0.6665, "step": 10277 }, { "epoch": 0.6963886442170879, "grad_norm": 5.737218379974365, "learning_rate": 8.695735505510302e-05, "loss": 0.6503, "step": 10278 }, { "epoch": 0.69645639948506, "grad_norm": 5.518801212310791, "learning_rate": 8.695598603600522e-05, "loss": 0.9215, "step": 10279 }, { "epoch": 0.6965241547530321, "grad_norm": 6.198950290679932, "learning_rate": 8.69546170169074e-05, "loss": 0.6259, "step": 10280 }, { "epoch": 0.6965919100210042, "grad_norm": 7.505566596984863, "learning_rate": 8.695324799780958e-05, "loss": 0.7902, "step": 10281 }, { "epoch": 0.6966596652889763, "grad_norm": 6.407822132110596, "learning_rate": 8.695187897871176e-05, "loss": 0.7535, "step": 10282 }, { "epoch": 0.6967274205569483, "grad_norm": 7.691595554351807, "learning_rate": 8.695050995961395e-05, "loss": 0.7941, "step": 10283 }, { "epoch": 0.6967951758249203, "grad_norm": 5.803621292114258, "learning_rate": 8.694914094051613e-05, "loss": 0.6529, "step": 10284 }, { "epoch": 0.6968629310928924, "grad_norm": 6.0364580154418945, "learning_rate": 8.694777192141831e-05, "loss": 0.6357, "step": 10285 }, { "epoch": 0.6969306863608645, "grad_norm": 6.369047164916992, "learning_rate": 8.694640290232049e-05, "loss": 0.7526, "step": 10286 }, { "epoch": 0.6969984416288366, "grad_norm": 5.736650466918945, "learning_rate": 8.694503388322267e-05, "loss": 0.945, "step": 10287 }, { "epoch": 0.6970661968968087, "grad_norm": 5.924343109130859, "learning_rate": 8.694366486412487e-05, "loss": 0.7378, "step": 10288 }, { "epoch": 0.6971339521647808, "grad_norm": 8.118910789489746, "learning_rate": 8.694229584502705e-05, "loss": 1.2927, "step": 10289 }, { "epoch": 0.6972017074327529, "grad_norm": 6.7456464767456055, "learning_rate": 8.694092682592923e-05, "loss": 0.8051, "step": 10290 }, { "epoch": 0.697269462700725, "grad_norm": 8.029818534851074, "learning_rate": 8.693955780683141e-05, "loss": 0.7865, "step": 10291 }, { "epoch": 0.6973372179686971, "grad_norm": 8.77468204498291, "learning_rate": 8.693818878773359e-05, "loss": 0.8901, "step": 10292 }, { "epoch": 0.6974049732366692, "grad_norm": 8.635099411010742, "learning_rate": 8.693681976863578e-05, "loss": 0.7982, "step": 10293 }, { "epoch": 0.6974727285046413, "grad_norm": 6.938762187957764, "learning_rate": 8.693545074953796e-05, "loss": 0.7571, "step": 10294 }, { "epoch": 0.6975404837726134, "grad_norm": 6.177728652954102, "learning_rate": 8.693408173044014e-05, "loss": 0.8976, "step": 10295 }, { "epoch": 0.6976082390405854, "grad_norm": 7.001784324645996, "learning_rate": 8.693271271134232e-05, "loss": 0.8799, "step": 10296 }, { "epoch": 0.6976759943085575, "grad_norm": 5.89376163482666, "learning_rate": 8.693134369224452e-05, "loss": 0.6843, "step": 10297 }, { "epoch": 0.6977437495765296, "grad_norm": 6.412653923034668, "learning_rate": 8.69299746731467e-05, "loss": 0.8198, "step": 10298 }, { "epoch": 0.6978115048445016, "grad_norm": 6.647368907928467, "learning_rate": 8.692860565404888e-05, "loss": 1.0389, "step": 10299 }, { "epoch": 0.6978792601124737, "grad_norm": 7.058552265167236, "learning_rate": 8.692723663495106e-05, "loss": 0.9025, "step": 10300 }, { "epoch": 0.6979470153804458, "grad_norm": 6.235774993896484, "learning_rate": 8.692586761585324e-05, "loss": 0.7788, "step": 10301 }, { "epoch": 0.6980147706484179, "grad_norm": 6.167398929595947, "learning_rate": 8.692449859675543e-05, "loss": 0.6807, "step": 10302 }, { "epoch": 0.69808252591639, "grad_norm": 5.845956802368164, "learning_rate": 8.692312957765761e-05, "loss": 0.8136, "step": 10303 }, { "epoch": 0.6981502811843621, "grad_norm": 6.548614025115967, "learning_rate": 8.692176055855979e-05, "loss": 0.7352, "step": 10304 }, { "epoch": 0.6982180364523342, "grad_norm": 6.432018756866455, "learning_rate": 8.692039153946197e-05, "loss": 0.9196, "step": 10305 }, { "epoch": 0.6982857917203062, "grad_norm": 7.851593971252441, "learning_rate": 8.691902252036417e-05, "loss": 1.1077, "step": 10306 }, { "epoch": 0.6983535469882783, "grad_norm": 6.037036895751953, "learning_rate": 8.691765350126635e-05, "loss": 0.6674, "step": 10307 }, { "epoch": 0.6984213022562504, "grad_norm": 6.833919048309326, "learning_rate": 8.691628448216853e-05, "loss": 1.1161, "step": 10308 }, { "epoch": 0.6984890575242225, "grad_norm": 7.268690586090088, "learning_rate": 8.691491546307071e-05, "loss": 0.8906, "step": 10309 }, { "epoch": 0.6985568127921946, "grad_norm": 5.807277679443359, "learning_rate": 8.691354644397289e-05, "loss": 0.8622, "step": 10310 }, { "epoch": 0.6986245680601667, "grad_norm": 6.888156414031982, "learning_rate": 8.691217742487508e-05, "loss": 0.9483, "step": 10311 }, { "epoch": 0.6986923233281388, "grad_norm": 5.432703971862793, "learning_rate": 8.691080840577726e-05, "loss": 0.6413, "step": 10312 }, { "epoch": 0.6987600785961109, "grad_norm": 6.416975021362305, "learning_rate": 8.690943938667944e-05, "loss": 0.862, "step": 10313 }, { "epoch": 0.698827833864083, "grad_norm": 5.268738746643066, "learning_rate": 8.690807036758162e-05, "loss": 0.6074, "step": 10314 }, { "epoch": 0.698895589132055, "grad_norm": 7.375731945037842, "learning_rate": 8.690670134848382e-05, "loss": 0.8446, "step": 10315 }, { "epoch": 0.698963344400027, "grad_norm": 5.654892921447754, "learning_rate": 8.6905332329386e-05, "loss": 0.7604, "step": 10316 }, { "epoch": 0.6990310996679991, "grad_norm": 6.280389308929443, "learning_rate": 8.690396331028818e-05, "loss": 0.7515, "step": 10317 }, { "epoch": 0.6990988549359712, "grad_norm": 4.915734767913818, "learning_rate": 8.690259429119036e-05, "loss": 0.8976, "step": 10318 }, { "epoch": 0.6991666102039433, "grad_norm": 6.705817222595215, "learning_rate": 8.690122527209255e-05, "loss": 0.7172, "step": 10319 }, { "epoch": 0.6992343654719154, "grad_norm": 5.9727253913879395, "learning_rate": 8.689985625299473e-05, "loss": 0.9687, "step": 10320 }, { "epoch": 0.6993021207398875, "grad_norm": 8.154400825500488, "learning_rate": 8.689848723389691e-05, "loss": 0.8834, "step": 10321 }, { "epoch": 0.6993698760078596, "grad_norm": 5.356873512268066, "learning_rate": 8.68971182147991e-05, "loss": 0.608, "step": 10322 }, { "epoch": 0.6994376312758317, "grad_norm": 7.385077476501465, "learning_rate": 8.689574919570129e-05, "loss": 0.7914, "step": 10323 }, { "epoch": 0.6995053865438038, "grad_norm": 5.762533664703369, "learning_rate": 8.689438017660347e-05, "loss": 0.7171, "step": 10324 }, { "epoch": 0.6995731418117759, "grad_norm": 6.16245698928833, "learning_rate": 8.689301115750566e-05, "loss": 0.7742, "step": 10325 }, { "epoch": 0.699640897079748, "grad_norm": 5.776895523071289, "learning_rate": 8.689164213840784e-05, "loss": 0.8008, "step": 10326 }, { "epoch": 0.6997086523477201, "grad_norm": 7.285096645355225, "learning_rate": 8.689027311931002e-05, "loss": 1.0003, "step": 10327 }, { "epoch": 0.6997764076156922, "grad_norm": 6.187610149383545, "learning_rate": 8.68889041002122e-05, "loss": 0.9697, "step": 10328 }, { "epoch": 0.6998441628836642, "grad_norm": 7.224822521209717, "learning_rate": 8.68875350811144e-05, "loss": 0.8965, "step": 10329 }, { "epoch": 0.6999119181516363, "grad_norm": 7.907904624938965, "learning_rate": 8.688616606201658e-05, "loss": 0.9466, "step": 10330 }, { "epoch": 0.6999796734196084, "grad_norm": 5.577702522277832, "learning_rate": 8.688479704291876e-05, "loss": 0.7779, "step": 10331 }, { "epoch": 0.7000474286875804, "grad_norm": 6.485890865325928, "learning_rate": 8.688342802382094e-05, "loss": 0.9368, "step": 10332 }, { "epoch": 0.7001151839555525, "grad_norm": 6.532778739929199, "learning_rate": 8.688205900472312e-05, "loss": 0.9044, "step": 10333 }, { "epoch": 0.7001829392235246, "grad_norm": 9.568724632263184, "learning_rate": 8.688068998562531e-05, "loss": 1.1657, "step": 10334 }, { "epoch": 0.7002506944914967, "grad_norm": 7.1607255935668945, "learning_rate": 8.687932096652749e-05, "loss": 0.8107, "step": 10335 }, { "epoch": 0.7003184497594688, "grad_norm": 7.112110614776611, "learning_rate": 8.687795194742967e-05, "loss": 0.8092, "step": 10336 }, { "epoch": 0.7003862050274409, "grad_norm": 6.201446056365967, "learning_rate": 8.687658292833185e-05, "loss": 0.8682, "step": 10337 }, { "epoch": 0.700453960295413, "grad_norm": 5.587967395782471, "learning_rate": 8.687521390923405e-05, "loss": 0.7769, "step": 10338 }, { "epoch": 0.700521715563385, "grad_norm": 4.441295623779297, "learning_rate": 8.687384489013623e-05, "loss": 0.5984, "step": 10339 }, { "epoch": 0.7005894708313571, "grad_norm": 7.061400413513184, "learning_rate": 8.687247587103841e-05, "loss": 0.7871, "step": 10340 }, { "epoch": 0.7006572260993292, "grad_norm": 6.004641532897949, "learning_rate": 8.687110685194059e-05, "loss": 0.8535, "step": 10341 }, { "epoch": 0.7007249813673013, "grad_norm": 6.329019546508789, "learning_rate": 8.686973783284277e-05, "loss": 0.7604, "step": 10342 }, { "epoch": 0.7007927366352734, "grad_norm": 5.995157718658447, "learning_rate": 8.686836881374496e-05, "loss": 0.8674, "step": 10343 }, { "epoch": 0.7008604919032455, "grad_norm": 6.5860915184021, "learning_rate": 8.686699979464714e-05, "loss": 0.6908, "step": 10344 }, { "epoch": 0.7009282471712176, "grad_norm": 7.01938009262085, "learning_rate": 8.686563077554932e-05, "loss": 0.7821, "step": 10345 }, { "epoch": 0.7009960024391897, "grad_norm": 4.958036422729492, "learning_rate": 8.68642617564515e-05, "loss": 0.7112, "step": 10346 }, { "epoch": 0.7010637577071618, "grad_norm": 5.658689022064209, "learning_rate": 8.686289273735368e-05, "loss": 0.768, "step": 10347 }, { "epoch": 0.7011315129751338, "grad_norm": 7.060564994812012, "learning_rate": 8.686152371825588e-05, "loss": 1.1327, "step": 10348 }, { "epoch": 0.7011992682431059, "grad_norm": 6.2527265548706055, "learning_rate": 8.686015469915806e-05, "loss": 0.8952, "step": 10349 }, { "epoch": 0.701267023511078, "grad_norm": 7.9083452224731445, "learning_rate": 8.685878568006024e-05, "loss": 0.9505, "step": 10350 }, { "epoch": 0.70133477877905, "grad_norm": 7.475040435791016, "learning_rate": 8.685741666096242e-05, "loss": 0.8404, "step": 10351 }, { "epoch": 0.7014025340470221, "grad_norm": 7.27475643157959, "learning_rate": 8.685604764186461e-05, "loss": 0.9485, "step": 10352 }, { "epoch": 0.7014702893149942, "grad_norm": 5.844339847564697, "learning_rate": 8.685467862276679e-05, "loss": 0.9179, "step": 10353 }, { "epoch": 0.7015380445829663, "grad_norm": 6.823174953460693, "learning_rate": 8.685330960366897e-05, "loss": 0.7872, "step": 10354 }, { "epoch": 0.7016057998509384, "grad_norm": 7.774914264678955, "learning_rate": 8.685194058457115e-05, "loss": 0.9215, "step": 10355 }, { "epoch": 0.7016735551189105, "grad_norm": 6.16814661026001, "learning_rate": 8.685057156547333e-05, "loss": 0.9785, "step": 10356 }, { "epoch": 0.7017413103868826, "grad_norm": 5.761654853820801, "learning_rate": 8.684920254637553e-05, "loss": 0.8459, "step": 10357 }, { "epoch": 0.7018090656548547, "grad_norm": 5.926375865936279, "learning_rate": 8.684783352727771e-05, "loss": 0.7908, "step": 10358 }, { "epoch": 0.7018768209228268, "grad_norm": 7.2848639488220215, "learning_rate": 8.684646450817989e-05, "loss": 0.8424, "step": 10359 }, { "epoch": 0.7019445761907989, "grad_norm": 6.377554416656494, "learning_rate": 8.684509548908207e-05, "loss": 0.8161, "step": 10360 }, { "epoch": 0.702012331458771, "grad_norm": 6.2031426429748535, "learning_rate": 8.684372646998426e-05, "loss": 0.9576, "step": 10361 }, { "epoch": 0.702080086726743, "grad_norm": 7.374354362487793, "learning_rate": 8.684235745088644e-05, "loss": 0.8022, "step": 10362 }, { "epoch": 0.7021478419947151, "grad_norm": 5.276646614074707, "learning_rate": 8.684098843178862e-05, "loss": 0.5603, "step": 10363 }, { "epoch": 0.7022155972626871, "grad_norm": 5.207109451293945, "learning_rate": 8.68396194126908e-05, "loss": 0.5605, "step": 10364 }, { "epoch": 0.7022833525306592, "grad_norm": 6.302850723266602, "learning_rate": 8.6838250393593e-05, "loss": 0.861, "step": 10365 }, { "epoch": 0.7023511077986313, "grad_norm": 5.8094072341918945, "learning_rate": 8.683688137449518e-05, "loss": 0.8374, "step": 10366 }, { "epoch": 0.7024188630666034, "grad_norm": 6.657436370849609, "learning_rate": 8.683551235539736e-05, "loss": 0.9263, "step": 10367 }, { "epoch": 0.7024866183345755, "grad_norm": 5.042036533355713, "learning_rate": 8.683414333629955e-05, "loss": 0.6188, "step": 10368 }, { "epoch": 0.7025543736025476, "grad_norm": 5.913759231567383, "learning_rate": 8.683277431720173e-05, "loss": 0.6699, "step": 10369 }, { "epoch": 0.7026221288705197, "grad_norm": 6.477380752563477, "learning_rate": 8.683140529810391e-05, "loss": 0.8353, "step": 10370 }, { "epoch": 0.7026898841384918, "grad_norm": 5.284722805023193, "learning_rate": 8.68300362790061e-05, "loss": 0.8633, "step": 10371 }, { "epoch": 0.7027576394064639, "grad_norm": 5.480528354644775, "learning_rate": 8.682866725990829e-05, "loss": 0.7068, "step": 10372 }, { "epoch": 0.7028253946744359, "grad_norm": 5.857044696807861, "learning_rate": 8.682729824081047e-05, "loss": 0.8696, "step": 10373 }, { "epoch": 0.702893149942408, "grad_norm": 6.4731764793396, "learning_rate": 8.682592922171265e-05, "loss": 0.7464, "step": 10374 }, { "epoch": 0.7029609052103801, "grad_norm": 7.0602827072143555, "learning_rate": 8.682456020261484e-05, "loss": 0.6128, "step": 10375 }, { "epoch": 0.7030286604783522, "grad_norm": 5.9556474685668945, "learning_rate": 8.682319118351702e-05, "loss": 0.7004, "step": 10376 }, { "epoch": 0.7030964157463243, "grad_norm": 7.673183917999268, "learning_rate": 8.68218221644192e-05, "loss": 0.7612, "step": 10377 }, { "epoch": 0.7031641710142964, "grad_norm": 7.043504238128662, "learning_rate": 8.682045314532138e-05, "loss": 1.1159, "step": 10378 }, { "epoch": 0.7032319262822685, "grad_norm": 6.224184513092041, "learning_rate": 8.681908412622356e-05, "loss": 0.7247, "step": 10379 }, { "epoch": 0.7032996815502406, "grad_norm": 7.104019641876221, "learning_rate": 8.681771510712576e-05, "loss": 0.8121, "step": 10380 }, { "epoch": 0.7033674368182126, "grad_norm": 6.4362263679504395, "learning_rate": 8.681634608802794e-05, "loss": 0.8133, "step": 10381 }, { "epoch": 0.7034351920861847, "grad_norm": 5.4112067222595215, "learning_rate": 8.681497706893012e-05, "loss": 0.8037, "step": 10382 }, { "epoch": 0.7035029473541567, "grad_norm": 8.056005477905273, "learning_rate": 8.68136080498323e-05, "loss": 1.0531, "step": 10383 }, { "epoch": 0.7035707026221288, "grad_norm": 6.620260715484619, "learning_rate": 8.681223903073449e-05, "loss": 0.8135, "step": 10384 }, { "epoch": 0.7036384578901009, "grad_norm": 5.953632354736328, "learning_rate": 8.681087001163667e-05, "loss": 0.9362, "step": 10385 }, { "epoch": 0.703706213158073, "grad_norm": 4.4729719161987305, "learning_rate": 8.680950099253885e-05, "loss": 0.8507, "step": 10386 }, { "epoch": 0.7037739684260451, "grad_norm": 6.998383522033691, "learning_rate": 8.680813197344103e-05, "loss": 0.8018, "step": 10387 }, { "epoch": 0.7038417236940172, "grad_norm": 5.445269584655762, "learning_rate": 8.680676295434321e-05, "loss": 0.7241, "step": 10388 }, { "epoch": 0.7039094789619893, "grad_norm": 7.320235729217529, "learning_rate": 8.68053939352454e-05, "loss": 1.0455, "step": 10389 }, { "epoch": 0.7039772342299614, "grad_norm": 7.40581750869751, "learning_rate": 8.680402491614759e-05, "loss": 0.8128, "step": 10390 }, { "epoch": 0.7040449894979335, "grad_norm": 6.813145637512207, "learning_rate": 8.680265589704977e-05, "loss": 0.9455, "step": 10391 }, { "epoch": 0.7041127447659056, "grad_norm": 5.903909683227539, "learning_rate": 8.680128687795195e-05, "loss": 0.6496, "step": 10392 }, { "epoch": 0.7041805000338777, "grad_norm": 4.9222846031188965, "learning_rate": 8.679991785885414e-05, "loss": 0.6605, "step": 10393 }, { "epoch": 0.7042482553018498, "grad_norm": 6.948107719421387, "learning_rate": 8.679854883975632e-05, "loss": 0.9015, "step": 10394 }, { "epoch": 0.7043160105698218, "grad_norm": 6.005917072296143, "learning_rate": 8.67971798206585e-05, "loss": 0.9129, "step": 10395 }, { "epoch": 0.7043837658377939, "grad_norm": 5.235043048858643, "learning_rate": 8.679581080156068e-05, "loss": 0.9046, "step": 10396 }, { "epoch": 0.7044515211057659, "grad_norm": 6.271544456481934, "learning_rate": 8.679444178246286e-05, "loss": 1.0159, "step": 10397 }, { "epoch": 0.704519276373738, "grad_norm": 5.432122707366943, "learning_rate": 8.679307276336506e-05, "loss": 0.8806, "step": 10398 }, { "epoch": 0.7045870316417101, "grad_norm": 5.534310817718506, "learning_rate": 8.679170374426724e-05, "loss": 0.8565, "step": 10399 }, { "epoch": 0.7046547869096822, "grad_norm": 6.202160835266113, "learning_rate": 8.679033472516942e-05, "loss": 1.1205, "step": 10400 }, { "epoch": 0.7047225421776543, "grad_norm": 7.187075614929199, "learning_rate": 8.67889657060716e-05, "loss": 0.8384, "step": 10401 }, { "epoch": 0.7047902974456264, "grad_norm": 4.62196159362793, "learning_rate": 8.678759668697378e-05, "loss": 0.6643, "step": 10402 }, { "epoch": 0.7048580527135985, "grad_norm": 10.150405883789062, "learning_rate": 8.678622766787597e-05, "loss": 0.6745, "step": 10403 }, { "epoch": 0.7049258079815706, "grad_norm": 6.843104362487793, "learning_rate": 8.678485864877815e-05, "loss": 0.7891, "step": 10404 }, { "epoch": 0.7049935632495427, "grad_norm": 6.199191570281982, "learning_rate": 8.678348962968033e-05, "loss": 0.7102, "step": 10405 }, { "epoch": 0.7050613185175147, "grad_norm": 5.368592739105225, "learning_rate": 8.678212061058251e-05, "loss": 0.8577, "step": 10406 }, { "epoch": 0.7051290737854868, "grad_norm": 4.696331977844238, "learning_rate": 8.67807515914847e-05, "loss": 0.6958, "step": 10407 }, { "epoch": 0.7051968290534589, "grad_norm": 6.961827754974365, "learning_rate": 8.677938257238689e-05, "loss": 1.0023, "step": 10408 }, { "epoch": 0.705264584321431, "grad_norm": 6.114429473876953, "learning_rate": 8.677801355328907e-05, "loss": 0.6351, "step": 10409 }, { "epoch": 0.7053323395894031, "grad_norm": 7.005643844604492, "learning_rate": 8.677664453419125e-05, "loss": 1.039, "step": 10410 }, { "epoch": 0.7054000948573752, "grad_norm": 5.262114524841309, "learning_rate": 8.677527551509344e-05, "loss": 0.7621, "step": 10411 }, { "epoch": 0.7054678501253473, "grad_norm": 6.364197731018066, "learning_rate": 8.677390649599562e-05, "loss": 0.8989, "step": 10412 }, { "epoch": 0.7055356053933193, "grad_norm": 5.497344970703125, "learning_rate": 8.67725374768978e-05, "loss": 0.6066, "step": 10413 }, { "epoch": 0.7056033606612914, "grad_norm": 6.382382869720459, "learning_rate": 8.67711684578e-05, "loss": 0.7266, "step": 10414 }, { "epoch": 0.7056711159292635, "grad_norm": 7.423126220703125, "learning_rate": 8.676979943870218e-05, "loss": 0.7454, "step": 10415 }, { "epoch": 0.7057388711972356, "grad_norm": 7.46668004989624, "learning_rate": 8.676843041960436e-05, "loss": 0.845, "step": 10416 }, { "epoch": 0.7058066264652076, "grad_norm": 5.152261734008789, "learning_rate": 8.676706140050655e-05, "loss": 0.8531, "step": 10417 }, { "epoch": 0.7058743817331797, "grad_norm": 8.402978897094727, "learning_rate": 8.676569238140873e-05, "loss": 1.0256, "step": 10418 }, { "epoch": 0.7059421370011518, "grad_norm": 5.3230299949646, "learning_rate": 8.676432336231091e-05, "loss": 0.7647, "step": 10419 }, { "epoch": 0.7060098922691239, "grad_norm": 7.257562160491943, "learning_rate": 8.676295434321309e-05, "loss": 0.8413, "step": 10420 }, { "epoch": 0.706077647537096, "grad_norm": 5.904243469238281, "learning_rate": 8.676158532411529e-05, "loss": 0.8503, "step": 10421 }, { "epoch": 0.7061454028050681, "grad_norm": 6.7053141593933105, "learning_rate": 8.676021630501747e-05, "loss": 0.8836, "step": 10422 }, { "epoch": 0.7062131580730402, "grad_norm": 7.1715407371521, "learning_rate": 8.675884728591965e-05, "loss": 0.8266, "step": 10423 }, { "epoch": 0.7062809133410123, "grad_norm": 6.313091278076172, "learning_rate": 8.675747826682183e-05, "loss": 0.6628, "step": 10424 }, { "epoch": 0.7063486686089844, "grad_norm": 5.576920986175537, "learning_rate": 8.675610924772401e-05, "loss": 0.7227, "step": 10425 }, { "epoch": 0.7064164238769565, "grad_norm": 6.882504463195801, "learning_rate": 8.67547402286262e-05, "loss": 0.8294, "step": 10426 }, { "epoch": 0.7064841791449286, "grad_norm": 8.857022285461426, "learning_rate": 8.675337120952838e-05, "loss": 0.5265, "step": 10427 }, { "epoch": 0.7065519344129007, "grad_norm": 6.785702228546143, "learning_rate": 8.675200219043056e-05, "loss": 0.8107, "step": 10428 }, { "epoch": 0.7066196896808727, "grad_norm": 7.2406415939331055, "learning_rate": 8.675063317133274e-05, "loss": 0.8089, "step": 10429 }, { "epoch": 0.7066874449488447, "grad_norm": 5.409148216247559, "learning_rate": 8.674926415223494e-05, "loss": 0.7191, "step": 10430 }, { "epoch": 0.7067552002168168, "grad_norm": 6.049896717071533, "learning_rate": 8.674789513313712e-05, "loss": 0.8811, "step": 10431 }, { "epoch": 0.7068229554847889, "grad_norm": 8.478447914123535, "learning_rate": 8.67465261140393e-05, "loss": 0.8117, "step": 10432 }, { "epoch": 0.706890710752761, "grad_norm": 6.473963260650635, "learning_rate": 8.674515709494148e-05, "loss": 1.0078, "step": 10433 }, { "epoch": 0.7069584660207331, "grad_norm": 5.57169771194458, "learning_rate": 8.674378807584366e-05, "loss": 0.6526, "step": 10434 }, { "epoch": 0.7070262212887052, "grad_norm": 5.3196492195129395, "learning_rate": 8.674241905674585e-05, "loss": 0.6312, "step": 10435 }, { "epoch": 0.7070939765566773, "grad_norm": 5.9507293701171875, "learning_rate": 8.674105003764803e-05, "loss": 0.7502, "step": 10436 }, { "epoch": 0.7071617318246494, "grad_norm": 5.272159099578857, "learning_rate": 8.673968101855021e-05, "loss": 0.7445, "step": 10437 }, { "epoch": 0.7072294870926215, "grad_norm": 8.225152969360352, "learning_rate": 8.673831199945239e-05, "loss": 0.9625, "step": 10438 }, { "epoch": 0.7072972423605935, "grad_norm": 5.791821479797363, "learning_rate": 8.673694298035459e-05, "loss": 0.7997, "step": 10439 }, { "epoch": 0.7073649976285656, "grad_norm": 6.391631126403809, "learning_rate": 8.673557396125677e-05, "loss": 0.8449, "step": 10440 }, { "epoch": 0.7074327528965377, "grad_norm": 6.157900333404541, "learning_rate": 8.673420494215895e-05, "loss": 0.8001, "step": 10441 }, { "epoch": 0.7075005081645098, "grad_norm": 5.64890193939209, "learning_rate": 8.673283592306113e-05, "loss": 0.6663, "step": 10442 }, { "epoch": 0.7075682634324819, "grad_norm": 7.436509132385254, "learning_rate": 8.673146690396331e-05, "loss": 0.8742, "step": 10443 }, { "epoch": 0.707636018700454, "grad_norm": 4.78845739364624, "learning_rate": 8.67300978848655e-05, "loss": 0.5905, "step": 10444 }, { "epoch": 0.7077037739684261, "grad_norm": 7.130674362182617, "learning_rate": 8.672872886576768e-05, "loss": 1.2396, "step": 10445 }, { "epoch": 0.7077715292363981, "grad_norm": 7.3212761878967285, "learning_rate": 8.672735984666986e-05, "loss": 0.8896, "step": 10446 }, { "epoch": 0.7078392845043702, "grad_norm": 7.6907548904418945, "learning_rate": 8.672599082757204e-05, "loss": 1.1194, "step": 10447 }, { "epoch": 0.7079070397723423, "grad_norm": 6.078713417053223, "learning_rate": 8.672462180847424e-05, "loss": 0.8578, "step": 10448 }, { "epoch": 0.7079747950403144, "grad_norm": 6.047597408294678, "learning_rate": 8.672325278937642e-05, "loss": 0.6988, "step": 10449 }, { "epoch": 0.7080425503082864, "grad_norm": 6.882000923156738, "learning_rate": 8.67218837702786e-05, "loss": 1.0003, "step": 10450 }, { "epoch": 0.7081103055762585, "grad_norm": 7.0581560134887695, "learning_rate": 8.672051475118078e-05, "loss": 0.8585, "step": 10451 }, { "epoch": 0.7081780608442306, "grad_norm": 5.636070728302002, "learning_rate": 8.671914573208296e-05, "loss": 0.9355, "step": 10452 }, { "epoch": 0.7082458161122027, "grad_norm": 7.167375564575195, "learning_rate": 8.671777671298515e-05, "loss": 0.8098, "step": 10453 }, { "epoch": 0.7083135713801748, "grad_norm": 6.989759922027588, "learning_rate": 8.671640769388733e-05, "loss": 1.1052, "step": 10454 }, { "epoch": 0.7083813266481469, "grad_norm": 5.774247646331787, "learning_rate": 8.671503867478951e-05, "loss": 0.8362, "step": 10455 }, { "epoch": 0.708449081916119, "grad_norm": 5.6326518058776855, "learning_rate": 8.671366965569169e-05, "loss": 0.644, "step": 10456 }, { "epoch": 0.7085168371840911, "grad_norm": 6.38750696182251, "learning_rate": 8.671230063659389e-05, "loss": 0.7756, "step": 10457 }, { "epoch": 0.7085845924520632, "grad_norm": 6.129147529602051, "learning_rate": 8.671093161749607e-05, "loss": 0.8983, "step": 10458 }, { "epoch": 0.7086523477200353, "grad_norm": 7.424493789672852, "learning_rate": 8.670956259839825e-05, "loss": 0.9429, "step": 10459 }, { "epoch": 0.7087201029880074, "grad_norm": 6.838191509246826, "learning_rate": 8.670819357930044e-05, "loss": 0.7546, "step": 10460 }, { "epoch": 0.7087878582559795, "grad_norm": 5.380428791046143, "learning_rate": 8.670682456020262e-05, "loss": 0.6884, "step": 10461 }, { "epoch": 0.7088556135239514, "grad_norm": 5.4953203201293945, "learning_rate": 8.67054555411048e-05, "loss": 1.0417, "step": 10462 }, { "epoch": 0.7089233687919235, "grad_norm": 6.5481133460998535, "learning_rate": 8.6704086522007e-05, "loss": 0.7382, "step": 10463 }, { "epoch": 0.7089911240598956, "grad_norm": 7.717205047607422, "learning_rate": 8.670271750290918e-05, "loss": 0.9389, "step": 10464 }, { "epoch": 0.7090588793278677, "grad_norm": 6.287739276885986, "learning_rate": 8.670134848381136e-05, "loss": 0.9393, "step": 10465 }, { "epoch": 0.7091266345958398, "grad_norm": 5.565641403198242, "learning_rate": 8.669997946471354e-05, "loss": 0.758, "step": 10466 }, { "epoch": 0.7091943898638119, "grad_norm": 5.262805938720703, "learning_rate": 8.669861044561573e-05, "loss": 0.792, "step": 10467 }, { "epoch": 0.709262145131784, "grad_norm": 5.002110004425049, "learning_rate": 8.669724142651791e-05, "loss": 0.7365, "step": 10468 }, { "epoch": 0.7093299003997561, "grad_norm": 6.84413480758667, "learning_rate": 8.669587240742009e-05, "loss": 1.0521, "step": 10469 }, { "epoch": 0.7093976556677282, "grad_norm": 6.899505138397217, "learning_rate": 8.669450338832227e-05, "loss": 0.795, "step": 10470 }, { "epoch": 0.7094654109357003, "grad_norm": 5.376099109649658, "learning_rate": 8.669313436922447e-05, "loss": 0.51, "step": 10471 }, { "epoch": 0.7095331662036723, "grad_norm": 6.934320449829102, "learning_rate": 8.669176535012665e-05, "loss": 0.9405, "step": 10472 }, { "epoch": 0.7096009214716444, "grad_norm": 5.896731376647949, "learning_rate": 8.669039633102883e-05, "loss": 0.6869, "step": 10473 }, { "epoch": 0.7096686767396165, "grad_norm": 5.4463887214660645, "learning_rate": 8.6689027311931e-05, "loss": 0.8513, "step": 10474 }, { "epoch": 0.7097364320075886, "grad_norm": 6.024421215057373, "learning_rate": 8.668765829283319e-05, "loss": 0.7993, "step": 10475 }, { "epoch": 0.7098041872755607, "grad_norm": 7.861370086669922, "learning_rate": 8.668628927373538e-05, "loss": 0.7246, "step": 10476 }, { "epoch": 0.7098719425435328, "grad_norm": 5.0704779624938965, "learning_rate": 8.668492025463756e-05, "loss": 0.6759, "step": 10477 }, { "epoch": 0.7099396978115049, "grad_norm": 6.787322998046875, "learning_rate": 8.668355123553974e-05, "loss": 0.8075, "step": 10478 }, { "epoch": 0.7100074530794769, "grad_norm": 5.564799785614014, "learning_rate": 8.668218221644192e-05, "loss": 0.7596, "step": 10479 }, { "epoch": 0.710075208347449, "grad_norm": 6.072136402130127, "learning_rate": 8.66808131973441e-05, "loss": 0.6902, "step": 10480 }, { "epoch": 0.7101429636154211, "grad_norm": 6.825998783111572, "learning_rate": 8.66794441782463e-05, "loss": 0.7987, "step": 10481 }, { "epoch": 0.7102107188833932, "grad_norm": 6.803398609161377, "learning_rate": 8.667807515914848e-05, "loss": 0.8946, "step": 10482 }, { "epoch": 0.7102784741513652, "grad_norm": 5.5623250007629395, "learning_rate": 8.667670614005066e-05, "loss": 0.9588, "step": 10483 }, { "epoch": 0.7103462294193373, "grad_norm": 6.420827865600586, "learning_rate": 8.667533712095284e-05, "loss": 0.9871, "step": 10484 }, { "epoch": 0.7104139846873094, "grad_norm": 5.774916172027588, "learning_rate": 8.667396810185503e-05, "loss": 0.7826, "step": 10485 }, { "epoch": 0.7104817399552815, "grad_norm": 6.701958656311035, "learning_rate": 8.667259908275721e-05, "loss": 0.9528, "step": 10486 }, { "epoch": 0.7105494952232536, "grad_norm": 6.663124084472656, "learning_rate": 8.667123006365939e-05, "loss": 0.7279, "step": 10487 }, { "epoch": 0.7106172504912257, "grad_norm": 6.165869235992432, "learning_rate": 8.666986104456157e-05, "loss": 0.7835, "step": 10488 }, { "epoch": 0.7106850057591978, "grad_norm": 5.795663356781006, "learning_rate": 8.666849202546375e-05, "loss": 0.6844, "step": 10489 }, { "epoch": 0.7107527610271699, "grad_norm": 5.601436138153076, "learning_rate": 8.666712300636595e-05, "loss": 0.7844, "step": 10490 }, { "epoch": 0.710820516295142, "grad_norm": 6.733765125274658, "learning_rate": 8.666575398726813e-05, "loss": 0.7687, "step": 10491 }, { "epoch": 0.7108882715631141, "grad_norm": 6.032510757446289, "learning_rate": 8.66643849681703e-05, "loss": 0.7252, "step": 10492 }, { "epoch": 0.7109560268310862, "grad_norm": 4.691253662109375, "learning_rate": 8.666301594907249e-05, "loss": 0.7354, "step": 10493 }, { "epoch": 0.7110237820990583, "grad_norm": 6.740907669067383, "learning_rate": 8.666164692997468e-05, "loss": 0.854, "step": 10494 }, { "epoch": 0.7110915373670302, "grad_norm": 6.258440971374512, "learning_rate": 8.666027791087686e-05, "loss": 0.7787, "step": 10495 }, { "epoch": 0.7111592926350023, "grad_norm": 5.37103271484375, "learning_rate": 8.665890889177904e-05, "loss": 0.8159, "step": 10496 }, { "epoch": 0.7112270479029744, "grad_norm": 5.726749420166016, "learning_rate": 8.665753987268122e-05, "loss": 1.0529, "step": 10497 }, { "epoch": 0.7112948031709465, "grad_norm": 6.467258930206299, "learning_rate": 8.66561708535834e-05, "loss": 0.7445, "step": 10498 }, { "epoch": 0.7113625584389186, "grad_norm": 5.931604385375977, "learning_rate": 8.66548018344856e-05, "loss": 0.6146, "step": 10499 }, { "epoch": 0.7114303137068907, "grad_norm": 7.676519393920898, "learning_rate": 8.665343281538778e-05, "loss": 0.9234, "step": 10500 }, { "epoch": 0.7114980689748628, "grad_norm": 6.444290637969971, "learning_rate": 8.665206379628996e-05, "loss": 0.8529, "step": 10501 }, { "epoch": 0.7115658242428349, "grad_norm": 6.420405864715576, "learning_rate": 8.665069477719214e-05, "loss": 0.8439, "step": 10502 }, { "epoch": 0.711633579510807, "grad_norm": 5.584212779998779, "learning_rate": 8.664932575809432e-05, "loss": 0.6583, "step": 10503 }, { "epoch": 0.7117013347787791, "grad_norm": 6.1522746086120605, "learning_rate": 8.664795673899651e-05, "loss": 0.7773, "step": 10504 }, { "epoch": 0.7117690900467512, "grad_norm": 6.573955535888672, "learning_rate": 8.664658771989869e-05, "loss": 0.7553, "step": 10505 }, { "epoch": 0.7118368453147232, "grad_norm": 7.660068988800049, "learning_rate": 8.664521870080087e-05, "loss": 0.7807, "step": 10506 }, { "epoch": 0.7119046005826953, "grad_norm": 6.398780822753906, "learning_rate": 8.664384968170307e-05, "loss": 0.7814, "step": 10507 }, { "epoch": 0.7119723558506674, "grad_norm": 6.873563766479492, "learning_rate": 8.664248066260525e-05, "loss": 0.8067, "step": 10508 }, { "epoch": 0.7120401111186395, "grad_norm": 6.932216644287109, "learning_rate": 8.664111164350743e-05, "loss": 0.9248, "step": 10509 }, { "epoch": 0.7121078663866116, "grad_norm": 6.539022445678711, "learning_rate": 8.663974262440962e-05, "loss": 0.8059, "step": 10510 }, { "epoch": 0.7121756216545836, "grad_norm": 6.882415771484375, "learning_rate": 8.66383736053118e-05, "loss": 0.853, "step": 10511 }, { "epoch": 0.7122433769225557, "grad_norm": 7.576079368591309, "learning_rate": 8.663700458621398e-05, "loss": 1.1152, "step": 10512 }, { "epoch": 0.7123111321905278, "grad_norm": 5.934848785400391, "learning_rate": 8.663563556711617e-05, "loss": 0.7804, "step": 10513 }, { "epoch": 0.7123788874584999, "grad_norm": 5.297085762023926, "learning_rate": 8.663426654801836e-05, "loss": 0.6543, "step": 10514 }, { "epoch": 0.712446642726472, "grad_norm": 5.618426322937012, "learning_rate": 8.663289752892054e-05, "loss": 0.5286, "step": 10515 }, { "epoch": 0.712514397994444, "grad_norm": 5.978342533111572, "learning_rate": 8.663152850982272e-05, "loss": 0.6195, "step": 10516 }, { "epoch": 0.7125821532624161, "grad_norm": 7.146844863891602, "learning_rate": 8.663015949072491e-05, "loss": 0.9239, "step": 10517 }, { "epoch": 0.7126499085303882, "grad_norm": 7.71320915222168, "learning_rate": 8.662879047162709e-05, "loss": 0.8047, "step": 10518 }, { "epoch": 0.7127176637983603, "grad_norm": 5.022526741027832, "learning_rate": 8.662742145252927e-05, "loss": 0.5567, "step": 10519 }, { "epoch": 0.7127854190663324, "grad_norm": 6.884553909301758, "learning_rate": 8.662605243343145e-05, "loss": 0.9116, "step": 10520 }, { "epoch": 0.7128531743343045, "grad_norm": 9.86026668548584, "learning_rate": 8.662468341433363e-05, "loss": 1.1616, "step": 10521 }, { "epoch": 0.7129209296022766, "grad_norm": 6.307768821716309, "learning_rate": 8.662331439523583e-05, "loss": 0.8304, "step": 10522 }, { "epoch": 0.7129886848702487, "grad_norm": 8.609663009643555, "learning_rate": 8.6621945376138e-05, "loss": 0.6635, "step": 10523 }, { "epoch": 0.7130564401382208, "grad_norm": 5.718436241149902, "learning_rate": 8.662057635704019e-05, "loss": 0.889, "step": 10524 }, { "epoch": 0.7131241954061929, "grad_norm": 6.126955986022949, "learning_rate": 8.661920733794237e-05, "loss": 0.6973, "step": 10525 }, { "epoch": 0.713191950674165, "grad_norm": 6.689998149871826, "learning_rate": 8.661783831884456e-05, "loss": 0.6399, "step": 10526 }, { "epoch": 0.713259705942137, "grad_norm": 6.500828742980957, "learning_rate": 8.661646929974674e-05, "loss": 1.0992, "step": 10527 }, { "epoch": 0.713327461210109, "grad_norm": 7.04468297958374, "learning_rate": 8.661510028064892e-05, "loss": 1.0635, "step": 10528 }, { "epoch": 0.7133952164780811, "grad_norm": 6.968896865844727, "learning_rate": 8.66137312615511e-05, "loss": 0.8095, "step": 10529 }, { "epoch": 0.7134629717460532, "grad_norm": 7.557732105255127, "learning_rate": 8.661236224245328e-05, "loss": 0.6779, "step": 10530 }, { "epoch": 0.7135307270140253, "grad_norm": 4.746248245239258, "learning_rate": 8.661099322335548e-05, "loss": 0.7275, "step": 10531 }, { "epoch": 0.7135984822819974, "grad_norm": 7.140705108642578, "learning_rate": 8.660962420425766e-05, "loss": 0.9274, "step": 10532 }, { "epoch": 0.7136662375499695, "grad_norm": 6.661166191101074, "learning_rate": 8.660825518515984e-05, "loss": 0.8924, "step": 10533 }, { "epoch": 0.7137339928179416, "grad_norm": 6.4814653396606445, "learning_rate": 8.660688616606202e-05, "loss": 0.7357, "step": 10534 }, { "epoch": 0.7138017480859137, "grad_norm": 9.411799430847168, "learning_rate": 8.66055171469642e-05, "loss": 0.5209, "step": 10535 }, { "epoch": 0.7138695033538858, "grad_norm": 5.223617076873779, "learning_rate": 8.660414812786639e-05, "loss": 0.7623, "step": 10536 }, { "epoch": 0.7139372586218579, "grad_norm": 8.094182014465332, "learning_rate": 8.660277910876857e-05, "loss": 1.0194, "step": 10537 }, { "epoch": 0.71400501388983, "grad_norm": 5.444286823272705, "learning_rate": 8.660141008967075e-05, "loss": 0.8126, "step": 10538 }, { "epoch": 0.714072769157802, "grad_norm": 4.902561664581299, "learning_rate": 8.660004107057293e-05, "loss": 0.6524, "step": 10539 }, { "epoch": 0.7141405244257741, "grad_norm": 7.155951023101807, "learning_rate": 8.659867205147513e-05, "loss": 0.9928, "step": 10540 }, { "epoch": 0.7142082796937462, "grad_norm": 6.7633538246154785, "learning_rate": 8.65973030323773e-05, "loss": 1.0564, "step": 10541 }, { "epoch": 0.7142760349617183, "grad_norm": 6.050258636474609, "learning_rate": 8.659593401327949e-05, "loss": 0.8051, "step": 10542 }, { "epoch": 0.7143437902296904, "grad_norm": 4.88824987411499, "learning_rate": 8.659456499418167e-05, "loss": 0.6135, "step": 10543 }, { "epoch": 0.7144115454976624, "grad_norm": 5.773684501647949, "learning_rate": 8.659319597508385e-05, "loss": 0.5467, "step": 10544 }, { "epoch": 0.7144793007656345, "grad_norm": 7.082754611968994, "learning_rate": 8.659182695598604e-05, "loss": 0.7677, "step": 10545 }, { "epoch": 0.7145470560336066, "grad_norm": 5.4242424964904785, "learning_rate": 8.659045793688822e-05, "loss": 0.7915, "step": 10546 }, { "epoch": 0.7146148113015787, "grad_norm": 5.280063152313232, "learning_rate": 8.65890889177904e-05, "loss": 0.5614, "step": 10547 }, { "epoch": 0.7146825665695508, "grad_norm": 6.720800876617432, "learning_rate": 8.658771989869258e-05, "loss": 1.064, "step": 10548 }, { "epoch": 0.7147503218375229, "grad_norm": 7.908580303192139, "learning_rate": 8.658635087959478e-05, "loss": 0.7564, "step": 10549 }, { "epoch": 0.7148180771054949, "grad_norm": 6.164776802062988, "learning_rate": 8.658498186049696e-05, "loss": 0.8359, "step": 10550 }, { "epoch": 0.714885832373467, "grad_norm": 5.345958709716797, "learning_rate": 8.658361284139914e-05, "loss": 0.9018, "step": 10551 }, { "epoch": 0.7149535876414391, "grad_norm": 6.751514911651611, "learning_rate": 8.658224382230132e-05, "loss": 0.9795, "step": 10552 }, { "epoch": 0.7150213429094112, "grad_norm": 5.850390911102295, "learning_rate": 8.658087480320351e-05, "loss": 0.8055, "step": 10553 }, { "epoch": 0.7150890981773833, "grad_norm": 6.081165790557861, "learning_rate": 8.657950578410569e-05, "loss": 0.7952, "step": 10554 }, { "epoch": 0.7151568534453554, "grad_norm": 5.211760997772217, "learning_rate": 8.657813676500787e-05, "loss": 0.6467, "step": 10555 }, { "epoch": 0.7152246087133275, "grad_norm": 7.24871826171875, "learning_rate": 8.657676774591007e-05, "loss": 0.8434, "step": 10556 }, { "epoch": 0.7152923639812996, "grad_norm": 6.2204413414001465, "learning_rate": 8.657539872681225e-05, "loss": 0.7948, "step": 10557 }, { "epoch": 0.7153601192492717, "grad_norm": 6.038403511047363, "learning_rate": 8.657402970771443e-05, "loss": 0.69, "step": 10558 }, { "epoch": 0.7154278745172438, "grad_norm": 6.616792678833008, "learning_rate": 8.657266068861662e-05, "loss": 0.9007, "step": 10559 }, { "epoch": 0.7154956297852157, "grad_norm": 6.901274681091309, "learning_rate": 8.65712916695188e-05, "loss": 0.8176, "step": 10560 }, { "epoch": 0.7155633850531878, "grad_norm": 6.145236015319824, "learning_rate": 8.656992265042098e-05, "loss": 0.6697, "step": 10561 }, { "epoch": 0.7156311403211599, "grad_norm": 6.30226993560791, "learning_rate": 8.656855363132316e-05, "loss": 0.7585, "step": 10562 }, { "epoch": 0.715698895589132, "grad_norm": 5.349961757659912, "learning_rate": 8.656718461222535e-05, "loss": 0.6811, "step": 10563 }, { "epoch": 0.7157666508571041, "grad_norm": 6.9230170249938965, "learning_rate": 8.656581559312753e-05, "loss": 0.7924, "step": 10564 }, { "epoch": 0.7158344061250762, "grad_norm": 6.30393123626709, "learning_rate": 8.656444657402972e-05, "loss": 0.8558, "step": 10565 }, { "epoch": 0.7159021613930483, "grad_norm": 7.642063617706299, "learning_rate": 8.65630775549319e-05, "loss": 0.8416, "step": 10566 }, { "epoch": 0.7159699166610204, "grad_norm": 6.944372653961182, "learning_rate": 8.656170853583408e-05, "loss": 1.0213, "step": 10567 }, { "epoch": 0.7160376719289925, "grad_norm": 6.925499439239502, "learning_rate": 8.656033951673627e-05, "loss": 0.8486, "step": 10568 }, { "epoch": 0.7161054271969646, "grad_norm": 5.875875949859619, "learning_rate": 8.655897049763845e-05, "loss": 0.8515, "step": 10569 }, { "epoch": 0.7161731824649367, "grad_norm": 9.030150413513184, "learning_rate": 8.655760147854063e-05, "loss": 0.9014, "step": 10570 }, { "epoch": 0.7162409377329088, "grad_norm": 5.825559139251709, "learning_rate": 8.655623245944281e-05, "loss": 0.8333, "step": 10571 }, { "epoch": 0.7163086930008808, "grad_norm": 5.936555862426758, "learning_rate": 8.6554863440345e-05, "loss": 0.6806, "step": 10572 }, { "epoch": 0.7163764482688529, "grad_norm": 5.446226596832275, "learning_rate": 8.655349442124719e-05, "loss": 0.687, "step": 10573 }, { "epoch": 0.716444203536825, "grad_norm": 7.467900276184082, "learning_rate": 8.655212540214937e-05, "loss": 1.1386, "step": 10574 }, { "epoch": 0.7165119588047971, "grad_norm": 6.0190534591674805, "learning_rate": 8.655075638305155e-05, "loss": 0.9846, "step": 10575 }, { "epoch": 0.7165797140727691, "grad_norm": 6.0063252449035645, "learning_rate": 8.654938736395373e-05, "loss": 0.7755, "step": 10576 }, { "epoch": 0.7166474693407412, "grad_norm": 7.271022796630859, "learning_rate": 8.654801834485592e-05, "loss": 0.8129, "step": 10577 }, { "epoch": 0.7167152246087133, "grad_norm": 5.204225063323975, "learning_rate": 8.65466493257581e-05, "loss": 0.707, "step": 10578 }, { "epoch": 0.7167829798766854, "grad_norm": 6.814970016479492, "learning_rate": 8.654528030666028e-05, "loss": 0.9433, "step": 10579 }, { "epoch": 0.7168507351446575, "grad_norm": 9.155210494995117, "learning_rate": 8.654391128756246e-05, "loss": 0.8058, "step": 10580 }, { "epoch": 0.7169184904126296, "grad_norm": 6.666374683380127, "learning_rate": 8.654254226846465e-05, "loss": 0.9789, "step": 10581 }, { "epoch": 0.7169862456806017, "grad_norm": 5.498271465301514, "learning_rate": 8.654117324936684e-05, "loss": 0.7266, "step": 10582 }, { "epoch": 0.7170540009485737, "grad_norm": 6.632149696350098, "learning_rate": 8.653980423026902e-05, "loss": 0.8929, "step": 10583 }, { "epoch": 0.7171217562165458, "grad_norm": 6.820444107055664, "learning_rate": 8.65384352111712e-05, "loss": 0.7493, "step": 10584 }, { "epoch": 0.7171895114845179, "grad_norm": 9.759723663330078, "learning_rate": 8.653706619207338e-05, "loss": 0.8465, "step": 10585 }, { "epoch": 0.71725726675249, "grad_norm": 6.131860256195068, "learning_rate": 8.653569717297557e-05, "loss": 0.8678, "step": 10586 }, { "epoch": 0.7173250220204621, "grad_norm": 5.567459583282471, "learning_rate": 8.653432815387775e-05, "loss": 0.7342, "step": 10587 }, { "epoch": 0.7173927772884342, "grad_norm": 4.433963775634766, "learning_rate": 8.653295913477993e-05, "loss": 0.7901, "step": 10588 }, { "epoch": 0.7174605325564063, "grad_norm": 5.557954788208008, "learning_rate": 8.653159011568211e-05, "loss": 0.6189, "step": 10589 }, { "epoch": 0.7175282878243784, "grad_norm": 6.555310249328613, "learning_rate": 8.653022109658429e-05, "loss": 0.8554, "step": 10590 }, { "epoch": 0.7175960430923505, "grad_norm": 7.396895408630371, "learning_rate": 8.652885207748649e-05, "loss": 0.9261, "step": 10591 }, { "epoch": 0.7176637983603226, "grad_norm": 5.6380181312561035, "learning_rate": 8.652748305838867e-05, "loss": 0.7968, "step": 10592 }, { "epoch": 0.7177315536282945, "grad_norm": 5.689277648925781, "learning_rate": 8.652611403929085e-05, "loss": 0.7951, "step": 10593 }, { "epoch": 0.7177993088962666, "grad_norm": 5.647032737731934, "learning_rate": 8.652474502019303e-05, "loss": 0.8273, "step": 10594 }, { "epoch": 0.7178670641642387, "grad_norm": 6.165719985961914, "learning_rate": 8.652337600109522e-05, "loss": 0.7203, "step": 10595 }, { "epoch": 0.7179348194322108, "grad_norm": 5.114332675933838, "learning_rate": 8.65220069819974e-05, "loss": 0.6582, "step": 10596 }, { "epoch": 0.7180025747001829, "grad_norm": 6.3832879066467285, "learning_rate": 8.652063796289958e-05, "loss": 0.8028, "step": 10597 }, { "epoch": 0.718070329968155, "grad_norm": 5.82213020324707, "learning_rate": 8.651926894380176e-05, "loss": 0.5738, "step": 10598 }, { "epoch": 0.7181380852361271, "grad_norm": 6.337172031402588, "learning_rate": 8.651789992470396e-05, "loss": 0.8797, "step": 10599 }, { "epoch": 0.7182058405040992, "grad_norm": 5.312211513519287, "learning_rate": 8.651653090560614e-05, "loss": 0.732, "step": 10600 }, { "epoch": 0.7182735957720713, "grad_norm": 8.132328033447266, "learning_rate": 8.651516188650832e-05, "loss": 0.9214, "step": 10601 }, { "epoch": 0.7183413510400434, "grad_norm": 6.073488235473633, "learning_rate": 8.651379286741051e-05, "loss": 0.7914, "step": 10602 }, { "epoch": 0.7184091063080155, "grad_norm": 4.74514102935791, "learning_rate": 8.651242384831269e-05, "loss": 0.7396, "step": 10603 }, { "epoch": 0.7184768615759876, "grad_norm": 6.970630645751953, "learning_rate": 8.651105482921487e-05, "loss": 0.6759, "step": 10604 }, { "epoch": 0.7185446168439596, "grad_norm": 5.5301408767700195, "learning_rate": 8.650968581011706e-05, "loss": 0.7496, "step": 10605 }, { "epoch": 0.7186123721119317, "grad_norm": 9.243334770202637, "learning_rate": 8.650831679101924e-05, "loss": 0.9962, "step": 10606 }, { "epoch": 0.7186801273799038, "grad_norm": 4.740606784820557, "learning_rate": 8.650694777192143e-05, "loss": 0.5528, "step": 10607 }, { "epoch": 0.7187478826478759, "grad_norm": 6.146499156951904, "learning_rate": 8.65055787528236e-05, "loss": 0.8339, "step": 10608 }, { "epoch": 0.7188156379158479, "grad_norm": 6.534127235412598, "learning_rate": 8.65042097337258e-05, "loss": 1.1163, "step": 10609 }, { "epoch": 0.71888339318382, "grad_norm": 8.349403381347656, "learning_rate": 8.650284071462798e-05, "loss": 0.8872, "step": 10610 }, { "epoch": 0.7189511484517921, "grad_norm": 4.883057117462158, "learning_rate": 8.650147169553016e-05, "loss": 0.7221, "step": 10611 }, { "epoch": 0.7190189037197642, "grad_norm": 9.08403491973877, "learning_rate": 8.650010267643234e-05, "loss": 0.9264, "step": 10612 }, { "epoch": 0.7190866589877363, "grad_norm": 8.753477096557617, "learning_rate": 8.649873365733452e-05, "loss": 0.715, "step": 10613 }, { "epoch": 0.7191544142557084, "grad_norm": 6.945448875427246, "learning_rate": 8.649736463823671e-05, "loss": 0.7706, "step": 10614 }, { "epoch": 0.7192221695236805, "grad_norm": 6.655423164367676, "learning_rate": 8.64959956191389e-05, "loss": 0.7832, "step": 10615 }, { "epoch": 0.7192899247916525, "grad_norm": 6.322832107543945, "learning_rate": 8.649462660004108e-05, "loss": 0.8017, "step": 10616 }, { "epoch": 0.7193576800596246, "grad_norm": 6.454827785491943, "learning_rate": 8.649325758094326e-05, "loss": 0.7492, "step": 10617 }, { "epoch": 0.7194254353275967, "grad_norm": 7.011631011962891, "learning_rate": 8.649188856184545e-05, "loss": 0.7562, "step": 10618 }, { "epoch": 0.7194931905955688, "grad_norm": 6.621539115905762, "learning_rate": 8.649051954274763e-05, "loss": 0.76, "step": 10619 }, { "epoch": 0.7195609458635409, "grad_norm": 8.84100341796875, "learning_rate": 8.648915052364981e-05, "loss": 0.9858, "step": 10620 }, { "epoch": 0.719628701131513, "grad_norm": 6.356812000274658, "learning_rate": 8.648778150455199e-05, "loss": 0.7553, "step": 10621 }, { "epoch": 0.7196964563994851, "grad_norm": 6.760133743286133, "learning_rate": 8.648641248545417e-05, "loss": 0.8642, "step": 10622 }, { "epoch": 0.7197642116674572, "grad_norm": 6.104750633239746, "learning_rate": 8.648504346635636e-05, "loss": 0.6905, "step": 10623 }, { "epoch": 0.7198319669354293, "grad_norm": 6.207709312438965, "learning_rate": 8.648367444725855e-05, "loss": 0.8549, "step": 10624 }, { "epoch": 0.7198997222034013, "grad_norm": 6.436330795288086, "learning_rate": 8.648230542816073e-05, "loss": 0.7064, "step": 10625 }, { "epoch": 0.7199674774713734, "grad_norm": 5.475677967071533, "learning_rate": 8.64809364090629e-05, "loss": 0.6146, "step": 10626 }, { "epoch": 0.7200352327393454, "grad_norm": 9.833735466003418, "learning_rate": 8.64795673899651e-05, "loss": 0.7222, "step": 10627 }, { "epoch": 0.7201029880073175, "grad_norm": 8.227372169494629, "learning_rate": 8.647819837086728e-05, "loss": 0.9564, "step": 10628 }, { "epoch": 0.7201707432752896, "grad_norm": 7.26641321182251, "learning_rate": 8.647682935176946e-05, "loss": 0.796, "step": 10629 }, { "epoch": 0.7202384985432617, "grad_norm": 6.712799549102783, "learning_rate": 8.647546033267164e-05, "loss": 0.9736, "step": 10630 }, { "epoch": 0.7203062538112338, "grad_norm": 6.906972885131836, "learning_rate": 8.647409131357382e-05, "loss": 0.8028, "step": 10631 }, { "epoch": 0.7203740090792059, "grad_norm": 6.211350440979004, "learning_rate": 8.647272229447601e-05, "loss": 0.7953, "step": 10632 }, { "epoch": 0.720441764347178, "grad_norm": 7.281525611877441, "learning_rate": 8.64713532753782e-05, "loss": 0.7667, "step": 10633 }, { "epoch": 0.7205095196151501, "grad_norm": 6.922200679779053, "learning_rate": 8.646998425628038e-05, "loss": 0.9156, "step": 10634 }, { "epoch": 0.7205772748831222, "grad_norm": 4.672682762145996, "learning_rate": 8.646861523718256e-05, "loss": 0.6602, "step": 10635 }, { "epoch": 0.7206450301510943, "grad_norm": 6.199947834014893, "learning_rate": 8.646724621808474e-05, "loss": 1.058, "step": 10636 }, { "epoch": 0.7207127854190664, "grad_norm": 6.395276069641113, "learning_rate": 8.646587719898693e-05, "loss": 0.834, "step": 10637 }, { "epoch": 0.7207805406870385, "grad_norm": 5.92854118347168, "learning_rate": 8.646450817988911e-05, "loss": 0.9097, "step": 10638 }, { "epoch": 0.7208482959550105, "grad_norm": 9.13015079498291, "learning_rate": 8.646313916079129e-05, "loss": 0.7069, "step": 10639 }, { "epoch": 0.7209160512229826, "grad_norm": 5.72170877456665, "learning_rate": 8.646177014169347e-05, "loss": 0.9316, "step": 10640 }, { "epoch": 0.7209838064909547, "grad_norm": 7.367129325866699, "learning_rate": 8.646040112259567e-05, "loss": 0.7785, "step": 10641 }, { "epoch": 0.7210515617589267, "grad_norm": 5.120598316192627, "learning_rate": 8.645903210349785e-05, "loss": 0.6656, "step": 10642 }, { "epoch": 0.7211193170268988, "grad_norm": 6.610129356384277, "learning_rate": 8.645766308440003e-05, "loss": 0.9131, "step": 10643 }, { "epoch": 0.7211870722948709, "grad_norm": 6.49082612991333, "learning_rate": 8.64562940653022e-05, "loss": 0.8923, "step": 10644 }, { "epoch": 0.721254827562843, "grad_norm": 7.010980129241943, "learning_rate": 8.64549250462044e-05, "loss": 0.794, "step": 10645 }, { "epoch": 0.7213225828308151, "grad_norm": 7.219010829925537, "learning_rate": 8.645355602710658e-05, "loss": 0.9228, "step": 10646 }, { "epoch": 0.7213903380987872, "grad_norm": 5.3610053062438965, "learning_rate": 8.645218700800876e-05, "loss": 0.7479, "step": 10647 }, { "epoch": 0.7214580933667593, "grad_norm": 6.72417688369751, "learning_rate": 8.645081798891095e-05, "loss": 0.7015, "step": 10648 }, { "epoch": 0.7215258486347313, "grad_norm": 6.321094989776611, "learning_rate": 8.644944896981313e-05, "loss": 0.7247, "step": 10649 }, { "epoch": 0.7215936039027034, "grad_norm": 6.939053058624268, "learning_rate": 8.644807995071532e-05, "loss": 0.9851, "step": 10650 }, { "epoch": 0.7216613591706755, "grad_norm": 7.304567337036133, "learning_rate": 8.644671093161751e-05, "loss": 0.9053, "step": 10651 }, { "epoch": 0.7217291144386476, "grad_norm": 7.707671165466309, "learning_rate": 8.644534191251969e-05, "loss": 0.903, "step": 10652 }, { "epoch": 0.7217968697066197, "grad_norm": 8.089873313903809, "learning_rate": 8.644397289342187e-05, "loss": 0.8698, "step": 10653 }, { "epoch": 0.7218646249745918, "grad_norm": 7.8891119956970215, "learning_rate": 8.644260387432405e-05, "loss": 0.9856, "step": 10654 }, { "epoch": 0.7219323802425639, "grad_norm": 5.457139015197754, "learning_rate": 8.644123485522624e-05, "loss": 0.6027, "step": 10655 }, { "epoch": 0.722000135510536, "grad_norm": 5.993939399719238, "learning_rate": 8.643986583612842e-05, "loss": 0.8584, "step": 10656 }, { "epoch": 0.7220678907785081, "grad_norm": 5.376394271850586, "learning_rate": 8.64384968170306e-05, "loss": 0.6551, "step": 10657 }, { "epoch": 0.7221356460464801, "grad_norm": 7.0075249671936035, "learning_rate": 8.643712779793279e-05, "loss": 0.6486, "step": 10658 }, { "epoch": 0.7222034013144522, "grad_norm": 6.753172397613525, "learning_rate": 8.643575877883498e-05, "loss": 1.0099, "step": 10659 }, { "epoch": 0.7222711565824242, "grad_norm": 8.42198371887207, "learning_rate": 8.643438975973716e-05, "loss": 0.8545, "step": 10660 }, { "epoch": 0.7223389118503963, "grad_norm": 9.268589973449707, "learning_rate": 8.643302074063934e-05, "loss": 1.0352, "step": 10661 }, { "epoch": 0.7224066671183684, "grad_norm": 6.209371566772461, "learning_rate": 8.643165172154152e-05, "loss": 0.8474, "step": 10662 }, { "epoch": 0.7224744223863405, "grad_norm": 10.753402709960938, "learning_rate": 8.64302827024437e-05, "loss": 0.8922, "step": 10663 }, { "epoch": 0.7225421776543126, "grad_norm": 7.065412998199463, "learning_rate": 8.64289136833459e-05, "loss": 0.7056, "step": 10664 }, { "epoch": 0.7226099329222847, "grad_norm": 5.867188930511475, "learning_rate": 8.642754466424807e-05, "loss": 0.6052, "step": 10665 }, { "epoch": 0.7226776881902568, "grad_norm": 7.415475368499756, "learning_rate": 8.642617564515025e-05, "loss": 0.8593, "step": 10666 }, { "epoch": 0.7227454434582289, "grad_norm": 6.486458778381348, "learning_rate": 8.642480662605244e-05, "loss": 0.8857, "step": 10667 }, { "epoch": 0.722813198726201, "grad_norm": 8.942933082580566, "learning_rate": 8.642343760695462e-05, "loss": 0.8, "step": 10668 }, { "epoch": 0.7228809539941731, "grad_norm": 4.676167011260986, "learning_rate": 8.642206858785681e-05, "loss": 0.8456, "step": 10669 }, { "epoch": 0.7229487092621452, "grad_norm": 6.750422477722168, "learning_rate": 8.642069956875899e-05, "loss": 0.9438, "step": 10670 }, { "epoch": 0.7230164645301173, "grad_norm": 8.17405891418457, "learning_rate": 8.641933054966117e-05, "loss": 0.9946, "step": 10671 }, { "epoch": 0.7230842197980893, "grad_norm": 7.05765438079834, "learning_rate": 8.641796153056335e-05, "loss": 1.0896, "step": 10672 }, { "epoch": 0.7231519750660614, "grad_norm": 5.9634857177734375, "learning_rate": 8.641659251146554e-05, "loss": 0.7975, "step": 10673 }, { "epoch": 0.7232197303340334, "grad_norm": 5.722130298614502, "learning_rate": 8.641522349236772e-05, "loss": 0.7891, "step": 10674 }, { "epoch": 0.7232874856020055, "grad_norm": 6.058647155761719, "learning_rate": 8.64138544732699e-05, "loss": 0.9987, "step": 10675 }, { "epoch": 0.7233552408699776, "grad_norm": 8.651153564453125, "learning_rate": 8.641248545417209e-05, "loss": 0.8333, "step": 10676 }, { "epoch": 0.7234229961379497, "grad_norm": 5.720202445983887, "learning_rate": 8.641111643507427e-05, "loss": 1.0346, "step": 10677 }, { "epoch": 0.7234907514059218, "grad_norm": 5.499077796936035, "learning_rate": 8.640974741597646e-05, "loss": 0.6993, "step": 10678 }, { "epoch": 0.7235585066738939, "grad_norm": 4.752992153167725, "learning_rate": 8.640837839687864e-05, "loss": 0.6134, "step": 10679 }, { "epoch": 0.723626261941866, "grad_norm": 5.855991363525391, "learning_rate": 8.640700937778082e-05, "loss": 0.7177, "step": 10680 }, { "epoch": 0.7236940172098381, "grad_norm": 6.163865566253662, "learning_rate": 8.6405640358683e-05, "loss": 0.8502, "step": 10681 }, { "epoch": 0.7237617724778102, "grad_norm": 9.418116569519043, "learning_rate": 8.64042713395852e-05, "loss": 1.1688, "step": 10682 }, { "epoch": 0.7238295277457822, "grad_norm": 6.628981113433838, "learning_rate": 8.640290232048737e-05, "loss": 0.6387, "step": 10683 }, { "epoch": 0.7238972830137543, "grad_norm": 5.6346659660339355, "learning_rate": 8.640153330138956e-05, "loss": 1.1043, "step": 10684 }, { "epoch": 0.7239650382817264, "grad_norm": 6.523744583129883, "learning_rate": 8.640016428229174e-05, "loss": 0.8915, "step": 10685 }, { "epoch": 0.7240327935496985, "grad_norm": 5.4516167640686035, "learning_rate": 8.639879526319392e-05, "loss": 0.6427, "step": 10686 }, { "epoch": 0.7241005488176706, "grad_norm": 6.612290382385254, "learning_rate": 8.639742624409611e-05, "loss": 0.8633, "step": 10687 }, { "epoch": 0.7241683040856427, "grad_norm": 5.145784854888916, "learning_rate": 8.639605722499829e-05, "loss": 0.7764, "step": 10688 }, { "epoch": 0.7242360593536148, "grad_norm": 5.991262912750244, "learning_rate": 8.639468820590047e-05, "loss": 0.723, "step": 10689 }, { "epoch": 0.7243038146215869, "grad_norm": 5.2909955978393555, "learning_rate": 8.639331918680265e-05, "loss": 0.6494, "step": 10690 }, { "epoch": 0.7243715698895589, "grad_norm": 5.02228307723999, "learning_rate": 8.639195016770484e-05, "loss": 0.5259, "step": 10691 }, { "epoch": 0.724439325157531, "grad_norm": 7.383895397186279, "learning_rate": 8.639058114860703e-05, "loss": 0.8099, "step": 10692 }, { "epoch": 0.724507080425503, "grad_norm": 7.651692867279053, "learning_rate": 8.63892121295092e-05, "loss": 0.9502, "step": 10693 }, { "epoch": 0.7245748356934751, "grad_norm": 7.732839107513428, "learning_rate": 8.63878431104114e-05, "loss": 0.6534, "step": 10694 }, { "epoch": 0.7246425909614472, "grad_norm": 6.229733467102051, "learning_rate": 8.638647409131358e-05, "loss": 0.9293, "step": 10695 }, { "epoch": 0.7247103462294193, "grad_norm": 6.323513507843018, "learning_rate": 8.638510507221576e-05, "loss": 0.7275, "step": 10696 }, { "epoch": 0.7247781014973914, "grad_norm": 4.998154163360596, "learning_rate": 8.638373605311795e-05, "loss": 0.7756, "step": 10697 }, { "epoch": 0.7248458567653635, "grad_norm": 5.609971046447754, "learning_rate": 8.638236703402013e-05, "loss": 0.9864, "step": 10698 }, { "epoch": 0.7249136120333356, "grad_norm": 8.138459205627441, "learning_rate": 8.638099801492231e-05, "loss": 0.8453, "step": 10699 }, { "epoch": 0.7249813673013077, "grad_norm": 6.91035795211792, "learning_rate": 8.63796289958245e-05, "loss": 0.7852, "step": 10700 }, { "epoch": 0.7250491225692798, "grad_norm": 5.772835731506348, "learning_rate": 8.637825997672669e-05, "loss": 0.7951, "step": 10701 }, { "epoch": 0.7251168778372519, "grad_norm": 7.034023761749268, "learning_rate": 8.637689095762887e-05, "loss": 0.9835, "step": 10702 }, { "epoch": 0.725184633105224, "grad_norm": 6.605203628540039, "learning_rate": 8.637552193853105e-05, "loss": 0.6946, "step": 10703 }, { "epoch": 0.725252388373196, "grad_norm": 6.754047870635986, "learning_rate": 8.637415291943323e-05, "loss": 0.605, "step": 10704 }, { "epoch": 0.7253201436411681, "grad_norm": 5.961748123168945, "learning_rate": 8.637278390033542e-05, "loss": 0.8438, "step": 10705 }, { "epoch": 0.7253878989091402, "grad_norm": 5.660187721252441, "learning_rate": 8.63714148812376e-05, "loss": 0.7882, "step": 10706 }, { "epoch": 0.7254556541771122, "grad_norm": 6.974256992340088, "learning_rate": 8.637004586213978e-05, "loss": 0.9528, "step": 10707 }, { "epoch": 0.7255234094450843, "grad_norm": 6.0205183029174805, "learning_rate": 8.636867684304196e-05, "loss": 0.8064, "step": 10708 }, { "epoch": 0.7255911647130564, "grad_norm": 5.7911057472229, "learning_rate": 8.636730782394415e-05, "loss": 0.7973, "step": 10709 }, { "epoch": 0.7256589199810285, "grad_norm": 6.384799480438232, "learning_rate": 8.636593880484634e-05, "loss": 0.8016, "step": 10710 }, { "epoch": 0.7257266752490006, "grad_norm": 5.134740352630615, "learning_rate": 8.636456978574852e-05, "loss": 0.7034, "step": 10711 }, { "epoch": 0.7257944305169727, "grad_norm": 6.371201992034912, "learning_rate": 8.63632007666507e-05, "loss": 0.8934, "step": 10712 }, { "epoch": 0.7258621857849448, "grad_norm": 9.626450538635254, "learning_rate": 8.636183174755288e-05, "loss": 0.9802, "step": 10713 }, { "epoch": 0.7259299410529169, "grad_norm": 7.453325271606445, "learning_rate": 8.636046272845507e-05, "loss": 0.7881, "step": 10714 }, { "epoch": 0.725997696320889, "grad_norm": 6.08189582824707, "learning_rate": 8.635909370935725e-05, "loss": 0.8223, "step": 10715 }, { "epoch": 0.726065451588861, "grad_norm": 6.120866298675537, "learning_rate": 8.635772469025943e-05, "loss": 0.7251, "step": 10716 }, { "epoch": 0.7261332068568331, "grad_norm": 5.628901481628418, "learning_rate": 8.635635567116161e-05, "loss": 0.7254, "step": 10717 }, { "epoch": 0.7262009621248052, "grad_norm": 5.944281101226807, "learning_rate": 8.63549866520638e-05, "loss": 0.9566, "step": 10718 }, { "epoch": 0.7262687173927773, "grad_norm": 5.806936740875244, "learning_rate": 8.635361763296599e-05, "loss": 0.7517, "step": 10719 }, { "epoch": 0.7263364726607494, "grad_norm": 6.54838228225708, "learning_rate": 8.635224861386817e-05, "loss": 0.6679, "step": 10720 }, { "epoch": 0.7264042279287215, "grad_norm": 6.255834102630615, "learning_rate": 8.635087959477035e-05, "loss": 0.5863, "step": 10721 }, { "epoch": 0.7264719831966936, "grad_norm": 6.097255706787109, "learning_rate": 8.634951057567253e-05, "loss": 0.7183, "step": 10722 }, { "epoch": 0.7265397384646656, "grad_norm": 8.153336524963379, "learning_rate": 8.634814155657471e-05, "loss": 0.89, "step": 10723 }, { "epoch": 0.7266074937326377, "grad_norm": 5.64036226272583, "learning_rate": 8.63467725374769e-05, "loss": 0.7711, "step": 10724 }, { "epoch": 0.7266752490006098, "grad_norm": 5.449916362762451, "learning_rate": 8.634540351837908e-05, "loss": 0.7613, "step": 10725 }, { "epoch": 0.7267430042685818, "grad_norm": 5.611260890960693, "learning_rate": 8.634403449928127e-05, "loss": 0.7947, "step": 10726 }, { "epoch": 0.7268107595365539, "grad_norm": 6.064743518829346, "learning_rate": 8.634266548018345e-05, "loss": 0.7733, "step": 10727 }, { "epoch": 0.726878514804526, "grad_norm": 6.760382175445557, "learning_rate": 8.634129646108564e-05, "loss": 0.7532, "step": 10728 }, { "epoch": 0.7269462700724981, "grad_norm": 6.390462398529053, "learning_rate": 8.633992744198782e-05, "loss": 0.827, "step": 10729 }, { "epoch": 0.7270140253404702, "grad_norm": 5.5772881507873535, "learning_rate": 8.633855842289e-05, "loss": 0.6548, "step": 10730 }, { "epoch": 0.7270817806084423, "grad_norm": 6.615449905395508, "learning_rate": 8.633718940379218e-05, "loss": 0.7181, "step": 10731 }, { "epoch": 0.7271495358764144, "grad_norm": 5.905831336975098, "learning_rate": 8.633582038469436e-05, "loss": 0.8118, "step": 10732 }, { "epoch": 0.7272172911443865, "grad_norm": 8.367280006408691, "learning_rate": 8.633445136559655e-05, "loss": 0.895, "step": 10733 }, { "epoch": 0.7272850464123586, "grad_norm": 5.006227493286133, "learning_rate": 8.633308234649873e-05, "loss": 0.7743, "step": 10734 }, { "epoch": 0.7273528016803307, "grad_norm": 5.736496448516846, "learning_rate": 8.633171332740092e-05, "loss": 0.7135, "step": 10735 }, { "epoch": 0.7274205569483028, "grad_norm": 6.69479513168335, "learning_rate": 8.63303443083031e-05, "loss": 0.9781, "step": 10736 }, { "epoch": 0.7274883122162749, "grad_norm": 5.120262622833252, "learning_rate": 8.632897528920529e-05, "loss": 0.703, "step": 10737 }, { "epoch": 0.727556067484247, "grad_norm": 5.343075275421143, "learning_rate": 8.632760627010747e-05, "loss": 0.7495, "step": 10738 }, { "epoch": 0.727623822752219, "grad_norm": 5.489655494689941, "learning_rate": 8.632623725100965e-05, "loss": 0.7753, "step": 10739 }, { "epoch": 0.727691578020191, "grad_norm": 5.322958469390869, "learning_rate": 8.632486823191184e-05, "loss": 0.7566, "step": 10740 }, { "epoch": 0.7277593332881631, "grad_norm": 8.018611907958984, "learning_rate": 8.632349921281402e-05, "loss": 0.8898, "step": 10741 }, { "epoch": 0.7278270885561352, "grad_norm": 5.888221740722656, "learning_rate": 8.63221301937162e-05, "loss": 0.7483, "step": 10742 }, { "epoch": 0.7278948438241073, "grad_norm": 7.260030746459961, "learning_rate": 8.63207611746184e-05, "loss": 0.826, "step": 10743 }, { "epoch": 0.7279625990920794, "grad_norm": 4.39701509475708, "learning_rate": 8.631939215552058e-05, "loss": 0.7154, "step": 10744 }, { "epoch": 0.7280303543600515, "grad_norm": 7.766528606414795, "learning_rate": 8.631802313642276e-05, "loss": 0.9013, "step": 10745 }, { "epoch": 0.7280981096280236, "grad_norm": 6.7016167640686035, "learning_rate": 8.631665411732494e-05, "loss": 0.6431, "step": 10746 }, { "epoch": 0.7281658648959957, "grad_norm": 7.331559181213379, "learning_rate": 8.631528509822713e-05, "loss": 0.8061, "step": 10747 }, { "epoch": 0.7282336201639678, "grad_norm": 7.0098114013671875, "learning_rate": 8.631391607912931e-05, "loss": 0.9915, "step": 10748 }, { "epoch": 0.7283013754319398, "grad_norm": 5.108738899230957, "learning_rate": 8.63125470600315e-05, "loss": 0.66, "step": 10749 }, { "epoch": 0.7283691306999119, "grad_norm": 5.508449554443359, "learning_rate": 8.631117804093367e-05, "loss": 0.6366, "step": 10750 }, { "epoch": 0.728436885967884, "grad_norm": 5.624075889587402, "learning_rate": 8.630980902183587e-05, "loss": 0.8407, "step": 10751 }, { "epoch": 0.7285046412358561, "grad_norm": 6.403767108917236, "learning_rate": 8.630844000273805e-05, "loss": 0.7612, "step": 10752 }, { "epoch": 0.7285723965038282, "grad_norm": 5.629929542541504, "learning_rate": 8.630707098364023e-05, "loss": 1.0102, "step": 10753 }, { "epoch": 0.7286401517718003, "grad_norm": 8.822092056274414, "learning_rate": 8.630570196454241e-05, "loss": 0.8066, "step": 10754 }, { "epoch": 0.7287079070397724, "grad_norm": 8.513496398925781, "learning_rate": 8.630433294544459e-05, "loss": 1.061, "step": 10755 }, { "epoch": 0.7287756623077444, "grad_norm": 5.223329067230225, "learning_rate": 8.630296392634678e-05, "loss": 0.7784, "step": 10756 }, { "epoch": 0.7288434175757165, "grad_norm": 7.098320960998535, "learning_rate": 8.630159490724896e-05, "loss": 0.9385, "step": 10757 }, { "epoch": 0.7289111728436886, "grad_norm": 5.522270679473877, "learning_rate": 8.630022588815114e-05, "loss": 0.6976, "step": 10758 }, { "epoch": 0.7289789281116607, "grad_norm": 6.411101341247559, "learning_rate": 8.629885686905332e-05, "loss": 0.7332, "step": 10759 }, { "epoch": 0.7290466833796327, "grad_norm": 5.263405799865723, "learning_rate": 8.629748784995552e-05, "loss": 0.699, "step": 10760 }, { "epoch": 0.7291144386476048, "grad_norm": 8.649581909179688, "learning_rate": 8.62961188308577e-05, "loss": 0.818, "step": 10761 }, { "epoch": 0.7291821939155769, "grad_norm": 5.901177883148193, "learning_rate": 8.629474981175988e-05, "loss": 0.8351, "step": 10762 }, { "epoch": 0.729249949183549, "grad_norm": 7.491204738616943, "learning_rate": 8.629338079266206e-05, "loss": 0.8642, "step": 10763 }, { "epoch": 0.7293177044515211, "grad_norm": 7.189452171325684, "learning_rate": 8.629201177356424e-05, "loss": 0.8524, "step": 10764 }, { "epoch": 0.7293854597194932, "grad_norm": 5.667553901672363, "learning_rate": 8.629064275446643e-05, "loss": 0.8197, "step": 10765 }, { "epoch": 0.7294532149874653, "grad_norm": 6.584259510040283, "learning_rate": 8.628927373536861e-05, "loss": 0.629, "step": 10766 }, { "epoch": 0.7295209702554374, "grad_norm": 7.937713146209717, "learning_rate": 8.62879047162708e-05, "loss": 0.8158, "step": 10767 }, { "epoch": 0.7295887255234095, "grad_norm": 9.484813690185547, "learning_rate": 8.628653569717297e-05, "loss": 1.0614, "step": 10768 }, { "epoch": 0.7296564807913816, "grad_norm": 5.351037502288818, "learning_rate": 8.628516667807516e-05, "loss": 0.6763, "step": 10769 }, { "epoch": 0.7297242360593537, "grad_norm": 7.94450569152832, "learning_rate": 8.628379765897735e-05, "loss": 1.1605, "step": 10770 }, { "epoch": 0.7297919913273258, "grad_norm": 8.101015090942383, "learning_rate": 8.628242863987953e-05, "loss": 0.8566, "step": 10771 }, { "epoch": 0.7298597465952977, "grad_norm": 5.217764854431152, "learning_rate": 8.628105962078171e-05, "loss": 0.7402, "step": 10772 }, { "epoch": 0.7299275018632698, "grad_norm": 6.194571495056152, "learning_rate": 8.627969060168389e-05, "loss": 0.7202, "step": 10773 }, { "epoch": 0.7299952571312419, "grad_norm": 5.476653575897217, "learning_rate": 8.627832158258608e-05, "loss": 0.7069, "step": 10774 }, { "epoch": 0.730063012399214, "grad_norm": 6.67211389541626, "learning_rate": 8.627695256348826e-05, "loss": 0.7778, "step": 10775 }, { "epoch": 0.7301307676671861, "grad_norm": 5.744596004486084, "learning_rate": 8.627558354439044e-05, "loss": 0.877, "step": 10776 }, { "epoch": 0.7301985229351582, "grad_norm": 7.225982666015625, "learning_rate": 8.627421452529263e-05, "loss": 0.8411, "step": 10777 }, { "epoch": 0.7302662782031303, "grad_norm": 5.470047473907471, "learning_rate": 8.62728455061948e-05, "loss": 0.6319, "step": 10778 }, { "epoch": 0.7303340334711024, "grad_norm": 6.065576553344727, "learning_rate": 8.6271476487097e-05, "loss": 0.744, "step": 10779 }, { "epoch": 0.7304017887390745, "grad_norm": 6.549447536468506, "learning_rate": 8.627010746799918e-05, "loss": 0.7161, "step": 10780 }, { "epoch": 0.7304695440070466, "grad_norm": 4.89664363861084, "learning_rate": 8.626873844890136e-05, "loss": 0.7585, "step": 10781 }, { "epoch": 0.7305372992750186, "grad_norm": 5.463417053222656, "learning_rate": 8.626736942980354e-05, "loss": 0.866, "step": 10782 }, { "epoch": 0.7306050545429907, "grad_norm": 7.537135601043701, "learning_rate": 8.626600041070573e-05, "loss": 0.8118, "step": 10783 }, { "epoch": 0.7306728098109628, "grad_norm": 6.079577445983887, "learning_rate": 8.626463139160791e-05, "loss": 0.846, "step": 10784 }, { "epoch": 0.7307405650789349, "grad_norm": 8.43422794342041, "learning_rate": 8.62632623725101e-05, "loss": 0.4532, "step": 10785 }, { "epoch": 0.730808320346907, "grad_norm": 9.411410331726074, "learning_rate": 8.626189335341228e-05, "loss": 0.808, "step": 10786 }, { "epoch": 0.7308760756148791, "grad_norm": 6.041145324707031, "learning_rate": 8.626052433431447e-05, "loss": 0.7452, "step": 10787 }, { "epoch": 0.7309438308828511, "grad_norm": 5.750189304351807, "learning_rate": 8.625915531521665e-05, "loss": 0.5868, "step": 10788 }, { "epoch": 0.7310115861508232, "grad_norm": 6.068814754486084, "learning_rate": 8.625778629611883e-05, "loss": 0.8551, "step": 10789 }, { "epoch": 0.7310793414187953, "grad_norm": 4.875567436218262, "learning_rate": 8.625641727702102e-05, "loss": 0.7231, "step": 10790 }, { "epoch": 0.7311470966867674, "grad_norm": 6.8060712814331055, "learning_rate": 8.62550482579232e-05, "loss": 0.7692, "step": 10791 }, { "epoch": 0.7312148519547395, "grad_norm": 5.561190605163574, "learning_rate": 8.625367923882538e-05, "loss": 0.7405, "step": 10792 }, { "epoch": 0.7312826072227115, "grad_norm": 5.9766130447387695, "learning_rate": 8.625231021972758e-05, "loss": 0.8625, "step": 10793 }, { "epoch": 0.7313503624906836, "grad_norm": 7.188475131988525, "learning_rate": 8.625094120062976e-05, "loss": 1.0192, "step": 10794 }, { "epoch": 0.7314181177586557, "grad_norm": 7.752885341644287, "learning_rate": 8.624957218153194e-05, "loss": 0.7109, "step": 10795 }, { "epoch": 0.7314858730266278, "grad_norm": 6.262071132659912, "learning_rate": 8.624820316243412e-05, "loss": 0.7859, "step": 10796 }, { "epoch": 0.7315536282945999, "grad_norm": 8.485372543334961, "learning_rate": 8.624683414333631e-05, "loss": 0.585, "step": 10797 }, { "epoch": 0.731621383562572, "grad_norm": 8.141338348388672, "learning_rate": 8.62454651242385e-05, "loss": 1.1567, "step": 10798 }, { "epoch": 0.7316891388305441, "grad_norm": 7.912255764007568, "learning_rate": 8.624409610514067e-05, "loss": 0.7, "step": 10799 }, { "epoch": 0.7317568940985162, "grad_norm": 5.951817035675049, "learning_rate": 8.624272708604285e-05, "loss": 0.9155, "step": 10800 }, { "epoch": 0.7318246493664883, "grad_norm": 6.594394683837891, "learning_rate": 8.624135806694503e-05, "loss": 0.9209, "step": 10801 }, { "epoch": 0.7318924046344604, "grad_norm": 6.647227764129639, "learning_rate": 8.623998904784723e-05, "loss": 0.7095, "step": 10802 }, { "epoch": 0.7319601599024325, "grad_norm": 5.953242778778076, "learning_rate": 8.623862002874941e-05, "loss": 0.6739, "step": 10803 }, { "epoch": 0.7320279151704046, "grad_norm": 6.691287517547607, "learning_rate": 8.623725100965159e-05, "loss": 0.8772, "step": 10804 }, { "epoch": 0.7320956704383765, "grad_norm": 8.432060241699219, "learning_rate": 8.623588199055377e-05, "loss": 0.676, "step": 10805 }, { "epoch": 0.7321634257063486, "grad_norm": 7.356803894042969, "learning_rate": 8.623451297145596e-05, "loss": 0.866, "step": 10806 }, { "epoch": 0.7322311809743207, "grad_norm": 6.421758651733398, "learning_rate": 8.623314395235814e-05, "loss": 0.9497, "step": 10807 }, { "epoch": 0.7322989362422928, "grad_norm": 4.597375869750977, "learning_rate": 8.623177493326032e-05, "loss": 0.788, "step": 10808 }, { "epoch": 0.7323666915102649, "grad_norm": 5.468592166900635, "learning_rate": 8.62304059141625e-05, "loss": 0.7864, "step": 10809 }, { "epoch": 0.732434446778237, "grad_norm": 6.1158833503723145, "learning_rate": 8.622903689506468e-05, "loss": 0.7756, "step": 10810 }, { "epoch": 0.7325022020462091, "grad_norm": 6.584497451782227, "learning_rate": 8.622766787596688e-05, "loss": 0.806, "step": 10811 }, { "epoch": 0.7325699573141812, "grad_norm": 4.517557621002197, "learning_rate": 8.622629885686906e-05, "loss": 0.5618, "step": 10812 }, { "epoch": 0.7326377125821533, "grad_norm": 9.059310913085938, "learning_rate": 8.622492983777124e-05, "loss": 0.8049, "step": 10813 }, { "epoch": 0.7327054678501254, "grad_norm": 5.33046293258667, "learning_rate": 8.622356081867342e-05, "loss": 0.7975, "step": 10814 }, { "epoch": 0.7327732231180974, "grad_norm": 6.890832424163818, "learning_rate": 8.622219179957561e-05, "loss": 0.781, "step": 10815 }, { "epoch": 0.7328409783860695, "grad_norm": 5.804647922515869, "learning_rate": 8.62208227804778e-05, "loss": 0.6286, "step": 10816 }, { "epoch": 0.7329087336540416, "grad_norm": 6.476672649383545, "learning_rate": 8.621945376137997e-05, "loss": 0.7815, "step": 10817 }, { "epoch": 0.7329764889220137, "grad_norm": 6.72651481628418, "learning_rate": 8.621808474228215e-05, "loss": 0.9064, "step": 10818 }, { "epoch": 0.7330442441899858, "grad_norm": 7.354333877563477, "learning_rate": 8.621671572318433e-05, "loss": 0.8214, "step": 10819 }, { "epoch": 0.7331119994579579, "grad_norm": 7.4680609703063965, "learning_rate": 8.621534670408653e-05, "loss": 0.9942, "step": 10820 }, { "epoch": 0.7331797547259299, "grad_norm": 6.3199968338012695, "learning_rate": 8.621397768498871e-05, "loss": 0.9796, "step": 10821 }, { "epoch": 0.733247509993902, "grad_norm": 5.8508453369140625, "learning_rate": 8.621260866589089e-05, "loss": 0.8393, "step": 10822 }, { "epoch": 0.7333152652618741, "grad_norm": 7.697128772735596, "learning_rate": 8.621123964679307e-05, "loss": 1.1706, "step": 10823 }, { "epoch": 0.7333830205298462, "grad_norm": 6.382595539093018, "learning_rate": 8.620987062769525e-05, "loss": 0.9019, "step": 10824 }, { "epoch": 0.7334507757978183, "grad_norm": 6.121464729309082, "learning_rate": 8.620850160859744e-05, "loss": 0.7314, "step": 10825 }, { "epoch": 0.7335185310657903, "grad_norm": 5.412440299987793, "learning_rate": 8.620713258949962e-05, "loss": 0.904, "step": 10826 }, { "epoch": 0.7335862863337624, "grad_norm": 5.761203765869141, "learning_rate": 8.62057635704018e-05, "loss": 0.7746, "step": 10827 }, { "epoch": 0.7336540416017345, "grad_norm": 5.339764595031738, "learning_rate": 8.620439455130399e-05, "loss": 0.8274, "step": 10828 }, { "epoch": 0.7337217968697066, "grad_norm": 4.942664623260498, "learning_rate": 8.620302553220618e-05, "loss": 0.7429, "step": 10829 }, { "epoch": 0.7337895521376787, "grad_norm": 6.011295318603516, "learning_rate": 8.620165651310836e-05, "loss": 0.6289, "step": 10830 }, { "epoch": 0.7338573074056508, "grad_norm": 5.268429279327393, "learning_rate": 8.620028749401054e-05, "loss": 0.7213, "step": 10831 }, { "epoch": 0.7339250626736229, "grad_norm": 5.994687080383301, "learning_rate": 8.619891847491272e-05, "loss": 0.7625, "step": 10832 }, { "epoch": 0.733992817941595, "grad_norm": 5.908527374267578, "learning_rate": 8.619754945581491e-05, "loss": 0.6557, "step": 10833 }, { "epoch": 0.7340605732095671, "grad_norm": 6.297107696533203, "learning_rate": 8.61961804367171e-05, "loss": 0.7886, "step": 10834 }, { "epoch": 0.7341283284775392, "grad_norm": 7.262679576873779, "learning_rate": 8.619481141761927e-05, "loss": 0.703, "step": 10835 }, { "epoch": 0.7341960837455113, "grad_norm": 6.179491996765137, "learning_rate": 8.619344239852147e-05, "loss": 0.646, "step": 10836 }, { "epoch": 0.7342638390134832, "grad_norm": 5.197315216064453, "learning_rate": 8.619207337942365e-05, "loss": 0.8495, "step": 10837 }, { "epoch": 0.7343315942814553, "grad_norm": 5.623149394989014, "learning_rate": 8.619070436032583e-05, "loss": 0.7513, "step": 10838 }, { "epoch": 0.7343993495494274, "grad_norm": 6.872591018676758, "learning_rate": 8.618933534122802e-05, "loss": 0.756, "step": 10839 }, { "epoch": 0.7344671048173995, "grad_norm": 6.851233005523682, "learning_rate": 8.61879663221302e-05, "loss": 0.9182, "step": 10840 }, { "epoch": 0.7345348600853716, "grad_norm": 6.495956897735596, "learning_rate": 8.618659730303238e-05, "loss": 0.9298, "step": 10841 }, { "epoch": 0.7346026153533437, "grad_norm": 5.927680492401123, "learning_rate": 8.618522828393456e-05, "loss": 0.7563, "step": 10842 }, { "epoch": 0.7346703706213158, "grad_norm": 6.39896821975708, "learning_rate": 8.618385926483676e-05, "loss": 0.993, "step": 10843 }, { "epoch": 0.7347381258892879, "grad_norm": 5.756770133972168, "learning_rate": 8.618249024573894e-05, "loss": 0.6887, "step": 10844 }, { "epoch": 0.73480588115726, "grad_norm": 4.987828731536865, "learning_rate": 8.618112122664112e-05, "loss": 0.5908, "step": 10845 }, { "epoch": 0.7348736364252321, "grad_norm": 7.487382888793945, "learning_rate": 8.61797522075433e-05, "loss": 0.6574, "step": 10846 }, { "epoch": 0.7349413916932042, "grad_norm": 5.079615592956543, "learning_rate": 8.617838318844549e-05, "loss": 0.9038, "step": 10847 }, { "epoch": 0.7350091469611763, "grad_norm": 7.286651134490967, "learning_rate": 8.617701416934767e-05, "loss": 0.9635, "step": 10848 }, { "epoch": 0.7350769022291483, "grad_norm": 6.60608434677124, "learning_rate": 8.617564515024985e-05, "loss": 0.8811, "step": 10849 }, { "epoch": 0.7351446574971204, "grad_norm": 5.821297645568848, "learning_rate": 8.617427613115203e-05, "loss": 0.6802, "step": 10850 }, { "epoch": 0.7352124127650925, "grad_norm": 5.659653663635254, "learning_rate": 8.617290711205421e-05, "loss": 0.9145, "step": 10851 }, { "epoch": 0.7352801680330646, "grad_norm": 6.8696980476379395, "learning_rate": 8.617153809295641e-05, "loss": 1.0301, "step": 10852 }, { "epoch": 0.7353479233010367, "grad_norm": 5.688724040985107, "learning_rate": 8.617016907385859e-05, "loss": 0.7185, "step": 10853 }, { "epoch": 0.7354156785690087, "grad_norm": 6.016847610473633, "learning_rate": 8.616880005476077e-05, "loss": 0.7597, "step": 10854 }, { "epoch": 0.7354834338369808, "grad_norm": 6.0264692306518555, "learning_rate": 8.616743103566295e-05, "loss": 0.6665, "step": 10855 }, { "epoch": 0.7355511891049529, "grad_norm": 5.6370415687561035, "learning_rate": 8.616606201656513e-05, "loss": 0.7088, "step": 10856 }, { "epoch": 0.735618944372925, "grad_norm": 6.686164379119873, "learning_rate": 8.616469299746732e-05, "loss": 0.7139, "step": 10857 }, { "epoch": 0.7356866996408971, "grad_norm": 6.4555134773254395, "learning_rate": 8.61633239783695e-05, "loss": 0.7969, "step": 10858 }, { "epoch": 0.7357544549088691, "grad_norm": 5.4364752769470215, "learning_rate": 8.616195495927168e-05, "loss": 0.6922, "step": 10859 }, { "epoch": 0.7358222101768412, "grad_norm": 7.089079856872559, "learning_rate": 8.616058594017386e-05, "loss": 1.0301, "step": 10860 }, { "epoch": 0.7358899654448133, "grad_norm": 5.888257026672363, "learning_rate": 8.615921692107606e-05, "loss": 0.5942, "step": 10861 }, { "epoch": 0.7359577207127854, "grad_norm": 5.762383460998535, "learning_rate": 8.615784790197824e-05, "loss": 0.6695, "step": 10862 }, { "epoch": 0.7360254759807575, "grad_norm": 5.797264575958252, "learning_rate": 8.615647888288042e-05, "loss": 0.705, "step": 10863 }, { "epoch": 0.7360932312487296, "grad_norm": 4.282798767089844, "learning_rate": 8.61551098637826e-05, "loss": 0.6221, "step": 10864 }, { "epoch": 0.7361609865167017, "grad_norm": 5.539671897888184, "learning_rate": 8.615374084468478e-05, "loss": 0.739, "step": 10865 }, { "epoch": 0.7362287417846738, "grad_norm": 5.675551891326904, "learning_rate": 8.615237182558697e-05, "loss": 0.9089, "step": 10866 }, { "epoch": 0.7362964970526459, "grad_norm": 5.868411064147949, "learning_rate": 8.615100280648915e-05, "loss": 0.8312, "step": 10867 }, { "epoch": 0.736364252320618, "grad_norm": 5.487252712249756, "learning_rate": 8.614963378739133e-05, "loss": 1.0099, "step": 10868 }, { "epoch": 0.7364320075885901, "grad_norm": 6.392849445343018, "learning_rate": 8.614826476829351e-05, "loss": 0.6691, "step": 10869 }, { "epoch": 0.736499762856562, "grad_norm": 6.58278226852417, "learning_rate": 8.614689574919571e-05, "loss": 0.8738, "step": 10870 }, { "epoch": 0.7365675181245341, "grad_norm": 8.645120620727539, "learning_rate": 8.614552673009789e-05, "loss": 0.7185, "step": 10871 }, { "epoch": 0.7366352733925062, "grad_norm": 4.876821041107178, "learning_rate": 8.614415771100007e-05, "loss": 0.6354, "step": 10872 }, { "epoch": 0.7367030286604783, "grad_norm": 5.1632232666015625, "learning_rate": 8.614278869190225e-05, "loss": 0.6593, "step": 10873 }, { "epoch": 0.7367707839284504, "grad_norm": 7.005191326141357, "learning_rate": 8.614141967280443e-05, "loss": 0.9814, "step": 10874 }, { "epoch": 0.7368385391964225, "grad_norm": 5.898367881774902, "learning_rate": 8.614005065370662e-05, "loss": 0.7328, "step": 10875 }, { "epoch": 0.7369062944643946, "grad_norm": 6.076502799987793, "learning_rate": 8.61386816346088e-05, "loss": 0.8477, "step": 10876 }, { "epoch": 0.7369740497323667, "grad_norm": 7.934567928314209, "learning_rate": 8.613731261551098e-05, "loss": 0.7914, "step": 10877 }, { "epoch": 0.7370418050003388, "grad_norm": 6.465484619140625, "learning_rate": 8.613594359641316e-05, "loss": 0.768, "step": 10878 }, { "epoch": 0.7371095602683109, "grad_norm": 5.772295951843262, "learning_rate": 8.613457457731536e-05, "loss": 0.7722, "step": 10879 }, { "epoch": 0.737177315536283, "grad_norm": 5.704565525054932, "learning_rate": 8.613320555821754e-05, "loss": 0.6609, "step": 10880 }, { "epoch": 0.737245070804255, "grad_norm": 6.054666042327881, "learning_rate": 8.613183653911972e-05, "loss": 0.8513, "step": 10881 }, { "epoch": 0.7373128260722271, "grad_norm": 6.808941841125488, "learning_rate": 8.613046752002191e-05, "loss": 0.8188, "step": 10882 }, { "epoch": 0.7373805813401992, "grad_norm": 5.95088005065918, "learning_rate": 8.61290985009241e-05, "loss": 0.8952, "step": 10883 }, { "epoch": 0.7374483366081713, "grad_norm": 6.34380578994751, "learning_rate": 8.612772948182627e-05, "loss": 0.9835, "step": 10884 }, { "epoch": 0.7375160918761434, "grad_norm": 6.720582008361816, "learning_rate": 8.612636046272847e-05, "loss": 1.0013, "step": 10885 }, { "epoch": 0.7375838471441154, "grad_norm": 7.400576591491699, "learning_rate": 8.612499144363065e-05, "loss": 0.906, "step": 10886 }, { "epoch": 0.7376516024120875, "grad_norm": 7.385793209075928, "learning_rate": 8.612362242453283e-05, "loss": 0.9448, "step": 10887 }, { "epoch": 0.7377193576800596, "grad_norm": 4.82860803604126, "learning_rate": 8.612225340543501e-05, "loss": 0.724, "step": 10888 }, { "epoch": 0.7377871129480317, "grad_norm": 7.288435935974121, "learning_rate": 8.61208843863372e-05, "loss": 1.0472, "step": 10889 }, { "epoch": 0.7378548682160038, "grad_norm": 5.416788578033447, "learning_rate": 8.611951536723938e-05, "loss": 0.6952, "step": 10890 }, { "epoch": 0.7379226234839759, "grad_norm": 6.170418739318848, "learning_rate": 8.611814634814156e-05, "loss": 0.8321, "step": 10891 }, { "epoch": 0.737990378751948, "grad_norm": 6.4705119132995605, "learning_rate": 8.611677732904374e-05, "loss": 0.9207, "step": 10892 }, { "epoch": 0.73805813401992, "grad_norm": 5.7486138343811035, "learning_rate": 8.611540830994594e-05, "loss": 0.8527, "step": 10893 }, { "epoch": 0.7381258892878921, "grad_norm": 6.1065568923950195, "learning_rate": 8.611403929084812e-05, "loss": 0.7026, "step": 10894 }, { "epoch": 0.7381936445558642, "grad_norm": 5.706049919128418, "learning_rate": 8.61126702717503e-05, "loss": 0.8366, "step": 10895 }, { "epoch": 0.7382613998238363, "grad_norm": 4.653761863708496, "learning_rate": 8.611130125265248e-05, "loss": 0.538, "step": 10896 }, { "epoch": 0.7383291550918084, "grad_norm": 7.086725234985352, "learning_rate": 8.610993223355466e-05, "loss": 0.906, "step": 10897 }, { "epoch": 0.7383969103597805, "grad_norm": 8.890185356140137, "learning_rate": 8.610856321445685e-05, "loss": 1.0786, "step": 10898 }, { "epoch": 0.7384646656277526, "grad_norm": 6.072719097137451, "learning_rate": 8.610719419535903e-05, "loss": 0.8558, "step": 10899 }, { "epoch": 0.7385324208957247, "grad_norm": 6.266420364379883, "learning_rate": 8.610582517626121e-05, "loss": 0.9337, "step": 10900 }, { "epoch": 0.7386001761636968, "grad_norm": 5.14294958114624, "learning_rate": 8.61044561571634e-05, "loss": 0.5262, "step": 10901 }, { "epoch": 0.7386679314316689, "grad_norm": 8.386168479919434, "learning_rate": 8.610308713806559e-05, "loss": 1.3214, "step": 10902 }, { "epoch": 0.7387356866996408, "grad_norm": 4.496156692504883, "learning_rate": 8.610171811896777e-05, "loss": 0.81, "step": 10903 }, { "epoch": 0.7388034419676129, "grad_norm": 8.191973686218262, "learning_rate": 8.610034909986995e-05, "loss": 0.7648, "step": 10904 }, { "epoch": 0.738871197235585, "grad_norm": 6.122156620025635, "learning_rate": 8.609898008077213e-05, "loss": 0.8471, "step": 10905 }, { "epoch": 0.7389389525035571, "grad_norm": 6.742517471313477, "learning_rate": 8.609761106167431e-05, "loss": 0.8456, "step": 10906 }, { "epoch": 0.7390067077715292, "grad_norm": 4.137988090515137, "learning_rate": 8.60962420425765e-05, "loss": 0.7035, "step": 10907 }, { "epoch": 0.7390744630395013, "grad_norm": 6.304561138153076, "learning_rate": 8.609487302347868e-05, "loss": 0.8491, "step": 10908 }, { "epoch": 0.7391422183074734, "grad_norm": 7.52929162979126, "learning_rate": 8.609350400438086e-05, "loss": 0.6814, "step": 10909 }, { "epoch": 0.7392099735754455, "grad_norm": 5.229283809661865, "learning_rate": 8.609213498528304e-05, "loss": 0.8091, "step": 10910 }, { "epoch": 0.7392777288434176, "grad_norm": 7.810683250427246, "learning_rate": 8.609076596618522e-05, "loss": 1.0311, "step": 10911 }, { "epoch": 0.7393454841113897, "grad_norm": 4.896294116973877, "learning_rate": 8.608939694708742e-05, "loss": 0.7789, "step": 10912 }, { "epoch": 0.7394132393793618, "grad_norm": 5.661660194396973, "learning_rate": 8.60880279279896e-05, "loss": 0.8267, "step": 10913 }, { "epoch": 0.7394809946473339, "grad_norm": 6.691354274749756, "learning_rate": 8.608665890889178e-05, "loss": 0.9455, "step": 10914 }, { "epoch": 0.739548749915306, "grad_norm": 4.900321960449219, "learning_rate": 8.608528988979396e-05, "loss": 0.7102, "step": 10915 }, { "epoch": 0.739616505183278, "grad_norm": 6.105568885803223, "learning_rate": 8.608392087069615e-05, "loss": 0.825, "step": 10916 }, { "epoch": 0.7396842604512501, "grad_norm": 6.132299900054932, "learning_rate": 8.608255185159833e-05, "loss": 0.754, "step": 10917 }, { "epoch": 0.7397520157192222, "grad_norm": 6.12472677230835, "learning_rate": 8.608118283250051e-05, "loss": 0.9008, "step": 10918 }, { "epoch": 0.7398197709871942, "grad_norm": 6.3657002449035645, "learning_rate": 8.60798138134027e-05, "loss": 1.0276, "step": 10919 }, { "epoch": 0.7398875262551663, "grad_norm": 5.224886894226074, "learning_rate": 8.607844479430487e-05, "loss": 0.8636, "step": 10920 }, { "epoch": 0.7399552815231384, "grad_norm": 12.790693283081055, "learning_rate": 8.607707577520707e-05, "loss": 0.9617, "step": 10921 }, { "epoch": 0.7400230367911105, "grad_norm": 5.787003040313721, "learning_rate": 8.607570675610925e-05, "loss": 0.6847, "step": 10922 }, { "epoch": 0.7400907920590826, "grad_norm": 5.556890964508057, "learning_rate": 8.607433773701143e-05, "loss": 0.7855, "step": 10923 }, { "epoch": 0.7401585473270547, "grad_norm": 4.655983924865723, "learning_rate": 8.607296871791361e-05, "loss": 0.7021, "step": 10924 }, { "epoch": 0.7402263025950268, "grad_norm": 6.317244052886963, "learning_rate": 8.60715996988158e-05, "loss": 0.7709, "step": 10925 }, { "epoch": 0.7402940578629988, "grad_norm": 5.975430965423584, "learning_rate": 8.607023067971798e-05, "loss": 0.8145, "step": 10926 }, { "epoch": 0.7403618131309709, "grad_norm": 5.794954776763916, "learning_rate": 8.606886166062016e-05, "loss": 0.8564, "step": 10927 }, { "epoch": 0.740429568398943, "grad_norm": 5.966963291168213, "learning_rate": 8.606749264152236e-05, "loss": 0.7833, "step": 10928 }, { "epoch": 0.7404973236669151, "grad_norm": 8.969901084899902, "learning_rate": 8.606612362242454e-05, "loss": 0.9768, "step": 10929 }, { "epoch": 0.7405650789348872, "grad_norm": 7.2957353591918945, "learning_rate": 8.606475460332672e-05, "loss": 1.0157, "step": 10930 }, { "epoch": 0.7406328342028593, "grad_norm": 7.000049591064453, "learning_rate": 8.606338558422891e-05, "loss": 0.7907, "step": 10931 }, { "epoch": 0.7407005894708314, "grad_norm": 7.120476722717285, "learning_rate": 8.606201656513109e-05, "loss": 0.7708, "step": 10932 }, { "epoch": 0.7407683447388035, "grad_norm": 6.740073204040527, "learning_rate": 8.606064754603327e-05, "loss": 0.8841, "step": 10933 }, { "epoch": 0.7408361000067756, "grad_norm": 4.036641597747803, "learning_rate": 8.605927852693545e-05, "loss": 0.6445, "step": 10934 }, { "epoch": 0.7409038552747476, "grad_norm": 6.200973033905029, "learning_rate": 8.605790950783765e-05, "loss": 0.6976, "step": 10935 }, { "epoch": 0.7409716105427196, "grad_norm": 6.047748565673828, "learning_rate": 8.605654048873983e-05, "loss": 0.8174, "step": 10936 }, { "epoch": 0.7410393658106917, "grad_norm": 6.8715081214904785, "learning_rate": 8.605517146964201e-05, "loss": 0.9817, "step": 10937 }, { "epoch": 0.7411071210786638, "grad_norm": 5.555269241333008, "learning_rate": 8.605380245054419e-05, "loss": 0.6318, "step": 10938 }, { "epoch": 0.7411748763466359, "grad_norm": 8.042158126831055, "learning_rate": 8.605243343144638e-05, "loss": 1.2133, "step": 10939 }, { "epoch": 0.741242631614608, "grad_norm": 6.014042854309082, "learning_rate": 8.605106441234856e-05, "loss": 0.7934, "step": 10940 }, { "epoch": 0.7413103868825801, "grad_norm": 5.317357063293457, "learning_rate": 8.604969539325074e-05, "loss": 0.7135, "step": 10941 }, { "epoch": 0.7413781421505522, "grad_norm": 5.755384922027588, "learning_rate": 8.604832637415292e-05, "loss": 0.4912, "step": 10942 }, { "epoch": 0.7414458974185243, "grad_norm": 5.812741279602051, "learning_rate": 8.60469573550551e-05, "loss": 1.029, "step": 10943 }, { "epoch": 0.7415136526864964, "grad_norm": 6.497066020965576, "learning_rate": 8.60455883359573e-05, "loss": 0.8707, "step": 10944 }, { "epoch": 0.7415814079544685, "grad_norm": 5.14995002746582, "learning_rate": 8.604421931685948e-05, "loss": 0.6412, "step": 10945 }, { "epoch": 0.7416491632224406, "grad_norm": 6.122746467590332, "learning_rate": 8.604285029776166e-05, "loss": 0.7705, "step": 10946 }, { "epoch": 0.7417169184904127, "grad_norm": 5.844207763671875, "learning_rate": 8.604148127866384e-05, "loss": 0.9171, "step": 10947 }, { "epoch": 0.7417846737583847, "grad_norm": 4.956113815307617, "learning_rate": 8.604011225956603e-05, "loss": 0.6854, "step": 10948 }, { "epoch": 0.7418524290263568, "grad_norm": 6.357004165649414, "learning_rate": 8.603874324046821e-05, "loss": 0.9097, "step": 10949 }, { "epoch": 0.7419201842943289, "grad_norm": 8.953754425048828, "learning_rate": 8.60373742213704e-05, "loss": 0.7142, "step": 10950 }, { "epoch": 0.741987939562301, "grad_norm": 6.2500691413879395, "learning_rate": 8.603600520227257e-05, "loss": 0.5626, "step": 10951 }, { "epoch": 0.742055694830273, "grad_norm": 6.902349472045898, "learning_rate": 8.603463618317475e-05, "loss": 1.0085, "step": 10952 }, { "epoch": 0.7421234500982451, "grad_norm": 5.850625991821289, "learning_rate": 8.603326716407695e-05, "loss": 0.691, "step": 10953 }, { "epoch": 0.7421912053662172, "grad_norm": 6.940263748168945, "learning_rate": 8.603189814497913e-05, "loss": 0.9006, "step": 10954 }, { "epoch": 0.7422589606341893, "grad_norm": 6.827815532684326, "learning_rate": 8.603052912588131e-05, "loss": 0.7597, "step": 10955 }, { "epoch": 0.7423267159021614, "grad_norm": 7.243155479431152, "learning_rate": 8.602916010678349e-05, "loss": 0.7973, "step": 10956 }, { "epoch": 0.7423944711701335, "grad_norm": 5.524760723114014, "learning_rate": 8.602779108768567e-05, "loss": 0.8164, "step": 10957 }, { "epoch": 0.7424622264381056, "grad_norm": 6.662420749664307, "learning_rate": 8.602642206858786e-05, "loss": 0.6328, "step": 10958 }, { "epoch": 0.7425299817060776, "grad_norm": 8.47179889678955, "learning_rate": 8.602505304949004e-05, "loss": 1.1036, "step": 10959 }, { "epoch": 0.7425977369740497, "grad_norm": 6.377445220947266, "learning_rate": 8.602368403039222e-05, "loss": 0.7944, "step": 10960 }, { "epoch": 0.7426654922420218, "grad_norm": 6.111810207366943, "learning_rate": 8.60223150112944e-05, "loss": 1.008, "step": 10961 }, { "epoch": 0.7427332475099939, "grad_norm": 3.763913154602051, "learning_rate": 8.60209459921966e-05, "loss": 0.6067, "step": 10962 }, { "epoch": 0.742801002777966, "grad_norm": 7.272477626800537, "learning_rate": 8.601957697309878e-05, "loss": 0.813, "step": 10963 }, { "epoch": 0.7428687580459381, "grad_norm": 6.8635406494140625, "learning_rate": 8.601820795400096e-05, "loss": 0.6991, "step": 10964 }, { "epoch": 0.7429365133139102, "grad_norm": 5.985293388366699, "learning_rate": 8.601683893490314e-05, "loss": 0.8404, "step": 10965 }, { "epoch": 0.7430042685818823, "grad_norm": 7.298139572143555, "learning_rate": 8.601546991580532e-05, "loss": 0.7766, "step": 10966 }, { "epoch": 0.7430720238498544, "grad_norm": 6.489261150360107, "learning_rate": 8.601410089670751e-05, "loss": 0.8351, "step": 10967 }, { "epoch": 0.7431397791178264, "grad_norm": 6.494144439697266, "learning_rate": 8.60127318776097e-05, "loss": 0.7456, "step": 10968 }, { "epoch": 0.7432075343857985, "grad_norm": 7.2324090003967285, "learning_rate": 8.601136285851187e-05, "loss": 0.7192, "step": 10969 }, { "epoch": 0.7432752896537705, "grad_norm": 5.349514007568359, "learning_rate": 8.600999383941405e-05, "loss": 0.8994, "step": 10970 }, { "epoch": 0.7433430449217426, "grad_norm": 5.59208869934082, "learning_rate": 8.600862482031625e-05, "loss": 0.6534, "step": 10971 }, { "epoch": 0.7434108001897147, "grad_norm": 6.849013328552246, "learning_rate": 8.600725580121843e-05, "loss": 0.7908, "step": 10972 }, { "epoch": 0.7434785554576868, "grad_norm": 7.590834617614746, "learning_rate": 8.600588678212061e-05, "loss": 0.7773, "step": 10973 }, { "epoch": 0.7435463107256589, "grad_norm": 5.68290901184082, "learning_rate": 8.60045177630228e-05, "loss": 0.9227, "step": 10974 }, { "epoch": 0.743614065993631, "grad_norm": 7.747159004211426, "learning_rate": 8.600314874392498e-05, "loss": 0.8515, "step": 10975 }, { "epoch": 0.7436818212616031, "grad_norm": 6.546794891357422, "learning_rate": 8.600177972482716e-05, "loss": 0.7743, "step": 10976 }, { "epoch": 0.7437495765295752, "grad_norm": 6.478428363800049, "learning_rate": 8.600041070572936e-05, "loss": 0.844, "step": 10977 }, { "epoch": 0.7438173317975473, "grad_norm": 6.902507781982422, "learning_rate": 8.599904168663154e-05, "loss": 0.9503, "step": 10978 }, { "epoch": 0.7438850870655194, "grad_norm": 5.328883171081543, "learning_rate": 8.599767266753372e-05, "loss": 0.9925, "step": 10979 }, { "epoch": 0.7439528423334915, "grad_norm": 7.376800537109375, "learning_rate": 8.599630364843591e-05, "loss": 1.0425, "step": 10980 }, { "epoch": 0.7440205976014636, "grad_norm": 5.4552693367004395, "learning_rate": 8.599493462933809e-05, "loss": 0.7362, "step": 10981 }, { "epoch": 0.7440883528694356, "grad_norm": 6.60410213470459, "learning_rate": 8.599356561024027e-05, "loss": 0.5882, "step": 10982 }, { "epoch": 0.7441561081374077, "grad_norm": 5.760132789611816, "learning_rate": 8.599219659114245e-05, "loss": 0.8001, "step": 10983 }, { "epoch": 0.7442238634053797, "grad_norm": 6.064749717712402, "learning_rate": 8.599082757204463e-05, "loss": 0.6922, "step": 10984 }, { "epoch": 0.7442916186733518, "grad_norm": 5.042727470397949, "learning_rate": 8.598945855294683e-05, "loss": 0.7558, "step": 10985 }, { "epoch": 0.7443593739413239, "grad_norm": 5.222632884979248, "learning_rate": 8.598808953384901e-05, "loss": 0.6729, "step": 10986 }, { "epoch": 0.744427129209296, "grad_norm": 6.379947185516357, "learning_rate": 8.598672051475119e-05, "loss": 0.995, "step": 10987 }, { "epoch": 0.7444948844772681, "grad_norm": 8.097314834594727, "learning_rate": 8.598535149565337e-05, "loss": 0.9257, "step": 10988 }, { "epoch": 0.7445626397452402, "grad_norm": 7.161442279815674, "learning_rate": 8.598398247655555e-05, "loss": 0.9308, "step": 10989 }, { "epoch": 0.7446303950132123, "grad_norm": 8.118345260620117, "learning_rate": 8.598261345745774e-05, "loss": 0.7901, "step": 10990 }, { "epoch": 0.7446981502811844, "grad_norm": 5.9762797355651855, "learning_rate": 8.598124443835992e-05, "loss": 0.7084, "step": 10991 }, { "epoch": 0.7447659055491564, "grad_norm": 5.742055416107178, "learning_rate": 8.59798754192621e-05, "loss": 0.6301, "step": 10992 }, { "epoch": 0.7448336608171285, "grad_norm": 5.871792793273926, "learning_rate": 8.597850640016428e-05, "loss": 0.6938, "step": 10993 }, { "epoch": 0.7449014160851006, "grad_norm": 6.759030342102051, "learning_rate": 8.597713738106648e-05, "loss": 0.8002, "step": 10994 }, { "epoch": 0.7449691713530727, "grad_norm": 7.746789455413818, "learning_rate": 8.597576836196866e-05, "loss": 1.0539, "step": 10995 }, { "epoch": 0.7450369266210448, "grad_norm": 8.104504585266113, "learning_rate": 8.597439934287084e-05, "loss": 0.8205, "step": 10996 }, { "epoch": 0.7451046818890169, "grad_norm": 5.84213399887085, "learning_rate": 8.597303032377302e-05, "loss": 1.0039, "step": 10997 }, { "epoch": 0.745172437156989, "grad_norm": 6.890494346618652, "learning_rate": 8.59716613046752e-05, "loss": 1.1614, "step": 10998 }, { "epoch": 0.7452401924249611, "grad_norm": 5.201790809631348, "learning_rate": 8.597029228557739e-05, "loss": 0.6836, "step": 10999 }, { "epoch": 0.7453079476929331, "grad_norm": 5.523726463317871, "learning_rate": 8.596892326647957e-05, "loss": 0.9281, "step": 11000 }, { "epoch": 0.7453757029609052, "grad_norm": 6.218978404998779, "learning_rate": 8.596755424738175e-05, "loss": 0.8996, "step": 11001 }, { "epoch": 0.7454434582288773, "grad_norm": 4.860278606414795, "learning_rate": 8.596618522828393e-05, "loss": 0.8022, "step": 11002 }, { "epoch": 0.7455112134968493, "grad_norm": 6.433527946472168, "learning_rate": 8.596481620918613e-05, "loss": 0.8649, "step": 11003 }, { "epoch": 0.7455789687648214, "grad_norm": 6.700179100036621, "learning_rate": 8.596344719008831e-05, "loss": 0.8124, "step": 11004 }, { "epoch": 0.7456467240327935, "grad_norm": 6.234446048736572, "learning_rate": 8.596207817099049e-05, "loss": 0.8406, "step": 11005 }, { "epoch": 0.7457144793007656, "grad_norm": 6.376819610595703, "learning_rate": 8.596070915189267e-05, "loss": 0.7509, "step": 11006 }, { "epoch": 0.7457822345687377, "grad_norm": 5.808053970336914, "learning_rate": 8.595934013279485e-05, "loss": 0.864, "step": 11007 }, { "epoch": 0.7458499898367098, "grad_norm": 6.195199012756348, "learning_rate": 8.595797111369704e-05, "loss": 0.9481, "step": 11008 }, { "epoch": 0.7459177451046819, "grad_norm": 5.600381374359131, "learning_rate": 8.595660209459922e-05, "loss": 0.8916, "step": 11009 }, { "epoch": 0.745985500372654, "grad_norm": 5.866032600402832, "learning_rate": 8.59552330755014e-05, "loss": 0.813, "step": 11010 }, { "epoch": 0.7460532556406261, "grad_norm": 7.881616592407227, "learning_rate": 8.595386405640358e-05, "loss": 0.7804, "step": 11011 }, { "epoch": 0.7461210109085982, "grad_norm": 6.006317138671875, "learning_rate": 8.595249503730576e-05, "loss": 0.9739, "step": 11012 }, { "epoch": 0.7461887661765703, "grad_norm": 5.69516658782959, "learning_rate": 8.595112601820796e-05, "loss": 0.876, "step": 11013 }, { "epoch": 0.7462565214445424, "grad_norm": 6.4250006675720215, "learning_rate": 8.594975699911014e-05, "loss": 1.126, "step": 11014 }, { "epoch": 0.7463242767125144, "grad_norm": 4.753213405609131, "learning_rate": 8.594838798001232e-05, "loss": 0.9536, "step": 11015 }, { "epoch": 0.7463920319804865, "grad_norm": 6.818500995635986, "learning_rate": 8.59470189609145e-05, "loss": 0.8902, "step": 11016 }, { "epoch": 0.7464597872484585, "grad_norm": 5.300811290740967, "learning_rate": 8.594564994181669e-05, "loss": 0.7823, "step": 11017 }, { "epoch": 0.7465275425164306, "grad_norm": 7.0292582511901855, "learning_rate": 8.594428092271887e-05, "loss": 0.733, "step": 11018 }, { "epoch": 0.7465952977844027, "grad_norm": 6.624716281890869, "learning_rate": 8.594291190362105e-05, "loss": 0.8518, "step": 11019 }, { "epoch": 0.7466630530523748, "grad_norm": 6.195197105407715, "learning_rate": 8.594154288452323e-05, "loss": 0.9108, "step": 11020 }, { "epoch": 0.7467308083203469, "grad_norm": 5.729263782501221, "learning_rate": 8.594017386542543e-05, "loss": 0.8028, "step": 11021 }, { "epoch": 0.746798563588319, "grad_norm": 5.285633563995361, "learning_rate": 8.593880484632761e-05, "loss": 0.765, "step": 11022 }, { "epoch": 0.7468663188562911, "grad_norm": 7.621737480163574, "learning_rate": 8.593743582722979e-05, "loss": 0.811, "step": 11023 }, { "epoch": 0.7469340741242632, "grad_norm": 6.321857929229736, "learning_rate": 8.593606680813198e-05, "loss": 0.8962, "step": 11024 }, { "epoch": 0.7470018293922353, "grad_norm": 8.122148513793945, "learning_rate": 8.593469778903416e-05, "loss": 0.9326, "step": 11025 }, { "epoch": 0.7470695846602073, "grad_norm": 8.037406921386719, "learning_rate": 8.593332876993634e-05, "loss": 0.7504, "step": 11026 }, { "epoch": 0.7471373399281794, "grad_norm": 6.311530590057373, "learning_rate": 8.593195975083854e-05, "loss": 0.8079, "step": 11027 }, { "epoch": 0.7472050951961515, "grad_norm": 6.504730224609375, "learning_rate": 8.593059073174072e-05, "loss": 0.8561, "step": 11028 }, { "epoch": 0.7472728504641236, "grad_norm": 6.117341995239258, "learning_rate": 8.59292217126429e-05, "loss": 1.0076, "step": 11029 }, { "epoch": 0.7473406057320957, "grad_norm": 5.374571800231934, "learning_rate": 8.592785269354508e-05, "loss": 0.763, "step": 11030 }, { "epoch": 0.7474083610000678, "grad_norm": 5.502954006195068, "learning_rate": 8.592648367444727e-05, "loss": 0.7479, "step": 11031 }, { "epoch": 0.7474761162680399, "grad_norm": 8.302000045776367, "learning_rate": 8.592511465534945e-05, "loss": 0.9581, "step": 11032 }, { "epoch": 0.7475438715360119, "grad_norm": 5.1271748542785645, "learning_rate": 8.592374563625163e-05, "loss": 0.8742, "step": 11033 }, { "epoch": 0.747611626803984, "grad_norm": 7.070951461791992, "learning_rate": 8.592237661715381e-05, "loss": 0.8405, "step": 11034 }, { "epoch": 0.7476793820719561, "grad_norm": 6.252740383148193, "learning_rate": 8.592100759805601e-05, "loss": 0.9714, "step": 11035 }, { "epoch": 0.7477471373399281, "grad_norm": 5.273550033569336, "learning_rate": 8.591963857895819e-05, "loss": 0.847, "step": 11036 }, { "epoch": 0.7478148926079002, "grad_norm": 5.054128646850586, "learning_rate": 8.591826955986037e-05, "loss": 0.7635, "step": 11037 }, { "epoch": 0.7478826478758723, "grad_norm": 6.404119968414307, "learning_rate": 8.591690054076255e-05, "loss": 0.8501, "step": 11038 }, { "epoch": 0.7479504031438444, "grad_norm": 4.8361430168151855, "learning_rate": 8.591553152166473e-05, "loss": 0.8474, "step": 11039 }, { "epoch": 0.7480181584118165, "grad_norm": 8.294208526611328, "learning_rate": 8.591416250256692e-05, "loss": 0.8207, "step": 11040 }, { "epoch": 0.7480859136797886, "grad_norm": 4.709441661834717, "learning_rate": 8.59127934834691e-05, "loss": 0.7697, "step": 11041 }, { "epoch": 0.7481536689477607, "grad_norm": 7.428562641143799, "learning_rate": 8.591142446437128e-05, "loss": 0.9717, "step": 11042 }, { "epoch": 0.7482214242157328, "grad_norm": 5.442418098449707, "learning_rate": 8.591005544527346e-05, "loss": 0.7347, "step": 11043 }, { "epoch": 0.7482891794837049, "grad_norm": 6.216116428375244, "learning_rate": 8.590868642617564e-05, "loss": 0.6919, "step": 11044 }, { "epoch": 0.748356934751677, "grad_norm": 6.814431190490723, "learning_rate": 8.590731740707784e-05, "loss": 0.9049, "step": 11045 }, { "epoch": 0.7484246900196491, "grad_norm": 6.422941207885742, "learning_rate": 8.590594838798002e-05, "loss": 0.8655, "step": 11046 }, { "epoch": 0.7484924452876212, "grad_norm": 6.052054405212402, "learning_rate": 8.59045793688822e-05, "loss": 0.8558, "step": 11047 }, { "epoch": 0.7485602005555932, "grad_norm": 7.135115146636963, "learning_rate": 8.590321034978438e-05, "loss": 0.7672, "step": 11048 }, { "epoch": 0.7486279558235652, "grad_norm": 5.608342170715332, "learning_rate": 8.590184133068657e-05, "loss": 0.6696, "step": 11049 }, { "epoch": 0.7486957110915373, "grad_norm": 6.936807155609131, "learning_rate": 8.590047231158875e-05, "loss": 0.9131, "step": 11050 }, { "epoch": 0.7487634663595094, "grad_norm": 6.121455669403076, "learning_rate": 8.589910329249093e-05, "loss": 0.8561, "step": 11051 }, { "epoch": 0.7488312216274815, "grad_norm": 5.633101463317871, "learning_rate": 8.589773427339311e-05, "loss": 0.7347, "step": 11052 }, { "epoch": 0.7488989768954536, "grad_norm": 5.5565080642700195, "learning_rate": 8.58963652542953e-05, "loss": 0.9127, "step": 11053 }, { "epoch": 0.7489667321634257, "grad_norm": 6.5937418937683105, "learning_rate": 8.589499623519749e-05, "loss": 0.9561, "step": 11054 }, { "epoch": 0.7490344874313978, "grad_norm": 5.2491774559021, "learning_rate": 8.589362721609967e-05, "loss": 0.5976, "step": 11055 }, { "epoch": 0.7491022426993699, "grad_norm": 7.343997478485107, "learning_rate": 8.589225819700185e-05, "loss": 0.7404, "step": 11056 }, { "epoch": 0.749169997967342, "grad_norm": 5.535366058349609, "learning_rate": 8.589088917790403e-05, "loss": 0.9568, "step": 11057 }, { "epoch": 0.749237753235314, "grad_norm": 5.751670837402344, "learning_rate": 8.588952015880622e-05, "loss": 0.6918, "step": 11058 }, { "epoch": 0.7493055085032861, "grad_norm": 6.7734150886535645, "learning_rate": 8.58881511397084e-05, "loss": 0.7541, "step": 11059 }, { "epoch": 0.7493732637712582, "grad_norm": 6.3308424949646, "learning_rate": 8.588678212061058e-05, "loss": 0.9318, "step": 11060 }, { "epoch": 0.7494410190392303, "grad_norm": 6.561203479766846, "learning_rate": 8.588541310151276e-05, "loss": 0.8515, "step": 11061 }, { "epoch": 0.7495087743072024, "grad_norm": 6.7505974769592285, "learning_rate": 8.588404408241494e-05, "loss": 0.5823, "step": 11062 }, { "epoch": 0.7495765295751745, "grad_norm": 6.116795063018799, "learning_rate": 8.588267506331714e-05, "loss": 0.8536, "step": 11063 }, { "epoch": 0.7496442848431466, "grad_norm": 5.443800926208496, "learning_rate": 8.588130604421932e-05, "loss": 0.8771, "step": 11064 }, { "epoch": 0.7497120401111187, "grad_norm": 6.711660385131836, "learning_rate": 8.58799370251215e-05, "loss": 0.9655, "step": 11065 }, { "epoch": 0.7497797953790907, "grad_norm": 7.173767566680908, "learning_rate": 8.587856800602368e-05, "loss": 0.8445, "step": 11066 }, { "epoch": 0.7498475506470628, "grad_norm": 5.898787498474121, "learning_rate": 8.587719898692587e-05, "loss": 0.9952, "step": 11067 }, { "epoch": 0.7498475506470628, "eval_loss": 0.784003496170044, "eval_noise_accuracy": 0.0, "eval_runtime": 1565.4788, "eval_samples_per_second": 3.283, "eval_steps_per_second": 0.206, "eval_wer": 88.605156710052, "step": 11067 }, { "epoch": 0.7499153059150349, "grad_norm": 5.584912300109863, "learning_rate": 8.587582996782805e-05, "loss": 0.6039, "step": 11068 }, { "epoch": 0.749983061183007, "grad_norm": 7.05438232421875, "learning_rate": 8.587446094873023e-05, "loss": 0.8871, "step": 11069 }, { "epoch": 0.750050816450979, "grad_norm": 5.896789073944092, "learning_rate": 8.587309192963243e-05, "loss": 0.6864, "step": 11070 }, { "epoch": 0.7501185717189511, "grad_norm": 5.357899188995361, "learning_rate": 8.587172291053461e-05, "loss": 0.7744, "step": 11071 }, { "epoch": 0.7501863269869232, "grad_norm": 5.943928241729736, "learning_rate": 8.587035389143679e-05, "loss": 0.8338, "step": 11072 }, { "epoch": 0.7502540822548953, "grad_norm": 5.910472393035889, "learning_rate": 8.586898487233898e-05, "loss": 0.6772, "step": 11073 }, { "epoch": 0.7503218375228674, "grad_norm": 7.976921558380127, "learning_rate": 8.586761585324116e-05, "loss": 0.6949, "step": 11074 }, { "epoch": 0.7503895927908395, "grad_norm": 10.462014198303223, "learning_rate": 8.586624683414334e-05, "loss": 0.903, "step": 11075 }, { "epoch": 0.7504573480588116, "grad_norm": 7.0388946533203125, "learning_rate": 8.586487781504552e-05, "loss": 0.9651, "step": 11076 }, { "epoch": 0.7505251033267837, "grad_norm": 6.222432613372803, "learning_rate": 8.586350879594772e-05, "loss": 0.8637, "step": 11077 }, { "epoch": 0.7505928585947558, "grad_norm": 5.401667594909668, "learning_rate": 8.58621397768499e-05, "loss": 1.0538, "step": 11078 }, { "epoch": 0.7506606138627279, "grad_norm": 5.605716705322266, "learning_rate": 8.586077075775208e-05, "loss": 0.589, "step": 11079 }, { "epoch": 0.7507283691307, "grad_norm": 5.001210689544678, "learning_rate": 8.585940173865426e-05, "loss": 0.8688, "step": 11080 }, { "epoch": 0.750796124398672, "grad_norm": 6.432781219482422, "learning_rate": 8.585803271955645e-05, "loss": 0.8322, "step": 11081 }, { "epoch": 0.750863879666644, "grad_norm": 5.7087483406066895, "learning_rate": 8.585666370045863e-05, "loss": 0.913, "step": 11082 }, { "epoch": 0.7509316349346161, "grad_norm": 6.006197452545166, "learning_rate": 8.585529468136081e-05, "loss": 0.7931, "step": 11083 }, { "epoch": 0.7509993902025882, "grad_norm": 6.532283782958984, "learning_rate": 8.585392566226299e-05, "loss": 0.7063, "step": 11084 }, { "epoch": 0.7510671454705603, "grad_norm": 5.088547229766846, "learning_rate": 8.585255664316517e-05, "loss": 0.7701, "step": 11085 }, { "epoch": 0.7511349007385324, "grad_norm": 4.305655479431152, "learning_rate": 8.585118762406737e-05, "loss": 0.5815, "step": 11086 }, { "epoch": 0.7512026560065045, "grad_norm": 6.688506603240967, "learning_rate": 8.584981860496955e-05, "loss": 0.7622, "step": 11087 }, { "epoch": 0.7512704112744766, "grad_norm": 4.817746639251709, "learning_rate": 8.584844958587173e-05, "loss": 0.6914, "step": 11088 }, { "epoch": 0.7513381665424487, "grad_norm": 6.330674648284912, "learning_rate": 8.584708056677391e-05, "loss": 0.7376, "step": 11089 }, { "epoch": 0.7514059218104208, "grad_norm": 6.616771697998047, "learning_rate": 8.584571154767609e-05, "loss": 0.741, "step": 11090 }, { "epoch": 0.7514736770783929, "grad_norm": 5.213818550109863, "learning_rate": 8.584434252857828e-05, "loss": 0.7452, "step": 11091 }, { "epoch": 0.751541432346365, "grad_norm": 6.440800189971924, "learning_rate": 8.584297350948046e-05, "loss": 0.7903, "step": 11092 }, { "epoch": 0.751609187614337, "grad_norm": 7.082359790802002, "learning_rate": 8.584160449038264e-05, "loss": 0.9452, "step": 11093 }, { "epoch": 0.7516769428823091, "grad_norm": 5.019514083862305, "learning_rate": 8.584023547128482e-05, "loss": 0.6764, "step": 11094 }, { "epoch": 0.7517446981502812, "grad_norm": 6.08909797668457, "learning_rate": 8.583886645218702e-05, "loss": 0.6528, "step": 11095 }, { "epoch": 0.7518124534182533, "grad_norm": 6.15897274017334, "learning_rate": 8.58374974330892e-05, "loss": 0.7528, "step": 11096 }, { "epoch": 0.7518802086862254, "grad_norm": 6.012785911560059, "learning_rate": 8.583612841399138e-05, "loss": 0.8348, "step": 11097 }, { "epoch": 0.7519479639541974, "grad_norm": 6.340338230133057, "learning_rate": 8.583475939489356e-05, "loss": 0.8322, "step": 11098 }, { "epoch": 0.7520157192221695, "grad_norm": 6.704728603363037, "learning_rate": 8.583339037579574e-05, "loss": 0.9286, "step": 11099 }, { "epoch": 0.7520834744901416, "grad_norm": 7.134490013122559, "learning_rate": 8.583202135669793e-05, "loss": 0.9965, "step": 11100 }, { "epoch": 0.7521512297581137, "grad_norm": 6.226062774658203, "learning_rate": 8.583065233760011e-05, "loss": 0.6716, "step": 11101 }, { "epoch": 0.7522189850260858, "grad_norm": 5.990030765533447, "learning_rate": 8.582928331850229e-05, "loss": 0.7636, "step": 11102 }, { "epoch": 0.7522867402940578, "grad_norm": 5.9684834480285645, "learning_rate": 8.582791429940447e-05, "loss": 0.7847, "step": 11103 }, { "epoch": 0.7523544955620299, "grad_norm": 6.136056900024414, "learning_rate": 8.582654528030667e-05, "loss": 0.8641, "step": 11104 }, { "epoch": 0.752422250830002, "grad_norm": 6.887735366821289, "learning_rate": 8.582517626120885e-05, "loss": 0.8848, "step": 11105 }, { "epoch": 0.7524900060979741, "grad_norm": 5.988890647888184, "learning_rate": 8.582380724211103e-05, "loss": 0.8409, "step": 11106 }, { "epoch": 0.7525577613659462, "grad_norm": 5.09688138961792, "learning_rate": 8.582243822301321e-05, "loss": 0.813, "step": 11107 }, { "epoch": 0.7526255166339183, "grad_norm": 5.739447593688965, "learning_rate": 8.582106920391539e-05, "loss": 0.8414, "step": 11108 }, { "epoch": 0.7526932719018904, "grad_norm": 7.205900192260742, "learning_rate": 8.581970018481758e-05, "loss": 0.7134, "step": 11109 }, { "epoch": 0.7527610271698625, "grad_norm": 5.002781867980957, "learning_rate": 8.581833116571976e-05, "loss": 0.7937, "step": 11110 }, { "epoch": 0.7528287824378346, "grad_norm": 7.405692100524902, "learning_rate": 8.581696214662194e-05, "loss": 0.7896, "step": 11111 }, { "epoch": 0.7528965377058067, "grad_norm": 6.863432884216309, "learning_rate": 8.581559312752412e-05, "loss": 1.0113, "step": 11112 }, { "epoch": 0.7529642929737788, "grad_norm": 6.594325542449951, "learning_rate": 8.581422410842632e-05, "loss": 1.1729, "step": 11113 }, { "epoch": 0.7530320482417509, "grad_norm": 7.210272312164307, "learning_rate": 8.58128550893285e-05, "loss": 1.0648, "step": 11114 }, { "epoch": 0.7530998035097228, "grad_norm": 6.694648742675781, "learning_rate": 8.581148607023068e-05, "loss": 0.7308, "step": 11115 }, { "epoch": 0.7531675587776949, "grad_norm": 4.825174808502197, "learning_rate": 8.581011705113287e-05, "loss": 0.7378, "step": 11116 }, { "epoch": 0.753235314045667, "grad_norm": 7.157209873199463, "learning_rate": 8.580874803203505e-05, "loss": 0.8906, "step": 11117 }, { "epoch": 0.7533030693136391, "grad_norm": 7.719106674194336, "learning_rate": 8.580737901293723e-05, "loss": 0.5838, "step": 11118 }, { "epoch": 0.7533708245816112, "grad_norm": 5.7004828453063965, "learning_rate": 8.580600999383943e-05, "loss": 0.8375, "step": 11119 }, { "epoch": 0.7534385798495833, "grad_norm": 6.208179950714111, "learning_rate": 8.580464097474161e-05, "loss": 0.7722, "step": 11120 }, { "epoch": 0.7535063351175554, "grad_norm": 7.243391990661621, "learning_rate": 8.580327195564379e-05, "loss": 1.0486, "step": 11121 }, { "epoch": 0.7535740903855275, "grad_norm": 5.903680324554443, "learning_rate": 8.580190293654597e-05, "loss": 0.7814, "step": 11122 }, { "epoch": 0.7536418456534996, "grad_norm": 6.017585277557373, "learning_rate": 8.580053391744816e-05, "loss": 0.9528, "step": 11123 }, { "epoch": 0.7537096009214717, "grad_norm": 6.117088317871094, "learning_rate": 8.579916489835034e-05, "loss": 0.8894, "step": 11124 }, { "epoch": 0.7537773561894437, "grad_norm": 4.541194915771484, "learning_rate": 8.579779587925252e-05, "loss": 0.6975, "step": 11125 }, { "epoch": 0.7538451114574158, "grad_norm": 5.8757452964782715, "learning_rate": 8.57964268601547e-05, "loss": 1.0208, "step": 11126 }, { "epoch": 0.7539128667253879, "grad_norm": 6.408355712890625, "learning_rate": 8.57950578410569e-05, "loss": 0.7566, "step": 11127 }, { "epoch": 0.75398062199336, "grad_norm": 6.257778167724609, "learning_rate": 8.579368882195908e-05, "loss": 0.8246, "step": 11128 }, { "epoch": 0.7540483772613321, "grad_norm": 5.630858898162842, "learning_rate": 8.579231980286126e-05, "loss": 0.9085, "step": 11129 }, { "epoch": 0.7541161325293042, "grad_norm": 5.071380138397217, "learning_rate": 8.579095078376344e-05, "loss": 0.6106, "step": 11130 }, { "epoch": 0.7541838877972762, "grad_norm": 5.429622173309326, "learning_rate": 8.578958176466562e-05, "loss": 0.5699, "step": 11131 }, { "epoch": 0.7542516430652483, "grad_norm": 7.031247615814209, "learning_rate": 8.578821274556781e-05, "loss": 0.9928, "step": 11132 }, { "epoch": 0.7543193983332204, "grad_norm": 6.868895053863525, "learning_rate": 8.578684372646999e-05, "loss": 0.5757, "step": 11133 }, { "epoch": 0.7543871536011925, "grad_norm": 6.9466118812561035, "learning_rate": 8.578547470737217e-05, "loss": 0.7591, "step": 11134 }, { "epoch": 0.7544549088691646, "grad_norm": 5.605920314788818, "learning_rate": 8.578410568827435e-05, "loss": 0.7244, "step": 11135 }, { "epoch": 0.7545226641371366, "grad_norm": 5.563654899597168, "learning_rate": 8.578273666917655e-05, "loss": 0.6451, "step": 11136 }, { "epoch": 0.7545904194051087, "grad_norm": 5.8858771324157715, "learning_rate": 8.578136765007873e-05, "loss": 0.9521, "step": 11137 }, { "epoch": 0.7546581746730808, "grad_norm": 6.400531768798828, "learning_rate": 8.577999863098091e-05, "loss": 0.7077, "step": 11138 }, { "epoch": 0.7547259299410529, "grad_norm": 6.579067230224609, "learning_rate": 8.577862961188309e-05, "loss": 0.8734, "step": 11139 }, { "epoch": 0.754793685209025, "grad_norm": 5.629753112792969, "learning_rate": 8.577726059278527e-05, "loss": 0.7637, "step": 11140 }, { "epoch": 0.7548614404769971, "grad_norm": 5.546406269073486, "learning_rate": 8.577589157368746e-05, "loss": 0.8094, "step": 11141 }, { "epoch": 0.7549291957449692, "grad_norm": 5.510883331298828, "learning_rate": 8.577452255458964e-05, "loss": 0.8629, "step": 11142 }, { "epoch": 0.7549969510129413, "grad_norm": 8.156396865844727, "learning_rate": 8.577315353549182e-05, "loss": 0.7051, "step": 11143 }, { "epoch": 0.7550647062809134, "grad_norm": 5.59662389755249, "learning_rate": 8.5771784516394e-05, "loss": 0.8627, "step": 11144 }, { "epoch": 0.7551324615488855, "grad_norm": 7.06483793258667, "learning_rate": 8.577041549729618e-05, "loss": 0.8544, "step": 11145 }, { "epoch": 0.7552002168168576, "grad_norm": 5.5211663246154785, "learning_rate": 8.576904647819838e-05, "loss": 0.9621, "step": 11146 }, { "epoch": 0.7552679720848295, "grad_norm": 5.117467880249023, "learning_rate": 8.576767745910056e-05, "loss": 0.7759, "step": 11147 }, { "epoch": 0.7553357273528016, "grad_norm": 6.539487361907959, "learning_rate": 8.576630844000274e-05, "loss": 0.8292, "step": 11148 }, { "epoch": 0.7554034826207737, "grad_norm": 6.704295635223389, "learning_rate": 8.576493942090492e-05, "loss": 0.6924, "step": 11149 }, { "epoch": 0.7554712378887458, "grad_norm": 6.1922926902771, "learning_rate": 8.576357040180711e-05, "loss": 0.9602, "step": 11150 }, { "epoch": 0.7555389931567179, "grad_norm": 6.421977519989014, "learning_rate": 8.576220138270929e-05, "loss": 0.6867, "step": 11151 }, { "epoch": 0.75560674842469, "grad_norm": 9.157228469848633, "learning_rate": 8.576083236361147e-05, "loss": 0.7311, "step": 11152 }, { "epoch": 0.7556745036926621, "grad_norm": 6.960337162017822, "learning_rate": 8.575946334451365e-05, "loss": 0.986, "step": 11153 }, { "epoch": 0.7557422589606342, "grad_norm": 7.604123115539551, "learning_rate": 8.575809432541583e-05, "loss": 0.8723, "step": 11154 }, { "epoch": 0.7558100142286063, "grad_norm": 4.943920135498047, "learning_rate": 8.575672530631803e-05, "loss": 0.7067, "step": 11155 }, { "epoch": 0.7558777694965784, "grad_norm": 6.594372272491455, "learning_rate": 8.575535628722021e-05, "loss": 0.8089, "step": 11156 }, { "epoch": 0.7559455247645505, "grad_norm": 6.759579181671143, "learning_rate": 8.575398726812239e-05, "loss": 0.8882, "step": 11157 }, { "epoch": 0.7560132800325225, "grad_norm": 7.21705961227417, "learning_rate": 8.575261824902457e-05, "loss": 0.7922, "step": 11158 }, { "epoch": 0.7560810353004946, "grad_norm": 5.465212821960449, "learning_rate": 8.575124922992676e-05, "loss": 0.5913, "step": 11159 }, { "epoch": 0.7561487905684667, "grad_norm": 5.213903427124023, "learning_rate": 8.574988021082894e-05, "loss": 0.8809, "step": 11160 }, { "epoch": 0.7562165458364388, "grad_norm": 5.521292686462402, "learning_rate": 8.574851119173112e-05, "loss": 0.8858, "step": 11161 }, { "epoch": 0.7562843011044109, "grad_norm": 4.67765474319458, "learning_rate": 8.574714217263332e-05, "loss": 0.624, "step": 11162 }, { "epoch": 0.756352056372383, "grad_norm": 6.993425369262695, "learning_rate": 8.57457731535355e-05, "loss": 0.8126, "step": 11163 }, { "epoch": 0.756419811640355, "grad_norm": 6.692846775054932, "learning_rate": 8.574440413443768e-05, "loss": 1.0727, "step": 11164 }, { "epoch": 0.7564875669083271, "grad_norm": 6.0148773193359375, "learning_rate": 8.574303511533987e-05, "loss": 0.9334, "step": 11165 }, { "epoch": 0.7565553221762992, "grad_norm": 8.869120597839355, "learning_rate": 8.574166609624205e-05, "loss": 0.9927, "step": 11166 }, { "epoch": 0.7566230774442713, "grad_norm": 6.259548187255859, "learning_rate": 8.574029707714423e-05, "loss": 0.9905, "step": 11167 }, { "epoch": 0.7566908327122434, "grad_norm": 6.2330322265625, "learning_rate": 8.573892805804643e-05, "loss": 0.5814, "step": 11168 }, { "epoch": 0.7567585879802154, "grad_norm": 5.996000289916992, "learning_rate": 8.57375590389486e-05, "loss": 0.7676, "step": 11169 }, { "epoch": 0.7568263432481875, "grad_norm": 5.289711952209473, "learning_rate": 8.573619001985079e-05, "loss": 0.5761, "step": 11170 }, { "epoch": 0.7568940985161596, "grad_norm": 7.447518825531006, "learning_rate": 8.573482100075297e-05, "loss": 0.8822, "step": 11171 }, { "epoch": 0.7569618537841317, "grad_norm": 5.87143087387085, "learning_rate": 8.573345198165515e-05, "loss": 0.7935, "step": 11172 }, { "epoch": 0.7570296090521038, "grad_norm": 5.967605113983154, "learning_rate": 8.573208296255734e-05, "loss": 0.7755, "step": 11173 }, { "epoch": 0.7570973643200759, "grad_norm": 5.8669562339782715, "learning_rate": 8.573071394345952e-05, "loss": 0.7079, "step": 11174 }, { "epoch": 0.757165119588048, "grad_norm": 5.084571361541748, "learning_rate": 8.57293449243617e-05, "loss": 0.5683, "step": 11175 }, { "epoch": 0.7572328748560201, "grad_norm": 5.499231338500977, "learning_rate": 8.572797590526388e-05, "loss": 0.7443, "step": 11176 }, { "epoch": 0.7573006301239922, "grad_norm": 6.275290489196777, "learning_rate": 8.572660688616606e-05, "loss": 0.8232, "step": 11177 }, { "epoch": 0.7573683853919643, "grad_norm": 9.729387283325195, "learning_rate": 8.572523786706826e-05, "loss": 1.0499, "step": 11178 }, { "epoch": 0.7574361406599364, "grad_norm": 5.243049144744873, "learning_rate": 8.572386884797044e-05, "loss": 0.8695, "step": 11179 }, { "epoch": 0.7575038959279083, "grad_norm": 6.83506965637207, "learning_rate": 8.572249982887262e-05, "loss": 1.0048, "step": 11180 }, { "epoch": 0.7575716511958804, "grad_norm": 5.801395893096924, "learning_rate": 8.57211308097748e-05, "loss": 0.7159, "step": 11181 }, { "epoch": 0.7576394064638525, "grad_norm": 5.878986835479736, "learning_rate": 8.571976179067699e-05, "loss": 0.8116, "step": 11182 }, { "epoch": 0.7577071617318246, "grad_norm": 5.621567726135254, "learning_rate": 8.571839277157917e-05, "loss": 0.771, "step": 11183 }, { "epoch": 0.7577749169997967, "grad_norm": 6.197023391723633, "learning_rate": 8.571702375248135e-05, "loss": 0.7588, "step": 11184 }, { "epoch": 0.7578426722677688, "grad_norm": 6.170006275177002, "learning_rate": 8.571565473338353e-05, "loss": 0.7644, "step": 11185 }, { "epoch": 0.7579104275357409, "grad_norm": 7.654489517211914, "learning_rate": 8.571428571428571e-05, "loss": 0.8256, "step": 11186 }, { "epoch": 0.757978182803713, "grad_norm": 6.45438289642334, "learning_rate": 8.57129166951879e-05, "loss": 0.8437, "step": 11187 }, { "epoch": 0.7580459380716851, "grad_norm": 6.441734790802002, "learning_rate": 8.571154767609009e-05, "loss": 0.86, "step": 11188 }, { "epoch": 0.7581136933396572, "grad_norm": 6.674750804901123, "learning_rate": 8.571017865699227e-05, "loss": 0.761, "step": 11189 }, { "epoch": 0.7581814486076293, "grad_norm": 5.539404392242432, "learning_rate": 8.570880963789445e-05, "loss": 0.7542, "step": 11190 }, { "epoch": 0.7582492038756014, "grad_norm": 7.210087776184082, "learning_rate": 8.570744061879664e-05, "loss": 0.9209, "step": 11191 }, { "epoch": 0.7583169591435734, "grad_norm": 5.365791320800781, "learning_rate": 8.570607159969882e-05, "loss": 0.7885, "step": 11192 }, { "epoch": 0.7583847144115455, "grad_norm": 5.170724391937256, "learning_rate": 8.5704702580601e-05, "loss": 0.7241, "step": 11193 }, { "epoch": 0.7584524696795176, "grad_norm": 6.509468078613281, "learning_rate": 8.570333356150318e-05, "loss": 0.6644, "step": 11194 }, { "epoch": 0.7585202249474897, "grad_norm": 7.682931900024414, "learning_rate": 8.570196454240536e-05, "loss": 0.8989, "step": 11195 }, { "epoch": 0.7585879802154617, "grad_norm": 5.240300178527832, "learning_rate": 8.570059552330756e-05, "loss": 0.7577, "step": 11196 }, { "epoch": 0.7586557354834338, "grad_norm": 6.523096084594727, "learning_rate": 8.569922650420974e-05, "loss": 0.9607, "step": 11197 }, { "epoch": 0.7587234907514059, "grad_norm": 7.254205703735352, "learning_rate": 8.569785748511192e-05, "loss": 0.744, "step": 11198 }, { "epoch": 0.758791246019378, "grad_norm": 6.33684778213501, "learning_rate": 8.56964884660141e-05, "loss": 0.7514, "step": 11199 }, { "epoch": 0.7588590012873501, "grad_norm": 7.9795403480529785, "learning_rate": 8.569511944691628e-05, "loss": 0.8608, "step": 11200 }, { "epoch": 0.7589267565553222, "grad_norm": 9.582289695739746, "learning_rate": 8.569375042781847e-05, "loss": 0.63, "step": 11201 }, { "epoch": 0.7589945118232942, "grad_norm": 7.716179847717285, "learning_rate": 8.569238140872065e-05, "loss": 0.8992, "step": 11202 }, { "epoch": 0.7590622670912663, "grad_norm": 7.2479095458984375, "learning_rate": 8.569101238962283e-05, "loss": 0.7297, "step": 11203 }, { "epoch": 0.7591300223592384, "grad_norm": 5.631564140319824, "learning_rate": 8.568964337052501e-05, "loss": 0.5615, "step": 11204 }, { "epoch": 0.7591977776272105, "grad_norm": 6.547656536102295, "learning_rate": 8.568827435142721e-05, "loss": 0.7367, "step": 11205 }, { "epoch": 0.7592655328951826, "grad_norm": 5.980971813201904, "learning_rate": 8.568690533232939e-05, "loss": 0.857, "step": 11206 }, { "epoch": 0.7593332881631547, "grad_norm": 5.813320159912109, "learning_rate": 8.568553631323157e-05, "loss": 0.9032, "step": 11207 }, { "epoch": 0.7594010434311268, "grad_norm": 8.85381031036377, "learning_rate": 8.568416729413376e-05, "loss": 1.2161, "step": 11208 }, { "epoch": 0.7594687986990989, "grad_norm": 5.185744285583496, "learning_rate": 8.568279827503594e-05, "loss": 0.7288, "step": 11209 }, { "epoch": 0.759536553967071, "grad_norm": 6.757566452026367, "learning_rate": 8.568142925593812e-05, "loss": 0.8418, "step": 11210 }, { "epoch": 0.7596043092350431, "grad_norm": 6.195647239685059, "learning_rate": 8.568006023684032e-05, "loss": 0.8931, "step": 11211 }, { "epoch": 0.759672064503015, "grad_norm": 5.0666117668151855, "learning_rate": 8.56786912177425e-05, "loss": 0.677, "step": 11212 }, { "epoch": 0.7597398197709871, "grad_norm": 6.256303787231445, "learning_rate": 8.567732219864468e-05, "loss": 0.8905, "step": 11213 }, { "epoch": 0.7598075750389592, "grad_norm": 5.787086009979248, "learning_rate": 8.567595317954687e-05, "loss": 0.7823, "step": 11214 }, { "epoch": 0.7598753303069313, "grad_norm": 6.895840644836426, "learning_rate": 8.567458416044905e-05, "loss": 0.7453, "step": 11215 }, { "epoch": 0.7599430855749034, "grad_norm": 5.626133441925049, "learning_rate": 8.567321514135123e-05, "loss": 0.681, "step": 11216 }, { "epoch": 0.7600108408428755, "grad_norm": 5.967296123504639, "learning_rate": 8.567184612225341e-05, "loss": 0.7011, "step": 11217 }, { "epoch": 0.7600785961108476, "grad_norm": 6.5361175537109375, "learning_rate": 8.567047710315559e-05, "loss": 0.9365, "step": 11218 }, { "epoch": 0.7601463513788197, "grad_norm": 5.349835395812988, "learning_rate": 8.566910808405779e-05, "loss": 0.8221, "step": 11219 }, { "epoch": 0.7602141066467918, "grad_norm": 5.8798089027404785, "learning_rate": 8.566773906495997e-05, "loss": 0.8863, "step": 11220 }, { "epoch": 0.7602818619147639, "grad_norm": 5.611867427825928, "learning_rate": 8.566637004586215e-05, "loss": 0.7967, "step": 11221 }, { "epoch": 0.760349617182736, "grad_norm": 7.200530529022217, "learning_rate": 8.566500102676433e-05, "loss": 0.7193, "step": 11222 }, { "epoch": 0.7604173724507081, "grad_norm": 8.199413299560547, "learning_rate": 8.566363200766651e-05, "loss": 1.0548, "step": 11223 }, { "epoch": 0.7604851277186802, "grad_norm": 6.439299583435059, "learning_rate": 8.56622629885687e-05, "loss": 0.9681, "step": 11224 }, { "epoch": 0.7605528829866522, "grad_norm": 5.07982873916626, "learning_rate": 8.566089396947088e-05, "loss": 0.5863, "step": 11225 }, { "epoch": 0.7606206382546243, "grad_norm": 5.301721572875977, "learning_rate": 8.565952495037306e-05, "loss": 0.6788, "step": 11226 }, { "epoch": 0.7606883935225964, "grad_norm": 6.359459400177002, "learning_rate": 8.565815593127524e-05, "loss": 0.8885, "step": 11227 }, { "epoch": 0.7607561487905685, "grad_norm": 7.1541619300842285, "learning_rate": 8.565678691217744e-05, "loss": 0.7039, "step": 11228 }, { "epoch": 0.7608239040585405, "grad_norm": 5.493987560272217, "learning_rate": 8.565541789307962e-05, "loss": 0.698, "step": 11229 }, { "epoch": 0.7608916593265126, "grad_norm": 5.63759708404541, "learning_rate": 8.56540488739818e-05, "loss": 0.73, "step": 11230 }, { "epoch": 0.7609594145944847, "grad_norm": 4.844949245452881, "learning_rate": 8.565267985488398e-05, "loss": 0.9189, "step": 11231 }, { "epoch": 0.7610271698624568, "grad_norm": 5.61946964263916, "learning_rate": 8.565131083578616e-05, "loss": 0.9857, "step": 11232 }, { "epoch": 0.7610949251304289, "grad_norm": 6.210287570953369, "learning_rate": 8.564994181668835e-05, "loss": 0.9976, "step": 11233 }, { "epoch": 0.761162680398401, "grad_norm": 8.433357238769531, "learning_rate": 8.564857279759053e-05, "loss": 0.7793, "step": 11234 }, { "epoch": 0.761230435666373, "grad_norm": 6.107748031616211, "learning_rate": 8.564720377849271e-05, "loss": 0.8645, "step": 11235 }, { "epoch": 0.7612981909343451, "grad_norm": 5.744904041290283, "learning_rate": 8.564583475939489e-05, "loss": 0.9381, "step": 11236 }, { "epoch": 0.7613659462023172, "grad_norm": 7.262130260467529, "learning_rate": 8.564446574029709e-05, "loss": 0.6813, "step": 11237 }, { "epoch": 0.7614337014702893, "grad_norm": 5.340671062469482, "learning_rate": 8.564309672119927e-05, "loss": 0.8911, "step": 11238 }, { "epoch": 0.7615014567382614, "grad_norm": 5.608626842498779, "learning_rate": 8.564172770210145e-05, "loss": 0.8672, "step": 11239 }, { "epoch": 0.7615692120062335, "grad_norm": 5.490970611572266, "learning_rate": 8.564035868300363e-05, "loss": 0.7373, "step": 11240 }, { "epoch": 0.7616369672742056, "grad_norm": 5.939301013946533, "learning_rate": 8.563898966390581e-05, "loss": 0.8574, "step": 11241 }, { "epoch": 0.7617047225421777, "grad_norm": 5.254984378814697, "learning_rate": 8.5637620644808e-05, "loss": 0.888, "step": 11242 }, { "epoch": 0.7617724778101498, "grad_norm": 5.318180561065674, "learning_rate": 8.563625162571018e-05, "loss": 0.7723, "step": 11243 }, { "epoch": 0.7618402330781219, "grad_norm": 6.106221675872803, "learning_rate": 8.563488260661236e-05, "loss": 1.0503, "step": 11244 }, { "epoch": 0.7619079883460939, "grad_norm": 6.882452011108398, "learning_rate": 8.563351358751454e-05, "loss": 0.875, "step": 11245 }, { "epoch": 0.761975743614066, "grad_norm": 5.541532516479492, "learning_rate": 8.563214456841674e-05, "loss": 0.7503, "step": 11246 }, { "epoch": 0.762043498882038, "grad_norm": 6.156836986541748, "learning_rate": 8.563077554931892e-05, "loss": 0.6966, "step": 11247 }, { "epoch": 0.7621112541500101, "grad_norm": 5.691517353057861, "learning_rate": 8.56294065302211e-05, "loss": 0.8133, "step": 11248 }, { "epoch": 0.7621790094179822, "grad_norm": 5.82022762298584, "learning_rate": 8.562803751112328e-05, "loss": 0.8547, "step": 11249 }, { "epoch": 0.7622467646859543, "grad_norm": 7.37861442565918, "learning_rate": 8.562666849202546e-05, "loss": 0.9013, "step": 11250 }, { "epoch": 0.7623145199539264, "grad_norm": 6.810284614562988, "learning_rate": 8.562529947292765e-05, "loss": 0.8586, "step": 11251 }, { "epoch": 0.7623822752218985, "grad_norm": 5.333098411560059, "learning_rate": 8.562393045382983e-05, "loss": 0.719, "step": 11252 }, { "epoch": 0.7624500304898706, "grad_norm": 5.309335231781006, "learning_rate": 8.562256143473201e-05, "loss": 0.6253, "step": 11253 }, { "epoch": 0.7625177857578427, "grad_norm": 6.738717079162598, "learning_rate": 8.562119241563419e-05, "loss": 0.8135, "step": 11254 }, { "epoch": 0.7625855410258148, "grad_norm": 7.389588832855225, "learning_rate": 8.561982339653639e-05, "loss": 0.9612, "step": 11255 }, { "epoch": 0.7626532962937869, "grad_norm": 5.986783504486084, "learning_rate": 8.561845437743857e-05, "loss": 0.8069, "step": 11256 }, { "epoch": 0.762721051561759, "grad_norm": 4.995396614074707, "learning_rate": 8.561708535834076e-05, "loss": 0.8176, "step": 11257 }, { "epoch": 0.762788806829731, "grad_norm": 5.639811992645264, "learning_rate": 8.561571633924294e-05, "loss": 0.8188, "step": 11258 }, { "epoch": 0.7628565620977031, "grad_norm": 5.036534309387207, "learning_rate": 8.561434732014512e-05, "loss": 0.6055, "step": 11259 }, { "epoch": 0.7629243173656752, "grad_norm": 7.262217998504639, "learning_rate": 8.561297830104732e-05, "loss": 0.9039, "step": 11260 }, { "epoch": 0.7629920726336472, "grad_norm": 6.3017497062683105, "learning_rate": 8.56116092819495e-05, "loss": 0.9808, "step": 11261 }, { "epoch": 0.7630598279016193, "grad_norm": 5.999700546264648, "learning_rate": 8.561024026285168e-05, "loss": 0.771, "step": 11262 }, { "epoch": 0.7631275831695914, "grad_norm": 5.911481857299805, "learning_rate": 8.560887124375386e-05, "loss": 0.8681, "step": 11263 }, { "epoch": 0.7631953384375635, "grad_norm": 6.147214412689209, "learning_rate": 8.560750222465604e-05, "loss": 0.7072, "step": 11264 }, { "epoch": 0.7632630937055356, "grad_norm": 7.115184783935547, "learning_rate": 8.560613320555823e-05, "loss": 0.6084, "step": 11265 }, { "epoch": 0.7633308489735077, "grad_norm": 5.635624885559082, "learning_rate": 8.560476418646041e-05, "loss": 0.7086, "step": 11266 }, { "epoch": 0.7633986042414798, "grad_norm": 5.630052089691162, "learning_rate": 8.560339516736259e-05, "loss": 0.7175, "step": 11267 }, { "epoch": 0.7634663595094519, "grad_norm": 6.82834529876709, "learning_rate": 8.560202614826477e-05, "loss": 0.8163, "step": 11268 }, { "epoch": 0.763534114777424, "grad_norm": 5.716002464294434, "learning_rate": 8.560065712916697e-05, "loss": 0.7745, "step": 11269 }, { "epoch": 0.763601870045396, "grad_norm": 8.498682022094727, "learning_rate": 8.559928811006915e-05, "loss": 1.2038, "step": 11270 }, { "epoch": 0.7636696253133681, "grad_norm": 6.311810493469238, "learning_rate": 8.559791909097133e-05, "loss": 0.723, "step": 11271 }, { "epoch": 0.7637373805813402, "grad_norm": 8.272360801696777, "learning_rate": 8.55965500718735e-05, "loss": 0.8831, "step": 11272 }, { "epoch": 0.7638051358493123, "grad_norm": 8.232126235961914, "learning_rate": 8.559518105277569e-05, "loss": 0.9191, "step": 11273 }, { "epoch": 0.7638728911172844, "grad_norm": 5.009904384613037, "learning_rate": 8.559381203367788e-05, "loss": 0.7523, "step": 11274 }, { "epoch": 0.7639406463852565, "grad_norm": 5.405948638916016, "learning_rate": 8.559244301458006e-05, "loss": 0.8288, "step": 11275 }, { "epoch": 0.7640084016532286, "grad_norm": 5.3381428718566895, "learning_rate": 8.559107399548224e-05, "loss": 0.8073, "step": 11276 }, { "epoch": 0.7640761569212007, "grad_norm": 5.603418827056885, "learning_rate": 8.558970497638442e-05, "loss": 0.5939, "step": 11277 }, { "epoch": 0.7641439121891727, "grad_norm": 6.921864986419678, "learning_rate": 8.55883359572866e-05, "loss": 0.7877, "step": 11278 }, { "epoch": 0.7642116674571447, "grad_norm": 7.049600124359131, "learning_rate": 8.55869669381888e-05, "loss": 1.0853, "step": 11279 }, { "epoch": 0.7642794227251168, "grad_norm": 6.981689929962158, "learning_rate": 8.558559791909098e-05, "loss": 0.9528, "step": 11280 }, { "epoch": 0.7643471779930889, "grad_norm": 5.135133266448975, "learning_rate": 8.558422889999316e-05, "loss": 0.6623, "step": 11281 }, { "epoch": 0.764414933261061, "grad_norm": 4.53966760635376, "learning_rate": 8.558285988089534e-05, "loss": 0.5281, "step": 11282 }, { "epoch": 0.7644826885290331, "grad_norm": 7.814493656158447, "learning_rate": 8.558149086179753e-05, "loss": 1.132, "step": 11283 }, { "epoch": 0.7645504437970052, "grad_norm": 7.641327381134033, "learning_rate": 8.558012184269971e-05, "loss": 1.0694, "step": 11284 }, { "epoch": 0.7646181990649773, "grad_norm": 5.932433128356934, "learning_rate": 8.557875282360189e-05, "loss": 0.9439, "step": 11285 }, { "epoch": 0.7646859543329494, "grad_norm": 5.546622276306152, "learning_rate": 8.557738380450407e-05, "loss": 0.8643, "step": 11286 }, { "epoch": 0.7647537096009215, "grad_norm": 5.611812114715576, "learning_rate": 8.557601478540625e-05, "loss": 0.8404, "step": 11287 }, { "epoch": 0.7648214648688936, "grad_norm": 6.550889015197754, "learning_rate": 8.557464576630845e-05, "loss": 0.6958, "step": 11288 }, { "epoch": 0.7648892201368657, "grad_norm": 5.113431930541992, "learning_rate": 8.557327674721063e-05, "loss": 0.5798, "step": 11289 }, { "epoch": 0.7649569754048378, "grad_norm": 5.645472526550293, "learning_rate": 8.557190772811281e-05, "loss": 0.9511, "step": 11290 }, { "epoch": 0.7650247306728098, "grad_norm": 6.488102912902832, "learning_rate": 8.557053870901499e-05, "loss": 1.1031, "step": 11291 }, { "epoch": 0.7650924859407819, "grad_norm": 5.89171028137207, "learning_rate": 8.556916968991718e-05, "loss": 0.6539, "step": 11292 }, { "epoch": 0.765160241208754, "grad_norm": 6.806115627288818, "learning_rate": 8.556780067081936e-05, "loss": 0.7376, "step": 11293 }, { "epoch": 0.765227996476726, "grad_norm": 4.954026222229004, "learning_rate": 8.556643165172154e-05, "loss": 0.8474, "step": 11294 }, { "epoch": 0.7652957517446981, "grad_norm": 5.056695461273193, "learning_rate": 8.556506263262372e-05, "loss": 0.6977, "step": 11295 }, { "epoch": 0.7653635070126702, "grad_norm": 6.3873162269592285, "learning_rate": 8.55636936135259e-05, "loss": 0.8378, "step": 11296 }, { "epoch": 0.7654312622806423, "grad_norm": 6.397019386291504, "learning_rate": 8.55623245944281e-05, "loss": 0.7574, "step": 11297 }, { "epoch": 0.7654990175486144, "grad_norm": 7.052022457122803, "learning_rate": 8.556095557533028e-05, "loss": 0.6341, "step": 11298 }, { "epoch": 0.7655667728165865, "grad_norm": 6.845788478851318, "learning_rate": 8.555958655623246e-05, "loss": 0.9614, "step": 11299 }, { "epoch": 0.7656345280845586, "grad_norm": 6.307339668273926, "learning_rate": 8.555821753713464e-05, "loss": 0.8428, "step": 11300 }, { "epoch": 0.7657022833525307, "grad_norm": 9.020713806152344, "learning_rate": 8.555684851803683e-05, "loss": 0.9878, "step": 11301 }, { "epoch": 0.7657700386205027, "grad_norm": 5.308076858520508, "learning_rate": 8.555547949893901e-05, "loss": 0.614, "step": 11302 }, { "epoch": 0.7658377938884748, "grad_norm": 5.961923122406006, "learning_rate": 8.555411047984119e-05, "loss": 0.7601, "step": 11303 }, { "epoch": 0.7659055491564469, "grad_norm": 5.368841648101807, "learning_rate": 8.555274146074339e-05, "loss": 0.802, "step": 11304 }, { "epoch": 0.765973304424419, "grad_norm": 6.24576473236084, "learning_rate": 8.555137244164557e-05, "loss": 0.8612, "step": 11305 }, { "epoch": 0.7660410596923911, "grad_norm": 6.770591735839844, "learning_rate": 8.555000342254775e-05, "loss": 0.9074, "step": 11306 }, { "epoch": 0.7661088149603632, "grad_norm": 7.066356658935547, "learning_rate": 8.554863440344994e-05, "loss": 0.9979, "step": 11307 }, { "epoch": 0.7661765702283353, "grad_norm": 6.5950727462768555, "learning_rate": 8.554726538435212e-05, "loss": 0.6511, "step": 11308 }, { "epoch": 0.7662443254963074, "grad_norm": 7.035087585449219, "learning_rate": 8.55458963652543e-05, "loss": 0.6259, "step": 11309 }, { "epoch": 0.7663120807642794, "grad_norm": 5.893954753875732, "learning_rate": 8.554452734615648e-05, "loss": 0.8179, "step": 11310 }, { "epoch": 0.7663798360322515, "grad_norm": 9.012917518615723, "learning_rate": 8.554315832705868e-05, "loss": 0.8251, "step": 11311 }, { "epoch": 0.7664475913002236, "grad_norm": 6.316323280334473, "learning_rate": 8.554178930796086e-05, "loss": 0.9139, "step": 11312 }, { "epoch": 0.7665153465681956, "grad_norm": 7.094173908233643, "learning_rate": 8.554042028886304e-05, "loss": 0.8208, "step": 11313 }, { "epoch": 0.7665831018361677, "grad_norm": 6.499126434326172, "learning_rate": 8.553905126976522e-05, "loss": 0.7567, "step": 11314 }, { "epoch": 0.7666508571041398, "grad_norm": 5.729644775390625, "learning_rate": 8.553768225066741e-05, "loss": 0.7671, "step": 11315 }, { "epoch": 0.7667186123721119, "grad_norm": 8.627473831176758, "learning_rate": 8.553631323156959e-05, "loss": 1.0222, "step": 11316 }, { "epoch": 0.766786367640084, "grad_norm": 6.386945724487305, "learning_rate": 8.553494421247177e-05, "loss": 0.7227, "step": 11317 }, { "epoch": 0.7668541229080561, "grad_norm": 5.726612567901611, "learning_rate": 8.553357519337395e-05, "loss": 0.931, "step": 11318 }, { "epoch": 0.7669218781760282, "grad_norm": 6.600430488586426, "learning_rate": 8.553220617427613e-05, "loss": 0.9788, "step": 11319 }, { "epoch": 0.7669896334440003, "grad_norm": 4.86123514175415, "learning_rate": 8.553083715517833e-05, "loss": 0.7648, "step": 11320 }, { "epoch": 0.7670573887119724, "grad_norm": 6.815804481506348, "learning_rate": 8.55294681360805e-05, "loss": 0.5505, "step": 11321 }, { "epoch": 0.7671251439799445, "grad_norm": 6.213870048522949, "learning_rate": 8.552809911698269e-05, "loss": 0.8116, "step": 11322 }, { "epoch": 0.7671928992479166, "grad_norm": 6.11044454574585, "learning_rate": 8.552673009788487e-05, "loss": 0.8966, "step": 11323 }, { "epoch": 0.7672606545158887, "grad_norm": 6.12937068939209, "learning_rate": 8.552536107878706e-05, "loss": 0.7981, "step": 11324 }, { "epoch": 0.7673284097838607, "grad_norm": 6.787543773651123, "learning_rate": 8.552399205968924e-05, "loss": 0.8515, "step": 11325 }, { "epoch": 0.7673961650518328, "grad_norm": 6.377131462097168, "learning_rate": 8.552262304059142e-05, "loss": 0.7469, "step": 11326 }, { "epoch": 0.7674639203198048, "grad_norm": 6.438225746154785, "learning_rate": 8.55212540214936e-05, "loss": 0.8255, "step": 11327 }, { "epoch": 0.7675316755877769, "grad_norm": 8.09716510772705, "learning_rate": 8.551988500239578e-05, "loss": 1.0083, "step": 11328 }, { "epoch": 0.767599430855749, "grad_norm": 6.187016487121582, "learning_rate": 8.551851598329798e-05, "loss": 0.8532, "step": 11329 }, { "epoch": 0.7676671861237211, "grad_norm": 6.157771587371826, "learning_rate": 8.551714696420016e-05, "loss": 0.7899, "step": 11330 }, { "epoch": 0.7677349413916932, "grad_norm": 6.330532073974609, "learning_rate": 8.551577794510234e-05, "loss": 0.8263, "step": 11331 }, { "epoch": 0.7678026966596653, "grad_norm": 6.730197906494141, "learning_rate": 8.551440892600452e-05, "loss": 0.7482, "step": 11332 }, { "epoch": 0.7678704519276374, "grad_norm": 9.031699180603027, "learning_rate": 8.55130399069067e-05, "loss": 0.7286, "step": 11333 }, { "epoch": 0.7679382071956095, "grad_norm": 7.25761604309082, "learning_rate": 8.551167088780889e-05, "loss": 1.0674, "step": 11334 }, { "epoch": 0.7680059624635815, "grad_norm": 6.347570896148682, "learning_rate": 8.551030186871107e-05, "loss": 0.7851, "step": 11335 }, { "epoch": 0.7680737177315536, "grad_norm": 5.421909809112549, "learning_rate": 8.550893284961325e-05, "loss": 0.869, "step": 11336 }, { "epoch": 0.7681414729995257, "grad_norm": 6.994184970855713, "learning_rate": 8.550756383051543e-05, "loss": 0.9512, "step": 11337 }, { "epoch": 0.7682092282674978, "grad_norm": 5.763876914978027, "learning_rate": 8.550619481141763e-05, "loss": 0.7909, "step": 11338 }, { "epoch": 0.7682769835354699, "grad_norm": 5.998788833618164, "learning_rate": 8.55048257923198e-05, "loss": 0.8029, "step": 11339 }, { "epoch": 0.768344738803442, "grad_norm": 5.988320827484131, "learning_rate": 8.550345677322199e-05, "loss": 0.7934, "step": 11340 }, { "epoch": 0.7684124940714141, "grad_norm": 4.84094762802124, "learning_rate": 8.550208775412417e-05, "loss": 0.8953, "step": 11341 }, { "epoch": 0.7684802493393862, "grad_norm": 6.44489049911499, "learning_rate": 8.550071873502635e-05, "loss": 1.0824, "step": 11342 }, { "epoch": 0.7685480046073582, "grad_norm": 5.414109230041504, "learning_rate": 8.549934971592854e-05, "loss": 0.8749, "step": 11343 }, { "epoch": 0.7686157598753303, "grad_norm": 5.344674110412598, "learning_rate": 8.549798069683072e-05, "loss": 0.5946, "step": 11344 }, { "epoch": 0.7686835151433024, "grad_norm": 5.795503616333008, "learning_rate": 8.54966116777329e-05, "loss": 0.8889, "step": 11345 }, { "epoch": 0.7687512704112744, "grad_norm": 6.298269271850586, "learning_rate": 8.549524265863508e-05, "loss": 0.5105, "step": 11346 }, { "epoch": 0.7688190256792465, "grad_norm": 5.435570240020752, "learning_rate": 8.549387363953728e-05, "loss": 0.738, "step": 11347 }, { "epoch": 0.7688867809472186, "grad_norm": 5.928055763244629, "learning_rate": 8.549250462043946e-05, "loss": 0.8317, "step": 11348 }, { "epoch": 0.7689545362151907, "grad_norm": 6.4229350090026855, "learning_rate": 8.549113560134164e-05, "loss": 0.6707, "step": 11349 }, { "epoch": 0.7690222914831628, "grad_norm": 6.353605270385742, "learning_rate": 8.548976658224383e-05, "loss": 0.9008, "step": 11350 }, { "epoch": 0.7690900467511349, "grad_norm": 5.449488639831543, "learning_rate": 8.548839756314601e-05, "loss": 0.966, "step": 11351 }, { "epoch": 0.769157802019107, "grad_norm": 4.989541053771973, "learning_rate": 8.548702854404819e-05, "loss": 0.604, "step": 11352 }, { "epoch": 0.7692255572870791, "grad_norm": 5.058244228363037, "learning_rate": 8.548565952495039e-05, "loss": 0.6988, "step": 11353 }, { "epoch": 0.7692933125550512, "grad_norm": 5.852552890777588, "learning_rate": 8.548429050585257e-05, "loss": 0.6343, "step": 11354 }, { "epoch": 0.7693610678230233, "grad_norm": 7.096445560455322, "learning_rate": 8.548292148675475e-05, "loss": 0.7653, "step": 11355 }, { "epoch": 0.7694288230909954, "grad_norm": 6.468287467956543, "learning_rate": 8.548155246765693e-05, "loss": 0.7799, "step": 11356 }, { "epoch": 0.7694965783589675, "grad_norm": 7.869673252105713, "learning_rate": 8.548018344855912e-05, "loss": 0.9937, "step": 11357 }, { "epoch": 0.7695643336269395, "grad_norm": 6.300357341766357, "learning_rate": 8.54788144294613e-05, "loss": 0.644, "step": 11358 }, { "epoch": 0.7696320888949115, "grad_norm": 6.4069318771362305, "learning_rate": 8.547744541036348e-05, "loss": 0.8422, "step": 11359 }, { "epoch": 0.7696998441628836, "grad_norm": 5.846930980682373, "learning_rate": 8.547607639126566e-05, "loss": 0.7335, "step": 11360 }, { "epoch": 0.7697675994308557, "grad_norm": 6.80328893661499, "learning_rate": 8.547470737216785e-05, "loss": 0.6947, "step": 11361 }, { "epoch": 0.7698353546988278, "grad_norm": 5.2702412605285645, "learning_rate": 8.547333835307004e-05, "loss": 0.6963, "step": 11362 }, { "epoch": 0.7699031099667999, "grad_norm": 5.755353927612305, "learning_rate": 8.547196933397222e-05, "loss": 0.7093, "step": 11363 }, { "epoch": 0.769970865234772, "grad_norm": 5.961780071258545, "learning_rate": 8.54706003148744e-05, "loss": 0.8871, "step": 11364 }, { "epoch": 0.7700386205027441, "grad_norm": 5.585000514984131, "learning_rate": 8.546923129577658e-05, "loss": 0.677, "step": 11365 }, { "epoch": 0.7701063757707162, "grad_norm": 5.904151916503906, "learning_rate": 8.546786227667877e-05, "loss": 0.8357, "step": 11366 }, { "epoch": 0.7701741310386883, "grad_norm": 5.614160537719727, "learning_rate": 8.546649325758095e-05, "loss": 0.8266, "step": 11367 }, { "epoch": 0.7702418863066604, "grad_norm": 5.502987861633301, "learning_rate": 8.546512423848313e-05, "loss": 0.715, "step": 11368 }, { "epoch": 0.7703096415746324, "grad_norm": 5.5060625076293945, "learning_rate": 8.546375521938531e-05, "loss": 0.6332, "step": 11369 }, { "epoch": 0.7703773968426045, "grad_norm": 6.361515522003174, "learning_rate": 8.54623862002875e-05, "loss": 0.8958, "step": 11370 }, { "epoch": 0.7704451521105766, "grad_norm": 5.42423152923584, "learning_rate": 8.546101718118969e-05, "loss": 0.5965, "step": 11371 }, { "epoch": 0.7705129073785487, "grad_norm": 7.705050945281982, "learning_rate": 8.545964816209187e-05, "loss": 0.8397, "step": 11372 }, { "epoch": 0.7705806626465208, "grad_norm": 8.009198188781738, "learning_rate": 8.545827914299405e-05, "loss": 0.861, "step": 11373 }, { "epoch": 0.7706484179144929, "grad_norm": 7.349557876586914, "learning_rate": 8.545691012389623e-05, "loss": 1.1441, "step": 11374 }, { "epoch": 0.770716173182465, "grad_norm": 6.47999382019043, "learning_rate": 8.545554110479842e-05, "loss": 0.8032, "step": 11375 }, { "epoch": 0.770783928450437, "grad_norm": 8.485747337341309, "learning_rate": 8.54541720857006e-05, "loss": 0.8073, "step": 11376 }, { "epoch": 0.7708516837184091, "grad_norm": 7.178839683532715, "learning_rate": 8.545280306660278e-05, "loss": 0.8595, "step": 11377 }, { "epoch": 0.7709194389863812, "grad_norm": 7.898037910461426, "learning_rate": 8.545143404750496e-05, "loss": 1.0232, "step": 11378 }, { "epoch": 0.7709871942543532, "grad_norm": 6.554230690002441, "learning_rate": 8.545006502840716e-05, "loss": 0.8139, "step": 11379 }, { "epoch": 0.7710549495223253, "grad_norm": 5.184169769287109, "learning_rate": 8.544869600930934e-05, "loss": 0.8208, "step": 11380 }, { "epoch": 0.7711227047902974, "grad_norm": 6.4749579429626465, "learning_rate": 8.544732699021152e-05, "loss": 0.9345, "step": 11381 }, { "epoch": 0.7711904600582695, "grad_norm": 6.739956378936768, "learning_rate": 8.54459579711137e-05, "loss": 0.8543, "step": 11382 }, { "epoch": 0.7712582153262416, "grad_norm": 6.217097759246826, "learning_rate": 8.544458895201588e-05, "loss": 1.0393, "step": 11383 }, { "epoch": 0.7713259705942137, "grad_norm": 5.0891828536987305, "learning_rate": 8.544321993291807e-05, "loss": 0.7836, "step": 11384 }, { "epoch": 0.7713937258621858, "grad_norm": 4.892578125, "learning_rate": 8.544185091382025e-05, "loss": 0.8143, "step": 11385 }, { "epoch": 0.7714614811301579, "grad_norm": 5.4404168128967285, "learning_rate": 8.544048189472243e-05, "loss": 0.8841, "step": 11386 }, { "epoch": 0.77152923639813, "grad_norm": 6.725840091705322, "learning_rate": 8.543911287562461e-05, "loss": 0.9308, "step": 11387 }, { "epoch": 0.7715969916661021, "grad_norm": 4.622491359710693, "learning_rate": 8.543774385652679e-05, "loss": 0.5187, "step": 11388 }, { "epoch": 0.7716647469340742, "grad_norm": 5.4316182136535645, "learning_rate": 8.543637483742899e-05, "loss": 0.7526, "step": 11389 }, { "epoch": 0.7717325022020463, "grad_norm": 6.089479923248291, "learning_rate": 8.543500581833117e-05, "loss": 0.8785, "step": 11390 }, { "epoch": 0.7718002574700183, "grad_norm": 7.127860069274902, "learning_rate": 8.543363679923335e-05, "loss": 1.0935, "step": 11391 }, { "epoch": 0.7718680127379903, "grad_norm": 6.1316399574279785, "learning_rate": 8.543226778013553e-05, "loss": 0.7755, "step": 11392 }, { "epoch": 0.7719357680059624, "grad_norm": 5.672332286834717, "learning_rate": 8.543089876103772e-05, "loss": 0.8099, "step": 11393 }, { "epoch": 0.7720035232739345, "grad_norm": 7.249478816986084, "learning_rate": 8.54295297419399e-05, "loss": 0.7333, "step": 11394 }, { "epoch": 0.7720712785419066, "grad_norm": 6.270430088043213, "learning_rate": 8.542816072284208e-05, "loss": 0.7662, "step": 11395 }, { "epoch": 0.7721390338098787, "grad_norm": 5.953348636627197, "learning_rate": 8.542679170374428e-05, "loss": 0.7991, "step": 11396 }, { "epoch": 0.7722067890778508, "grad_norm": 9.44437313079834, "learning_rate": 8.542542268464646e-05, "loss": 0.8061, "step": 11397 }, { "epoch": 0.7722745443458229, "grad_norm": 6.52545690536499, "learning_rate": 8.542405366554864e-05, "loss": 1.0734, "step": 11398 }, { "epoch": 0.772342299613795, "grad_norm": 6.201472282409668, "learning_rate": 8.542268464645083e-05, "loss": 1.0589, "step": 11399 }, { "epoch": 0.7724100548817671, "grad_norm": 6.648289680480957, "learning_rate": 8.542131562735301e-05, "loss": 0.6268, "step": 11400 }, { "epoch": 0.7724778101497392, "grad_norm": 6.1713433265686035, "learning_rate": 8.541994660825519e-05, "loss": 0.8162, "step": 11401 }, { "epoch": 0.7725455654177112, "grad_norm": 6.964521884918213, "learning_rate": 8.541857758915738e-05, "loss": 0.8242, "step": 11402 }, { "epoch": 0.7726133206856833, "grad_norm": 6.025942802429199, "learning_rate": 8.541720857005956e-05, "loss": 0.748, "step": 11403 }, { "epoch": 0.7726810759536554, "grad_norm": 5.319873809814453, "learning_rate": 8.541583955096175e-05, "loss": 0.7437, "step": 11404 }, { "epoch": 0.7727488312216275, "grad_norm": 7.246333599090576, "learning_rate": 8.541447053186393e-05, "loss": 0.7307, "step": 11405 }, { "epoch": 0.7728165864895996, "grad_norm": 8.031798362731934, "learning_rate": 8.54131015127661e-05, "loss": 1.2193, "step": 11406 }, { "epoch": 0.7728843417575717, "grad_norm": 5.135364532470703, "learning_rate": 8.54117324936683e-05, "loss": 0.7107, "step": 11407 }, { "epoch": 0.7729520970255437, "grad_norm": 8.0211820602417, "learning_rate": 8.541036347457048e-05, "loss": 0.7786, "step": 11408 }, { "epoch": 0.7730198522935158, "grad_norm": 7.091327667236328, "learning_rate": 8.540899445547266e-05, "loss": 0.7232, "step": 11409 }, { "epoch": 0.7730876075614879, "grad_norm": 7.18704891204834, "learning_rate": 8.540762543637484e-05, "loss": 1.0204, "step": 11410 }, { "epoch": 0.77315536282946, "grad_norm": 5.484143257141113, "learning_rate": 8.540625641727702e-05, "loss": 0.9076, "step": 11411 }, { "epoch": 0.773223118097432, "grad_norm": 8.248093605041504, "learning_rate": 8.540488739817921e-05, "loss": 0.8882, "step": 11412 }, { "epoch": 0.7732908733654041, "grad_norm": 5.386202335357666, "learning_rate": 8.54035183790814e-05, "loss": 0.8552, "step": 11413 }, { "epoch": 0.7733586286333762, "grad_norm": 5.230656623840332, "learning_rate": 8.540214935998358e-05, "loss": 0.6101, "step": 11414 }, { "epoch": 0.7734263839013483, "grad_norm": 6.001654624938965, "learning_rate": 8.540078034088576e-05, "loss": 0.905, "step": 11415 }, { "epoch": 0.7734941391693204, "grad_norm": 5.581679344177246, "learning_rate": 8.539941132178795e-05, "loss": 0.8538, "step": 11416 }, { "epoch": 0.7735618944372925, "grad_norm": 5.559355735778809, "learning_rate": 8.539804230269013e-05, "loss": 0.8776, "step": 11417 }, { "epoch": 0.7736296497052646, "grad_norm": 4.970002174377441, "learning_rate": 8.539667328359231e-05, "loss": 0.6365, "step": 11418 }, { "epoch": 0.7736974049732367, "grad_norm": 6.487758159637451, "learning_rate": 8.539530426449449e-05, "loss": 1.0137, "step": 11419 }, { "epoch": 0.7737651602412088, "grad_norm": 6.612691402435303, "learning_rate": 8.539393524539667e-05, "loss": 0.9539, "step": 11420 }, { "epoch": 0.7738329155091809, "grad_norm": 9.316730499267578, "learning_rate": 8.539256622629887e-05, "loss": 0.761, "step": 11421 }, { "epoch": 0.773900670777153, "grad_norm": 6.791004180908203, "learning_rate": 8.539119720720105e-05, "loss": 0.9281, "step": 11422 }, { "epoch": 0.7739684260451251, "grad_norm": 6.122042179107666, "learning_rate": 8.538982818810323e-05, "loss": 0.8082, "step": 11423 }, { "epoch": 0.774036181313097, "grad_norm": 5.878390312194824, "learning_rate": 8.53884591690054e-05, "loss": 0.7934, "step": 11424 }, { "epoch": 0.7741039365810691, "grad_norm": 5.959982395172119, "learning_rate": 8.53870901499076e-05, "loss": 0.7919, "step": 11425 }, { "epoch": 0.7741716918490412, "grad_norm": 7.212625980377197, "learning_rate": 8.538572113080978e-05, "loss": 0.6346, "step": 11426 }, { "epoch": 0.7742394471170133, "grad_norm": 5.554856300354004, "learning_rate": 8.538435211171196e-05, "loss": 0.757, "step": 11427 }, { "epoch": 0.7743072023849854, "grad_norm": 7.405214309692383, "learning_rate": 8.538298309261414e-05, "loss": 0.8239, "step": 11428 }, { "epoch": 0.7743749576529575, "grad_norm": 5.671273231506348, "learning_rate": 8.538161407351632e-05, "loss": 0.6547, "step": 11429 }, { "epoch": 0.7744427129209296, "grad_norm": 5.7590436935424805, "learning_rate": 8.538024505441852e-05, "loss": 0.6526, "step": 11430 }, { "epoch": 0.7745104681889017, "grad_norm": 5.3632283210754395, "learning_rate": 8.53788760353207e-05, "loss": 0.6368, "step": 11431 }, { "epoch": 0.7745782234568738, "grad_norm": 5.514582633972168, "learning_rate": 8.537750701622288e-05, "loss": 0.8242, "step": 11432 }, { "epoch": 0.7746459787248459, "grad_norm": 6.291776657104492, "learning_rate": 8.537613799712506e-05, "loss": 0.7272, "step": 11433 }, { "epoch": 0.774713733992818, "grad_norm": 6.716917991638184, "learning_rate": 8.537476897802725e-05, "loss": 0.658, "step": 11434 }, { "epoch": 0.77478148926079, "grad_norm": 6.479708194732666, "learning_rate": 8.537339995892943e-05, "loss": 1.0441, "step": 11435 }, { "epoch": 0.7748492445287621, "grad_norm": 7.607060432434082, "learning_rate": 8.537203093983161e-05, "loss": 0.849, "step": 11436 }, { "epoch": 0.7749169997967342, "grad_norm": 5.455706596374512, "learning_rate": 8.537066192073379e-05, "loss": 0.8267, "step": 11437 }, { "epoch": 0.7749847550647063, "grad_norm": 6.571371078491211, "learning_rate": 8.536929290163597e-05, "loss": 0.9552, "step": 11438 }, { "epoch": 0.7750525103326784, "grad_norm": 7.029003143310547, "learning_rate": 8.536792388253817e-05, "loss": 0.7186, "step": 11439 }, { "epoch": 0.7751202656006505, "grad_norm": 6.117072582244873, "learning_rate": 8.536655486344035e-05, "loss": 0.6946, "step": 11440 }, { "epoch": 0.7751880208686225, "grad_norm": 6.154125690460205, "learning_rate": 8.536518584434253e-05, "loss": 0.6694, "step": 11441 }, { "epoch": 0.7752557761365946, "grad_norm": 5.271462917327881, "learning_rate": 8.536381682524472e-05, "loss": 0.6984, "step": 11442 }, { "epoch": 0.7753235314045667, "grad_norm": 7.075232982635498, "learning_rate": 8.53624478061469e-05, "loss": 0.8232, "step": 11443 }, { "epoch": 0.7753912866725388, "grad_norm": 8.889443397521973, "learning_rate": 8.536107878704908e-05, "loss": 0.9988, "step": 11444 }, { "epoch": 0.7754590419405109, "grad_norm": 6.270964622497559, "learning_rate": 8.535970976795127e-05, "loss": 0.8725, "step": 11445 }, { "epoch": 0.7755267972084829, "grad_norm": 6.906674861907959, "learning_rate": 8.535834074885345e-05, "loss": 0.8918, "step": 11446 }, { "epoch": 0.775594552476455, "grad_norm": 7.697040557861328, "learning_rate": 8.535697172975564e-05, "loss": 0.8286, "step": 11447 }, { "epoch": 0.7756623077444271, "grad_norm": 6.875545024871826, "learning_rate": 8.535560271065783e-05, "loss": 0.9181, "step": 11448 }, { "epoch": 0.7757300630123992, "grad_norm": 6.059504985809326, "learning_rate": 8.535423369156001e-05, "loss": 0.7066, "step": 11449 }, { "epoch": 0.7757978182803713, "grad_norm": 6.182839870452881, "learning_rate": 8.535286467246219e-05, "loss": 0.7472, "step": 11450 }, { "epoch": 0.7758655735483434, "grad_norm": 6.222153663635254, "learning_rate": 8.535149565336437e-05, "loss": 0.7874, "step": 11451 }, { "epoch": 0.7759333288163155, "grad_norm": 5.410420894622803, "learning_rate": 8.535012663426655e-05, "loss": 0.6223, "step": 11452 }, { "epoch": 0.7760010840842876, "grad_norm": 6.875960350036621, "learning_rate": 8.534875761516874e-05, "loss": 0.668, "step": 11453 }, { "epoch": 0.7760688393522597, "grad_norm": 6.519979953765869, "learning_rate": 8.534738859607092e-05, "loss": 0.6604, "step": 11454 }, { "epoch": 0.7761365946202318, "grad_norm": 5.24510383605957, "learning_rate": 8.53460195769731e-05, "loss": 0.685, "step": 11455 }, { "epoch": 0.7762043498882039, "grad_norm": 5.399496555328369, "learning_rate": 8.534465055787529e-05, "loss": 0.6585, "step": 11456 }, { "epoch": 0.7762721051561758, "grad_norm": 6.070941925048828, "learning_rate": 8.534328153877748e-05, "loss": 0.6592, "step": 11457 }, { "epoch": 0.7763398604241479, "grad_norm": 6.634647846221924, "learning_rate": 8.534191251967966e-05, "loss": 0.6772, "step": 11458 }, { "epoch": 0.77640761569212, "grad_norm": 4.490801811218262, "learning_rate": 8.534054350058184e-05, "loss": 0.6787, "step": 11459 }, { "epoch": 0.7764753709600921, "grad_norm": 9.601375579833984, "learning_rate": 8.533917448148402e-05, "loss": 0.7703, "step": 11460 }, { "epoch": 0.7765431262280642, "grad_norm": 8.57113265991211, "learning_rate": 8.53378054623862e-05, "loss": 0.6969, "step": 11461 }, { "epoch": 0.7766108814960363, "grad_norm": 5.441416263580322, "learning_rate": 8.53364364432884e-05, "loss": 0.7218, "step": 11462 }, { "epoch": 0.7766786367640084, "grad_norm": 5.4659857749938965, "learning_rate": 8.533506742419057e-05, "loss": 0.7042, "step": 11463 }, { "epoch": 0.7767463920319805, "grad_norm": 5.655581951141357, "learning_rate": 8.533369840509276e-05, "loss": 0.8021, "step": 11464 }, { "epoch": 0.7768141472999526, "grad_norm": 5.530257225036621, "learning_rate": 8.533232938599494e-05, "loss": 0.8872, "step": 11465 }, { "epoch": 0.7768819025679247, "grad_norm": 6.074810981750488, "learning_rate": 8.533096036689712e-05, "loss": 0.7353, "step": 11466 }, { "epoch": 0.7769496578358968, "grad_norm": 5.74225378036499, "learning_rate": 8.532959134779931e-05, "loss": 0.9214, "step": 11467 }, { "epoch": 0.7770174131038688, "grad_norm": 8.271156311035156, "learning_rate": 8.532822232870149e-05, "loss": 0.9913, "step": 11468 }, { "epoch": 0.7770851683718409, "grad_norm": 5.124596118927002, "learning_rate": 8.532685330960367e-05, "loss": 0.6335, "step": 11469 }, { "epoch": 0.777152923639813, "grad_norm": 6.918234348297119, "learning_rate": 8.532548429050585e-05, "loss": 0.9158, "step": 11470 }, { "epoch": 0.7772206789077851, "grad_norm": 5.966104984283447, "learning_rate": 8.532411527140804e-05, "loss": 0.9534, "step": 11471 }, { "epoch": 0.7772884341757572, "grad_norm": 6.7147536277771, "learning_rate": 8.532274625231023e-05, "loss": 0.7466, "step": 11472 }, { "epoch": 0.7773561894437292, "grad_norm": 6.448988914489746, "learning_rate": 8.53213772332124e-05, "loss": 1.0013, "step": 11473 }, { "epoch": 0.7774239447117013, "grad_norm": 4.238838195800781, "learning_rate": 8.532000821411459e-05, "loss": 0.6666, "step": 11474 }, { "epoch": 0.7774916999796734, "grad_norm": 6.865900993347168, "learning_rate": 8.531863919501677e-05, "loss": 0.7966, "step": 11475 }, { "epoch": 0.7775594552476455, "grad_norm": 5.297105312347412, "learning_rate": 8.531727017591896e-05, "loss": 0.8075, "step": 11476 }, { "epoch": 0.7776272105156176, "grad_norm": 6.865470886230469, "learning_rate": 8.531590115682114e-05, "loss": 0.7531, "step": 11477 }, { "epoch": 0.7776949657835897, "grad_norm": 12.770363807678223, "learning_rate": 8.531453213772332e-05, "loss": 0.765, "step": 11478 }, { "epoch": 0.7777627210515617, "grad_norm": 5.952977180480957, "learning_rate": 8.53131631186255e-05, "loss": 0.6781, "step": 11479 }, { "epoch": 0.7778304763195338, "grad_norm": 6.006490230560303, "learning_rate": 8.53117940995277e-05, "loss": 0.7149, "step": 11480 }, { "epoch": 0.7778982315875059, "grad_norm": 5.353832244873047, "learning_rate": 8.531042508042988e-05, "loss": 0.5234, "step": 11481 }, { "epoch": 0.777965986855478, "grad_norm": 7.221078395843506, "learning_rate": 8.530905606133206e-05, "loss": 0.8062, "step": 11482 }, { "epoch": 0.7780337421234501, "grad_norm": 5.661943435668945, "learning_rate": 8.530768704223424e-05, "loss": 0.7662, "step": 11483 }, { "epoch": 0.7781014973914222, "grad_norm": 7.68919038772583, "learning_rate": 8.530631802313642e-05, "loss": 0.9475, "step": 11484 }, { "epoch": 0.7781692526593943, "grad_norm": 6.683036804199219, "learning_rate": 8.530494900403861e-05, "loss": 0.8855, "step": 11485 }, { "epoch": 0.7782370079273664, "grad_norm": 6.948663234710693, "learning_rate": 8.530357998494079e-05, "loss": 0.8388, "step": 11486 }, { "epoch": 0.7783047631953385, "grad_norm": 6.295228004455566, "learning_rate": 8.530221096584297e-05, "loss": 0.8796, "step": 11487 }, { "epoch": 0.7783725184633106, "grad_norm": 5.598083019256592, "learning_rate": 8.530084194674516e-05, "loss": 1.1443, "step": 11488 }, { "epoch": 0.7784402737312827, "grad_norm": 6.99757194519043, "learning_rate": 8.529947292764735e-05, "loss": 0.9209, "step": 11489 }, { "epoch": 0.7785080289992546, "grad_norm": 4.9374189376831055, "learning_rate": 8.529810390854953e-05, "loss": 0.7056, "step": 11490 }, { "epoch": 0.7785757842672267, "grad_norm": 5.831791877746582, "learning_rate": 8.529673488945172e-05, "loss": 0.9647, "step": 11491 }, { "epoch": 0.7786435395351988, "grad_norm": 5.7701945304870605, "learning_rate": 8.52953658703539e-05, "loss": 0.5656, "step": 11492 }, { "epoch": 0.7787112948031709, "grad_norm": 6.0373945236206055, "learning_rate": 8.529399685125608e-05, "loss": 0.7656, "step": 11493 }, { "epoch": 0.778779050071143, "grad_norm": 5.0245184898376465, "learning_rate": 8.529262783215827e-05, "loss": 0.6954, "step": 11494 }, { "epoch": 0.7788468053391151, "grad_norm": 5.802394390106201, "learning_rate": 8.529125881306045e-05, "loss": 0.7926, "step": 11495 }, { "epoch": 0.7789145606070872, "grad_norm": 5.6064252853393555, "learning_rate": 8.528988979396263e-05, "loss": 0.6536, "step": 11496 }, { "epoch": 0.7789823158750593, "grad_norm": 6.606382846832275, "learning_rate": 8.528852077486481e-05, "loss": 0.8506, "step": 11497 }, { "epoch": 0.7790500711430314, "grad_norm": 5.858041286468506, "learning_rate": 8.5287151755767e-05, "loss": 0.9821, "step": 11498 }, { "epoch": 0.7791178264110035, "grad_norm": 7.08564567565918, "learning_rate": 8.528578273666919e-05, "loss": 0.8554, "step": 11499 }, { "epoch": 0.7791855816789756, "grad_norm": 8.084799766540527, "learning_rate": 8.528441371757137e-05, "loss": 0.7015, "step": 11500 }, { "epoch": 0.7792533369469477, "grad_norm": 5.143333911895752, "learning_rate": 8.528304469847355e-05, "loss": 0.815, "step": 11501 }, { "epoch": 0.7793210922149197, "grad_norm": 6.680109977722168, "learning_rate": 8.528167567937573e-05, "loss": 0.8661, "step": 11502 }, { "epoch": 0.7793888474828918, "grad_norm": 6.3008341789245605, "learning_rate": 8.528030666027792e-05, "loss": 0.9275, "step": 11503 }, { "epoch": 0.7794566027508639, "grad_norm": 6.935196876525879, "learning_rate": 8.52789376411801e-05, "loss": 0.6915, "step": 11504 }, { "epoch": 0.779524358018836, "grad_norm": 5.888092517852783, "learning_rate": 8.527756862208228e-05, "loss": 0.6035, "step": 11505 }, { "epoch": 0.779592113286808, "grad_norm": 6.207918167114258, "learning_rate": 8.527619960298447e-05, "loss": 0.6619, "step": 11506 }, { "epoch": 0.7796598685547801, "grad_norm": 6.820822238922119, "learning_rate": 8.527483058388665e-05, "loss": 0.9905, "step": 11507 }, { "epoch": 0.7797276238227522, "grad_norm": 6.440364837646484, "learning_rate": 8.527346156478884e-05, "loss": 1.0333, "step": 11508 }, { "epoch": 0.7797953790907243, "grad_norm": 6.21598482131958, "learning_rate": 8.527209254569102e-05, "loss": 0.6562, "step": 11509 }, { "epoch": 0.7798631343586964, "grad_norm": 8.483455657958984, "learning_rate": 8.52707235265932e-05, "loss": 0.6767, "step": 11510 }, { "epoch": 0.7799308896266685, "grad_norm": 6.934547424316406, "learning_rate": 8.526935450749538e-05, "loss": 0.8354, "step": 11511 }, { "epoch": 0.7799986448946405, "grad_norm": 8.640101432800293, "learning_rate": 8.526798548839757e-05, "loss": 0.9985, "step": 11512 }, { "epoch": 0.7800664001626126, "grad_norm": 5.356478214263916, "learning_rate": 8.526661646929975e-05, "loss": 0.8524, "step": 11513 }, { "epoch": 0.7801341554305847, "grad_norm": 6.551882743835449, "learning_rate": 8.526524745020193e-05, "loss": 0.9502, "step": 11514 }, { "epoch": 0.7802019106985568, "grad_norm": 6.971212387084961, "learning_rate": 8.526387843110412e-05, "loss": 0.909, "step": 11515 }, { "epoch": 0.7802696659665289, "grad_norm": 7.040616035461426, "learning_rate": 8.52625094120063e-05, "loss": 1.1058, "step": 11516 }, { "epoch": 0.780337421234501, "grad_norm": 5.899086952209473, "learning_rate": 8.526114039290849e-05, "loss": 0.7611, "step": 11517 }, { "epoch": 0.7804051765024731, "grad_norm": 6.242020606994629, "learning_rate": 8.525977137381067e-05, "loss": 0.7059, "step": 11518 }, { "epoch": 0.7804729317704452, "grad_norm": 6.418991565704346, "learning_rate": 8.525840235471285e-05, "loss": 0.8625, "step": 11519 }, { "epoch": 0.7805406870384173, "grad_norm": 4.487674236297607, "learning_rate": 8.525703333561503e-05, "loss": 0.7343, "step": 11520 }, { "epoch": 0.7806084423063894, "grad_norm": 5.526739597320557, "learning_rate": 8.525566431651721e-05, "loss": 0.8331, "step": 11521 }, { "epoch": 0.7806761975743614, "grad_norm": 7.298591136932373, "learning_rate": 8.52542952974194e-05, "loss": 1.0112, "step": 11522 }, { "epoch": 0.7807439528423334, "grad_norm": 7.665398120880127, "learning_rate": 8.525292627832159e-05, "loss": 0.8248, "step": 11523 }, { "epoch": 0.7808117081103055, "grad_norm": 5.981564044952393, "learning_rate": 8.525155725922377e-05, "loss": 0.8557, "step": 11524 }, { "epoch": 0.7808794633782776, "grad_norm": 6.268359184265137, "learning_rate": 8.525018824012595e-05, "loss": 0.5943, "step": 11525 }, { "epoch": 0.7809472186462497, "grad_norm": 5.51030969619751, "learning_rate": 8.524881922102814e-05, "loss": 0.7657, "step": 11526 }, { "epoch": 0.7810149739142218, "grad_norm": 6.035849094390869, "learning_rate": 8.524745020193032e-05, "loss": 0.7501, "step": 11527 }, { "epoch": 0.7810827291821939, "grad_norm": 5.455019474029541, "learning_rate": 8.52460811828325e-05, "loss": 0.6627, "step": 11528 }, { "epoch": 0.781150484450166, "grad_norm": 5.731950283050537, "learning_rate": 8.524471216373468e-05, "loss": 0.7456, "step": 11529 }, { "epoch": 0.7812182397181381, "grad_norm": 6.446641445159912, "learning_rate": 8.524334314463686e-05, "loss": 0.668, "step": 11530 }, { "epoch": 0.7812859949861102, "grad_norm": 6.093938827514648, "learning_rate": 8.524197412553905e-05, "loss": 0.7248, "step": 11531 }, { "epoch": 0.7813537502540823, "grad_norm": 5.524089813232422, "learning_rate": 8.524060510644124e-05, "loss": 0.8411, "step": 11532 }, { "epoch": 0.7814215055220544, "grad_norm": 6.203104496002197, "learning_rate": 8.523923608734342e-05, "loss": 0.8882, "step": 11533 }, { "epoch": 0.7814892607900265, "grad_norm": 5.39666748046875, "learning_rate": 8.52378670682456e-05, "loss": 0.8111, "step": 11534 }, { "epoch": 0.7815570160579985, "grad_norm": 5.305294990539551, "learning_rate": 8.523649804914779e-05, "loss": 0.6809, "step": 11535 }, { "epoch": 0.7816247713259706, "grad_norm": 5.251042366027832, "learning_rate": 8.523512903004997e-05, "loss": 0.8533, "step": 11536 }, { "epoch": 0.7816925265939427, "grad_norm": 7.346649169921875, "learning_rate": 8.523376001095215e-05, "loss": 0.857, "step": 11537 }, { "epoch": 0.7817602818619148, "grad_norm": 6.78896427154541, "learning_rate": 8.523239099185434e-05, "loss": 0.77, "step": 11538 }, { "epoch": 0.7818280371298868, "grad_norm": 6.815920352935791, "learning_rate": 8.523102197275652e-05, "loss": 0.6838, "step": 11539 }, { "epoch": 0.7818957923978589, "grad_norm": 5.649730682373047, "learning_rate": 8.52296529536587e-05, "loss": 0.833, "step": 11540 }, { "epoch": 0.781963547665831, "grad_norm": 7.253706932067871, "learning_rate": 8.52282839345609e-05, "loss": 0.8077, "step": 11541 }, { "epoch": 0.7820313029338031, "grad_norm": 4.436539649963379, "learning_rate": 8.522691491546308e-05, "loss": 0.6202, "step": 11542 }, { "epoch": 0.7820990582017752, "grad_norm": 5.917550563812256, "learning_rate": 8.522554589636526e-05, "loss": 0.82, "step": 11543 }, { "epoch": 0.7821668134697473, "grad_norm": 5.576394081115723, "learning_rate": 8.522417687726744e-05, "loss": 0.6957, "step": 11544 }, { "epoch": 0.7822345687377193, "grad_norm": 7.591821670532227, "learning_rate": 8.522280785816963e-05, "loss": 0.9026, "step": 11545 }, { "epoch": 0.7823023240056914, "grad_norm": 8.16065502166748, "learning_rate": 8.522143883907181e-05, "loss": 1.0274, "step": 11546 }, { "epoch": 0.7823700792736635, "grad_norm": 9.164224624633789, "learning_rate": 8.5220069819974e-05, "loss": 0.7935, "step": 11547 }, { "epoch": 0.7824378345416356, "grad_norm": 5.825256824493408, "learning_rate": 8.521870080087617e-05, "loss": 0.8263, "step": 11548 }, { "epoch": 0.7825055898096077, "grad_norm": 7.315790176391602, "learning_rate": 8.521733178177837e-05, "loss": 0.9282, "step": 11549 }, { "epoch": 0.7825733450775798, "grad_norm": 6.677700519561768, "learning_rate": 8.521596276268055e-05, "loss": 1.011, "step": 11550 }, { "epoch": 0.7826411003455519, "grad_norm": 7.136357307434082, "learning_rate": 8.521459374358273e-05, "loss": 0.7062, "step": 11551 }, { "epoch": 0.782708855613524, "grad_norm": 6.2828168869018555, "learning_rate": 8.521322472448491e-05, "loss": 0.6732, "step": 11552 }, { "epoch": 0.7827766108814961, "grad_norm": 6.77725887298584, "learning_rate": 8.521185570538709e-05, "loss": 0.8692, "step": 11553 }, { "epoch": 0.7828443661494682, "grad_norm": 6.065176963806152, "learning_rate": 8.521048668628928e-05, "loss": 0.7296, "step": 11554 }, { "epoch": 0.7829121214174402, "grad_norm": 6.393941402435303, "learning_rate": 8.520911766719146e-05, "loss": 0.7382, "step": 11555 }, { "epoch": 0.7829798766854122, "grad_norm": 5.480118751525879, "learning_rate": 8.520774864809364e-05, "loss": 1.0189, "step": 11556 }, { "epoch": 0.7830476319533843, "grad_norm": 5.192300796508789, "learning_rate": 8.520637962899583e-05, "loss": 0.7421, "step": 11557 }, { "epoch": 0.7831153872213564, "grad_norm": 4.797957897186279, "learning_rate": 8.520501060989802e-05, "loss": 0.5936, "step": 11558 }, { "epoch": 0.7831831424893285, "grad_norm": 5.605537414550781, "learning_rate": 8.52036415908002e-05, "loss": 0.6871, "step": 11559 }, { "epoch": 0.7832508977573006, "grad_norm": 4.938405513763428, "learning_rate": 8.520227257170238e-05, "loss": 0.7197, "step": 11560 }, { "epoch": 0.7833186530252727, "grad_norm": 5.017856597900391, "learning_rate": 8.520090355260456e-05, "loss": 0.9076, "step": 11561 }, { "epoch": 0.7833864082932448, "grad_norm": 4.1797285079956055, "learning_rate": 8.519953453350674e-05, "loss": 0.7057, "step": 11562 }, { "epoch": 0.7834541635612169, "grad_norm": 6.84492301940918, "learning_rate": 8.519816551440893e-05, "loss": 0.7758, "step": 11563 }, { "epoch": 0.783521918829189, "grad_norm": 5.999660968780518, "learning_rate": 8.519679649531111e-05, "loss": 0.7158, "step": 11564 }, { "epoch": 0.7835896740971611, "grad_norm": 6.352871894836426, "learning_rate": 8.51954274762133e-05, "loss": 0.8369, "step": 11565 }, { "epoch": 0.7836574293651332, "grad_norm": 7.0241923332214355, "learning_rate": 8.519405845711548e-05, "loss": 0.8714, "step": 11566 }, { "epoch": 0.7837251846331053, "grad_norm": 6.709263324737549, "learning_rate": 8.519268943801767e-05, "loss": 0.7421, "step": 11567 }, { "epoch": 0.7837929399010773, "grad_norm": 6.393216133117676, "learning_rate": 8.519132041891985e-05, "loss": 0.7692, "step": 11568 }, { "epoch": 0.7838606951690494, "grad_norm": 5.52962589263916, "learning_rate": 8.518995139982203e-05, "loss": 0.8172, "step": 11569 }, { "epoch": 0.7839284504370215, "grad_norm": 5.78670072555542, "learning_rate": 8.518858238072421e-05, "loss": 0.8083, "step": 11570 }, { "epoch": 0.7839962057049935, "grad_norm": 6.439652919769287, "learning_rate": 8.518721336162639e-05, "loss": 0.8051, "step": 11571 }, { "epoch": 0.7840639609729656, "grad_norm": 7.955451011657715, "learning_rate": 8.518584434252858e-05, "loss": 0.8791, "step": 11572 }, { "epoch": 0.7841317162409377, "grad_norm": 9.079748153686523, "learning_rate": 8.518447532343076e-05, "loss": 0.8787, "step": 11573 }, { "epoch": 0.7841994715089098, "grad_norm": 7.1748366355896, "learning_rate": 8.518310630433295e-05, "loss": 0.9591, "step": 11574 }, { "epoch": 0.7842672267768819, "grad_norm": 4.792274475097656, "learning_rate": 8.518173728523513e-05, "loss": 0.6126, "step": 11575 }, { "epoch": 0.784334982044854, "grad_norm": 8.38217544555664, "learning_rate": 8.51803682661373e-05, "loss": 1.1548, "step": 11576 }, { "epoch": 0.7844027373128261, "grad_norm": 8.210965156555176, "learning_rate": 8.51789992470395e-05, "loss": 0.879, "step": 11577 }, { "epoch": 0.7844704925807982, "grad_norm": 6.52365255355835, "learning_rate": 8.517763022794168e-05, "loss": 1.1208, "step": 11578 }, { "epoch": 0.7845382478487702, "grad_norm": 8.54340934753418, "learning_rate": 8.517626120884386e-05, "loss": 0.8938, "step": 11579 }, { "epoch": 0.7846060031167423, "grad_norm": 4.85086727142334, "learning_rate": 8.517489218974604e-05, "loss": 0.5674, "step": 11580 }, { "epoch": 0.7846737583847144, "grad_norm": 5.40913200378418, "learning_rate": 8.517352317064823e-05, "loss": 0.6512, "step": 11581 }, { "epoch": 0.7847415136526865, "grad_norm": 5.471793174743652, "learning_rate": 8.517215415155041e-05, "loss": 0.6023, "step": 11582 }, { "epoch": 0.7848092689206586, "grad_norm": 6.162317752838135, "learning_rate": 8.51707851324526e-05, "loss": 0.6595, "step": 11583 }, { "epoch": 0.7848770241886307, "grad_norm": 4.846681594848633, "learning_rate": 8.516941611335479e-05, "loss": 0.7467, "step": 11584 }, { "epoch": 0.7849447794566028, "grad_norm": 6.785501956939697, "learning_rate": 8.516804709425697e-05, "loss": 0.9231, "step": 11585 }, { "epoch": 0.7850125347245749, "grad_norm": 5.503603935241699, "learning_rate": 8.516667807515915e-05, "loss": 0.6025, "step": 11586 }, { "epoch": 0.785080289992547, "grad_norm": 6.446911334991455, "learning_rate": 8.516530905606134e-05, "loss": 0.8209, "step": 11587 }, { "epoch": 0.785148045260519, "grad_norm": 6.04754114151001, "learning_rate": 8.516394003696352e-05, "loss": 0.7663, "step": 11588 }, { "epoch": 0.785215800528491, "grad_norm": 8.239023208618164, "learning_rate": 8.51625710178657e-05, "loss": 1.06, "step": 11589 }, { "epoch": 0.7852835557964631, "grad_norm": 5.434386253356934, "learning_rate": 8.51612019987679e-05, "loss": 0.6585, "step": 11590 }, { "epoch": 0.7853513110644352, "grad_norm": 7.99647855758667, "learning_rate": 8.515983297967008e-05, "loss": 1.3215, "step": 11591 }, { "epoch": 0.7854190663324073, "grad_norm": 6.199028491973877, "learning_rate": 8.515846396057226e-05, "loss": 0.7499, "step": 11592 }, { "epoch": 0.7854868216003794, "grad_norm": 5.438849449157715, "learning_rate": 8.515709494147444e-05, "loss": 1.0288, "step": 11593 }, { "epoch": 0.7855545768683515, "grad_norm": 6.933850288391113, "learning_rate": 8.515572592237662e-05, "loss": 0.8108, "step": 11594 }, { "epoch": 0.7856223321363236, "grad_norm": 7.720444679260254, "learning_rate": 8.515435690327881e-05, "loss": 0.7747, "step": 11595 }, { "epoch": 0.7856900874042957, "grad_norm": 5.852978706359863, "learning_rate": 8.5152987884181e-05, "loss": 0.9843, "step": 11596 }, { "epoch": 0.7857578426722678, "grad_norm": 5.039735794067383, "learning_rate": 8.515161886508317e-05, "loss": 0.8137, "step": 11597 }, { "epoch": 0.7858255979402399, "grad_norm": 5.9263529777526855, "learning_rate": 8.515024984598535e-05, "loss": 0.7445, "step": 11598 }, { "epoch": 0.785893353208212, "grad_norm": 7.095968246459961, "learning_rate": 8.514888082688753e-05, "loss": 0.9883, "step": 11599 }, { "epoch": 0.7859611084761841, "grad_norm": 7.839550018310547, "learning_rate": 8.514751180778973e-05, "loss": 0.8652, "step": 11600 }, { "epoch": 0.7860288637441561, "grad_norm": 6.799243927001953, "learning_rate": 8.514614278869191e-05, "loss": 0.9413, "step": 11601 }, { "epoch": 0.7860966190121282, "grad_norm": 5.903491020202637, "learning_rate": 8.514477376959409e-05, "loss": 0.7944, "step": 11602 }, { "epoch": 0.7861643742801003, "grad_norm": 5.674754619598389, "learning_rate": 8.514340475049627e-05, "loss": 0.639, "step": 11603 }, { "epoch": 0.7862321295480723, "grad_norm": 6.983302593231201, "learning_rate": 8.514203573139846e-05, "loss": 1.0028, "step": 11604 }, { "epoch": 0.7862998848160444, "grad_norm": 6.731539726257324, "learning_rate": 8.514066671230064e-05, "loss": 0.8127, "step": 11605 }, { "epoch": 0.7863676400840165, "grad_norm": 5.513458251953125, "learning_rate": 8.513929769320282e-05, "loss": 0.8266, "step": 11606 }, { "epoch": 0.7864353953519886, "grad_norm": 5.592813968658447, "learning_rate": 8.5137928674105e-05, "loss": 0.8913, "step": 11607 }, { "epoch": 0.7865031506199607, "grad_norm": 6.7772603034973145, "learning_rate": 8.513655965500719e-05, "loss": 0.9662, "step": 11608 }, { "epoch": 0.7865709058879328, "grad_norm": 6.447290420532227, "learning_rate": 8.513519063590938e-05, "loss": 0.8599, "step": 11609 }, { "epoch": 0.7866386611559049, "grad_norm": 5.317022800445557, "learning_rate": 8.513382161681156e-05, "loss": 0.7079, "step": 11610 }, { "epoch": 0.786706416423877, "grad_norm": 6.355508327484131, "learning_rate": 8.513245259771374e-05, "loss": 0.783, "step": 11611 }, { "epoch": 0.786774171691849, "grad_norm": 6.773859977722168, "learning_rate": 8.513108357861592e-05, "loss": 0.9146, "step": 11612 }, { "epoch": 0.7868419269598211, "grad_norm": 6.100228786468506, "learning_rate": 8.512971455951811e-05, "loss": 0.8626, "step": 11613 }, { "epoch": 0.7869096822277932, "grad_norm": 4.6573004722595215, "learning_rate": 8.51283455404203e-05, "loss": 0.4562, "step": 11614 }, { "epoch": 0.7869774374957653, "grad_norm": 5.893970966339111, "learning_rate": 8.512697652132247e-05, "loss": 0.9437, "step": 11615 }, { "epoch": 0.7870451927637374, "grad_norm": 6.907064914703369, "learning_rate": 8.512560750222465e-05, "loss": 0.7592, "step": 11616 }, { "epoch": 0.7871129480317095, "grad_norm": 5.383542060852051, "learning_rate": 8.512423848312684e-05, "loss": 0.8429, "step": 11617 }, { "epoch": 0.7871807032996816, "grad_norm": 4.651388168334961, "learning_rate": 8.512286946402903e-05, "loss": 0.7447, "step": 11618 }, { "epoch": 0.7872484585676537, "grad_norm": 7.3148417472839355, "learning_rate": 8.512150044493121e-05, "loss": 0.905, "step": 11619 }, { "epoch": 0.7873162138356257, "grad_norm": 6.7118353843688965, "learning_rate": 8.512013142583339e-05, "loss": 0.8032, "step": 11620 }, { "epoch": 0.7873839691035978, "grad_norm": 7.792928695678711, "learning_rate": 8.511876240673557e-05, "loss": 0.7426, "step": 11621 }, { "epoch": 0.7874517243715699, "grad_norm": 7.6762614250183105, "learning_rate": 8.511739338763776e-05, "loss": 0.7188, "step": 11622 }, { "epoch": 0.7875194796395419, "grad_norm": 4.8527607917785645, "learning_rate": 8.511602436853994e-05, "loss": 0.4442, "step": 11623 }, { "epoch": 0.787587234907514, "grad_norm": 4.9824957847595215, "learning_rate": 8.511465534944212e-05, "loss": 0.7234, "step": 11624 }, { "epoch": 0.7876549901754861, "grad_norm": 6.530035495758057, "learning_rate": 8.51132863303443e-05, "loss": 0.8197, "step": 11625 }, { "epoch": 0.7877227454434582, "grad_norm": 6.13261079788208, "learning_rate": 8.511191731124649e-05, "loss": 0.922, "step": 11626 }, { "epoch": 0.7877905007114303, "grad_norm": 7.777993202209473, "learning_rate": 8.511054829214868e-05, "loss": 0.7275, "step": 11627 }, { "epoch": 0.7878582559794024, "grad_norm": 5.4319281578063965, "learning_rate": 8.510917927305086e-05, "loss": 0.6158, "step": 11628 }, { "epoch": 0.7879260112473745, "grad_norm": 7.162403583526611, "learning_rate": 8.510781025395304e-05, "loss": 0.6238, "step": 11629 }, { "epoch": 0.7879937665153466, "grad_norm": 7.020395755767822, "learning_rate": 8.510644123485523e-05, "loss": 1.0743, "step": 11630 }, { "epoch": 0.7880615217833187, "grad_norm": 7.972400665283203, "learning_rate": 8.510507221575741e-05, "loss": 0.8562, "step": 11631 }, { "epoch": 0.7881292770512908, "grad_norm": 7.600156307220459, "learning_rate": 8.51037031966596e-05, "loss": 0.9075, "step": 11632 }, { "epoch": 0.7881970323192629, "grad_norm": 6.119834899902344, "learning_rate": 8.510233417756179e-05, "loss": 1.0396, "step": 11633 }, { "epoch": 0.788264787587235, "grad_norm": 6.254417896270752, "learning_rate": 8.510096515846397e-05, "loss": 0.8023, "step": 11634 }, { "epoch": 0.788332542855207, "grad_norm": 7.466210842132568, "learning_rate": 8.509959613936615e-05, "loss": 0.9744, "step": 11635 }, { "epoch": 0.788400298123179, "grad_norm": 6.635150909423828, "learning_rate": 8.509822712026834e-05, "loss": 0.8722, "step": 11636 }, { "epoch": 0.7884680533911511, "grad_norm": 6.4519362449646, "learning_rate": 8.509685810117052e-05, "loss": 0.7355, "step": 11637 }, { "epoch": 0.7885358086591232, "grad_norm": 6.042237758636475, "learning_rate": 8.50954890820727e-05, "loss": 0.7921, "step": 11638 }, { "epoch": 0.7886035639270953, "grad_norm": 6.3814616203308105, "learning_rate": 8.509412006297488e-05, "loss": 0.8843, "step": 11639 }, { "epoch": 0.7886713191950674, "grad_norm": 7.140514373779297, "learning_rate": 8.509275104387706e-05, "loss": 0.8913, "step": 11640 }, { "epoch": 0.7887390744630395, "grad_norm": 5.622684478759766, "learning_rate": 8.509138202477926e-05, "loss": 0.8172, "step": 11641 }, { "epoch": 0.7888068297310116, "grad_norm": 5.332169532775879, "learning_rate": 8.509001300568144e-05, "loss": 0.7585, "step": 11642 }, { "epoch": 0.7888745849989837, "grad_norm": 5.908132553100586, "learning_rate": 8.508864398658362e-05, "loss": 0.9236, "step": 11643 }, { "epoch": 0.7889423402669558, "grad_norm": 5.472514629364014, "learning_rate": 8.50872749674858e-05, "loss": 0.8923, "step": 11644 }, { "epoch": 0.7890100955349278, "grad_norm": 6.155751705169678, "learning_rate": 8.5085905948388e-05, "loss": 0.9054, "step": 11645 }, { "epoch": 0.7890778508028999, "grad_norm": 5.493722438812256, "learning_rate": 8.508453692929017e-05, "loss": 0.7483, "step": 11646 }, { "epoch": 0.789145606070872, "grad_norm": 7.661139965057373, "learning_rate": 8.508316791019235e-05, "loss": 0.7433, "step": 11647 }, { "epoch": 0.7892133613388441, "grad_norm": 6.037461757659912, "learning_rate": 8.508179889109453e-05, "loss": 0.933, "step": 11648 }, { "epoch": 0.7892811166068162, "grad_norm": 5.072673797607422, "learning_rate": 8.508042987199671e-05, "loss": 0.8115, "step": 11649 }, { "epoch": 0.7893488718747883, "grad_norm": 5.48813009262085, "learning_rate": 8.507906085289891e-05, "loss": 0.7581, "step": 11650 }, { "epoch": 0.7894166271427604, "grad_norm": 7.160548686981201, "learning_rate": 8.507769183380109e-05, "loss": 0.8798, "step": 11651 }, { "epoch": 0.7894843824107325, "grad_norm": 5.741982936859131, "learning_rate": 8.507632281470327e-05, "loss": 0.7952, "step": 11652 }, { "epoch": 0.7895521376787045, "grad_norm": 6.27877950668335, "learning_rate": 8.507495379560545e-05, "loss": 0.9341, "step": 11653 }, { "epoch": 0.7896198929466766, "grad_norm": 6.65634822845459, "learning_rate": 8.507358477650763e-05, "loss": 1.0704, "step": 11654 }, { "epoch": 0.7896876482146487, "grad_norm": 4.268200397491455, "learning_rate": 8.507221575740982e-05, "loss": 0.6692, "step": 11655 }, { "epoch": 0.7897554034826207, "grad_norm": 6.084795951843262, "learning_rate": 8.5070846738312e-05, "loss": 0.9024, "step": 11656 }, { "epoch": 0.7898231587505928, "grad_norm": 6.263867378234863, "learning_rate": 8.506947771921418e-05, "loss": 0.7795, "step": 11657 }, { "epoch": 0.7898909140185649, "grad_norm": 5.393991947174072, "learning_rate": 8.506810870011636e-05, "loss": 0.7585, "step": 11658 }, { "epoch": 0.789958669286537, "grad_norm": 5.708785057067871, "learning_rate": 8.506673968101856e-05, "loss": 0.9771, "step": 11659 }, { "epoch": 0.7900264245545091, "grad_norm": 5.779507637023926, "learning_rate": 8.506537066192074e-05, "loss": 0.7698, "step": 11660 }, { "epoch": 0.7900941798224812, "grad_norm": 5.451954364776611, "learning_rate": 8.506400164282292e-05, "loss": 0.8186, "step": 11661 }, { "epoch": 0.7901619350904533, "grad_norm": 6.849606513977051, "learning_rate": 8.50626326237251e-05, "loss": 0.5914, "step": 11662 }, { "epoch": 0.7902296903584254, "grad_norm": 6.583099365234375, "learning_rate": 8.506126360462728e-05, "loss": 0.7948, "step": 11663 }, { "epoch": 0.7902974456263975, "grad_norm": 5.769179821014404, "learning_rate": 8.505989458552947e-05, "loss": 0.7482, "step": 11664 }, { "epoch": 0.7903652008943696, "grad_norm": 6.720520496368408, "learning_rate": 8.505852556643165e-05, "loss": 0.8047, "step": 11665 }, { "epoch": 0.7904329561623417, "grad_norm": 5.712310791015625, "learning_rate": 8.505715654733383e-05, "loss": 0.744, "step": 11666 }, { "epoch": 0.7905007114303138, "grad_norm": 5.840827941894531, "learning_rate": 8.505578752823601e-05, "loss": 0.6534, "step": 11667 }, { "epoch": 0.7905684666982858, "grad_norm": 5.9518351554870605, "learning_rate": 8.505441850913821e-05, "loss": 0.9304, "step": 11668 }, { "epoch": 0.7906362219662578, "grad_norm": 6.9761528968811035, "learning_rate": 8.505304949004039e-05, "loss": 0.8118, "step": 11669 }, { "epoch": 0.7907039772342299, "grad_norm": 5.4346842765808105, "learning_rate": 8.505168047094257e-05, "loss": 0.7391, "step": 11670 }, { "epoch": 0.790771732502202, "grad_norm": 6.628547191619873, "learning_rate": 8.505031145184475e-05, "loss": 0.5708, "step": 11671 }, { "epoch": 0.7908394877701741, "grad_norm": 7.494357109069824, "learning_rate": 8.504894243274693e-05, "loss": 0.8687, "step": 11672 }, { "epoch": 0.7909072430381462, "grad_norm": 4.743175029754639, "learning_rate": 8.504757341364912e-05, "loss": 0.7287, "step": 11673 }, { "epoch": 0.7909749983061183, "grad_norm": 6.512623310089111, "learning_rate": 8.50462043945513e-05, "loss": 0.9047, "step": 11674 }, { "epoch": 0.7910427535740904, "grad_norm": 7.142396926879883, "learning_rate": 8.504483537545348e-05, "loss": 0.7012, "step": 11675 }, { "epoch": 0.7911105088420625, "grad_norm": 5.33573055267334, "learning_rate": 8.504346635635568e-05, "loss": 0.6606, "step": 11676 }, { "epoch": 0.7911782641100346, "grad_norm": 6.757303714752197, "learning_rate": 8.504209733725786e-05, "loss": 0.8881, "step": 11677 }, { "epoch": 0.7912460193780066, "grad_norm": 6.163415908813477, "learning_rate": 8.504072831816004e-05, "loss": 0.8034, "step": 11678 }, { "epoch": 0.7913137746459787, "grad_norm": 5.699300765991211, "learning_rate": 8.503935929906223e-05, "loss": 0.9178, "step": 11679 }, { "epoch": 0.7913815299139508, "grad_norm": 5.064122676849365, "learning_rate": 8.503799027996441e-05, "loss": 0.8219, "step": 11680 }, { "epoch": 0.7914492851819229, "grad_norm": 5.224904537200928, "learning_rate": 8.50366212608666e-05, "loss": 0.7344, "step": 11681 }, { "epoch": 0.791517040449895, "grad_norm": 4.929017543792725, "learning_rate": 8.503525224176879e-05, "loss": 0.7713, "step": 11682 }, { "epoch": 0.7915847957178671, "grad_norm": 6.1249918937683105, "learning_rate": 8.503388322267097e-05, "loss": 0.8553, "step": 11683 }, { "epoch": 0.7916525509858392, "grad_norm": 5.393836975097656, "learning_rate": 8.503251420357315e-05, "loss": 0.6989, "step": 11684 }, { "epoch": 0.7917203062538112, "grad_norm": 6.436197757720947, "learning_rate": 8.503114518447533e-05, "loss": 1.0481, "step": 11685 }, { "epoch": 0.7917880615217833, "grad_norm": 5.787166118621826, "learning_rate": 8.502977616537751e-05, "loss": 0.9203, "step": 11686 }, { "epoch": 0.7918558167897554, "grad_norm": 5.892452716827393, "learning_rate": 8.50284071462797e-05, "loss": 0.9302, "step": 11687 }, { "epoch": 0.7919235720577275, "grad_norm": 7.195859909057617, "learning_rate": 8.502703812718188e-05, "loss": 0.9175, "step": 11688 }, { "epoch": 0.7919913273256995, "grad_norm": 6.343230247497559, "learning_rate": 8.502566910808406e-05, "loss": 0.7292, "step": 11689 }, { "epoch": 0.7920590825936716, "grad_norm": 6.082936763763428, "learning_rate": 8.502430008898624e-05, "loss": 0.8826, "step": 11690 }, { "epoch": 0.7921268378616437, "grad_norm": 5.115715980529785, "learning_rate": 8.502293106988844e-05, "loss": 0.6697, "step": 11691 }, { "epoch": 0.7921945931296158, "grad_norm": 6.96610164642334, "learning_rate": 8.502156205079062e-05, "loss": 0.9914, "step": 11692 }, { "epoch": 0.7922623483975879, "grad_norm": 7.457095146179199, "learning_rate": 8.50201930316928e-05, "loss": 0.7839, "step": 11693 }, { "epoch": 0.79233010366556, "grad_norm": 7.025375843048096, "learning_rate": 8.501882401259498e-05, "loss": 1.0105, "step": 11694 }, { "epoch": 0.7923978589335321, "grad_norm": 5.951284408569336, "learning_rate": 8.501745499349716e-05, "loss": 0.6889, "step": 11695 }, { "epoch": 0.7924656142015042, "grad_norm": 5.650534629821777, "learning_rate": 8.501608597439935e-05, "loss": 0.7848, "step": 11696 }, { "epoch": 0.7925333694694763, "grad_norm": 5.552826881408691, "learning_rate": 8.501471695530153e-05, "loss": 0.6213, "step": 11697 }, { "epoch": 0.7926011247374484, "grad_norm": 5.661040306091309, "learning_rate": 8.501334793620371e-05, "loss": 0.5737, "step": 11698 }, { "epoch": 0.7926688800054205, "grad_norm": 5.965978622436523, "learning_rate": 8.50119789171059e-05, "loss": 0.8278, "step": 11699 }, { "epoch": 0.7927366352733926, "grad_norm": 5.854281902313232, "learning_rate": 8.501060989800809e-05, "loss": 0.831, "step": 11700 }, { "epoch": 0.7928043905413646, "grad_norm": 6.782879829406738, "learning_rate": 8.500924087891027e-05, "loss": 0.8058, "step": 11701 }, { "epoch": 0.7928721458093366, "grad_norm": 6.70954704284668, "learning_rate": 8.500787185981245e-05, "loss": 0.6725, "step": 11702 }, { "epoch": 0.7929399010773087, "grad_norm": 5.684144020080566, "learning_rate": 8.500650284071463e-05, "loss": 0.8527, "step": 11703 }, { "epoch": 0.7930076563452808, "grad_norm": 5.054625511169434, "learning_rate": 8.500513382161681e-05, "loss": 0.8579, "step": 11704 }, { "epoch": 0.7930754116132529, "grad_norm": 6.6226887702941895, "learning_rate": 8.5003764802519e-05, "loss": 0.9449, "step": 11705 }, { "epoch": 0.793143166881225, "grad_norm": 6.413197040557861, "learning_rate": 8.500239578342118e-05, "loss": 0.646, "step": 11706 }, { "epoch": 0.7932109221491971, "grad_norm": 4.308269500732422, "learning_rate": 8.500102676432336e-05, "loss": 0.6515, "step": 11707 }, { "epoch": 0.7932786774171692, "grad_norm": 5.270321846008301, "learning_rate": 8.499965774522554e-05, "loss": 0.6476, "step": 11708 }, { "epoch": 0.7933464326851413, "grad_norm": 6.440086364746094, "learning_rate": 8.499828872612772e-05, "loss": 0.8295, "step": 11709 }, { "epoch": 0.7934141879531134, "grad_norm": 6.078103065490723, "learning_rate": 8.499691970702992e-05, "loss": 0.8173, "step": 11710 }, { "epoch": 0.7934819432210855, "grad_norm": 4.972411155700684, "learning_rate": 8.49955506879321e-05, "loss": 0.6696, "step": 11711 }, { "epoch": 0.7935496984890575, "grad_norm": 5.865548610687256, "learning_rate": 8.499418166883428e-05, "loss": 0.7337, "step": 11712 }, { "epoch": 0.7936174537570296, "grad_norm": 6.041398048400879, "learning_rate": 8.499281264973646e-05, "loss": 0.8586, "step": 11713 }, { "epoch": 0.7936852090250017, "grad_norm": 8.569158554077148, "learning_rate": 8.499144363063865e-05, "loss": 0.9027, "step": 11714 }, { "epoch": 0.7937529642929738, "grad_norm": 6.1328020095825195, "learning_rate": 8.499007461154083e-05, "loss": 0.7019, "step": 11715 }, { "epoch": 0.7938207195609459, "grad_norm": 5.501848220825195, "learning_rate": 8.498870559244301e-05, "loss": 0.8227, "step": 11716 }, { "epoch": 0.793888474828918, "grad_norm": 8.566222190856934, "learning_rate": 8.49873365733452e-05, "loss": 0.8148, "step": 11717 }, { "epoch": 0.79395623009689, "grad_norm": 10.178439140319824, "learning_rate": 8.498596755424737e-05, "loss": 1.0213, "step": 11718 }, { "epoch": 0.7940239853648621, "grad_norm": 7.6949310302734375, "learning_rate": 8.498459853514957e-05, "loss": 0.7581, "step": 11719 }, { "epoch": 0.7940917406328342, "grad_norm": 5.988775730133057, "learning_rate": 8.498322951605175e-05, "loss": 0.7738, "step": 11720 }, { "epoch": 0.7941594959008063, "grad_norm": 5.81986665725708, "learning_rate": 8.498186049695393e-05, "loss": 0.7798, "step": 11721 }, { "epoch": 0.7942272511687783, "grad_norm": 5.84140157699585, "learning_rate": 8.498049147785612e-05, "loss": 0.8285, "step": 11722 }, { "epoch": 0.7942950064367504, "grad_norm": 4.675839900970459, "learning_rate": 8.49791224587583e-05, "loss": 0.6703, "step": 11723 }, { "epoch": 0.7943627617047225, "grad_norm": 5.857149124145508, "learning_rate": 8.497775343966048e-05, "loss": 0.8915, "step": 11724 }, { "epoch": 0.7944305169726946, "grad_norm": 10.169214248657227, "learning_rate": 8.497638442056268e-05, "loss": 0.8606, "step": 11725 }, { "epoch": 0.7944982722406667, "grad_norm": 5.2627058029174805, "learning_rate": 8.497501540146486e-05, "loss": 0.7705, "step": 11726 }, { "epoch": 0.7945660275086388, "grad_norm": 7.3032355308532715, "learning_rate": 8.497364638236704e-05, "loss": 1.0857, "step": 11727 }, { "epoch": 0.7946337827766109, "grad_norm": 7.499369144439697, "learning_rate": 8.497227736326923e-05, "loss": 0.8655, "step": 11728 }, { "epoch": 0.794701538044583, "grad_norm": 6.25002908706665, "learning_rate": 8.497090834417141e-05, "loss": 0.7271, "step": 11729 }, { "epoch": 0.7947692933125551, "grad_norm": 6.5583882331848145, "learning_rate": 8.496953932507359e-05, "loss": 0.771, "step": 11730 }, { "epoch": 0.7948370485805272, "grad_norm": 6.737629413604736, "learning_rate": 8.496817030597577e-05, "loss": 0.8865, "step": 11731 }, { "epoch": 0.7949048038484993, "grad_norm": 7.376718044281006, "learning_rate": 8.496680128687795e-05, "loss": 0.8407, "step": 11732 }, { "epoch": 0.7949725591164714, "grad_norm": 7.077400207519531, "learning_rate": 8.496543226778015e-05, "loss": 0.8339, "step": 11733 }, { "epoch": 0.7950403143844433, "grad_norm": 6.768246173858643, "learning_rate": 8.496406324868233e-05, "loss": 0.8654, "step": 11734 }, { "epoch": 0.7951080696524154, "grad_norm": 5.732030391693115, "learning_rate": 8.496269422958451e-05, "loss": 0.9901, "step": 11735 }, { "epoch": 0.7951758249203875, "grad_norm": 7.728169918060303, "learning_rate": 8.496132521048669e-05, "loss": 1.0275, "step": 11736 }, { "epoch": 0.7952435801883596, "grad_norm": 4.867015838623047, "learning_rate": 8.495995619138888e-05, "loss": 0.7226, "step": 11737 }, { "epoch": 0.7953113354563317, "grad_norm": 6.376992225646973, "learning_rate": 8.495858717229106e-05, "loss": 0.8501, "step": 11738 }, { "epoch": 0.7953790907243038, "grad_norm": 6.90419340133667, "learning_rate": 8.495721815319324e-05, "loss": 0.5617, "step": 11739 }, { "epoch": 0.7954468459922759, "grad_norm": 7.2407450675964355, "learning_rate": 8.495584913409542e-05, "loss": 0.8224, "step": 11740 }, { "epoch": 0.795514601260248, "grad_norm": 5.104798316955566, "learning_rate": 8.49544801149976e-05, "loss": 0.761, "step": 11741 }, { "epoch": 0.7955823565282201, "grad_norm": 5.49780797958374, "learning_rate": 8.49531110958998e-05, "loss": 0.6668, "step": 11742 }, { "epoch": 0.7956501117961922, "grad_norm": 4.905865669250488, "learning_rate": 8.495174207680198e-05, "loss": 0.7844, "step": 11743 }, { "epoch": 0.7957178670641643, "grad_norm": 6.6370391845703125, "learning_rate": 8.495037305770416e-05, "loss": 1.0102, "step": 11744 }, { "epoch": 0.7957856223321363, "grad_norm": 6.089507579803467, "learning_rate": 8.494900403860634e-05, "loss": 0.8679, "step": 11745 }, { "epoch": 0.7958533776001084, "grad_norm": 5.490042209625244, "learning_rate": 8.494763501950853e-05, "loss": 0.7481, "step": 11746 }, { "epoch": 0.7959211328680805, "grad_norm": 5.85631799697876, "learning_rate": 8.494626600041071e-05, "loss": 0.7926, "step": 11747 }, { "epoch": 0.7959888881360526, "grad_norm": 6.780750274658203, "learning_rate": 8.49448969813129e-05, "loss": 0.5902, "step": 11748 }, { "epoch": 0.7960566434040247, "grad_norm": 6.697319984436035, "learning_rate": 8.494352796221507e-05, "loss": 0.835, "step": 11749 }, { "epoch": 0.7961243986719968, "grad_norm": 6.056969165802002, "learning_rate": 8.494215894311725e-05, "loss": 0.8725, "step": 11750 }, { "epoch": 0.7961921539399688, "grad_norm": 9.009320259094238, "learning_rate": 8.494078992401945e-05, "loss": 0.8103, "step": 11751 }, { "epoch": 0.7962599092079409, "grad_norm": 7.897705554962158, "learning_rate": 8.493942090492163e-05, "loss": 0.7913, "step": 11752 }, { "epoch": 0.796327664475913, "grad_norm": 5.677363872528076, "learning_rate": 8.493805188582381e-05, "loss": 0.6845, "step": 11753 }, { "epoch": 0.7963954197438851, "grad_norm": 6.340780735015869, "learning_rate": 8.493668286672599e-05, "loss": 0.6519, "step": 11754 }, { "epoch": 0.7964631750118571, "grad_norm": 5.341319561004639, "learning_rate": 8.493531384762818e-05, "loss": 0.7437, "step": 11755 }, { "epoch": 0.7965309302798292, "grad_norm": 5.388099193572998, "learning_rate": 8.493394482853036e-05, "loss": 0.6879, "step": 11756 }, { "epoch": 0.7965986855478013, "grad_norm": 6.492825031280518, "learning_rate": 8.493257580943254e-05, "loss": 0.8562, "step": 11757 }, { "epoch": 0.7966664408157734, "grad_norm": 6.953293323516846, "learning_rate": 8.493120679033472e-05, "loss": 0.826, "step": 11758 }, { "epoch": 0.7967341960837455, "grad_norm": 5.61476993560791, "learning_rate": 8.49298377712369e-05, "loss": 0.7604, "step": 11759 }, { "epoch": 0.7968019513517176, "grad_norm": 7.391281604766846, "learning_rate": 8.49284687521391e-05, "loss": 0.9048, "step": 11760 }, { "epoch": 0.7968697066196897, "grad_norm": 5.455954074859619, "learning_rate": 8.492709973304128e-05, "loss": 0.8972, "step": 11761 }, { "epoch": 0.7969374618876618, "grad_norm": 10.402437210083008, "learning_rate": 8.492573071394346e-05, "loss": 0.8567, "step": 11762 }, { "epoch": 0.7970052171556339, "grad_norm": 7.704123497009277, "learning_rate": 8.492436169484564e-05, "loss": 0.8926, "step": 11763 }, { "epoch": 0.797072972423606, "grad_norm": 10.505579948425293, "learning_rate": 8.492299267574782e-05, "loss": 0.7695, "step": 11764 }, { "epoch": 0.7971407276915781, "grad_norm": 8.137372016906738, "learning_rate": 8.492162365665001e-05, "loss": 0.814, "step": 11765 }, { "epoch": 0.7972084829595502, "grad_norm": 7.7297587394714355, "learning_rate": 8.49202546375522e-05, "loss": 0.9038, "step": 11766 }, { "epoch": 0.7972762382275221, "grad_norm": 5.5629048347473145, "learning_rate": 8.491888561845437e-05, "loss": 0.5899, "step": 11767 }, { "epoch": 0.7973439934954942, "grad_norm": 9.380592346191406, "learning_rate": 8.491751659935655e-05, "loss": 0.8522, "step": 11768 }, { "epoch": 0.7974117487634663, "grad_norm": 5.808100700378418, "learning_rate": 8.491614758025875e-05, "loss": 0.923, "step": 11769 }, { "epoch": 0.7974795040314384, "grad_norm": 6.091804504394531, "learning_rate": 8.491477856116093e-05, "loss": 0.8042, "step": 11770 }, { "epoch": 0.7975472592994105, "grad_norm": 6.675506114959717, "learning_rate": 8.491340954206312e-05, "loss": 1.0141, "step": 11771 }, { "epoch": 0.7976150145673826, "grad_norm": 7.39612340927124, "learning_rate": 8.49120405229653e-05, "loss": 0.9027, "step": 11772 }, { "epoch": 0.7976827698353547, "grad_norm": 7.455977439880371, "learning_rate": 8.491067150386748e-05, "loss": 0.7705, "step": 11773 }, { "epoch": 0.7977505251033268, "grad_norm": 6.535350322723389, "learning_rate": 8.490930248476968e-05, "loss": 0.7648, "step": 11774 }, { "epoch": 0.7978182803712989, "grad_norm": 8.165600776672363, "learning_rate": 8.490793346567186e-05, "loss": 0.9073, "step": 11775 }, { "epoch": 0.797886035639271, "grad_norm": 5.424903869628906, "learning_rate": 8.490656444657404e-05, "loss": 0.7656, "step": 11776 }, { "epoch": 0.797953790907243, "grad_norm": 6.095433235168457, "learning_rate": 8.490519542747622e-05, "loss": 0.5978, "step": 11777 }, { "epoch": 0.7980215461752151, "grad_norm": 4.698237419128418, "learning_rate": 8.490382640837841e-05, "loss": 0.689, "step": 11778 }, { "epoch": 0.7980893014431872, "grad_norm": 5.7089691162109375, "learning_rate": 8.490245738928059e-05, "loss": 0.719, "step": 11779 }, { "epoch": 0.7981570567111593, "grad_norm": 4.3311848640441895, "learning_rate": 8.490108837018277e-05, "loss": 0.8484, "step": 11780 }, { "epoch": 0.7982248119791314, "grad_norm": 5.905073165893555, "learning_rate": 8.489971935108495e-05, "loss": 0.7679, "step": 11781 }, { "epoch": 0.7982925672471035, "grad_norm": 7.171839714050293, "learning_rate": 8.489835033198713e-05, "loss": 0.6716, "step": 11782 }, { "epoch": 0.7983603225150755, "grad_norm": 8.157262802124023, "learning_rate": 8.489698131288933e-05, "loss": 0.7821, "step": 11783 }, { "epoch": 0.7984280777830476, "grad_norm": 5.551645278930664, "learning_rate": 8.489561229379151e-05, "loss": 0.5619, "step": 11784 }, { "epoch": 0.7984958330510197, "grad_norm": 6.759763240814209, "learning_rate": 8.489424327469369e-05, "loss": 0.9527, "step": 11785 }, { "epoch": 0.7985635883189918, "grad_norm": 5.427465915679932, "learning_rate": 8.489287425559587e-05, "loss": 0.5603, "step": 11786 }, { "epoch": 0.7986313435869639, "grad_norm": 5.436514377593994, "learning_rate": 8.489150523649805e-05, "loss": 0.6464, "step": 11787 }, { "epoch": 0.798699098854936, "grad_norm": 5.558623313903809, "learning_rate": 8.489013621740024e-05, "loss": 0.7742, "step": 11788 }, { "epoch": 0.798766854122908, "grad_norm": 6.840981960296631, "learning_rate": 8.488876719830242e-05, "loss": 0.9908, "step": 11789 }, { "epoch": 0.7988346093908801, "grad_norm": 6.066009044647217, "learning_rate": 8.48873981792046e-05, "loss": 0.7004, "step": 11790 }, { "epoch": 0.7989023646588522, "grad_norm": 5.6321024894714355, "learning_rate": 8.488602916010678e-05, "loss": 0.7741, "step": 11791 }, { "epoch": 0.7989701199268243, "grad_norm": 6.667470932006836, "learning_rate": 8.488466014100898e-05, "loss": 0.9582, "step": 11792 }, { "epoch": 0.7990378751947964, "grad_norm": 5.693469524383545, "learning_rate": 8.488329112191116e-05, "loss": 0.8868, "step": 11793 }, { "epoch": 0.7991056304627685, "grad_norm": 4.270181179046631, "learning_rate": 8.488192210281334e-05, "loss": 0.5002, "step": 11794 }, { "epoch": 0.7991733857307406, "grad_norm": 6.748290061950684, "learning_rate": 8.488055308371552e-05, "loss": 0.9551, "step": 11795 }, { "epoch": 0.7992411409987127, "grad_norm": 6.088380336761475, "learning_rate": 8.48791840646177e-05, "loss": 0.735, "step": 11796 }, { "epoch": 0.7993088962666848, "grad_norm": 7.105894565582275, "learning_rate": 8.487781504551989e-05, "loss": 0.8939, "step": 11797 }, { "epoch": 0.7993766515346569, "grad_norm": 5.425162315368652, "learning_rate": 8.487644602642207e-05, "loss": 0.612, "step": 11798 }, { "epoch": 0.799444406802629, "grad_norm": 6.511662006378174, "learning_rate": 8.487507700732425e-05, "loss": 0.6636, "step": 11799 }, { "epoch": 0.7995121620706009, "grad_norm": 6.1298828125, "learning_rate": 8.487370798822643e-05, "loss": 0.7554, "step": 11800 }, { "epoch": 0.799579917338573, "grad_norm": 8.377950668334961, "learning_rate": 8.487233896912863e-05, "loss": 0.9965, "step": 11801 }, { "epoch": 0.7996476726065451, "grad_norm": 5.563699245452881, "learning_rate": 8.487096995003081e-05, "loss": 0.9248, "step": 11802 }, { "epoch": 0.7997154278745172, "grad_norm": 5.939857006072998, "learning_rate": 8.486960093093299e-05, "loss": 0.822, "step": 11803 }, { "epoch": 0.7997831831424893, "grad_norm": 7.076834678649902, "learning_rate": 8.486823191183517e-05, "loss": 0.9286, "step": 11804 }, { "epoch": 0.7998509384104614, "grad_norm": 5.123041152954102, "learning_rate": 8.486686289273735e-05, "loss": 0.5529, "step": 11805 }, { "epoch": 0.7999186936784335, "grad_norm": 7.112299919128418, "learning_rate": 8.486549387363954e-05, "loss": 1.0114, "step": 11806 }, { "epoch": 0.7999864489464056, "grad_norm": 5.081669807434082, "learning_rate": 8.486412485454172e-05, "loss": 0.6341, "step": 11807 }, { "epoch": 0.8000542042143777, "grad_norm": 4.5973310470581055, "learning_rate": 8.48627558354439e-05, "loss": 0.6517, "step": 11808 }, { "epoch": 0.8001219594823498, "grad_norm": 11.162668228149414, "learning_rate": 8.486138681634608e-05, "loss": 0.8336, "step": 11809 }, { "epoch": 0.8001897147503219, "grad_norm": 6.119869709014893, "learning_rate": 8.486001779724826e-05, "loss": 0.8596, "step": 11810 }, { "epoch": 0.800257470018294, "grad_norm": 6.878213405609131, "learning_rate": 8.485864877815046e-05, "loss": 1.0812, "step": 11811 }, { "epoch": 0.800325225286266, "grad_norm": 5.8862786293029785, "learning_rate": 8.485727975905264e-05, "loss": 0.5654, "step": 11812 }, { "epoch": 0.8003929805542381, "grad_norm": 5.611292362213135, "learning_rate": 8.485591073995482e-05, "loss": 0.7241, "step": 11813 }, { "epoch": 0.8004607358222102, "grad_norm": 5.294071197509766, "learning_rate": 8.4854541720857e-05, "loss": 0.8368, "step": 11814 }, { "epoch": 0.8005284910901823, "grad_norm": 7.11854362487793, "learning_rate": 8.485317270175919e-05, "loss": 0.7055, "step": 11815 }, { "epoch": 0.8005962463581543, "grad_norm": 6.1037373542785645, "learning_rate": 8.485180368266137e-05, "loss": 0.9473, "step": 11816 }, { "epoch": 0.8006640016261264, "grad_norm": 7.161137580871582, "learning_rate": 8.485043466356355e-05, "loss": 0.909, "step": 11817 }, { "epoch": 0.8007317568940985, "grad_norm": 5.295969009399414, "learning_rate": 8.484906564446575e-05, "loss": 0.7958, "step": 11818 }, { "epoch": 0.8007995121620706, "grad_norm": 7.93539571762085, "learning_rate": 8.484769662536793e-05, "loss": 0.823, "step": 11819 }, { "epoch": 0.8008672674300427, "grad_norm": 10.116933822631836, "learning_rate": 8.484632760627011e-05, "loss": 0.7204, "step": 11820 }, { "epoch": 0.8009350226980148, "grad_norm": 5.872991561889648, "learning_rate": 8.48449585871723e-05, "loss": 0.7758, "step": 11821 }, { "epoch": 0.8010027779659868, "grad_norm": 5.063296318054199, "learning_rate": 8.484358956807448e-05, "loss": 0.7124, "step": 11822 }, { "epoch": 0.8010705332339589, "grad_norm": 7.007580757141113, "learning_rate": 8.484222054897666e-05, "loss": 1.0091, "step": 11823 }, { "epoch": 0.801138288501931, "grad_norm": 7.909097671508789, "learning_rate": 8.484085152987886e-05, "loss": 0.7811, "step": 11824 }, { "epoch": 0.8012060437699031, "grad_norm": 6.1278486251831055, "learning_rate": 8.483948251078104e-05, "loss": 1.0353, "step": 11825 }, { "epoch": 0.8012737990378752, "grad_norm": 5.439823627471924, "learning_rate": 8.483811349168322e-05, "loss": 0.6914, "step": 11826 }, { "epoch": 0.8013415543058473, "grad_norm": 5.741817474365234, "learning_rate": 8.48367444725854e-05, "loss": 0.6987, "step": 11827 }, { "epoch": 0.8014093095738194, "grad_norm": 5.603649139404297, "learning_rate": 8.483537545348758e-05, "loss": 0.5928, "step": 11828 }, { "epoch": 0.8014770648417915, "grad_norm": 5.263033390045166, "learning_rate": 8.483400643438977e-05, "loss": 0.6639, "step": 11829 }, { "epoch": 0.8015448201097636, "grad_norm": 5.7066874504089355, "learning_rate": 8.483263741529195e-05, "loss": 0.8714, "step": 11830 }, { "epoch": 0.8016125753777357, "grad_norm": 7.526313781738281, "learning_rate": 8.483126839619413e-05, "loss": 0.8741, "step": 11831 }, { "epoch": 0.8016803306457077, "grad_norm": 7.491227626800537, "learning_rate": 8.482989937709631e-05, "loss": 0.7122, "step": 11832 }, { "epoch": 0.8017480859136797, "grad_norm": 5.182889938354492, "learning_rate": 8.482853035799851e-05, "loss": 0.7191, "step": 11833 }, { "epoch": 0.8018158411816518, "grad_norm": 7.201566219329834, "learning_rate": 8.482716133890069e-05, "loss": 1.0528, "step": 11834 }, { "epoch": 0.8018835964496239, "grad_norm": 6.984012126922607, "learning_rate": 8.482579231980287e-05, "loss": 0.943, "step": 11835 }, { "epoch": 0.801951351717596, "grad_norm": 6.960568904876709, "learning_rate": 8.482442330070505e-05, "loss": 0.8563, "step": 11836 }, { "epoch": 0.8020191069855681, "grad_norm": 5.898118495941162, "learning_rate": 8.482305428160723e-05, "loss": 0.5819, "step": 11837 }, { "epoch": 0.8020868622535402, "grad_norm": 7.0784592628479, "learning_rate": 8.482168526250942e-05, "loss": 1.0053, "step": 11838 }, { "epoch": 0.8021546175215123, "grad_norm": 5.741284370422363, "learning_rate": 8.48203162434116e-05, "loss": 0.8304, "step": 11839 }, { "epoch": 0.8022223727894844, "grad_norm": 6.108719348907471, "learning_rate": 8.481894722431378e-05, "loss": 0.7938, "step": 11840 }, { "epoch": 0.8022901280574565, "grad_norm": 8.281269073486328, "learning_rate": 8.481757820521596e-05, "loss": 0.8584, "step": 11841 }, { "epoch": 0.8023578833254286, "grad_norm": 9.610613822937012, "learning_rate": 8.481620918611814e-05, "loss": 0.9888, "step": 11842 }, { "epoch": 0.8024256385934007, "grad_norm": 7.831075191497803, "learning_rate": 8.481484016702034e-05, "loss": 0.7218, "step": 11843 }, { "epoch": 0.8024933938613728, "grad_norm": 5.8057756423950195, "learning_rate": 8.481347114792252e-05, "loss": 0.7296, "step": 11844 }, { "epoch": 0.8025611491293448, "grad_norm": 7.156800270080566, "learning_rate": 8.48121021288247e-05, "loss": 0.8085, "step": 11845 }, { "epoch": 0.8026289043973169, "grad_norm": 8.218076705932617, "learning_rate": 8.481073310972688e-05, "loss": 0.8111, "step": 11846 }, { "epoch": 0.802696659665289, "grad_norm": 5.695756435394287, "learning_rate": 8.480936409062907e-05, "loss": 0.6723, "step": 11847 }, { "epoch": 0.802764414933261, "grad_norm": 6.302215576171875, "learning_rate": 8.480799507153125e-05, "loss": 0.9768, "step": 11848 }, { "epoch": 0.8028321702012331, "grad_norm": 6.877220630645752, "learning_rate": 8.480662605243343e-05, "loss": 0.756, "step": 11849 }, { "epoch": 0.8028999254692052, "grad_norm": 5.5505805015563965, "learning_rate": 8.480525703333561e-05, "loss": 0.8171, "step": 11850 }, { "epoch": 0.8029676807371773, "grad_norm": 6.355271339416504, "learning_rate": 8.48038880142378e-05, "loss": 0.791, "step": 11851 }, { "epoch": 0.8030354360051494, "grad_norm": 5.083590984344482, "learning_rate": 8.480251899513999e-05, "loss": 0.6983, "step": 11852 }, { "epoch": 0.8031031912731215, "grad_norm": 5.824821472167969, "learning_rate": 8.480114997604217e-05, "loss": 0.8647, "step": 11853 }, { "epoch": 0.8031709465410936, "grad_norm": 5.969753742218018, "learning_rate": 8.479978095694435e-05, "loss": 0.6172, "step": 11854 }, { "epoch": 0.8032387018090656, "grad_norm": 6.784320831298828, "learning_rate": 8.479841193784653e-05, "loss": 0.9374, "step": 11855 }, { "epoch": 0.8033064570770377, "grad_norm": 8.921832084655762, "learning_rate": 8.479704291874872e-05, "loss": 0.6769, "step": 11856 }, { "epoch": 0.8033742123450098, "grad_norm": 5.738458633422852, "learning_rate": 8.47956738996509e-05, "loss": 0.784, "step": 11857 }, { "epoch": 0.8034419676129819, "grad_norm": 5.5067949295043945, "learning_rate": 8.479430488055308e-05, "loss": 0.8279, "step": 11858 }, { "epoch": 0.803509722880954, "grad_norm": 7.618658065795898, "learning_rate": 8.479293586145526e-05, "loss": 0.7528, "step": 11859 }, { "epoch": 0.8035774781489261, "grad_norm": 7.020671844482422, "learning_rate": 8.479156684235744e-05, "loss": 0.8522, "step": 11860 }, { "epoch": 0.8036452334168982, "grad_norm": 6.179223537445068, "learning_rate": 8.479019782325964e-05, "loss": 0.7367, "step": 11861 }, { "epoch": 0.8037129886848703, "grad_norm": 7.495842933654785, "learning_rate": 8.478882880416182e-05, "loss": 0.9065, "step": 11862 }, { "epoch": 0.8037807439528424, "grad_norm": 5.692570686340332, "learning_rate": 8.4787459785064e-05, "loss": 0.964, "step": 11863 }, { "epoch": 0.8038484992208145, "grad_norm": 6.177666664123535, "learning_rate": 8.478609076596619e-05, "loss": 1.0165, "step": 11864 }, { "epoch": 0.8039162544887865, "grad_norm": 5.492598533630371, "learning_rate": 8.478472174686837e-05, "loss": 0.7103, "step": 11865 }, { "epoch": 0.8039840097567585, "grad_norm": 6.331650257110596, "learning_rate": 8.478335272777055e-05, "loss": 0.7544, "step": 11866 }, { "epoch": 0.8040517650247306, "grad_norm": 6.0005412101745605, "learning_rate": 8.478198370867275e-05, "loss": 1.0188, "step": 11867 }, { "epoch": 0.8041195202927027, "grad_norm": 5.383848190307617, "learning_rate": 8.478061468957493e-05, "loss": 0.6002, "step": 11868 }, { "epoch": 0.8041872755606748, "grad_norm": 5.411609172821045, "learning_rate": 8.477924567047711e-05, "loss": 0.7154, "step": 11869 }, { "epoch": 0.8042550308286469, "grad_norm": 6.1650614738464355, "learning_rate": 8.47778766513793e-05, "loss": 0.6797, "step": 11870 }, { "epoch": 0.804322786096619, "grad_norm": 5.903098106384277, "learning_rate": 8.477650763228148e-05, "loss": 0.8813, "step": 11871 }, { "epoch": 0.8043905413645911, "grad_norm": 5.579502105712891, "learning_rate": 8.477513861318366e-05, "loss": 0.8935, "step": 11872 }, { "epoch": 0.8044582966325632, "grad_norm": 5.614695072174072, "learning_rate": 8.477376959408584e-05, "loss": 0.7914, "step": 11873 }, { "epoch": 0.8045260519005353, "grad_norm": 8.45317268371582, "learning_rate": 8.477240057498802e-05, "loss": 0.8245, "step": 11874 }, { "epoch": 0.8045938071685074, "grad_norm": 4.028397560119629, "learning_rate": 8.477103155589022e-05, "loss": 0.7105, "step": 11875 }, { "epoch": 0.8046615624364795, "grad_norm": 6.691359519958496, "learning_rate": 8.47696625367924e-05, "loss": 0.8871, "step": 11876 }, { "epoch": 0.8047293177044516, "grad_norm": 5.918386936187744, "learning_rate": 8.476829351769458e-05, "loss": 0.9558, "step": 11877 }, { "epoch": 0.8047970729724236, "grad_norm": 5.369225978851318, "learning_rate": 8.476692449859676e-05, "loss": 0.9613, "step": 11878 }, { "epoch": 0.8048648282403957, "grad_norm": 6.17078161239624, "learning_rate": 8.476555547949895e-05, "loss": 0.969, "step": 11879 }, { "epoch": 0.8049325835083678, "grad_norm": 7.199222087860107, "learning_rate": 8.476418646040113e-05, "loss": 1.0042, "step": 11880 }, { "epoch": 0.8050003387763398, "grad_norm": 4.739121913909912, "learning_rate": 8.476281744130331e-05, "loss": 0.695, "step": 11881 }, { "epoch": 0.8050680940443119, "grad_norm": 5.670197486877441, "learning_rate": 8.476144842220549e-05, "loss": 0.7452, "step": 11882 }, { "epoch": 0.805135849312284, "grad_norm": 5.977322578430176, "learning_rate": 8.476007940310767e-05, "loss": 0.9352, "step": 11883 }, { "epoch": 0.8052036045802561, "grad_norm": 4.873468399047852, "learning_rate": 8.475871038400987e-05, "loss": 0.857, "step": 11884 }, { "epoch": 0.8052713598482282, "grad_norm": 6.405252933502197, "learning_rate": 8.475734136491205e-05, "loss": 0.8847, "step": 11885 }, { "epoch": 0.8053391151162003, "grad_norm": 7.104851722717285, "learning_rate": 8.475597234581423e-05, "loss": 0.9097, "step": 11886 }, { "epoch": 0.8054068703841724, "grad_norm": 7.579138278961182, "learning_rate": 8.475460332671641e-05, "loss": 1.1768, "step": 11887 }, { "epoch": 0.8054746256521444, "grad_norm": 7.450385093688965, "learning_rate": 8.47532343076186e-05, "loss": 1.1769, "step": 11888 }, { "epoch": 0.8055423809201165, "grad_norm": 5.401349067687988, "learning_rate": 8.475186528852078e-05, "loss": 0.7389, "step": 11889 }, { "epoch": 0.8056101361880886, "grad_norm": 7.890493869781494, "learning_rate": 8.475049626942296e-05, "loss": 0.8679, "step": 11890 }, { "epoch": 0.8056778914560607, "grad_norm": 6.528770446777344, "learning_rate": 8.474912725032514e-05, "loss": 0.637, "step": 11891 }, { "epoch": 0.8057456467240328, "grad_norm": 5.3305277824401855, "learning_rate": 8.474775823122732e-05, "loss": 0.7671, "step": 11892 }, { "epoch": 0.8058134019920049, "grad_norm": 6.40350866317749, "learning_rate": 8.474638921212952e-05, "loss": 1.055, "step": 11893 }, { "epoch": 0.805881157259977, "grad_norm": 7.686482906341553, "learning_rate": 8.47450201930317e-05, "loss": 0.7023, "step": 11894 }, { "epoch": 0.8059489125279491, "grad_norm": 5.42139196395874, "learning_rate": 8.474365117393388e-05, "loss": 0.7528, "step": 11895 }, { "epoch": 0.8060166677959212, "grad_norm": 5.809099197387695, "learning_rate": 8.474228215483606e-05, "loss": 0.8843, "step": 11896 }, { "epoch": 0.8060844230638932, "grad_norm": 7.206499099731445, "learning_rate": 8.474091313573824e-05, "loss": 0.9504, "step": 11897 }, { "epoch": 0.8061521783318653, "grad_norm": 9.313186645507812, "learning_rate": 8.473954411664043e-05, "loss": 0.665, "step": 11898 }, { "epoch": 0.8062199335998373, "grad_norm": 6.453405857086182, "learning_rate": 8.473817509754261e-05, "loss": 0.6757, "step": 11899 }, { "epoch": 0.8062876888678094, "grad_norm": 6.309181213378906, "learning_rate": 8.473680607844479e-05, "loss": 0.8481, "step": 11900 }, { "epoch": 0.8063554441357815, "grad_norm": 7.917797088623047, "learning_rate": 8.473543705934697e-05, "loss": 0.7202, "step": 11901 }, { "epoch": 0.8064231994037536, "grad_norm": 5.929610252380371, "learning_rate": 8.473406804024917e-05, "loss": 0.8476, "step": 11902 }, { "epoch": 0.8064909546717257, "grad_norm": 7.981934070587158, "learning_rate": 8.473269902115135e-05, "loss": 0.6972, "step": 11903 }, { "epoch": 0.8065587099396978, "grad_norm": 6.3853440284729, "learning_rate": 8.473133000205353e-05, "loss": 0.881, "step": 11904 }, { "epoch": 0.8066264652076699, "grad_norm": 6.253373622894287, "learning_rate": 8.472996098295571e-05, "loss": 0.8463, "step": 11905 }, { "epoch": 0.806694220475642, "grad_norm": 5.279558181762695, "learning_rate": 8.472859196385789e-05, "loss": 0.8723, "step": 11906 }, { "epoch": 0.8067619757436141, "grad_norm": 7.0585126876831055, "learning_rate": 8.472722294476008e-05, "loss": 0.8384, "step": 11907 }, { "epoch": 0.8068297310115862, "grad_norm": 9.070088386535645, "learning_rate": 8.472585392566226e-05, "loss": 0.6578, "step": 11908 }, { "epoch": 0.8068974862795583, "grad_norm": 7.324275970458984, "learning_rate": 8.472448490656444e-05, "loss": 0.7454, "step": 11909 }, { "epoch": 0.8069652415475304, "grad_norm": 5.854486465454102, "learning_rate": 8.472311588746664e-05, "loss": 0.9014, "step": 11910 }, { "epoch": 0.8070329968155024, "grad_norm": 7.762482166290283, "learning_rate": 8.472174686836882e-05, "loss": 0.8265, "step": 11911 }, { "epoch": 0.8071007520834745, "grad_norm": 7.545839786529541, "learning_rate": 8.4720377849271e-05, "loss": 0.7831, "step": 11912 }, { "epoch": 0.8071685073514466, "grad_norm": 4.952934741973877, "learning_rate": 8.471900883017319e-05, "loss": 0.6514, "step": 11913 }, { "epoch": 0.8072362626194186, "grad_norm": 5.232631206512451, "learning_rate": 8.471763981107537e-05, "loss": 0.6642, "step": 11914 }, { "epoch": 0.8073040178873907, "grad_norm": 6.2648844718933105, "learning_rate": 8.471627079197755e-05, "loss": 0.9277, "step": 11915 }, { "epoch": 0.8073717731553628, "grad_norm": 6.143033027648926, "learning_rate": 8.471490177287975e-05, "loss": 0.6482, "step": 11916 }, { "epoch": 0.8074395284233349, "grad_norm": 7.901699066162109, "learning_rate": 8.471353275378193e-05, "loss": 0.739, "step": 11917 }, { "epoch": 0.807507283691307, "grad_norm": 7.756877899169922, "learning_rate": 8.471216373468411e-05, "loss": 0.8307, "step": 11918 }, { "epoch": 0.8075750389592791, "grad_norm": 8.90708065032959, "learning_rate": 8.471079471558629e-05, "loss": 1.0344, "step": 11919 }, { "epoch": 0.8076427942272512, "grad_norm": 6.328546524047852, "learning_rate": 8.470942569648847e-05, "loss": 0.8514, "step": 11920 }, { "epoch": 0.8077105494952233, "grad_norm": 7.169960975646973, "learning_rate": 8.470805667739066e-05, "loss": 0.8809, "step": 11921 }, { "epoch": 0.8077783047631953, "grad_norm": 6.628053188323975, "learning_rate": 8.470668765829284e-05, "loss": 0.9416, "step": 11922 }, { "epoch": 0.8078460600311674, "grad_norm": 7.435145378112793, "learning_rate": 8.470531863919502e-05, "loss": 0.6911, "step": 11923 }, { "epoch": 0.8079138152991395, "grad_norm": 5.010408878326416, "learning_rate": 8.47039496200972e-05, "loss": 0.7926, "step": 11924 }, { "epoch": 0.8079815705671116, "grad_norm": 5.625992298126221, "learning_rate": 8.47025806009994e-05, "loss": 0.8863, "step": 11925 }, { "epoch": 0.8080493258350837, "grad_norm": 8.575138092041016, "learning_rate": 8.470121158190158e-05, "loss": 0.7859, "step": 11926 }, { "epoch": 0.8081170811030558, "grad_norm": 5.657655239105225, "learning_rate": 8.469984256280376e-05, "loss": 0.7417, "step": 11927 }, { "epoch": 0.8081848363710279, "grad_norm": 6.131776809692383, "learning_rate": 8.469847354370594e-05, "loss": 0.8232, "step": 11928 }, { "epoch": 0.808252591639, "grad_norm": 7.183122158050537, "learning_rate": 8.469710452460812e-05, "loss": 0.7562, "step": 11929 }, { "epoch": 0.808320346906972, "grad_norm": 6.112959384918213, "learning_rate": 8.469573550551031e-05, "loss": 0.8744, "step": 11930 }, { "epoch": 0.8083881021749441, "grad_norm": 6.5298590660095215, "learning_rate": 8.469436648641249e-05, "loss": 0.8624, "step": 11931 }, { "epoch": 0.8084558574429161, "grad_norm": 7.30040979385376, "learning_rate": 8.469299746731467e-05, "loss": 0.8141, "step": 11932 }, { "epoch": 0.8085236127108882, "grad_norm": 5.7774977684021, "learning_rate": 8.469162844821685e-05, "loss": 0.7541, "step": 11933 }, { "epoch": 0.8085913679788603, "grad_norm": 6.179437637329102, "learning_rate": 8.469025942911905e-05, "loss": 0.7737, "step": 11934 }, { "epoch": 0.8086591232468324, "grad_norm": 5.715566635131836, "learning_rate": 8.468889041002123e-05, "loss": 0.7055, "step": 11935 }, { "epoch": 0.8087268785148045, "grad_norm": 5.082167148590088, "learning_rate": 8.468752139092341e-05, "loss": 0.7135, "step": 11936 }, { "epoch": 0.8087946337827766, "grad_norm": 5.4100117683410645, "learning_rate": 8.468615237182559e-05, "loss": 0.6587, "step": 11937 }, { "epoch": 0.8088623890507487, "grad_norm": 5.654635429382324, "learning_rate": 8.468478335272777e-05, "loss": 0.8371, "step": 11938 }, { "epoch": 0.8089301443187208, "grad_norm": 13.093804359436035, "learning_rate": 8.468341433362996e-05, "loss": 0.7622, "step": 11939 }, { "epoch": 0.8089978995866929, "grad_norm": 6.519128322601318, "learning_rate": 8.468204531453214e-05, "loss": 0.5443, "step": 11940 }, { "epoch": 0.809065654854665, "grad_norm": 6.814082622528076, "learning_rate": 8.468067629543432e-05, "loss": 0.7527, "step": 11941 }, { "epoch": 0.8091334101226371, "grad_norm": 5.064624786376953, "learning_rate": 8.46793072763365e-05, "loss": 0.7392, "step": 11942 }, { "epoch": 0.8092011653906092, "grad_norm": 6.819398403167725, "learning_rate": 8.467793825723868e-05, "loss": 0.8395, "step": 11943 }, { "epoch": 0.8092689206585812, "grad_norm": 5.4765520095825195, "learning_rate": 8.467656923814088e-05, "loss": 0.6145, "step": 11944 }, { "epoch": 0.8093366759265533, "grad_norm": 5.149988651275635, "learning_rate": 8.467520021904306e-05, "loss": 0.6676, "step": 11945 }, { "epoch": 0.8094044311945253, "grad_norm": 5.497957229614258, "learning_rate": 8.467383119994524e-05, "loss": 0.8605, "step": 11946 }, { "epoch": 0.8094721864624974, "grad_norm": 6.20892333984375, "learning_rate": 8.467246218084742e-05, "loss": 0.7578, "step": 11947 }, { "epoch": 0.8095399417304695, "grad_norm": 5.368823528289795, "learning_rate": 8.467109316174961e-05, "loss": 0.6105, "step": 11948 }, { "epoch": 0.8096076969984416, "grad_norm": 5.05849552154541, "learning_rate": 8.466972414265179e-05, "loss": 0.6165, "step": 11949 }, { "epoch": 0.8096754522664137, "grad_norm": 6.1569366455078125, "learning_rate": 8.466835512355397e-05, "loss": 0.6865, "step": 11950 }, { "epoch": 0.8097432075343858, "grad_norm": 4.664434432983398, "learning_rate": 8.466698610445615e-05, "loss": 0.7348, "step": 11951 }, { "epoch": 0.8098109628023579, "grad_norm": 5.691340446472168, "learning_rate": 8.466561708535833e-05, "loss": 0.625, "step": 11952 }, { "epoch": 0.80987871807033, "grad_norm": 5.343607425689697, "learning_rate": 8.466424806626053e-05, "loss": 0.5954, "step": 11953 }, { "epoch": 0.809946473338302, "grad_norm": 7.289133548736572, "learning_rate": 8.466287904716271e-05, "loss": 0.8564, "step": 11954 }, { "epoch": 0.8100142286062741, "grad_norm": 5.425955772399902, "learning_rate": 8.466151002806489e-05, "loss": 0.6588, "step": 11955 }, { "epoch": 0.8100819838742462, "grad_norm": 6.205384731292725, "learning_rate": 8.466014100896708e-05, "loss": 0.8809, "step": 11956 }, { "epoch": 0.8101497391422183, "grad_norm": 7.989570140838623, "learning_rate": 8.465877198986926e-05, "loss": 0.6923, "step": 11957 }, { "epoch": 0.8102174944101904, "grad_norm": 5.3845343589782715, "learning_rate": 8.465740297077144e-05, "loss": 0.645, "step": 11958 }, { "epoch": 0.8102852496781625, "grad_norm": 5.856838703155518, "learning_rate": 8.465603395167364e-05, "loss": 0.9173, "step": 11959 }, { "epoch": 0.8103530049461346, "grad_norm": 7.78700590133667, "learning_rate": 8.465466493257582e-05, "loss": 0.7514, "step": 11960 }, { "epoch": 0.8104207602141067, "grad_norm": 6.29768705368042, "learning_rate": 8.4653295913478e-05, "loss": 0.862, "step": 11961 }, { "epoch": 0.8104885154820788, "grad_norm": 5.1076579093933105, "learning_rate": 8.465192689438019e-05, "loss": 0.6395, "step": 11962 }, { "epoch": 0.8105562707500508, "grad_norm": 7.518921375274658, "learning_rate": 8.465055787528237e-05, "loss": 0.9251, "step": 11963 }, { "epoch": 0.8106240260180229, "grad_norm": 5.740368843078613, "learning_rate": 8.464918885618455e-05, "loss": 0.8354, "step": 11964 }, { "epoch": 0.810691781285995, "grad_norm": 7.306612491607666, "learning_rate": 8.464781983708673e-05, "loss": 0.7414, "step": 11965 }, { "epoch": 0.810759536553967, "grad_norm": 5.480811595916748, "learning_rate": 8.464645081798893e-05, "loss": 0.7021, "step": 11966 }, { "epoch": 0.8108272918219391, "grad_norm": 6.626734733581543, "learning_rate": 8.46450817988911e-05, "loss": 0.7866, "step": 11967 }, { "epoch": 0.8108950470899112, "grad_norm": 5.154781818389893, "learning_rate": 8.464371277979329e-05, "loss": 0.653, "step": 11968 }, { "epoch": 0.8109628023578833, "grad_norm": 6.271585464477539, "learning_rate": 8.464234376069547e-05, "loss": 0.9318, "step": 11969 }, { "epoch": 0.8110305576258554, "grad_norm": 6.008039951324463, "learning_rate": 8.464097474159765e-05, "loss": 0.6224, "step": 11970 }, { "epoch": 0.8110983128938275, "grad_norm": 6.575869560241699, "learning_rate": 8.463960572249984e-05, "loss": 0.685, "step": 11971 }, { "epoch": 0.8111660681617996, "grad_norm": 6.166112899780273, "learning_rate": 8.463823670340202e-05, "loss": 0.6738, "step": 11972 }, { "epoch": 0.8112338234297717, "grad_norm": 6.191090106964111, "learning_rate": 8.46368676843042e-05, "loss": 0.9672, "step": 11973 }, { "epoch": 0.8113015786977438, "grad_norm": 5.888790130615234, "learning_rate": 8.463549866520638e-05, "loss": 0.5671, "step": 11974 }, { "epoch": 0.8113693339657159, "grad_norm": 6.156980514526367, "learning_rate": 8.463412964610856e-05, "loss": 0.6792, "step": 11975 }, { "epoch": 0.811437089233688, "grad_norm": 6.996181011199951, "learning_rate": 8.463276062701076e-05, "loss": 0.9247, "step": 11976 }, { "epoch": 0.81150484450166, "grad_norm": 7.532526016235352, "learning_rate": 8.463139160791294e-05, "loss": 0.7807, "step": 11977 }, { "epoch": 0.8115725997696321, "grad_norm": 5.303388595581055, "learning_rate": 8.463002258881512e-05, "loss": 0.6836, "step": 11978 }, { "epoch": 0.8116403550376041, "grad_norm": 5.388332843780518, "learning_rate": 8.46286535697173e-05, "loss": 0.8513, "step": 11979 }, { "epoch": 0.8117081103055762, "grad_norm": 5.5608601570129395, "learning_rate": 8.462728455061949e-05, "loss": 0.6047, "step": 11980 }, { "epoch": 0.8117758655735483, "grad_norm": 6.766813278198242, "learning_rate": 8.462591553152167e-05, "loss": 0.8592, "step": 11981 }, { "epoch": 0.8118436208415204, "grad_norm": 6.640246868133545, "learning_rate": 8.462454651242385e-05, "loss": 0.6319, "step": 11982 }, { "epoch": 0.8119113761094925, "grad_norm": 6.7665815353393555, "learning_rate": 8.462317749332603e-05, "loss": 0.8778, "step": 11983 }, { "epoch": 0.8119791313774646, "grad_norm": 5.935091018676758, "learning_rate": 8.462180847422821e-05, "loss": 0.6987, "step": 11984 }, { "epoch": 0.8120468866454367, "grad_norm": 4.797418594360352, "learning_rate": 8.46204394551304e-05, "loss": 0.6862, "step": 11985 }, { "epoch": 0.8121146419134088, "grad_norm": 5.728845119476318, "learning_rate": 8.461907043603259e-05, "loss": 0.8859, "step": 11986 }, { "epoch": 0.8121823971813809, "grad_norm": 6.456442356109619, "learning_rate": 8.461770141693477e-05, "loss": 0.8329, "step": 11987 }, { "epoch": 0.812250152449353, "grad_norm": 6.974035739898682, "learning_rate": 8.461633239783695e-05, "loss": 0.8881, "step": 11988 }, { "epoch": 0.812317907717325, "grad_norm": 6.6539788246154785, "learning_rate": 8.461496337873914e-05, "loss": 1.1583, "step": 11989 }, { "epoch": 0.8123856629852971, "grad_norm": 4.992013931274414, "learning_rate": 8.461359435964132e-05, "loss": 0.6866, "step": 11990 }, { "epoch": 0.8124534182532692, "grad_norm": 6.416220664978027, "learning_rate": 8.46122253405435e-05, "loss": 1.1336, "step": 11991 }, { "epoch": 0.8125211735212413, "grad_norm": 6.000530242919922, "learning_rate": 8.461085632144568e-05, "loss": 0.8182, "step": 11992 }, { "epoch": 0.8125889287892134, "grad_norm": 6.560791492462158, "learning_rate": 8.460948730234786e-05, "loss": 0.5594, "step": 11993 }, { "epoch": 0.8126566840571855, "grad_norm": 5.342809200286865, "learning_rate": 8.460811828325006e-05, "loss": 0.9214, "step": 11994 }, { "epoch": 0.8127244393251575, "grad_norm": 6.472506046295166, "learning_rate": 8.460674926415224e-05, "loss": 0.9376, "step": 11995 }, { "epoch": 0.8127921945931296, "grad_norm": 10.083342552185059, "learning_rate": 8.460538024505442e-05, "loss": 0.8582, "step": 11996 }, { "epoch": 0.8128599498611017, "grad_norm": 6.755568504333496, "learning_rate": 8.46040112259566e-05, "loss": 1.2787, "step": 11997 }, { "epoch": 0.8129277051290738, "grad_norm": 5.924015998840332, "learning_rate": 8.460264220685878e-05, "loss": 0.7204, "step": 11998 }, { "epoch": 0.8129954603970458, "grad_norm": 7.797226428985596, "learning_rate": 8.460127318776097e-05, "loss": 1.0529, "step": 11999 }, { "epoch": 0.8130632156650179, "grad_norm": 6.317507743835449, "learning_rate": 8.459990416866315e-05, "loss": 0.4736, "step": 12000 }, { "epoch": 0.81313097093299, "grad_norm": 6.195952415466309, "learning_rate": 8.459853514956533e-05, "loss": 0.8751, "step": 12001 }, { "epoch": 0.8131987262009621, "grad_norm": 8.634666442871094, "learning_rate": 8.459716613046753e-05, "loss": 0.8552, "step": 12002 }, { "epoch": 0.8132664814689342, "grad_norm": 6.352993488311768, "learning_rate": 8.459579711136971e-05, "loss": 0.9277, "step": 12003 }, { "epoch": 0.8133342367369063, "grad_norm": 6.608835697174072, "learning_rate": 8.459442809227189e-05, "loss": 0.9077, "step": 12004 }, { "epoch": 0.8134019920048784, "grad_norm": 5.515098571777344, "learning_rate": 8.459305907317408e-05, "loss": 0.7215, "step": 12005 }, { "epoch": 0.8134697472728505, "grad_norm": 5.592660427093506, "learning_rate": 8.459169005407626e-05, "loss": 0.6318, "step": 12006 }, { "epoch": 0.8135375025408226, "grad_norm": 6.810677528381348, "learning_rate": 8.459032103497844e-05, "loss": 0.8533, "step": 12007 }, { "epoch": 0.8136052578087947, "grad_norm": 6.685205936431885, "learning_rate": 8.458895201588064e-05, "loss": 0.8084, "step": 12008 }, { "epoch": 0.8136730130767668, "grad_norm": 5.3733062744140625, "learning_rate": 8.458758299678282e-05, "loss": 0.8156, "step": 12009 }, { "epoch": 0.8137407683447389, "grad_norm": 4.988927841186523, "learning_rate": 8.4586213977685e-05, "loss": 0.777, "step": 12010 }, { "epoch": 0.8138085236127109, "grad_norm": 7.371654510498047, "learning_rate": 8.458484495858718e-05, "loss": 0.7513, "step": 12011 }, { "epoch": 0.8138762788806829, "grad_norm": 6.63214111328125, "learning_rate": 8.458347593948937e-05, "loss": 0.7991, "step": 12012 }, { "epoch": 0.813944034148655, "grad_norm": 6.847991466522217, "learning_rate": 8.458210692039155e-05, "loss": 0.8741, "step": 12013 }, { "epoch": 0.8140117894166271, "grad_norm": 6.964975357055664, "learning_rate": 8.458073790129373e-05, "loss": 0.8665, "step": 12014 }, { "epoch": 0.8140795446845992, "grad_norm": 6.188068866729736, "learning_rate": 8.457936888219591e-05, "loss": 0.8174, "step": 12015 }, { "epoch": 0.8141472999525713, "grad_norm": 8.014631271362305, "learning_rate": 8.457799986309809e-05, "loss": 0.8603, "step": 12016 }, { "epoch": 0.8142150552205434, "grad_norm": 4.80557107925415, "learning_rate": 8.457663084400029e-05, "loss": 0.7971, "step": 12017 }, { "epoch": 0.8142828104885155, "grad_norm": 4.926700115203857, "learning_rate": 8.457526182490247e-05, "loss": 0.6986, "step": 12018 }, { "epoch": 0.8143505657564876, "grad_norm": 7.593190670013428, "learning_rate": 8.457389280580465e-05, "loss": 0.8656, "step": 12019 }, { "epoch": 0.8144183210244597, "grad_norm": 5.325191497802734, "learning_rate": 8.457252378670683e-05, "loss": 0.7471, "step": 12020 }, { "epoch": 0.8144860762924317, "grad_norm": 6.783299446105957, "learning_rate": 8.457115476760902e-05, "loss": 0.9003, "step": 12021 }, { "epoch": 0.8145538315604038, "grad_norm": 5.972321033477783, "learning_rate": 8.45697857485112e-05, "loss": 0.9259, "step": 12022 }, { "epoch": 0.8146215868283759, "grad_norm": 4.9444708824157715, "learning_rate": 8.456841672941338e-05, "loss": 0.6313, "step": 12023 }, { "epoch": 0.814689342096348, "grad_norm": 6.034842014312744, "learning_rate": 8.456704771031556e-05, "loss": 0.736, "step": 12024 }, { "epoch": 0.8147570973643201, "grad_norm": 7.463682651519775, "learning_rate": 8.456567869121774e-05, "loss": 0.8651, "step": 12025 }, { "epoch": 0.8148248526322922, "grad_norm": 6.878032207489014, "learning_rate": 8.456430967211994e-05, "loss": 0.9458, "step": 12026 }, { "epoch": 0.8148926079002643, "grad_norm": 6.859936237335205, "learning_rate": 8.456294065302212e-05, "loss": 0.9125, "step": 12027 }, { "epoch": 0.8149603631682363, "grad_norm": 6.0320329666137695, "learning_rate": 8.45615716339243e-05, "loss": 0.6817, "step": 12028 }, { "epoch": 0.8150281184362084, "grad_norm": 6.321547031402588, "learning_rate": 8.456020261482648e-05, "loss": 0.7538, "step": 12029 }, { "epoch": 0.8150958737041805, "grad_norm": 6.318676471710205, "learning_rate": 8.455883359572866e-05, "loss": 0.9262, "step": 12030 }, { "epoch": 0.8151636289721526, "grad_norm": 5.807433605194092, "learning_rate": 8.455746457663085e-05, "loss": 0.7597, "step": 12031 }, { "epoch": 0.8152313842401246, "grad_norm": 6.104518413543701, "learning_rate": 8.455609555753303e-05, "loss": 0.7186, "step": 12032 }, { "epoch": 0.8152991395080967, "grad_norm": 6.957059860229492, "learning_rate": 8.455472653843521e-05, "loss": 0.7533, "step": 12033 }, { "epoch": 0.8153668947760688, "grad_norm": 6.928465366363525, "learning_rate": 8.455335751933739e-05, "loss": 0.578, "step": 12034 }, { "epoch": 0.8154346500440409, "grad_norm": 6.699448108673096, "learning_rate": 8.455198850023959e-05, "loss": 0.7888, "step": 12035 }, { "epoch": 0.815502405312013, "grad_norm": 7.328460693359375, "learning_rate": 8.455061948114177e-05, "loss": 0.72, "step": 12036 }, { "epoch": 0.8155701605799851, "grad_norm": 7.082894802093506, "learning_rate": 8.454925046204395e-05, "loss": 0.9115, "step": 12037 }, { "epoch": 0.8156379158479572, "grad_norm": 5.156605243682861, "learning_rate": 8.454788144294613e-05, "loss": 0.748, "step": 12038 }, { "epoch": 0.8157056711159293, "grad_norm": 6.401536464691162, "learning_rate": 8.454651242384831e-05, "loss": 0.8282, "step": 12039 }, { "epoch": 0.8157734263839014, "grad_norm": 7.056277275085449, "learning_rate": 8.45451434047505e-05, "loss": 0.9653, "step": 12040 }, { "epoch": 0.8158411816518735, "grad_norm": 5.628291130065918, "learning_rate": 8.454377438565268e-05, "loss": 0.8733, "step": 12041 }, { "epoch": 0.8159089369198456, "grad_norm": 5.863224506378174, "learning_rate": 8.454240536655486e-05, "loss": 0.8301, "step": 12042 }, { "epoch": 0.8159766921878177, "grad_norm": 7.33843994140625, "learning_rate": 8.454103634745704e-05, "loss": 0.6597, "step": 12043 }, { "epoch": 0.8160444474557896, "grad_norm": 5.626278400421143, "learning_rate": 8.453966732835924e-05, "loss": 0.7648, "step": 12044 }, { "epoch": 0.8161122027237617, "grad_norm": 5.470703125, "learning_rate": 8.453829830926142e-05, "loss": 0.6636, "step": 12045 }, { "epoch": 0.8161799579917338, "grad_norm": 4.597399711608887, "learning_rate": 8.45369292901636e-05, "loss": 0.7107, "step": 12046 }, { "epoch": 0.8162477132597059, "grad_norm": 6.172791957855225, "learning_rate": 8.453556027106578e-05, "loss": 0.7516, "step": 12047 }, { "epoch": 0.816315468527678, "grad_norm": 6.869264125823975, "learning_rate": 8.453419125196796e-05, "loss": 0.7626, "step": 12048 }, { "epoch": 0.8163832237956501, "grad_norm": 6.974149703979492, "learning_rate": 8.453282223287015e-05, "loss": 0.8517, "step": 12049 }, { "epoch": 0.8164509790636222, "grad_norm": 6.083059787750244, "learning_rate": 8.453145321377233e-05, "loss": 0.7275, "step": 12050 }, { "epoch": 0.8165187343315943, "grad_norm": 4.947962284088135, "learning_rate": 8.453008419467451e-05, "loss": 0.663, "step": 12051 }, { "epoch": 0.8165864895995664, "grad_norm": 5.22273588180542, "learning_rate": 8.45287151755767e-05, "loss": 0.6265, "step": 12052 }, { "epoch": 0.8166542448675385, "grad_norm": 7.42067289352417, "learning_rate": 8.452734615647889e-05, "loss": 1.1131, "step": 12053 }, { "epoch": 0.8167220001355106, "grad_norm": 6.975278854370117, "learning_rate": 8.452597713738107e-05, "loss": 0.8143, "step": 12054 }, { "epoch": 0.8167897554034826, "grad_norm": 5.899443626403809, "learning_rate": 8.452460811828326e-05, "loss": 0.7033, "step": 12055 }, { "epoch": 0.8168575106714547, "grad_norm": 5.515699863433838, "learning_rate": 8.452323909918544e-05, "loss": 0.676, "step": 12056 }, { "epoch": 0.8169252659394268, "grad_norm": 5.615140438079834, "learning_rate": 8.452187008008762e-05, "loss": 0.6282, "step": 12057 }, { "epoch": 0.8169930212073989, "grad_norm": 5.279138088226318, "learning_rate": 8.452050106098982e-05, "loss": 0.7139, "step": 12058 }, { "epoch": 0.817060776475371, "grad_norm": 4.64931583404541, "learning_rate": 8.4519132041892e-05, "loss": 0.7691, "step": 12059 }, { "epoch": 0.817128531743343, "grad_norm": 5.363344192504883, "learning_rate": 8.451776302279418e-05, "loss": 0.8622, "step": 12060 }, { "epoch": 0.8171962870113151, "grad_norm": 5.616733551025391, "learning_rate": 8.451639400369636e-05, "loss": 0.729, "step": 12061 }, { "epoch": 0.8172640422792872, "grad_norm": 6.5094451904296875, "learning_rate": 8.451502498459854e-05, "loss": 0.7148, "step": 12062 }, { "epoch": 0.8173317975472593, "grad_norm": 6.721555233001709, "learning_rate": 8.451365596550073e-05, "loss": 0.8657, "step": 12063 }, { "epoch": 0.8173995528152314, "grad_norm": 6.426924705505371, "learning_rate": 8.451228694640291e-05, "loss": 0.7729, "step": 12064 }, { "epoch": 0.8174673080832034, "grad_norm": 5.894415378570557, "learning_rate": 8.451091792730509e-05, "loss": 0.8842, "step": 12065 }, { "epoch": 0.8175350633511755, "grad_norm": 9.44097900390625, "learning_rate": 8.450954890820727e-05, "loss": 0.7221, "step": 12066 }, { "epoch": 0.8176028186191476, "grad_norm": 9.030364990234375, "learning_rate": 8.450817988910947e-05, "loss": 0.9898, "step": 12067 }, { "epoch": 0.8176705738871197, "grad_norm": 5.958207607269287, "learning_rate": 8.450681087001165e-05, "loss": 0.9868, "step": 12068 }, { "epoch": 0.8177383291550918, "grad_norm": 5.822267532348633, "learning_rate": 8.450544185091383e-05, "loss": 0.7575, "step": 12069 }, { "epoch": 0.8178060844230639, "grad_norm": 6.314889430999756, "learning_rate": 8.4504072831816e-05, "loss": 0.8706, "step": 12070 }, { "epoch": 0.817873839691036, "grad_norm": 9.256656646728516, "learning_rate": 8.450270381271819e-05, "loss": 0.7976, "step": 12071 }, { "epoch": 0.8179415949590081, "grad_norm": 5.871959209442139, "learning_rate": 8.450133479362038e-05, "loss": 0.6814, "step": 12072 }, { "epoch": 0.8180093502269802, "grad_norm": 9.304550170898438, "learning_rate": 8.449996577452256e-05, "loss": 0.6588, "step": 12073 }, { "epoch": 0.8180771054949523, "grad_norm": 5.510218620300293, "learning_rate": 8.449859675542474e-05, "loss": 0.5854, "step": 12074 }, { "epoch": 0.8181448607629244, "grad_norm": 4.799395561218262, "learning_rate": 8.449722773632692e-05, "loss": 0.6387, "step": 12075 }, { "epoch": 0.8182126160308965, "grad_norm": 7.109929084777832, "learning_rate": 8.44958587172291e-05, "loss": 1.0768, "step": 12076 }, { "epoch": 0.8182803712988684, "grad_norm": 5.443954944610596, "learning_rate": 8.44944896981313e-05, "loss": 0.7782, "step": 12077 }, { "epoch": 0.8183481265668405, "grad_norm": 5.463802814483643, "learning_rate": 8.449312067903348e-05, "loss": 0.8792, "step": 12078 }, { "epoch": 0.8184158818348126, "grad_norm": 6.221611022949219, "learning_rate": 8.449175165993566e-05, "loss": 0.786, "step": 12079 }, { "epoch": 0.8184836371027847, "grad_norm": 5.399687767028809, "learning_rate": 8.449038264083784e-05, "loss": 0.7112, "step": 12080 }, { "epoch": 0.8185513923707568, "grad_norm": 6.230489730834961, "learning_rate": 8.448901362174003e-05, "loss": 0.6785, "step": 12081 }, { "epoch": 0.8186191476387289, "grad_norm": 6.096298694610596, "learning_rate": 8.448764460264221e-05, "loss": 0.641, "step": 12082 }, { "epoch": 0.818686902906701, "grad_norm": 5.790489673614502, "learning_rate": 8.448627558354439e-05, "loss": 0.8582, "step": 12083 }, { "epoch": 0.8187546581746731, "grad_norm": 5.866037845611572, "learning_rate": 8.448490656444657e-05, "loss": 0.8527, "step": 12084 }, { "epoch": 0.8188224134426452, "grad_norm": 6.1855854988098145, "learning_rate": 8.448353754534875e-05, "loss": 0.8359, "step": 12085 }, { "epoch": 0.8188901687106173, "grad_norm": 6.506312370300293, "learning_rate": 8.448216852625095e-05, "loss": 0.6803, "step": 12086 }, { "epoch": 0.8189579239785894, "grad_norm": 8.091931343078613, "learning_rate": 8.448079950715313e-05, "loss": 1.0264, "step": 12087 }, { "epoch": 0.8190256792465614, "grad_norm": 6.240049362182617, "learning_rate": 8.447943048805531e-05, "loss": 0.7127, "step": 12088 }, { "epoch": 0.8190934345145335, "grad_norm": 6.7533650398254395, "learning_rate": 8.447806146895749e-05, "loss": 1.0634, "step": 12089 }, { "epoch": 0.8191611897825056, "grad_norm": 5.064426422119141, "learning_rate": 8.447669244985968e-05, "loss": 0.6213, "step": 12090 }, { "epoch": 0.8192289450504777, "grad_norm": 5.491628646850586, "learning_rate": 8.447532343076186e-05, "loss": 0.5707, "step": 12091 }, { "epoch": 0.8192967003184498, "grad_norm": 7.105623722076416, "learning_rate": 8.447395441166404e-05, "loss": 0.9926, "step": 12092 }, { "epoch": 0.8193644555864218, "grad_norm": 7.82690954208374, "learning_rate": 8.447258539256622e-05, "loss": 0.7902, "step": 12093 }, { "epoch": 0.8194322108543939, "grad_norm": 5.6463494300842285, "learning_rate": 8.44712163734684e-05, "loss": 0.8812, "step": 12094 }, { "epoch": 0.819499966122366, "grad_norm": 7.104325771331787, "learning_rate": 8.44698473543706e-05, "loss": 0.6544, "step": 12095 }, { "epoch": 0.8195677213903381, "grad_norm": 5.303103923797607, "learning_rate": 8.446847833527278e-05, "loss": 0.8304, "step": 12096 }, { "epoch": 0.8196354766583102, "grad_norm": 6.566699504852295, "learning_rate": 8.446710931617496e-05, "loss": 0.8582, "step": 12097 }, { "epoch": 0.8197032319262822, "grad_norm": 7.369137287139893, "learning_rate": 8.446574029707715e-05, "loss": 0.7852, "step": 12098 }, { "epoch": 0.8197709871942543, "grad_norm": 6.183825492858887, "learning_rate": 8.446437127797933e-05, "loss": 0.8196, "step": 12099 }, { "epoch": 0.8198387424622264, "grad_norm": 4.837382793426514, "learning_rate": 8.446300225888151e-05, "loss": 0.6597, "step": 12100 }, { "epoch": 0.8199064977301985, "grad_norm": 6.405309200286865, "learning_rate": 8.44616332397837e-05, "loss": 0.7934, "step": 12101 }, { "epoch": 0.8199742529981706, "grad_norm": 6.547097682952881, "learning_rate": 8.446026422068589e-05, "loss": 0.8375, "step": 12102 }, { "epoch": 0.8200420082661427, "grad_norm": 6.001138687133789, "learning_rate": 8.445889520158807e-05, "loss": 0.7713, "step": 12103 }, { "epoch": 0.8201097635341148, "grad_norm": 5.217280864715576, "learning_rate": 8.445752618249026e-05, "loss": 0.605, "step": 12104 }, { "epoch": 0.8201775188020869, "grad_norm": 5.498340129852295, "learning_rate": 8.445615716339244e-05, "loss": 0.7789, "step": 12105 }, { "epoch": 0.820245274070059, "grad_norm": 9.02701187133789, "learning_rate": 8.445478814429462e-05, "loss": 0.8801, "step": 12106 }, { "epoch": 0.8203130293380311, "grad_norm": 7.654047966003418, "learning_rate": 8.44534191251968e-05, "loss": 0.7072, "step": 12107 }, { "epoch": 0.8203807846060032, "grad_norm": 5.503271102905273, "learning_rate": 8.445205010609898e-05, "loss": 0.7304, "step": 12108 }, { "epoch": 0.8204485398739751, "grad_norm": 5.020559310913086, "learning_rate": 8.445068108700118e-05, "loss": 0.5477, "step": 12109 }, { "epoch": 0.8205162951419472, "grad_norm": 6.803164958953857, "learning_rate": 8.444931206790336e-05, "loss": 0.922, "step": 12110 }, { "epoch": 0.8205840504099193, "grad_norm": 5.567500114440918, "learning_rate": 8.444794304880554e-05, "loss": 0.5649, "step": 12111 }, { "epoch": 0.8206518056778914, "grad_norm": 6.515629291534424, "learning_rate": 8.444657402970772e-05, "loss": 0.5943, "step": 12112 }, { "epoch": 0.8207195609458635, "grad_norm": 6.220151424407959, "learning_rate": 8.444520501060991e-05, "loss": 0.7958, "step": 12113 }, { "epoch": 0.8207873162138356, "grad_norm": 6.00366735458374, "learning_rate": 8.444383599151209e-05, "loss": 0.6698, "step": 12114 }, { "epoch": 0.8208550714818077, "grad_norm": 7.6385955810546875, "learning_rate": 8.444246697241427e-05, "loss": 0.7866, "step": 12115 }, { "epoch": 0.8209228267497798, "grad_norm": 4.94298791885376, "learning_rate": 8.444109795331645e-05, "loss": 0.6862, "step": 12116 }, { "epoch": 0.8209905820177519, "grad_norm": 4.838351726531982, "learning_rate": 8.443972893421863e-05, "loss": 0.6723, "step": 12117 }, { "epoch": 0.821058337285724, "grad_norm": 5.836402893066406, "learning_rate": 8.443835991512083e-05, "loss": 0.6631, "step": 12118 }, { "epoch": 0.8211260925536961, "grad_norm": 6.094921588897705, "learning_rate": 8.4436990896023e-05, "loss": 0.6569, "step": 12119 }, { "epoch": 0.8211938478216682, "grad_norm": 6.998372554779053, "learning_rate": 8.443562187692519e-05, "loss": 0.8489, "step": 12120 }, { "epoch": 0.8212616030896402, "grad_norm": 8.239564895629883, "learning_rate": 8.443425285782737e-05, "loss": 0.9627, "step": 12121 }, { "epoch": 0.8213293583576123, "grad_norm": 5.836661338806152, "learning_rate": 8.443288383872956e-05, "loss": 0.6482, "step": 12122 }, { "epoch": 0.8213971136255844, "grad_norm": 5.142320156097412, "learning_rate": 8.443151481963174e-05, "loss": 0.5763, "step": 12123 }, { "epoch": 0.8214648688935565, "grad_norm": 5.773630619049072, "learning_rate": 8.443014580053392e-05, "loss": 0.7495, "step": 12124 }, { "epoch": 0.8215326241615286, "grad_norm": 6.265152931213379, "learning_rate": 8.44287767814361e-05, "loss": 0.732, "step": 12125 }, { "epoch": 0.8216003794295006, "grad_norm": 5.69442081451416, "learning_rate": 8.442740776233828e-05, "loss": 0.7479, "step": 12126 }, { "epoch": 0.8216681346974727, "grad_norm": 5.151772499084473, "learning_rate": 8.442603874324048e-05, "loss": 0.9404, "step": 12127 }, { "epoch": 0.8217358899654448, "grad_norm": 5.117092609405518, "learning_rate": 8.442466972414266e-05, "loss": 0.8087, "step": 12128 }, { "epoch": 0.8218036452334169, "grad_norm": 6.638974666595459, "learning_rate": 8.442330070504484e-05, "loss": 1.018, "step": 12129 }, { "epoch": 0.821871400501389, "grad_norm": 5.713891983032227, "learning_rate": 8.442193168594702e-05, "loss": 0.7464, "step": 12130 }, { "epoch": 0.821939155769361, "grad_norm": 5.336922645568848, "learning_rate": 8.44205626668492e-05, "loss": 0.7876, "step": 12131 }, { "epoch": 0.8220069110373331, "grad_norm": 6.789290904998779, "learning_rate": 8.441919364775139e-05, "loss": 0.8613, "step": 12132 }, { "epoch": 0.8220746663053052, "grad_norm": 5.347286701202393, "learning_rate": 8.441782462865357e-05, "loss": 0.899, "step": 12133 }, { "epoch": 0.8221424215732773, "grad_norm": 5.311189651489258, "learning_rate": 8.441645560955575e-05, "loss": 0.6064, "step": 12134 }, { "epoch": 0.8222101768412494, "grad_norm": 5.930995464324951, "learning_rate": 8.441508659045793e-05, "loss": 0.7213, "step": 12135 }, { "epoch": 0.8222779321092215, "grad_norm": 7.024041652679443, "learning_rate": 8.441371757136013e-05, "loss": 0.7926, "step": 12136 }, { "epoch": 0.8223456873771936, "grad_norm": 5.4607768058776855, "learning_rate": 8.44123485522623e-05, "loss": 0.752, "step": 12137 }, { "epoch": 0.8224134426451657, "grad_norm": 6.063724994659424, "learning_rate": 8.441097953316449e-05, "loss": 0.6209, "step": 12138 }, { "epoch": 0.8224811979131378, "grad_norm": 7.265159606933594, "learning_rate": 8.440961051406667e-05, "loss": 0.65, "step": 12139 }, { "epoch": 0.8225489531811099, "grad_norm": 7.551514148712158, "learning_rate": 8.440824149496885e-05, "loss": 1.0007, "step": 12140 }, { "epoch": 0.822616708449082, "grad_norm": 4.997889995574951, "learning_rate": 8.440687247587104e-05, "loss": 0.6301, "step": 12141 }, { "epoch": 0.822684463717054, "grad_norm": 5.728504657745361, "learning_rate": 8.440550345677322e-05, "loss": 0.8397, "step": 12142 }, { "epoch": 0.822752218985026, "grad_norm": 6.213530540466309, "learning_rate": 8.44041344376754e-05, "loss": 0.7488, "step": 12143 }, { "epoch": 0.8228199742529981, "grad_norm": 8.768404006958008, "learning_rate": 8.44027654185776e-05, "loss": 0.9469, "step": 12144 }, { "epoch": 0.8228877295209702, "grad_norm": 7.084804534912109, "learning_rate": 8.440139639947978e-05, "loss": 0.697, "step": 12145 }, { "epoch": 0.8229554847889423, "grad_norm": 4.6725239753723145, "learning_rate": 8.440002738038196e-05, "loss": 0.6902, "step": 12146 }, { "epoch": 0.8230232400569144, "grad_norm": 5.927494049072266, "learning_rate": 8.439865836128415e-05, "loss": 0.7912, "step": 12147 }, { "epoch": 0.8230909953248865, "grad_norm": 5.5850419998168945, "learning_rate": 8.439728934218633e-05, "loss": 0.6394, "step": 12148 }, { "epoch": 0.8231587505928586, "grad_norm": 6.570766448974609, "learning_rate": 8.439592032308851e-05, "loss": 1.004, "step": 12149 }, { "epoch": 0.8232265058608307, "grad_norm": 6.343209266662598, "learning_rate": 8.43945513039907e-05, "loss": 0.5969, "step": 12150 }, { "epoch": 0.8232942611288028, "grad_norm": 7.24255895614624, "learning_rate": 8.439318228489289e-05, "loss": 0.9409, "step": 12151 }, { "epoch": 0.8233620163967749, "grad_norm": 5.600708961486816, "learning_rate": 8.439181326579507e-05, "loss": 1.0606, "step": 12152 }, { "epoch": 0.823429771664747, "grad_norm": 7.920993804931641, "learning_rate": 8.439044424669725e-05, "loss": 0.8452, "step": 12153 }, { "epoch": 0.823497526932719, "grad_norm": 6.625662326812744, "learning_rate": 8.438907522759944e-05, "loss": 0.8304, "step": 12154 }, { "epoch": 0.8235652822006911, "grad_norm": 7.728579998016357, "learning_rate": 8.438770620850162e-05, "loss": 0.7777, "step": 12155 }, { "epoch": 0.8236330374686632, "grad_norm": 7.7875752449035645, "learning_rate": 8.43863371894038e-05, "loss": 0.8318, "step": 12156 }, { "epoch": 0.8237007927366353, "grad_norm": 5.524309158325195, "learning_rate": 8.438496817030598e-05, "loss": 0.7924, "step": 12157 }, { "epoch": 0.8237685480046073, "grad_norm": 4.976869106292725, "learning_rate": 8.438359915120816e-05, "loss": 0.7819, "step": 12158 }, { "epoch": 0.8238363032725794, "grad_norm": 7.557702541351318, "learning_rate": 8.438223013211036e-05, "loss": 0.6508, "step": 12159 }, { "epoch": 0.8239040585405515, "grad_norm": 6.236000061035156, "learning_rate": 8.438086111301254e-05, "loss": 0.9233, "step": 12160 }, { "epoch": 0.8239718138085236, "grad_norm": 4.986820220947266, "learning_rate": 8.437949209391472e-05, "loss": 0.7341, "step": 12161 }, { "epoch": 0.8240395690764957, "grad_norm": 6.939243793487549, "learning_rate": 8.43781230748169e-05, "loss": 1.0755, "step": 12162 }, { "epoch": 0.8241073243444678, "grad_norm": 5.3395843505859375, "learning_rate": 8.437675405571908e-05, "loss": 0.6432, "step": 12163 }, { "epoch": 0.8241750796124399, "grad_norm": 5.462789058685303, "learning_rate": 8.437538503662127e-05, "loss": 0.7004, "step": 12164 }, { "epoch": 0.824242834880412, "grad_norm": 7.232882022857666, "learning_rate": 8.437401601752345e-05, "loss": 0.7802, "step": 12165 }, { "epoch": 0.824310590148384, "grad_norm": 4.660044193267822, "learning_rate": 8.437264699842563e-05, "loss": 0.6253, "step": 12166 }, { "epoch": 0.8243783454163561, "grad_norm": 6.779306888580322, "learning_rate": 8.437127797932781e-05, "loss": 0.9231, "step": 12167 }, { "epoch": 0.8244461006843282, "grad_norm": 5.5832905769348145, "learning_rate": 8.436990896023e-05, "loss": 0.8409, "step": 12168 }, { "epoch": 0.8245138559523003, "grad_norm": 6.603589057922363, "learning_rate": 8.436853994113219e-05, "loss": 0.897, "step": 12169 }, { "epoch": 0.8245816112202724, "grad_norm": 7.075769424438477, "learning_rate": 8.436717092203437e-05, "loss": 0.6168, "step": 12170 }, { "epoch": 0.8246493664882445, "grad_norm": 5.7542948722839355, "learning_rate": 8.436580190293655e-05, "loss": 0.7924, "step": 12171 }, { "epoch": 0.8247171217562166, "grad_norm": 6.221360683441162, "learning_rate": 8.436443288383873e-05, "loss": 0.8444, "step": 12172 }, { "epoch": 0.8247848770241887, "grad_norm": 5.970016002655029, "learning_rate": 8.436306386474092e-05, "loss": 0.8126, "step": 12173 }, { "epoch": 0.8248526322921608, "grad_norm": 5.611728668212891, "learning_rate": 8.43616948456431e-05, "loss": 0.8069, "step": 12174 }, { "epoch": 0.8249203875601328, "grad_norm": 5.260378360748291, "learning_rate": 8.436032582654528e-05, "loss": 0.7535, "step": 12175 }, { "epoch": 0.8249881428281048, "grad_norm": 11.273239135742188, "learning_rate": 8.435895680744746e-05, "loss": 0.6942, "step": 12176 }, { "epoch": 0.8250558980960769, "grad_norm": 5.525880813598633, "learning_rate": 8.435758778834966e-05, "loss": 0.8583, "step": 12177 }, { "epoch": 0.825123653364049, "grad_norm": 8.410612106323242, "learning_rate": 8.435621876925184e-05, "loss": 0.852, "step": 12178 }, { "epoch": 0.8251914086320211, "grad_norm": 6.29329252243042, "learning_rate": 8.435484975015402e-05, "loss": 0.7961, "step": 12179 }, { "epoch": 0.8252591638999932, "grad_norm": 5.848037242889404, "learning_rate": 8.43534807310562e-05, "loss": 0.7964, "step": 12180 }, { "epoch": 0.8253269191679653, "grad_norm": 5.5981669425964355, "learning_rate": 8.435211171195838e-05, "loss": 0.7474, "step": 12181 }, { "epoch": 0.8253946744359374, "grad_norm": 7.036654949188232, "learning_rate": 8.435074269286057e-05, "loss": 0.9101, "step": 12182 }, { "epoch": 0.8254624297039095, "grad_norm": 6.488468170166016, "learning_rate": 8.434937367376275e-05, "loss": 1.1791, "step": 12183 }, { "epoch": 0.8255301849718816, "grad_norm": 6.368350505828857, "learning_rate": 8.434800465466493e-05, "loss": 0.7757, "step": 12184 }, { "epoch": 0.8255979402398537, "grad_norm": 5.316969394683838, "learning_rate": 8.434663563556711e-05, "loss": 0.7681, "step": 12185 }, { "epoch": 0.8256656955078258, "grad_norm": 6.011645793914795, "learning_rate": 8.434526661646929e-05, "loss": 0.7887, "step": 12186 }, { "epoch": 0.8257334507757979, "grad_norm": 6.3625664710998535, "learning_rate": 8.434389759737149e-05, "loss": 0.7103, "step": 12187 }, { "epoch": 0.8258012060437699, "grad_norm": 5.478143215179443, "learning_rate": 8.434252857827367e-05, "loss": 0.6532, "step": 12188 }, { "epoch": 0.825868961311742, "grad_norm": 6.654770851135254, "learning_rate": 8.434115955917585e-05, "loss": 0.7328, "step": 12189 }, { "epoch": 0.8259367165797141, "grad_norm": 5.170722007751465, "learning_rate": 8.433979054007804e-05, "loss": 0.7592, "step": 12190 }, { "epoch": 0.8260044718476861, "grad_norm": 5.70284366607666, "learning_rate": 8.433842152098022e-05, "loss": 0.7273, "step": 12191 }, { "epoch": 0.8260722271156582, "grad_norm": 5.822709560394287, "learning_rate": 8.43370525018824e-05, "loss": 0.9407, "step": 12192 }, { "epoch": 0.8261399823836303, "grad_norm": 7.529512882232666, "learning_rate": 8.43356834827846e-05, "loss": 0.9318, "step": 12193 }, { "epoch": 0.8262077376516024, "grad_norm": 5.989007949829102, "learning_rate": 8.433431446368678e-05, "loss": 0.6854, "step": 12194 }, { "epoch": 0.8262754929195745, "grad_norm": 5.390767574310303, "learning_rate": 8.433294544458896e-05, "loss": 0.9111, "step": 12195 }, { "epoch": 0.8263432481875466, "grad_norm": 6.274799346923828, "learning_rate": 8.433157642549115e-05, "loss": 0.5621, "step": 12196 }, { "epoch": 0.8264110034555187, "grad_norm": 6.217073917388916, "learning_rate": 8.433020740639333e-05, "loss": 0.8127, "step": 12197 }, { "epoch": 0.8264787587234907, "grad_norm": 5.437521457672119, "learning_rate": 8.432883838729551e-05, "loss": 0.6947, "step": 12198 }, { "epoch": 0.8265465139914628, "grad_norm": 6.302811145782471, "learning_rate": 8.432746936819769e-05, "loss": 0.855, "step": 12199 }, { "epoch": 0.8266142692594349, "grad_norm": 6.268338203430176, "learning_rate": 8.432610034909988e-05, "loss": 0.8669, "step": 12200 }, { "epoch": 0.826682024527407, "grad_norm": 6.356218338012695, "learning_rate": 8.432473133000207e-05, "loss": 0.7883, "step": 12201 }, { "epoch": 0.8267497797953791, "grad_norm": 7.442529201507568, "learning_rate": 8.432336231090425e-05, "loss": 1.159, "step": 12202 }, { "epoch": 0.8268175350633512, "grad_norm": 6.274961471557617, "learning_rate": 8.432199329180643e-05, "loss": 1.0073, "step": 12203 }, { "epoch": 0.8268852903313233, "grad_norm": 5.910490036010742, "learning_rate": 8.43206242727086e-05, "loss": 0.781, "step": 12204 }, { "epoch": 0.8269530455992954, "grad_norm": 8.246582984924316, "learning_rate": 8.43192552536108e-05, "loss": 0.9337, "step": 12205 }, { "epoch": 0.8270208008672675, "grad_norm": 6.15574312210083, "learning_rate": 8.431788623451298e-05, "loss": 0.7707, "step": 12206 }, { "epoch": 0.8270885561352395, "grad_norm": 5.993718147277832, "learning_rate": 8.431651721541516e-05, "loss": 0.73, "step": 12207 }, { "epoch": 0.8271563114032116, "grad_norm": 6.5491533279418945, "learning_rate": 8.431514819631734e-05, "loss": 0.6576, "step": 12208 }, { "epoch": 0.8272240666711836, "grad_norm": 5.608559608459473, "learning_rate": 8.431377917721953e-05, "loss": 0.7199, "step": 12209 }, { "epoch": 0.8272918219391557, "grad_norm": 6.178080081939697, "learning_rate": 8.431241015812172e-05, "loss": 0.5885, "step": 12210 }, { "epoch": 0.8273595772071278, "grad_norm": 6.405505657196045, "learning_rate": 8.43110411390239e-05, "loss": 0.7247, "step": 12211 }, { "epoch": 0.8274273324750999, "grad_norm": 4.907893180847168, "learning_rate": 8.430967211992608e-05, "loss": 0.6837, "step": 12212 }, { "epoch": 0.827495087743072, "grad_norm": 4.041346549987793, "learning_rate": 8.430830310082826e-05, "loss": 0.5236, "step": 12213 }, { "epoch": 0.8275628430110441, "grad_norm": 5.496605396270752, "learning_rate": 8.430693408173045e-05, "loss": 0.6765, "step": 12214 }, { "epoch": 0.8276305982790162, "grad_norm": 5.595060348510742, "learning_rate": 8.430556506263263e-05, "loss": 1.0309, "step": 12215 }, { "epoch": 0.8276983535469883, "grad_norm": 5.200067043304443, "learning_rate": 8.430419604353481e-05, "loss": 0.6944, "step": 12216 }, { "epoch": 0.8277661088149604, "grad_norm": 6.38013219833374, "learning_rate": 8.430282702443699e-05, "loss": 1.1399, "step": 12217 }, { "epoch": 0.8278338640829325, "grad_norm": 5.848254203796387, "learning_rate": 8.430145800533917e-05, "loss": 0.7259, "step": 12218 }, { "epoch": 0.8279016193509046, "grad_norm": 7.698366641998291, "learning_rate": 8.430008898624137e-05, "loss": 0.8736, "step": 12219 }, { "epoch": 0.8279693746188767, "grad_norm": 6.256243705749512, "learning_rate": 8.429871996714355e-05, "loss": 0.8018, "step": 12220 }, { "epoch": 0.8280371298868487, "grad_norm": 7.630728721618652, "learning_rate": 8.429735094804573e-05, "loss": 0.6287, "step": 12221 }, { "epoch": 0.8281048851548208, "grad_norm": 5.663332939147949, "learning_rate": 8.42959819289479e-05, "loss": 0.7079, "step": 12222 }, { "epoch": 0.8281726404227929, "grad_norm": 9.478702545166016, "learning_rate": 8.42946129098501e-05, "loss": 0.9036, "step": 12223 }, { "epoch": 0.8282403956907649, "grad_norm": 5.983333110809326, "learning_rate": 8.429324389075228e-05, "loss": 0.777, "step": 12224 }, { "epoch": 0.828308150958737, "grad_norm": 5.364030361175537, "learning_rate": 8.429187487165446e-05, "loss": 0.8591, "step": 12225 }, { "epoch": 0.8283759062267091, "grad_norm": 6.575251579284668, "learning_rate": 8.429050585255664e-05, "loss": 0.8834, "step": 12226 }, { "epoch": 0.8284436614946812, "grad_norm": 6.744650840759277, "learning_rate": 8.428913683345882e-05, "loss": 0.8544, "step": 12227 }, { "epoch": 0.8285114167626533, "grad_norm": 7.059446334838867, "learning_rate": 8.428776781436102e-05, "loss": 0.7385, "step": 12228 }, { "epoch": 0.8285791720306254, "grad_norm": 5.541356086730957, "learning_rate": 8.42863987952632e-05, "loss": 0.7784, "step": 12229 }, { "epoch": 0.8286469272985975, "grad_norm": 7.66465425491333, "learning_rate": 8.428502977616538e-05, "loss": 0.7394, "step": 12230 }, { "epoch": 0.8287146825665695, "grad_norm": 5.495204448699951, "learning_rate": 8.428366075706756e-05, "loss": 0.6893, "step": 12231 }, { "epoch": 0.8287824378345416, "grad_norm": 5.460160255432129, "learning_rate": 8.428229173796975e-05, "loss": 0.8543, "step": 12232 }, { "epoch": 0.8288501931025137, "grad_norm": 5.867201805114746, "learning_rate": 8.428092271887193e-05, "loss": 0.7469, "step": 12233 }, { "epoch": 0.8289179483704858, "grad_norm": 5.914271354675293, "learning_rate": 8.427955369977411e-05, "loss": 0.9328, "step": 12234 }, { "epoch": 0.8289857036384579, "grad_norm": 7.290322303771973, "learning_rate": 8.427818468067629e-05, "loss": 0.936, "step": 12235 }, { "epoch": 0.82905345890643, "grad_norm": 7.530186653137207, "learning_rate": 8.427681566157849e-05, "loss": 0.7333, "step": 12236 }, { "epoch": 0.8291212141744021, "grad_norm": 7.260166168212891, "learning_rate": 8.427544664248067e-05, "loss": 0.7697, "step": 12237 }, { "epoch": 0.8291889694423742, "grad_norm": 5.8253703117370605, "learning_rate": 8.427407762338285e-05, "loss": 0.8838, "step": 12238 }, { "epoch": 0.8292567247103463, "grad_norm": 6.672026634216309, "learning_rate": 8.427270860428504e-05, "loss": 0.5896, "step": 12239 }, { "epoch": 0.8293244799783183, "grad_norm": 5.347162246704102, "learning_rate": 8.427133958518722e-05, "loss": 0.7497, "step": 12240 }, { "epoch": 0.8293922352462904, "grad_norm": 8.071971893310547, "learning_rate": 8.42699705660894e-05, "loss": 0.9985, "step": 12241 }, { "epoch": 0.8294599905142624, "grad_norm": 5.0009636878967285, "learning_rate": 8.42686015469916e-05, "loss": 0.8166, "step": 12242 }, { "epoch": 0.8295277457822345, "grad_norm": 5.593808174133301, "learning_rate": 8.426723252789377e-05, "loss": 0.6928, "step": 12243 }, { "epoch": 0.8295955010502066, "grad_norm": 5.249474048614502, "learning_rate": 8.426586350879596e-05, "loss": 0.7625, "step": 12244 }, { "epoch": 0.8296632563181787, "grad_norm": 5.920688152313232, "learning_rate": 8.426449448969814e-05, "loss": 0.7011, "step": 12245 }, { "epoch": 0.8297310115861508, "grad_norm": 9.127151489257812, "learning_rate": 8.426312547060033e-05, "loss": 0.917, "step": 12246 }, { "epoch": 0.8297987668541229, "grad_norm": 6.6722822189331055, "learning_rate": 8.426175645150251e-05, "loss": 0.7894, "step": 12247 }, { "epoch": 0.829866522122095, "grad_norm": 7.910020351409912, "learning_rate": 8.426038743240469e-05, "loss": 0.6994, "step": 12248 }, { "epoch": 0.8299342773900671, "grad_norm": 7.736839294433594, "learning_rate": 8.425901841330687e-05, "loss": 0.6598, "step": 12249 }, { "epoch": 0.8300020326580392, "grad_norm": 5.101329803466797, "learning_rate": 8.425764939420905e-05, "loss": 0.8365, "step": 12250 }, { "epoch": 0.8300697879260113, "grad_norm": 6.494842052459717, "learning_rate": 8.425628037511124e-05, "loss": 0.8936, "step": 12251 }, { "epoch": 0.8301375431939834, "grad_norm": 5.946401596069336, "learning_rate": 8.425491135601343e-05, "loss": 0.8427, "step": 12252 }, { "epoch": 0.8302052984619555, "grad_norm": 5.827462673187256, "learning_rate": 8.42535423369156e-05, "loss": 0.7694, "step": 12253 }, { "epoch": 0.8302730537299275, "grad_norm": 5.955854415893555, "learning_rate": 8.425217331781779e-05, "loss": 0.8644, "step": 12254 }, { "epoch": 0.8303408089978996, "grad_norm": 5.749096870422363, "learning_rate": 8.425080429871998e-05, "loss": 0.7225, "step": 12255 }, { "epoch": 0.8304085642658716, "grad_norm": 5.03810453414917, "learning_rate": 8.424943527962216e-05, "loss": 0.5006, "step": 12256 }, { "epoch": 0.8304763195338437, "grad_norm": 7.007089614868164, "learning_rate": 8.424806626052434e-05, "loss": 0.9485, "step": 12257 }, { "epoch": 0.8305440748018158, "grad_norm": 5.671133041381836, "learning_rate": 8.424669724142652e-05, "loss": 0.6842, "step": 12258 }, { "epoch": 0.8306118300697879, "grad_norm": 4.326511383056641, "learning_rate": 8.42453282223287e-05, "loss": 0.7619, "step": 12259 }, { "epoch": 0.83067958533776, "grad_norm": 4.897543907165527, "learning_rate": 8.42439592032309e-05, "loss": 0.6889, "step": 12260 }, { "epoch": 0.8307473406057321, "grad_norm": 5.780319690704346, "learning_rate": 8.424259018413308e-05, "loss": 1.0366, "step": 12261 }, { "epoch": 0.8308150958737042, "grad_norm": 6.754616737365723, "learning_rate": 8.424122116503526e-05, "loss": 0.9706, "step": 12262 }, { "epoch": 0.8308828511416763, "grad_norm": 5.684625625610352, "learning_rate": 8.423985214593744e-05, "loss": 0.7783, "step": 12263 }, { "epoch": 0.8309506064096484, "grad_norm": 5.692160129547119, "learning_rate": 8.423848312683962e-05, "loss": 0.6993, "step": 12264 }, { "epoch": 0.8310183616776204, "grad_norm": 5.838659286499023, "learning_rate": 8.423711410774181e-05, "loss": 0.8698, "step": 12265 }, { "epoch": 0.8310861169455925, "grad_norm": 5.119133472442627, "learning_rate": 8.423574508864399e-05, "loss": 0.8026, "step": 12266 }, { "epoch": 0.8311538722135646, "grad_norm": 6.305530071258545, "learning_rate": 8.423437606954617e-05, "loss": 0.8705, "step": 12267 }, { "epoch": 0.8312216274815367, "grad_norm": 5.659543037414551, "learning_rate": 8.423300705044835e-05, "loss": 0.7701, "step": 12268 }, { "epoch": 0.8312893827495088, "grad_norm": 6.706612586975098, "learning_rate": 8.423163803135055e-05, "loss": 0.6325, "step": 12269 }, { "epoch": 0.8313571380174809, "grad_norm": 6.535792827606201, "learning_rate": 8.423026901225273e-05, "loss": 0.7957, "step": 12270 }, { "epoch": 0.831424893285453, "grad_norm": 5.140613079071045, "learning_rate": 8.42288999931549e-05, "loss": 0.7975, "step": 12271 }, { "epoch": 0.831492648553425, "grad_norm": 5.579657554626465, "learning_rate": 8.422753097405709e-05, "loss": 0.6756, "step": 12272 }, { "epoch": 0.8315604038213971, "grad_norm": 8.427629470825195, "learning_rate": 8.422616195495927e-05, "loss": 0.7253, "step": 12273 }, { "epoch": 0.8316281590893692, "grad_norm": 5.536694526672363, "learning_rate": 8.422479293586146e-05, "loss": 0.8035, "step": 12274 }, { "epoch": 0.8316959143573412, "grad_norm": 7.416363716125488, "learning_rate": 8.422342391676364e-05, "loss": 1.0227, "step": 12275 }, { "epoch": 0.8317636696253133, "grad_norm": 8.551451683044434, "learning_rate": 8.422205489766582e-05, "loss": 0.9759, "step": 12276 }, { "epoch": 0.8318314248932854, "grad_norm": 6.636923789978027, "learning_rate": 8.4220685878568e-05, "loss": 0.8017, "step": 12277 }, { "epoch": 0.8318991801612575, "grad_norm": 5.85496711730957, "learning_rate": 8.42193168594702e-05, "loss": 0.7672, "step": 12278 }, { "epoch": 0.8319669354292296, "grad_norm": 5.499538898468018, "learning_rate": 8.421794784037238e-05, "loss": 0.6539, "step": 12279 }, { "epoch": 0.8320346906972017, "grad_norm": 5.9310150146484375, "learning_rate": 8.421657882127456e-05, "loss": 0.6745, "step": 12280 }, { "epoch": 0.8321024459651738, "grad_norm": 5.379483222961426, "learning_rate": 8.421520980217674e-05, "loss": 0.7548, "step": 12281 }, { "epoch": 0.8321702012331459, "grad_norm": 5.287676811218262, "learning_rate": 8.421384078307892e-05, "loss": 0.7051, "step": 12282 }, { "epoch": 0.832237956501118, "grad_norm": 8.057753562927246, "learning_rate": 8.421247176398111e-05, "loss": 0.7589, "step": 12283 }, { "epoch": 0.8323057117690901, "grad_norm": 6.534327983856201, "learning_rate": 8.421110274488329e-05, "loss": 0.7855, "step": 12284 }, { "epoch": 0.8323734670370622, "grad_norm": 7.2967753410339355, "learning_rate": 8.420973372578547e-05, "loss": 0.7163, "step": 12285 }, { "epoch": 0.8324412223050343, "grad_norm": 7.196539402008057, "learning_rate": 8.420836470668767e-05, "loss": 0.8134, "step": 12286 }, { "epoch": 0.8325089775730063, "grad_norm": 5.886240005493164, "learning_rate": 8.420699568758985e-05, "loss": 0.7407, "step": 12287 }, { "epoch": 0.8325767328409784, "grad_norm": 6.262639045715332, "learning_rate": 8.420562666849204e-05, "loss": 0.7359, "step": 12288 }, { "epoch": 0.8326444881089504, "grad_norm": 6.491570949554443, "learning_rate": 8.420425764939422e-05, "loss": 0.7787, "step": 12289 }, { "epoch": 0.8327122433769225, "grad_norm": 5.070222854614258, "learning_rate": 8.42028886302964e-05, "loss": 0.7032, "step": 12290 }, { "epoch": 0.8327799986448946, "grad_norm": 5.188915252685547, "learning_rate": 8.420151961119858e-05, "loss": 0.7538, "step": 12291 }, { "epoch": 0.8328477539128667, "grad_norm": 5.764748573303223, "learning_rate": 8.420015059210077e-05, "loss": 0.8324, "step": 12292 }, { "epoch": 0.8329155091808388, "grad_norm": 5.611788272857666, "learning_rate": 8.419878157300295e-05, "loss": 0.6855, "step": 12293 }, { "epoch": 0.8329832644488109, "grad_norm": 5.202719688415527, "learning_rate": 8.419741255390513e-05, "loss": 0.7333, "step": 12294 }, { "epoch": 0.833051019716783, "grad_norm": 5.643661975860596, "learning_rate": 8.419604353480732e-05, "loss": 0.8347, "step": 12295 }, { "epoch": 0.8331187749847551, "grad_norm": 5.144847869873047, "learning_rate": 8.41946745157095e-05, "loss": 0.6602, "step": 12296 }, { "epoch": 0.8331865302527272, "grad_norm": 6.511287212371826, "learning_rate": 8.419330549661169e-05, "loss": 0.6691, "step": 12297 }, { "epoch": 0.8332542855206992, "grad_norm": 6.4027276039123535, "learning_rate": 8.419193647751387e-05, "loss": 0.7781, "step": 12298 }, { "epoch": 0.8333220407886713, "grad_norm": 6.630940914154053, "learning_rate": 8.419056745841605e-05, "loss": 0.8879, "step": 12299 }, { "epoch": 0.8333897960566434, "grad_norm": 7.193549633026123, "learning_rate": 8.418919843931823e-05, "loss": 1.0063, "step": 12300 }, { "epoch": 0.8334575513246155, "grad_norm": 5.909510612487793, "learning_rate": 8.418782942022042e-05, "loss": 0.8406, "step": 12301 }, { "epoch": 0.8335253065925876, "grad_norm": 5.470668315887451, "learning_rate": 8.41864604011226e-05, "loss": 0.7256, "step": 12302 }, { "epoch": 0.8335930618605597, "grad_norm": 5.57338809967041, "learning_rate": 8.418509138202479e-05, "loss": 0.6156, "step": 12303 }, { "epoch": 0.8336608171285318, "grad_norm": 5.950277805328369, "learning_rate": 8.418372236292697e-05, "loss": 0.7232, "step": 12304 }, { "epoch": 0.8337285723965038, "grad_norm": 4.642901420593262, "learning_rate": 8.418235334382915e-05, "loss": 0.634, "step": 12305 }, { "epoch": 0.8337963276644759, "grad_norm": 7.471027374267578, "learning_rate": 8.418098432473134e-05, "loss": 1.0318, "step": 12306 }, { "epoch": 0.833864082932448, "grad_norm": 5.720177173614502, "learning_rate": 8.417961530563352e-05, "loss": 0.806, "step": 12307 }, { "epoch": 0.83393183820042, "grad_norm": 7.918213844299316, "learning_rate": 8.41782462865357e-05, "loss": 0.8564, "step": 12308 }, { "epoch": 0.8339995934683921, "grad_norm": 6.492531776428223, "learning_rate": 8.417687726743788e-05, "loss": 1.0221, "step": 12309 }, { "epoch": 0.8340673487363642, "grad_norm": 5.253319263458252, "learning_rate": 8.417550824834007e-05, "loss": 0.7599, "step": 12310 }, { "epoch": 0.8341351040043363, "grad_norm": 6.8917975425720215, "learning_rate": 8.417413922924225e-05, "loss": 0.7486, "step": 12311 }, { "epoch": 0.8342028592723084, "grad_norm": 7.639297962188721, "learning_rate": 8.417277021014444e-05, "loss": 0.9052, "step": 12312 }, { "epoch": 0.8342706145402805, "grad_norm": 6.974343776702881, "learning_rate": 8.417140119104662e-05, "loss": 0.6297, "step": 12313 }, { "epoch": 0.8343383698082526, "grad_norm": 5.5928802490234375, "learning_rate": 8.41700321719488e-05, "loss": 0.8208, "step": 12314 }, { "epoch": 0.8344061250762247, "grad_norm": 5.503357410430908, "learning_rate": 8.416866315285099e-05, "loss": 0.7472, "step": 12315 }, { "epoch": 0.8344738803441968, "grad_norm": 6.915671348571777, "learning_rate": 8.416729413375317e-05, "loss": 0.7894, "step": 12316 }, { "epoch": 0.8345416356121689, "grad_norm": 8.041422843933105, "learning_rate": 8.416592511465535e-05, "loss": 0.9854, "step": 12317 }, { "epoch": 0.834609390880141, "grad_norm": 6.027585506439209, "learning_rate": 8.416455609555753e-05, "loss": 1.0832, "step": 12318 }, { "epoch": 0.8346771461481131, "grad_norm": 7.771341800689697, "learning_rate": 8.416318707645971e-05, "loss": 0.9197, "step": 12319 }, { "epoch": 0.8347449014160851, "grad_norm": 5.460988521575928, "learning_rate": 8.41618180573619e-05, "loss": 0.7626, "step": 12320 }, { "epoch": 0.8348126566840571, "grad_norm": 5.957553863525391, "learning_rate": 8.416044903826409e-05, "loss": 0.5958, "step": 12321 }, { "epoch": 0.8348804119520292, "grad_norm": 6.164322853088379, "learning_rate": 8.415908001916627e-05, "loss": 0.7692, "step": 12322 }, { "epoch": 0.8349481672200013, "grad_norm": 6.864661693572998, "learning_rate": 8.415771100006845e-05, "loss": 0.8181, "step": 12323 }, { "epoch": 0.8350159224879734, "grad_norm": 4.985629558563232, "learning_rate": 8.415634198097064e-05, "loss": 0.8815, "step": 12324 }, { "epoch": 0.8350836777559455, "grad_norm": 6.169389724731445, "learning_rate": 8.415497296187282e-05, "loss": 0.7982, "step": 12325 }, { "epoch": 0.8351514330239176, "grad_norm": 7.641390800476074, "learning_rate": 8.4153603942775e-05, "loss": 0.7122, "step": 12326 }, { "epoch": 0.8352191882918897, "grad_norm": 5.388051509857178, "learning_rate": 8.415223492367718e-05, "loss": 0.9924, "step": 12327 }, { "epoch": 0.8352869435598618, "grad_norm": 6.870946407318115, "learning_rate": 8.415086590457936e-05, "loss": 0.5997, "step": 12328 }, { "epoch": 0.8353546988278339, "grad_norm": 7.626512050628662, "learning_rate": 8.414949688548156e-05, "loss": 0.7789, "step": 12329 }, { "epoch": 0.835422454095806, "grad_norm": 5.378123760223389, "learning_rate": 8.414812786638374e-05, "loss": 0.8172, "step": 12330 }, { "epoch": 0.835490209363778, "grad_norm": 6.5015716552734375, "learning_rate": 8.414675884728592e-05, "loss": 0.805, "step": 12331 }, { "epoch": 0.8355579646317501, "grad_norm": 5.173733711242676, "learning_rate": 8.414538982818811e-05, "loss": 0.6039, "step": 12332 }, { "epoch": 0.8356257198997222, "grad_norm": 5.68528413772583, "learning_rate": 8.414402080909029e-05, "loss": 0.8144, "step": 12333 }, { "epoch": 0.8356934751676943, "grad_norm": 6.874687671661377, "learning_rate": 8.414265178999247e-05, "loss": 0.889, "step": 12334 }, { "epoch": 0.8357612304356664, "grad_norm": 8.360238075256348, "learning_rate": 8.414128277089466e-05, "loss": 0.9033, "step": 12335 }, { "epoch": 0.8358289857036385, "grad_norm": 5.4390106201171875, "learning_rate": 8.413991375179684e-05, "loss": 0.6946, "step": 12336 }, { "epoch": 0.8358967409716106, "grad_norm": 9.848974227905273, "learning_rate": 8.413854473269903e-05, "loss": 0.7939, "step": 12337 }, { "epoch": 0.8359644962395826, "grad_norm": 7.111310958862305, "learning_rate": 8.413717571360122e-05, "loss": 0.739, "step": 12338 }, { "epoch": 0.8360322515075547, "grad_norm": 6.9361443519592285, "learning_rate": 8.41358066945034e-05, "loss": 0.8542, "step": 12339 }, { "epoch": 0.8361000067755268, "grad_norm": 8.055535316467285, "learning_rate": 8.413443767540558e-05, "loss": 0.7256, "step": 12340 }, { "epoch": 0.8361677620434989, "grad_norm": 5.564542770385742, "learning_rate": 8.413306865630776e-05, "loss": 0.7472, "step": 12341 }, { "epoch": 0.8362355173114709, "grad_norm": 5.300485134124756, "learning_rate": 8.413169963720995e-05, "loss": 0.5295, "step": 12342 }, { "epoch": 0.836303272579443, "grad_norm": 5.996912479400635, "learning_rate": 8.413033061811213e-05, "loss": 0.9896, "step": 12343 }, { "epoch": 0.8363710278474151, "grad_norm": 5.013984680175781, "learning_rate": 8.412896159901431e-05, "loss": 0.8257, "step": 12344 }, { "epoch": 0.8364387831153872, "grad_norm": 6.624052047729492, "learning_rate": 8.41275925799165e-05, "loss": 0.9886, "step": 12345 }, { "epoch": 0.8365065383833593, "grad_norm": 6.04857063293457, "learning_rate": 8.412622356081868e-05, "loss": 0.8513, "step": 12346 }, { "epoch": 0.8365742936513314, "grad_norm": 4.840993404388428, "learning_rate": 8.412485454172087e-05, "loss": 0.8743, "step": 12347 }, { "epoch": 0.8366420489193035, "grad_norm": 7.008707046508789, "learning_rate": 8.412348552262305e-05, "loss": 0.85, "step": 12348 }, { "epoch": 0.8367098041872756, "grad_norm": 7.408865451812744, "learning_rate": 8.412211650352523e-05, "loss": 0.9099, "step": 12349 }, { "epoch": 0.8367775594552477, "grad_norm": 6.566858291625977, "learning_rate": 8.412074748442741e-05, "loss": 0.9804, "step": 12350 }, { "epoch": 0.8368453147232198, "grad_norm": 5.375271797180176, "learning_rate": 8.411937846532959e-05, "loss": 0.6209, "step": 12351 }, { "epoch": 0.8369130699911919, "grad_norm": 6.087400436401367, "learning_rate": 8.411800944623178e-05, "loss": 0.6413, "step": 12352 }, { "epoch": 0.836980825259164, "grad_norm": 6.311927318572998, "learning_rate": 8.411664042713396e-05, "loss": 0.7192, "step": 12353 }, { "epoch": 0.8370485805271359, "grad_norm": 7.907698154449463, "learning_rate": 8.411527140803615e-05, "loss": 0.9493, "step": 12354 }, { "epoch": 0.837116335795108, "grad_norm": 5.864373683929443, "learning_rate": 8.411390238893833e-05, "loss": 0.921, "step": 12355 }, { "epoch": 0.8371840910630801, "grad_norm": 5.77023458480835, "learning_rate": 8.411253336984052e-05, "loss": 0.8094, "step": 12356 }, { "epoch": 0.8372518463310522, "grad_norm": 7.120599746704102, "learning_rate": 8.41111643507427e-05, "loss": 0.7822, "step": 12357 }, { "epoch": 0.8373196015990243, "grad_norm": 6.42577600479126, "learning_rate": 8.410979533164488e-05, "loss": 0.9123, "step": 12358 }, { "epoch": 0.8373873568669964, "grad_norm": 6.504154205322266, "learning_rate": 8.410842631254706e-05, "loss": 0.7072, "step": 12359 }, { "epoch": 0.8374551121349685, "grad_norm": 6.982132434844971, "learning_rate": 8.410705729344924e-05, "loss": 0.9333, "step": 12360 }, { "epoch": 0.8375228674029406, "grad_norm": 5.410412311553955, "learning_rate": 8.410568827435143e-05, "loss": 0.7905, "step": 12361 }, { "epoch": 0.8375906226709127, "grad_norm": 5.881120681762695, "learning_rate": 8.410431925525361e-05, "loss": 0.7371, "step": 12362 }, { "epoch": 0.8376583779388848, "grad_norm": 6.792932033538818, "learning_rate": 8.41029502361558e-05, "loss": 0.6885, "step": 12363 }, { "epoch": 0.8377261332068568, "grad_norm": 6.715199947357178, "learning_rate": 8.410158121705798e-05, "loss": 0.8673, "step": 12364 }, { "epoch": 0.8377938884748289, "grad_norm": 6.608368873596191, "learning_rate": 8.410021219796017e-05, "loss": 1.0625, "step": 12365 }, { "epoch": 0.837861643742801, "grad_norm": 8.170140266418457, "learning_rate": 8.409884317886235e-05, "loss": 0.8656, "step": 12366 }, { "epoch": 0.8379293990107731, "grad_norm": 7.180694103240967, "learning_rate": 8.409747415976453e-05, "loss": 0.7619, "step": 12367 }, { "epoch": 0.8379971542787452, "grad_norm": 5.306380748748779, "learning_rate": 8.409610514066671e-05, "loss": 0.6672, "step": 12368 }, { "epoch": 0.8380649095467173, "grad_norm": 9.249306678771973, "learning_rate": 8.409473612156889e-05, "loss": 0.759, "step": 12369 }, { "epoch": 0.8381326648146893, "grad_norm": 6.1703572273254395, "learning_rate": 8.409336710247108e-05, "loss": 0.661, "step": 12370 }, { "epoch": 0.8382004200826614, "grad_norm": 4.662862300872803, "learning_rate": 8.409199808337327e-05, "loss": 0.6401, "step": 12371 }, { "epoch": 0.8382681753506335, "grad_norm": 4.667969226837158, "learning_rate": 8.409062906427545e-05, "loss": 0.6579, "step": 12372 }, { "epoch": 0.8383359306186056, "grad_norm": 6.247222900390625, "learning_rate": 8.408926004517763e-05, "loss": 0.7907, "step": 12373 }, { "epoch": 0.8384036858865777, "grad_norm": 6.933483123779297, "learning_rate": 8.40878910260798e-05, "loss": 0.8402, "step": 12374 }, { "epoch": 0.8384714411545497, "grad_norm": 5.13164758682251, "learning_rate": 8.4086522006982e-05, "loss": 0.7144, "step": 12375 }, { "epoch": 0.8385391964225218, "grad_norm": 5.230576515197754, "learning_rate": 8.408515298788418e-05, "loss": 0.6836, "step": 12376 }, { "epoch": 0.8386069516904939, "grad_norm": 6.535162448883057, "learning_rate": 8.408378396878636e-05, "loss": 0.9004, "step": 12377 }, { "epoch": 0.838674706958466, "grad_norm": 7.608928203582764, "learning_rate": 8.408241494968855e-05, "loss": 0.9791, "step": 12378 }, { "epoch": 0.8387424622264381, "grad_norm": 6.712092399597168, "learning_rate": 8.408104593059073e-05, "loss": 0.6513, "step": 12379 }, { "epoch": 0.8388102174944102, "grad_norm": 6.102575778961182, "learning_rate": 8.407967691149292e-05, "loss": 0.8751, "step": 12380 }, { "epoch": 0.8388779727623823, "grad_norm": 5.246453285217285, "learning_rate": 8.407830789239511e-05, "loss": 0.6477, "step": 12381 }, { "epoch": 0.8389457280303544, "grad_norm": 6.3806633949279785, "learning_rate": 8.407693887329729e-05, "loss": 0.7549, "step": 12382 }, { "epoch": 0.8390134832983265, "grad_norm": 6.284943103790283, "learning_rate": 8.407556985419947e-05, "loss": 0.7873, "step": 12383 }, { "epoch": 0.8390812385662986, "grad_norm": 6.552889823913574, "learning_rate": 8.407420083510166e-05, "loss": 0.8152, "step": 12384 }, { "epoch": 0.8391489938342707, "grad_norm": 7.694222927093506, "learning_rate": 8.407283181600384e-05, "loss": 0.6777, "step": 12385 }, { "epoch": 0.8392167491022428, "grad_norm": 6.1887125968933105, "learning_rate": 8.407146279690602e-05, "loss": 0.9402, "step": 12386 }, { "epoch": 0.8392845043702147, "grad_norm": 6.24276065826416, "learning_rate": 8.40700937778082e-05, "loss": 1.0976, "step": 12387 }, { "epoch": 0.8393522596381868, "grad_norm": 5.816521167755127, "learning_rate": 8.40687247587104e-05, "loss": 0.8681, "step": 12388 }, { "epoch": 0.8394200149061589, "grad_norm": 5.3127923011779785, "learning_rate": 8.406735573961258e-05, "loss": 0.7598, "step": 12389 }, { "epoch": 0.839487770174131, "grad_norm": 7.444540500640869, "learning_rate": 8.406598672051476e-05, "loss": 0.8451, "step": 12390 }, { "epoch": 0.8395555254421031, "grad_norm": 8.680950164794922, "learning_rate": 8.406461770141694e-05, "loss": 1.0441, "step": 12391 }, { "epoch": 0.8396232807100752, "grad_norm": 6.707736968994141, "learning_rate": 8.406324868231912e-05, "loss": 0.7834, "step": 12392 }, { "epoch": 0.8396910359780473, "grad_norm": 5.440469741821289, "learning_rate": 8.406187966322131e-05, "loss": 0.7424, "step": 12393 }, { "epoch": 0.8397587912460194, "grad_norm": 9.19237995147705, "learning_rate": 8.40605106441235e-05, "loss": 0.7367, "step": 12394 }, { "epoch": 0.8398265465139915, "grad_norm": 6.017999172210693, "learning_rate": 8.405914162502567e-05, "loss": 0.822, "step": 12395 }, { "epoch": 0.8398943017819636, "grad_norm": 6.79027795791626, "learning_rate": 8.405777260592785e-05, "loss": 0.9386, "step": 12396 }, { "epoch": 0.8399620570499357, "grad_norm": 5.836680889129639, "learning_rate": 8.405640358683004e-05, "loss": 0.7095, "step": 12397 }, { "epoch": 0.8400298123179077, "grad_norm": 5.866933345794678, "learning_rate": 8.405503456773223e-05, "loss": 0.7168, "step": 12398 }, { "epoch": 0.8400975675858798, "grad_norm": 6.663938999176025, "learning_rate": 8.405366554863441e-05, "loss": 0.867, "step": 12399 }, { "epoch": 0.8401653228538519, "grad_norm": 5.113254547119141, "learning_rate": 8.405229652953659e-05, "loss": 0.6785, "step": 12400 }, { "epoch": 0.840233078121824, "grad_norm": 6.106493949890137, "learning_rate": 8.405092751043877e-05, "loss": 0.8815, "step": 12401 }, { "epoch": 0.8403008333897961, "grad_norm": 5.644641399383545, "learning_rate": 8.404955849134096e-05, "loss": 0.6902, "step": 12402 }, { "epoch": 0.8403685886577681, "grad_norm": 5.706465244293213, "learning_rate": 8.404818947224314e-05, "loss": 0.7725, "step": 12403 }, { "epoch": 0.8404363439257402, "grad_norm": 8.362403869628906, "learning_rate": 8.404682045314532e-05, "loss": 0.9757, "step": 12404 }, { "epoch": 0.8405040991937123, "grad_norm": 5.341566562652588, "learning_rate": 8.40454514340475e-05, "loss": 0.8645, "step": 12405 }, { "epoch": 0.8405718544616844, "grad_norm": 6.9801859855651855, "learning_rate": 8.404408241494969e-05, "loss": 0.7933, "step": 12406 }, { "epoch": 0.8406396097296565, "grad_norm": 7.076079845428467, "learning_rate": 8.404271339585188e-05, "loss": 0.9387, "step": 12407 }, { "epoch": 0.8407073649976285, "grad_norm": 5.684225082397461, "learning_rate": 8.404134437675406e-05, "loss": 0.6575, "step": 12408 }, { "epoch": 0.8407751202656006, "grad_norm": 6.203160285949707, "learning_rate": 8.403997535765624e-05, "loss": 0.6188, "step": 12409 }, { "epoch": 0.8408428755335727, "grad_norm": 7.039827346801758, "learning_rate": 8.403860633855842e-05, "loss": 0.8741, "step": 12410 }, { "epoch": 0.8409106308015448, "grad_norm": 5.340671062469482, "learning_rate": 8.403723731946061e-05, "loss": 0.6957, "step": 12411 }, { "epoch": 0.8409783860695169, "grad_norm": 6.316356182098389, "learning_rate": 8.40358683003628e-05, "loss": 0.8684, "step": 12412 }, { "epoch": 0.841046141337489, "grad_norm": 5.906060218811035, "learning_rate": 8.403449928126497e-05, "loss": 0.9206, "step": 12413 }, { "epoch": 0.8411138966054611, "grad_norm": 7.139042854309082, "learning_rate": 8.403313026216716e-05, "loss": 0.862, "step": 12414 }, { "epoch": 0.8411816518734332, "grad_norm": 6.347969055175781, "learning_rate": 8.403176124306934e-05, "loss": 0.7029, "step": 12415 }, { "epoch": 0.8412494071414053, "grad_norm": 7.3854780197143555, "learning_rate": 8.403039222397153e-05, "loss": 0.9811, "step": 12416 }, { "epoch": 0.8413171624093774, "grad_norm": 5.81329870223999, "learning_rate": 8.402902320487371e-05, "loss": 0.9048, "step": 12417 }, { "epoch": 0.8413849176773495, "grad_norm": 5.879884719848633, "learning_rate": 8.402765418577589e-05, "loss": 0.8854, "step": 12418 }, { "epoch": 0.8414526729453214, "grad_norm": 5.32490348815918, "learning_rate": 8.402628516667807e-05, "loss": 1.0044, "step": 12419 }, { "epoch": 0.8415204282132935, "grad_norm": 6.529613018035889, "learning_rate": 8.402491614758026e-05, "loss": 0.7893, "step": 12420 }, { "epoch": 0.8415881834812656, "grad_norm": 6.274640083312988, "learning_rate": 8.402354712848244e-05, "loss": 1.0186, "step": 12421 }, { "epoch": 0.8416559387492377, "grad_norm": 6.493834972381592, "learning_rate": 8.402217810938463e-05, "loss": 0.9347, "step": 12422 }, { "epoch": 0.8417236940172098, "grad_norm": 5.429368495941162, "learning_rate": 8.40208090902868e-05, "loss": 0.7862, "step": 12423 }, { "epoch": 0.8417914492851819, "grad_norm": 5.897449970245361, "learning_rate": 8.4019440071189e-05, "loss": 0.7278, "step": 12424 }, { "epoch": 0.841859204553154, "grad_norm": 5.485132694244385, "learning_rate": 8.401807105209118e-05, "loss": 0.5984, "step": 12425 }, { "epoch": 0.8419269598211261, "grad_norm": 6.194313049316406, "learning_rate": 8.401670203299336e-05, "loss": 0.8396, "step": 12426 }, { "epoch": 0.8419947150890982, "grad_norm": 6.459996700286865, "learning_rate": 8.401533301389555e-05, "loss": 0.8757, "step": 12427 }, { "epoch": 0.8420624703570703, "grad_norm": 5.770567417144775, "learning_rate": 8.401396399479773e-05, "loss": 0.6524, "step": 12428 }, { "epoch": 0.8421302256250424, "grad_norm": 5.737586975097656, "learning_rate": 8.401259497569991e-05, "loss": 0.7936, "step": 12429 }, { "epoch": 0.8421979808930145, "grad_norm": 5.61273717880249, "learning_rate": 8.401122595660211e-05, "loss": 0.8404, "step": 12430 }, { "epoch": 0.8422657361609865, "grad_norm": 5.813633918762207, "learning_rate": 8.400985693750429e-05, "loss": 0.8042, "step": 12431 }, { "epoch": 0.8423334914289586, "grad_norm": 10.310413360595703, "learning_rate": 8.400848791840647e-05, "loss": 0.7039, "step": 12432 }, { "epoch": 0.8424012466969307, "grad_norm": 5.826198577880859, "learning_rate": 8.400711889930865e-05, "loss": 0.7276, "step": 12433 }, { "epoch": 0.8424690019649028, "grad_norm": 6.436339855194092, "learning_rate": 8.400574988021084e-05, "loss": 0.7409, "step": 12434 }, { "epoch": 0.8425367572328749, "grad_norm": 5.5825700759887695, "learning_rate": 8.400438086111302e-05, "loss": 0.6429, "step": 12435 }, { "epoch": 0.8426045125008469, "grad_norm": 5.8885297775268555, "learning_rate": 8.40030118420152e-05, "loss": 0.8675, "step": 12436 }, { "epoch": 0.842672267768819, "grad_norm": 6.454819679260254, "learning_rate": 8.400164282291738e-05, "loss": 0.6461, "step": 12437 }, { "epoch": 0.8427400230367911, "grad_norm": 6.442966938018799, "learning_rate": 8.400027380381956e-05, "loss": 0.9699, "step": 12438 }, { "epoch": 0.8428077783047632, "grad_norm": 5.841514587402344, "learning_rate": 8.399890478472176e-05, "loss": 0.734, "step": 12439 }, { "epoch": 0.8428755335727353, "grad_norm": 5.0523858070373535, "learning_rate": 8.399753576562394e-05, "loss": 0.7477, "step": 12440 }, { "epoch": 0.8429432888407073, "grad_norm": 6.7047810554504395, "learning_rate": 8.399616674652612e-05, "loss": 0.8632, "step": 12441 }, { "epoch": 0.8430110441086794, "grad_norm": 5.654105186462402, "learning_rate": 8.39947977274283e-05, "loss": 0.6073, "step": 12442 }, { "epoch": 0.8430787993766515, "grad_norm": 6.039260387420654, "learning_rate": 8.39934287083305e-05, "loss": 0.7417, "step": 12443 }, { "epoch": 0.8431465546446236, "grad_norm": 6.465257167816162, "learning_rate": 8.399205968923267e-05, "loss": 0.8625, "step": 12444 }, { "epoch": 0.8432143099125957, "grad_norm": 7.922903060913086, "learning_rate": 8.399069067013485e-05, "loss": 0.8621, "step": 12445 }, { "epoch": 0.8432820651805678, "grad_norm": 6.696178436279297, "learning_rate": 8.398932165103703e-05, "loss": 0.7575, "step": 12446 }, { "epoch": 0.8433498204485399, "grad_norm": 7.259800910949707, "learning_rate": 8.398795263193921e-05, "loss": 0.9376, "step": 12447 }, { "epoch": 0.843417575716512, "grad_norm": 5.065820217132568, "learning_rate": 8.398658361284141e-05, "loss": 0.8156, "step": 12448 }, { "epoch": 0.8434853309844841, "grad_norm": 6.689807415008545, "learning_rate": 8.398521459374359e-05, "loss": 0.6496, "step": 12449 }, { "epoch": 0.8435530862524562, "grad_norm": 7.013186454772949, "learning_rate": 8.398384557464577e-05, "loss": 1.0973, "step": 12450 }, { "epoch": 0.8436208415204283, "grad_norm": 6.942663669586182, "learning_rate": 8.398247655554795e-05, "loss": 0.9876, "step": 12451 }, { "epoch": 0.8436885967884002, "grad_norm": 6.426051139831543, "learning_rate": 8.398110753645013e-05, "loss": 0.6516, "step": 12452 }, { "epoch": 0.8437563520563723, "grad_norm": 5.993021488189697, "learning_rate": 8.397973851735232e-05, "loss": 0.5495, "step": 12453 }, { "epoch": 0.8438241073243444, "grad_norm": 7.648578643798828, "learning_rate": 8.39783694982545e-05, "loss": 1.3047, "step": 12454 }, { "epoch": 0.8438918625923165, "grad_norm": 6.795345306396484, "learning_rate": 8.397700047915668e-05, "loss": 0.8851, "step": 12455 }, { "epoch": 0.8439596178602886, "grad_norm": 9.486870765686035, "learning_rate": 8.397563146005887e-05, "loss": 0.7838, "step": 12456 }, { "epoch": 0.8440273731282607, "grad_norm": 6.156589031219482, "learning_rate": 8.397426244096106e-05, "loss": 0.7293, "step": 12457 }, { "epoch": 0.8440951283962328, "grad_norm": 6.535582542419434, "learning_rate": 8.397289342186324e-05, "loss": 0.853, "step": 12458 }, { "epoch": 0.8441628836642049, "grad_norm": 6.530213832855225, "learning_rate": 8.397152440276542e-05, "loss": 1.0661, "step": 12459 }, { "epoch": 0.844230638932177, "grad_norm": 5.963207721710205, "learning_rate": 8.39701553836676e-05, "loss": 0.6558, "step": 12460 }, { "epoch": 0.8442983942001491, "grad_norm": 6.132920742034912, "learning_rate": 8.396878636456978e-05, "loss": 0.7386, "step": 12461 }, { "epoch": 0.8443661494681212, "grad_norm": 4.533850193023682, "learning_rate": 8.396741734547197e-05, "loss": 0.6762, "step": 12462 }, { "epoch": 0.8444339047360933, "grad_norm": 5.42271614074707, "learning_rate": 8.396604832637415e-05, "loss": 0.9324, "step": 12463 }, { "epoch": 0.8445016600040653, "grad_norm": 7.894658088684082, "learning_rate": 8.396467930727633e-05, "loss": 1.0492, "step": 12464 }, { "epoch": 0.8445694152720374, "grad_norm": 6.1358137130737305, "learning_rate": 8.396331028817852e-05, "loss": 0.6701, "step": 12465 }, { "epoch": 0.8446371705400095, "grad_norm": 6.7011871337890625, "learning_rate": 8.396194126908071e-05, "loss": 0.9623, "step": 12466 }, { "epoch": 0.8447049258079816, "grad_norm": 6.27651834487915, "learning_rate": 8.396057224998289e-05, "loss": 0.8553, "step": 12467 }, { "epoch": 0.8447726810759536, "grad_norm": 6.427924156188965, "learning_rate": 8.395920323088507e-05, "loss": 0.9759, "step": 12468 }, { "epoch": 0.8448404363439257, "grad_norm": 6.353015422821045, "learning_rate": 8.395783421178725e-05, "loss": 0.7357, "step": 12469 }, { "epoch": 0.8449081916118978, "grad_norm": 6.444865703582764, "learning_rate": 8.395646519268944e-05, "loss": 0.6417, "step": 12470 }, { "epoch": 0.8449759468798699, "grad_norm": 6.375645160675049, "learning_rate": 8.395509617359162e-05, "loss": 1.0158, "step": 12471 }, { "epoch": 0.845043702147842, "grad_norm": 4.871046543121338, "learning_rate": 8.39537271544938e-05, "loss": 0.7491, "step": 12472 }, { "epoch": 0.8451114574158141, "grad_norm": 5.615902423858643, "learning_rate": 8.3952358135396e-05, "loss": 0.6925, "step": 12473 }, { "epoch": 0.8451792126837862, "grad_norm": 6.724735260009766, "learning_rate": 8.395098911629818e-05, "loss": 0.6802, "step": 12474 }, { "epoch": 0.8452469679517582, "grad_norm": 7.350205421447754, "learning_rate": 8.394962009720036e-05, "loss": 1.1414, "step": 12475 }, { "epoch": 0.8453147232197303, "grad_norm": 5.377062797546387, "learning_rate": 8.394825107810255e-05, "loss": 0.8551, "step": 12476 }, { "epoch": 0.8453824784877024, "grad_norm": 5.765392303466797, "learning_rate": 8.394688205900473e-05, "loss": 0.6638, "step": 12477 }, { "epoch": 0.8454502337556745, "grad_norm": 5.007123947143555, "learning_rate": 8.394551303990691e-05, "loss": 0.7236, "step": 12478 }, { "epoch": 0.8455179890236466, "grad_norm": 6.559772491455078, "learning_rate": 8.39441440208091e-05, "loss": 0.8623, "step": 12479 }, { "epoch": 0.8455857442916187, "grad_norm": 6.448508262634277, "learning_rate": 8.394277500171129e-05, "loss": 0.7342, "step": 12480 }, { "epoch": 0.8456534995595908, "grad_norm": 6.105748653411865, "learning_rate": 8.394140598261347e-05, "loss": 0.7703, "step": 12481 }, { "epoch": 0.8457212548275629, "grad_norm": 6.34128999710083, "learning_rate": 8.394003696351565e-05, "loss": 0.8306, "step": 12482 }, { "epoch": 0.845789010095535, "grad_norm": 6.052567005157471, "learning_rate": 8.393866794441783e-05, "loss": 0.7125, "step": 12483 }, { "epoch": 0.845856765363507, "grad_norm": 5.930888652801514, "learning_rate": 8.393729892532001e-05, "loss": 0.9559, "step": 12484 }, { "epoch": 0.845924520631479, "grad_norm": 5.138692378997803, "learning_rate": 8.39359299062222e-05, "loss": 0.6747, "step": 12485 }, { "epoch": 0.8459922758994511, "grad_norm": 6.211801528930664, "learning_rate": 8.393456088712438e-05, "loss": 0.7852, "step": 12486 }, { "epoch": 0.8460600311674232, "grad_norm": 6.416335105895996, "learning_rate": 8.393319186802656e-05, "loss": 0.9246, "step": 12487 }, { "epoch": 0.8461277864353953, "grad_norm": 5.819685459136963, "learning_rate": 8.393182284892874e-05, "loss": 0.9555, "step": 12488 }, { "epoch": 0.8461955417033674, "grad_norm": 6.297514915466309, "learning_rate": 8.393045382983094e-05, "loss": 0.8673, "step": 12489 }, { "epoch": 0.8462632969713395, "grad_norm": 11.973522186279297, "learning_rate": 8.392908481073312e-05, "loss": 0.9805, "step": 12490 }, { "epoch": 0.8463310522393116, "grad_norm": 7.551254749298096, "learning_rate": 8.39277157916353e-05, "loss": 0.855, "step": 12491 }, { "epoch": 0.8463988075072837, "grad_norm": 5.7769389152526855, "learning_rate": 8.392634677253748e-05, "loss": 0.6064, "step": 12492 }, { "epoch": 0.8464665627752558, "grad_norm": 4.919633388519287, "learning_rate": 8.392497775343966e-05, "loss": 0.7027, "step": 12493 }, { "epoch": 0.8465343180432279, "grad_norm": 5.183913230895996, "learning_rate": 8.392360873434185e-05, "loss": 0.8747, "step": 12494 }, { "epoch": 0.8466020733112, "grad_norm": 5.794585704803467, "learning_rate": 8.392223971524403e-05, "loss": 0.7961, "step": 12495 }, { "epoch": 0.8466698285791721, "grad_norm": 6.117403030395508, "learning_rate": 8.392087069614621e-05, "loss": 0.5892, "step": 12496 }, { "epoch": 0.8467375838471441, "grad_norm": 7.63482666015625, "learning_rate": 8.39195016770484e-05, "loss": 0.8159, "step": 12497 }, { "epoch": 0.8468053391151162, "grad_norm": 5.1039347648620605, "learning_rate": 8.391813265795059e-05, "loss": 0.5934, "step": 12498 }, { "epoch": 0.8468730943830883, "grad_norm": 6.369871616363525, "learning_rate": 8.391676363885277e-05, "loss": 0.9178, "step": 12499 }, { "epoch": 0.8469408496510604, "grad_norm": 6.773191928863525, "learning_rate": 8.391539461975495e-05, "loss": 1.0055, "step": 12500 }, { "epoch": 0.8470086049190324, "grad_norm": 5.994389057159424, "learning_rate": 8.391402560065713e-05, "loss": 0.8812, "step": 12501 }, { "epoch": 0.8470763601870045, "grad_norm": 5.712553977966309, "learning_rate": 8.391265658155931e-05, "loss": 0.7008, "step": 12502 }, { "epoch": 0.8471441154549766, "grad_norm": 5.810184955596924, "learning_rate": 8.39112875624615e-05, "loss": 0.7936, "step": 12503 }, { "epoch": 0.8472118707229487, "grad_norm": 6.312936782836914, "learning_rate": 8.390991854336368e-05, "loss": 0.8583, "step": 12504 }, { "epoch": 0.8472796259909208, "grad_norm": 6.539886474609375, "learning_rate": 8.390854952426586e-05, "loss": 0.7788, "step": 12505 }, { "epoch": 0.8473473812588929, "grad_norm": 7.018226146697998, "learning_rate": 8.390718050516804e-05, "loss": 0.7205, "step": 12506 }, { "epoch": 0.847415136526865, "grad_norm": 6.536552429199219, "learning_rate": 8.390581148607023e-05, "loss": 0.9422, "step": 12507 }, { "epoch": 0.847482891794837, "grad_norm": 6.521510601043701, "learning_rate": 8.390444246697242e-05, "loss": 0.7813, "step": 12508 }, { "epoch": 0.8475506470628091, "grad_norm": 5.533815383911133, "learning_rate": 8.39030734478746e-05, "loss": 0.8097, "step": 12509 }, { "epoch": 0.8476184023307812, "grad_norm": 5.778811931610107, "learning_rate": 8.390170442877678e-05, "loss": 0.7295, "step": 12510 }, { "epoch": 0.8476861575987533, "grad_norm": 5.98907995223999, "learning_rate": 8.390033540967896e-05, "loss": 0.7462, "step": 12511 }, { "epoch": 0.8477539128667254, "grad_norm": 5.19685697555542, "learning_rate": 8.389896639058115e-05, "loss": 0.735, "step": 12512 }, { "epoch": 0.8478216681346975, "grad_norm": 5.980901718139648, "learning_rate": 8.389759737148333e-05, "loss": 0.5655, "step": 12513 }, { "epoch": 0.8478894234026696, "grad_norm": 4.931701183319092, "learning_rate": 8.389622835238551e-05, "loss": 0.6426, "step": 12514 }, { "epoch": 0.8479571786706417, "grad_norm": 5.470427513122559, "learning_rate": 8.38948593332877e-05, "loss": 0.8698, "step": 12515 }, { "epoch": 0.8480249339386138, "grad_norm": 8.906782150268555, "learning_rate": 8.389349031418989e-05, "loss": 0.9118, "step": 12516 }, { "epoch": 0.8480926892065858, "grad_norm": 7.61644172668457, "learning_rate": 8.389212129509207e-05, "loss": 1.0529, "step": 12517 }, { "epoch": 0.8481604444745579, "grad_norm": 6.867774963378906, "learning_rate": 8.389075227599425e-05, "loss": 0.6104, "step": 12518 }, { "epoch": 0.8482281997425299, "grad_norm": 7.476839065551758, "learning_rate": 8.388938325689644e-05, "loss": 1.036, "step": 12519 }, { "epoch": 0.848295955010502, "grad_norm": 6.415992259979248, "learning_rate": 8.388801423779862e-05, "loss": 0.9505, "step": 12520 }, { "epoch": 0.8483637102784741, "grad_norm": 6.049834251403809, "learning_rate": 8.38866452187008e-05, "loss": 0.7437, "step": 12521 }, { "epoch": 0.8484314655464462, "grad_norm": 5.392312526702881, "learning_rate": 8.3885276199603e-05, "loss": 0.7305, "step": 12522 }, { "epoch": 0.8484992208144183, "grad_norm": 6.002782821655273, "learning_rate": 8.388390718050518e-05, "loss": 0.6241, "step": 12523 }, { "epoch": 0.8485669760823904, "grad_norm": 7.464806079864502, "learning_rate": 8.388253816140736e-05, "loss": 1.0126, "step": 12524 }, { "epoch": 0.8486347313503625, "grad_norm": 6.660724639892578, "learning_rate": 8.388116914230954e-05, "loss": 0.8958, "step": 12525 }, { "epoch": 0.8487024866183346, "grad_norm": 6.646303176879883, "learning_rate": 8.387980012321173e-05, "loss": 0.951, "step": 12526 }, { "epoch": 0.8487702418863067, "grad_norm": 6.518253803253174, "learning_rate": 8.387843110411391e-05, "loss": 0.8785, "step": 12527 }, { "epoch": 0.8488379971542788, "grad_norm": 4.314852237701416, "learning_rate": 8.38770620850161e-05, "loss": 0.8498, "step": 12528 }, { "epoch": 0.8489057524222509, "grad_norm": 5.918713092803955, "learning_rate": 8.387569306591827e-05, "loss": 0.9178, "step": 12529 }, { "epoch": 0.848973507690223, "grad_norm": 7.2677788734436035, "learning_rate": 8.387432404682045e-05, "loss": 0.8692, "step": 12530 }, { "epoch": 0.849041262958195, "grad_norm": 5.739150524139404, "learning_rate": 8.387295502772265e-05, "loss": 0.7924, "step": 12531 }, { "epoch": 0.8491090182261671, "grad_norm": 5.6296000480651855, "learning_rate": 8.387158600862483e-05, "loss": 0.7735, "step": 12532 }, { "epoch": 0.8491767734941391, "grad_norm": 5.1605000495910645, "learning_rate": 8.387021698952701e-05, "loss": 0.7029, "step": 12533 }, { "epoch": 0.8492445287621112, "grad_norm": 6.645575046539307, "learning_rate": 8.386884797042919e-05, "loss": 1.0384, "step": 12534 }, { "epoch": 0.8493122840300833, "grad_norm": 6.592693328857422, "learning_rate": 8.386747895133138e-05, "loss": 0.7987, "step": 12535 }, { "epoch": 0.8493800392980554, "grad_norm": 7.588740825653076, "learning_rate": 8.386610993223356e-05, "loss": 0.6359, "step": 12536 }, { "epoch": 0.8494477945660275, "grad_norm": 4.178662300109863, "learning_rate": 8.386474091313574e-05, "loss": 0.7081, "step": 12537 }, { "epoch": 0.8495155498339996, "grad_norm": 5.266997814178467, "learning_rate": 8.386337189403792e-05, "loss": 0.688, "step": 12538 }, { "epoch": 0.8495833051019717, "grad_norm": 5.189149379730225, "learning_rate": 8.38620028749401e-05, "loss": 0.8713, "step": 12539 }, { "epoch": 0.8496510603699438, "grad_norm": 5.311150074005127, "learning_rate": 8.38606338558423e-05, "loss": 0.7709, "step": 12540 }, { "epoch": 0.8497188156379158, "grad_norm": 4.873614311218262, "learning_rate": 8.385926483674448e-05, "loss": 0.8112, "step": 12541 }, { "epoch": 0.8497865709058879, "grad_norm": 6.431467056274414, "learning_rate": 8.385789581764666e-05, "loss": 0.9818, "step": 12542 }, { "epoch": 0.84985432617386, "grad_norm": 7.026850700378418, "learning_rate": 8.385652679854884e-05, "loss": 0.7773, "step": 12543 }, { "epoch": 0.8499220814418321, "grad_norm": 5.634081840515137, "learning_rate": 8.385515777945103e-05, "loss": 0.8703, "step": 12544 }, { "epoch": 0.8499898367098042, "grad_norm": 5.535772800445557, "learning_rate": 8.385378876035321e-05, "loss": 0.6963, "step": 12545 }, { "epoch": 0.8500575919777763, "grad_norm": 6.581893444061279, "learning_rate": 8.38524197412554e-05, "loss": 0.7044, "step": 12546 }, { "epoch": 0.8501253472457484, "grad_norm": 5.496252536773682, "learning_rate": 8.385105072215757e-05, "loss": 0.7298, "step": 12547 }, { "epoch": 0.8501931025137205, "grad_norm": 6.205685615539551, "learning_rate": 8.384968170305975e-05, "loss": 0.7728, "step": 12548 }, { "epoch": 0.8502608577816926, "grad_norm": 7.986889362335205, "learning_rate": 8.384831268396195e-05, "loss": 0.767, "step": 12549 }, { "epoch": 0.8503286130496646, "grad_norm": 6.026723384857178, "learning_rate": 8.384694366486413e-05, "loss": 0.9097, "step": 12550 }, { "epoch": 0.8503963683176367, "grad_norm": 5.750411510467529, "learning_rate": 8.384557464576631e-05, "loss": 0.8567, "step": 12551 }, { "epoch": 0.8504641235856087, "grad_norm": 6.5039896965026855, "learning_rate": 8.384420562666849e-05, "loss": 0.882, "step": 12552 }, { "epoch": 0.8505318788535808, "grad_norm": 6.847061634063721, "learning_rate": 8.384283660757068e-05, "loss": 0.5942, "step": 12553 }, { "epoch": 0.8505996341215529, "grad_norm": 4.668815612792969, "learning_rate": 8.384146758847286e-05, "loss": 0.5723, "step": 12554 }, { "epoch": 0.850667389389525, "grad_norm": 7.477560043334961, "learning_rate": 8.384009856937504e-05, "loss": 0.9085, "step": 12555 }, { "epoch": 0.8507351446574971, "grad_norm": 5.242186069488525, "learning_rate": 8.383872955027722e-05, "loss": 0.5811, "step": 12556 }, { "epoch": 0.8508028999254692, "grad_norm": 6.460277080535889, "learning_rate": 8.38373605311794e-05, "loss": 0.6622, "step": 12557 }, { "epoch": 0.8508706551934413, "grad_norm": 8.43552017211914, "learning_rate": 8.38359915120816e-05, "loss": 0.6877, "step": 12558 }, { "epoch": 0.8509384104614134, "grad_norm": 5.636725902557373, "learning_rate": 8.383462249298378e-05, "loss": 0.7697, "step": 12559 }, { "epoch": 0.8510061657293855, "grad_norm": 7.544033050537109, "learning_rate": 8.383325347388596e-05, "loss": 1.031, "step": 12560 }, { "epoch": 0.8510739209973576, "grad_norm": 10.117722511291504, "learning_rate": 8.383188445478814e-05, "loss": 1.1007, "step": 12561 }, { "epoch": 0.8511416762653297, "grad_norm": 5.651546955108643, "learning_rate": 8.383051543569032e-05, "loss": 0.6959, "step": 12562 }, { "epoch": 0.8512094315333018, "grad_norm": 7.87699556350708, "learning_rate": 8.382914641659251e-05, "loss": 0.873, "step": 12563 }, { "epoch": 0.8512771868012738, "grad_norm": 5.291513442993164, "learning_rate": 8.38277773974947e-05, "loss": 0.6965, "step": 12564 }, { "epoch": 0.8513449420692459, "grad_norm": 5.983782768249512, "learning_rate": 8.382640837839687e-05, "loss": 0.908, "step": 12565 }, { "epoch": 0.8514126973372179, "grad_norm": 4.902809143066406, "learning_rate": 8.382503935929907e-05, "loss": 0.6924, "step": 12566 }, { "epoch": 0.85148045260519, "grad_norm": 4.983574390411377, "learning_rate": 8.382367034020125e-05, "loss": 0.7984, "step": 12567 }, { "epoch": 0.8515482078731621, "grad_norm": 6.039658069610596, "learning_rate": 8.382230132110343e-05, "loss": 0.683, "step": 12568 }, { "epoch": 0.8516159631411342, "grad_norm": 5.577428340911865, "learning_rate": 8.382093230200562e-05, "loss": 0.8412, "step": 12569 }, { "epoch": 0.8516837184091063, "grad_norm": 6.592475891113281, "learning_rate": 8.38195632829078e-05, "loss": 0.6641, "step": 12570 }, { "epoch": 0.8517514736770784, "grad_norm": 5.534562587738037, "learning_rate": 8.381819426380998e-05, "loss": 0.9195, "step": 12571 }, { "epoch": 0.8518192289450505, "grad_norm": 5.535101413726807, "learning_rate": 8.381682524471218e-05, "loss": 0.723, "step": 12572 }, { "epoch": 0.8518869842130226, "grad_norm": 6.00454044342041, "learning_rate": 8.381545622561436e-05, "loss": 0.6079, "step": 12573 }, { "epoch": 0.8519547394809946, "grad_norm": 6.607147216796875, "learning_rate": 8.381408720651654e-05, "loss": 0.9253, "step": 12574 }, { "epoch": 0.8520224947489667, "grad_norm": 5.750394821166992, "learning_rate": 8.381271818741872e-05, "loss": 0.689, "step": 12575 }, { "epoch": 0.8520902500169388, "grad_norm": 6.017903804779053, "learning_rate": 8.381134916832091e-05, "loss": 0.8317, "step": 12576 }, { "epoch": 0.8521580052849109, "grad_norm": 6.839371681213379, "learning_rate": 8.380998014922309e-05, "loss": 0.9688, "step": 12577 }, { "epoch": 0.852225760552883, "grad_norm": 5.614028453826904, "learning_rate": 8.380861113012527e-05, "loss": 0.5495, "step": 12578 }, { "epoch": 0.8522935158208551, "grad_norm": 6.759237766265869, "learning_rate": 8.380724211102745e-05, "loss": 0.7634, "step": 12579 }, { "epoch": 0.8523612710888272, "grad_norm": 5.667263031005859, "learning_rate": 8.380587309192963e-05, "loss": 0.8539, "step": 12580 }, { "epoch": 0.8524290263567993, "grad_norm": 5.995064735412598, "learning_rate": 8.380450407283183e-05, "loss": 0.7092, "step": 12581 }, { "epoch": 0.8524967816247713, "grad_norm": 6.612915515899658, "learning_rate": 8.380313505373401e-05, "loss": 0.822, "step": 12582 }, { "epoch": 0.8525645368927434, "grad_norm": 5.579259395599365, "learning_rate": 8.380176603463619e-05, "loss": 0.682, "step": 12583 }, { "epoch": 0.8526322921607155, "grad_norm": 6.590492248535156, "learning_rate": 8.380039701553837e-05, "loss": 0.7243, "step": 12584 }, { "epoch": 0.8527000474286875, "grad_norm": 4.9304680824279785, "learning_rate": 8.379902799644055e-05, "loss": 0.8079, "step": 12585 }, { "epoch": 0.8527678026966596, "grad_norm": 8.054821014404297, "learning_rate": 8.379765897734274e-05, "loss": 0.9659, "step": 12586 }, { "epoch": 0.8528355579646317, "grad_norm": 5.432389736175537, "learning_rate": 8.379628995824492e-05, "loss": 0.7316, "step": 12587 }, { "epoch": 0.8529033132326038, "grad_norm": 5.523181915283203, "learning_rate": 8.37949209391471e-05, "loss": 0.9273, "step": 12588 }, { "epoch": 0.8529710685005759, "grad_norm": 6.227122783660889, "learning_rate": 8.379355192004928e-05, "loss": 0.7196, "step": 12589 }, { "epoch": 0.853038823768548, "grad_norm": 6.5586371421813965, "learning_rate": 8.379218290095148e-05, "loss": 0.793, "step": 12590 }, { "epoch": 0.8531065790365201, "grad_norm": 6.881863594055176, "learning_rate": 8.379081388185366e-05, "loss": 0.6888, "step": 12591 }, { "epoch": 0.8531743343044922, "grad_norm": 6.569089889526367, "learning_rate": 8.378944486275584e-05, "loss": 1.0791, "step": 12592 }, { "epoch": 0.8532420895724643, "grad_norm": 4.835020542144775, "learning_rate": 8.378807584365802e-05, "loss": 0.866, "step": 12593 }, { "epoch": 0.8533098448404364, "grad_norm": 5.276635646820068, "learning_rate": 8.37867068245602e-05, "loss": 0.5689, "step": 12594 }, { "epoch": 0.8533776001084085, "grad_norm": 5.557784080505371, "learning_rate": 8.378533780546239e-05, "loss": 0.8455, "step": 12595 }, { "epoch": 0.8534453553763806, "grad_norm": 6.33231258392334, "learning_rate": 8.378396878636457e-05, "loss": 0.6639, "step": 12596 }, { "epoch": 0.8535131106443526, "grad_norm": 6.476678371429443, "learning_rate": 8.378259976726675e-05, "loss": 0.6786, "step": 12597 }, { "epoch": 0.8535808659123247, "grad_norm": 5.376199245452881, "learning_rate": 8.378123074816893e-05, "loss": 0.7915, "step": 12598 }, { "epoch": 0.8536486211802967, "grad_norm": 6.826034069061279, "learning_rate": 8.377986172907113e-05, "loss": 0.7511, "step": 12599 }, { "epoch": 0.8537163764482688, "grad_norm": 5.847662925720215, "learning_rate": 8.377849270997331e-05, "loss": 0.7095, "step": 12600 }, { "epoch": 0.8537841317162409, "grad_norm": 5.112993240356445, "learning_rate": 8.377712369087549e-05, "loss": 0.5066, "step": 12601 }, { "epoch": 0.853851886984213, "grad_norm": 8.529839515686035, "learning_rate": 8.377575467177767e-05, "loss": 0.8007, "step": 12602 }, { "epoch": 0.8539196422521851, "grad_norm": 5.600361347198486, "learning_rate": 8.377438565267985e-05, "loss": 0.847, "step": 12603 }, { "epoch": 0.8539873975201572, "grad_norm": 7.553412914276123, "learning_rate": 8.377301663358204e-05, "loss": 0.8847, "step": 12604 }, { "epoch": 0.8540551527881293, "grad_norm": 4.93065881729126, "learning_rate": 8.377164761448422e-05, "loss": 0.5527, "step": 12605 }, { "epoch": 0.8541229080561014, "grad_norm": 6.301657676696777, "learning_rate": 8.37702785953864e-05, "loss": 0.9723, "step": 12606 }, { "epoch": 0.8541906633240735, "grad_norm": 6.898577690124512, "learning_rate": 8.376890957628858e-05, "loss": 0.6861, "step": 12607 }, { "epoch": 0.8542584185920455, "grad_norm": 6.174037933349609, "learning_rate": 8.376754055719078e-05, "loss": 0.8906, "step": 12608 }, { "epoch": 0.8543261738600176, "grad_norm": 5.768240928649902, "learning_rate": 8.376617153809296e-05, "loss": 0.8712, "step": 12609 }, { "epoch": 0.8543939291279897, "grad_norm": 5.855865001678467, "learning_rate": 8.376480251899514e-05, "loss": 0.8333, "step": 12610 }, { "epoch": 0.8544616843959618, "grad_norm": 5.274192810058594, "learning_rate": 8.376343349989732e-05, "loss": 0.6786, "step": 12611 }, { "epoch": 0.8545294396639339, "grad_norm": 5.603989601135254, "learning_rate": 8.376206448079951e-05, "loss": 0.7428, "step": 12612 }, { "epoch": 0.854597194931906, "grad_norm": 4.8484883308410645, "learning_rate": 8.37606954617017e-05, "loss": 0.7456, "step": 12613 }, { "epoch": 0.8546649501998781, "grad_norm": 5.10874605178833, "learning_rate": 8.375932644260387e-05, "loss": 0.6326, "step": 12614 }, { "epoch": 0.8547327054678501, "grad_norm": 4.878294944763184, "learning_rate": 8.375795742350607e-05, "loss": 0.6428, "step": 12615 }, { "epoch": 0.8548004607358222, "grad_norm": 7.003088474273682, "learning_rate": 8.375658840440825e-05, "loss": 0.8246, "step": 12616 }, { "epoch": 0.8548682160037943, "grad_norm": 7.047825336456299, "learning_rate": 8.375521938531043e-05, "loss": 0.833, "step": 12617 }, { "epoch": 0.8549359712717663, "grad_norm": 9.071378707885742, "learning_rate": 8.375385036621262e-05, "loss": 0.8094, "step": 12618 }, { "epoch": 0.8550037265397384, "grad_norm": 5.345335960388184, "learning_rate": 8.37524813471148e-05, "loss": 0.79, "step": 12619 }, { "epoch": 0.8550714818077105, "grad_norm": 5.579035758972168, "learning_rate": 8.375111232801698e-05, "loss": 0.7526, "step": 12620 }, { "epoch": 0.8551392370756826, "grad_norm": 6.771716117858887, "learning_rate": 8.374974330891916e-05, "loss": 0.7558, "step": 12621 }, { "epoch": 0.8552069923436547, "grad_norm": 7.533701419830322, "learning_rate": 8.374837428982136e-05, "loss": 1.1432, "step": 12622 }, { "epoch": 0.8552747476116268, "grad_norm": 7.239196300506592, "learning_rate": 8.374700527072354e-05, "loss": 1.1609, "step": 12623 }, { "epoch": 0.8553425028795989, "grad_norm": 5.9439544677734375, "learning_rate": 8.374563625162572e-05, "loss": 0.8908, "step": 12624 }, { "epoch": 0.855410258147571, "grad_norm": 7.268623352050781, "learning_rate": 8.37442672325279e-05, "loss": 1.2071, "step": 12625 }, { "epoch": 0.8554780134155431, "grad_norm": 8.557291984558105, "learning_rate": 8.374289821343008e-05, "loss": 1.0076, "step": 12626 }, { "epoch": 0.8555457686835152, "grad_norm": 5.16387939453125, "learning_rate": 8.374152919433227e-05, "loss": 0.7862, "step": 12627 }, { "epoch": 0.8556135239514873, "grad_norm": 5.909477710723877, "learning_rate": 8.374016017523445e-05, "loss": 0.8143, "step": 12628 }, { "epoch": 0.8556812792194594, "grad_norm": 6.213109016418457, "learning_rate": 8.373879115613663e-05, "loss": 0.8753, "step": 12629 }, { "epoch": 0.8557490344874314, "grad_norm": 6.664327621459961, "learning_rate": 8.373742213703881e-05, "loss": 0.7626, "step": 12630 }, { "epoch": 0.8558167897554034, "grad_norm": 5.750275135040283, "learning_rate": 8.373605311794101e-05, "loss": 0.8285, "step": 12631 }, { "epoch": 0.8558845450233755, "grad_norm": 6.6249237060546875, "learning_rate": 8.373468409884319e-05, "loss": 0.9556, "step": 12632 }, { "epoch": 0.8559523002913476, "grad_norm": 5.466978549957275, "learning_rate": 8.373331507974537e-05, "loss": 0.9843, "step": 12633 }, { "epoch": 0.8560200555593197, "grad_norm": 7.178730487823486, "learning_rate": 8.373194606064755e-05, "loss": 0.9921, "step": 12634 }, { "epoch": 0.8560878108272918, "grad_norm": 5.387322902679443, "learning_rate": 8.373057704154973e-05, "loss": 0.8924, "step": 12635 }, { "epoch": 0.8561555660952639, "grad_norm": 6.278534889221191, "learning_rate": 8.372920802245192e-05, "loss": 1.05, "step": 12636 }, { "epoch": 0.856223321363236, "grad_norm": 6.567953586578369, "learning_rate": 8.37278390033541e-05, "loss": 0.9592, "step": 12637 }, { "epoch": 0.8562910766312081, "grad_norm": 4.848402976989746, "learning_rate": 8.372646998425628e-05, "loss": 0.7223, "step": 12638 }, { "epoch": 0.8563588318991802, "grad_norm": 5.792343616485596, "learning_rate": 8.372510096515846e-05, "loss": 0.8339, "step": 12639 }, { "epoch": 0.8564265871671523, "grad_norm": 7.278899669647217, "learning_rate": 8.372373194606064e-05, "loss": 0.9728, "step": 12640 }, { "epoch": 0.8564943424351243, "grad_norm": 6.241243839263916, "learning_rate": 8.372236292696284e-05, "loss": 0.6706, "step": 12641 }, { "epoch": 0.8565620977030964, "grad_norm": 4.546154022216797, "learning_rate": 8.372099390786502e-05, "loss": 0.6978, "step": 12642 }, { "epoch": 0.8566298529710685, "grad_norm": 5.440952777862549, "learning_rate": 8.37196248887672e-05, "loss": 0.5912, "step": 12643 }, { "epoch": 0.8566976082390406, "grad_norm": 5.9767656326293945, "learning_rate": 8.371825586966938e-05, "loss": 0.8739, "step": 12644 }, { "epoch": 0.8567653635070127, "grad_norm": 5.622544288635254, "learning_rate": 8.371688685057157e-05, "loss": 0.9851, "step": 12645 }, { "epoch": 0.8568331187749848, "grad_norm": 4.423412322998047, "learning_rate": 8.371551783147375e-05, "loss": 0.578, "step": 12646 }, { "epoch": 0.8569008740429569, "grad_norm": 7.680568695068359, "learning_rate": 8.371414881237593e-05, "loss": 0.8078, "step": 12647 }, { "epoch": 0.8569686293109289, "grad_norm": 5.702794075012207, "learning_rate": 8.371277979327811e-05, "loss": 0.734, "step": 12648 }, { "epoch": 0.857036384578901, "grad_norm": 6.302651882171631, "learning_rate": 8.37114107741803e-05, "loss": 0.7974, "step": 12649 }, { "epoch": 0.8571041398468731, "grad_norm": 6.76057243347168, "learning_rate": 8.371004175508249e-05, "loss": 0.8451, "step": 12650 }, { "epoch": 0.8571718951148452, "grad_norm": 5.101822376251221, "learning_rate": 8.370867273598467e-05, "loss": 0.7528, "step": 12651 }, { "epoch": 0.8572396503828172, "grad_norm": 5.3127641677856445, "learning_rate": 8.370730371688685e-05, "loss": 0.7881, "step": 12652 }, { "epoch": 0.8573074056507893, "grad_norm": 6.015966892242432, "learning_rate": 8.370593469778903e-05, "loss": 0.6502, "step": 12653 }, { "epoch": 0.8573751609187614, "grad_norm": 4.573479175567627, "learning_rate": 8.370456567869122e-05, "loss": 0.5229, "step": 12654 }, { "epoch": 0.8574429161867335, "grad_norm": 7.478061676025391, "learning_rate": 8.37031966595934e-05, "loss": 1.1038, "step": 12655 }, { "epoch": 0.8575106714547056, "grad_norm": 6.543705940246582, "learning_rate": 8.370182764049558e-05, "loss": 0.733, "step": 12656 }, { "epoch": 0.8575784267226777, "grad_norm": 5.073485851287842, "learning_rate": 8.370045862139776e-05, "loss": 0.8401, "step": 12657 }, { "epoch": 0.8576461819906498, "grad_norm": 6.257830619812012, "learning_rate": 8.369908960229996e-05, "loss": 0.709, "step": 12658 }, { "epoch": 0.8577139372586219, "grad_norm": 7.744876384735107, "learning_rate": 8.369772058320214e-05, "loss": 0.7146, "step": 12659 }, { "epoch": 0.857781692526594, "grad_norm": 6.404613971710205, "learning_rate": 8.369635156410432e-05, "loss": 0.7723, "step": 12660 }, { "epoch": 0.8578494477945661, "grad_norm": 7.350032806396484, "learning_rate": 8.369498254500651e-05, "loss": 0.9622, "step": 12661 }, { "epoch": 0.8579172030625382, "grad_norm": 7.5004191398620605, "learning_rate": 8.369361352590869e-05, "loss": 0.8981, "step": 12662 }, { "epoch": 0.8579849583305103, "grad_norm": 6.804741382598877, "learning_rate": 8.369224450681087e-05, "loss": 0.7108, "step": 12663 }, { "epoch": 0.8580527135984822, "grad_norm": 6.440323829650879, "learning_rate": 8.369087548771307e-05, "loss": 0.7311, "step": 12664 }, { "epoch": 0.8581204688664543, "grad_norm": 8.581847190856934, "learning_rate": 8.368950646861525e-05, "loss": 0.9141, "step": 12665 }, { "epoch": 0.8581882241344264, "grad_norm": 9.157425880432129, "learning_rate": 8.368813744951743e-05, "loss": 0.8747, "step": 12666 }, { "epoch": 0.8582559794023985, "grad_norm": 5.952433109283447, "learning_rate": 8.368676843041961e-05, "loss": 0.6971, "step": 12667 }, { "epoch": 0.8583237346703706, "grad_norm": 7.463055610656738, "learning_rate": 8.36853994113218e-05, "loss": 0.8685, "step": 12668 }, { "epoch": 0.8583914899383427, "grad_norm": 6.422219753265381, "learning_rate": 8.368403039222398e-05, "loss": 0.7494, "step": 12669 }, { "epoch": 0.8584592452063148, "grad_norm": 7.621025085449219, "learning_rate": 8.368266137312616e-05, "loss": 0.7981, "step": 12670 }, { "epoch": 0.8585270004742869, "grad_norm": 5.8451690673828125, "learning_rate": 8.368129235402834e-05, "loss": 0.6134, "step": 12671 }, { "epoch": 0.858594755742259, "grad_norm": 6.098859786987305, "learning_rate": 8.367992333493052e-05, "loss": 0.7698, "step": 12672 }, { "epoch": 0.858662511010231, "grad_norm": 7.19790506362915, "learning_rate": 8.367855431583272e-05, "loss": 0.8783, "step": 12673 }, { "epoch": 0.8587302662782031, "grad_norm": 7.8366899490356445, "learning_rate": 8.36771852967349e-05, "loss": 0.8574, "step": 12674 }, { "epoch": 0.8587980215461752, "grad_norm": 6.070743083953857, "learning_rate": 8.367581627763708e-05, "loss": 0.9785, "step": 12675 }, { "epoch": 0.8588657768141473, "grad_norm": 7.112607002258301, "learning_rate": 8.367444725853926e-05, "loss": 0.8502, "step": 12676 }, { "epoch": 0.8589335320821194, "grad_norm": 5.572093963623047, "learning_rate": 8.367307823944145e-05, "loss": 0.9328, "step": 12677 }, { "epoch": 0.8590012873500915, "grad_norm": 5.674850940704346, "learning_rate": 8.367170922034363e-05, "loss": 0.8259, "step": 12678 }, { "epoch": 0.8590690426180636, "grad_norm": 5.8405890464782715, "learning_rate": 8.367034020124581e-05, "loss": 0.6563, "step": 12679 }, { "epoch": 0.8591367978860356, "grad_norm": 5.2222981452941895, "learning_rate": 8.366897118214799e-05, "loss": 0.7247, "step": 12680 }, { "epoch": 0.8592045531540077, "grad_norm": 5.854241847991943, "learning_rate": 8.366760216305017e-05, "loss": 0.5706, "step": 12681 }, { "epoch": 0.8592723084219798, "grad_norm": 6.589792251586914, "learning_rate": 8.366623314395237e-05, "loss": 0.6776, "step": 12682 }, { "epoch": 0.8593400636899519, "grad_norm": 5.009568691253662, "learning_rate": 8.366486412485455e-05, "loss": 0.7079, "step": 12683 }, { "epoch": 0.859407818957924, "grad_norm": 5.921725749969482, "learning_rate": 8.366349510575673e-05, "loss": 0.9658, "step": 12684 }, { "epoch": 0.859475574225896, "grad_norm": 7.070530414581299, "learning_rate": 8.366212608665891e-05, "loss": 0.7979, "step": 12685 }, { "epoch": 0.8595433294938681, "grad_norm": 8.296648979187012, "learning_rate": 8.36607570675611e-05, "loss": 0.8313, "step": 12686 }, { "epoch": 0.8596110847618402, "grad_norm": 5.253691673278809, "learning_rate": 8.365938804846328e-05, "loss": 0.7979, "step": 12687 }, { "epoch": 0.8596788400298123, "grad_norm": 6.794055461883545, "learning_rate": 8.365801902936546e-05, "loss": 0.8337, "step": 12688 }, { "epoch": 0.8597465952977844, "grad_norm": 5.086427211761475, "learning_rate": 8.365665001026764e-05, "loss": 0.4667, "step": 12689 }, { "epoch": 0.8598143505657565, "grad_norm": 5.407829761505127, "learning_rate": 8.365528099116982e-05, "loss": 0.7093, "step": 12690 }, { "epoch": 0.8598821058337286, "grad_norm": 5.971084117889404, "learning_rate": 8.365391197207202e-05, "loss": 1.0033, "step": 12691 }, { "epoch": 0.8599498611017007, "grad_norm": 5.477384090423584, "learning_rate": 8.36525429529742e-05, "loss": 0.5806, "step": 12692 }, { "epoch": 0.8600176163696728, "grad_norm": 7.340150833129883, "learning_rate": 8.365117393387638e-05, "loss": 0.7215, "step": 12693 }, { "epoch": 0.8600853716376449, "grad_norm": 5.252416133880615, "learning_rate": 8.364980491477856e-05, "loss": 0.7346, "step": 12694 }, { "epoch": 0.860153126905617, "grad_norm": 7.507197380065918, "learning_rate": 8.364843589568074e-05, "loss": 1.0633, "step": 12695 }, { "epoch": 0.860220882173589, "grad_norm": 4.731564998626709, "learning_rate": 8.364706687658293e-05, "loss": 0.5687, "step": 12696 }, { "epoch": 0.860288637441561, "grad_norm": 6.081350803375244, "learning_rate": 8.364569785748511e-05, "loss": 0.7199, "step": 12697 }, { "epoch": 0.8603563927095331, "grad_norm": 6.91575288772583, "learning_rate": 8.36443288383873e-05, "loss": 0.9717, "step": 12698 }, { "epoch": 0.8604241479775052, "grad_norm": 5.101013660430908, "learning_rate": 8.364295981928947e-05, "loss": 0.772, "step": 12699 }, { "epoch": 0.8604919032454773, "grad_norm": 6.185006141662598, "learning_rate": 8.364159080019167e-05, "loss": 0.601, "step": 12700 }, { "epoch": 0.8605596585134494, "grad_norm": 6.696321487426758, "learning_rate": 8.364022178109385e-05, "loss": 0.8923, "step": 12701 }, { "epoch": 0.8606274137814215, "grad_norm": 4.8003129959106445, "learning_rate": 8.363885276199603e-05, "loss": 0.656, "step": 12702 }, { "epoch": 0.8606951690493936, "grad_norm": 8.93622875213623, "learning_rate": 8.363748374289821e-05, "loss": 0.8062, "step": 12703 }, { "epoch": 0.8607629243173657, "grad_norm": 5.659854412078857, "learning_rate": 8.36361147238004e-05, "loss": 0.8201, "step": 12704 }, { "epoch": 0.8608306795853378, "grad_norm": 6.121464729309082, "learning_rate": 8.363474570470258e-05, "loss": 0.8329, "step": 12705 }, { "epoch": 0.8608984348533099, "grad_norm": 5.242605686187744, "learning_rate": 8.363337668560476e-05, "loss": 0.7151, "step": 12706 }, { "epoch": 0.860966190121282, "grad_norm": 5.1735382080078125, "learning_rate": 8.363200766650696e-05, "loss": 0.7501, "step": 12707 }, { "epoch": 0.861033945389254, "grad_norm": 7.21480655670166, "learning_rate": 8.363063864740914e-05, "loss": 0.9973, "step": 12708 }, { "epoch": 0.8611017006572261, "grad_norm": 6.435115814208984, "learning_rate": 8.362926962831132e-05, "loss": 0.7217, "step": 12709 }, { "epoch": 0.8611694559251982, "grad_norm": 6.879031658172607, "learning_rate": 8.362790060921351e-05, "loss": 0.9322, "step": 12710 }, { "epoch": 0.8612372111931703, "grad_norm": 5.913161277770996, "learning_rate": 8.362653159011569e-05, "loss": 0.6451, "step": 12711 }, { "epoch": 0.8613049664611424, "grad_norm": 4.3174920082092285, "learning_rate": 8.362516257101787e-05, "loss": 0.682, "step": 12712 }, { "epoch": 0.8613727217291144, "grad_norm": 5.966519832611084, "learning_rate": 8.362379355192005e-05, "loss": 0.6362, "step": 12713 }, { "epoch": 0.8614404769970865, "grad_norm": 6.3389410972595215, "learning_rate": 8.362242453282225e-05, "loss": 0.7318, "step": 12714 }, { "epoch": 0.8615082322650586, "grad_norm": 8.78852653503418, "learning_rate": 8.362105551372443e-05, "loss": 0.7471, "step": 12715 }, { "epoch": 0.8615759875330307, "grad_norm": 5.232541084289551, "learning_rate": 8.361968649462661e-05, "loss": 0.7463, "step": 12716 }, { "epoch": 0.8616437428010028, "grad_norm": 6.598004341125488, "learning_rate": 8.361831747552879e-05, "loss": 0.7651, "step": 12717 }, { "epoch": 0.8617114980689748, "grad_norm": 6.944410800933838, "learning_rate": 8.361694845643097e-05, "loss": 0.8616, "step": 12718 }, { "epoch": 0.8617792533369469, "grad_norm": 5.001755714416504, "learning_rate": 8.361557943733316e-05, "loss": 0.6435, "step": 12719 }, { "epoch": 0.861847008604919, "grad_norm": 5.759768486022949, "learning_rate": 8.361421041823534e-05, "loss": 0.8236, "step": 12720 }, { "epoch": 0.8619147638728911, "grad_norm": 7.86954927444458, "learning_rate": 8.361284139913752e-05, "loss": 0.9191, "step": 12721 }, { "epoch": 0.8619825191408632, "grad_norm": 7.7088541984558105, "learning_rate": 8.36114723800397e-05, "loss": 0.874, "step": 12722 }, { "epoch": 0.8620502744088353, "grad_norm": 5.526132583618164, "learning_rate": 8.36101033609419e-05, "loss": 0.8083, "step": 12723 }, { "epoch": 0.8621180296768074, "grad_norm": 6.153379440307617, "learning_rate": 8.360873434184408e-05, "loss": 0.8097, "step": 12724 }, { "epoch": 0.8621857849447795, "grad_norm": 6.498526573181152, "learning_rate": 8.360736532274626e-05, "loss": 0.8441, "step": 12725 }, { "epoch": 0.8622535402127516, "grad_norm": 6.869676113128662, "learning_rate": 8.360599630364844e-05, "loss": 0.8353, "step": 12726 }, { "epoch": 0.8623212954807237, "grad_norm": 5.733700275421143, "learning_rate": 8.360462728455062e-05, "loss": 0.6627, "step": 12727 }, { "epoch": 0.8623890507486958, "grad_norm": 5.4007415771484375, "learning_rate": 8.360325826545281e-05, "loss": 0.5945, "step": 12728 }, { "epoch": 0.8624568060166677, "grad_norm": 5.028225898742676, "learning_rate": 8.360188924635499e-05, "loss": 0.7625, "step": 12729 }, { "epoch": 0.8625245612846398, "grad_norm": 6.950478553771973, "learning_rate": 8.360052022725717e-05, "loss": 0.9458, "step": 12730 }, { "epoch": 0.8625923165526119, "grad_norm": 5.532278060913086, "learning_rate": 8.359915120815935e-05, "loss": 0.7427, "step": 12731 }, { "epoch": 0.862660071820584, "grad_norm": 6.450150966644287, "learning_rate": 8.359778218906155e-05, "loss": 0.7621, "step": 12732 }, { "epoch": 0.8627278270885561, "grad_norm": 8.56269645690918, "learning_rate": 8.359641316996373e-05, "loss": 1.0904, "step": 12733 }, { "epoch": 0.8627955823565282, "grad_norm": 5.204941272735596, "learning_rate": 8.359504415086591e-05, "loss": 0.6651, "step": 12734 }, { "epoch": 0.8628633376245003, "grad_norm": 6.284374237060547, "learning_rate": 8.359367513176809e-05, "loss": 0.7062, "step": 12735 }, { "epoch": 0.8629310928924724, "grad_norm": 5.432925224304199, "learning_rate": 8.359230611267027e-05, "loss": 0.7484, "step": 12736 }, { "epoch": 0.8629988481604445, "grad_norm": 8.31678581237793, "learning_rate": 8.359093709357246e-05, "loss": 0.7803, "step": 12737 }, { "epoch": 0.8630666034284166, "grad_norm": 5.666910171508789, "learning_rate": 8.358956807447464e-05, "loss": 0.6977, "step": 12738 }, { "epoch": 0.8631343586963887, "grad_norm": 5.904063701629639, "learning_rate": 8.358819905537682e-05, "loss": 0.9111, "step": 12739 }, { "epoch": 0.8632021139643608, "grad_norm": 5.75467586517334, "learning_rate": 8.3586830036279e-05, "loss": 0.8442, "step": 12740 }, { "epoch": 0.8632698692323328, "grad_norm": 6.118785381317139, "learning_rate": 8.35854610171812e-05, "loss": 0.8948, "step": 12741 }, { "epoch": 0.8633376245003049, "grad_norm": 6.068502426147461, "learning_rate": 8.358409199808338e-05, "loss": 0.6608, "step": 12742 }, { "epoch": 0.863405379768277, "grad_norm": 6.425013542175293, "learning_rate": 8.358272297898556e-05, "loss": 0.6886, "step": 12743 }, { "epoch": 0.8634731350362491, "grad_norm": 6.034631729125977, "learning_rate": 8.358135395988774e-05, "loss": 0.7105, "step": 12744 }, { "epoch": 0.8635408903042211, "grad_norm": 5.688156604766846, "learning_rate": 8.357998494078992e-05, "loss": 0.7705, "step": 12745 }, { "epoch": 0.8636086455721932, "grad_norm": 6.057868480682373, "learning_rate": 8.357861592169211e-05, "loss": 0.7228, "step": 12746 }, { "epoch": 0.8636764008401653, "grad_norm": 9.307186126708984, "learning_rate": 8.357724690259429e-05, "loss": 0.8587, "step": 12747 }, { "epoch": 0.8637441561081374, "grad_norm": 4.899465560913086, "learning_rate": 8.357587788349647e-05, "loss": 0.78, "step": 12748 }, { "epoch": 0.8638119113761095, "grad_norm": 5.833337783813477, "learning_rate": 8.357450886439865e-05, "loss": 0.8024, "step": 12749 }, { "epoch": 0.8638796666440816, "grad_norm": 5.353335380554199, "learning_rate": 8.357313984530085e-05, "loss": 0.9984, "step": 12750 }, { "epoch": 0.8639474219120536, "grad_norm": 5.963881969451904, "learning_rate": 8.357177082620303e-05, "loss": 0.6835, "step": 12751 }, { "epoch": 0.8640151771800257, "grad_norm": 5.2977166175842285, "learning_rate": 8.357040180710521e-05, "loss": 0.8039, "step": 12752 }, { "epoch": 0.8640829324479978, "grad_norm": 5.586292743682861, "learning_rate": 8.35690327880074e-05, "loss": 0.8733, "step": 12753 }, { "epoch": 0.8641506877159699, "grad_norm": 8.01198959350586, "learning_rate": 8.356766376890958e-05, "loss": 1.0226, "step": 12754 }, { "epoch": 0.864218442983942, "grad_norm": 5.4859089851379395, "learning_rate": 8.356629474981176e-05, "loss": 0.9956, "step": 12755 }, { "epoch": 0.8642861982519141, "grad_norm": 6.660008907318115, "learning_rate": 8.356492573071396e-05, "loss": 0.8324, "step": 12756 }, { "epoch": 0.8643539535198862, "grad_norm": 4.699826717376709, "learning_rate": 8.356355671161614e-05, "loss": 0.6846, "step": 12757 }, { "epoch": 0.8644217087878583, "grad_norm": 5.154027938842773, "learning_rate": 8.356218769251832e-05, "loss": 0.7131, "step": 12758 }, { "epoch": 0.8644894640558304, "grad_norm": 6.356171131134033, "learning_rate": 8.35608186734205e-05, "loss": 0.9622, "step": 12759 }, { "epoch": 0.8645572193238025, "grad_norm": 4.833785057067871, "learning_rate": 8.355944965432269e-05, "loss": 0.7694, "step": 12760 }, { "epoch": 0.8646249745917746, "grad_norm": 5.7593889236450195, "learning_rate": 8.355808063522487e-05, "loss": 0.5833, "step": 12761 }, { "epoch": 0.8646927298597465, "grad_norm": 5.679107189178467, "learning_rate": 8.355671161612705e-05, "loss": 0.7964, "step": 12762 }, { "epoch": 0.8647604851277186, "grad_norm": 7.541195869445801, "learning_rate": 8.355534259702923e-05, "loss": 0.7552, "step": 12763 }, { "epoch": 0.8648282403956907, "grad_norm": 7.550341606140137, "learning_rate": 8.355397357793143e-05, "loss": 0.9926, "step": 12764 }, { "epoch": 0.8648959956636628, "grad_norm": 5.840928554534912, "learning_rate": 8.35526045588336e-05, "loss": 0.7363, "step": 12765 }, { "epoch": 0.8649637509316349, "grad_norm": 5.607713222503662, "learning_rate": 8.355123553973579e-05, "loss": 0.7449, "step": 12766 }, { "epoch": 0.865031506199607, "grad_norm": 4.359575271606445, "learning_rate": 8.354986652063797e-05, "loss": 0.7983, "step": 12767 }, { "epoch": 0.8650992614675791, "grad_norm": 5.7104363441467285, "learning_rate": 8.354849750154015e-05, "loss": 0.7692, "step": 12768 }, { "epoch": 0.8651670167355512, "grad_norm": 6.246327877044678, "learning_rate": 8.354712848244234e-05, "loss": 0.8406, "step": 12769 }, { "epoch": 0.8652347720035233, "grad_norm": 5.536504745483398, "learning_rate": 8.354575946334452e-05, "loss": 0.8198, "step": 12770 }, { "epoch": 0.8653025272714954, "grad_norm": 6.570160388946533, "learning_rate": 8.35443904442467e-05, "loss": 0.4944, "step": 12771 }, { "epoch": 0.8653702825394675, "grad_norm": 4.442549705505371, "learning_rate": 8.354302142514888e-05, "loss": 0.6964, "step": 12772 }, { "epoch": 0.8654380378074396, "grad_norm": 5.418501853942871, "learning_rate": 8.354165240605106e-05, "loss": 0.7211, "step": 12773 }, { "epoch": 0.8655057930754116, "grad_norm": 8.48850154876709, "learning_rate": 8.354028338695326e-05, "loss": 0.7467, "step": 12774 }, { "epoch": 0.8655735483433837, "grad_norm": 7.069798946380615, "learning_rate": 8.353891436785544e-05, "loss": 0.6164, "step": 12775 }, { "epoch": 0.8656413036113558, "grad_norm": 9.349570274353027, "learning_rate": 8.353754534875762e-05, "loss": 0.7237, "step": 12776 }, { "epoch": 0.8657090588793279, "grad_norm": 6.17117977142334, "learning_rate": 8.35361763296598e-05, "loss": 0.9118, "step": 12777 }, { "epoch": 0.8657768141472999, "grad_norm": 6.606432914733887, "learning_rate": 8.353480731056199e-05, "loss": 0.7991, "step": 12778 }, { "epoch": 0.865844569415272, "grad_norm": 8.163229942321777, "learning_rate": 8.353343829146417e-05, "loss": 1.0244, "step": 12779 }, { "epoch": 0.8659123246832441, "grad_norm": 8.662758827209473, "learning_rate": 8.353206927236635e-05, "loss": 0.9709, "step": 12780 }, { "epoch": 0.8659800799512162, "grad_norm": 5.596740245819092, "learning_rate": 8.353070025326853e-05, "loss": 0.6034, "step": 12781 }, { "epoch": 0.8660478352191883, "grad_norm": 6.353254795074463, "learning_rate": 8.352933123417071e-05, "loss": 0.8139, "step": 12782 }, { "epoch": 0.8661155904871604, "grad_norm": 6.522273540496826, "learning_rate": 8.352796221507291e-05, "loss": 0.7098, "step": 12783 }, { "epoch": 0.8661833457551324, "grad_norm": 6.948729038238525, "learning_rate": 8.352659319597509e-05, "loss": 0.8503, "step": 12784 }, { "epoch": 0.8662511010231045, "grad_norm": 4.999991416931152, "learning_rate": 8.352522417687727e-05, "loss": 0.8581, "step": 12785 }, { "epoch": 0.8663188562910766, "grad_norm": 5.613487243652344, "learning_rate": 8.352385515777945e-05, "loss": 0.7931, "step": 12786 }, { "epoch": 0.8663866115590487, "grad_norm": 4.850801944732666, "learning_rate": 8.352248613868164e-05, "loss": 0.8297, "step": 12787 }, { "epoch": 0.8664543668270208, "grad_norm": 6.223756313323975, "learning_rate": 8.352111711958382e-05, "loss": 0.7722, "step": 12788 }, { "epoch": 0.8665221220949929, "grad_norm": 6.001883029937744, "learning_rate": 8.3519748100486e-05, "loss": 0.7325, "step": 12789 }, { "epoch": 0.866589877362965, "grad_norm": 6.25042724609375, "learning_rate": 8.351837908138818e-05, "loss": 0.5647, "step": 12790 }, { "epoch": 0.8666576326309371, "grad_norm": 5.111363410949707, "learning_rate": 8.351701006229036e-05, "loss": 0.6094, "step": 12791 }, { "epoch": 0.8667253878989092, "grad_norm": 6.161365985870361, "learning_rate": 8.351564104319256e-05, "loss": 0.9958, "step": 12792 }, { "epoch": 0.8667931431668813, "grad_norm": 7.109573841094971, "learning_rate": 8.351427202409474e-05, "loss": 0.8822, "step": 12793 }, { "epoch": 0.8668608984348533, "grad_norm": 5.816695690155029, "learning_rate": 8.351290300499692e-05, "loss": 0.7643, "step": 12794 }, { "epoch": 0.8669286537028253, "grad_norm": 6.266007423400879, "learning_rate": 8.35115339858991e-05, "loss": 0.6835, "step": 12795 }, { "epoch": 0.8669964089707974, "grad_norm": 5.289031505584717, "learning_rate": 8.351016496680129e-05, "loss": 0.6873, "step": 12796 }, { "epoch": 0.8670641642387695, "grad_norm": 6.6583251953125, "learning_rate": 8.350879594770347e-05, "loss": 0.9201, "step": 12797 }, { "epoch": 0.8671319195067416, "grad_norm": 8.268150329589844, "learning_rate": 8.350742692860565e-05, "loss": 1.0113, "step": 12798 }, { "epoch": 0.8671996747747137, "grad_norm": 5.9207377433776855, "learning_rate": 8.350605790950783e-05, "loss": 0.8132, "step": 12799 }, { "epoch": 0.8672674300426858, "grad_norm": 7.960853099822998, "learning_rate": 8.350468889041003e-05, "loss": 0.8865, "step": 12800 }, { "epoch": 0.8673351853106579, "grad_norm": 7.3755364418029785, "learning_rate": 8.350331987131221e-05, "loss": 1.0102, "step": 12801 }, { "epoch": 0.86740294057863, "grad_norm": 5.826414108276367, "learning_rate": 8.35019508522144e-05, "loss": 0.6119, "step": 12802 }, { "epoch": 0.8674706958466021, "grad_norm": 4.6021857261657715, "learning_rate": 8.350058183311658e-05, "loss": 0.7426, "step": 12803 }, { "epoch": 0.8675384511145742, "grad_norm": 6.744377136230469, "learning_rate": 8.349921281401876e-05, "loss": 0.6373, "step": 12804 }, { "epoch": 0.8676062063825463, "grad_norm": 5.894944190979004, "learning_rate": 8.349784379492094e-05, "loss": 0.8591, "step": 12805 }, { "epoch": 0.8676739616505184, "grad_norm": 6.757943630218506, "learning_rate": 8.349647477582314e-05, "loss": 0.8015, "step": 12806 }, { "epoch": 0.8677417169184904, "grad_norm": 4.994389057159424, "learning_rate": 8.349510575672532e-05, "loss": 0.8663, "step": 12807 }, { "epoch": 0.8678094721864625, "grad_norm": 6.01758337020874, "learning_rate": 8.34937367376275e-05, "loss": 0.8132, "step": 12808 }, { "epoch": 0.8678772274544346, "grad_norm": 5.205808639526367, "learning_rate": 8.349236771852968e-05, "loss": 0.7275, "step": 12809 }, { "epoch": 0.8679449827224067, "grad_norm": 6.259244441986084, "learning_rate": 8.349099869943187e-05, "loss": 0.9726, "step": 12810 }, { "epoch": 0.8680127379903787, "grad_norm": 9.059528350830078, "learning_rate": 8.348962968033405e-05, "loss": 0.9158, "step": 12811 }, { "epoch": 0.8680804932583508, "grad_norm": 4.859555244445801, "learning_rate": 8.348826066123623e-05, "loss": 1.0459, "step": 12812 }, { "epoch": 0.8681482485263229, "grad_norm": 5.262827396392822, "learning_rate": 8.348689164213841e-05, "loss": 0.6795, "step": 12813 }, { "epoch": 0.868216003794295, "grad_norm": 7.7913818359375, "learning_rate": 8.348552262304059e-05, "loss": 0.9334, "step": 12814 }, { "epoch": 0.8682837590622671, "grad_norm": 6.01145601272583, "learning_rate": 8.348415360394279e-05, "loss": 0.8221, "step": 12815 }, { "epoch": 0.8683515143302392, "grad_norm": 6.34686279296875, "learning_rate": 8.348278458484497e-05, "loss": 0.7442, "step": 12816 }, { "epoch": 0.8684192695982113, "grad_norm": 6.925653457641602, "learning_rate": 8.348141556574715e-05, "loss": 0.9746, "step": 12817 }, { "epoch": 0.8684870248661833, "grad_norm": 5.890630722045898, "learning_rate": 8.348004654664933e-05, "loss": 0.8322, "step": 12818 }, { "epoch": 0.8685547801341554, "grad_norm": 5.628925800323486, "learning_rate": 8.347867752755152e-05, "loss": 0.7154, "step": 12819 }, { "epoch": 0.8686225354021275, "grad_norm": 5.708745956420898, "learning_rate": 8.34773085084537e-05, "loss": 0.5718, "step": 12820 }, { "epoch": 0.8686902906700996, "grad_norm": 5.534167289733887, "learning_rate": 8.347593948935588e-05, "loss": 0.84, "step": 12821 }, { "epoch": 0.8687580459380717, "grad_norm": 6.858389854431152, "learning_rate": 8.347457047025806e-05, "loss": 0.7687, "step": 12822 }, { "epoch": 0.8688258012060438, "grad_norm": 5.650959491729736, "learning_rate": 8.347320145116024e-05, "loss": 0.8645, "step": 12823 }, { "epoch": 0.8688935564740159, "grad_norm": 5.0962324142456055, "learning_rate": 8.347183243206244e-05, "loss": 0.6123, "step": 12824 }, { "epoch": 0.868961311741988, "grad_norm": 7.235836029052734, "learning_rate": 8.347046341296462e-05, "loss": 0.831, "step": 12825 }, { "epoch": 0.8690290670099601, "grad_norm": 5.751955986022949, "learning_rate": 8.34690943938668e-05, "loss": 0.6362, "step": 12826 }, { "epoch": 0.8690968222779321, "grad_norm": 6.159754276275635, "learning_rate": 8.346772537476898e-05, "loss": 0.6077, "step": 12827 }, { "epoch": 0.8691645775459041, "grad_norm": 5.868882179260254, "learning_rate": 8.346635635567116e-05, "loss": 0.6687, "step": 12828 }, { "epoch": 0.8692323328138762, "grad_norm": 6.147858142852783, "learning_rate": 8.346498733657335e-05, "loss": 0.7494, "step": 12829 }, { "epoch": 0.8693000880818483, "grad_norm": 7.173940181732178, "learning_rate": 8.346361831747553e-05, "loss": 0.906, "step": 12830 }, { "epoch": 0.8693678433498204, "grad_norm": 6.0628814697265625, "learning_rate": 8.346224929837771e-05, "loss": 0.6627, "step": 12831 }, { "epoch": 0.8694355986177925, "grad_norm": 5.7082343101501465, "learning_rate": 8.346088027927989e-05, "loss": 0.7305, "step": 12832 }, { "epoch": 0.8695033538857646, "grad_norm": 7.864633083343506, "learning_rate": 8.345951126018209e-05, "loss": 0.7189, "step": 12833 }, { "epoch": 0.8695711091537367, "grad_norm": 6.37849235534668, "learning_rate": 8.345814224108427e-05, "loss": 0.6766, "step": 12834 }, { "epoch": 0.8696388644217088, "grad_norm": 5.001320838928223, "learning_rate": 8.345677322198645e-05, "loss": 0.6396, "step": 12835 }, { "epoch": 0.8697066196896809, "grad_norm": 5.3315606117248535, "learning_rate": 8.345540420288863e-05, "loss": 0.7312, "step": 12836 }, { "epoch": 0.869774374957653, "grad_norm": 8.665613174438477, "learning_rate": 8.345403518379081e-05, "loss": 0.7123, "step": 12837 }, { "epoch": 0.8698421302256251, "grad_norm": 6.103067874908447, "learning_rate": 8.3452666164693e-05, "loss": 0.6624, "step": 12838 }, { "epoch": 0.8699098854935972, "grad_norm": 5.679276943206787, "learning_rate": 8.345129714559518e-05, "loss": 0.8496, "step": 12839 }, { "epoch": 0.8699776407615692, "grad_norm": 5.381316184997559, "learning_rate": 8.344992812649736e-05, "loss": 0.5979, "step": 12840 }, { "epoch": 0.8700453960295413, "grad_norm": 7.85809326171875, "learning_rate": 8.344855910739954e-05, "loss": 0.934, "step": 12841 }, { "epoch": 0.8701131512975134, "grad_norm": 5.871927738189697, "learning_rate": 8.344719008830174e-05, "loss": 0.8423, "step": 12842 }, { "epoch": 0.8701809065654854, "grad_norm": 7.4773268699646, "learning_rate": 8.344582106920392e-05, "loss": 0.9766, "step": 12843 }, { "epoch": 0.8702486618334575, "grad_norm": 5.408422946929932, "learning_rate": 8.34444520501061e-05, "loss": 0.8514, "step": 12844 }, { "epoch": 0.8703164171014296, "grad_norm": 5.86200475692749, "learning_rate": 8.344308303100828e-05, "loss": 0.7288, "step": 12845 }, { "epoch": 0.8703841723694017, "grad_norm": 5.635140419006348, "learning_rate": 8.344171401191047e-05, "loss": 0.7645, "step": 12846 }, { "epoch": 0.8704519276373738, "grad_norm": 4.9930524826049805, "learning_rate": 8.344034499281265e-05, "loss": 0.6171, "step": 12847 }, { "epoch": 0.8705196829053459, "grad_norm": 7.424667835235596, "learning_rate": 8.343897597371483e-05, "loss": 0.6401, "step": 12848 }, { "epoch": 0.870587438173318, "grad_norm": 6.124965667724609, "learning_rate": 8.343760695461703e-05, "loss": 0.8959, "step": 12849 }, { "epoch": 0.87065519344129, "grad_norm": 7.060937881469727, "learning_rate": 8.34362379355192e-05, "loss": 0.9213, "step": 12850 }, { "epoch": 0.8707229487092621, "grad_norm": 5.401867389678955, "learning_rate": 8.343486891642139e-05, "loss": 0.6287, "step": 12851 }, { "epoch": 0.8707907039772342, "grad_norm": 6.958117485046387, "learning_rate": 8.343349989732358e-05, "loss": 0.9408, "step": 12852 }, { "epoch": 0.8708584592452063, "grad_norm": 6.046300888061523, "learning_rate": 8.343213087822576e-05, "loss": 0.875, "step": 12853 }, { "epoch": 0.8709262145131784, "grad_norm": 6.581618785858154, "learning_rate": 8.343076185912794e-05, "loss": 0.9302, "step": 12854 }, { "epoch": 0.8709939697811505, "grad_norm": 6.795581817626953, "learning_rate": 8.342939284003012e-05, "loss": 0.8144, "step": 12855 }, { "epoch": 0.8710617250491226, "grad_norm": 7.541666030883789, "learning_rate": 8.342802382093232e-05, "loss": 0.7349, "step": 12856 }, { "epoch": 0.8711294803170947, "grad_norm": 7.175131320953369, "learning_rate": 8.34266548018345e-05, "loss": 0.9659, "step": 12857 }, { "epoch": 0.8711972355850668, "grad_norm": 4.722870349884033, "learning_rate": 8.342528578273668e-05, "loss": 0.819, "step": 12858 }, { "epoch": 0.8712649908530389, "grad_norm": 6.122445106506348, "learning_rate": 8.342391676363886e-05, "loss": 0.839, "step": 12859 }, { "epoch": 0.8713327461210109, "grad_norm": 6.808223724365234, "learning_rate": 8.342254774454104e-05, "loss": 0.8034, "step": 12860 }, { "epoch": 0.871400501388983, "grad_norm": 5.828917026519775, "learning_rate": 8.342117872544323e-05, "loss": 0.6161, "step": 12861 }, { "epoch": 0.871468256656955, "grad_norm": 5.570794582366943, "learning_rate": 8.341980970634541e-05, "loss": 0.738, "step": 12862 }, { "epoch": 0.8715360119249271, "grad_norm": 8.307096481323242, "learning_rate": 8.341844068724759e-05, "loss": 0.6682, "step": 12863 }, { "epoch": 0.8716037671928992, "grad_norm": 5.596463203430176, "learning_rate": 8.341707166814977e-05, "loss": 0.7721, "step": 12864 }, { "epoch": 0.8716715224608713, "grad_norm": 6.285654067993164, "learning_rate": 8.341570264905197e-05, "loss": 0.8688, "step": 12865 }, { "epoch": 0.8717392777288434, "grad_norm": 5.791274070739746, "learning_rate": 8.341433362995415e-05, "loss": 0.6696, "step": 12866 }, { "epoch": 0.8718070329968155, "grad_norm": 6.1828413009643555, "learning_rate": 8.341296461085633e-05, "loss": 0.6573, "step": 12867 }, { "epoch": 0.8718747882647876, "grad_norm": 6.662983417510986, "learning_rate": 8.341159559175851e-05, "loss": 1.0418, "step": 12868 }, { "epoch": 0.8719425435327597, "grad_norm": 6.25025749206543, "learning_rate": 8.341022657266069e-05, "loss": 0.8527, "step": 12869 }, { "epoch": 0.8720102988007318, "grad_norm": 5.201798915863037, "learning_rate": 8.340885755356288e-05, "loss": 1.0302, "step": 12870 }, { "epoch": 0.8720780540687039, "grad_norm": 7.105745315551758, "learning_rate": 8.340748853446506e-05, "loss": 0.8022, "step": 12871 }, { "epoch": 0.872145809336676, "grad_norm": 5.172214031219482, "learning_rate": 8.340611951536724e-05, "loss": 0.7184, "step": 12872 }, { "epoch": 0.872213564604648, "grad_norm": 4.862717151641846, "learning_rate": 8.340475049626942e-05, "loss": 0.5587, "step": 12873 }, { "epoch": 0.8722813198726201, "grad_norm": 5.38953971862793, "learning_rate": 8.340338147717162e-05, "loss": 0.7139, "step": 12874 }, { "epoch": 0.8723490751405922, "grad_norm": 6.497892379760742, "learning_rate": 8.34020124580738e-05, "loss": 0.7788, "step": 12875 }, { "epoch": 0.8724168304085642, "grad_norm": 5.505675792694092, "learning_rate": 8.340064343897598e-05, "loss": 0.8322, "step": 12876 }, { "epoch": 0.8724845856765363, "grad_norm": 6.221973419189453, "learning_rate": 8.339927441987816e-05, "loss": 0.9316, "step": 12877 }, { "epoch": 0.8725523409445084, "grad_norm": 5.295744895935059, "learning_rate": 8.339790540078034e-05, "loss": 0.8424, "step": 12878 }, { "epoch": 0.8726200962124805, "grad_norm": 8.020045280456543, "learning_rate": 8.339653638168253e-05, "loss": 0.7412, "step": 12879 }, { "epoch": 0.8726878514804526, "grad_norm": 7.37377405166626, "learning_rate": 8.339516736258471e-05, "loss": 0.6915, "step": 12880 }, { "epoch": 0.8727556067484247, "grad_norm": 4.970482349395752, "learning_rate": 8.339379834348689e-05, "loss": 0.6638, "step": 12881 }, { "epoch": 0.8728233620163968, "grad_norm": 5.549528121948242, "learning_rate": 8.339242932438907e-05, "loss": 0.6753, "step": 12882 }, { "epoch": 0.8728911172843689, "grad_norm": 6.604176044464111, "learning_rate": 8.339106030529125e-05, "loss": 0.8787, "step": 12883 }, { "epoch": 0.872958872552341, "grad_norm": 5.274704933166504, "learning_rate": 8.338969128619345e-05, "loss": 0.7042, "step": 12884 }, { "epoch": 0.873026627820313, "grad_norm": 6.73942232131958, "learning_rate": 8.338832226709563e-05, "loss": 0.9337, "step": 12885 }, { "epoch": 0.8730943830882851, "grad_norm": 5.404533863067627, "learning_rate": 8.338695324799781e-05, "loss": 0.8108, "step": 12886 }, { "epoch": 0.8731621383562572, "grad_norm": 5.6985087394714355, "learning_rate": 8.338558422889999e-05, "loss": 0.8906, "step": 12887 }, { "epoch": 0.8732298936242293, "grad_norm": 6.844064235687256, "learning_rate": 8.338421520980218e-05, "loss": 0.8528, "step": 12888 }, { "epoch": 0.8732976488922014, "grad_norm": 7.021403789520264, "learning_rate": 8.338284619070436e-05, "loss": 0.8084, "step": 12889 }, { "epoch": 0.8733654041601735, "grad_norm": 7.886348247528076, "learning_rate": 8.338147717160654e-05, "loss": 0.6421, "step": 12890 }, { "epoch": 0.8734331594281456, "grad_norm": 6.461071968078613, "learning_rate": 8.338010815250872e-05, "loss": 0.7556, "step": 12891 }, { "epoch": 0.8735009146961176, "grad_norm": 5.232357501983643, "learning_rate": 8.337873913341092e-05, "loss": 0.7246, "step": 12892 }, { "epoch": 0.8735686699640897, "grad_norm": 4.585625648498535, "learning_rate": 8.33773701143131e-05, "loss": 0.6826, "step": 12893 }, { "epoch": 0.8736364252320618, "grad_norm": 5.381401538848877, "learning_rate": 8.337600109521528e-05, "loss": 0.8486, "step": 12894 }, { "epoch": 0.8737041805000338, "grad_norm": 6.673171043395996, "learning_rate": 8.337463207611747e-05, "loss": 0.6716, "step": 12895 }, { "epoch": 0.8737719357680059, "grad_norm": 9.136744499206543, "learning_rate": 8.337326305701965e-05, "loss": 1.0048, "step": 12896 }, { "epoch": 0.873839691035978, "grad_norm": 7.344038963317871, "learning_rate": 8.337189403792183e-05, "loss": 1.0041, "step": 12897 }, { "epoch": 0.8739074463039501, "grad_norm": 5.5072832107543945, "learning_rate": 8.337052501882403e-05, "loss": 0.6608, "step": 12898 }, { "epoch": 0.8739752015719222, "grad_norm": 6.74569845199585, "learning_rate": 8.33691559997262e-05, "loss": 0.9765, "step": 12899 }, { "epoch": 0.8740429568398943, "grad_norm": 6.700042247772217, "learning_rate": 8.336778698062839e-05, "loss": 0.8127, "step": 12900 }, { "epoch": 0.8741107121078664, "grad_norm": 5.775038242340088, "learning_rate": 8.336641796153057e-05, "loss": 0.8717, "step": 12901 }, { "epoch": 0.8741784673758385, "grad_norm": 6.800004005432129, "learning_rate": 8.336504894243276e-05, "loss": 0.9006, "step": 12902 }, { "epoch": 0.8742462226438106, "grad_norm": 6.40112829208374, "learning_rate": 8.336367992333494e-05, "loss": 1.0521, "step": 12903 }, { "epoch": 0.8743139779117827, "grad_norm": 8.33122444152832, "learning_rate": 8.336231090423712e-05, "loss": 0.8228, "step": 12904 }, { "epoch": 0.8743817331797548, "grad_norm": 6.429588317871094, "learning_rate": 8.33609418851393e-05, "loss": 0.8859, "step": 12905 }, { "epoch": 0.8744494884477269, "grad_norm": 6.242363452911377, "learning_rate": 8.335957286604148e-05, "loss": 0.7872, "step": 12906 }, { "epoch": 0.8745172437156989, "grad_norm": 4.990901470184326, "learning_rate": 8.335820384694368e-05, "loss": 0.6444, "step": 12907 }, { "epoch": 0.874584998983671, "grad_norm": 5.876582145690918, "learning_rate": 8.335683482784586e-05, "loss": 1.0041, "step": 12908 }, { "epoch": 0.874652754251643, "grad_norm": 5.543231964111328, "learning_rate": 8.335546580874804e-05, "loss": 0.658, "step": 12909 }, { "epoch": 0.8747205095196151, "grad_norm": 6.464064598083496, "learning_rate": 8.335409678965022e-05, "loss": 0.642, "step": 12910 }, { "epoch": 0.8747882647875872, "grad_norm": 6.268253803253174, "learning_rate": 8.335272777055241e-05, "loss": 0.9617, "step": 12911 }, { "epoch": 0.8748560200555593, "grad_norm": 6.467350482940674, "learning_rate": 8.335135875145459e-05, "loss": 0.7592, "step": 12912 }, { "epoch": 0.8749237753235314, "grad_norm": 5.470207214355469, "learning_rate": 8.334998973235677e-05, "loss": 0.7742, "step": 12913 }, { "epoch": 0.8749915305915035, "grad_norm": 6.371903419494629, "learning_rate": 8.334862071325895e-05, "loss": 0.6947, "step": 12914 }, { "epoch": 0.8750592858594756, "grad_norm": 4.678818702697754, "learning_rate": 8.334725169416113e-05, "loss": 0.6938, "step": 12915 }, { "epoch": 0.8751270411274477, "grad_norm": 9.119450569152832, "learning_rate": 8.334588267506333e-05, "loss": 1.0259, "step": 12916 }, { "epoch": 0.8751947963954197, "grad_norm": 5.258639335632324, "learning_rate": 8.33445136559655e-05, "loss": 0.9058, "step": 12917 }, { "epoch": 0.8752625516633918, "grad_norm": 6.689220905303955, "learning_rate": 8.334314463686769e-05, "loss": 0.9682, "step": 12918 }, { "epoch": 0.8753303069313639, "grad_norm": 5.658355712890625, "learning_rate": 8.334177561776987e-05, "loss": 0.7693, "step": 12919 }, { "epoch": 0.875398062199336, "grad_norm": 5.163204193115234, "learning_rate": 8.334040659867206e-05, "loss": 0.7801, "step": 12920 }, { "epoch": 0.8754658174673081, "grad_norm": 5.308339595794678, "learning_rate": 8.333903757957424e-05, "loss": 0.9244, "step": 12921 }, { "epoch": 0.8755335727352802, "grad_norm": 6.663207054138184, "learning_rate": 8.333766856047642e-05, "loss": 0.677, "step": 12922 }, { "epoch": 0.8756013280032523, "grad_norm": 5.615358829498291, "learning_rate": 8.33362995413786e-05, "loss": 0.7344, "step": 12923 }, { "epoch": 0.8756690832712244, "grad_norm": 6.694277763366699, "learning_rate": 8.333493052228078e-05, "loss": 0.8418, "step": 12924 }, { "epoch": 0.8757368385391964, "grad_norm": 7.067589282989502, "learning_rate": 8.333356150318298e-05, "loss": 0.8682, "step": 12925 }, { "epoch": 0.8758045938071685, "grad_norm": 6.071625709533691, "learning_rate": 8.333219248408516e-05, "loss": 1.0198, "step": 12926 }, { "epoch": 0.8758723490751406, "grad_norm": 5.014382362365723, "learning_rate": 8.333082346498734e-05, "loss": 0.8123, "step": 12927 }, { "epoch": 0.8759401043431126, "grad_norm": 7.893514156341553, "learning_rate": 8.332945444588952e-05, "loss": 0.7958, "step": 12928 }, { "epoch": 0.8760078596110847, "grad_norm": 9.142471313476562, "learning_rate": 8.332808542679171e-05, "loss": 0.9072, "step": 12929 }, { "epoch": 0.8760756148790568, "grad_norm": 4.87850284576416, "learning_rate": 8.332671640769389e-05, "loss": 0.6293, "step": 12930 }, { "epoch": 0.8761433701470289, "grad_norm": 6.533822536468506, "learning_rate": 8.332534738859607e-05, "loss": 0.6865, "step": 12931 }, { "epoch": 0.876211125415001, "grad_norm": 6.230561256408691, "learning_rate": 8.332397836949825e-05, "loss": 0.8655, "step": 12932 }, { "epoch": 0.8762788806829731, "grad_norm": 5.474959373474121, "learning_rate": 8.332260935040043e-05, "loss": 0.7077, "step": 12933 }, { "epoch": 0.8763466359509452, "grad_norm": 6.593677043914795, "learning_rate": 8.332124033130263e-05, "loss": 0.8323, "step": 12934 }, { "epoch": 0.8764143912189173, "grad_norm": 6.031239986419678, "learning_rate": 8.33198713122048e-05, "loss": 0.8379, "step": 12935 }, { "epoch": 0.8764821464868894, "grad_norm": 5.653693199157715, "learning_rate": 8.331850229310699e-05, "loss": 0.6913, "step": 12936 }, { "epoch": 0.8765499017548615, "grad_norm": 4.6549296379089355, "learning_rate": 8.331713327400917e-05, "loss": 0.7085, "step": 12937 }, { "epoch": 0.8766176570228336, "grad_norm": 7.875330448150635, "learning_rate": 8.331576425491136e-05, "loss": 0.7635, "step": 12938 }, { "epoch": 0.8766854122908057, "grad_norm": 6.341701030731201, "learning_rate": 8.331439523581354e-05, "loss": 1.2747, "step": 12939 }, { "epoch": 0.8767531675587777, "grad_norm": 5.573342323303223, "learning_rate": 8.331302621671572e-05, "loss": 0.8176, "step": 12940 }, { "epoch": 0.8768209228267497, "grad_norm": 6.964317321777344, "learning_rate": 8.331165719761792e-05, "loss": 0.8216, "step": 12941 }, { "epoch": 0.8768886780947218, "grad_norm": 5.422325611114502, "learning_rate": 8.33102881785201e-05, "loss": 0.6236, "step": 12942 }, { "epoch": 0.8769564333626939, "grad_norm": 6.422939300537109, "learning_rate": 8.330891915942228e-05, "loss": 1.0188, "step": 12943 }, { "epoch": 0.877024188630666, "grad_norm": 5.843238830566406, "learning_rate": 8.330755014032447e-05, "loss": 0.9579, "step": 12944 }, { "epoch": 0.8770919438986381, "grad_norm": 5.675114631652832, "learning_rate": 8.330618112122665e-05, "loss": 0.7776, "step": 12945 }, { "epoch": 0.8771596991666102, "grad_norm": 6.512584686279297, "learning_rate": 8.330481210212883e-05, "loss": 0.8351, "step": 12946 }, { "epoch": 0.8772274544345823, "grad_norm": 7.719078063964844, "learning_rate": 8.330344308303101e-05, "loss": 1.0703, "step": 12947 }, { "epoch": 0.8772952097025544, "grad_norm": 6.948828220367432, "learning_rate": 8.33020740639332e-05, "loss": 0.843, "step": 12948 }, { "epoch": 0.8773629649705265, "grad_norm": 5.105550289154053, "learning_rate": 8.330070504483539e-05, "loss": 0.7685, "step": 12949 }, { "epoch": 0.8774307202384986, "grad_norm": 4.994384288787842, "learning_rate": 8.329933602573757e-05, "loss": 0.6291, "step": 12950 }, { "epoch": 0.8774984755064706, "grad_norm": 5.846680164337158, "learning_rate": 8.329796700663975e-05, "loss": 0.7701, "step": 12951 }, { "epoch": 0.8775662307744427, "grad_norm": 8.621549606323242, "learning_rate": 8.329659798754194e-05, "loss": 0.7899, "step": 12952 }, { "epoch": 0.8776339860424148, "grad_norm": 5.434492588043213, "learning_rate": 8.329522896844412e-05, "loss": 0.7942, "step": 12953 }, { "epoch": 0.8777017413103869, "grad_norm": 8.437414169311523, "learning_rate": 8.32938599493463e-05, "loss": 0.8845, "step": 12954 }, { "epoch": 0.877769496578359, "grad_norm": 5.602294921875, "learning_rate": 8.329249093024848e-05, "loss": 0.7475, "step": 12955 }, { "epoch": 0.8778372518463311, "grad_norm": 6.249868392944336, "learning_rate": 8.329112191115066e-05, "loss": 0.6117, "step": 12956 }, { "epoch": 0.8779050071143031, "grad_norm": 6.123753547668457, "learning_rate": 8.328975289205286e-05, "loss": 0.996, "step": 12957 }, { "epoch": 0.8779727623822752, "grad_norm": 6.797160625457764, "learning_rate": 8.328838387295504e-05, "loss": 0.8562, "step": 12958 }, { "epoch": 0.8780405176502473, "grad_norm": 6.009333610534668, "learning_rate": 8.328701485385722e-05, "loss": 0.7396, "step": 12959 }, { "epoch": 0.8781082729182194, "grad_norm": 6.817856788635254, "learning_rate": 8.32856458347594e-05, "loss": 0.9193, "step": 12960 }, { "epoch": 0.8781760281861914, "grad_norm": 6.004026412963867, "learning_rate": 8.328427681566158e-05, "loss": 0.7822, "step": 12961 }, { "epoch": 0.8782437834541635, "grad_norm": 5.585984230041504, "learning_rate": 8.328290779656377e-05, "loss": 0.6767, "step": 12962 }, { "epoch": 0.8783115387221356, "grad_norm": 9.209588050842285, "learning_rate": 8.328153877746595e-05, "loss": 0.9714, "step": 12963 }, { "epoch": 0.8783792939901077, "grad_norm": 6.652541160583496, "learning_rate": 8.328016975836813e-05, "loss": 0.7528, "step": 12964 }, { "epoch": 0.8784470492580798, "grad_norm": 6.689975261688232, "learning_rate": 8.327880073927031e-05, "loss": 0.7516, "step": 12965 }, { "epoch": 0.8785148045260519, "grad_norm": 5.454050540924072, "learning_rate": 8.32774317201725e-05, "loss": 0.7394, "step": 12966 }, { "epoch": 0.878582559794024, "grad_norm": 4.421219348907471, "learning_rate": 8.327606270107469e-05, "loss": 0.7704, "step": 12967 }, { "epoch": 0.8786503150619961, "grad_norm": 4.957309246063232, "learning_rate": 8.327469368197687e-05, "loss": 0.7643, "step": 12968 }, { "epoch": 0.8787180703299682, "grad_norm": 5.908604621887207, "learning_rate": 8.327332466287905e-05, "loss": 0.9247, "step": 12969 }, { "epoch": 0.8787858255979403, "grad_norm": 7.434008598327637, "learning_rate": 8.327195564378123e-05, "loss": 0.9366, "step": 12970 }, { "epoch": 0.8788535808659124, "grad_norm": 7.337964057922363, "learning_rate": 8.327058662468342e-05, "loss": 0.94, "step": 12971 }, { "epoch": 0.8789213361338845, "grad_norm": 6.679739475250244, "learning_rate": 8.32692176055856e-05, "loss": 0.9922, "step": 12972 }, { "epoch": 0.8789890914018565, "grad_norm": 5.989971160888672, "learning_rate": 8.326784858648778e-05, "loss": 0.8581, "step": 12973 }, { "epoch": 0.8790568466698285, "grad_norm": 4.795865058898926, "learning_rate": 8.326647956738996e-05, "loss": 0.8417, "step": 12974 }, { "epoch": 0.8791246019378006, "grad_norm": 6.574687957763672, "learning_rate": 8.326511054829216e-05, "loss": 0.9009, "step": 12975 }, { "epoch": 0.8791923572057727, "grad_norm": 5.56545352935791, "learning_rate": 8.326374152919434e-05, "loss": 0.7627, "step": 12976 }, { "epoch": 0.8792601124737448, "grad_norm": 7.365011215209961, "learning_rate": 8.326237251009652e-05, "loss": 1.0212, "step": 12977 }, { "epoch": 0.8793278677417169, "grad_norm": 5.9187140464782715, "learning_rate": 8.32610034909987e-05, "loss": 0.804, "step": 12978 }, { "epoch": 0.879395623009689, "grad_norm": 5.450742721557617, "learning_rate": 8.325963447190088e-05, "loss": 0.7946, "step": 12979 }, { "epoch": 0.8794633782776611, "grad_norm": 5.892670154571533, "learning_rate": 8.325826545280307e-05, "loss": 0.7495, "step": 12980 }, { "epoch": 0.8795311335456332, "grad_norm": 5.977981090545654, "learning_rate": 8.325689643370525e-05, "loss": 0.789, "step": 12981 }, { "epoch": 0.8795988888136053, "grad_norm": 6.273918628692627, "learning_rate": 8.325552741460743e-05, "loss": 1.0261, "step": 12982 }, { "epoch": 0.8796666440815774, "grad_norm": 5.750874996185303, "learning_rate": 8.325415839550961e-05, "loss": 0.8008, "step": 12983 }, { "epoch": 0.8797343993495494, "grad_norm": 6.168051242828369, "learning_rate": 8.32527893764118e-05, "loss": 0.7802, "step": 12984 }, { "epoch": 0.8798021546175215, "grad_norm": 6.125532627105713, "learning_rate": 8.325142035731399e-05, "loss": 0.8281, "step": 12985 }, { "epoch": 0.8798699098854936, "grad_norm": 5.973419666290283, "learning_rate": 8.325005133821617e-05, "loss": 0.8108, "step": 12986 }, { "epoch": 0.8799376651534657, "grad_norm": 5.780817031860352, "learning_rate": 8.324868231911836e-05, "loss": 0.6535, "step": 12987 }, { "epoch": 0.8800054204214378, "grad_norm": 5.342846870422363, "learning_rate": 8.324731330002054e-05, "loss": 0.6626, "step": 12988 }, { "epoch": 0.8800731756894099, "grad_norm": 5.165645599365234, "learning_rate": 8.324594428092272e-05, "loss": 0.8454, "step": 12989 }, { "epoch": 0.8801409309573819, "grad_norm": 7.062921524047852, "learning_rate": 8.324457526182492e-05, "loss": 0.6814, "step": 12990 }, { "epoch": 0.880208686225354, "grad_norm": 8.127230644226074, "learning_rate": 8.32432062427271e-05, "loss": 0.9044, "step": 12991 }, { "epoch": 0.8802764414933261, "grad_norm": 7.321716785430908, "learning_rate": 8.324183722362928e-05, "loss": 0.8596, "step": 12992 }, { "epoch": 0.8803441967612982, "grad_norm": 6.0520806312561035, "learning_rate": 8.324046820453146e-05, "loss": 0.7636, "step": 12993 }, { "epoch": 0.8804119520292703, "grad_norm": 4.8115973472595215, "learning_rate": 8.323909918543365e-05, "loss": 0.5694, "step": 12994 }, { "epoch": 0.8804797072972423, "grad_norm": 7.053106784820557, "learning_rate": 8.323773016633583e-05, "loss": 0.5812, "step": 12995 }, { "epoch": 0.8805474625652144, "grad_norm": 5.414585590362549, "learning_rate": 8.323636114723801e-05, "loss": 0.6227, "step": 12996 }, { "epoch": 0.8806152178331865, "grad_norm": 6.686954498291016, "learning_rate": 8.323499212814019e-05, "loss": 0.8822, "step": 12997 }, { "epoch": 0.8806829731011586, "grad_norm": 5.530512809753418, "learning_rate": 8.323362310904239e-05, "loss": 0.745, "step": 12998 }, { "epoch": 0.8807507283691307, "grad_norm": 7.229578495025635, "learning_rate": 8.323225408994457e-05, "loss": 0.7521, "step": 12999 }, { "epoch": 0.8808184836371028, "grad_norm": 5.444945335388184, "learning_rate": 8.323088507084675e-05, "loss": 0.741, "step": 13000 }, { "epoch": 0.8808862389050749, "grad_norm": 5.646410942077637, "learning_rate": 8.322951605174893e-05, "loss": 0.6562, "step": 13001 }, { "epoch": 0.880953994173047, "grad_norm": 6.224180698394775, "learning_rate": 8.32281470326511e-05, "loss": 0.6087, "step": 13002 }, { "epoch": 0.8810217494410191, "grad_norm": 5.3466997146606445, "learning_rate": 8.32267780135533e-05, "loss": 0.7514, "step": 13003 }, { "epoch": 0.8810895047089912, "grad_norm": 8.124218940734863, "learning_rate": 8.322540899445548e-05, "loss": 0.8344, "step": 13004 }, { "epoch": 0.8811572599769633, "grad_norm": 5.5622172355651855, "learning_rate": 8.322403997535766e-05, "loss": 0.8112, "step": 13005 }, { "epoch": 0.8812250152449352, "grad_norm": 6.751789093017578, "learning_rate": 8.322267095625984e-05, "loss": 0.9156, "step": 13006 }, { "epoch": 0.8812927705129073, "grad_norm": 6.383172512054443, "learning_rate": 8.322130193716204e-05, "loss": 0.8049, "step": 13007 }, { "epoch": 0.8813605257808794, "grad_norm": 5.960168838500977, "learning_rate": 8.321993291806422e-05, "loss": 0.9027, "step": 13008 }, { "epoch": 0.8814282810488515, "grad_norm": 5.624762535095215, "learning_rate": 8.32185638989664e-05, "loss": 0.6223, "step": 13009 }, { "epoch": 0.8814960363168236, "grad_norm": 5.830256938934326, "learning_rate": 8.321719487986858e-05, "loss": 0.9301, "step": 13010 }, { "epoch": 0.8815637915847957, "grad_norm": 11.064977645874023, "learning_rate": 8.321582586077076e-05, "loss": 0.8125, "step": 13011 }, { "epoch": 0.8816315468527678, "grad_norm": 5.739172458648682, "learning_rate": 8.321445684167295e-05, "loss": 0.8507, "step": 13012 }, { "epoch": 0.8816993021207399, "grad_norm": 5.400548458099365, "learning_rate": 8.321308782257513e-05, "loss": 0.7381, "step": 13013 }, { "epoch": 0.881767057388712, "grad_norm": 5.973668098449707, "learning_rate": 8.321171880347731e-05, "loss": 0.7881, "step": 13014 }, { "epoch": 0.8818348126566841, "grad_norm": 7.544906139373779, "learning_rate": 8.321034978437949e-05, "loss": 0.7593, "step": 13015 }, { "epoch": 0.8819025679246562, "grad_norm": 5.305513381958008, "learning_rate": 8.320898076528167e-05, "loss": 0.7552, "step": 13016 }, { "epoch": 0.8819703231926282, "grad_norm": 5.9566545486450195, "learning_rate": 8.320761174618387e-05, "loss": 0.8524, "step": 13017 }, { "epoch": 0.8820380784606003, "grad_norm": 5.806572437286377, "learning_rate": 8.320624272708605e-05, "loss": 0.6832, "step": 13018 }, { "epoch": 0.8821058337285724, "grad_norm": 5.694754600524902, "learning_rate": 8.320487370798823e-05, "loss": 0.8726, "step": 13019 }, { "epoch": 0.8821735889965445, "grad_norm": 6.508894920349121, "learning_rate": 8.32035046888904e-05, "loss": 0.8468, "step": 13020 }, { "epoch": 0.8822413442645166, "grad_norm": 5.450093746185303, "learning_rate": 8.32021356697926e-05, "loss": 0.715, "step": 13021 }, { "epoch": 0.8823090995324887, "grad_norm": 7.250792026519775, "learning_rate": 8.320076665069478e-05, "loss": 0.8587, "step": 13022 }, { "epoch": 0.8823768548004607, "grad_norm": 7.200087070465088, "learning_rate": 8.319939763159696e-05, "loss": 0.7188, "step": 13023 }, { "epoch": 0.8824446100684328, "grad_norm": 7.003962993621826, "learning_rate": 8.319802861249914e-05, "loss": 0.7586, "step": 13024 }, { "epoch": 0.8825123653364049, "grad_norm": 5.545292854309082, "learning_rate": 8.319665959340132e-05, "loss": 0.7623, "step": 13025 }, { "epoch": 0.882580120604377, "grad_norm": 7.445784091949463, "learning_rate": 8.319529057430352e-05, "loss": 1.0729, "step": 13026 }, { "epoch": 0.882647875872349, "grad_norm": 7.989095211029053, "learning_rate": 8.31939215552057e-05, "loss": 0.8033, "step": 13027 }, { "epoch": 0.8827156311403211, "grad_norm": 5.488166809082031, "learning_rate": 8.319255253610788e-05, "loss": 0.7301, "step": 13028 }, { "epoch": 0.8827833864082932, "grad_norm": 7.751863956451416, "learning_rate": 8.319118351701006e-05, "loss": 0.5935, "step": 13029 }, { "epoch": 0.8828511416762653, "grad_norm": 7.284234046936035, "learning_rate": 8.318981449791225e-05, "loss": 0.8942, "step": 13030 }, { "epoch": 0.8829188969442374, "grad_norm": 6.073469638824463, "learning_rate": 8.318844547881443e-05, "loss": 0.7782, "step": 13031 }, { "epoch": 0.8829866522122095, "grad_norm": 5.341745853424072, "learning_rate": 8.318707645971661e-05, "loss": 0.8347, "step": 13032 }, { "epoch": 0.8830544074801816, "grad_norm": 5.9635820388793945, "learning_rate": 8.31857074406188e-05, "loss": 0.9042, "step": 13033 }, { "epoch": 0.8831221627481537, "grad_norm": 5.801054954528809, "learning_rate": 8.318433842152099e-05, "loss": 0.7764, "step": 13034 }, { "epoch": 0.8831899180161258, "grad_norm": 7.042034149169922, "learning_rate": 8.318296940242317e-05, "loss": 1.0679, "step": 13035 }, { "epoch": 0.8832576732840979, "grad_norm": 4.915299892425537, "learning_rate": 8.318160038332536e-05, "loss": 0.7412, "step": 13036 }, { "epoch": 0.88332542855207, "grad_norm": 9.298592567443848, "learning_rate": 8.318023136422754e-05, "loss": 0.8991, "step": 13037 }, { "epoch": 0.8833931838200421, "grad_norm": 4.614468097686768, "learning_rate": 8.317886234512972e-05, "loss": 0.6786, "step": 13038 }, { "epoch": 0.883460939088014, "grad_norm": 5.394043445587158, "learning_rate": 8.31774933260319e-05, "loss": 0.7442, "step": 13039 }, { "epoch": 0.8835286943559861, "grad_norm": 6.138361930847168, "learning_rate": 8.31761243069341e-05, "loss": 0.5959, "step": 13040 }, { "epoch": 0.8835964496239582, "grad_norm": 6.376340866088867, "learning_rate": 8.317475528783628e-05, "loss": 1.0073, "step": 13041 }, { "epoch": 0.8836642048919303, "grad_norm": 4.798174858093262, "learning_rate": 8.317338626873846e-05, "loss": 0.6251, "step": 13042 }, { "epoch": 0.8837319601599024, "grad_norm": 6.462924957275391, "learning_rate": 8.317201724964064e-05, "loss": 0.9081, "step": 13043 }, { "epoch": 0.8837997154278745, "grad_norm": 5.447483062744141, "learning_rate": 8.317064823054283e-05, "loss": 0.8058, "step": 13044 }, { "epoch": 0.8838674706958466, "grad_norm": 6.656740188598633, "learning_rate": 8.316927921144501e-05, "loss": 0.8681, "step": 13045 }, { "epoch": 0.8839352259638187, "grad_norm": 5.062714099884033, "learning_rate": 8.316791019234719e-05, "loss": 0.5941, "step": 13046 }, { "epoch": 0.8840029812317908, "grad_norm": 6.954104900360107, "learning_rate": 8.316654117324937e-05, "loss": 0.9577, "step": 13047 }, { "epoch": 0.8840707364997629, "grad_norm": 5.395656585693359, "learning_rate": 8.316517215415155e-05, "loss": 0.6636, "step": 13048 }, { "epoch": 0.884138491767735, "grad_norm": 7.5406270027160645, "learning_rate": 8.316380313505374e-05, "loss": 0.8675, "step": 13049 }, { "epoch": 0.884206247035707, "grad_norm": 4.966914653778076, "learning_rate": 8.316243411595593e-05, "loss": 0.7242, "step": 13050 }, { "epoch": 0.8842740023036791, "grad_norm": 6.133572101593018, "learning_rate": 8.31610650968581e-05, "loss": 0.8462, "step": 13051 }, { "epoch": 0.8843417575716512, "grad_norm": 4.549497127532959, "learning_rate": 8.315969607776029e-05, "loss": 0.7792, "step": 13052 }, { "epoch": 0.8844095128396233, "grad_norm": 5.715592861175537, "learning_rate": 8.315832705866248e-05, "loss": 0.6261, "step": 13053 }, { "epoch": 0.8844772681075954, "grad_norm": 7.551342010498047, "learning_rate": 8.315695803956466e-05, "loss": 0.8017, "step": 13054 }, { "epoch": 0.8845450233755674, "grad_norm": 5.671367645263672, "learning_rate": 8.315558902046684e-05, "loss": 0.8092, "step": 13055 }, { "epoch": 0.8846127786435395, "grad_norm": 5.61579704284668, "learning_rate": 8.315422000136902e-05, "loss": 0.7854, "step": 13056 }, { "epoch": 0.8846805339115116, "grad_norm": 8.364435195922852, "learning_rate": 8.31528509822712e-05, "loss": 0.7893, "step": 13057 }, { "epoch": 0.8847482891794837, "grad_norm": 7.481292724609375, "learning_rate": 8.31514819631734e-05, "loss": 0.7083, "step": 13058 }, { "epoch": 0.8848160444474558, "grad_norm": 6.476237773895264, "learning_rate": 8.315011294407558e-05, "loss": 0.7749, "step": 13059 }, { "epoch": 0.8848837997154279, "grad_norm": 6.060905456542969, "learning_rate": 8.314874392497776e-05, "loss": 0.8042, "step": 13060 }, { "epoch": 0.8849515549834, "grad_norm": 5.672494888305664, "learning_rate": 8.314737490587994e-05, "loss": 0.5887, "step": 13061 }, { "epoch": 0.885019310251372, "grad_norm": 8.22986888885498, "learning_rate": 8.314600588678213e-05, "loss": 1.1182, "step": 13062 }, { "epoch": 0.8850870655193441, "grad_norm": 8.389451026916504, "learning_rate": 8.314463686768431e-05, "loss": 1.022, "step": 13063 }, { "epoch": 0.8851548207873162, "grad_norm": 5.47923469543457, "learning_rate": 8.314326784858649e-05, "loss": 0.7902, "step": 13064 }, { "epoch": 0.8852225760552883, "grad_norm": 5.775954723358154, "learning_rate": 8.314189882948867e-05, "loss": 0.8831, "step": 13065 }, { "epoch": 0.8852903313232604, "grad_norm": 6.233015060424805, "learning_rate": 8.314052981039085e-05, "loss": 0.7723, "step": 13066 }, { "epoch": 0.8853580865912325, "grad_norm": 6.433823585510254, "learning_rate": 8.313916079129305e-05, "loss": 0.7764, "step": 13067 }, { "epoch": 0.8854258418592046, "grad_norm": 8.425026893615723, "learning_rate": 8.313779177219523e-05, "loss": 0.9493, "step": 13068 }, { "epoch": 0.8854935971271767, "grad_norm": 5.355708599090576, "learning_rate": 8.31364227530974e-05, "loss": 0.7761, "step": 13069 }, { "epoch": 0.8855613523951488, "grad_norm": 4.858754634857178, "learning_rate": 8.313505373399959e-05, "loss": 0.6928, "step": 13070 }, { "epoch": 0.8856291076631209, "grad_norm": 6.304715633392334, "learning_rate": 8.313368471490177e-05, "loss": 0.7542, "step": 13071 }, { "epoch": 0.8856968629310928, "grad_norm": 5.10394811630249, "learning_rate": 8.313231569580396e-05, "loss": 0.6926, "step": 13072 }, { "epoch": 0.8857646181990649, "grad_norm": 5.681312561035156, "learning_rate": 8.313094667670614e-05, "loss": 0.5444, "step": 13073 }, { "epoch": 0.885832373467037, "grad_norm": 5.030150890350342, "learning_rate": 8.312957765760832e-05, "loss": 0.7594, "step": 13074 }, { "epoch": 0.8859001287350091, "grad_norm": 5.284333229064941, "learning_rate": 8.31282086385105e-05, "loss": 0.6214, "step": 13075 }, { "epoch": 0.8859678840029812, "grad_norm": 6.305645942687988, "learning_rate": 8.31268396194127e-05, "loss": 0.5335, "step": 13076 }, { "epoch": 0.8860356392709533, "grad_norm": 4.828126907348633, "learning_rate": 8.312547060031488e-05, "loss": 0.5699, "step": 13077 }, { "epoch": 0.8861033945389254, "grad_norm": 6.154019355773926, "learning_rate": 8.312410158121706e-05, "loss": 0.8389, "step": 13078 }, { "epoch": 0.8861711498068975, "grad_norm": 7.554732322692871, "learning_rate": 8.312273256211924e-05, "loss": 1.1726, "step": 13079 }, { "epoch": 0.8862389050748696, "grad_norm": 7.06660270690918, "learning_rate": 8.312136354302143e-05, "loss": 0.7025, "step": 13080 }, { "epoch": 0.8863066603428417, "grad_norm": 6.983936786651611, "learning_rate": 8.311999452392361e-05, "loss": 0.7986, "step": 13081 }, { "epoch": 0.8863744156108138, "grad_norm": 6.566361427307129, "learning_rate": 8.311862550482579e-05, "loss": 0.6886, "step": 13082 }, { "epoch": 0.8864421708787859, "grad_norm": 7.804087162017822, "learning_rate": 8.311725648572798e-05, "loss": 0.8087, "step": 13083 }, { "epoch": 0.8865099261467579, "grad_norm": 7.015219211578369, "learning_rate": 8.311588746663017e-05, "loss": 0.7983, "step": 13084 }, { "epoch": 0.88657768141473, "grad_norm": 7.756356239318848, "learning_rate": 8.311451844753235e-05, "loss": 0.865, "step": 13085 }, { "epoch": 0.8866454366827021, "grad_norm": 4.957078456878662, "learning_rate": 8.311314942843454e-05, "loss": 0.7427, "step": 13086 }, { "epoch": 0.8867131919506742, "grad_norm": 7.29202127456665, "learning_rate": 8.311178040933672e-05, "loss": 0.9095, "step": 13087 }, { "epoch": 0.8867809472186462, "grad_norm": 6.850350856781006, "learning_rate": 8.31104113902389e-05, "loss": 0.6982, "step": 13088 }, { "epoch": 0.8868487024866183, "grad_norm": 7.409617900848389, "learning_rate": 8.310904237114108e-05, "loss": 0.7422, "step": 13089 }, { "epoch": 0.8869164577545904, "grad_norm": 6.317065715789795, "learning_rate": 8.310767335204327e-05, "loss": 0.7854, "step": 13090 }, { "epoch": 0.8869842130225625, "grad_norm": 6.616084098815918, "learning_rate": 8.310630433294545e-05, "loss": 0.5892, "step": 13091 }, { "epoch": 0.8870519682905346, "grad_norm": 4.691561222076416, "learning_rate": 8.310493531384764e-05, "loss": 0.7925, "step": 13092 }, { "epoch": 0.8871197235585067, "grad_norm": 6.057029724121094, "learning_rate": 8.310356629474982e-05, "loss": 0.9739, "step": 13093 }, { "epoch": 0.8871874788264787, "grad_norm": 6.407534122467041, "learning_rate": 8.3102197275652e-05, "loss": 0.8429, "step": 13094 }, { "epoch": 0.8872552340944508, "grad_norm": 5.778253078460693, "learning_rate": 8.310082825655419e-05, "loss": 0.8443, "step": 13095 }, { "epoch": 0.8873229893624229, "grad_norm": 6.153006553649902, "learning_rate": 8.309945923745637e-05, "loss": 0.8315, "step": 13096 }, { "epoch": 0.887390744630395, "grad_norm": 6.307031631469727, "learning_rate": 8.309809021835855e-05, "loss": 1.0075, "step": 13097 }, { "epoch": 0.8874584998983671, "grad_norm": 6.513178825378418, "learning_rate": 8.309672119926073e-05, "loss": 0.7945, "step": 13098 }, { "epoch": 0.8875262551663392, "grad_norm": 7.987000942230225, "learning_rate": 8.309535218016292e-05, "loss": 0.7709, "step": 13099 }, { "epoch": 0.8875940104343113, "grad_norm": 5.424191951751709, "learning_rate": 8.30939831610651e-05, "loss": 0.7174, "step": 13100 }, { "epoch": 0.8876617657022834, "grad_norm": 4.814406871795654, "learning_rate": 8.309261414196729e-05, "loss": 0.8897, "step": 13101 }, { "epoch": 0.8877295209702555, "grad_norm": 5.997096538543701, "learning_rate": 8.309124512286947e-05, "loss": 0.6153, "step": 13102 }, { "epoch": 0.8877972762382276, "grad_norm": 7.016286373138428, "learning_rate": 8.308987610377165e-05, "loss": 0.9323, "step": 13103 }, { "epoch": 0.8878650315061996, "grad_norm": 5.908369541168213, "learning_rate": 8.308850708467384e-05, "loss": 0.7758, "step": 13104 }, { "epoch": 0.8879327867741716, "grad_norm": 6.3806376457214355, "learning_rate": 8.308713806557602e-05, "loss": 0.803, "step": 13105 }, { "epoch": 0.8880005420421437, "grad_norm": 5.187054634094238, "learning_rate": 8.30857690464782e-05, "loss": 0.8509, "step": 13106 }, { "epoch": 0.8880682973101158, "grad_norm": 5.6671905517578125, "learning_rate": 8.308440002738038e-05, "loss": 0.9532, "step": 13107 }, { "epoch": 0.8881360525780879, "grad_norm": 5.64413595199585, "learning_rate": 8.308303100828257e-05, "loss": 0.6387, "step": 13108 }, { "epoch": 0.88820380784606, "grad_norm": 7.69677209854126, "learning_rate": 8.308166198918476e-05, "loss": 0.9477, "step": 13109 }, { "epoch": 0.8882715631140321, "grad_norm": 6.669020652770996, "learning_rate": 8.308029297008694e-05, "loss": 0.7831, "step": 13110 }, { "epoch": 0.8883393183820042, "grad_norm": 8.060406684875488, "learning_rate": 8.307892395098912e-05, "loss": 0.7206, "step": 13111 }, { "epoch": 0.8884070736499763, "grad_norm": 6.262596607208252, "learning_rate": 8.30775549318913e-05, "loss": 0.7359, "step": 13112 }, { "epoch": 0.8884748289179484, "grad_norm": 7.678366661071777, "learning_rate": 8.307618591279349e-05, "loss": 0.7463, "step": 13113 }, { "epoch": 0.8885425841859205, "grad_norm": 4.828142166137695, "learning_rate": 8.307481689369567e-05, "loss": 0.6186, "step": 13114 }, { "epoch": 0.8886103394538926, "grad_norm": 6.147395133972168, "learning_rate": 8.307344787459785e-05, "loss": 0.7742, "step": 13115 }, { "epoch": 0.8886780947218647, "grad_norm": 7.005827903747559, "learning_rate": 8.307207885550003e-05, "loss": 0.899, "step": 13116 }, { "epoch": 0.8887458499898367, "grad_norm": 6.948807239532471, "learning_rate": 8.307070983640221e-05, "loss": 0.9572, "step": 13117 }, { "epoch": 0.8888136052578088, "grad_norm": 5.939198970794678, "learning_rate": 8.30693408173044e-05, "loss": 0.8103, "step": 13118 }, { "epoch": 0.8888813605257809, "grad_norm": 5.84893274307251, "learning_rate": 8.306797179820659e-05, "loss": 0.7653, "step": 13119 }, { "epoch": 0.888949115793753, "grad_norm": 9.817476272583008, "learning_rate": 8.306660277910877e-05, "loss": 0.8496, "step": 13120 }, { "epoch": 0.889016871061725, "grad_norm": 6.369333267211914, "learning_rate": 8.306523376001095e-05, "loss": 0.8215, "step": 13121 }, { "epoch": 0.8890846263296971, "grad_norm": 6.099016189575195, "learning_rate": 8.306386474091314e-05, "loss": 0.913, "step": 13122 }, { "epoch": 0.8891523815976692, "grad_norm": 5.678265571594238, "learning_rate": 8.306249572181532e-05, "loss": 0.7645, "step": 13123 }, { "epoch": 0.8892201368656413, "grad_norm": 5.685331344604492, "learning_rate": 8.30611267027175e-05, "loss": 0.8976, "step": 13124 }, { "epoch": 0.8892878921336134, "grad_norm": 6.949743747711182, "learning_rate": 8.305975768361968e-05, "loss": 0.708, "step": 13125 }, { "epoch": 0.8893556474015855, "grad_norm": 6.98148250579834, "learning_rate": 8.305838866452188e-05, "loss": 1.0055, "step": 13126 }, { "epoch": 0.8894234026695576, "grad_norm": 6.679849147796631, "learning_rate": 8.305701964542406e-05, "loss": 0.7262, "step": 13127 }, { "epoch": 0.8894911579375296, "grad_norm": 7.759592533111572, "learning_rate": 8.305565062632624e-05, "loss": 0.5805, "step": 13128 }, { "epoch": 0.8895589132055017, "grad_norm": 5.812005519866943, "learning_rate": 8.305428160722843e-05, "loss": 0.8662, "step": 13129 }, { "epoch": 0.8896266684734738, "grad_norm": 4.861279487609863, "learning_rate": 8.305291258813061e-05, "loss": 0.5661, "step": 13130 }, { "epoch": 0.8896944237414459, "grad_norm": 6.805129051208496, "learning_rate": 8.305154356903279e-05, "loss": 0.9156, "step": 13131 }, { "epoch": 0.889762179009418, "grad_norm": 4.98897647857666, "learning_rate": 8.305017454993498e-05, "loss": 0.698, "step": 13132 }, { "epoch": 0.8898299342773901, "grad_norm": 6.2775726318359375, "learning_rate": 8.304880553083716e-05, "loss": 0.9092, "step": 13133 }, { "epoch": 0.8898976895453622, "grad_norm": 7.023934841156006, "learning_rate": 8.304743651173934e-05, "loss": 0.7898, "step": 13134 }, { "epoch": 0.8899654448133343, "grad_norm": 6.283311367034912, "learning_rate": 8.304606749264153e-05, "loss": 0.7596, "step": 13135 }, { "epoch": 0.8900332000813064, "grad_norm": 4.842438220977783, "learning_rate": 8.304469847354372e-05, "loss": 0.7008, "step": 13136 }, { "epoch": 0.8901009553492784, "grad_norm": 5.7367987632751465, "learning_rate": 8.30433294544459e-05, "loss": 0.987, "step": 13137 }, { "epoch": 0.8901687106172504, "grad_norm": 6.082010746002197, "learning_rate": 8.304196043534808e-05, "loss": 0.6194, "step": 13138 }, { "epoch": 0.8902364658852225, "grad_norm": 4.863292217254639, "learning_rate": 8.304059141625026e-05, "loss": 0.689, "step": 13139 }, { "epoch": 0.8903042211531946, "grad_norm": 7.170241832733154, "learning_rate": 8.303922239715245e-05, "loss": 0.8191, "step": 13140 }, { "epoch": 0.8903719764211667, "grad_norm": 5.541210174560547, "learning_rate": 8.303785337805463e-05, "loss": 0.7614, "step": 13141 }, { "epoch": 0.8904397316891388, "grad_norm": 4.93407678604126, "learning_rate": 8.303648435895681e-05, "loss": 0.6698, "step": 13142 }, { "epoch": 0.8905074869571109, "grad_norm": 5.758559226989746, "learning_rate": 8.3035115339859e-05, "loss": 0.8883, "step": 13143 }, { "epoch": 0.890575242225083, "grad_norm": 5.253477096557617, "learning_rate": 8.303374632076118e-05, "loss": 0.7487, "step": 13144 }, { "epoch": 0.8906429974930551, "grad_norm": 7.421339511871338, "learning_rate": 8.303237730166337e-05, "loss": 0.7125, "step": 13145 }, { "epoch": 0.8907107527610272, "grad_norm": 6.252211093902588, "learning_rate": 8.303100828256555e-05, "loss": 0.7768, "step": 13146 }, { "epoch": 0.8907785080289993, "grad_norm": 5.624354362487793, "learning_rate": 8.302963926346773e-05, "loss": 0.7325, "step": 13147 }, { "epoch": 0.8908462632969714, "grad_norm": 7.876077651977539, "learning_rate": 8.302827024436991e-05, "loss": 0.9607, "step": 13148 }, { "epoch": 0.8909140185649435, "grad_norm": 7.236328601837158, "learning_rate": 8.302690122527209e-05, "loss": 0.9411, "step": 13149 }, { "epoch": 0.8909817738329155, "grad_norm": 5.283116817474365, "learning_rate": 8.302553220617428e-05, "loss": 0.8631, "step": 13150 }, { "epoch": 0.8910495291008876, "grad_norm": 4.565162181854248, "learning_rate": 8.302416318707646e-05, "loss": 0.5822, "step": 13151 }, { "epoch": 0.8911172843688597, "grad_norm": 8.308517456054688, "learning_rate": 8.302279416797865e-05, "loss": 0.7722, "step": 13152 }, { "epoch": 0.8911850396368317, "grad_norm": 5.399427890777588, "learning_rate": 8.302142514888083e-05, "loss": 0.7398, "step": 13153 }, { "epoch": 0.8912527949048038, "grad_norm": 6.947094917297363, "learning_rate": 8.302005612978302e-05, "loss": 0.8397, "step": 13154 }, { "epoch": 0.8913205501727759, "grad_norm": 5.054037094116211, "learning_rate": 8.30186871106852e-05, "loss": 0.8779, "step": 13155 }, { "epoch": 0.891388305440748, "grad_norm": 5.343729496002197, "learning_rate": 8.301731809158738e-05, "loss": 0.6804, "step": 13156 }, { "epoch": 0.8914560607087201, "grad_norm": 5.4643425941467285, "learning_rate": 8.301594907248956e-05, "loss": 0.764, "step": 13157 }, { "epoch": 0.8915238159766922, "grad_norm": 5.799702167510986, "learning_rate": 8.301458005339174e-05, "loss": 0.8367, "step": 13158 }, { "epoch": 0.8915915712446643, "grad_norm": 5.780505657196045, "learning_rate": 8.301321103429393e-05, "loss": 0.6387, "step": 13159 }, { "epoch": 0.8916593265126364, "grad_norm": 7.076272964477539, "learning_rate": 8.301184201519612e-05, "loss": 0.8751, "step": 13160 }, { "epoch": 0.8917270817806084, "grad_norm": 5.820427417755127, "learning_rate": 8.30104729960983e-05, "loss": 0.7296, "step": 13161 }, { "epoch": 0.8917948370485805, "grad_norm": 6.612248420715332, "learning_rate": 8.300910397700048e-05, "loss": 0.9585, "step": 13162 }, { "epoch": 0.8918625923165526, "grad_norm": 5.051817893981934, "learning_rate": 8.300773495790267e-05, "loss": 0.7389, "step": 13163 }, { "epoch": 0.8919303475845247, "grad_norm": 6.8403639793396, "learning_rate": 8.300636593880485e-05, "loss": 0.9632, "step": 13164 }, { "epoch": 0.8919981028524968, "grad_norm": 5.8375983238220215, "learning_rate": 8.300499691970703e-05, "loss": 0.7462, "step": 13165 }, { "epoch": 0.8920658581204689, "grad_norm": 4.138559341430664, "learning_rate": 8.300362790060921e-05, "loss": 0.5961, "step": 13166 }, { "epoch": 0.892133613388441, "grad_norm": 5.837608814239502, "learning_rate": 8.300225888151139e-05, "loss": 0.8189, "step": 13167 }, { "epoch": 0.8922013686564131, "grad_norm": 5.9026875495910645, "learning_rate": 8.300088986241358e-05, "loss": 0.8394, "step": 13168 }, { "epoch": 0.8922691239243851, "grad_norm": 5.615271091461182, "learning_rate": 8.299952084331577e-05, "loss": 0.7897, "step": 13169 }, { "epoch": 0.8923368791923572, "grad_norm": 5.872547149658203, "learning_rate": 8.299815182421795e-05, "loss": 0.7276, "step": 13170 }, { "epoch": 0.8924046344603292, "grad_norm": 7.943142414093018, "learning_rate": 8.299678280512013e-05, "loss": 0.7512, "step": 13171 }, { "epoch": 0.8924723897283013, "grad_norm": 5.548654079437256, "learning_rate": 8.299541378602232e-05, "loss": 0.8011, "step": 13172 }, { "epoch": 0.8925401449962734, "grad_norm": 5.295721054077148, "learning_rate": 8.29940447669245e-05, "loss": 0.7481, "step": 13173 }, { "epoch": 0.8926079002642455, "grad_norm": 5.564395904541016, "learning_rate": 8.299267574782668e-05, "loss": 0.7028, "step": 13174 }, { "epoch": 0.8926756555322176, "grad_norm": 6.351992607116699, "learning_rate": 8.299130672872887e-05, "loss": 0.6943, "step": 13175 }, { "epoch": 0.8927434108001897, "grad_norm": 8.362895965576172, "learning_rate": 8.298993770963105e-05, "loss": 0.9287, "step": 13176 }, { "epoch": 0.8928111660681618, "grad_norm": 6.428536415100098, "learning_rate": 8.298856869053324e-05, "loss": 0.8992, "step": 13177 }, { "epoch": 0.8928789213361339, "grad_norm": 6.148324489593506, "learning_rate": 8.298719967143543e-05, "loss": 0.6605, "step": 13178 }, { "epoch": 0.892946676604106, "grad_norm": 6.530026912689209, "learning_rate": 8.298583065233761e-05, "loss": 0.6085, "step": 13179 }, { "epoch": 0.8930144318720781, "grad_norm": 5.863303184509277, "learning_rate": 8.298446163323979e-05, "loss": 0.7368, "step": 13180 }, { "epoch": 0.8930821871400502, "grad_norm": 11.581231117248535, "learning_rate": 8.298309261414197e-05, "loss": 0.732, "step": 13181 }, { "epoch": 0.8931499424080223, "grad_norm": 7.566738128662109, "learning_rate": 8.298172359504416e-05, "loss": 0.9834, "step": 13182 }, { "epoch": 0.8932176976759943, "grad_norm": 7.298031330108643, "learning_rate": 8.298035457594634e-05, "loss": 0.9663, "step": 13183 }, { "epoch": 0.8932854529439664, "grad_norm": 6.619592666625977, "learning_rate": 8.297898555684852e-05, "loss": 0.6992, "step": 13184 }, { "epoch": 0.8933532082119385, "grad_norm": 6.211091041564941, "learning_rate": 8.29776165377507e-05, "loss": 0.6848, "step": 13185 }, { "epoch": 0.8934209634799105, "grad_norm": 5.228729248046875, "learning_rate": 8.29762475186529e-05, "loss": 0.629, "step": 13186 }, { "epoch": 0.8934887187478826, "grad_norm": 5.966601848602295, "learning_rate": 8.297487849955508e-05, "loss": 0.7837, "step": 13187 }, { "epoch": 0.8935564740158547, "grad_norm": 5.975196838378906, "learning_rate": 8.297350948045726e-05, "loss": 0.8608, "step": 13188 }, { "epoch": 0.8936242292838268, "grad_norm": 7.837411880493164, "learning_rate": 8.297214046135944e-05, "loss": 1.006, "step": 13189 }, { "epoch": 0.8936919845517989, "grad_norm": 6.313420295715332, "learning_rate": 8.297077144226162e-05, "loss": 0.9157, "step": 13190 }, { "epoch": 0.893759739819771, "grad_norm": 4.761128902435303, "learning_rate": 8.296940242316381e-05, "loss": 0.7262, "step": 13191 }, { "epoch": 0.8938274950877431, "grad_norm": 6.320545673370361, "learning_rate": 8.2968033404066e-05, "loss": 0.8437, "step": 13192 }, { "epoch": 0.8938952503557152, "grad_norm": 6.234335899353027, "learning_rate": 8.296666438496817e-05, "loss": 0.8268, "step": 13193 }, { "epoch": 0.8939630056236872, "grad_norm": 6.060486316680908, "learning_rate": 8.296529536587036e-05, "loss": 0.6344, "step": 13194 }, { "epoch": 0.8940307608916593, "grad_norm": 4.9132771492004395, "learning_rate": 8.296392634677255e-05, "loss": 0.6422, "step": 13195 }, { "epoch": 0.8940985161596314, "grad_norm": 7.546984672546387, "learning_rate": 8.296255732767473e-05, "loss": 0.8527, "step": 13196 }, { "epoch": 0.8941662714276035, "grad_norm": 5.650018215179443, "learning_rate": 8.296118830857691e-05, "loss": 0.7624, "step": 13197 }, { "epoch": 0.8942340266955756, "grad_norm": 6.712080955505371, "learning_rate": 8.295981928947909e-05, "loss": 0.9443, "step": 13198 }, { "epoch": 0.8943017819635477, "grad_norm": 7.756785869598389, "learning_rate": 8.295845027038127e-05, "loss": 0.9292, "step": 13199 }, { "epoch": 0.8943695372315198, "grad_norm": 5.824887752532959, "learning_rate": 8.295708125128346e-05, "loss": 0.7814, "step": 13200 }, { "epoch": 0.8944372924994919, "grad_norm": 6.378854751586914, "learning_rate": 8.295571223218564e-05, "loss": 0.7699, "step": 13201 }, { "epoch": 0.8945050477674639, "grad_norm": 6.914346218109131, "learning_rate": 8.295434321308782e-05, "loss": 0.7734, "step": 13202 }, { "epoch": 0.894572803035436, "grad_norm": 5.830096244812012, "learning_rate": 8.295297419399e-05, "loss": 0.762, "step": 13203 }, { "epoch": 0.894640558303408, "grad_norm": 5.791236877441406, "learning_rate": 8.295160517489219e-05, "loss": 0.6882, "step": 13204 }, { "epoch": 0.8947083135713801, "grad_norm": 5.9879326820373535, "learning_rate": 8.295023615579438e-05, "loss": 0.8134, "step": 13205 }, { "epoch": 0.8947760688393522, "grad_norm": 6.294375419616699, "learning_rate": 8.294886713669656e-05, "loss": 0.6101, "step": 13206 }, { "epoch": 0.8948438241073243, "grad_norm": 5.5092267990112305, "learning_rate": 8.294749811759874e-05, "loss": 0.8178, "step": 13207 }, { "epoch": 0.8949115793752964, "grad_norm": 5.606123924255371, "learning_rate": 8.294612909850092e-05, "loss": 0.7347, "step": 13208 }, { "epoch": 0.8949793346432685, "grad_norm": 7.427051544189453, "learning_rate": 8.294476007940311e-05, "loss": 0.8645, "step": 13209 }, { "epoch": 0.8950470899112406, "grad_norm": 8.88985538482666, "learning_rate": 8.29433910603053e-05, "loss": 0.793, "step": 13210 }, { "epoch": 0.8951148451792127, "grad_norm": 5.20978307723999, "learning_rate": 8.294202204120748e-05, "loss": 0.954, "step": 13211 }, { "epoch": 0.8951826004471848, "grad_norm": 8.192554473876953, "learning_rate": 8.294065302210966e-05, "loss": 0.812, "step": 13212 }, { "epoch": 0.8952503557151569, "grad_norm": 5.569815158843994, "learning_rate": 8.293928400301184e-05, "loss": 0.702, "step": 13213 }, { "epoch": 0.895318110983129, "grad_norm": 5.993325233459473, "learning_rate": 8.293791498391403e-05, "loss": 0.9014, "step": 13214 }, { "epoch": 0.8953858662511011, "grad_norm": 5.743150234222412, "learning_rate": 8.293654596481621e-05, "loss": 0.5031, "step": 13215 }, { "epoch": 0.8954536215190732, "grad_norm": 6.546820163726807, "learning_rate": 8.293517694571839e-05, "loss": 0.8145, "step": 13216 }, { "epoch": 0.8955213767870452, "grad_norm": 6.425495147705078, "learning_rate": 8.293380792662057e-05, "loss": 0.6972, "step": 13217 }, { "epoch": 0.8955891320550172, "grad_norm": 5.768934726715088, "learning_rate": 8.293243890752276e-05, "loss": 0.7145, "step": 13218 }, { "epoch": 0.8956568873229893, "grad_norm": 5.6524224281311035, "learning_rate": 8.293106988842494e-05, "loss": 0.8436, "step": 13219 }, { "epoch": 0.8957246425909614, "grad_norm": 7.842732906341553, "learning_rate": 8.292970086932713e-05, "loss": 0.9397, "step": 13220 }, { "epoch": 0.8957923978589335, "grad_norm": 6.612356185913086, "learning_rate": 8.292833185022932e-05, "loss": 0.7468, "step": 13221 }, { "epoch": 0.8958601531269056, "grad_norm": 6.291922569274902, "learning_rate": 8.29269628311315e-05, "loss": 0.7622, "step": 13222 }, { "epoch": 0.8959279083948777, "grad_norm": 6.623983860015869, "learning_rate": 8.292559381203368e-05, "loss": 0.79, "step": 13223 }, { "epoch": 0.8959956636628498, "grad_norm": 5.58621883392334, "learning_rate": 8.292422479293587e-05, "loss": 0.7481, "step": 13224 }, { "epoch": 0.8960634189308219, "grad_norm": 5.839790344238281, "learning_rate": 8.292285577383805e-05, "loss": 0.7201, "step": 13225 }, { "epoch": 0.896131174198794, "grad_norm": 7.310943126678467, "learning_rate": 8.292148675474023e-05, "loss": 1.062, "step": 13226 }, { "epoch": 0.896198929466766, "grad_norm": 7.1324872970581055, "learning_rate": 8.292011773564241e-05, "loss": 0.86, "step": 13227 }, { "epoch": 0.8962666847347381, "grad_norm": 5.25512170791626, "learning_rate": 8.291874871654461e-05, "loss": 0.7487, "step": 13228 }, { "epoch": 0.8963344400027102, "grad_norm": 4.826694488525391, "learning_rate": 8.291737969744679e-05, "loss": 0.6497, "step": 13229 }, { "epoch": 0.8964021952706823, "grad_norm": 6.0009918212890625, "learning_rate": 8.291601067834897e-05, "loss": 0.7122, "step": 13230 }, { "epoch": 0.8964699505386544, "grad_norm": 6.770015716552734, "learning_rate": 8.291464165925115e-05, "loss": 0.9121, "step": 13231 }, { "epoch": 0.8965377058066265, "grad_norm": 7.527721405029297, "learning_rate": 8.291327264015334e-05, "loss": 0.7042, "step": 13232 }, { "epoch": 0.8966054610745986, "grad_norm": 5.726056098937988, "learning_rate": 8.291190362105552e-05, "loss": 0.7324, "step": 13233 }, { "epoch": 0.8966732163425707, "grad_norm": 5.054379463195801, "learning_rate": 8.29105346019577e-05, "loss": 0.8064, "step": 13234 }, { "epoch": 0.8967409716105427, "grad_norm": 5.584482669830322, "learning_rate": 8.290916558285988e-05, "loss": 0.7225, "step": 13235 }, { "epoch": 0.8968087268785148, "grad_norm": 6.002537727355957, "learning_rate": 8.290779656376206e-05, "loss": 1.0223, "step": 13236 }, { "epoch": 0.8968764821464869, "grad_norm": 6.58087158203125, "learning_rate": 8.290642754466426e-05, "loss": 0.847, "step": 13237 }, { "epoch": 0.896944237414459, "grad_norm": 7.690030097961426, "learning_rate": 8.290505852556644e-05, "loss": 0.6217, "step": 13238 }, { "epoch": 0.897011992682431, "grad_norm": 6.937661170959473, "learning_rate": 8.290368950646862e-05, "loss": 0.8138, "step": 13239 }, { "epoch": 0.8970797479504031, "grad_norm": 5.334490776062012, "learning_rate": 8.29023204873708e-05, "loss": 0.8908, "step": 13240 }, { "epoch": 0.8971475032183752, "grad_norm": 5.833104133605957, "learning_rate": 8.2900951468273e-05, "loss": 0.5743, "step": 13241 }, { "epoch": 0.8972152584863473, "grad_norm": 5.803739547729492, "learning_rate": 8.289958244917517e-05, "loss": 0.6845, "step": 13242 }, { "epoch": 0.8972830137543194, "grad_norm": 5.436889171600342, "learning_rate": 8.289821343007735e-05, "loss": 0.8216, "step": 13243 }, { "epoch": 0.8973507690222915, "grad_norm": 4.356090068817139, "learning_rate": 8.289684441097953e-05, "loss": 0.7757, "step": 13244 }, { "epoch": 0.8974185242902636, "grad_norm": 5.816674709320068, "learning_rate": 8.289547539188172e-05, "loss": 0.8839, "step": 13245 }, { "epoch": 0.8974862795582357, "grad_norm": 7.878244876861572, "learning_rate": 8.289410637278391e-05, "loss": 1.0252, "step": 13246 }, { "epoch": 0.8975540348262078, "grad_norm": 6.409861087799072, "learning_rate": 8.289273735368609e-05, "loss": 0.8281, "step": 13247 }, { "epoch": 0.8976217900941799, "grad_norm": 6.617053031921387, "learning_rate": 8.289136833458827e-05, "loss": 1.0406, "step": 13248 }, { "epoch": 0.897689545362152, "grad_norm": 5.893568992614746, "learning_rate": 8.288999931549045e-05, "loss": 0.8713, "step": 13249 }, { "epoch": 0.897757300630124, "grad_norm": 5.194582939147949, "learning_rate": 8.288863029639263e-05, "loss": 0.7163, "step": 13250 }, { "epoch": 0.897825055898096, "grad_norm": 5.706815719604492, "learning_rate": 8.288726127729482e-05, "loss": 0.7675, "step": 13251 }, { "epoch": 0.8978928111660681, "grad_norm": 5.490322113037109, "learning_rate": 8.2885892258197e-05, "loss": 0.7559, "step": 13252 }, { "epoch": 0.8979605664340402, "grad_norm": 4.902472019195557, "learning_rate": 8.288452323909918e-05, "loss": 0.6095, "step": 13253 }, { "epoch": 0.8980283217020123, "grad_norm": 4.228281497955322, "learning_rate": 8.288315422000137e-05, "loss": 0.5824, "step": 13254 }, { "epoch": 0.8980960769699844, "grad_norm": 5.2611494064331055, "learning_rate": 8.288178520090356e-05, "loss": 0.8144, "step": 13255 }, { "epoch": 0.8981638322379565, "grad_norm": 5.848245143890381, "learning_rate": 8.288041618180574e-05, "loss": 0.7095, "step": 13256 }, { "epoch": 0.8982315875059286, "grad_norm": 5.738656044006348, "learning_rate": 8.287904716270792e-05, "loss": 0.731, "step": 13257 }, { "epoch": 0.8982993427739007, "grad_norm": 6.212946891784668, "learning_rate": 8.28776781436101e-05, "loss": 0.6781, "step": 13258 }, { "epoch": 0.8983670980418728, "grad_norm": 4.827934741973877, "learning_rate": 8.287630912451228e-05, "loss": 0.7693, "step": 13259 }, { "epoch": 0.8984348533098448, "grad_norm": 5.135469436645508, "learning_rate": 8.287494010541447e-05, "loss": 0.7819, "step": 13260 }, { "epoch": 0.8985026085778169, "grad_norm": 8.885536193847656, "learning_rate": 8.287357108631665e-05, "loss": 0.7556, "step": 13261 }, { "epoch": 0.898570363845789, "grad_norm": 8.197842597961426, "learning_rate": 8.287220206721884e-05, "loss": 0.7379, "step": 13262 }, { "epoch": 0.8986381191137611, "grad_norm": 5.406810283660889, "learning_rate": 8.287083304812102e-05, "loss": 0.6618, "step": 13263 }, { "epoch": 0.8987058743817332, "grad_norm": 5.688655376434326, "learning_rate": 8.286946402902321e-05, "loss": 0.9949, "step": 13264 }, { "epoch": 0.8987736296497053, "grad_norm": 7.776078701019287, "learning_rate": 8.286809500992539e-05, "loss": 0.5491, "step": 13265 }, { "epoch": 0.8988413849176774, "grad_norm": 7.738804817199707, "learning_rate": 8.286672599082757e-05, "loss": 1.06, "step": 13266 }, { "epoch": 0.8989091401856494, "grad_norm": 5.860600471496582, "learning_rate": 8.286535697172976e-05, "loss": 0.7621, "step": 13267 }, { "epoch": 0.8989768954536215, "grad_norm": 5.878091335296631, "learning_rate": 8.286398795263194e-05, "loss": 0.9419, "step": 13268 }, { "epoch": 0.8990446507215936, "grad_norm": 6.572638511657715, "learning_rate": 8.286261893353412e-05, "loss": 1.1296, "step": 13269 }, { "epoch": 0.8991124059895657, "grad_norm": 6.110701084136963, "learning_rate": 8.286124991443632e-05, "loss": 0.6976, "step": 13270 }, { "epoch": 0.8991801612575377, "grad_norm": 5.455756187438965, "learning_rate": 8.28598808953385e-05, "loss": 0.7511, "step": 13271 }, { "epoch": 0.8992479165255098, "grad_norm": 5.473435878753662, "learning_rate": 8.285851187624068e-05, "loss": 0.7217, "step": 13272 }, { "epoch": 0.8993156717934819, "grad_norm": 6.81781005859375, "learning_rate": 8.285714285714287e-05, "loss": 0.6594, "step": 13273 }, { "epoch": 0.899383427061454, "grad_norm": 6.982245445251465, "learning_rate": 8.285577383804505e-05, "loss": 0.8544, "step": 13274 }, { "epoch": 0.8994511823294261, "grad_norm": 4.480033874511719, "learning_rate": 8.285440481894723e-05, "loss": 0.8935, "step": 13275 }, { "epoch": 0.8995189375973982, "grad_norm": 5.940613746643066, "learning_rate": 8.285303579984941e-05, "loss": 0.8299, "step": 13276 }, { "epoch": 0.8995866928653703, "grad_norm": 5.0714335441589355, "learning_rate": 8.28516667807516e-05, "loss": 0.6672, "step": 13277 }, { "epoch": 0.8996544481333424, "grad_norm": 5.617218971252441, "learning_rate": 8.285029776165379e-05, "loss": 0.8913, "step": 13278 }, { "epoch": 0.8997222034013145, "grad_norm": 8.896486282348633, "learning_rate": 8.284892874255597e-05, "loss": 0.9386, "step": 13279 }, { "epoch": 0.8997899586692866, "grad_norm": 6.059647083282471, "learning_rate": 8.284755972345815e-05, "loss": 0.8023, "step": 13280 }, { "epoch": 0.8998577139372587, "grad_norm": 5.9106950759887695, "learning_rate": 8.284619070436033e-05, "loss": 0.8094, "step": 13281 }, { "epoch": 0.8999254692052308, "grad_norm": 7.264159679412842, "learning_rate": 8.284482168526251e-05, "loss": 0.8013, "step": 13282 }, { "epoch": 0.8999932244732028, "grad_norm": 7.696917533874512, "learning_rate": 8.28434526661647e-05, "loss": 0.8387, "step": 13283 }, { "epoch": 0.9000609797411748, "grad_norm": 6.536654472351074, "learning_rate": 8.284208364706688e-05, "loss": 0.8861, "step": 13284 }, { "epoch": 0.9001287350091469, "grad_norm": 5.371990203857422, "learning_rate": 8.284071462796906e-05, "loss": 0.7841, "step": 13285 }, { "epoch": 0.900196490277119, "grad_norm": 7.402823448181152, "learning_rate": 8.283934560887124e-05, "loss": 0.9058, "step": 13286 }, { "epoch": 0.9002642455450911, "grad_norm": 5.5579328536987305, "learning_rate": 8.283797658977344e-05, "loss": 0.698, "step": 13287 }, { "epoch": 0.9003320008130632, "grad_norm": 5.994121074676514, "learning_rate": 8.283660757067562e-05, "loss": 0.7502, "step": 13288 }, { "epoch": 0.9003997560810353, "grad_norm": 6.9693427085876465, "learning_rate": 8.28352385515778e-05, "loss": 0.9358, "step": 13289 }, { "epoch": 0.9004675113490074, "grad_norm": 6.08130407333374, "learning_rate": 8.283386953247998e-05, "loss": 0.7868, "step": 13290 }, { "epoch": 0.9005352666169795, "grad_norm": 6.161886692047119, "learning_rate": 8.283250051338216e-05, "loss": 0.6941, "step": 13291 }, { "epoch": 0.9006030218849516, "grad_norm": 6.822792053222656, "learning_rate": 8.283113149428435e-05, "loss": 0.8085, "step": 13292 }, { "epoch": 0.9006707771529237, "grad_norm": 5.883656978607178, "learning_rate": 8.282976247518653e-05, "loss": 0.7429, "step": 13293 }, { "epoch": 0.9007385324208957, "grad_norm": 4.72324800491333, "learning_rate": 8.282839345608871e-05, "loss": 0.6083, "step": 13294 }, { "epoch": 0.9008062876888678, "grad_norm": 5.912334442138672, "learning_rate": 8.28270244369909e-05, "loss": 0.8443, "step": 13295 }, { "epoch": 0.9008740429568399, "grad_norm": 4.501663684844971, "learning_rate": 8.282565541789309e-05, "loss": 0.7635, "step": 13296 }, { "epoch": 0.900941798224812, "grad_norm": 6.771607875823975, "learning_rate": 8.282428639879527e-05, "loss": 0.9044, "step": 13297 }, { "epoch": 0.9010095534927841, "grad_norm": 6.7166428565979, "learning_rate": 8.282291737969745e-05, "loss": 0.8295, "step": 13298 }, { "epoch": 0.9010773087607562, "grad_norm": 6.394474029541016, "learning_rate": 8.282154836059963e-05, "loss": 0.7899, "step": 13299 }, { "epoch": 0.9011450640287282, "grad_norm": 5.574833869934082, "learning_rate": 8.282017934150181e-05, "loss": 0.752, "step": 13300 }, { "epoch": 0.9012128192967003, "grad_norm": 6.585177898406982, "learning_rate": 8.2818810322404e-05, "loss": 0.6169, "step": 13301 }, { "epoch": 0.9012805745646724, "grad_norm": 5.940279960632324, "learning_rate": 8.281744130330618e-05, "loss": 0.7563, "step": 13302 }, { "epoch": 0.9013483298326445, "grad_norm": 6.039457321166992, "learning_rate": 8.281607228420836e-05, "loss": 0.7505, "step": 13303 }, { "epoch": 0.9014160851006165, "grad_norm": 10.80169677734375, "learning_rate": 8.281470326511054e-05, "loss": 0.8651, "step": 13304 }, { "epoch": 0.9014838403685886, "grad_norm": 5.8975043296813965, "learning_rate": 8.281333424601273e-05, "loss": 0.8325, "step": 13305 }, { "epoch": 0.9015515956365607, "grad_norm": 5.902426719665527, "learning_rate": 8.281196522691492e-05, "loss": 0.697, "step": 13306 }, { "epoch": 0.9016193509045328, "grad_norm": 7.651483058929443, "learning_rate": 8.28105962078171e-05, "loss": 0.8322, "step": 13307 }, { "epoch": 0.9016871061725049, "grad_norm": 5.933381080627441, "learning_rate": 8.280922718871928e-05, "loss": 0.834, "step": 13308 }, { "epoch": 0.901754861440477, "grad_norm": 5.620283126831055, "learning_rate": 8.280785816962146e-05, "loss": 0.8047, "step": 13309 }, { "epoch": 0.9018226167084491, "grad_norm": 4.271581172943115, "learning_rate": 8.280648915052365e-05, "loss": 0.7422, "step": 13310 }, { "epoch": 0.9018903719764212, "grad_norm": 7.821619033813477, "learning_rate": 8.280512013142583e-05, "loss": 0.9909, "step": 13311 }, { "epoch": 0.9019581272443933, "grad_norm": 5.665806770324707, "learning_rate": 8.280375111232801e-05, "loss": 0.6748, "step": 13312 }, { "epoch": 0.9020258825123654, "grad_norm": 7.500571250915527, "learning_rate": 8.28023820932302e-05, "loss": 0.9495, "step": 13313 }, { "epoch": 0.9020936377803375, "grad_norm": 7.833176612854004, "learning_rate": 8.280101307413239e-05, "loss": 0.8774, "step": 13314 }, { "epoch": 0.9021613930483096, "grad_norm": 10.599181175231934, "learning_rate": 8.279964405503457e-05, "loss": 0.9064, "step": 13315 }, { "epoch": 0.9022291483162815, "grad_norm": 5.582043647766113, "learning_rate": 8.279827503593675e-05, "loss": 0.5968, "step": 13316 }, { "epoch": 0.9022969035842536, "grad_norm": 10.205772399902344, "learning_rate": 8.279690601683894e-05, "loss": 0.8455, "step": 13317 }, { "epoch": 0.9023646588522257, "grad_norm": 6.769802093505859, "learning_rate": 8.279553699774112e-05, "loss": 0.8242, "step": 13318 }, { "epoch": 0.9024324141201978, "grad_norm": 8.564204216003418, "learning_rate": 8.279416797864332e-05, "loss": 0.9918, "step": 13319 }, { "epoch": 0.9025001693881699, "grad_norm": 6.2231645584106445, "learning_rate": 8.27927989595455e-05, "loss": 0.8375, "step": 13320 }, { "epoch": 0.902567924656142, "grad_norm": 6.065762519836426, "learning_rate": 8.279142994044768e-05, "loss": 0.8746, "step": 13321 }, { "epoch": 0.9026356799241141, "grad_norm": 7.134408950805664, "learning_rate": 8.279006092134986e-05, "loss": 1.1725, "step": 13322 }, { "epoch": 0.9027034351920862, "grad_norm": 7.31404972076416, "learning_rate": 8.278869190225204e-05, "loss": 0.6658, "step": 13323 }, { "epoch": 0.9027711904600583, "grad_norm": 4.640890121459961, "learning_rate": 8.278732288315423e-05, "loss": 0.882, "step": 13324 }, { "epoch": 0.9028389457280304, "grad_norm": 6.120473861694336, "learning_rate": 8.278595386405641e-05, "loss": 0.5502, "step": 13325 }, { "epoch": 0.9029067009960025, "grad_norm": 5.745514392852783, "learning_rate": 8.27845848449586e-05, "loss": 0.6134, "step": 13326 }, { "epoch": 0.9029744562639745, "grad_norm": 8.348088264465332, "learning_rate": 8.278321582586077e-05, "loss": 0.8996, "step": 13327 }, { "epoch": 0.9030422115319466, "grad_norm": 6.585142612457275, "learning_rate": 8.278184680676297e-05, "loss": 0.8028, "step": 13328 }, { "epoch": 0.9031099667999187, "grad_norm": 4.813357830047607, "learning_rate": 8.278047778766515e-05, "loss": 0.6632, "step": 13329 }, { "epoch": 0.9031777220678908, "grad_norm": 6.714639663696289, "learning_rate": 8.277910876856733e-05, "loss": 0.7389, "step": 13330 }, { "epoch": 0.9032454773358629, "grad_norm": 6.200788974761963, "learning_rate": 8.277773974946951e-05, "loss": 0.8529, "step": 13331 }, { "epoch": 0.903313232603835, "grad_norm": 6.508268356323242, "learning_rate": 8.277637073037169e-05, "loss": 0.9533, "step": 13332 }, { "epoch": 0.903380987871807, "grad_norm": 6.651120662689209, "learning_rate": 8.277500171127388e-05, "loss": 0.5568, "step": 13333 }, { "epoch": 0.9034487431397791, "grad_norm": 6.641012668609619, "learning_rate": 8.277363269217606e-05, "loss": 0.7402, "step": 13334 }, { "epoch": 0.9035164984077512, "grad_norm": 5.423649787902832, "learning_rate": 8.277226367307824e-05, "loss": 0.6719, "step": 13335 }, { "epoch": 0.9035842536757233, "grad_norm": 4.8691558837890625, "learning_rate": 8.277089465398042e-05, "loss": 0.5897, "step": 13336 }, { "epoch": 0.9036520089436954, "grad_norm": 4.2445759773254395, "learning_rate": 8.27695256348826e-05, "loss": 0.8443, "step": 13337 }, { "epoch": 0.9037197642116674, "grad_norm": 5.379455089569092, "learning_rate": 8.27681566157848e-05, "loss": 0.6049, "step": 13338 }, { "epoch": 0.9037875194796395, "grad_norm": 5.717207908630371, "learning_rate": 8.276678759668698e-05, "loss": 0.8009, "step": 13339 }, { "epoch": 0.9038552747476116, "grad_norm": 5.412480354309082, "learning_rate": 8.276541857758916e-05, "loss": 0.7753, "step": 13340 }, { "epoch": 0.9039230300155837, "grad_norm": 8.551285743713379, "learning_rate": 8.276404955849134e-05, "loss": 0.9724, "step": 13341 }, { "epoch": 0.9039907852835558, "grad_norm": 6.7664899826049805, "learning_rate": 8.276268053939353e-05, "loss": 0.9088, "step": 13342 }, { "epoch": 0.9040585405515279, "grad_norm": 6.312598705291748, "learning_rate": 8.276131152029571e-05, "loss": 0.6874, "step": 13343 }, { "epoch": 0.9041262958195, "grad_norm": 6.274691104888916, "learning_rate": 8.27599425011979e-05, "loss": 0.9076, "step": 13344 }, { "epoch": 0.9041940510874721, "grad_norm": 5.1724958419799805, "learning_rate": 8.275857348210007e-05, "loss": 0.6779, "step": 13345 }, { "epoch": 0.9042618063554442, "grad_norm": 6.403627872467041, "learning_rate": 8.275720446300225e-05, "loss": 0.8524, "step": 13346 }, { "epoch": 0.9043295616234163, "grad_norm": 5.662676811218262, "learning_rate": 8.275583544390445e-05, "loss": 0.8278, "step": 13347 }, { "epoch": 0.9043973168913884, "grad_norm": 5.776680946350098, "learning_rate": 8.275446642480663e-05, "loss": 0.7354, "step": 13348 }, { "epoch": 0.9044650721593603, "grad_norm": 5.1496076583862305, "learning_rate": 8.275309740570881e-05, "loss": 0.7018, "step": 13349 }, { "epoch": 0.9045328274273324, "grad_norm": 6.148200035095215, "learning_rate": 8.275172838661099e-05, "loss": 0.7145, "step": 13350 }, { "epoch": 0.9046005826953045, "grad_norm": 6.302008152008057, "learning_rate": 8.275035936751318e-05, "loss": 0.7148, "step": 13351 }, { "epoch": 0.9046683379632766, "grad_norm": 5.558827877044678, "learning_rate": 8.274899034841536e-05, "loss": 0.7591, "step": 13352 }, { "epoch": 0.9047360932312487, "grad_norm": 4.537057876586914, "learning_rate": 8.274762132931754e-05, "loss": 0.5739, "step": 13353 }, { "epoch": 0.9048038484992208, "grad_norm": 5.1838555335998535, "learning_rate": 8.274625231021972e-05, "loss": 0.7811, "step": 13354 }, { "epoch": 0.9048716037671929, "grad_norm": 5.07068395614624, "learning_rate": 8.27448832911219e-05, "loss": 0.9203, "step": 13355 }, { "epoch": 0.904939359035165, "grad_norm": 6.439149856567383, "learning_rate": 8.27435142720241e-05, "loss": 0.7654, "step": 13356 }, { "epoch": 0.9050071143031371, "grad_norm": 8.04731559753418, "learning_rate": 8.274214525292628e-05, "loss": 1.1907, "step": 13357 }, { "epoch": 0.9050748695711092, "grad_norm": 5.4005961418151855, "learning_rate": 8.274077623382846e-05, "loss": 0.8081, "step": 13358 }, { "epoch": 0.9051426248390813, "grad_norm": 5.338225364685059, "learning_rate": 8.273940721473064e-05, "loss": 0.5817, "step": 13359 }, { "epoch": 0.9052103801070533, "grad_norm": 7.287635326385498, "learning_rate": 8.273803819563283e-05, "loss": 0.9362, "step": 13360 }, { "epoch": 0.9052781353750254, "grad_norm": 7.328275203704834, "learning_rate": 8.273666917653501e-05, "loss": 0.9302, "step": 13361 }, { "epoch": 0.9053458906429975, "grad_norm": 5.7136359214782715, "learning_rate": 8.27353001574372e-05, "loss": 0.8013, "step": 13362 }, { "epoch": 0.9054136459109696, "grad_norm": 5.051130294799805, "learning_rate": 8.273393113833939e-05, "loss": 0.6682, "step": 13363 }, { "epoch": 0.9054814011789417, "grad_norm": 6.250859260559082, "learning_rate": 8.273256211924157e-05, "loss": 0.8889, "step": 13364 }, { "epoch": 0.9055491564469137, "grad_norm": 6.39178991317749, "learning_rate": 8.273119310014375e-05, "loss": 0.6838, "step": 13365 }, { "epoch": 0.9056169117148858, "grad_norm": 5.243597507476807, "learning_rate": 8.272982408104594e-05, "loss": 0.8019, "step": 13366 }, { "epoch": 0.9056846669828579, "grad_norm": 6.917558670043945, "learning_rate": 8.272845506194812e-05, "loss": 0.7121, "step": 13367 }, { "epoch": 0.90575242225083, "grad_norm": 4.401981830596924, "learning_rate": 8.27270860428503e-05, "loss": 0.5813, "step": 13368 }, { "epoch": 0.9058201775188021, "grad_norm": 6.838183879852295, "learning_rate": 8.272571702375248e-05, "loss": 0.8273, "step": 13369 }, { "epoch": 0.9058879327867742, "grad_norm": 6.264207363128662, "learning_rate": 8.272434800465468e-05, "loss": 0.6854, "step": 13370 }, { "epoch": 0.9059556880547462, "grad_norm": 4.945788383483887, "learning_rate": 8.272297898555686e-05, "loss": 0.5941, "step": 13371 }, { "epoch": 0.9060234433227183, "grad_norm": 6.928656578063965, "learning_rate": 8.272160996645904e-05, "loss": 0.754, "step": 13372 }, { "epoch": 0.9060911985906904, "grad_norm": 4.5859551429748535, "learning_rate": 8.272024094736122e-05, "loss": 0.6271, "step": 13373 }, { "epoch": 0.9061589538586625, "grad_norm": 6.357034206390381, "learning_rate": 8.271887192826341e-05, "loss": 0.6632, "step": 13374 }, { "epoch": 0.9062267091266346, "grad_norm": 6.899624824523926, "learning_rate": 8.271750290916559e-05, "loss": 0.8332, "step": 13375 }, { "epoch": 0.9062944643946067, "grad_norm": 8.641779899597168, "learning_rate": 8.271613389006777e-05, "loss": 0.8412, "step": 13376 }, { "epoch": 0.9063622196625788, "grad_norm": 6.443648815155029, "learning_rate": 8.271476487096995e-05, "loss": 0.8374, "step": 13377 }, { "epoch": 0.9064299749305509, "grad_norm": 8.222790718078613, "learning_rate": 8.271339585187213e-05, "loss": 0.9038, "step": 13378 }, { "epoch": 0.906497730198523, "grad_norm": 5.121990203857422, "learning_rate": 8.271202683277433e-05, "loss": 0.6617, "step": 13379 }, { "epoch": 0.9065654854664951, "grad_norm": 6.855564117431641, "learning_rate": 8.271065781367651e-05, "loss": 0.7803, "step": 13380 }, { "epoch": 0.906633240734467, "grad_norm": 6.987429141998291, "learning_rate": 8.270928879457869e-05, "loss": 0.6917, "step": 13381 }, { "epoch": 0.9067009960024391, "grad_norm": 6.5699896812438965, "learning_rate": 8.270791977548087e-05, "loss": 0.9687, "step": 13382 }, { "epoch": 0.9067687512704112, "grad_norm": 6.9197797775268555, "learning_rate": 8.270655075638306e-05, "loss": 0.7054, "step": 13383 }, { "epoch": 0.9068365065383833, "grad_norm": 6.5933380126953125, "learning_rate": 8.270518173728524e-05, "loss": 0.7276, "step": 13384 }, { "epoch": 0.9069042618063554, "grad_norm": 5.359493732452393, "learning_rate": 8.270381271818742e-05, "loss": 0.6566, "step": 13385 }, { "epoch": 0.9069720170743275, "grad_norm": 9.177874565124512, "learning_rate": 8.27024436990896e-05, "loss": 0.7313, "step": 13386 }, { "epoch": 0.9070397723422996, "grad_norm": 7.813451766967773, "learning_rate": 8.270107467999178e-05, "loss": 0.6503, "step": 13387 }, { "epoch": 0.9071075276102717, "grad_norm": 5.215133190155029, "learning_rate": 8.269970566089398e-05, "loss": 0.8036, "step": 13388 }, { "epoch": 0.9071752828782438, "grad_norm": 5.473424434661865, "learning_rate": 8.269833664179616e-05, "loss": 0.7178, "step": 13389 }, { "epoch": 0.9072430381462159, "grad_norm": 7.770848751068115, "learning_rate": 8.269696762269834e-05, "loss": 0.8419, "step": 13390 }, { "epoch": 0.907310793414188, "grad_norm": 6.786417007446289, "learning_rate": 8.269559860360052e-05, "loss": 0.9242, "step": 13391 }, { "epoch": 0.9073785486821601, "grad_norm": 4.775079250335693, "learning_rate": 8.26942295845027e-05, "loss": 0.7514, "step": 13392 }, { "epoch": 0.9074463039501321, "grad_norm": 5.795116901397705, "learning_rate": 8.26928605654049e-05, "loss": 0.8425, "step": 13393 }, { "epoch": 0.9075140592181042, "grad_norm": 7.329837322235107, "learning_rate": 8.269149154630707e-05, "loss": 0.8125, "step": 13394 }, { "epoch": 0.9075818144860763, "grad_norm": 4.981736660003662, "learning_rate": 8.269012252720925e-05, "loss": 0.6766, "step": 13395 }, { "epoch": 0.9076495697540484, "grad_norm": 7.599316120147705, "learning_rate": 8.268875350811143e-05, "loss": 0.6769, "step": 13396 }, { "epoch": 0.9077173250220205, "grad_norm": 5.966447830200195, "learning_rate": 8.268738448901363e-05, "loss": 0.7356, "step": 13397 }, { "epoch": 0.9077850802899925, "grad_norm": 6.569231986999512, "learning_rate": 8.268601546991581e-05, "loss": 0.84, "step": 13398 }, { "epoch": 0.9078528355579646, "grad_norm": 5.759048938751221, "learning_rate": 8.268464645081799e-05, "loss": 0.9159, "step": 13399 }, { "epoch": 0.9079205908259367, "grad_norm": 5.395700931549072, "learning_rate": 8.268327743172017e-05, "loss": 0.662, "step": 13400 }, { "epoch": 0.9079883460939088, "grad_norm": 6.1797099113464355, "learning_rate": 8.268190841262235e-05, "loss": 0.6498, "step": 13401 }, { "epoch": 0.9080561013618809, "grad_norm": 7.686827659606934, "learning_rate": 8.268053939352454e-05, "loss": 0.9687, "step": 13402 }, { "epoch": 0.908123856629853, "grad_norm": 6.104824066162109, "learning_rate": 8.267917037442672e-05, "loss": 0.7184, "step": 13403 }, { "epoch": 0.908191611897825, "grad_norm": 6.225331783294678, "learning_rate": 8.26778013553289e-05, "loss": 0.8403, "step": 13404 }, { "epoch": 0.9082593671657971, "grad_norm": 5.349873065948486, "learning_rate": 8.267643233623108e-05, "loss": 0.8676, "step": 13405 }, { "epoch": 0.9083271224337692, "grad_norm": 5.427150249481201, "learning_rate": 8.267506331713328e-05, "loss": 0.7199, "step": 13406 }, { "epoch": 0.9083948777017413, "grad_norm": 6.000942707061768, "learning_rate": 8.267369429803546e-05, "loss": 0.6516, "step": 13407 }, { "epoch": 0.9084626329697134, "grad_norm": 7.7418532371521, "learning_rate": 8.267232527893764e-05, "loss": 0.8854, "step": 13408 }, { "epoch": 0.9085303882376855, "grad_norm": 6.350762844085693, "learning_rate": 8.267095625983983e-05, "loss": 1.0689, "step": 13409 }, { "epoch": 0.9085981435056576, "grad_norm": 6.675297260284424, "learning_rate": 8.266958724074201e-05, "loss": 1.1803, "step": 13410 }, { "epoch": 0.9086658987736297, "grad_norm": 5.116997718811035, "learning_rate": 8.26682182216442e-05, "loss": 0.6724, "step": 13411 }, { "epoch": 0.9087336540416018, "grad_norm": 6.529482364654541, "learning_rate": 8.266684920254639e-05, "loss": 0.7508, "step": 13412 }, { "epoch": 0.9088014093095739, "grad_norm": 6.559128284454346, "learning_rate": 8.266548018344857e-05, "loss": 0.7786, "step": 13413 }, { "epoch": 0.9088691645775459, "grad_norm": 6.002798080444336, "learning_rate": 8.266411116435075e-05, "loss": 0.613, "step": 13414 }, { "epoch": 0.9089369198455179, "grad_norm": 5.554568767547607, "learning_rate": 8.266274214525293e-05, "loss": 0.7622, "step": 13415 }, { "epoch": 0.90900467511349, "grad_norm": 4.8590521812438965, "learning_rate": 8.266137312615512e-05, "loss": 0.6765, "step": 13416 }, { "epoch": 0.9090724303814621, "grad_norm": 5.818526744842529, "learning_rate": 8.26600041070573e-05, "loss": 0.8034, "step": 13417 }, { "epoch": 0.9091401856494342, "grad_norm": 5.614035606384277, "learning_rate": 8.265863508795948e-05, "loss": 0.6917, "step": 13418 }, { "epoch": 0.9092079409174063, "grad_norm": 4.978684425354004, "learning_rate": 8.265726606886166e-05, "loss": 0.6436, "step": 13419 }, { "epoch": 0.9092756961853784, "grad_norm": 4.747771263122559, "learning_rate": 8.265589704976386e-05, "loss": 0.7033, "step": 13420 }, { "epoch": 0.9093434514533505, "grad_norm": 5.64393424987793, "learning_rate": 8.265452803066604e-05, "loss": 0.6504, "step": 13421 }, { "epoch": 0.9094112067213226, "grad_norm": 4.84307336807251, "learning_rate": 8.265315901156822e-05, "loss": 0.6661, "step": 13422 }, { "epoch": 0.9094789619892947, "grad_norm": 7.957591533660889, "learning_rate": 8.26517899924704e-05, "loss": 0.797, "step": 13423 }, { "epoch": 0.9095467172572668, "grad_norm": 7.481930255889893, "learning_rate": 8.265042097337258e-05, "loss": 0.9757, "step": 13424 }, { "epoch": 0.9096144725252389, "grad_norm": 9.150352478027344, "learning_rate": 8.264905195427477e-05, "loss": 0.6074, "step": 13425 }, { "epoch": 0.909682227793211, "grad_norm": 7.335263252258301, "learning_rate": 8.264768293517695e-05, "loss": 0.7082, "step": 13426 }, { "epoch": 0.909749983061183, "grad_norm": 6.145523548126221, "learning_rate": 8.264631391607913e-05, "loss": 0.7136, "step": 13427 }, { "epoch": 0.9098177383291551, "grad_norm": 8.037848472595215, "learning_rate": 8.264494489698131e-05, "loss": 0.7071, "step": 13428 }, { "epoch": 0.9098854935971272, "grad_norm": 7.532377243041992, "learning_rate": 8.264357587788351e-05, "loss": 0.7352, "step": 13429 }, { "epoch": 0.9099532488650992, "grad_norm": 5.716782093048096, "learning_rate": 8.264220685878569e-05, "loss": 0.6108, "step": 13430 }, { "epoch": 0.9100210041330713, "grad_norm": 5.553226947784424, "learning_rate": 8.264083783968787e-05, "loss": 0.8414, "step": 13431 }, { "epoch": 0.9100887594010434, "grad_norm": 5.714873313903809, "learning_rate": 8.263946882059005e-05, "loss": 0.7346, "step": 13432 }, { "epoch": 0.9101565146690155, "grad_norm": 8.493986129760742, "learning_rate": 8.263809980149223e-05, "loss": 0.7027, "step": 13433 }, { "epoch": 0.9102242699369876, "grad_norm": 4.617614269256592, "learning_rate": 8.263673078239442e-05, "loss": 0.7141, "step": 13434 }, { "epoch": 0.9102920252049597, "grad_norm": 5.8947649002075195, "learning_rate": 8.26353617632966e-05, "loss": 0.8719, "step": 13435 }, { "epoch": 0.9103597804729318, "grad_norm": 7.168681621551514, "learning_rate": 8.263399274419878e-05, "loss": 1.0277, "step": 13436 }, { "epoch": 0.9104275357409038, "grad_norm": 7.014023303985596, "learning_rate": 8.263262372510096e-05, "loss": 0.9736, "step": 13437 }, { "epoch": 0.9104952910088759, "grad_norm": 6.311854362487793, "learning_rate": 8.263125470600314e-05, "loss": 0.7287, "step": 13438 }, { "epoch": 0.910563046276848, "grad_norm": 6.3631486892700195, "learning_rate": 8.262988568690534e-05, "loss": 0.8432, "step": 13439 }, { "epoch": 0.9106308015448201, "grad_norm": 7.433747291564941, "learning_rate": 8.262851666780752e-05, "loss": 0.8998, "step": 13440 }, { "epoch": 0.9106985568127922, "grad_norm": 6.583968639373779, "learning_rate": 8.26271476487097e-05, "loss": 0.7926, "step": 13441 }, { "epoch": 0.9107663120807643, "grad_norm": 5.414675235748291, "learning_rate": 8.262577862961188e-05, "loss": 0.6653, "step": 13442 }, { "epoch": 0.9108340673487364, "grad_norm": 6.068636417388916, "learning_rate": 8.262440961051407e-05, "loss": 0.7299, "step": 13443 }, { "epoch": 0.9109018226167085, "grad_norm": 6.4102277755737305, "learning_rate": 8.262304059141625e-05, "loss": 0.6988, "step": 13444 }, { "epoch": 0.9109695778846806, "grad_norm": 7.261764049530029, "learning_rate": 8.262167157231843e-05, "loss": 0.7473, "step": 13445 }, { "epoch": 0.9110373331526527, "grad_norm": 6.129340171813965, "learning_rate": 8.262030255322061e-05, "loss": 0.7437, "step": 13446 }, { "epoch": 0.9111050884206247, "grad_norm": 7.05224609375, "learning_rate": 8.26189335341228e-05, "loss": 0.8106, "step": 13447 }, { "epoch": 0.9111728436885967, "grad_norm": 6.788548469543457, "learning_rate": 8.261756451502499e-05, "loss": 0.8289, "step": 13448 }, { "epoch": 0.9112405989565688, "grad_norm": 6.397700786590576, "learning_rate": 8.261619549592717e-05, "loss": 0.7633, "step": 13449 }, { "epoch": 0.9113083542245409, "grad_norm": 6.438706874847412, "learning_rate": 8.261482647682935e-05, "loss": 1.0421, "step": 13450 }, { "epoch": 0.911376109492513, "grad_norm": 5.280536651611328, "learning_rate": 8.261345745773153e-05, "loss": 0.7404, "step": 13451 }, { "epoch": 0.9114438647604851, "grad_norm": 5.514378070831299, "learning_rate": 8.261208843863372e-05, "loss": 0.6536, "step": 13452 }, { "epoch": 0.9115116200284572, "grad_norm": 6.182071685791016, "learning_rate": 8.26107194195359e-05, "loss": 1.0355, "step": 13453 }, { "epoch": 0.9115793752964293, "grad_norm": 5.280673503875732, "learning_rate": 8.260935040043808e-05, "loss": 0.6193, "step": 13454 }, { "epoch": 0.9116471305644014, "grad_norm": 6.420233726501465, "learning_rate": 8.260798138134028e-05, "loss": 0.7545, "step": 13455 }, { "epoch": 0.9117148858323735, "grad_norm": 4.983266353607178, "learning_rate": 8.260661236224246e-05, "loss": 0.7821, "step": 13456 }, { "epoch": 0.9117826411003456, "grad_norm": 7.941768646240234, "learning_rate": 8.260524334314464e-05, "loss": 0.851, "step": 13457 }, { "epoch": 0.9118503963683177, "grad_norm": 6.576548099517822, "learning_rate": 8.260387432404683e-05, "loss": 1.0445, "step": 13458 }, { "epoch": 0.9119181516362898, "grad_norm": 4.9454121589660645, "learning_rate": 8.260250530494901e-05, "loss": 0.7082, "step": 13459 }, { "epoch": 0.9119859069042618, "grad_norm": 5.048241138458252, "learning_rate": 8.260113628585119e-05, "loss": 0.6747, "step": 13460 }, { "epoch": 0.9120536621722339, "grad_norm": 6.229146957397461, "learning_rate": 8.259976726675339e-05, "loss": 0.9753, "step": 13461 }, { "epoch": 0.912121417440206, "grad_norm": 7.635250568389893, "learning_rate": 8.259839824765557e-05, "loss": 0.7616, "step": 13462 }, { "epoch": 0.912189172708178, "grad_norm": 5.580092430114746, "learning_rate": 8.259702922855775e-05, "loss": 0.5658, "step": 13463 }, { "epoch": 0.9122569279761501, "grad_norm": 5.2181715965271, "learning_rate": 8.259566020945993e-05, "loss": 0.7053, "step": 13464 }, { "epoch": 0.9123246832441222, "grad_norm": 6.143484592437744, "learning_rate": 8.259429119036211e-05, "loss": 0.6313, "step": 13465 }, { "epoch": 0.9123924385120943, "grad_norm": 5.8051323890686035, "learning_rate": 8.25929221712643e-05, "loss": 0.7655, "step": 13466 }, { "epoch": 0.9124601937800664, "grad_norm": 5.798500061035156, "learning_rate": 8.259155315216648e-05, "loss": 0.7226, "step": 13467 }, { "epoch": 0.9125279490480385, "grad_norm": 5.753519535064697, "learning_rate": 8.259018413306866e-05, "loss": 0.7487, "step": 13468 }, { "epoch": 0.9125957043160106, "grad_norm": 5.924856662750244, "learning_rate": 8.258881511397084e-05, "loss": 0.821, "step": 13469 }, { "epoch": 0.9126634595839827, "grad_norm": 5.954171180725098, "learning_rate": 8.258744609487302e-05, "loss": 0.8115, "step": 13470 }, { "epoch": 0.9127312148519547, "grad_norm": 6.430693626403809, "learning_rate": 8.258607707577522e-05, "loss": 0.7691, "step": 13471 }, { "epoch": 0.9127989701199268, "grad_norm": 4.200928688049316, "learning_rate": 8.25847080566774e-05, "loss": 0.6181, "step": 13472 }, { "epoch": 0.9128667253878989, "grad_norm": 6.997094631195068, "learning_rate": 8.258333903757958e-05, "loss": 1.0318, "step": 13473 }, { "epoch": 0.912934480655871, "grad_norm": 6.03936243057251, "learning_rate": 8.258197001848176e-05, "loss": 0.8103, "step": 13474 }, { "epoch": 0.9130022359238431, "grad_norm": 8.341856002807617, "learning_rate": 8.258060099938395e-05, "loss": 0.8473, "step": 13475 }, { "epoch": 0.9130699911918152, "grad_norm": 7.553666591644287, "learning_rate": 8.257923198028613e-05, "loss": 0.8751, "step": 13476 }, { "epoch": 0.9131377464597873, "grad_norm": 4.391775608062744, "learning_rate": 8.257786296118831e-05, "loss": 0.6136, "step": 13477 }, { "epoch": 0.9132055017277594, "grad_norm": 5.554635047912598, "learning_rate": 8.25764939420905e-05, "loss": 0.584, "step": 13478 }, { "epoch": 0.9132732569957314, "grad_norm": 4.761404037475586, "learning_rate": 8.257512492299267e-05, "loss": 0.7827, "step": 13479 }, { "epoch": 0.9133410122637035, "grad_norm": 5.858761310577393, "learning_rate": 8.257375590389487e-05, "loss": 0.7184, "step": 13480 }, { "epoch": 0.9134087675316755, "grad_norm": 5.618086338043213, "learning_rate": 8.257238688479705e-05, "loss": 0.6523, "step": 13481 }, { "epoch": 0.9134765227996476, "grad_norm": 5.3456854820251465, "learning_rate": 8.257101786569923e-05, "loss": 0.7383, "step": 13482 }, { "epoch": 0.9135442780676197, "grad_norm": 7.581971645355225, "learning_rate": 8.256964884660141e-05, "loss": 0.8992, "step": 13483 }, { "epoch": 0.9136120333355918, "grad_norm": 7.308854579925537, "learning_rate": 8.25682798275036e-05, "loss": 0.8322, "step": 13484 }, { "epoch": 0.9136797886035639, "grad_norm": 6.603922367095947, "learning_rate": 8.256691080840578e-05, "loss": 0.7841, "step": 13485 }, { "epoch": 0.913747543871536, "grad_norm": 5.805631637573242, "learning_rate": 8.256554178930796e-05, "loss": 0.6594, "step": 13486 }, { "epoch": 0.9138152991395081, "grad_norm": 5.206735134124756, "learning_rate": 8.256417277021014e-05, "loss": 0.6513, "step": 13487 }, { "epoch": 0.9138830544074802, "grad_norm": 7.223725318908691, "learning_rate": 8.256280375111232e-05, "loss": 0.768, "step": 13488 }, { "epoch": 0.9139508096754523, "grad_norm": 5.860501766204834, "learning_rate": 8.256143473201452e-05, "loss": 0.8824, "step": 13489 }, { "epoch": 0.9140185649434244, "grad_norm": 7.430714130401611, "learning_rate": 8.25600657129167e-05, "loss": 0.8971, "step": 13490 }, { "epoch": 0.9140863202113965, "grad_norm": 6.006752014160156, "learning_rate": 8.255869669381888e-05, "loss": 0.7208, "step": 13491 }, { "epoch": 0.9141540754793686, "grad_norm": 6.061511993408203, "learning_rate": 8.255732767472106e-05, "loss": 0.7222, "step": 13492 }, { "epoch": 0.9142218307473406, "grad_norm": 5.890130043029785, "learning_rate": 8.255595865562324e-05, "loss": 0.7764, "step": 13493 }, { "epoch": 0.9142895860153127, "grad_norm": 6.152371883392334, "learning_rate": 8.255458963652543e-05, "loss": 0.896, "step": 13494 }, { "epoch": 0.9143573412832848, "grad_norm": 5.317947864532471, "learning_rate": 8.255322061742761e-05, "loss": 0.7058, "step": 13495 }, { "epoch": 0.9144250965512568, "grad_norm": 5.516234397888184, "learning_rate": 8.25518515983298e-05, "loss": 0.7669, "step": 13496 }, { "epoch": 0.9144928518192289, "grad_norm": 5.327831268310547, "learning_rate": 8.255048257923197e-05, "loss": 0.6878, "step": 13497 }, { "epoch": 0.914560607087201, "grad_norm": 6.158792495727539, "learning_rate": 8.254911356013417e-05, "loss": 0.7393, "step": 13498 }, { "epoch": 0.9146283623551731, "grad_norm": 5.450939655303955, "learning_rate": 8.254774454103635e-05, "loss": 0.7131, "step": 13499 }, { "epoch": 0.9146961176231452, "grad_norm": 5.03593635559082, "learning_rate": 8.254637552193853e-05, "loss": 0.5819, "step": 13500 }, { "epoch": 0.9147638728911173, "grad_norm": 7.179828643798828, "learning_rate": 8.254500650284072e-05, "loss": 0.8142, "step": 13501 }, { "epoch": 0.9148316281590894, "grad_norm": 7.470744609832764, "learning_rate": 8.25436374837429e-05, "loss": 0.7455, "step": 13502 }, { "epoch": 0.9148993834270615, "grad_norm": 6.138060569763184, "learning_rate": 8.254226846464508e-05, "loss": 0.7474, "step": 13503 }, { "epoch": 0.9149671386950335, "grad_norm": 6.90313720703125, "learning_rate": 8.254089944554728e-05, "loss": 0.6724, "step": 13504 }, { "epoch": 0.9150348939630056, "grad_norm": 6.103466033935547, "learning_rate": 8.253953042644946e-05, "loss": 0.7163, "step": 13505 }, { "epoch": 0.9151026492309777, "grad_norm": 7.782615661621094, "learning_rate": 8.253816140735164e-05, "loss": 0.9758, "step": 13506 }, { "epoch": 0.9151704044989498, "grad_norm": 6.611595630645752, "learning_rate": 8.253679238825383e-05, "loss": 0.7287, "step": 13507 }, { "epoch": 0.9152381597669219, "grad_norm": 7.578495502471924, "learning_rate": 8.253542336915601e-05, "loss": 0.8566, "step": 13508 }, { "epoch": 0.915305915034894, "grad_norm": 5.957526206970215, "learning_rate": 8.253405435005819e-05, "loss": 0.712, "step": 13509 }, { "epoch": 0.9153736703028661, "grad_norm": 7.773584365844727, "learning_rate": 8.253268533096037e-05, "loss": 0.8836, "step": 13510 }, { "epoch": 0.9154414255708382, "grad_norm": 6.391456604003906, "learning_rate": 8.253131631186255e-05, "loss": 0.6992, "step": 13511 }, { "epoch": 0.9155091808388102, "grad_norm": 6.0647687911987305, "learning_rate": 8.252994729276475e-05, "loss": 0.6445, "step": 13512 }, { "epoch": 0.9155769361067823, "grad_norm": 5.610140800476074, "learning_rate": 8.252857827366693e-05, "loss": 0.7257, "step": 13513 }, { "epoch": 0.9156446913747543, "grad_norm": 4.631004810333252, "learning_rate": 8.252720925456911e-05, "loss": 0.7086, "step": 13514 }, { "epoch": 0.9157124466427264, "grad_norm": 5.651133060455322, "learning_rate": 8.252584023547129e-05, "loss": 1.0037, "step": 13515 }, { "epoch": 0.9157802019106985, "grad_norm": 6.308481693267822, "learning_rate": 8.252447121637348e-05, "loss": 0.8149, "step": 13516 }, { "epoch": 0.9158479571786706, "grad_norm": 7.645565509796143, "learning_rate": 8.252310219727566e-05, "loss": 0.698, "step": 13517 }, { "epoch": 0.9159157124466427, "grad_norm": 6.530562400817871, "learning_rate": 8.252173317817784e-05, "loss": 0.6629, "step": 13518 }, { "epoch": 0.9159834677146148, "grad_norm": 6.290469169616699, "learning_rate": 8.252036415908002e-05, "loss": 0.9803, "step": 13519 }, { "epoch": 0.9160512229825869, "grad_norm": 6.369983673095703, "learning_rate": 8.25189951399822e-05, "loss": 0.8061, "step": 13520 }, { "epoch": 0.916118978250559, "grad_norm": 5.648921489715576, "learning_rate": 8.25176261208844e-05, "loss": 0.8378, "step": 13521 }, { "epoch": 0.9161867335185311, "grad_norm": 5.623139381408691, "learning_rate": 8.251625710178658e-05, "loss": 0.9232, "step": 13522 }, { "epoch": 0.9162544887865032, "grad_norm": 7.031765460968018, "learning_rate": 8.251488808268876e-05, "loss": 0.9497, "step": 13523 }, { "epoch": 0.9163222440544753, "grad_norm": 5.612135410308838, "learning_rate": 8.251351906359094e-05, "loss": 0.931, "step": 13524 }, { "epoch": 0.9163899993224474, "grad_norm": 4.216965675354004, "learning_rate": 8.251215004449312e-05, "loss": 0.716, "step": 13525 }, { "epoch": 0.9164577545904194, "grad_norm": 5.5640645027160645, "learning_rate": 8.251078102539531e-05, "loss": 0.6835, "step": 13526 }, { "epoch": 0.9165255098583915, "grad_norm": 4.81538200378418, "learning_rate": 8.250941200629749e-05, "loss": 0.7627, "step": 13527 }, { "epoch": 0.9165932651263635, "grad_norm": 5.384982109069824, "learning_rate": 8.250804298719967e-05, "loss": 0.5343, "step": 13528 }, { "epoch": 0.9166610203943356, "grad_norm": 6.007334232330322, "learning_rate": 8.250667396810185e-05, "loss": 0.7204, "step": 13529 }, { "epoch": 0.9167287756623077, "grad_norm": 6.315242290496826, "learning_rate": 8.250530494900405e-05, "loss": 0.7633, "step": 13530 }, { "epoch": 0.9167965309302798, "grad_norm": 5.613879203796387, "learning_rate": 8.250393592990623e-05, "loss": 0.6349, "step": 13531 }, { "epoch": 0.9168642861982519, "grad_norm": 6.26859712600708, "learning_rate": 8.250256691080841e-05, "loss": 0.7504, "step": 13532 }, { "epoch": 0.916932041466224, "grad_norm": 7.103095054626465, "learning_rate": 8.250119789171059e-05, "loss": 1.1573, "step": 13533 }, { "epoch": 0.9169997967341961, "grad_norm": 5.007758617401123, "learning_rate": 8.249982887261277e-05, "loss": 0.7824, "step": 13534 }, { "epoch": 0.9170675520021682, "grad_norm": 7.608954429626465, "learning_rate": 8.249845985351496e-05, "loss": 0.9533, "step": 13535 }, { "epoch": 0.9171353072701403, "grad_norm": 6.751156806945801, "learning_rate": 8.249709083441714e-05, "loss": 0.6626, "step": 13536 }, { "epoch": 0.9172030625381123, "grad_norm": 6.883885860443115, "learning_rate": 8.249572181531932e-05, "loss": 0.7154, "step": 13537 }, { "epoch": 0.9172708178060844, "grad_norm": 5.1738481521606445, "learning_rate": 8.24943527962215e-05, "loss": 0.6065, "step": 13538 }, { "epoch": 0.9173385730740565, "grad_norm": 5.373385906219482, "learning_rate": 8.24929837771237e-05, "loss": 0.7061, "step": 13539 }, { "epoch": 0.9174063283420286, "grad_norm": 6.883655548095703, "learning_rate": 8.249161475802588e-05, "loss": 1.0318, "step": 13540 }, { "epoch": 0.9174740836100007, "grad_norm": 6.240233898162842, "learning_rate": 8.249024573892806e-05, "loss": 0.7813, "step": 13541 }, { "epoch": 0.9175418388779728, "grad_norm": 3.9278335571289062, "learning_rate": 8.248887671983024e-05, "loss": 0.5349, "step": 13542 }, { "epoch": 0.9176095941459449, "grad_norm": 4.724002838134766, "learning_rate": 8.248750770073242e-05, "loss": 0.6608, "step": 13543 }, { "epoch": 0.917677349413917, "grad_norm": 6.562617778778076, "learning_rate": 8.248613868163461e-05, "loss": 0.7865, "step": 13544 }, { "epoch": 0.917745104681889, "grad_norm": 5.862000942230225, "learning_rate": 8.248476966253679e-05, "loss": 0.7887, "step": 13545 }, { "epoch": 0.9178128599498611, "grad_norm": 5.171735763549805, "learning_rate": 8.248340064343897e-05, "loss": 0.5521, "step": 13546 }, { "epoch": 0.9178806152178332, "grad_norm": 6.028750419616699, "learning_rate": 8.248203162434117e-05, "loss": 0.7401, "step": 13547 }, { "epoch": 0.9179483704858052, "grad_norm": 6.340005397796631, "learning_rate": 8.248066260524335e-05, "loss": 0.8042, "step": 13548 }, { "epoch": 0.9180161257537773, "grad_norm": 6.127305030822754, "learning_rate": 8.247929358614553e-05, "loss": 0.5721, "step": 13549 }, { "epoch": 0.9180838810217494, "grad_norm": 8.631043434143066, "learning_rate": 8.247792456704772e-05, "loss": 0.9929, "step": 13550 }, { "epoch": 0.9181516362897215, "grad_norm": 7.284387111663818, "learning_rate": 8.24765555479499e-05, "loss": 0.9029, "step": 13551 }, { "epoch": 0.9182193915576936, "grad_norm": 4.782154560089111, "learning_rate": 8.247518652885208e-05, "loss": 0.6233, "step": 13552 }, { "epoch": 0.9182871468256657, "grad_norm": 7.067320346832275, "learning_rate": 8.247381750975428e-05, "loss": 0.8092, "step": 13553 }, { "epoch": 0.9183549020936378, "grad_norm": 6.034327983856201, "learning_rate": 8.247244849065646e-05, "loss": 0.8534, "step": 13554 }, { "epoch": 0.9184226573616099, "grad_norm": 7.13966178894043, "learning_rate": 8.247107947155864e-05, "loss": 0.8442, "step": 13555 }, { "epoch": 0.918490412629582, "grad_norm": 6.099913120269775, "learning_rate": 8.246971045246082e-05, "loss": 0.862, "step": 13556 }, { "epoch": 0.9185581678975541, "grad_norm": 5.149213790893555, "learning_rate": 8.2468341433363e-05, "loss": 0.6381, "step": 13557 }, { "epoch": 0.9186259231655262, "grad_norm": 5.378545761108398, "learning_rate": 8.246697241426519e-05, "loss": 0.8651, "step": 13558 }, { "epoch": 0.9186936784334983, "grad_norm": 5.342377185821533, "learning_rate": 8.246560339516737e-05, "loss": 0.7287, "step": 13559 }, { "epoch": 0.9187614337014703, "grad_norm": 6.145232200622559, "learning_rate": 8.246423437606955e-05, "loss": 0.9639, "step": 13560 }, { "epoch": 0.9188291889694423, "grad_norm": 5.195524215698242, "learning_rate": 8.246286535697173e-05, "loss": 0.5293, "step": 13561 }, { "epoch": 0.9188969442374144, "grad_norm": 5.601246356964111, "learning_rate": 8.246149633787393e-05, "loss": 0.808, "step": 13562 }, { "epoch": 0.9189646995053865, "grad_norm": 7.764710426330566, "learning_rate": 8.246012731877611e-05, "loss": 0.6616, "step": 13563 }, { "epoch": 0.9190324547733586, "grad_norm": 5.822594165802002, "learning_rate": 8.245875829967829e-05, "loss": 0.6981, "step": 13564 }, { "epoch": 0.9191002100413307, "grad_norm": 6.025305271148682, "learning_rate": 8.245738928058047e-05, "loss": 0.8169, "step": 13565 }, { "epoch": 0.9191679653093028, "grad_norm": 5.495863437652588, "learning_rate": 8.245602026148265e-05, "loss": 0.9271, "step": 13566 }, { "epoch": 0.9192357205772749, "grad_norm": 6.753162384033203, "learning_rate": 8.245465124238484e-05, "loss": 0.5892, "step": 13567 }, { "epoch": 0.919303475845247, "grad_norm": 6.381758213043213, "learning_rate": 8.245328222328702e-05, "loss": 0.9818, "step": 13568 }, { "epoch": 0.9193712311132191, "grad_norm": 6.070631980895996, "learning_rate": 8.24519132041892e-05, "loss": 0.7322, "step": 13569 }, { "epoch": 0.9194389863811911, "grad_norm": 4.506857872009277, "learning_rate": 8.245054418509138e-05, "loss": 0.6529, "step": 13570 }, { "epoch": 0.9195067416491632, "grad_norm": 6.378490924835205, "learning_rate": 8.244917516599356e-05, "loss": 0.8146, "step": 13571 }, { "epoch": 0.9195744969171353, "grad_norm": 4.585328102111816, "learning_rate": 8.244780614689576e-05, "loss": 0.6215, "step": 13572 }, { "epoch": 0.9196422521851074, "grad_norm": 6.161722660064697, "learning_rate": 8.244643712779794e-05, "loss": 0.7576, "step": 13573 }, { "epoch": 0.9197100074530795, "grad_norm": 5.033822536468506, "learning_rate": 8.244506810870012e-05, "loss": 0.625, "step": 13574 }, { "epoch": 0.9197777627210516, "grad_norm": 4.940533638000488, "learning_rate": 8.24436990896023e-05, "loss": 0.5779, "step": 13575 }, { "epoch": 0.9198455179890237, "grad_norm": 5.566405296325684, "learning_rate": 8.244233007050449e-05, "loss": 0.7108, "step": 13576 }, { "epoch": 0.9199132732569957, "grad_norm": 5.4210896492004395, "learning_rate": 8.244096105140667e-05, "loss": 0.6497, "step": 13577 }, { "epoch": 0.9199810285249678, "grad_norm": 8.807047843933105, "learning_rate": 8.243959203230885e-05, "loss": 0.8228, "step": 13578 }, { "epoch": 0.9200487837929399, "grad_norm": 6.093658447265625, "learning_rate": 8.243822301321103e-05, "loss": 0.99, "step": 13579 }, { "epoch": 0.920116539060912, "grad_norm": 7.17921781539917, "learning_rate": 8.243685399411321e-05, "loss": 0.9162, "step": 13580 }, { "epoch": 0.920184294328884, "grad_norm": 5.841926574707031, "learning_rate": 8.243548497501541e-05, "loss": 0.8073, "step": 13581 }, { "epoch": 0.9202520495968561, "grad_norm": 6.103463649749756, "learning_rate": 8.243411595591759e-05, "loss": 0.7466, "step": 13582 }, { "epoch": 0.9203198048648282, "grad_norm": 7.478387355804443, "learning_rate": 8.243274693681977e-05, "loss": 0.5682, "step": 13583 }, { "epoch": 0.9203875601328003, "grad_norm": 7.954921245574951, "learning_rate": 8.243137791772195e-05, "loss": 0.7956, "step": 13584 }, { "epoch": 0.9204553154007724, "grad_norm": 5.1390509605407715, "learning_rate": 8.243000889862414e-05, "loss": 0.7722, "step": 13585 }, { "epoch": 0.9205230706687445, "grad_norm": 5.33015251159668, "learning_rate": 8.242863987952632e-05, "loss": 0.8534, "step": 13586 }, { "epoch": 0.9205908259367166, "grad_norm": 6.494523525238037, "learning_rate": 8.24272708604285e-05, "loss": 0.7041, "step": 13587 }, { "epoch": 0.9206585812046887, "grad_norm": 5.0413055419921875, "learning_rate": 8.242590184133068e-05, "loss": 0.8746, "step": 13588 }, { "epoch": 0.9207263364726608, "grad_norm": 5.492376327514648, "learning_rate": 8.242453282223286e-05, "loss": 0.7865, "step": 13589 }, { "epoch": 0.9207940917406329, "grad_norm": 4.8724870681762695, "learning_rate": 8.242316380313506e-05, "loss": 0.7237, "step": 13590 }, { "epoch": 0.920861847008605, "grad_norm": 7.61802339553833, "learning_rate": 8.242179478403724e-05, "loss": 1.0021, "step": 13591 }, { "epoch": 0.920929602276577, "grad_norm": 7.968650817871094, "learning_rate": 8.242042576493942e-05, "loss": 0.8771, "step": 13592 }, { "epoch": 0.920997357544549, "grad_norm": 4.494787216186523, "learning_rate": 8.24190567458416e-05, "loss": 0.6646, "step": 13593 }, { "epoch": 0.9210651128125211, "grad_norm": 7.044222831726074, "learning_rate": 8.241768772674379e-05, "loss": 0.9055, "step": 13594 }, { "epoch": 0.9211328680804932, "grad_norm": 6.417403697967529, "learning_rate": 8.241631870764597e-05, "loss": 0.6975, "step": 13595 }, { "epoch": 0.9212006233484653, "grad_norm": 7.259861469268799, "learning_rate": 8.241494968854815e-05, "loss": 0.736, "step": 13596 }, { "epoch": 0.9212683786164374, "grad_norm": 7.667874813079834, "learning_rate": 8.241358066945035e-05, "loss": 0.741, "step": 13597 }, { "epoch": 0.9213361338844095, "grad_norm": 5.993992805480957, "learning_rate": 8.241221165035253e-05, "loss": 0.74, "step": 13598 }, { "epoch": 0.9214038891523816, "grad_norm": 6.172394275665283, "learning_rate": 8.241084263125471e-05, "loss": 0.6285, "step": 13599 }, { "epoch": 0.9214716444203537, "grad_norm": 5.315210342407227, "learning_rate": 8.24094736121569e-05, "loss": 0.8465, "step": 13600 }, { "epoch": 0.9215393996883258, "grad_norm": 7.3281683921813965, "learning_rate": 8.240810459305908e-05, "loss": 0.8761, "step": 13601 }, { "epoch": 0.9216071549562979, "grad_norm": 4.546838760375977, "learning_rate": 8.240673557396126e-05, "loss": 0.5582, "step": 13602 }, { "epoch": 0.92167491022427, "grad_norm": 10.452659606933594, "learning_rate": 8.240536655486344e-05, "loss": 0.7345, "step": 13603 }, { "epoch": 0.921742665492242, "grad_norm": 5.254201412200928, "learning_rate": 8.240399753576564e-05, "loss": 0.6984, "step": 13604 }, { "epoch": 0.9218104207602141, "grad_norm": 7.967467784881592, "learning_rate": 8.240262851666782e-05, "loss": 0.775, "step": 13605 }, { "epoch": 0.9218781760281862, "grad_norm": 8.285964012145996, "learning_rate": 8.240125949757e-05, "loss": 0.7648, "step": 13606 }, { "epoch": 0.9219459312961583, "grad_norm": 5.65543794631958, "learning_rate": 8.239989047847218e-05, "loss": 0.6109, "step": 13607 }, { "epoch": 0.9220136865641304, "grad_norm": 5.535354137420654, "learning_rate": 8.239852145937437e-05, "loss": 0.5679, "step": 13608 }, { "epoch": 0.9220814418321025, "grad_norm": 4.902824878692627, "learning_rate": 8.239715244027655e-05, "loss": 0.574, "step": 13609 }, { "epoch": 0.9221491971000745, "grad_norm": 5.251827716827393, "learning_rate": 8.239578342117873e-05, "loss": 0.5183, "step": 13610 }, { "epoch": 0.9222169523680466, "grad_norm": 4.334627151489258, "learning_rate": 8.239441440208091e-05, "loss": 0.6654, "step": 13611 }, { "epoch": 0.9222847076360187, "grad_norm": 5.845222473144531, "learning_rate": 8.239304538298309e-05, "loss": 0.7711, "step": 13612 }, { "epoch": 0.9223524629039908, "grad_norm": 4.581849575042725, "learning_rate": 8.239167636388529e-05, "loss": 0.6276, "step": 13613 }, { "epoch": 0.9224202181719628, "grad_norm": 6.175132751464844, "learning_rate": 8.239030734478747e-05, "loss": 0.6509, "step": 13614 }, { "epoch": 0.9224879734399349, "grad_norm": 5.571422100067139, "learning_rate": 8.238893832568965e-05, "loss": 0.874, "step": 13615 }, { "epoch": 0.922555728707907, "grad_norm": 6.991871356964111, "learning_rate": 8.238756930659183e-05, "loss": 0.7676, "step": 13616 }, { "epoch": 0.9226234839758791, "grad_norm": 7.442737102508545, "learning_rate": 8.238620028749402e-05, "loss": 0.9373, "step": 13617 }, { "epoch": 0.9226912392438512, "grad_norm": 6.320655822753906, "learning_rate": 8.23848312683962e-05, "loss": 0.6271, "step": 13618 }, { "epoch": 0.9227589945118233, "grad_norm": 4.894016265869141, "learning_rate": 8.238346224929838e-05, "loss": 0.7149, "step": 13619 }, { "epoch": 0.9228267497797954, "grad_norm": 6.294023036956787, "learning_rate": 8.238209323020056e-05, "loss": 0.6531, "step": 13620 }, { "epoch": 0.9228945050477675, "grad_norm": 8.631871223449707, "learning_rate": 8.238072421110274e-05, "loss": 0.6823, "step": 13621 }, { "epoch": 0.9229622603157396, "grad_norm": 9.09997844696045, "learning_rate": 8.237935519200494e-05, "loss": 0.8513, "step": 13622 }, { "epoch": 0.9230300155837117, "grad_norm": 5.959611892700195, "learning_rate": 8.237798617290712e-05, "loss": 0.8519, "step": 13623 }, { "epoch": 0.9230977708516838, "grad_norm": 7.821328163146973, "learning_rate": 8.23766171538093e-05, "loss": 0.7974, "step": 13624 }, { "epoch": 0.9231655261196559, "grad_norm": 5.780052185058594, "learning_rate": 8.237524813471148e-05, "loss": 0.8274, "step": 13625 }, { "epoch": 0.9232332813876278, "grad_norm": 5.173895359039307, "learning_rate": 8.237387911561366e-05, "loss": 0.6296, "step": 13626 }, { "epoch": 0.9233010366555999, "grad_norm": 6.263997554779053, "learning_rate": 8.237251009651585e-05, "loss": 0.8015, "step": 13627 }, { "epoch": 0.923368791923572, "grad_norm": 5.063817501068115, "learning_rate": 8.237114107741803e-05, "loss": 0.8485, "step": 13628 }, { "epoch": 0.9234365471915441, "grad_norm": 5.420182704925537, "learning_rate": 8.236977205832021e-05, "loss": 0.8133, "step": 13629 }, { "epoch": 0.9235043024595162, "grad_norm": 6.21091890335083, "learning_rate": 8.236840303922239e-05, "loss": 0.9033, "step": 13630 }, { "epoch": 0.9235720577274883, "grad_norm": 5.321242332458496, "learning_rate": 8.236703402012459e-05, "loss": 0.7809, "step": 13631 }, { "epoch": 0.9236398129954604, "grad_norm": 4.922853469848633, "learning_rate": 8.236566500102677e-05, "loss": 0.6684, "step": 13632 }, { "epoch": 0.9237075682634325, "grad_norm": 7.8503193855285645, "learning_rate": 8.236429598192895e-05, "loss": 0.9825, "step": 13633 }, { "epoch": 0.9237753235314046, "grad_norm": 6.110293388366699, "learning_rate": 8.236292696283113e-05, "loss": 0.7034, "step": 13634 }, { "epoch": 0.9238430787993767, "grad_norm": 7.469491481781006, "learning_rate": 8.236155794373331e-05, "loss": 0.9736, "step": 13635 }, { "epoch": 0.9239108340673488, "grad_norm": 6.1995086669921875, "learning_rate": 8.23601889246355e-05, "loss": 0.8417, "step": 13636 }, { "epoch": 0.9239785893353208, "grad_norm": 4.243618011474609, "learning_rate": 8.235881990553768e-05, "loss": 0.5993, "step": 13637 }, { "epoch": 0.9240463446032929, "grad_norm": 5.490956783294678, "learning_rate": 8.235745088643986e-05, "loss": 0.7956, "step": 13638 }, { "epoch": 0.924114099871265, "grad_norm": 6.069158554077148, "learning_rate": 8.235608186734204e-05, "loss": 0.6775, "step": 13639 }, { "epoch": 0.9241818551392371, "grad_norm": 6.96622896194458, "learning_rate": 8.235471284824424e-05, "loss": 0.8268, "step": 13640 }, { "epoch": 0.9242496104072092, "grad_norm": 8.088959693908691, "learning_rate": 8.235334382914642e-05, "loss": 0.9065, "step": 13641 }, { "epoch": 0.9243173656751812, "grad_norm": 5.17006778717041, "learning_rate": 8.23519748100486e-05, "loss": 0.5922, "step": 13642 }, { "epoch": 0.9243851209431533, "grad_norm": 5.012120246887207, "learning_rate": 8.235060579095079e-05, "loss": 0.6376, "step": 13643 }, { "epoch": 0.9244528762111254, "grad_norm": 5.864466667175293, "learning_rate": 8.234923677185297e-05, "loss": 0.6989, "step": 13644 }, { "epoch": 0.9245206314790975, "grad_norm": 5.791763782501221, "learning_rate": 8.234786775275515e-05, "loss": 0.818, "step": 13645 }, { "epoch": 0.9245883867470696, "grad_norm": 5.13279390335083, "learning_rate": 8.234649873365735e-05, "loss": 0.6238, "step": 13646 }, { "epoch": 0.9246561420150416, "grad_norm": 6.566293716430664, "learning_rate": 8.234512971455953e-05, "loss": 0.7528, "step": 13647 }, { "epoch": 0.9247238972830137, "grad_norm": 4.981358528137207, "learning_rate": 8.234376069546171e-05, "loss": 0.713, "step": 13648 }, { "epoch": 0.9247916525509858, "grad_norm": 5.388360500335693, "learning_rate": 8.23423916763639e-05, "loss": 0.8413, "step": 13649 }, { "epoch": 0.9248594078189579, "grad_norm": 5.867083549499512, "learning_rate": 8.234102265726608e-05, "loss": 0.7376, "step": 13650 }, { "epoch": 0.92492716308693, "grad_norm": 6.004108428955078, "learning_rate": 8.233965363816826e-05, "loss": 0.7179, "step": 13651 }, { "epoch": 0.9249949183549021, "grad_norm": 5.956449031829834, "learning_rate": 8.233828461907044e-05, "loss": 0.856, "step": 13652 }, { "epoch": 0.9250626736228742, "grad_norm": 4.9217209815979, "learning_rate": 8.233691559997262e-05, "loss": 0.7254, "step": 13653 }, { "epoch": 0.9251304288908463, "grad_norm": 5.971928596496582, "learning_rate": 8.233554658087482e-05, "loss": 0.8707, "step": 13654 }, { "epoch": 0.9251981841588184, "grad_norm": 5.525825500488281, "learning_rate": 8.2334177561777e-05, "loss": 0.6256, "step": 13655 }, { "epoch": 0.9252659394267905, "grad_norm": 6.858259201049805, "learning_rate": 8.233280854267918e-05, "loss": 0.771, "step": 13656 }, { "epoch": 0.9253336946947626, "grad_norm": 5.588975429534912, "learning_rate": 8.233143952358136e-05, "loss": 0.7937, "step": 13657 }, { "epoch": 0.9254014499627347, "grad_norm": 5.949060916900635, "learning_rate": 8.233007050448354e-05, "loss": 0.7975, "step": 13658 }, { "epoch": 0.9254692052307066, "grad_norm": 5.091200828552246, "learning_rate": 8.232870148538573e-05, "loss": 0.5729, "step": 13659 }, { "epoch": 0.9255369604986787, "grad_norm": 7.370169639587402, "learning_rate": 8.232733246628791e-05, "loss": 0.9528, "step": 13660 }, { "epoch": 0.9256047157666508, "grad_norm": 5.088458061218262, "learning_rate": 8.232596344719009e-05, "loss": 0.7491, "step": 13661 }, { "epoch": 0.9256724710346229, "grad_norm": 6.5189433097839355, "learning_rate": 8.232459442809227e-05, "loss": 0.7068, "step": 13662 }, { "epoch": 0.925740226302595, "grad_norm": 6.852166175842285, "learning_rate": 8.232322540899447e-05, "loss": 1.0309, "step": 13663 }, { "epoch": 0.9258079815705671, "grad_norm": 6.470550060272217, "learning_rate": 8.232185638989665e-05, "loss": 0.8087, "step": 13664 }, { "epoch": 0.9258757368385392, "grad_norm": 6.046988010406494, "learning_rate": 8.232048737079883e-05, "loss": 0.7183, "step": 13665 }, { "epoch": 0.9259434921065113, "grad_norm": 5.661501884460449, "learning_rate": 8.231911835170101e-05, "loss": 0.806, "step": 13666 }, { "epoch": 0.9260112473744834, "grad_norm": 7.144009113311768, "learning_rate": 8.231774933260319e-05, "loss": 0.8562, "step": 13667 }, { "epoch": 0.9260790026424555, "grad_norm": 6.237162113189697, "learning_rate": 8.231638031350538e-05, "loss": 0.6964, "step": 13668 }, { "epoch": 0.9261467579104276, "grad_norm": 10.49614429473877, "learning_rate": 8.231501129440756e-05, "loss": 0.8433, "step": 13669 }, { "epoch": 0.9262145131783996, "grad_norm": 8.371081352233887, "learning_rate": 8.231364227530974e-05, "loss": 0.8938, "step": 13670 }, { "epoch": 0.9262822684463717, "grad_norm": 5.304482936859131, "learning_rate": 8.231227325621192e-05, "loss": 0.8161, "step": 13671 }, { "epoch": 0.9263500237143438, "grad_norm": 7.333506107330322, "learning_rate": 8.231090423711412e-05, "loss": 1.0648, "step": 13672 }, { "epoch": 0.9264177789823159, "grad_norm": 6.7197065353393555, "learning_rate": 8.23095352180163e-05, "loss": 0.5901, "step": 13673 }, { "epoch": 0.926485534250288, "grad_norm": 6.151739120483398, "learning_rate": 8.230816619891848e-05, "loss": 0.832, "step": 13674 }, { "epoch": 0.92655328951826, "grad_norm": 5.662747859954834, "learning_rate": 8.230679717982066e-05, "loss": 0.8096, "step": 13675 }, { "epoch": 0.9266210447862321, "grad_norm": 6.029799938201904, "learning_rate": 8.230542816072284e-05, "loss": 0.7201, "step": 13676 }, { "epoch": 0.9266888000542042, "grad_norm": 4.9484686851501465, "learning_rate": 8.230405914162503e-05, "loss": 0.6618, "step": 13677 }, { "epoch": 0.9267565553221763, "grad_norm": 6.27154016494751, "learning_rate": 8.230269012252721e-05, "loss": 0.9026, "step": 13678 }, { "epoch": 0.9268243105901484, "grad_norm": 7.863352298736572, "learning_rate": 8.230132110342939e-05, "loss": 0.6036, "step": 13679 }, { "epoch": 0.9268920658581205, "grad_norm": 6.766676425933838, "learning_rate": 8.229995208433157e-05, "loss": 0.9587, "step": 13680 }, { "epoch": 0.9269598211260925, "grad_norm": 7.4474711418151855, "learning_rate": 8.229858306523375e-05, "loss": 0.8504, "step": 13681 }, { "epoch": 0.9270275763940646, "grad_norm": 5.815866470336914, "learning_rate": 8.229721404613595e-05, "loss": 0.6245, "step": 13682 }, { "epoch": 0.9270953316620367, "grad_norm": 7.574525833129883, "learning_rate": 8.229584502703813e-05, "loss": 0.6652, "step": 13683 }, { "epoch": 0.9271630869300088, "grad_norm": 8.183697700500488, "learning_rate": 8.229447600794031e-05, "loss": 0.7956, "step": 13684 }, { "epoch": 0.9272308421979809, "grad_norm": 5.091358184814453, "learning_rate": 8.229310698884249e-05, "loss": 0.6387, "step": 13685 }, { "epoch": 0.927298597465953, "grad_norm": 5.971275806427002, "learning_rate": 8.229173796974468e-05, "loss": 1.0076, "step": 13686 }, { "epoch": 0.9273663527339251, "grad_norm": 5.206945896148682, "learning_rate": 8.229036895064686e-05, "loss": 0.615, "step": 13687 }, { "epoch": 0.9274341080018972, "grad_norm": 6.2954864501953125, "learning_rate": 8.228899993154904e-05, "loss": 0.7028, "step": 13688 }, { "epoch": 0.9275018632698693, "grad_norm": 7.77675724029541, "learning_rate": 8.228763091245124e-05, "loss": 0.8924, "step": 13689 }, { "epoch": 0.9275696185378414, "grad_norm": 7.986929893493652, "learning_rate": 8.228626189335342e-05, "loss": 0.9092, "step": 13690 }, { "epoch": 0.9276373738058133, "grad_norm": 6.880285739898682, "learning_rate": 8.22848928742556e-05, "loss": 0.8328, "step": 13691 }, { "epoch": 0.9277051290737854, "grad_norm": 5.309557914733887, "learning_rate": 8.228352385515779e-05, "loss": 0.6386, "step": 13692 }, { "epoch": 0.9277728843417575, "grad_norm": 5.744555473327637, "learning_rate": 8.228215483605997e-05, "loss": 0.8106, "step": 13693 }, { "epoch": 0.9278406396097296, "grad_norm": 10.664202690124512, "learning_rate": 8.228078581696215e-05, "loss": 0.6584, "step": 13694 }, { "epoch": 0.9279083948777017, "grad_norm": 5.7767791748046875, "learning_rate": 8.227941679786435e-05, "loss": 0.7672, "step": 13695 }, { "epoch": 0.9279761501456738, "grad_norm": 4.621129035949707, "learning_rate": 8.227804777876653e-05, "loss": 0.8555, "step": 13696 }, { "epoch": 0.9280439054136459, "grad_norm": 5.1587018966674805, "learning_rate": 8.22766787596687e-05, "loss": 0.526, "step": 13697 }, { "epoch": 0.928111660681618, "grad_norm": 6.041534900665283, "learning_rate": 8.227530974057089e-05, "loss": 0.6588, "step": 13698 }, { "epoch": 0.9281794159495901, "grad_norm": 5.863411903381348, "learning_rate": 8.227394072147307e-05, "loss": 0.821, "step": 13699 }, { "epoch": 0.9282471712175622, "grad_norm": 5.905420780181885, "learning_rate": 8.227257170237526e-05, "loss": 0.9116, "step": 13700 }, { "epoch": 0.9283149264855343, "grad_norm": 8.96728229522705, "learning_rate": 8.227120268327744e-05, "loss": 0.7997, "step": 13701 }, { "epoch": 0.9283826817535064, "grad_norm": 7.430652141571045, "learning_rate": 8.226983366417962e-05, "loss": 0.8505, "step": 13702 }, { "epoch": 0.9284504370214784, "grad_norm": 6.5419921875, "learning_rate": 8.22684646450818e-05, "loss": 1.1061, "step": 13703 }, { "epoch": 0.9285181922894505, "grad_norm": 5.459079265594482, "learning_rate": 8.226709562598398e-05, "loss": 0.7481, "step": 13704 }, { "epoch": 0.9285859475574226, "grad_norm": 6.421117782592773, "learning_rate": 8.226572660688618e-05, "loss": 0.7031, "step": 13705 }, { "epoch": 0.9286537028253947, "grad_norm": 5.486372470855713, "learning_rate": 8.226435758778836e-05, "loss": 0.7731, "step": 13706 }, { "epoch": 0.9287214580933668, "grad_norm": 8.439654350280762, "learning_rate": 8.226298856869054e-05, "loss": 0.8965, "step": 13707 }, { "epoch": 0.9287892133613388, "grad_norm": 5.904208183288574, "learning_rate": 8.226161954959272e-05, "loss": 0.8596, "step": 13708 }, { "epoch": 0.9288569686293109, "grad_norm": 7.829963684082031, "learning_rate": 8.226025053049491e-05, "loss": 0.8663, "step": 13709 }, { "epoch": 0.928924723897283, "grad_norm": 6.749328136444092, "learning_rate": 8.225888151139709e-05, "loss": 0.6586, "step": 13710 }, { "epoch": 0.9289924791652551, "grad_norm": 6.042569637298584, "learning_rate": 8.225751249229927e-05, "loss": 0.6156, "step": 13711 }, { "epoch": 0.9290602344332272, "grad_norm": 4.903025150299072, "learning_rate": 8.225614347320145e-05, "loss": 0.7658, "step": 13712 }, { "epoch": 0.9291279897011993, "grad_norm": 5.892563343048096, "learning_rate": 8.225477445410363e-05, "loss": 0.8609, "step": 13713 }, { "epoch": 0.9291957449691713, "grad_norm": 5.452030658721924, "learning_rate": 8.225340543500583e-05, "loss": 1.0457, "step": 13714 }, { "epoch": 0.9292635002371434, "grad_norm": 6.059377193450928, "learning_rate": 8.2252036415908e-05, "loss": 0.8657, "step": 13715 }, { "epoch": 0.9293312555051155, "grad_norm": 5.9446210861206055, "learning_rate": 8.225066739681019e-05, "loss": 1.0325, "step": 13716 }, { "epoch": 0.9293990107730876, "grad_norm": 5.6474690437316895, "learning_rate": 8.224929837771237e-05, "loss": 0.6533, "step": 13717 }, { "epoch": 0.9294667660410597, "grad_norm": 6.072984218597412, "learning_rate": 8.224792935861456e-05, "loss": 0.8738, "step": 13718 }, { "epoch": 0.9295345213090318, "grad_norm": 7.003471851348877, "learning_rate": 8.224656033951674e-05, "loss": 0.7797, "step": 13719 }, { "epoch": 0.9296022765770039, "grad_norm": 6.7634148597717285, "learning_rate": 8.224519132041892e-05, "loss": 0.9885, "step": 13720 }, { "epoch": 0.929670031844976, "grad_norm": 4.705183506011963, "learning_rate": 8.22438223013211e-05, "loss": 0.8115, "step": 13721 }, { "epoch": 0.9297377871129481, "grad_norm": 5.442021369934082, "learning_rate": 8.224245328222328e-05, "loss": 0.9005, "step": 13722 }, { "epoch": 0.9298055423809202, "grad_norm": 5.542171001434326, "learning_rate": 8.224108426312548e-05, "loss": 0.6873, "step": 13723 }, { "epoch": 0.9298732976488921, "grad_norm": 4.973824977874756, "learning_rate": 8.223971524402766e-05, "loss": 0.8902, "step": 13724 }, { "epoch": 0.9299410529168642, "grad_norm": 6.488674640655518, "learning_rate": 8.223834622492984e-05, "loss": 0.6895, "step": 13725 }, { "epoch": 0.9300088081848363, "grad_norm": 6.3187150955200195, "learning_rate": 8.223697720583202e-05, "loss": 0.591, "step": 13726 }, { "epoch": 0.9300765634528084, "grad_norm": 4.76509952545166, "learning_rate": 8.223560818673421e-05, "loss": 0.5401, "step": 13727 }, { "epoch": 0.9301443187207805, "grad_norm": 6.474403381347656, "learning_rate": 8.223423916763639e-05, "loss": 0.788, "step": 13728 }, { "epoch": 0.9302120739887526, "grad_norm": 5.295207977294922, "learning_rate": 8.223287014853857e-05, "loss": 0.8156, "step": 13729 }, { "epoch": 0.9302798292567247, "grad_norm": 4.847303867340088, "learning_rate": 8.223150112944075e-05, "loss": 0.7142, "step": 13730 }, { "epoch": 0.9303475845246968, "grad_norm": 7.824878692626953, "learning_rate": 8.223013211034293e-05, "loss": 0.6943, "step": 13731 }, { "epoch": 0.9304153397926689, "grad_norm": 5.352818965911865, "learning_rate": 8.222876309124513e-05, "loss": 0.6813, "step": 13732 }, { "epoch": 0.930483095060641, "grad_norm": 5.922751426696777, "learning_rate": 8.222739407214731e-05, "loss": 0.6284, "step": 13733 }, { "epoch": 0.9305508503286131, "grad_norm": 6.715836048126221, "learning_rate": 8.222602505304949e-05, "loss": 0.7602, "step": 13734 }, { "epoch": 0.9306186055965852, "grad_norm": 5.062824726104736, "learning_rate": 8.222465603395168e-05, "loss": 0.6008, "step": 13735 }, { "epoch": 0.9306863608645572, "grad_norm": 5.607885837554932, "learning_rate": 8.222328701485386e-05, "loss": 0.7161, "step": 13736 }, { "epoch": 0.9307541161325293, "grad_norm": 6.048231601715088, "learning_rate": 8.222191799575604e-05, "loss": 0.6162, "step": 13737 }, { "epoch": 0.9308218714005014, "grad_norm": 6.022711277008057, "learning_rate": 8.222054897665824e-05, "loss": 0.7804, "step": 13738 }, { "epoch": 0.9308896266684735, "grad_norm": 8.368260383605957, "learning_rate": 8.221917995756042e-05, "loss": 0.8318, "step": 13739 }, { "epoch": 0.9309573819364455, "grad_norm": 4.99493932723999, "learning_rate": 8.22178109384626e-05, "loss": 0.6902, "step": 13740 }, { "epoch": 0.9310251372044176, "grad_norm": 5.73579740524292, "learning_rate": 8.221644191936479e-05, "loss": 0.6253, "step": 13741 }, { "epoch": 0.9310928924723897, "grad_norm": 6.078760623931885, "learning_rate": 8.221507290026697e-05, "loss": 0.9188, "step": 13742 }, { "epoch": 0.9311606477403618, "grad_norm": 5.923210620880127, "learning_rate": 8.221370388116915e-05, "loss": 0.8844, "step": 13743 }, { "epoch": 0.9312284030083339, "grad_norm": 4.4179229736328125, "learning_rate": 8.221233486207133e-05, "loss": 0.6355, "step": 13744 }, { "epoch": 0.931296158276306, "grad_norm": 5.3702311515808105, "learning_rate": 8.221096584297351e-05, "loss": 0.8017, "step": 13745 }, { "epoch": 0.931363913544278, "grad_norm": 7.519911766052246, "learning_rate": 8.22095968238757e-05, "loss": 0.8302, "step": 13746 }, { "epoch": 0.9314316688122501, "grad_norm": 9.204391479492188, "learning_rate": 8.220822780477789e-05, "loss": 0.8657, "step": 13747 }, { "epoch": 0.9314994240802222, "grad_norm": 6.563374042510986, "learning_rate": 8.220685878568007e-05, "loss": 0.7257, "step": 13748 }, { "epoch": 0.9315671793481943, "grad_norm": 6.106207370758057, "learning_rate": 8.220548976658225e-05, "loss": 0.836, "step": 13749 }, { "epoch": 0.9316349346161664, "grad_norm": 8.019493103027344, "learning_rate": 8.220412074748444e-05, "loss": 0.7405, "step": 13750 }, { "epoch": 0.9317026898841385, "grad_norm": 5.366190433502197, "learning_rate": 8.220275172838662e-05, "loss": 0.7408, "step": 13751 }, { "epoch": 0.9317704451521106, "grad_norm": 3.966377019882202, "learning_rate": 8.22013827092888e-05, "loss": 0.5997, "step": 13752 }, { "epoch": 0.9318382004200827, "grad_norm": 8.112096786499023, "learning_rate": 8.220001369019098e-05, "loss": 1.1498, "step": 13753 }, { "epoch": 0.9319059556880548, "grad_norm": 7.886656284332275, "learning_rate": 8.219864467109316e-05, "loss": 0.8555, "step": 13754 }, { "epoch": 0.9319737109560269, "grad_norm": 5.4407219886779785, "learning_rate": 8.219727565199536e-05, "loss": 0.8506, "step": 13755 }, { "epoch": 0.932041466223999, "grad_norm": 6.584644794464111, "learning_rate": 8.219590663289754e-05, "loss": 0.8408, "step": 13756 }, { "epoch": 0.932109221491971, "grad_norm": 5.497494697570801, "learning_rate": 8.219453761379972e-05, "loss": 0.6424, "step": 13757 }, { "epoch": 0.932176976759943, "grad_norm": 5.6908698081970215, "learning_rate": 8.21931685947019e-05, "loss": 0.6629, "step": 13758 }, { "epoch": 0.9322447320279151, "grad_norm": 5.446488380432129, "learning_rate": 8.219179957560408e-05, "loss": 0.6875, "step": 13759 }, { "epoch": 0.9323124872958872, "grad_norm": 5.45862340927124, "learning_rate": 8.219043055650627e-05, "loss": 0.6987, "step": 13760 }, { "epoch": 0.9323802425638593, "grad_norm": 5.105297565460205, "learning_rate": 8.218906153740845e-05, "loss": 0.5886, "step": 13761 }, { "epoch": 0.9324479978318314, "grad_norm": 5.2332892417907715, "learning_rate": 8.218769251831063e-05, "loss": 0.7667, "step": 13762 }, { "epoch": 0.9325157530998035, "grad_norm": 5.849374294281006, "learning_rate": 8.218632349921281e-05, "loss": 0.7964, "step": 13763 }, { "epoch": 0.9325835083677756, "grad_norm": 5.5074872970581055, "learning_rate": 8.2184954480115e-05, "loss": 0.8375, "step": 13764 }, { "epoch": 0.9326512636357477, "grad_norm": 6.26788854598999, "learning_rate": 8.218358546101719e-05, "loss": 0.6793, "step": 13765 }, { "epoch": 0.9327190189037198, "grad_norm": 7.429625988006592, "learning_rate": 8.218221644191937e-05, "loss": 0.8389, "step": 13766 }, { "epoch": 0.9327867741716919, "grad_norm": 9.86705207824707, "learning_rate": 8.218084742282155e-05, "loss": 0.9663, "step": 13767 }, { "epoch": 0.932854529439664, "grad_norm": 7.741724491119385, "learning_rate": 8.217947840372373e-05, "loss": 0.8483, "step": 13768 }, { "epoch": 0.932922284707636, "grad_norm": 5.413618087768555, "learning_rate": 8.217810938462592e-05, "loss": 0.6838, "step": 13769 }, { "epoch": 0.9329900399756081, "grad_norm": 5.695152282714844, "learning_rate": 8.21767403655281e-05, "loss": 0.8978, "step": 13770 }, { "epoch": 0.9330577952435802, "grad_norm": 6.387299537658691, "learning_rate": 8.217537134643028e-05, "loss": 0.5846, "step": 13771 }, { "epoch": 0.9331255505115523, "grad_norm": 4.797947883605957, "learning_rate": 8.217400232733246e-05, "loss": 0.617, "step": 13772 }, { "epoch": 0.9331933057795243, "grad_norm": 5.718557357788086, "learning_rate": 8.217263330823466e-05, "loss": 0.7501, "step": 13773 }, { "epoch": 0.9332610610474964, "grad_norm": 6.03369665145874, "learning_rate": 8.217126428913684e-05, "loss": 0.5945, "step": 13774 }, { "epoch": 0.9333288163154685, "grad_norm": 4.953873634338379, "learning_rate": 8.216989527003902e-05, "loss": 0.6359, "step": 13775 }, { "epoch": 0.9333965715834406, "grad_norm": 6.724967956542969, "learning_rate": 8.21685262509412e-05, "loss": 0.788, "step": 13776 }, { "epoch": 0.9334643268514127, "grad_norm": 5.307444095611572, "learning_rate": 8.216715723184338e-05, "loss": 0.8166, "step": 13777 }, { "epoch": 0.9335320821193848, "grad_norm": 7.63820219039917, "learning_rate": 8.216578821274557e-05, "loss": 1.0157, "step": 13778 }, { "epoch": 0.9335998373873569, "grad_norm": 5.278408527374268, "learning_rate": 8.216441919364775e-05, "loss": 0.6433, "step": 13779 }, { "epoch": 0.933667592655329, "grad_norm": 8.680825233459473, "learning_rate": 8.216305017454993e-05, "loss": 0.84, "step": 13780 }, { "epoch": 0.933735347923301, "grad_norm": 5.572779655456543, "learning_rate": 8.216168115545213e-05, "loss": 0.783, "step": 13781 }, { "epoch": 0.9338031031912731, "grad_norm": 9.58791732788086, "learning_rate": 8.21603121363543e-05, "loss": 1.0121, "step": 13782 }, { "epoch": 0.9338708584592452, "grad_norm": 6.080748081207275, "learning_rate": 8.215894311725649e-05, "loss": 0.6494, "step": 13783 }, { "epoch": 0.9339386137272173, "grad_norm": 6.609582901000977, "learning_rate": 8.215757409815868e-05, "loss": 0.7456, "step": 13784 }, { "epoch": 0.9340063689951894, "grad_norm": 7.191807746887207, "learning_rate": 8.215620507906086e-05, "loss": 0.7427, "step": 13785 }, { "epoch": 0.9340741242631615, "grad_norm": 6.619680404663086, "learning_rate": 8.215483605996304e-05, "loss": 0.8251, "step": 13786 }, { "epoch": 0.9341418795311336, "grad_norm": 6.401364803314209, "learning_rate": 8.215346704086524e-05, "loss": 0.6732, "step": 13787 }, { "epoch": 0.9342096347991057, "grad_norm": 4.641686916351318, "learning_rate": 8.215209802176742e-05, "loss": 0.5844, "step": 13788 }, { "epoch": 0.9342773900670777, "grad_norm": 5.998747825622559, "learning_rate": 8.21507290026696e-05, "loss": 0.9057, "step": 13789 }, { "epoch": 0.9343451453350498, "grad_norm": 6.920633316040039, "learning_rate": 8.214935998357178e-05, "loss": 1.0017, "step": 13790 }, { "epoch": 0.9344129006030218, "grad_norm": 5.0339460372924805, "learning_rate": 8.214799096447396e-05, "loss": 0.7182, "step": 13791 }, { "epoch": 0.9344806558709939, "grad_norm": 6.2433762550354, "learning_rate": 8.214662194537615e-05, "loss": 0.5608, "step": 13792 }, { "epoch": 0.934548411138966, "grad_norm": 4.9427056312561035, "learning_rate": 8.214525292627833e-05, "loss": 0.6635, "step": 13793 }, { "epoch": 0.9346161664069381, "grad_norm": 5.293502330780029, "learning_rate": 8.214388390718051e-05, "loss": 0.6524, "step": 13794 }, { "epoch": 0.9346839216749102, "grad_norm": 5.291229724884033, "learning_rate": 8.214251488808269e-05, "loss": 0.8088, "step": 13795 }, { "epoch": 0.9347516769428823, "grad_norm": 6.6330389976501465, "learning_rate": 8.214114586898489e-05, "loss": 0.83, "step": 13796 }, { "epoch": 0.9348194322108544, "grad_norm": 4.883660316467285, "learning_rate": 8.213977684988707e-05, "loss": 0.849, "step": 13797 }, { "epoch": 0.9348871874788265, "grad_norm": 9.394811630249023, "learning_rate": 8.213840783078925e-05, "loss": 1.1274, "step": 13798 }, { "epoch": 0.9349549427467986, "grad_norm": 7.191971778869629, "learning_rate": 8.213703881169143e-05, "loss": 1.022, "step": 13799 }, { "epoch": 0.9350226980147707, "grad_norm": 5.284100532531738, "learning_rate": 8.21356697925936e-05, "loss": 0.7243, "step": 13800 }, { "epoch": 0.9350904532827428, "grad_norm": 4.5223469734191895, "learning_rate": 8.21343007734958e-05, "loss": 0.7437, "step": 13801 }, { "epoch": 0.9351582085507149, "grad_norm": 9.137798309326172, "learning_rate": 8.213293175439798e-05, "loss": 0.8741, "step": 13802 }, { "epoch": 0.935225963818687, "grad_norm": 5.241482257843018, "learning_rate": 8.213156273530016e-05, "loss": 0.7517, "step": 13803 }, { "epoch": 0.935293719086659, "grad_norm": 6.769075870513916, "learning_rate": 8.213019371620234e-05, "loss": 0.9897, "step": 13804 }, { "epoch": 0.935361474354631, "grad_norm": 5.048720836639404, "learning_rate": 8.212882469710454e-05, "loss": 0.7311, "step": 13805 }, { "epoch": 0.9354292296226031, "grad_norm": 5.8939385414123535, "learning_rate": 8.212745567800672e-05, "loss": 0.7927, "step": 13806 }, { "epoch": 0.9354969848905752, "grad_norm": 6.683951377868652, "learning_rate": 8.21260866589089e-05, "loss": 0.7332, "step": 13807 }, { "epoch": 0.9355647401585473, "grad_norm": 6.631941318511963, "learning_rate": 8.212471763981108e-05, "loss": 0.8927, "step": 13808 }, { "epoch": 0.9356324954265194, "grad_norm": 6.161096096038818, "learning_rate": 8.212334862071326e-05, "loss": 0.9347, "step": 13809 }, { "epoch": 0.9357002506944915, "grad_norm": 9.04511547088623, "learning_rate": 8.212197960161545e-05, "loss": 0.7569, "step": 13810 }, { "epoch": 0.9357680059624636, "grad_norm": 6.299793720245361, "learning_rate": 8.212061058251763e-05, "loss": 0.8722, "step": 13811 }, { "epoch": 0.9358357612304357, "grad_norm": 6.218029975891113, "learning_rate": 8.211924156341981e-05, "loss": 0.4517, "step": 13812 }, { "epoch": 0.9359035164984078, "grad_norm": 6.690893650054932, "learning_rate": 8.211787254432199e-05, "loss": 0.7149, "step": 13813 }, { "epoch": 0.9359712717663798, "grad_norm": 5.8090009689331055, "learning_rate": 8.211650352522417e-05, "loss": 0.7553, "step": 13814 }, { "epoch": 0.9360390270343519, "grad_norm": 6.034711837768555, "learning_rate": 8.211513450612637e-05, "loss": 0.7217, "step": 13815 }, { "epoch": 0.936106782302324, "grad_norm": 5.093395233154297, "learning_rate": 8.211376548702855e-05, "loss": 0.9626, "step": 13816 }, { "epoch": 0.9361745375702961, "grad_norm": 7.061664581298828, "learning_rate": 8.211239646793073e-05, "loss": 1.0739, "step": 13817 }, { "epoch": 0.9362422928382682, "grad_norm": 5.97583532333374, "learning_rate": 8.211102744883291e-05, "loss": 1.0293, "step": 13818 }, { "epoch": 0.9363100481062403, "grad_norm": 6.2469282150268555, "learning_rate": 8.21096584297351e-05, "loss": 0.8048, "step": 13819 }, { "epoch": 0.9363778033742124, "grad_norm": 6.801420211791992, "learning_rate": 8.210828941063728e-05, "loss": 1.1723, "step": 13820 }, { "epoch": 0.9364455586421845, "grad_norm": 5.765828609466553, "learning_rate": 8.210692039153946e-05, "loss": 0.8612, "step": 13821 }, { "epoch": 0.9365133139101565, "grad_norm": 5.4772233963012695, "learning_rate": 8.210555137244164e-05, "loss": 0.7415, "step": 13822 }, { "epoch": 0.9365810691781286, "grad_norm": 5.3950042724609375, "learning_rate": 8.210418235334382e-05, "loss": 0.623, "step": 13823 }, { "epoch": 0.9366488244461006, "grad_norm": 6.950892925262451, "learning_rate": 8.210281333424602e-05, "loss": 0.9234, "step": 13824 }, { "epoch": 0.9367165797140727, "grad_norm": 6.8263044357299805, "learning_rate": 8.21014443151482e-05, "loss": 0.6791, "step": 13825 }, { "epoch": 0.9367843349820448, "grad_norm": 6.001094818115234, "learning_rate": 8.210007529605038e-05, "loss": 0.8851, "step": 13826 }, { "epoch": 0.9368520902500169, "grad_norm": 5.480862617492676, "learning_rate": 8.209870627695256e-05, "loss": 0.6752, "step": 13827 }, { "epoch": 0.936919845517989, "grad_norm": 8.880070686340332, "learning_rate": 8.209733725785475e-05, "loss": 0.6895, "step": 13828 }, { "epoch": 0.9369876007859611, "grad_norm": 5.244697570800781, "learning_rate": 8.209596823875693e-05, "loss": 0.7239, "step": 13829 }, { "epoch": 0.9370553560539332, "grad_norm": 5.342809677124023, "learning_rate": 8.209459921965911e-05, "loss": 0.9466, "step": 13830 }, { "epoch": 0.9371231113219053, "grad_norm": 5.547102928161621, "learning_rate": 8.20932302005613e-05, "loss": 0.668, "step": 13831 }, { "epoch": 0.9371908665898774, "grad_norm": 5.530054092407227, "learning_rate": 8.209186118146349e-05, "loss": 0.7622, "step": 13832 }, { "epoch": 0.9372586218578495, "grad_norm": 6.689416885375977, "learning_rate": 8.209049216236567e-05, "loss": 0.7291, "step": 13833 }, { "epoch": 0.9373263771258216, "grad_norm": 5.052443981170654, "learning_rate": 8.208912314326786e-05, "loss": 0.7491, "step": 13834 }, { "epoch": 0.9373941323937937, "grad_norm": 8.584953308105469, "learning_rate": 8.208775412417004e-05, "loss": 0.867, "step": 13835 }, { "epoch": 0.9374618876617657, "grad_norm": 6.425045967102051, "learning_rate": 8.208638510507222e-05, "loss": 0.6844, "step": 13836 }, { "epoch": 0.9375296429297378, "grad_norm": 7.346054553985596, "learning_rate": 8.20850160859744e-05, "loss": 0.7747, "step": 13837 }, { "epoch": 0.9375973981977098, "grad_norm": 5.191468715667725, "learning_rate": 8.20836470668766e-05, "loss": 0.8087, "step": 13838 }, { "epoch": 0.9376651534656819, "grad_norm": 5.554154396057129, "learning_rate": 8.208227804777878e-05, "loss": 0.6445, "step": 13839 }, { "epoch": 0.937732908733654, "grad_norm": 6.577019214630127, "learning_rate": 8.208090902868096e-05, "loss": 0.9777, "step": 13840 }, { "epoch": 0.9378006640016261, "grad_norm": 5.5466742515563965, "learning_rate": 8.207954000958314e-05, "loss": 0.7303, "step": 13841 }, { "epoch": 0.9378684192695982, "grad_norm": 6.088878154754639, "learning_rate": 8.207817099048533e-05, "loss": 0.8181, "step": 13842 }, { "epoch": 0.9379361745375703, "grad_norm": 6.197640895843506, "learning_rate": 8.207680197138751e-05, "loss": 0.7192, "step": 13843 }, { "epoch": 0.9380039298055424, "grad_norm": 4.92954158782959, "learning_rate": 8.207543295228969e-05, "loss": 0.6276, "step": 13844 }, { "epoch": 0.9380716850735145, "grad_norm": 5.180205345153809, "learning_rate": 8.207406393319187e-05, "loss": 0.783, "step": 13845 }, { "epoch": 0.9381394403414866, "grad_norm": 5.946224212646484, "learning_rate": 8.207269491409405e-05, "loss": 0.775, "step": 13846 }, { "epoch": 0.9382071956094586, "grad_norm": 5.4234490394592285, "learning_rate": 8.207132589499625e-05, "loss": 0.6428, "step": 13847 }, { "epoch": 0.9382749508774307, "grad_norm": 4.664503574371338, "learning_rate": 8.206995687589843e-05, "loss": 0.7846, "step": 13848 }, { "epoch": 0.9383427061454028, "grad_norm": 5.197528839111328, "learning_rate": 8.20685878568006e-05, "loss": 0.7588, "step": 13849 }, { "epoch": 0.9384104614133749, "grad_norm": 6.116962909698486, "learning_rate": 8.206721883770279e-05, "loss": 0.9564, "step": 13850 }, { "epoch": 0.938478216681347, "grad_norm": 5.113432884216309, "learning_rate": 8.206584981860498e-05, "loss": 0.7534, "step": 13851 }, { "epoch": 0.9385459719493191, "grad_norm": 6.907203674316406, "learning_rate": 8.206448079950716e-05, "loss": 0.9213, "step": 13852 }, { "epoch": 0.9386137272172912, "grad_norm": 7.037908554077148, "learning_rate": 8.206311178040934e-05, "loss": 0.7943, "step": 13853 }, { "epoch": 0.9386814824852632, "grad_norm": 6.385385036468506, "learning_rate": 8.206174276131152e-05, "loss": 0.7192, "step": 13854 }, { "epoch": 0.9387492377532353, "grad_norm": 6.877847194671631, "learning_rate": 8.20603737422137e-05, "loss": 1.0433, "step": 13855 }, { "epoch": 0.9388169930212074, "grad_norm": 6.244350910186768, "learning_rate": 8.20590047231159e-05, "loss": 0.7018, "step": 13856 }, { "epoch": 0.9388847482891794, "grad_norm": 5.529423236846924, "learning_rate": 8.205763570401808e-05, "loss": 0.825, "step": 13857 }, { "epoch": 0.9389525035571515, "grad_norm": 5.644784450531006, "learning_rate": 8.205626668492026e-05, "loss": 0.6996, "step": 13858 }, { "epoch": 0.9390202588251236, "grad_norm": 6.088039875030518, "learning_rate": 8.205489766582244e-05, "loss": 0.7494, "step": 13859 }, { "epoch": 0.9390880140930957, "grad_norm": 4.0728936195373535, "learning_rate": 8.205352864672463e-05, "loss": 0.5493, "step": 13860 }, { "epoch": 0.9391557693610678, "grad_norm": 6.4296698570251465, "learning_rate": 8.205215962762681e-05, "loss": 0.8059, "step": 13861 }, { "epoch": 0.9392235246290399, "grad_norm": 4.51001501083374, "learning_rate": 8.205079060852899e-05, "loss": 0.6298, "step": 13862 }, { "epoch": 0.939291279897012, "grad_norm": 5.673875331878662, "learning_rate": 8.204942158943117e-05, "loss": 0.8035, "step": 13863 }, { "epoch": 0.9393590351649841, "grad_norm": 6.233775615692139, "learning_rate": 8.204805257033335e-05, "loss": 0.7389, "step": 13864 }, { "epoch": 0.9394267904329562, "grad_norm": 6.151493549346924, "learning_rate": 8.204668355123555e-05, "loss": 0.6998, "step": 13865 }, { "epoch": 0.9394945457009283, "grad_norm": 4.745614051818848, "learning_rate": 8.204531453213773e-05, "loss": 0.5749, "step": 13866 }, { "epoch": 0.9395623009689004, "grad_norm": 6.141815185546875, "learning_rate": 8.20439455130399e-05, "loss": 0.5891, "step": 13867 }, { "epoch": 0.9396300562368725, "grad_norm": 6.6597490310668945, "learning_rate": 8.204257649394209e-05, "loss": 0.7727, "step": 13868 }, { "epoch": 0.9396978115048445, "grad_norm": 6.619930267333984, "learning_rate": 8.204120747484427e-05, "loss": 0.6702, "step": 13869 }, { "epoch": 0.9397655667728166, "grad_norm": 5.507278919219971, "learning_rate": 8.203983845574646e-05, "loss": 0.7741, "step": 13870 }, { "epoch": 0.9398333220407886, "grad_norm": 5.685328960418701, "learning_rate": 8.203846943664864e-05, "loss": 0.9224, "step": 13871 }, { "epoch": 0.9399010773087607, "grad_norm": 5.3461456298828125, "learning_rate": 8.203710041755082e-05, "loss": 0.6182, "step": 13872 }, { "epoch": 0.9399688325767328, "grad_norm": 8.528802871704102, "learning_rate": 8.2035731398453e-05, "loss": 0.7898, "step": 13873 }, { "epoch": 0.9400365878447049, "grad_norm": 6.036088943481445, "learning_rate": 8.20343623793552e-05, "loss": 1.0215, "step": 13874 }, { "epoch": 0.940104343112677, "grad_norm": 7.7004289627075195, "learning_rate": 8.203299336025738e-05, "loss": 0.7898, "step": 13875 }, { "epoch": 0.9401720983806491, "grad_norm": 6.902210712432861, "learning_rate": 8.203162434115956e-05, "loss": 0.7609, "step": 13876 }, { "epoch": 0.9402398536486212, "grad_norm": 9.464262962341309, "learning_rate": 8.203025532206175e-05, "loss": 1.1929, "step": 13877 }, { "epoch": 0.9403076089165933, "grad_norm": 6.184045791625977, "learning_rate": 8.202888630296393e-05, "loss": 0.7753, "step": 13878 }, { "epoch": 0.9403753641845654, "grad_norm": 7.176342964172363, "learning_rate": 8.202751728386611e-05, "loss": 0.6658, "step": 13879 }, { "epoch": 0.9404431194525374, "grad_norm": 5.406729221343994, "learning_rate": 8.20261482647683e-05, "loss": 0.6304, "step": 13880 }, { "epoch": 0.9405108747205095, "grad_norm": 6.102396488189697, "learning_rate": 8.202477924567049e-05, "loss": 0.8271, "step": 13881 }, { "epoch": 0.9405786299884816, "grad_norm": 5.444383144378662, "learning_rate": 8.202341022657267e-05, "loss": 0.6307, "step": 13882 }, { "epoch": 0.9406463852564537, "grad_norm": 4.637354850769043, "learning_rate": 8.202204120747486e-05, "loss": 0.7097, "step": 13883 }, { "epoch": 0.9407141405244258, "grad_norm": 9.361139297485352, "learning_rate": 8.202067218837704e-05, "loss": 0.7545, "step": 13884 }, { "epoch": 0.9407818957923979, "grad_norm": 5.362797260284424, "learning_rate": 8.201930316927922e-05, "loss": 0.8605, "step": 13885 }, { "epoch": 0.94084965106037, "grad_norm": 5.900387287139893, "learning_rate": 8.20179341501814e-05, "loss": 0.6518, "step": 13886 }, { "epoch": 0.940917406328342, "grad_norm": 5.875095844268799, "learning_rate": 8.201656513108358e-05, "loss": 0.7603, "step": 13887 }, { "epoch": 0.9409851615963141, "grad_norm": 7.324997425079346, "learning_rate": 8.201519611198577e-05, "loss": 0.7821, "step": 13888 }, { "epoch": 0.9410529168642862, "grad_norm": 6.422165870666504, "learning_rate": 8.201382709288796e-05, "loss": 0.7973, "step": 13889 }, { "epoch": 0.9411206721322583, "grad_norm": 5.074305534362793, "learning_rate": 8.201245807379014e-05, "loss": 0.6436, "step": 13890 }, { "epoch": 0.9411884274002303, "grad_norm": 6.061481952667236, "learning_rate": 8.201108905469232e-05, "loss": 0.7467, "step": 13891 }, { "epoch": 0.9412561826682024, "grad_norm": 5.551920413970947, "learning_rate": 8.20097200355945e-05, "loss": 0.6732, "step": 13892 }, { "epoch": 0.9413239379361745, "grad_norm": 6.896834373474121, "learning_rate": 8.200835101649669e-05, "loss": 0.7616, "step": 13893 }, { "epoch": 0.9413916932041466, "grad_norm": 6.073055744171143, "learning_rate": 8.200698199739887e-05, "loss": 0.6834, "step": 13894 }, { "epoch": 0.9414594484721187, "grad_norm": 7.429131984710693, "learning_rate": 8.200561297830105e-05, "loss": 0.6438, "step": 13895 }, { "epoch": 0.9415272037400908, "grad_norm": 8.393160820007324, "learning_rate": 8.200424395920323e-05, "loss": 0.8012, "step": 13896 }, { "epoch": 0.9415949590080629, "grad_norm": 6.223710060119629, "learning_rate": 8.200287494010542e-05, "loss": 0.8734, "step": 13897 }, { "epoch": 0.941662714276035, "grad_norm": 4.849613666534424, "learning_rate": 8.20015059210076e-05, "loss": 0.8162, "step": 13898 }, { "epoch": 0.9417304695440071, "grad_norm": 5.789686679840088, "learning_rate": 8.200013690190979e-05, "loss": 0.6433, "step": 13899 }, { "epoch": 0.9417982248119792, "grad_norm": 5.670434951782227, "learning_rate": 8.199876788281197e-05, "loss": 0.7761, "step": 13900 }, { "epoch": 0.9418659800799513, "grad_norm": 6.698735237121582, "learning_rate": 8.199739886371415e-05, "loss": 0.6752, "step": 13901 }, { "epoch": 0.9419337353479234, "grad_norm": 6.325132369995117, "learning_rate": 8.199602984461634e-05, "loss": 0.7789, "step": 13902 }, { "epoch": 0.9420014906158953, "grad_norm": 5.625211715698242, "learning_rate": 8.199466082551852e-05, "loss": 0.883, "step": 13903 }, { "epoch": 0.9420692458838674, "grad_norm": 6.5696516036987305, "learning_rate": 8.19932918064207e-05, "loss": 0.7999, "step": 13904 }, { "epoch": 0.9421370011518395, "grad_norm": 4.903794288635254, "learning_rate": 8.199192278732288e-05, "loss": 0.8234, "step": 13905 }, { "epoch": 0.9422047564198116, "grad_norm": 4.687190532684326, "learning_rate": 8.199055376822508e-05, "loss": 0.7286, "step": 13906 }, { "epoch": 0.9422725116877837, "grad_norm": 6.075998783111572, "learning_rate": 8.198918474912726e-05, "loss": 0.8778, "step": 13907 }, { "epoch": 0.9423402669557558, "grad_norm": 6.295614719390869, "learning_rate": 8.198781573002944e-05, "loss": 0.8878, "step": 13908 }, { "epoch": 0.9424080222237279, "grad_norm": 8.557680130004883, "learning_rate": 8.198644671093162e-05, "loss": 0.776, "step": 13909 }, { "epoch": 0.9424757774917, "grad_norm": 6.864640712738037, "learning_rate": 8.19850776918338e-05, "loss": 1.0199, "step": 13910 }, { "epoch": 0.9425435327596721, "grad_norm": 5.549873352050781, "learning_rate": 8.198370867273599e-05, "loss": 0.8881, "step": 13911 }, { "epoch": 0.9426112880276442, "grad_norm": 6.9793548583984375, "learning_rate": 8.198233965363817e-05, "loss": 0.79, "step": 13912 }, { "epoch": 0.9426790432956162, "grad_norm": 7.035325527191162, "learning_rate": 8.198097063454035e-05, "loss": 0.7801, "step": 13913 }, { "epoch": 0.9427467985635883, "grad_norm": 6.695631980895996, "learning_rate": 8.197960161544253e-05, "loss": 0.7903, "step": 13914 }, { "epoch": 0.9428145538315604, "grad_norm": 7.544031143188477, "learning_rate": 8.197823259634473e-05, "loss": 0.5813, "step": 13915 }, { "epoch": 0.9428823090995325, "grad_norm": 5.196893215179443, "learning_rate": 8.19768635772469e-05, "loss": 0.7587, "step": 13916 }, { "epoch": 0.9429500643675046, "grad_norm": 6.548089027404785, "learning_rate": 8.197549455814909e-05, "loss": 0.9122, "step": 13917 }, { "epoch": 0.9430178196354767, "grad_norm": 4.924033164978027, "learning_rate": 8.197412553905127e-05, "loss": 0.6318, "step": 13918 }, { "epoch": 0.9430855749034488, "grad_norm": 5.648553371429443, "learning_rate": 8.197275651995345e-05, "loss": 0.7063, "step": 13919 }, { "epoch": 0.9431533301714208, "grad_norm": 7.385311126708984, "learning_rate": 8.197138750085564e-05, "loss": 0.782, "step": 13920 }, { "epoch": 0.9432210854393929, "grad_norm": 5.233833312988281, "learning_rate": 8.197001848175782e-05, "loss": 0.6857, "step": 13921 }, { "epoch": 0.943288840707365, "grad_norm": 7.0770487785339355, "learning_rate": 8.196864946266e-05, "loss": 0.6966, "step": 13922 }, { "epoch": 0.943356595975337, "grad_norm": 6.8348307609558105, "learning_rate": 8.19672804435622e-05, "loss": 0.8207, "step": 13923 }, { "epoch": 0.9434243512433091, "grad_norm": 4.237405300140381, "learning_rate": 8.196591142446438e-05, "loss": 0.8074, "step": 13924 }, { "epoch": 0.9434921065112812, "grad_norm": 6.041856288909912, "learning_rate": 8.196454240536656e-05, "loss": 0.7794, "step": 13925 }, { "epoch": 0.9435598617792533, "grad_norm": 8.183391571044922, "learning_rate": 8.196317338626875e-05, "loss": 0.7523, "step": 13926 }, { "epoch": 0.9436276170472254, "grad_norm": 5.387772083282471, "learning_rate": 8.196180436717093e-05, "loss": 0.7759, "step": 13927 }, { "epoch": 0.9436953723151975, "grad_norm": 5.296437740325928, "learning_rate": 8.196043534807311e-05, "loss": 0.7972, "step": 13928 }, { "epoch": 0.9437631275831696, "grad_norm": 5.269390106201172, "learning_rate": 8.19590663289753e-05, "loss": 0.7055, "step": 13929 }, { "epoch": 0.9438308828511417, "grad_norm": 7.258601665496826, "learning_rate": 8.195769730987748e-05, "loss": 0.8937, "step": 13930 }, { "epoch": 0.9438986381191138, "grad_norm": 6.784587383270264, "learning_rate": 8.195632829077966e-05, "loss": 1.0161, "step": 13931 }, { "epoch": 0.9439663933870859, "grad_norm": 5.093071937561035, "learning_rate": 8.195495927168185e-05, "loss": 0.7299, "step": 13932 }, { "epoch": 0.944034148655058, "grad_norm": 5.3693013191223145, "learning_rate": 8.195359025258403e-05, "loss": 0.7403, "step": 13933 }, { "epoch": 0.9441019039230301, "grad_norm": 5.527505874633789, "learning_rate": 8.195222123348622e-05, "loss": 0.6796, "step": 13934 }, { "epoch": 0.9441696591910022, "grad_norm": 5.87165641784668, "learning_rate": 8.19508522143884e-05, "loss": 0.7385, "step": 13935 }, { "epoch": 0.9442374144589741, "grad_norm": 6.087599754333496, "learning_rate": 8.194948319529058e-05, "loss": 0.545, "step": 13936 }, { "epoch": 0.9443051697269462, "grad_norm": 5.642385005950928, "learning_rate": 8.194811417619276e-05, "loss": 0.7826, "step": 13937 }, { "epoch": 0.9443729249949183, "grad_norm": 4.871824741363525, "learning_rate": 8.194674515709495e-05, "loss": 0.6956, "step": 13938 }, { "epoch": 0.9444406802628904, "grad_norm": 4.260469913482666, "learning_rate": 8.194537613799713e-05, "loss": 0.561, "step": 13939 }, { "epoch": 0.9445084355308625, "grad_norm": 5.726165294647217, "learning_rate": 8.194400711889932e-05, "loss": 0.9119, "step": 13940 }, { "epoch": 0.9445761907988346, "grad_norm": 8.435538291931152, "learning_rate": 8.19426380998015e-05, "loss": 1.0353, "step": 13941 }, { "epoch": 0.9446439460668067, "grad_norm": 5.619915962219238, "learning_rate": 8.194126908070368e-05, "loss": 0.7213, "step": 13942 }, { "epoch": 0.9447117013347788, "grad_norm": 6.574455261230469, "learning_rate": 8.193990006160587e-05, "loss": 0.9633, "step": 13943 }, { "epoch": 0.9447794566027509, "grad_norm": 6.439619064331055, "learning_rate": 8.193853104250805e-05, "loss": 0.7964, "step": 13944 }, { "epoch": 0.944847211870723, "grad_norm": 6.305572509765625, "learning_rate": 8.193716202341023e-05, "loss": 0.72, "step": 13945 }, { "epoch": 0.944914967138695, "grad_norm": 5.285477638244629, "learning_rate": 8.193579300431241e-05, "loss": 0.9051, "step": 13946 }, { "epoch": 0.9449827224066671, "grad_norm": 5.163026332855225, "learning_rate": 8.193442398521459e-05, "loss": 0.57, "step": 13947 }, { "epoch": 0.9450504776746392, "grad_norm": 5.176440715789795, "learning_rate": 8.193305496611678e-05, "loss": 0.7803, "step": 13948 }, { "epoch": 0.9451182329426113, "grad_norm": 6.866820335388184, "learning_rate": 8.193168594701897e-05, "loss": 0.7486, "step": 13949 }, { "epoch": 0.9451859882105834, "grad_norm": 6.827968597412109, "learning_rate": 8.193031692792115e-05, "loss": 0.7738, "step": 13950 }, { "epoch": 0.9452537434785555, "grad_norm": 6.111838340759277, "learning_rate": 8.192894790882333e-05, "loss": 0.8302, "step": 13951 }, { "epoch": 0.9453214987465275, "grad_norm": 6.111614227294922, "learning_rate": 8.192757888972552e-05, "loss": 0.8413, "step": 13952 }, { "epoch": 0.9453892540144996, "grad_norm": 5.463453769683838, "learning_rate": 8.19262098706277e-05, "loss": 0.7054, "step": 13953 }, { "epoch": 0.9454570092824717, "grad_norm": 6.099150657653809, "learning_rate": 8.192484085152988e-05, "loss": 0.596, "step": 13954 }, { "epoch": 0.9455247645504438, "grad_norm": 6.040791988372803, "learning_rate": 8.192347183243206e-05, "loss": 0.6796, "step": 13955 }, { "epoch": 0.9455925198184159, "grad_norm": 6.111145973205566, "learning_rate": 8.192210281333424e-05, "loss": 0.8444, "step": 13956 }, { "epoch": 0.945660275086388, "grad_norm": 6.970156192779541, "learning_rate": 8.192073379423644e-05, "loss": 0.5742, "step": 13957 }, { "epoch": 0.94572803035436, "grad_norm": 6.0487165451049805, "learning_rate": 8.191936477513862e-05, "loss": 0.8061, "step": 13958 }, { "epoch": 0.9457957856223321, "grad_norm": 4.807767868041992, "learning_rate": 8.19179957560408e-05, "loss": 0.6691, "step": 13959 }, { "epoch": 0.9458635408903042, "grad_norm": 5.062884330749512, "learning_rate": 8.191662673694298e-05, "loss": 0.9048, "step": 13960 }, { "epoch": 0.9459312961582763, "grad_norm": 6.347843647003174, "learning_rate": 8.191525771784517e-05, "loss": 0.8603, "step": 13961 }, { "epoch": 0.9459990514262484, "grad_norm": 5.537858009338379, "learning_rate": 8.191388869874735e-05, "loss": 0.8675, "step": 13962 }, { "epoch": 0.9460668066942205, "grad_norm": 5.778237819671631, "learning_rate": 8.191251967964953e-05, "loss": 0.5481, "step": 13963 }, { "epoch": 0.9461345619621926, "grad_norm": 5.969203472137451, "learning_rate": 8.191115066055171e-05, "loss": 0.7099, "step": 13964 }, { "epoch": 0.9462023172301647, "grad_norm": 7.126950263977051, "learning_rate": 8.190978164145389e-05, "loss": 0.8117, "step": 13965 }, { "epoch": 0.9462700724981368, "grad_norm": 5.3434624671936035, "learning_rate": 8.190841262235609e-05, "loss": 0.8133, "step": 13966 }, { "epoch": 0.9463378277661089, "grad_norm": 5.5889105796813965, "learning_rate": 8.190704360325827e-05, "loss": 0.8104, "step": 13967 }, { "epoch": 0.946405583034081, "grad_norm": 6.536361217498779, "learning_rate": 8.190567458416045e-05, "loss": 0.7897, "step": 13968 }, { "epoch": 0.9464733383020529, "grad_norm": 6.82296085357666, "learning_rate": 8.190430556506264e-05, "loss": 1.0064, "step": 13969 }, { "epoch": 0.946541093570025, "grad_norm": 6.4476470947265625, "learning_rate": 8.190293654596482e-05, "loss": 0.9211, "step": 13970 }, { "epoch": 0.9466088488379971, "grad_norm": 6.636000156402588, "learning_rate": 8.1901567526867e-05, "loss": 0.6633, "step": 13971 }, { "epoch": 0.9466766041059692, "grad_norm": 6.36086368560791, "learning_rate": 8.19001985077692e-05, "loss": 0.6275, "step": 13972 }, { "epoch": 0.9467443593739413, "grad_norm": 7.286365032196045, "learning_rate": 8.189882948867137e-05, "loss": 0.7431, "step": 13973 }, { "epoch": 0.9468121146419134, "grad_norm": 7.394494533538818, "learning_rate": 8.189746046957356e-05, "loss": 0.8701, "step": 13974 }, { "epoch": 0.9468798699098855, "grad_norm": 5.378866195678711, "learning_rate": 8.189609145047575e-05, "loss": 0.6846, "step": 13975 }, { "epoch": 0.9469476251778576, "grad_norm": 9.122882843017578, "learning_rate": 8.189472243137793e-05, "loss": 0.8023, "step": 13976 }, { "epoch": 0.9470153804458297, "grad_norm": 6.454582214355469, "learning_rate": 8.189335341228011e-05, "loss": 0.7631, "step": 13977 }, { "epoch": 0.9470831357138018, "grad_norm": 5.218212604522705, "learning_rate": 8.189198439318229e-05, "loss": 0.7752, "step": 13978 }, { "epoch": 0.9471508909817739, "grad_norm": 6.383564472198486, "learning_rate": 8.189061537408447e-05, "loss": 0.7355, "step": 13979 }, { "epoch": 0.9472186462497459, "grad_norm": 4.193008899688721, "learning_rate": 8.188924635498666e-05, "loss": 0.5955, "step": 13980 }, { "epoch": 0.947286401517718, "grad_norm": 6.7840423583984375, "learning_rate": 8.188787733588884e-05, "loss": 0.8695, "step": 13981 }, { "epoch": 0.9473541567856901, "grad_norm": 5.606986999511719, "learning_rate": 8.188650831679102e-05, "loss": 0.8062, "step": 13982 }, { "epoch": 0.9474219120536622, "grad_norm": 4.632526397705078, "learning_rate": 8.18851392976932e-05, "loss": 0.9447, "step": 13983 }, { "epoch": 0.9474896673216343, "grad_norm": 7.079931259155273, "learning_rate": 8.18837702785954e-05, "loss": 0.7606, "step": 13984 }, { "epoch": 0.9475574225896063, "grad_norm": 6.6071858406066895, "learning_rate": 8.188240125949758e-05, "loss": 1.0442, "step": 13985 }, { "epoch": 0.9476251778575784, "grad_norm": 6.782355308532715, "learning_rate": 8.188103224039976e-05, "loss": 0.6173, "step": 13986 }, { "epoch": 0.9476929331255505, "grad_norm": 6.116751194000244, "learning_rate": 8.187966322130194e-05, "loss": 0.7438, "step": 13987 }, { "epoch": 0.9477606883935226, "grad_norm": 6.912460803985596, "learning_rate": 8.187829420220412e-05, "loss": 0.7509, "step": 13988 }, { "epoch": 0.9478284436614947, "grad_norm": 6.737586498260498, "learning_rate": 8.187692518310631e-05, "loss": 0.8741, "step": 13989 }, { "epoch": 0.9478961989294667, "grad_norm": 4.6144938468933105, "learning_rate": 8.18755561640085e-05, "loss": 0.8468, "step": 13990 }, { "epoch": 0.9479639541974388, "grad_norm": 6.440502166748047, "learning_rate": 8.187418714491068e-05, "loss": 0.7353, "step": 13991 }, { "epoch": 0.9480317094654109, "grad_norm": 7.09966516494751, "learning_rate": 8.187281812581286e-05, "loss": 0.8539, "step": 13992 }, { "epoch": 0.948099464733383, "grad_norm": 7.213123321533203, "learning_rate": 8.187144910671505e-05, "loss": 0.5578, "step": 13993 }, { "epoch": 0.9481672200013551, "grad_norm": 5.926023006439209, "learning_rate": 8.187008008761723e-05, "loss": 0.7481, "step": 13994 }, { "epoch": 0.9482349752693272, "grad_norm": 6.6714768409729, "learning_rate": 8.186871106851941e-05, "loss": 0.8587, "step": 13995 }, { "epoch": 0.9483027305372993, "grad_norm": 6.985418319702148, "learning_rate": 8.186734204942159e-05, "loss": 0.9756, "step": 13996 }, { "epoch": 0.9483704858052714, "grad_norm": 4.339714050292969, "learning_rate": 8.186597303032377e-05, "loss": 0.7206, "step": 13997 }, { "epoch": 0.9484382410732435, "grad_norm": 5.789028644561768, "learning_rate": 8.186460401122596e-05, "loss": 0.7851, "step": 13998 }, { "epoch": 0.9485059963412156, "grad_norm": 4.326128005981445, "learning_rate": 8.186323499212814e-05, "loss": 0.6611, "step": 13999 }, { "epoch": 0.9485737516091877, "grad_norm": 4.751250267028809, "learning_rate": 8.186186597303033e-05, "loss": 0.791, "step": 14000 }, { "epoch": 0.9486415068771596, "grad_norm": 6.147752285003662, "learning_rate": 8.18604969539325e-05, "loss": 0.9781, "step": 14001 }, { "epoch": 0.9487092621451317, "grad_norm": 5.021920680999756, "learning_rate": 8.185912793483469e-05, "loss": 0.634, "step": 14002 }, { "epoch": 0.9487770174131038, "grad_norm": 5.617037773132324, "learning_rate": 8.185775891573688e-05, "loss": 0.6997, "step": 14003 }, { "epoch": 0.9488447726810759, "grad_norm": 5.281715393066406, "learning_rate": 8.185638989663906e-05, "loss": 0.6422, "step": 14004 }, { "epoch": 0.948912527949048, "grad_norm": 5.466105937957764, "learning_rate": 8.185502087754124e-05, "loss": 0.7793, "step": 14005 }, { "epoch": 0.9489802832170201, "grad_norm": 8.395241737365723, "learning_rate": 8.185365185844342e-05, "loss": 1.0168, "step": 14006 }, { "epoch": 0.9490480384849922, "grad_norm": 5.434301376342773, "learning_rate": 8.185228283934561e-05, "loss": 0.6043, "step": 14007 }, { "epoch": 0.9491157937529643, "grad_norm": 6.209643363952637, "learning_rate": 8.18509138202478e-05, "loss": 0.9309, "step": 14008 }, { "epoch": 0.9491835490209364, "grad_norm": 5.438107013702393, "learning_rate": 8.184954480114998e-05, "loss": 0.8226, "step": 14009 }, { "epoch": 0.9492513042889085, "grad_norm": 5.4521050453186035, "learning_rate": 8.184817578205216e-05, "loss": 0.9139, "step": 14010 }, { "epoch": 0.9493190595568806, "grad_norm": 6.864973068237305, "learning_rate": 8.184680676295434e-05, "loss": 0.9011, "step": 14011 }, { "epoch": 0.9493868148248527, "grad_norm": 3.955416202545166, "learning_rate": 8.184543774385653e-05, "loss": 0.6179, "step": 14012 }, { "epoch": 0.9494545700928247, "grad_norm": 4.496962547302246, "learning_rate": 8.184406872475871e-05, "loss": 0.7532, "step": 14013 }, { "epoch": 0.9495223253607968, "grad_norm": 7.178885459899902, "learning_rate": 8.184269970566089e-05, "loss": 0.8709, "step": 14014 }, { "epoch": 0.9495900806287689, "grad_norm": 8.725399017333984, "learning_rate": 8.184133068656308e-05, "loss": 0.6709, "step": 14015 }, { "epoch": 0.949657835896741, "grad_norm": 4.831404685974121, "learning_rate": 8.183996166746526e-05, "loss": 0.6221, "step": 14016 }, { "epoch": 0.949725591164713, "grad_norm": 5.273083209991455, "learning_rate": 8.183859264836745e-05, "loss": 0.6876, "step": 14017 }, { "epoch": 0.9497933464326851, "grad_norm": 5.9395833015441895, "learning_rate": 8.183722362926964e-05, "loss": 0.7032, "step": 14018 }, { "epoch": 0.9498611017006572, "grad_norm": 5.861425876617432, "learning_rate": 8.183585461017182e-05, "loss": 0.7697, "step": 14019 }, { "epoch": 0.9499288569686293, "grad_norm": 6.970915794372559, "learning_rate": 8.1834485591074e-05, "loss": 0.693, "step": 14020 }, { "epoch": 0.9499966122366014, "grad_norm": 5.096658229827881, "learning_rate": 8.18331165719762e-05, "loss": 0.9212, "step": 14021 }, { "epoch": 0.9500643675045735, "grad_norm": 6.328000068664551, "learning_rate": 8.183174755287837e-05, "loss": 0.7588, "step": 14022 }, { "epoch": 0.9501321227725456, "grad_norm": 5.18841552734375, "learning_rate": 8.183037853378055e-05, "loss": 0.57, "step": 14023 }, { "epoch": 0.9501998780405176, "grad_norm": 7.505560398101807, "learning_rate": 8.182900951468273e-05, "loss": 0.7265, "step": 14024 }, { "epoch": 0.9502676333084897, "grad_norm": 6.134244918823242, "learning_rate": 8.182764049558492e-05, "loss": 0.6735, "step": 14025 }, { "epoch": 0.9503353885764618, "grad_norm": 6.682973384857178, "learning_rate": 8.182627147648711e-05, "loss": 0.7814, "step": 14026 }, { "epoch": 0.9504031438444339, "grad_norm": 6.492305755615234, "learning_rate": 8.182490245738929e-05, "loss": 0.7248, "step": 14027 }, { "epoch": 0.950470899112406, "grad_norm": 4.836461067199707, "learning_rate": 8.182353343829147e-05, "loss": 0.5105, "step": 14028 }, { "epoch": 0.9505386543803781, "grad_norm": 6.350803375244141, "learning_rate": 8.182216441919365e-05, "loss": 0.9946, "step": 14029 }, { "epoch": 0.9506064096483502, "grad_norm": 6.154294490814209, "learning_rate": 8.182079540009584e-05, "loss": 0.6859, "step": 14030 }, { "epoch": 0.9506741649163223, "grad_norm": 5.958618640899658, "learning_rate": 8.181942638099802e-05, "loss": 0.8397, "step": 14031 }, { "epoch": 0.9507419201842944, "grad_norm": 5.353884696960449, "learning_rate": 8.18180573619002e-05, "loss": 0.8651, "step": 14032 }, { "epoch": 0.9508096754522665, "grad_norm": 4.889853000640869, "learning_rate": 8.181668834280238e-05, "loss": 0.7572, "step": 14033 }, { "epoch": 0.9508774307202384, "grad_norm": 6.175332069396973, "learning_rate": 8.181531932370457e-05, "loss": 0.9212, "step": 14034 }, { "epoch": 0.9509451859882105, "grad_norm": 6.263129711151123, "learning_rate": 8.181395030460676e-05, "loss": 0.6871, "step": 14035 }, { "epoch": 0.9510129412561826, "grad_norm": 5.999005317687988, "learning_rate": 8.181258128550894e-05, "loss": 0.8786, "step": 14036 }, { "epoch": 0.9510806965241547, "grad_norm": 5.486205577850342, "learning_rate": 8.181121226641112e-05, "loss": 0.7679, "step": 14037 }, { "epoch": 0.9511484517921268, "grad_norm": 4.255964756011963, "learning_rate": 8.18098432473133e-05, "loss": 0.5138, "step": 14038 }, { "epoch": 0.9512162070600989, "grad_norm": 7.037053108215332, "learning_rate": 8.18084742282155e-05, "loss": 0.8799, "step": 14039 }, { "epoch": 0.951283962328071, "grad_norm": 5.0967302322387695, "learning_rate": 8.180710520911767e-05, "loss": 0.6126, "step": 14040 }, { "epoch": 0.9513517175960431, "grad_norm": 4.5067458152771, "learning_rate": 8.180573619001985e-05, "loss": 0.5718, "step": 14041 }, { "epoch": 0.9514194728640152, "grad_norm": 4.694755554199219, "learning_rate": 8.180436717092204e-05, "loss": 0.712, "step": 14042 }, { "epoch": 0.9514872281319873, "grad_norm": 6.1123738288879395, "learning_rate": 8.180299815182422e-05, "loss": 0.7131, "step": 14043 }, { "epoch": 0.9515549833999594, "grad_norm": 7.102774143218994, "learning_rate": 8.180162913272641e-05, "loss": 0.6774, "step": 14044 }, { "epoch": 0.9516227386679315, "grad_norm": 5.857600212097168, "learning_rate": 8.180026011362859e-05, "loss": 0.7464, "step": 14045 }, { "epoch": 0.9516904939359035, "grad_norm": 5.622432708740234, "learning_rate": 8.179889109453077e-05, "loss": 0.6566, "step": 14046 }, { "epoch": 0.9517582492038756, "grad_norm": 8.488066673278809, "learning_rate": 8.179752207543295e-05, "loss": 0.9199, "step": 14047 }, { "epoch": 0.9518260044718477, "grad_norm": 7.23390531539917, "learning_rate": 8.179615305633514e-05, "loss": 0.884, "step": 14048 }, { "epoch": 0.9518937597398198, "grad_norm": 6.6131391525268555, "learning_rate": 8.179478403723732e-05, "loss": 1.0312, "step": 14049 }, { "epoch": 0.9519615150077918, "grad_norm": 5.167440891265869, "learning_rate": 8.17934150181395e-05, "loss": 0.6041, "step": 14050 }, { "epoch": 0.9520292702757639, "grad_norm": 6.33022403717041, "learning_rate": 8.179204599904169e-05, "loss": 0.8595, "step": 14051 }, { "epoch": 0.952097025543736, "grad_norm": 6.872222900390625, "learning_rate": 8.179067697994387e-05, "loss": 0.7336, "step": 14052 }, { "epoch": 0.9521647808117081, "grad_norm": 4.217645168304443, "learning_rate": 8.178930796084606e-05, "loss": 0.4555, "step": 14053 }, { "epoch": 0.9522325360796802, "grad_norm": 4.8638529777526855, "learning_rate": 8.178793894174824e-05, "loss": 0.6661, "step": 14054 }, { "epoch": 0.9523002913476523, "grad_norm": 7.580918788909912, "learning_rate": 8.178656992265042e-05, "loss": 0.7857, "step": 14055 }, { "epoch": 0.9523680466156244, "grad_norm": 6.182562351226807, "learning_rate": 8.17852009035526e-05, "loss": 0.7932, "step": 14056 }, { "epoch": 0.9524358018835964, "grad_norm": 7.604414939880371, "learning_rate": 8.178383188445478e-05, "loss": 0.5992, "step": 14057 }, { "epoch": 0.9525035571515685, "grad_norm": 6.542990684509277, "learning_rate": 8.178246286535697e-05, "loss": 0.888, "step": 14058 }, { "epoch": 0.9525713124195406, "grad_norm": 5.210031986236572, "learning_rate": 8.178109384625916e-05, "loss": 0.7931, "step": 14059 }, { "epoch": 0.9526390676875127, "grad_norm": 8.13759994506836, "learning_rate": 8.177972482716134e-05, "loss": 0.6912, "step": 14060 }, { "epoch": 0.9527068229554848, "grad_norm": 5.919729709625244, "learning_rate": 8.177835580806353e-05, "loss": 0.7952, "step": 14061 }, { "epoch": 0.9527745782234569, "grad_norm": 5.694857597351074, "learning_rate": 8.177698678896571e-05, "loss": 0.7651, "step": 14062 }, { "epoch": 0.952842333491429, "grad_norm": 6.005049705505371, "learning_rate": 8.177561776986789e-05, "loss": 0.6854, "step": 14063 }, { "epoch": 0.9529100887594011, "grad_norm": 7.230431079864502, "learning_rate": 8.177424875077008e-05, "loss": 0.815, "step": 14064 }, { "epoch": 0.9529778440273732, "grad_norm": 6.85237455368042, "learning_rate": 8.177287973167226e-05, "loss": 0.6984, "step": 14065 }, { "epoch": 0.9530455992953452, "grad_norm": 5.569474697113037, "learning_rate": 8.177151071257444e-05, "loss": 0.8195, "step": 14066 }, { "epoch": 0.9531133545633172, "grad_norm": 8.014307022094727, "learning_rate": 8.177014169347664e-05, "loss": 0.8865, "step": 14067 }, { "epoch": 0.9531811098312893, "grad_norm": 5.345339775085449, "learning_rate": 8.176877267437882e-05, "loss": 0.8422, "step": 14068 }, { "epoch": 0.9532488650992614, "grad_norm": 7.717896461486816, "learning_rate": 8.1767403655281e-05, "loss": 0.8969, "step": 14069 }, { "epoch": 0.9533166203672335, "grad_norm": 5.41009521484375, "learning_rate": 8.176603463618318e-05, "loss": 0.9792, "step": 14070 }, { "epoch": 0.9533843756352056, "grad_norm": 4.825246334075928, "learning_rate": 8.176466561708537e-05, "loss": 0.5923, "step": 14071 }, { "epoch": 0.9534521309031777, "grad_norm": 4.686081409454346, "learning_rate": 8.176329659798755e-05, "loss": 0.8116, "step": 14072 }, { "epoch": 0.9535198861711498, "grad_norm": 4.988349914550781, "learning_rate": 8.176192757888973e-05, "loss": 0.8101, "step": 14073 }, { "epoch": 0.9535876414391219, "grad_norm": 6.294574737548828, "learning_rate": 8.176055855979191e-05, "loss": 0.7203, "step": 14074 }, { "epoch": 0.953655396707094, "grad_norm": 4.857511520385742, "learning_rate": 8.17591895406941e-05, "loss": 0.8133, "step": 14075 }, { "epoch": 0.9537231519750661, "grad_norm": 6.600233554840088, "learning_rate": 8.175782052159629e-05, "loss": 0.9022, "step": 14076 }, { "epoch": 0.9537909072430382, "grad_norm": 6.04002046585083, "learning_rate": 8.175645150249847e-05, "loss": 0.7713, "step": 14077 }, { "epoch": 0.9538586625110103, "grad_norm": 4.784701824188232, "learning_rate": 8.175508248340065e-05, "loss": 0.5853, "step": 14078 }, { "epoch": 0.9539264177789823, "grad_norm": 5.057199954986572, "learning_rate": 8.175371346430283e-05, "loss": 0.7118, "step": 14079 }, { "epoch": 0.9539941730469544, "grad_norm": 6.518017768859863, "learning_rate": 8.175234444520501e-05, "loss": 0.8091, "step": 14080 }, { "epoch": 0.9540619283149265, "grad_norm": 6.6895575523376465, "learning_rate": 8.17509754261072e-05, "loss": 0.8893, "step": 14081 }, { "epoch": 0.9541296835828986, "grad_norm": 5.571439743041992, "learning_rate": 8.174960640700938e-05, "loss": 0.6693, "step": 14082 }, { "epoch": 0.9541974388508706, "grad_norm": 5.339274883270264, "learning_rate": 8.174823738791156e-05, "loss": 0.5533, "step": 14083 }, { "epoch": 0.9542651941188427, "grad_norm": 6.4230523109436035, "learning_rate": 8.174686836881374e-05, "loss": 0.6819, "step": 14084 }, { "epoch": 0.9543329493868148, "grad_norm": 5.337852954864502, "learning_rate": 8.174549934971594e-05, "loss": 0.5664, "step": 14085 }, { "epoch": 0.9544007046547869, "grad_norm": 5.271894454956055, "learning_rate": 8.174413033061812e-05, "loss": 0.678, "step": 14086 }, { "epoch": 0.954468459922759, "grad_norm": 5.684970855712891, "learning_rate": 8.17427613115203e-05, "loss": 0.8186, "step": 14087 }, { "epoch": 0.9545362151907311, "grad_norm": 8.407366752624512, "learning_rate": 8.174139229242248e-05, "loss": 0.7385, "step": 14088 }, { "epoch": 0.9546039704587032, "grad_norm": 5.270580768585205, "learning_rate": 8.174002327332466e-05, "loss": 0.7859, "step": 14089 }, { "epoch": 0.9546717257266752, "grad_norm": 6.393465518951416, "learning_rate": 8.173865425422685e-05, "loss": 0.7205, "step": 14090 }, { "epoch": 0.9547394809946473, "grad_norm": 6.487541675567627, "learning_rate": 8.173728523512903e-05, "loss": 0.9124, "step": 14091 }, { "epoch": 0.9548072362626194, "grad_norm": 5.790227890014648, "learning_rate": 8.173591621603121e-05, "loss": 0.8357, "step": 14092 }, { "epoch": 0.9548749915305915, "grad_norm": 6.707381725311279, "learning_rate": 8.17345471969334e-05, "loss": 0.8281, "step": 14093 }, { "epoch": 0.9549427467985636, "grad_norm": 5.875377655029297, "learning_rate": 8.173317817783559e-05, "loss": 0.917, "step": 14094 }, { "epoch": 0.9550105020665357, "grad_norm": 9.384751319885254, "learning_rate": 8.173180915873777e-05, "loss": 0.8046, "step": 14095 }, { "epoch": 0.9550782573345078, "grad_norm": 4.465388298034668, "learning_rate": 8.173044013963995e-05, "loss": 0.4959, "step": 14096 }, { "epoch": 0.9551460126024799, "grad_norm": 5.929595947265625, "learning_rate": 8.172907112054213e-05, "loss": 0.655, "step": 14097 }, { "epoch": 0.955213767870452, "grad_norm": 6.126537322998047, "learning_rate": 8.172770210144431e-05, "loss": 0.8669, "step": 14098 }, { "epoch": 0.955281523138424, "grad_norm": 4.894435405731201, "learning_rate": 8.17263330823465e-05, "loss": 0.8827, "step": 14099 }, { "epoch": 0.955349278406396, "grad_norm": 5.8009490966796875, "learning_rate": 8.172496406324868e-05, "loss": 0.6743, "step": 14100 }, { "epoch": 0.9554170336743681, "grad_norm": 6.6259965896606445, "learning_rate": 8.172359504415086e-05, "loss": 0.7902, "step": 14101 }, { "epoch": 0.9554847889423402, "grad_norm": 8.393582344055176, "learning_rate": 8.172222602505305e-05, "loss": 0.7179, "step": 14102 }, { "epoch": 0.9555525442103123, "grad_norm": 5.586965560913086, "learning_rate": 8.172085700595524e-05, "loss": 0.8491, "step": 14103 }, { "epoch": 0.9556202994782844, "grad_norm": 6.042308807373047, "learning_rate": 8.171948798685742e-05, "loss": 0.5834, "step": 14104 }, { "epoch": 0.9556880547462565, "grad_norm": 10.39201831817627, "learning_rate": 8.17181189677596e-05, "loss": 0.8719, "step": 14105 }, { "epoch": 0.9557558100142286, "grad_norm": 6.800583839416504, "learning_rate": 8.171674994866178e-05, "loss": 0.6832, "step": 14106 }, { "epoch": 0.9558235652822007, "grad_norm": 4.868492603302002, "learning_rate": 8.171538092956396e-05, "loss": 0.8444, "step": 14107 }, { "epoch": 0.9558913205501728, "grad_norm": 6.521236419677734, "learning_rate": 8.171401191046615e-05, "loss": 0.6928, "step": 14108 }, { "epoch": 0.9559590758181449, "grad_norm": 6.01854944229126, "learning_rate": 8.171264289136833e-05, "loss": 0.9047, "step": 14109 }, { "epoch": 0.956026831086117, "grad_norm": 5.620432376861572, "learning_rate": 8.171127387227052e-05, "loss": 0.625, "step": 14110 }, { "epoch": 0.9560945863540891, "grad_norm": 6.074321269989014, "learning_rate": 8.170990485317271e-05, "loss": 0.8075, "step": 14111 }, { "epoch": 0.9561623416220612, "grad_norm": 5.468603134155273, "learning_rate": 8.170853583407489e-05, "loss": 0.9023, "step": 14112 }, { "epoch": 0.9562300968900332, "grad_norm": 7.003314971923828, "learning_rate": 8.170716681497707e-05, "loss": 1.0302, "step": 14113 }, { "epoch": 0.9562978521580053, "grad_norm": 5.144251346588135, "learning_rate": 8.170579779587926e-05, "loss": 0.7184, "step": 14114 }, { "epoch": 0.9563656074259773, "grad_norm": 5.8166823387146, "learning_rate": 8.170442877678144e-05, "loss": 0.8568, "step": 14115 }, { "epoch": 0.9564333626939494, "grad_norm": 5.200114727020264, "learning_rate": 8.170305975768362e-05, "loss": 0.8245, "step": 14116 }, { "epoch": 0.9565011179619215, "grad_norm": 6.467376708984375, "learning_rate": 8.170169073858582e-05, "loss": 0.8682, "step": 14117 }, { "epoch": 0.9565688732298936, "grad_norm": 5.500349998474121, "learning_rate": 8.1700321719488e-05, "loss": 0.6647, "step": 14118 }, { "epoch": 0.9566366284978657, "grad_norm": 5.1452765464782715, "learning_rate": 8.169895270039018e-05, "loss": 0.7039, "step": 14119 }, { "epoch": 0.9567043837658378, "grad_norm": 7.137358665466309, "learning_rate": 8.169758368129236e-05, "loss": 0.7246, "step": 14120 }, { "epoch": 0.9567721390338099, "grad_norm": 5.405989170074463, "learning_rate": 8.169621466219454e-05, "loss": 0.8214, "step": 14121 }, { "epoch": 0.956839894301782, "grad_norm": 6.709090232849121, "learning_rate": 8.169484564309673e-05, "loss": 0.8312, "step": 14122 }, { "epoch": 0.956907649569754, "grad_norm": 5.676616668701172, "learning_rate": 8.169347662399891e-05, "loss": 0.9024, "step": 14123 }, { "epoch": 0.9569754048377261, "grad_norm": 4.852606296539307, "learning_rate": 8.16921076049011e-05, "loss": 0.7261, "step": 14124 }, { "epoch": 0.9570431601056982, "grad_norm": 6.199010372161865, "learning_rate": 8.169073858580327e-05, "loss": 0.7481, "step": 14125 }, { "epoch": 0.9571109153736703, "grad_norm": 5.474722385406494, "learning_rate": 8.168936956670547e-05, "loss": 0.7711, "step": 14126 }, { "epoch": 0.9571786706416424, "grad_norm": 6.915562152862549, "learning_rate": 8.168800054760765e-05, "loss": 0.9033, "step": 14127 }, { "epoch": 0.9572464259096145, "grad_norm": 6.63683557510376, "learning_rate": 8.168663152850983e-05, "loss": 0.9166, "step": 14128 }, { "epoch": 0.9573141811775866, "grad_norm": 5.392688751220703, "learning_rate": 8.168526250941201e-05, "loss": 0.7428, "step": 14129 }, { "epoch": 0.9573819364455587, "grad_norm": 7.527129173278809, "learning_rate": 8.168389349031419e-05, "loss": 0.7533, "step": 14130 }, { "epoch": 0.9574496917135308, "grad_norm": 7.458190441131592, "learning_rate": 8.168252447121638e-05, "loss": 0.9663, "step": 14131 }, { "epoch": 0.9575174469815028, "grad_norm": 5.647728443145752, "learning_rate": 8.168115545211856e-05, "loss": 0.8357, "step": 14132 }, { "epoch": 0.9575852022494749, "grad_norm": 6.73082971572876, "learning_rate": 8.167978643302074e-05, "loss": 0.9228, "step": 14133 }, { "epoch": 0.957652957517447, "grad_norm": 5.708244800567627, "learning_rate": 8.167841741392292e-05, "loss": 0.742, "step": 14134 }, { "epoch": 0.957720712785419, "grad_norm": 5.192925453186035, "learning_rate": 8.16770483948251e-05, "loss": 0.5755, "step": 14135 }, { "epoch": 0.9577884680533911, "grad_norm": 5.057267665863037, "learning_rate": 8.16756793757273e-05, "loss": 0.7179, "step": 14136 }, { "epoch": 0.9578562233213632, "grad_norm": 5.001532554626465, "learning_rate": 8.167431035662948e-05, "loss": 0.7356, "step": 14137 }, { "epoch": 0.9579239785893353, "grad_norm": 5.9870781898498535, "learning_rate": 8.167294133753166e-05, "loss": 0.6898, "step": 14138 }, { "epoch": 0.9579917338573074, "grad_norm": 6.193863868713379, "learning_rate": 8.167157231843384e-05, "loss": 0.7337, "step": 14139 }, { "epoch": 0.9580594891252795, "grad_norm": 5.425492286682129, "learning_rate": 8.167020329933603e-05, "loss": 0.5645, "step": 14140 }, { "epoch": 0.9581272443932516, "grad_norm": 6.2710700035095215, "learning_rate": 8.166883428023821e-05, "loss": 0.6244, "step": 14141 }, { "epoch": 0.9581949996612237, "grad_norm": 6.745750904083252, "learning_rate": 8.16674652611404e-05, "loss": 0.7168, "step": 14142 }, { "epoch": 0.9582627549291958, "grad_norm": 4.833362579345703, "learning_rate": 8.166609624204257e-05, "loss": 0.6343, "step": 14143 }, { "epoch": 0.9583305101971679, "grad_norm": 7.704063415527344, "learning_rate": 8.166472722294476e-05, "loss": 0.8166, "step": 14144 }, { "epoch": 0.95839826546514, "grad_norm": 4.506795406341553, "learning_rate": 8.166335820384695e-05, "loss": 0.7254, "step": 14145 }, { "epoch": 0.958466020733112, "grad_norm": 4.970558166503906, "learning_rate": 8.166198918474913e-05, "loss": 0.6922, "step": 14146 }, { "epoch": 0.9585337760010841, "grad_norm": 6.441205024719238, "learning_rate": 8.166062016565131e-05, "loss": 0.8758, "step": 14147 }, { "epoch": 0.9586015312690561, "grad_norm": 5.769437789916992, "learning_rate": 8.165925114655349e-05, "loss": 0.6237, "step": 14148 }, { "epoch": 0.9586692865370282, "grad_norm": 5.401442527770996, "learning_rate": 8.165788212745568e-05, "loss": 1.0237, "step": 14149 }, { "epoch": 0.9587370418050003, "grad_norm": 6.560751438140869, "learning_rate": 8.165651310835786e-05, "loss": 1.0011, "step": 14150 }, { "epoch": 0.9588047970729724, "grad_norm": 5.372631072998047, "learning_rate": 8.165514408926004e-05, "loss": 0.787, "step": 14151 }, { "epoch": 0.9588725523409445, "grad_norm": 4.6542558670043945, "learning_rate": 8.165377507016222e-05, "loss": 0.6897, "step": 14152 }, { "epoch": 0.9589403076089166, "grad_norm": 6.8552141189575195, "learning_rate": 8.16524060510644e-05, "loss": 0.9957, "step": 14153 }, { "epoch": 0.9590080628768887, "grad_norm": 6.167290687561035, "learning_rate": 8.16510370319666e-05, "loss": 0.7579, "step": 14154 }, { "epoch": 0.9590758181448608, "grad_norm": 4.993210315704346, "learning_rate": 8.164966801286878e-05, "loss": 0.8123, "step": 14155 }, { "epoch": 0.9591435734128329, "grad_norm": 6.3289713859558105, "learning_rate": 8.164829899377096e-05, "loss": 0.7427, "step": 14156 }, { "epoch": 0.9592113286808049, "grad_norm": 4.5227837562561035, "learning_rate": 8.164692997467315e-05, "loss": 0.7607, "step": 14157 }, { "epoch": 0.959279083948777, "grad_norm": 5.881022930145264, "learning_rate": 8.164556095557533e-05, "loss": 0.8952, "step": 14158 }, { "epoch": 0.9593468392167491, "grad_norm": 5.290414810180664, "learning_rate": 8.164419193647751e-05, "loss": 0.7087, "step": 14159 }, { "epoch": 0.9594145944847212, "grad_norm": 6.688441276550293, "learning_rate": 8.164282291737971e-05, "loss": 0.635, "step": 14160 }, { "epoch": 0.9594823497526933, "grad_norm": 5.5742058753967285, "learning_rate": 8.164145389828189e-05, "loss": 0.5194, "step": 14161 }, { "epoch": 0.9595501050206654, "grad_norm": 7.248497486114502, "learning_rate": 8.164008487918407e-05, "loss": 0.603, "step": 14162 }, { "epoch": 0.9596178602886375, "grad_norm": 5.80116081237793, "learning_rate": 8.163871586008626e-05, "loss": 0.7965, "step": 14163 }, { "epoch": 0.9596856155566095, "grad_norm": 7.90059232711792, "learning_rate": 8.163734684098844e-05, "loss": 1.0141, "step": 14164 }, { "epoch": 0.9597533708245816, "grad_norm": 4.742366790771484, "learning_rate": 8.163597782189062e-05, "loss": 0.8223, "step": 14165 }, { "epoch": 0.9598211260925537, "grad_norm": 7.762453079223633, "learning_rate": 8.16346088027928e-05, "loss": 0.7047, "step": 14166 }, { "epoch": 0.9598888813605257, "grad_norm": 5.143554210662842, "learning_rate": 8.163323978369498e-05, "loss": 0.7639, "step": 14167 }, { "epoch": 0.9599566366284978, "grad_norm": 6.1197285652160645, "learning_rate": 8.163187076459718e-05, "loss": 0.6587, "step": 14168 }, { "epoch": 0.9600243918964699, "grad_norm": 5.0258049964904785, "learning_rate": 8.163050174549936e-05, "loss": 0.576, "step": 14169 }, { "epoch": 0.960092147164442, "grad_norm": 4.709690570831299, "learning_rate": 8.162913272640154e-05, "loss": 0.5911, "step": 14170 }, { "epoch": 0.9601599024324141, "grad_norm": 5.788050174713135, "learning_rate": 8.162776370730372e-05, "loss": 0.7834, "step": 14171 }, { "epoch": 0.9602276577003862, "grad_norm": 5.154922008514404, "learning_rate": 8.162639468820591e-05, "loss": 0.5965, "step": 14172 }, { "epoch": 0.9602954129683583, "grad_norm": 8.04469108581543, "learning_rate": 8.16250256691081e-05, "loss": 0.722, "step": 14173 }, { "epoch": 0.9603631682363304, "grad_norm": 6.361427307128906, "learning_rate": 8.162365665001027e-05, "loss": 0.478, "step": 14174 }, { "epoch": 0.9604309235043025, "grad_norm": 6.577165603637695, "learning_rate": 8.162228763091245e-05, "loss": 0.9066, "step": 14175 }, { "epoch": 0.9604986787722746, "grad_norm": 6.255192756652832, "learning_rate": 8.162091861181463e-05, "loss": 0.8372, "step": 14176 }, { "epoch": 0.9605664340402467, "grad_norm": 7.014744758605957, "learning_rate": 8.161954959271683e-05, "loss": 0.9053, "step": 14177 }, { "epoch": 0.9606341893082188, "grad_norm": 5.058319091796875, "learning_rate": 8.161818057361901e-05, "loss": 0.7486, "step": 14178 }, { "epoch": 0.9607019445761908, "grad_norm": 6.558164119720459, "learning_rate": 8.161681155452119e-05, "loss": 0.8439, "step": 14179 }, { "epoch": 0.9607696998441629, "grad_norm": 6.055545330047607, "learning_rate": 8.161544253542337e-05, "loss": 0.6836, "step": 14180 }, { "epoch": 0.9608374551121349, "grad_norm": 6.022161483764648, "learning_rate": 8.161407351632556e-05, "loss": 0.7918, "step": 14181 }, { "epoch": 0.960905210380107, "grad_norm": 5.69798469543457, "learning_rate": 8.161270449722774e-05, "loss": 0.9815, "step": 14182 }, { "epoch": 0.9609729656480791, "grad_norm": 4.769881248474121, "learning_rate": 8.161133547812992e-05, "loss": 0.7156, "step": 14183 }, { "epoch": 0.9610407209160512, "grad_norm": 5.956376552581787, "learning_rate": 8.16099664590321e-05, "loss": 0.8751, "step": 14184 }, { "epoch": 0.9611084761840233, "grad_norm": 5.9965128898620605, "learning_rate": 8.160859743993428e-05, "loss": 0.808, "step": 14185 }, { "epoch": 0.9611762314519954, "grad_norm": 5.199033737182617, "learning_rate": 8.160722842083648e-05, "loss": 0.6829, "step": 14186 }, { "epoch": 0.9612439867199675, "grad_norm": 6.520019054412842, "learning_rate": 8.160585940173866e-05, "loss": 0.8844, "step": 14187 }, { "epoch": 0.9613117419879396, "grad_norm": 5.944149017333984, "learning_rate": 8.160449038264084e-05, "loss": 0.7207, "step": 14188 }, { "epoch": 0.9613794972559117, "grad_norm": 5.289858818054199, "learning_rate": 8.160312136354302e-05, "loss": 0.7532, "step": 14189 }, { "epoch": 0.9614472525238837, "grad_norm": 5.0533223152160645, "learning_rate": 8.16017523444452e-05, "loss": 0.9016, "step": 14190 }, { "epoch": 0.9615150077918558, "grad_norm": 5.705595016479492, "learning_rate": 8.16003833253474e-05, "loss": 0.6944, "step": 14191 }, { "epoch": 0.9615827630598279, "grad_norm": 5.3832292556762695, "learning_rate": 8.159901430624957e-05, "loss": 0.6348, "step": 14192 }, { "epoch": 0.9616505183278, "grad_norm": 5.7924041748046875, "learning_rate": 8.159764528715175e-05, "loss": 0.7927, "step": 14193 }, { "epoch": 0.9617182735957721, "grad_norm": 5.289419174194336, "learning_rate": 8.159627626805393e-05, "loss": 0.7989, "step": 14194 }, { "epoch": 0.9617860288637442, "grad_norm": 7.650732040405273, "learning_rate": 8.159490724895613e-05, "loss": 0.82, "step": 14195 }, { "epoch": 0.9618537841317163, "grad_norm": 6.727295398712158, "learning_rate": 8.159353822985831e-05, "loss": 0.7943, "step": 14196 }, { "epoch": 0.9619215393996883, "grad_norm": 5.865251064300537, "learning_rate": 8.159216921076049e-05, "loss": 0.8241, "step": 14197 }, { "epoch": 0.9619892946676604, "grad_norm": 6.261574745178223, "learning_rate": 8.159080019166267e-05, "loss": 0.8547, "step": 14198 }, { "epoch": 0.9620570499356325, "grad_norm": 5.138889789581299, "learning_rate": 8.158943117256485e-05, "loss": 0.806, "step": 14199 }, { "epoch": 0.9621248052036045, "grad_norm": 6.540297508239746, "learning_rate": 8.158806215346704e-05, "loss": 0.783, "step": 14200 }, { "epoch": 0.9621925604715766, "grad_norm": 5.47922945022583, "learning_rate": 8.158669313436922e-05, "loss": 0.9455, "step": 14201 }, { "epoch": 0.9622603157395487, "grad_norm": 4.53643798828125, "learning_rate": 8.15853241152714e-05, "loss": 0.6482, "step": 14202 }, { "epoch": 0.9623280710075208, "grad_norm": 7.245009422302246, "learning_rate": 8.15839550961736e-05, "loss": 1.0062, "step": 14203 }, { "epoch": 0.9623958262754929, "grad_norm": 5.578246116638184, "learning_rate": 8.158258607707578e-05, "loss": 0.7564, "step": 14204 }, { "epoch": 0.962463581543465, "grad_norm": 5.920526027679443, "learning_rate": 8.158121705797796e-05, "loss": 0.6843, "step": 14205 }, { "epoch": 0.9625313368114371, "grad_norm": 6.317756652832031, "learning_rate": 8.157984803888015e-05, "loss": 0.8409, "step": 14206 }, { "epoch": 0.9625990920794092, "grad_norm": 7.818577766418457, "learning_rate": 8.157847901978233e-05, "loss": 0.8203, "step": 14207 }, { "epoch": 0.9626668473473813, "grad_norm": 6.75308084487915, "learning_rate": 8.157711000068451e-05, "loss": 0.6391, "step": 14208 }, { "epoch": 0.9627346026153534, "grad_norm": 8.25421142578125, "learning_rate": 8.157574098158671e-05, "loss": 0.7555, "step": 14209 }, { "epoch": 0.9628023578833255, "grad_norm": 7.660693168640137, "learning_rate": 8.157437196248889e-05, "loss": 0.9737, "step": 14210 }, { "epoch": 0.9628701131512976, "grad_norm": 5.611537456512451, "learning_rate": 8.157300294339107e-05, "loss": 0.835, "step": 14211 }, { "epoch": 0.9629378684192696, "grad_norm": 6.340275287628174, "learning_rate": 8.157163392429325e-05, "loss": 0.6979, "step": 14212 }, { "epoch": 0.9630056236872416, "grad_norm": 7.211668968200684, "learning_rate": 8.157026490519543e-05, "loss": 0.8135, "step": 14213 }, { "epoch": 0.9630733789552137, "grad_norm": 7.507893085479736, "learning_rate": 8.156889588609762e-05, "loss": 1.0498, "step": 14214 }, { "epoch": 0.9631411342231858, "grad_norm": 6.938470840454102, "learning_rate": 8.15675268669998e-05, "loss": 0.6888, "step": 14215 }, { "epoch": 0.9632088894911579, "grad_norm": 6.91562557220459, "learning_rate": 8.156615784790198e-05, "loss": 0.6908, "step": 14216 }, { "epoch": 0.96327664475913, "grad_norm": 5.775163650512695, "learning_rate": 8.156478882880416e-05, "loss": 0.7053, "step": 14217 }, { "epoch": 0.9633444000271021, "grad_norm": 6.454747676849365, "learning_rate": 8.156341980970636e-05, "loss": 0.7105, "step": 14218 }, { "epoch": 0.9634121552950742, "grad_norm": 6.036716461181641, "learning_rate": 8.156205079060854e-05, "loss": 1.0272, "step": 14219 }, { "epoch": 0.9634799105630463, "grad_norm": 5.749178886413574, "learning_rate": 8.156068177151072e-05, "loss": 0.8823, "step": 14220 }, { "epoch": 0.9635476658310184, "grad_norm": 6.171823978424072, "learning_rate": 8.15593127524129e-05, "loss": 0.6209, "step": 14221 }, { "epoch": 0.9636154210989905, "grad_norm": 6.761941432952881, "learning_rate": 8.155794373331508e-05, "loss": 0.7299, "step": 14222 }, { "epoch": 0.9636831763669625, "grad_norm": 4.620570659637451, "learning_rate": 8.155657471421727e-05, "loss": 0.9088, "step": 14223 }, { "epoch": 0.9637509316349346, "grad_norm": 5.813077449798584, "learning_rate": 8.155520569511945e-05, "loss": 0.7737, "step": 14224 }, { "epoch": 0.9638186869029067, "grad_norm": 6.989836692810059, "learning_rate": 8.155383667602163e-05, "loss": 1.0168, "step": 14225 }, { "epoch": 0.9638864421708788, "grad_norm": 8.76766586303711, "learning_rate": 8.155246765692381e-05, "loss": 0.7488, "step": 14226 }, { "epoch": 0.9639541974388509, "grad_norm": 6.669302940368652, "learning_rate": 8.155109863782601e-05, "loss": 0.8634, "step": 14227 }, { "epoch": 0.964021952706823, "grad_norm": 6.353033065795898, "learning_rate": 8.154972961872819e-05, "loss": 0.8018, "step": 14228 }, { "epoch": 0.964089707974795, "grad_norm": 7.134089946746826, "learning_rate": 8.154836059963037e-05, "loss": 0.7599, "step": 14229 }, { "epoch": 0.9641574632427671, "grad_norm": 5.467618465423584, "learning_rate": 8.154699158053255e-05, "loss": 0.7725, "step": 14230 }, { "epoch": 0.9642252185107392, "grad_norm": 7.044497489929199, "learning_rate": 8.154562256143473e-05, "loss": 0.9365, "step": 14231 }, { "epoch": 0.9642929737787113, "grad_norm": 5.421668529510498, "learning_rate": 8.154425354233692e-05, "loss": 0.7032, "step": 14232 }, { "epoch": 0.9643607290466834, "grad_norm": 7.38834285736084, "learning_rate": 8.15428845232391e-05, "loss": 0.8996, "step": 14233 }, { "epoch": 0.9644284843146554, "grad_norm": 6.052585124969482, "learning_rate": 8.154151550414128e-05, "loss": 0.7436, "step": 14234 }, { "epoch": 0.9644962395826275, "grad_norm": 5.307525634765625, "learning_rate": 8.154014648504346e-05, "loss": 0.6881, "step": 14235 }, { "epoch": 0.9645639948505996, "grad_norm": 5.4196062088012695, "learning_rate": 8.153877746594566e-05, "loss": 0.8157, "step": 14236 }, { "epoch": 0.9646317501185717, "grad_norm": 7.922184467315674, "learning_rate": 8.153740844684784e-05, "loss": 0.7146, "step": 14237 }, { "epoch": 0.9646995053865438, "grad_norm": 6.831099510192871, "learning_rate": 8.153603942775002e-05, "loss": 0.7294, "step": 14238 }, { "epoch": 0.9647672606545159, "grad_norm": 4.776399612426758, "learning_rate": 8.15346704086522e-05, "loss": 0.6228, "step": 14239 }, { "epoch": 0.964835015922488, "grad_norm": 6.230729103088379, "learning_rate": 8.153330138955438e-05, "loss": 0.8572, "step": 14240 }, { "epoch": 0.9649027711904601, "grad_norm": 5.276001453399658, "learning_rate": 8.153193237045657e-05, "loss": 0.7314, "step": 14241 }, { "epoch": 0.9649705264584322, "grad_norm": 7.109437465667725, "learning_rate": 8.153056335135875e-05, "loss": 1.0352, "step": 14242 }, { "epoch": 0.9650382817264043, "grad_norm": 7.070680141448975, "learning_rate": 8.152919433226093e-05, "loss": 0.8926, "step": 14243 }, { "epoch": 0.9651060369943764, "grad_norm": 6.073431015014648, "learning_rate": 8.152782531316311e-05, "loss": 0.7816, "step": 14244 }, { "epoch": 0.9651737922623485, "grad_norm": 8.69691276550293, "learning_rate": 8.15264562940653e-05, "loss": 0.9329, "step": 14245 }, { "epoch": 0.9652415475303204, "grad_norm": 5.673532962799072, "learning_rate": 8.152508727496749e-05, "loss": 0.7321, "step": 14246 }, { "epoch": 0.9653093027982925, "grad_norm": 5.062224864959717, "learning_rate": 8.152371825586967e-05, "loss": 0.7234, "step": 14247 }, { "epoch": 0.9653770580662646, "grad_norm": 7.115236282348633, "learning_rate": 8.152234923677185e-05, "loss": 0.6885, "step": 14248 }, { "epoch": 0.9654448133342367, "grad_norm": 5.9798173904418945, "learning_rate": 8.152098021767404e-05, "loss": 0.8179, "step": 14249 }, { "epoch": 0.9655125686022088, "grad_norm": 6.661346435546875, "learning_rate": 8.151961119857622e-05, "loss": 0.7467, "step": 14250 }, { "epoch": 0.9655803238701809, "grad_norm": 6.935898780822754, "learning_rate": 8.15182421794784e-05, "loss": 0.6645, "step": 14251 }, { "epoch": 0.965648079138153, "grad_norm": 4.401814937591553, "learning_rate": 8.15168731603806e-05, "loss": 0.5323, "step": 14252 }, { "epoch": 0.9657158344061251, "grad_norm": 5.884891033172607, "learning_rate": 8.151550414128278e-05, "loss": 0.7988, "step": 14253 }, { "epoch": 0.9657835896740972, "grad_norm": 5.859108924865723, "learning_rate": 8.151413512218496e-05, "loss": 0.7108, "step": 14254 }, { "epoch": 0.9658513449420693, "grad_norm": 6.580816745758057, "learning_rate": 8.151276610308715e-05, "loss": 0.8701, "step": 14255 }, { "epoch": 0.9659191002100413, "grad_norm": 5.882786750793457, "learning_rate": 8.151139708398933e-05, "loss": 0.735, "step": 14256 }, { "epoch": 0.9659868554780134, "grad_norm": 6.60660982131958, "learning_rate": 8.151002806489151e-05, "loss": 0.7883, "step": 14257 }, { "epoch": 0.9660546107459855, "grad_norm": 6.070260047912598, "learning_rate": 8.15086590457937e-05, "loss": 0.763, "step": 14258 }, { "epoch": 0.9661223660139576, "grad_norm": 5.992532730102539, "learning_rate": 8.150729002669589e-05, "loss": 1.0009, "step": 14259 }, { "epoch": 0.9661901212819297, "grad_norm": 5.648770332336426, "learning_rate": 8.150592100759807e-05, "loss": 0.6503, "step": 14260 }, { "epoch": 0.9662578765499018, "grad_norm": 7.13828706741333, "learning_rate": 8.150455198850025e-05, "loss": 0.6251, "step": 14261 }, { "epoch": 0.9663256318178738, "grad_norm": 5.505036354064941, "learning_rate": 8.150318296940243e-05, "loss": 0.5837, "step": 14262 }, { "epoch": 0.9663933870858459, "grad_norm": 7.71685791015625, "learning_rate": 8.150181395030461e-05, "loss": 0.6676, "step": 14263 }, { "epoch": 0.966461142353818, "grad_norm": 5.185730934143066, "learning_rate": 8.15004449312068e-05, "loss": 0.6651, "step": 14264 }, { "epoch": 0.9665288976217901, "grad_norm": 6.014042854309082, "learning_rate": 8.149907591210898e-05, "loss": 0.6928, "step": 14265 }, { "epoch": 0.9665966528897622, "grad_norm": 6.797776699066162, "learning_rate": 8.149770689301116e-05, "loss": 0.6754, "step": 14266 }, { "epoch": 0.9666644081577342, "grad_norm": 4.643877029418945, "learning_rate": 8.149633787391334e-05, "loss": 0.4717, "step": 14267 }, { "epoch": 0.9667321634257063, "grad_norm": 5.930227756500244, "learning_rate": 8.149496885481552e-05, "loss": 0.6709, "step": 14268 }, { "epoch": 0.9667999186936784, "grad_norm": 6.046914100646973, "learning_rate": 8.149359983571772e-05, "loss": 0.7147, "step": 14269 }, { "epoch": 0.9668676739616505, "grad_norm": 6.127531051635742, "learning_rate": 8.14922308166199e-05, "loss": 0.6944, "step": 14270 }, { "epoch": 0.9669354292296226, "grad_norm": 7.445454120635986, "learning_rate": 8.149086179752208e-05, "loss": 0.7084, "step": 14271 }, { "epoch": 0.9670031844975947, "grad_norm": 5.6586995124816895, "learning_rate": 8.148949277842426e-05, "loss": 0.7989, "step": 14272 }, { "epoch": 0.9670709397655668, "grad_norm": 7.164182186126709, "learning_rate": 8.148812375932645e-05, "loss": 0.9223, "step": 14273 }, { "epoch": 0.9671386950335389, "grad_norm": 5.000169277191162, "learning_rate": 8.148675474022863e-05, "loss": 0.723, "step": 14274 }, { "epoch": 0.967206450301511, "grad_norm": 6.657342433929443, "learning_rate": 8.148538572113081e-05, "loss": 0.6303, "step": 14275 }, { "epoch": 0.9672742055694831, "grad_norm": 6.077153205871582, "learning_rate": 8.1484016702033e-05, "loss": 0.8003, "step": 14276 }, { "epoch": 0.9673419608374552, "grad_norm": 6.259696960449219, "learning_rate": 8.148264768293517e-05, "loss": 0.8303, "step": 14277 }, { "epoch": 0.9674097161054271, "grad_norm": 5.3037190437316895, "learning_rate": 8.148127866383737e-05, "loss": 0.5536, "step": 14278 }, { "epoch": 0.9674774713733992, "grad_norm": 4.652920722961426, "learning_rate": 8.147990964473955e-05, "loss": 0.5946, "step": 14279 }, { "epoch": 0.9675452266413713, "grad_norm": 8.112478256225586, "learning_rate": 8.147854062564173e-05, "loss": 0.7329, "step": 14280 }, { "epoch": 0.9676129819093434, "grad_norm": 6.562613487243652, "learning_rate": 8.147717160654391e-05, "loss": 0.81, "step": 14281 }, { "epoch": 0.9676807371773155, "grad_norm": 4.556642532348633, "learning_rate": 8.14758025874461e-05, "loss": 0.925, "step": 14282 }, { "epoch": 0.9677484924452876, "grad_norm": 8.244071006774902, "learning_rate": 8.147443356834828e-05, "loss": 1.117, "step": 14283 }, { "epoch": 0.9678162477132597, "grad_norm": 7.206400394439697, "learning_rate": 8.147306454925046e-05, "loss": 0.5954, "step": 14284 }, { "epoch": 0.9678840029812318, "grad_norm": 6.174105644226074, "learning_rate": 8.147169553015264e-05, "loss": 0.8997, "step": 14285 }, { "epoch": 0.9679517582492039, "grad_norm": 6.076737880706787, "learning_rate": 8.147032651105482e-05, "loss": 0.7188, "step": 14286 }, { "epoch": 0.968019513517176, "grad_norm": 6.2391462326049805, "learning_rate": 8.146895749195702e-05, "loss": 0.8216, "step": 14287 }, { "epoch": 0.9680872687851481, "grad_norm": 6.028003215789795, "learning_rate": 8.14675884728592e-05, "loss": 0.8959, "step": 14288 }, { "epoch": 0.9681550240531202, "grad_norm": 7.618561744689941, "learning_rate": 8.146621945376138e-05, "loss": 0.7853, "step": 14289 }, { "epoch": 0.9682227793210922, "grad_norm": 5.370663642883301, "learning_rate": 8.146485043466356e-05, "loss": 0.7142, "step": 14290 }, { "epoch": 0.9682905345890643, "grad_norm": 5.558692455291748, "learning_rate": 8.146348141556574e-05, "loss": 0.7017, "step": 14291 }, { "epoch": 0.9683582898570364, "grad_norm": 4.1195902824401855, "learning_rate": 8.146211239646793e-05, "loss": 0.6091, "step": 14292 }, { "epoch": 0.9684260451250085, "grad_norm": 4.796550750732422, "learning_rate": 8.146074337737011e-05, "loss": 0.6297, "step": 14293 }, { "epoch": 0.9684938003929806, "grad_norm": 5.585738658905029, "learning_rate": 8.14593743582723e-05, "loss": 0.7508, "step": 14294 }, { "epoch": 0.9685615556609526, "grad_norm": 7.204619407653809, "learning_rate": 8.145800533917449e-05, "loss": 0.8782, "step": 14295 }, { "epoch": 0.9686293109289247, "grad_norm": 6.494380474090576, "learning_rate": 8.145663632007667e-05, "loss": 0.8831, "step": 14296 }, { "epoch": 0.9686970661968968, "grad_norm": 4.968986511230469, "learning_rate": 8.145526730097885e-05, "loss": 0.6463, "step": 14297 }, { "epoch": 0.9687648214648689, "grad_norm": 4.912354946136475, "learning_rate": 8.145389828188104e-05, "loss": 0.8504, "step": 14298 }, { "epoch": 0.968832576732841, "grad_norm": 8.093084335327148, "learning_rate": 8.145252926278322e-05, "loss": 0.7451, "step": 14299 }, { "epoch": 0.968900332000813, "grad_norm": 4.853938579559326, "learning_rate": 8.14511602436854e-05, "loss": 0.7233, "step": 14300 }, { "epoch": 0.9689680872687851, "grad_norm": 5.394782066345215, "learning_rate": 8.14497912245876e-05, "loss": 0.9122, "step": 14301 }, { "epoch": 0.9690358425367572, "grad_norm": 5.5748820304870605, "learning_rate": 8.144842220548978e-05, "loss": 0.9087, "step": 14302 }, { "epoch": 0.9691035978047293, "grad_norm": 5.095332145690918, "learning_rate": 8.144705318639196e-05, "loss": 0.8094, "step": 14303 }, { "epoch": 0.9691713530727014, "grad_norm": 6.252867221832275, "learning_rate": 8.144568416729414e-05, "loss": 0.8471, "step": 14304 }, { "epoch": 0.9692391083406735, "grad_norm": 5.3329949378967285, "learning_rate": 8.144431514819633e-05, "loss": 0.7303, "step": 14305 }, { "epoch": 0.9693068636086456, "grad_norm": 6.976050853729248, "learning_rate": 8.144294612909851e-05, "loss": 0.7868, "step": 14306 }, { "epoch": 0.9693746188766177, "grad_norm": 6.1792073249816895, "learning_rate": 8.144157711000069e-05, "loss": 0.9973, "step": 14307 }, { "epoch": 0.9694423741445898, "grad_norm": 4.349170684814453, "learning_rate": 8.144020809090287e-05, "loss": 0.7458, "step": 14308 }, { "epoch": 0.9695101294125619, "grad_norm": 7.618136405944824, "learning_rate": 8.143883907180505e-05, "loss": 0.6679, "step": 14309 }, { "epoch": 0.969577884680534, "grad_norm": 5.811389923095703, "learning_rate": 8.143747005270725e-05, "loss": 0.6621, "step": 14310 }, { "epoch": 0.9696456399485059, "grad_norm": 7.1004252433776855, "learning_rate": 8.143610103360943e-05, "loss": 0.9404, "step": 14311 }, { "epoch": 0.969713395216478, "grad_norm": 6.3730998039245605, "learning_rate": 8.143473201451161e-05, "loss": 0.9286, "step": 14312 }, { "epoch": 0.9697811504844501, "grad_norm": 5.840987205505371, "learning_rate": 8.143336299541379e-05, "loss": 0.6846, "step": 14313 }, { "epoch": 0.9698489057524222, "grad_norm": 6.054294109344482, "learning_rate": 8.143199397631598e-05, "loss": 0.836, "step": 14314 }, { "epoch": 0.9699166610203943, "grad_norm": 5.046802043914795, "learning_rate": 8.143062495721816e-05, "loss": 0.7202, "step": 14315 }, { "epoch": 0.9699844162883664, "grad_norm": 4.955052375793457, "learning_rate": 8.142925593812034e-05, "loss": 0.7018, "step": 14316 }, { "epoch": 0.9700521715563385, "grad_norm": 5.141872882843018, "learning_rate": 8.142788691902252e-05, "loss": 0.817, "step": 14317 }, { "epoch": 0.9701199268243106, "grad_norm": 9.178304672241211, "learning_rate": 8.14265178999247e-05, "loss": 0.6665, "step": 14318 }, { "epoch": 0.9701876820922827, "grad_norm": 9.323168754577637, "learning_rate": 8.14251488808269e-05, "loss": 0.997, "step": 14319 }, { "epoch": 0.9702554373602548, "grad_norm": 5.158806800842285, "learning_rate": 8.142377986172908e-05, "loss": 0.619, "step": 14320 }, { "epoch": 0.9703231926282269, "grad_norm": 4.141146183013916, "learning_rate": 8.142241084263126e-05, "loss": 0.6202, "step": 14321 }, { "epoch": 0.970390947896199, "grad_norm": 6.7444634437561035, "learning_rate": 8.142104182353344e-05, "loss": 0.9239, "step": 14322 }, { "epoch": 0.970458703164171, "grad_norm": 6.318787574768066, "learning_rate": 8.141967280443562e-05, "loss": 0.6277, "step": 14323 }, { "epoch": 0.9705264584321431, "grad_norm": 7.356907844543457, "learning_rate": 8.141830378533781e-05, "loss": 0.8639, "step": 14324 }, { "epoch": 0.9705942137001152, "grad_norm": 6.62352180480957, "learning_rate": 8.141693476623999e-05, "loss": 0.8497, "step": 14325 }, { "epoch": 0.9706619689680873, "grad_norm": 7.7815093994140625, "learning_rate": 8.141556574714217e-05, "loss": 0.788, "step": 14326 }, { "epoch": 0.9707297242360593, "grad_norm": 5.715222358703613, "learning_rate": 8.141419672804435e-05, "loss": 0.6403, "step": 14327 }, { "epoch": 0.9707974795040314, "grad_norm": 6.145988464355469, "learning_rate": 8.141282770894655e-05, "loss": 0.4776, "step": 14328 }, { "epoch": 0.9708652347720035, "grad_norm": 5.906881809234619, "learning_rate": 8.141145868984873e-05, "loss": 0.6636, "step": 14329 }, { "epoch": 0.9709329900399756, "grad_norm": 4.619365215301514, "learning_rate": 8.141008967075091e-05, "loss": 0.678, "step": 14330 }, { "epoch": 0.9710007453079477, "grad_norm": 6.293912410736084, "learning_rate": 8.140872065165309e-05, "loss": 0.6734, "step": 14331 }, { "epoch": 0.9710685005759198, "grad_norm": 8.143105506896973, "learning_rate": 8.140735163255527e-05, "loss": 0.708, "step": 14332 }, { "epoch": 0.9711362558438918, "grad_norm": 5.470721244812012, "learning_rate": 8.140598261345746e-05, "loss": 0.9017, "step": 14333 }, { "epoch": 0.9712040111118639, "grad_norm": 6.059875011444092, "learning_rate": 8.140461359435964e-05, "loss": 0.8164, "step": 14334 }, { "epoch": 0.971271766379836, "grad_norm": 6.589235782623291, "learning_rate": 8.140324457526182e-05, "loss": 0.9063, "step": 14335 }, { "epoch": 0.9713395216478081, "grad_norm": 5.400428771972656, "learning_rate": 8.1401875556164e-05, "loss": 0.5756, "step": 14336 }, { "epoch": 0.9714072769157802, "grad_norm": 4.511440277099609, "learning_rate": 8.14005065370662e-05, "loss": 0.6725, "step": 14337 }, { "epoch": 0.9714750321837523, "grad_norm": 8.179146766662598, "learning_rate": 8.139913751796838e-05, "loss": 0.8878, "step": 14338 }, { "epoch": 0.9715427874517244, "grad_norm": 9.839139938354492, "learning_rate": 8.139776849887056e-05, "loss": 0.9057, "step": 14339 }, { "epoch": 0.9716105427196965, "grad_norm": 4.975765705108643, "learning_rate": 8.139639947977274e-05, "loss": 0.694, "step": 14340 }, { "epoch": 0.9716782979876686, "grad_norm": 4.968737602233887, "learning_rate": 8.139503046067492e-05, "loss": 0.5297, "step": 14341 }, { "epoch": 0.9717460532556407, "grad_norm": 7.143984317779541, "learning_rate": 8.139366144157711e-05, "loss": 0.8471, "step": 14342 }, { "epoch": 0.9718138085236128, "grad_norm": 6.1333818435668945, "learning_rate": 8.13922924224793e-05, "loss": 0.8179, "step": 14343 }, { "epoch": 0.9718815637915847, "grad_norm": 5.501560688018799, "learning_rate": 8.139092340338147e-05, "loss": 0.6908, "step": 14344 }, { "epoch": 0.9719493190595568, "grad_norm": 5.844865322113037, "learning_rate": 8.138955438428367e-05, "loss": 0.7405, "step": 14345 }, { "epoch": 0.9720170743275289, "grad_norm": 8.380400657653809, "learning_rate": 8.138818536518585e-05, "loss": 0.9078, "step": 14346 }, { "epoch": 0.972084829595501, "grad_norm": 5.638479232788086, "learning_rate": 8.138681634608803e-05, "loss": 0.7455, "step": 14347 }, { "epoch": 0.9721525848634731, "grad_norm": 8.631559371948242, "learning_rate": 8.138544732699022e-05, "loss": 0.7743, "step": 14348 }, { "epoch": 0.9722203401314452, "grad_norm": 4.673583507537842, "learning_rate": 8.13840783078924e-05, "loss": 0.5662, "step": 14349 }, { "epoch": 0.9722880953994173, "grad_norm": 4.844860553741455, "learning_rate": 8.138270928879458e-05, "loss": 0.6779, "step": 14350 }, { "epoch": 0.9723558506673894, "grad_norm": 5.8541154861450195, "learning_rate": 8.138134026969678e-05, "loss": 0.7505, "step": 14351 }, { "epoch": 0.9724236059353615, "grad_norm": 4.8828654289245605, "learning_rate": 8.137997125059896e-05, "loss": 0.7733, "step": 14352 }, { "epoch": 0.9724913612033336, "grad_norm": 5.3108086585998535, "learning_rate": 8.137860223150114e-05, "loss": 0.7705, "step": 14353 }, { "epoch": 0.9725591164713057, "grad_norm": 6.572815418243408, "learning_rate": 8.137723321240332e-05, "loss": 1.0102, "step": 14354 }, { "epoch": 0.9726268717392778, "grad_norm": 6.047853946685791, "learning_rate": 8.13758641933055e-05, "loss": 0.9682, "step": 14355 }, { "epoch": 0.9726946270072498, "grad_norm": 5.171698093414307, "learning_rate": 8.137449517420769e-05, "loss": 0.8997, "step": 14356 }, { "epoch": 0.9727623822752219, "grad_norm": 5.896270275115967, "learning_rate": 8.137312615510987e-05, "loss": 0.9272, "step": 14357 }, { "epoch": 0.972830137543194, "grad_norm": 6.361771583557129, "learning_rate": 8.137175713601205e-05, "loss": 0.8364, "step": 14358 }, { "epoch": 0.9728978928111661, "grad_norm": 6.05178165435791, "learning_rate": 8.137038811691423e-05, "loss": 0.9812, "step": 14359 }, { "epoch": 0.9729656480791381, "grad_norm": 5.797706127166748, "learning_rate": 8.136901909781643e-05, "loss": 0.8025, "step": 14360 }, { "epoch": 0.9730334033471102, "grad_norm": 5.319764614105225, "learning_rate": 8.136765007871861e-05, "loss": 0.708, "step": 14361 }, { "epoch": 0.9731011586150823, "grad_norm": 6.920981407165527, "learning_rate": 8.136628105962079e-05, "loss": 0.6947, "step": 14362 }, { "epoch": 0.9731689138830544, "grad_norm": 8.11839771270752, "learning_rate": 8.136491204052297e-05, "loss": 0.8238, "step": 14363 }, { "epoch": 0.9732366691510265, "grad_norm": 5.530993938446045, "learning_rate": 8.136354302142515e-05, "loss": 0.5501, "step": 14364 }, { "epoch": 0.9733044244189986, "grad_norm": 5.251955986022949, "learning_rate": 8.136217400232734e-05, "loss": 0.706, "step": 14365 }, { "epoch": 0.9733721796869707, "grad_norm": 6.496428966522217, "learning_rate": 8.136080498322952e-05, "loss": 0.8281, "step": 14366 }, { "epoch": 0.9734399349549427, "grad_norm": 4.871181488037109, "learning_rate": 8.13594359641317e-05, "loss": 0.7147, "step": 14367 }, { "epoch": 0.9735076902229148, "grad_norm": 5.397392272949219, "learning_rate": 8.135806694503388e-05, "loss": 0.733, "step": 14368 }, { "epoch": 0.9735754454908869, "grad_norm": 5.09192419052124, "learning_rate": 8.135669792593608e-05, "loss": 0.8182, "step": 14369 }, { "epoch": 0.973643200758859, "grad_norm": 7.045880317687988, "learning_rate": 8.135532890683826e-05, "loss": 0.8304, "step": 14370 }, { "epoch": 0.9737109560268311, "grad_norm": 5.559905529022217, "learning_rate": 8.135395988774044e-05, "loss": 0.7637, "step": 14371 }, { "epoch": 0.9737787112948032, "grad_norm": 4.846694469451904, "learning_rate": 8.135259086864262e-05, "loss": 0.7953, "step": 14372 }, { "epoch": 0.9738464665627753, "grad_norm": 4.448090076446533, "learning_rate": 8.13512218495448e-05, "loss": 0.7446, "step": 14373 }, { "epoch": 0.9739142218307474, "grad_norm": 5.447312355041504, "learning_rate": 8.134985283044699e-05, "loss": 0.8034, "step": 14374 }, { "epoch": 0.9739819770987195, "grad_norm": 6.372121334075928, "learning_rate": 8.134848381134917e-05, "loss": 0.7034, "step": 14375 }, { "epoch": 0.9740497323666915, "grad_norm": 6.085412979125977, "learning_rate": 8.134711479225135e-05, "loss": 0.7508, "step": 14376 }, { "epoch": 0.9741174876346635, "grad_norm": 5.742150783538818, "learning_rate": 8.134574577315353e-05, "loss": 0.8239, "step": 14377 }, { "epoch": 0.9741852429026356, "grad_norm": 5.154967308044434, "learning_rate": 8.134437675405571e-05, "loss": 0.6667, "step": 14378 }, { "epoch": 0.9742529981706077, "grad_norm": 6.060741901397705, "learning_rate": 8.134300773495791e-05, "loss": 0.803, "step": 14379 }, { "epoch": 0.9743207534385798, "grad_norm": 6.090625286102295, "learning_rate": 8.134163871586009e-05, "loss": 0.8115, "step": 14380 }, { "epoch": 0.9743885087065519, "grad_norm": 6.097843170166016, "learning_rate": 8.134026969676227e-05, "loss": 0.7386, "step": 14381 }, { "epoch": 0.974456263974524, "grad_norm": 6.539214611053467, "learning_rate": 8.133890067766445e-05, "loss": 0.6162, "step": 14382 }, { "epoch": 0.9745240192424961, "grad_norm": 5.923181056976318, "learning_rate": 8.133753165856664e-05, "loss": 1.0001, "step": 14383 }, { "epoch": 0.9745917745104682, "grad_norm": 6.29371976852417, "learning_rate": 8.133616263946882e-05, "loss": 0.7975, "step": 14384 }, { "epoch": 0.9746595297784403, "grad_norm": 6.171021461486816, "learning_rate": 8.1334793620371e-05, "loss": 0.8991, "step": 14385 }, { "epoch": 0.9747272850464124, "grad_norm": 5.814321041107178, "learning_rate": 8.133342460127318e-05, "loss": 0.525, "step": 14386 }, { "epoch": 0.9747950403143845, "grad_norm": 5.774600028991699, "learning_rate": 8.133205558217536e-05, "loss": 0.7866, "step": 14387 }, { "epoch": 0.9748627955823566, "grad_norm": 5.909730434417725, "learning_rate": 8.133068656307756e-05, "loss": 0.6867, "step": 14388 }, { "epoch": 0.9749305508503286, "grad_norm": 6.84520149230957, "learning_rate": 8.132931754397974e-05, "loss": 0.7958, "step": 14389 }, { "epoch": 0.9749983061183007, "grad_norm": 6.0649518966674805, "learning_rate": 8.132794852488192e-05, "loss": 0.8553, "step": 14390 }, { "epoch": 0.9750660613862728, "grad_norm": 6.470561504364014, "learning_rate": 8.132657950578411e-05, "loss": 0.7563, "step": 14391 }, { "epoch": 0.9751338166542449, "grad_norm": 5.470592021942139, "learning_rate": 8.132521048668629e-05, "loss": 0.7057, "step": 14392 }, { "epoch": 0.9752015719222169, "grad_norm": 5.857933044433594, "learning_rate": 8.132384146758847e-05, "loss": 0.6859, "step": 14393 }, { "epoch": 0.975269327190189, "grad_norm": 6.267986297607422, "learning_rate": 8.132247244849067e-05, "loss": 0.6744, "step": 14394 }, { "epoch": 0.9753370824581611, "grad_norm": 6.363813400268555, "learning_rate": 8.132110342939285e-05, "loss": 0.7194, "step": 14395 }, { "epoch": 0.9754048377261332, "grad_norm": 5.119122505187988, "learning_rate": 8.131973441029503e-05, "loss": 0.7292, "step": 14396 }, { "epoch": 0.9754725929941053, "grad_norm": 5.589879035949707, "learning_rate": 8.131836539119722e-05, "loss": 0.786, "step": 14397 }, { "epoch": 0.9755403482620774, "grad_norm": 6.809702396392822, "learning_rate": 8.13169963720994e-05, "loss": 0.7465, "step": 14398 }, { "epoch": 0.9756081035300495, "grad_norm": 5.51494026184082, "learning_rate": 8.131562735300158e-05, "loss": 0.7014, "step": 14399 }, { "epoch": 0.9756758587980215, "grad_norm": 5.632194519042969, "learning_rate": 8.131425833390376e-05, "loss": 0.7217, "step": 14400 }, { "epoch": 0.9757436140659936, "grad_norm": 4.676552772521973, "learning_rate": 8.131288931480594e-05, "loss": 0.6789, "step": 14401 }, { "epoch": 0.9758113693339657, "grad_norm": 5.2599945068359375, "learning_rate": 8.131152029570814e-05, "loss": 0.6691, "step": 14402 }, { "epoch": 0.9758791246019378, "grad_norm": 5.05983829498291, "learning_rate": 8.131015127661032e-05, "loss": 0.6848, "step": 14403 }, { "epoch": 0.9759468798699099, "grad_norm": 5.274669170379639, "learning_rate": 8.13087822575125e-05, "loss": 0.7596, "step": 14404 }, { "epoch": 0.976014635137882, "grad_norm": 6.312376976013184, "learning_rate": 8.130741323841468e-05, "loss": 0.6008, "step": 14405 }, { "epoch": 0.9760823904058541, "grad_norm": 5.092833995819092, "learning_rate": 8.130604421931687e-05, "loss": 0.7497, "step": 14406 }, { "epoch": 0.9761501456738262, "grad_norm": 4.330989837646484, "learning_rate": 8.130467520021905e-05, "loss": 0.6639, "step": 14407 }, { "epoch": 0.9762179009417983, "grad_norm": 6.680692195892334, "learning_rate": 8.130330618112123e-05, "loss": 0.9758, "step": 14408 }, { "epoch": 0.9762856562097703, "grad_norm": 4.872468948364258, "learning_rate": 8.130193716202341e-05, "loss": 0.5678, "step": 14409 }, { "epoch": 0.9763534114777424, "grad_norm": 5.9608659744262695, "learning_rate": 8.130056814292559e-05, "loss": 0.766, "step": 14410 }, { "epoch": 0.9764211667457144, "grad_norm": 8.0074462890625, "learning_rate": 8.129919912382779e-05, "loss": 0.808, "step": 14411 }, { "epoch": 0.9764889220136865, "grad_norm": 6.391083240509033, "learning_rate": 8.129783010472997e-05, "loss": 0.9077, "step": 14412 }, { "epoch": 0.9765566772816586, "grad_norm": 9.006216049194336, "learning_rate": 8.129646108563215e-05, "loss": 0.8002, "step": 14413 }, { "epoch": 0.9766244325496307, "grad_norm": 5.454526901245117, "learning_rate": 8.129509206653433e-05, "loss": 0.662, "step": 14414 }, { "epoch": 0.9766921878176028, "grad_norm": 5.372074604034424, "learning_rate": 8.129372304743652e-05, "loss": 0.7083, "step": 14415 }, { "epoch": 0.9767599430855749, "grad_norm": 5.040616035461426, "learning_rate": 8.12923540283387e-05, "loss": 0.7178, "step": 14416 }, { "epoch": 0.976827698353547, "grad_norm": 6.438399314880371, "learning_rate": 8.129098500924088e-05, "loss": 0.9576, "step": 14417 }, { "epoch": 0.9768954536215191, "grad_norm": 5.687475204467773, "learning_rate": 8.128961599014306e-05, "loss": 0.8086, "step": 14418 }, { "epoch": 0.9769632088894912, "grad_norm": 5.61614990234375, "learning_rate": 8.128824697104524e-05, "loss": 0.7917, "step": 14419 }, { "epoch": 0.9770309641574633, "grad_norm": 6.389354228973389, "learning_rate": 8.128687795194744e-05, "loss": 0.7769, "step": 14420 }, { "epoch": 0.9770987194254354, "grad_norm": 7.1694231033325195, "learning_rate": 8.128550893284962e-05, "loss": 0.6114, "step": 14421 }, { "epoch": 0.9771664746934074, "grad_norm": 6.265122890472412, "learning_rate": 8.12841399137518e-05, "loss": 0.6074, "step": 14422 }, { "epoch": 0.9772342299613795, "grad_norm": 9.472161293029785, "learning_rate": 8.128277089465398e-05, "loss": 0.5208, "step": 14423 }, { "epoch": 0.9773019852293516, "grad_norm": 4.76262903213501, "learning_rate": 8.128140187555616e-05, "loss": 0.7851, "step": 14424 }, { "epoch": 0.9773697404973236, "grad_norm": 4.94804048538208, "learning_rate": 8.128003285645835e-05, "loss": 0.7913, "step": 14425 }, { "epoch": 0.9774374957652957, "grad_norm": 5.631475448608398, "learning_rate": 8.127866383736053e-05, "loss": 0.7001, "step": 14426 }, { "epoch": 0.9775052510332678, "grad_norm": 6.504068851470947, "learning_rate": 8.127729481826271e-05, "loss": 0.6949, "step": 14427 }, { "epoch": 0.9775730063012399, "grad_norm": 4.534459114074707, "learning_rate": 8.12759257991649e-05, "loss": 0.6107, "step": 14428 }, { "epoch": 0.977640761569212, "grad_norm": 5.583250045776367, "learning_rate": 8.127455678006709e-05, "loss": 0.6784, "step": 14429 }, { "epoch": 0.9777085168371841, "grad_norm": 5.706220626831055, "learning_rate": 8.127318776096927e-05, "loss": 0.7571, "step": 14430 }, { "epoch": 0.9777762721051562, "grad_norm": 5.701255798339844, "learning_rate": 8.127181874187145e-05, "loss": 0.7379, "step": 14431 }, { "epoch": 0.9778440273731283, "grad_norm": 5.034109592437744, "learning_rate": 8.127044972277363e-05, "loss": 0.6254, "step": 14432 }, { "epoch": 0.9779117826411003, "grad_norm": 7.113419532775879, "learning_rate": 8.126908070367581e-05, "loss": 0.7667, "step": 14433 }, { "epoch": 0.9779795379090724, "grad_norm": 5.958342552185059, "learning_rate": 8.1267711684578e-05, "loss": 0.8242, "step": 14434 }, { "epoch": 0.9780472931770445, "grad_norm": 5.358188629150391, "learning_rate": 8.126634266548018e-05, "loss": 0.8029, "step": 14435 }, { "epoch": 0.9781150484450166, "grad_norm": 5.703382968902588, "learning_rate": 8.126497364638236e-05, "loss": 0.64, "step": 14436 }, { "epoch": 0.9781828037129887, "grad_norm": 6.205333709716797, "learning_rate": 8.126360462728456e-05, "loss": 0.7054, "step": 14437 }, { "epoch": 0.9782505589809608, "grad_norm": 6.106006145477295, "learning_rate": 8.126223560818674e-05, "loss": 0.7458, "step": 14438 }, { "epoch": 0.9783183142489329, "grad_norm": 7.8420305252075195, "learning_rate": 8.126086658908892e-05, "loss": 0.9453, "step": 14439 }, { "epoch": 0.978386069516905, "grad_norm": 5.559987545013428, "learning_rate": 8.125949756999111e-05, "loss": 0.5765, "step": 14440 }, { "epoch": 0.978453824784877, "grad_norm": 6.2737040519714355, "learning_rate": 8.125812855089329e-05, "loss": 0.6614, "step": 14441 }, { "epoch": 0.9785215800528491, "grad_norm": 6.900593280792236, "learning_rate": 8.125675953179547e-05, "loss": 0.8438, "step": 14442 }, { "epoch": 0.9785893353208212, "grad_norm": 5.86058235168457, "learning_rate": 8.125539051269767e-05, "loss": 0.7445, "step": 14443 }, { "epoch": 0.9786570905887932, "grad_norm": 4.0302348136901855, "learning_rate": 8.125402149359985e-05, "loss": 0.767, "step": 14444 }, { "epoch": 0.9787248458567653, "grad_norm": 5.090617656707764, "learning_rate": 8.125265247450203e-05, "loss": 0.6529, "step": 14445 }, { "epoch": 0.9787926011247374, "grad_norm": 5.470541477203369, "learning_rate": 8.125128345540421e-05, "loss": 0.7689, "step": 14446 }, { "epoch": 0.9788603563927095, "grad_norm": 5.9749627113342285, "learning_rate": 8.12499144363064e-05, "loss": 1.0099, "step": 14447 }, { "epoch": 0.9789281116606816, "grad_norm": 5.537027359008789, "learning_rate": 8.124854541720858e-05, "loss": 0.6033, "step": 14448 }, { "epoch": 0.9789958669286537, "grad_norm": 4.773642063140869, "learning_rate": 8.124717639811076e-05, "loss": 0.6899, "step": 14449 }, { "epoch": 0.9790636221966258, "grad_norm": 6.153696537017822, "learning_rate": 8.124580737901294e-05, "loss": 0.9249, "step": 14450 }, { "epoch": 0.9791313774645979, "grad_norm": 5.920269966125488, "learning_rate": 8.124443835991512e-05, "loss": 0.7868, "step": 14451 }, { "epoch": 0.97919913273257, "grad_norm": 5.916412830352783, "learning_rate": 8.124306934081732e-05, "loss": 0.6885, "step": 14452 }, { "epoch": 0.9792668880005421, "grad_norm": 7.684385299682617, "learning_rate": 8.12417003217195e-05, "loss": 0.8591, "step": 14453 }, { "epoch": 0.9793346432685142, "grad_norm": 4.769680500030518, "learning_rate": 8.124033130262168e-05, "loss": 0.59, "step": 14454 }, { "epoch": 0.9794023985364863, "grad_norm": 6.372145175933838, "learning_rate": 8.123896228352386e-05, "loss": 0.9633, "step": 14455 }, { "epoch": 0.9794701538044583, "grad_norm": 4.667510509490967, "learning_rate": 8.123759326442604e-05, "loss": 0.6554, "step": 14456 }, { "epoch": 0.9795379090724304, "grad_norm": 6.719541549682617, "learning_rate": 8.123622424532823e-05, "loss": 0.6992, "step": 14457 }, { "epoch": 0.9796056643404024, "grad_norm": 5.012288570404053, "learning_rate": 8.123485522623041e-05, "loss": 0.6368, "step": 14458 }, { "epoch": 0.9796734196083745, "grad_norm": 5.666974067687988, "learning_rate": 8.123348620713259e-05, "loss": 0.737, "step": 14459 }, { "epoch": 0.9797411748763466, "grad_norm": 6.177326679229736, "learning_rate": 8.123211718803477e-05, "loss": 0.8604, "step": 14460 }, { "epoch": 0.9798089301443187, "grad_norm": 6.877957344055176, "learning_rate": 8.123074816893697e-05, "loss": 0.7917, "step": 14461 }, { "epoch": 0.9798766854122908, "grad_norm": 7.843241214752197, "learning_rate": 8.122937914983915e-05, "loss": 0.68, "step": 14462 }, { "epoch": 0.9799444406802629, "grad_norm": 5.025186061859131, "learning_rate": 8.122801013074133e-05, "loss": 0.7137, "step": 14463 }, { "epoch": 0.980012195948235, "grad_norm": 5.27938175201416, "learning_rate": 8.122664111164351e-05, "loss": 0.7029, "step": 14464 }, { "epoch": 0.9800799512162071, "grad_norm": 6.0738301277160645, "learning_rate": 8.122527209254569e-05, "loss": 0.7922, "step": 14465 }, { "epoch": 0.9801477064841791, "grad_norm": 7.217299461364746, "learning_rate": 8.122390307344788e-05, "loss": 0.6941, "step": 14466 }, { "epoch": 0.9802154617521512, "grad_norm": 4.96075439453125, "learning_rate": 8.122253405435006e-05, "loss": 0.6182, "step": 14467 }, { "epoch": 0.9802832170201233, "grad_norm": 5.550745964050293, "learning_rate": 8.122116503525224e-05, "loss": 0.7081, "step": 14468 }, { "epoch": 0.9803509722880954, "grad_norm": 6.0965752601623535, "learning_rate": 8.121979601615442e-05, "loss": 0.6694, "step": 14469 }, { "epoch": 0.9804187275560675, "grad_norm": 6.101776123046875, "learning_rate": 8.121842699705662e-05, "loss": 0.7803, "step": 14470 }, { "epoch": 0.9804864828240396, "grad_norm": 5.098122596740723, "learning_rate": 8.12170579779588e-05, "loss": 0.8434, "step": 14471 }, { "epoch": 0.9805542380920117, "grad_norm": 4.281574726104736, "learning_rate": 8.121568895886098e-05, "loss": 0.7203, "step": 14472 }, { "epoch": 0.9806219933599838, "grad_norm": 7.562485694885254, "learning_rate": 8.121431993976316e-05, "loss": 0.8041, "step": 14473 }, { "epoch": 0.9806897486279558, "grad_norm": 8.38601303100586, "learning_rate": 8.121295092066534e-05, "loss": 0.6553, "step": 14474 }, { "epoch": 0.9807575038959279, "grad_norm": 7.885288715362549, "learning_rate": 8.121158190156753e-05, "loss": 0.9021, "step": 14475 }, { "epoch": 0.9808252591639, "grad_norm": 5.9442877769470215, "learning_rate": 8.121021288246971e-05, "loss": 0.7343, "step": 14476 }, { "epoch": 0.980893014431872, "grad_norm": 7.727574348449707, "learning_rate": 8.120884386337189e-05, "loss": 0.9092, "step": 14477 }, { "epoch": 0.9809607696998441, "grad_norm": 7.154834270477295, "learning_rate": 8.120747484427407e-05, "loss": 0.824, "step": 14478 }, { "epoch": 0.9810285249678162, "grad_norm": 5.356253623962402, "learning_rate": 8.120610582517625e-05, "loss": 0.7594, "step": 14479 }, { "epoch": 0.9810962802357883, "grad_norm": 6.602542400360107, "learning_rate": 8.120473680607845e-05, "loss": 0.7641, "step": 14480 }, { "epoch": 0.9811640355037604, "grad_norm": 5.2316060066223145, "learning_rate": 8.120336778698063e-05, "loss": 0.7131, "step": 14481 }, { "epoch": 0.9812317907717325, "grad_norm": 5.679458141326904, "learning_rate": 8.120199876788281e-05, "loss": 0.7762, "step": 14482 }, { "epoch": 0.9812995460397046, "grad_norm": 6.47125244140625, "learning_rate": 8.1200629748785e-05, "loss": 0.7332, "step": 14483 }, { "epoch": 0.9813673013076767, "grad_norm": 4.7544169425964355, "learning_rate": 8.119926072968718e-05, "loss": 0.7308, "step": 14484 }, { "epoch": 0.9814350565756488, "grad_norm": 8.939432144165039, "learning_rate": 8.119789171058936e-05, "loss": 0.9312, "step": 14485 }, { "epoch": 0.9815028118436209, "grad_norm": 6.649098873138428, "learning_rate": 8.119652269149156e-05, "loss": 0.7315, "step": 14486 }, { "epoch": 0.981570567111593, "grad_norm": 6.609123706817627, "learning_rate": 8.119515367239374e-05, "loss": 0.7891, "step": 14487 }, { "epoch": 0.981638322379565, "grad_norm": 4.026298522949219, "learning_rate": 8.119378465329592e-05, "loss": 0.6256, "step": 14488 }, { "epoch": 0.9817060776475371, "grad_norm": 5.0410075187683105, "learning_rate": 8.119241563419811e-05, "loss": 0.5491, "step": 14489 }, { "epoch": 0.9817738329155091, "grad_norm": 6.390183448791504, "learning_rate": 8.119104661510029e-05, "loss": 0.6793, "step": 14490 }, { "epoch": 0.9818415881834812, "grad_norm": 5.242403984069824, "learning_rate": 8.118967759600247e-05, "loss": 0.6271, "step": 14491 }, { "epoch": 0.9819093434514533, "grad_norm": 6.249171257019043, "learning_rate": 8.118830857690465e-05, "loss": 0.8151, "step": 14492 }, { "epoch": 0.9819770987194254, "grad_norm": 6.398001194000244, "learning_rate": 8.118693955780685e-05, "loss": 0.6908, "step": 14493 }, { "epoch": 0.9820448539873975, "grad_norm": 8.341864585876465, "learning_rate": 8.118557053870903e-05, "loss": 0.9941, "step": 14494 }, { "epoch": 0.9821126092553696, "grad_norm": 7.000919818878174, "learning_rate": 8.11842015196112e-05, "loss": 0.751, "step": 14495 }, { "epoch": 0.9821803645233417, "grad_norm": 6.331650257110596, "learning_rate": 8.118283250051339e-05, "loss": 0.6585, "step": 14496 }, { "epoch": 0.9822481197913138, "grad_norm": 6.7721848487854, "learning_rate": 8.118146348141557e-05, "loss": 0.8229, "step": 14497 }, { "epoch": 0.9823158750592859, "grad_norm": 6.479053497314453, "learning_rate": 8.118009446231776e-05, "loss": 0.8373, "step": 14498 }, { "epoch": 0.982383630327258, "grad_norm": 6.043035984039307, "learning_rate": 8.117872544321994e-05, "loss": 0.7682, "step": 14499 }, { "epoch": 0.98245138559523, "grad_norm": 6.7178778648376465, "learning_rate": 8.117735642412212e-05, "loss": 0.6542, "step": 14500 }, { "epoch": 0.9825191408632021, "grad_norm": 9.148183822631836, "learning_rate": 8.11759874050243e-05, "loss": 0.7574, "step": 14501 }, { "epoch": 0.9825868961311742, "grad_norm": 5.086976051330566, "learning_rate": 8.11746183859265e-05, "loss": 0.6415, "step": 14502 }, { "epoch": 0.9826546513991463, "grad_norm": 6.981233596801758, "learning_rate": 8.117324936682868e-05, "loss": 1.0351, "step": 14503 }, { "epoch": 0.9827224066671184, "grad_norm": 6.20076847076416, "learning_rate": 8.117188034773086e-05, "loss": 0.734, "step": 14504 }, { "epoch": 0.9827901619350905, "grad_norm": 6.142386436462402, "learning_rate": 8.117051132863304e-05, "loss": 0.9636, "step": 14505 }, { "epoch": 0.9828579172030626, "grad_norm": 5.595977306365967, "learning_rate": 8.116914230953522e-05, "loss": 0.8142, "step": 14506 }, { "epoch": 0.9829256724710346, "grad_norm": 5.602009296417236, "learning_rate": 8.116777329043741e-05, "loss": 0.8624, "step": 14507 }, { "epoch": 0.9829934277390067, "grad_norm": 5.37421989440918, "learning_rate": 8.116640427133959e-05, "loss": 0.9434, "step": 14508 }, { "epoch": 0.9830611830069788, "grad_norm": 6.307192325592041, "learning_rate": 8.116503525224177e-05, "loss": 0.7215, "step": 14509 }, { "epoch": 0.9831289382749508, "grad_norm": 5.990005970001221, "learning_rate": 8.116366623314395e-05, "loss": 0.6622, "step": 14510 }, { "epoch": 0.9831966935429229, "grad_norm": 6.682214736938477, "learning_rate": 8.116229721404613e-05, "loss": 0.8302, "step": 14511 }, { "epoch": 0.983264448810895, "grad_norm": 8.388869285583496, "learning_rate": 8.116092819494833e-05, "loss": 0.948, "step": 14512 }, { "epoch": 0.9833322040788671, "grad_norm": 6.352821350097656, "learning_rate": 8.115955917585051e-05, "loss": 0.8845, "step": 14513 }, { "epoch": 0.9833999593468392, "grad_norm": 5.503759384155273, "learning_rate": 8.115819015675269e-05, "loss": 0.6619, "step": 14514 }, { "epoch": 0.9834677146148113, "grad_norm": 8.024614334106445, "learning_rate": 8.115682113765487e-05, "loss": 0.6846, "step": 14515 }, { "epoch": 0.9835354698827834, "grad_norm": 5.9330902099609375, "learning_rate": 8.115545211855706e-05, "loss": 0.7577, "step": 14516 }, { "epoch": 0.9836032251507555, "grad_norm": 5.600277423858643, "learning_rate": 8.115408309945924e-05, "loss": 0.8694, "step": 14517 }, { "epoch": 0.9836709804187276, "grad_norm": 5.870060443878174, "learning_rate": 8.115271408036142e-05, "loss": 0.7465, "step": 14518 }, { "epoch": 0.9837387356866997, "grad_norm": 5.257311820983887, "learning_rate": 8.11513450612636e-05, "loss": 0.58, "step": 14519 }, { "epoch": 0.9838064909546718, "grad_norm": 5.327232837677002, "learning_rate": 8.114997604216578e-05, "loss": 0.823, "step": 14520 }, { "epoch": 0.9838742462226439, "grad_norm": 7.09609842300415, "learning_rate": 8.114860702306798e-05, "loss": 0.7926, "step": 14521 }, { "epoch": 0.983942001490616, "grad_norm": 5.685835838317871, "learning_rate": 8.114723800397016e-05, "loss": 0.8741, "step": 14522 }, { "epoch": 0.9840097567585879, "grad_norm": 5.486968517303467, "learning_rate": 8.114586898487234e-05, "loss": 0.8518, "step": 14523 }, { "epoch": 0.98407751202656, "grad_norm": 4.868188858032227, "learning_rate": 8.114449996577452e-05, "loss": 0.6013, "step": 14524 }, { "epoch": 0.9841452672945321, "grad_norm": 6.388209819793701, "learning_rate": 8.114313094667671e-05, "loss": 0.827, "step": 14525 }, { "epoch": 0.9842130225625042, "grad_norm": 6.438693523406982, "learning_rate": 8.114176192757889e-05, "loss": 0.9702, "step": 14526 }, { "epoch": 0.9842807778304763, "grad_norm": 4.985934734344482, "learning_rate": 8.114039290848107e-05, "loss": 0.6018, "step": 14527 }, { "epoch": 0.9843485330984484, "grad_norm": 6.320969581604004, "learning_rate": 8.113902388938325e-05, "loss": 0.7803, "step": 14528 }, { "epoch": 0.9844162883664205, "grad_norm": 7.519181728363037, "learning_rate": 8.113765487028545e-05, "loss": 0.7211, "step": 14529 }, { "epoch": 0.9844840436343926, "grad_norm": 5.232812404632568, "learning_rate": 8.113628585118763e-05, "loss": 1.0151, "step": 14530 }, { "epoch": 0.9845517989023647, "grad_norm": 6.038102149963379, "learning_rate": 8.113491683208981e-05, "loss": 0.8097, "step": 14531 }, { "epoch": 0.9846195541703368, "grad_norm": 6.541941165924072, "learning_rate": 8.1133547812992e-05, "loss": 0.704, "step": 14532 }, { "epoch": 0.9846873094383088, "grad_norm": 6.207378387451172, "learning_rate": 8.113217879389418e-05, "loss": 0.6292, "step": 14533 }, { "epoch": 0.9847550647062809, "grad_norm": 7.586195945739746, "learning_rate": 8.113080977479636e-05, "loss": 0.8351, "step": 14534 }, { "epoch": 0.984822819974253, "grad_norm": 5.649599075317383, "learning_rate": 8.112944075569856e-05, "loss": 0.7277, "step": 14535 }, { "epoch": 0.9848905752422251, "grad_norm": 6.21948766708374, "learning_rate": 8.112807173660074e-05, "loss": 0.8101, "step": 14536 }, { "epoch": 0.9849583305101972, "grad_norm": 5.665065765380859, "learning_rate": 8.112670271750292e-05, "loss": 0.9378, "step": 14537 }, { "epoch": 0.9850260857781693, "grad_norm": 6.819377899169922, "learning_rate": 8.11253336984051e-05, "loss": 0.8962, "step": 14538 }, { "epoch": 0.9850938410461413, "grad_norm": 5.853285789489746, "learning_rate": 8.112396467930729e-05, "loss": 0.7442, "step": 14539 }, { "epoch": 0.9851615963141134, "grad_norm": 6.921646595001221, "learning_rate": 8.112259566020947e-05, "loss": 0.8819, "step": 14540 }, { "epoch": 0.9852293515820855, "grad_norm": 5.253473281860352, "learning_rate": 8.112122664111165e-05, "loss": 0.8615, "step": 14541 }, { "epoch": 0.9852971068500576, "grad_norm": 6.091032981872559, "learning_rate": 8.111985762201383e-05, "loss": 0.9232, "step": 14542 }, { "epoch": 0.9853648621180296, "grad_norm": 4.946970462799072, "learning_rate": 8.111848860291601e-05, "loss": 0.5256, "step": 14543 }, { "epoch": 0.9854326173860017, "grad_norm": 7.805113792419434, "learning_rate": 8.11171195838182e-05, "loss": 0.7799, "step": 14544 }, { "epoch": 0.9855003726539738, "grad_norm": 5.948545932769775, "learning_rate": 8.111575056472039e-05, "loss": 0.8107, "step": 14545 }, { "epoch": 0.9855681279219459, "grad_norm": 4.266178131103516, "learning_rate": 8.111438154562257e-05, "loss": 0.6324, "step": 14546 }, { "epoch": 0.985635883189918, "grad_norm": 5.126527309417725, "learning_rate": 8.111301252652475e-05, "loss": 0.6852, "step": 14547 }, { "epoch": 0.9857036384578901, "grad_norm": 7.069212436676025, "learning_rate": 8.111164350742694e-05, "loss": 0.7833, "step": 14548 }, { "epoch": 0.9857713937258622, "grad_norm": 4.909544944763184, "learning_rate": 8.111027448832912e-05, "loss": 0.7001, "step": 14549 }, { "epoch": 0.9858391489938343, "grad_norm": 8.045904159545898, "learning_rate": 8.11089054692313e-05, "loss": 0.7565, "step": 14550 }, { "epoch": 0.9859069042618064, "grad_norm": 6.734328269958496, "learning_rate": 8.110753645013348e-05, "loss": 0.6459, "step": 14551 }, { "epoch": 0.9859746595297785, "grad_norm": 6.844288349151611, "learning_rate": 8.110616743103566e-05, "loss": 0.6501, "step": 14552 }, { "epoch": 0.9860424147977506, "grad_norm": 6.474472522735596, "learning_rate": 8.110479841193786e-05, "loss": 0.8652, "step": 14553 }, { "epoch": 0.9861101700657227, "grad_norm": 5.087035655975342, "learning_rate": 8.110342939284004e-05, "loss": 0.6611, "step": 14554 }, { "epoch": 0.9861779253336947, "grad_norm": 5.499162673950195, "learning_rate": 8.110206037374222e-05, "loss": 0.7477, "step": 14555 }, { "epoch": 0.9862456806016667, "grad_norm": 6.489079475402832, "learning_rate": 8.11006913546444e-05, "loss": 0.6523, "step": 14556 }, { "epoch": 0.9863134358696388, "grad_norm": 5.147678375244141, "learning_rate": 8.109932233554658e-05, "loss": 0.7646, "step": 14557 }, { "epoch": 0.9863811911376109, "grad_norm": 6.976253986358643, "learning_rate": 8.109795331644877e-05, "loss": 0.7634, "step": 14558 }, { "epoch": 0.986448946405583, "grad_norm": 6.211210250854492, "learning_rate": 8.109658429735095e-05, "loss": 0.788, "step": 14559 }, { "epoch": 0.9865167016735551, "grad_norm": 5.917699813842773, "learning_rate": 8.109521527825313e-05, "loss": 0.8491, "step": 14560 }, { "epoch": 0.9865844569415272, "grad_norm": 9.201217651367188, "learning_rate": 8.109384625915531e-05, "loss": 0.8377, "step": 14561 }, { "epoch": 0.9866522122094993, "grad_norm": 6.403718948364258, "learning_rate": 8.10924772400575e-05, "loss": 0.991, "step": 14562 }, { "epoch": 0.9867199674774714, "grad_norm": 4.908394813537598, "learning_rate": 8.109110822095969e-05, "loss": 0.7742, "step": 14563 }, { "epoch": 0.9867877227454435, "grad_norm": 5.759329795837402, "learning_rate": 8.108973920186187e-05, "loss": 0.682, "step": 14564 }, { "epoch": 0.9868554780134156, "grad_norm": 5.077083587646484, "learning_rate": 8.108837018276405e-05, "loss": 0.6575, "step": 14565 }, { "epoch": 0.9869232332813876, "grad_norm": 6.402769088745117, "learning_rate": 8.108700116366623e-05, "loss": 0.6313, "step": 14566 }, { "epoch": 0.9869909885493597, "grad_norm": 6.894180774688721, "learning_rate": 8.108563214456842e-05, "loss": 0.779, "step": 14567 }, { "epoch": 0.9870587438173318, "grad_norm": 7.2585062980651855, "learning_rate": 8.10842631254706e-05, "loss": 0.8281, "step": 14568 }, { "epoch": 0.9871264990853039, "grad_norm": 4.876138210296631, "learning_rate": 8.108289410637278e-05, "loss": 0.6501, "step": 14569 }, { "epoch": 0.987194254353276, "grad_norm": 6.457757949829102, "learning_rate": 8.108152508727496e-05, "loss": 0.6609, "step": 14570 }, { "epoch": 0.9872620096212481, "grad_norm": 5.902544021606445, "learning_rate": 8.108015606817716e-05, "loss": 0.8231, "step": 14571 }, { "epoch": 0.9873297648892201, "grad_norm": 7.340898513793945, "learning_rate": 8.107878704907934e-05, "loss": 0.9017, "step": 14572 }, { "epoch": 0.9873975201571922, "grad_norm": 4.826013088226318, "learning_rate": 8.107741802998152e-05, "loss": 0.7838, "step": 14573 }, { "epoch": 0.9874652754251643, "grad_norm": 6.566142559051514, "learning_rate": 8.10760490108837e-05, "loss": 0.8644, "step": 14574 }, { "epoch": 0.9875330306931364, "grad_norm": 6.540748119354248, "learning_rate": 8.107467999178589e-05, "loss": 0.7461, "step": 14575 }, { "epoch": 0.9876007859611085, "grad_norm": 8.179930686950684, "learning_rate": 8.107331097268807e-05, "loss": 0.6283, "step": 14576 }, { "epoch": 0.9876685412290805, "grad_norm": 6.814093112945557, "learning_rate": 8.107194195359025e-05, "loss": 0.6905, "step": 14577 }, { "epoch": 0.9877362964970526, "grad_norm": 5.342179775238037, "learning_rate": 8.107057293449245e-05, "loss": 0.636, "step": 14578 }, { "epoch": 0.9878040517650247, "grad_norm": 4.811430931091309, "learning_rate": 8.106920391539463e-05, "loss": 0.7746, "step": 14579 }, { "epoch": 0.9878718070329968, "grad_norm": 10.804598808288574, "learning_rate": 8.10678348962968e-05, "loss": 0.7885, "step": 14580 }, { "epoch": 0.9879395623009689, "grad_norm": 5.6060709953308105, "learning_rate": 8.1066465877199e-05, "loss": 1.06, "step": 14581 }, { "epoch": 0.988007317568941, "grad_norm": 5.063103675842285, "learning_rate": 8.106509685810118e-05, "loss": 0.8395, "step": 14582 }, { "epoch": 0.9880750728369131, "grad_norm": 6.959988594055176, "learning_rate": 8.106372783900336e-05, "loss": 0.6454, "step": 14583 }, { "epoch": 0.9881428281048852, "grad_norm": 8.020045280456543, "learning_rate": 8.106235881990554e-05, "loss": 0.7632, "step": 14584 }, { "epoch": 0.9882105833728573, "grad_norm": 5.018332481384277, "learning_rate": 8.106098980080774e-05, "loss": 0.5049, "step": 14585 }, { "epoch": 0.9882783386408294, "grad_norm": 6.3518967628479, "learning_rate": 8.105962078170992e-05, "loss": 0.7407, "step": 14586 }, { "epoch": 0.9883460939088015, "grad_norm": 6.3573899269104, "learning_rate": 8.10582517626121e-05, "loss": 0.8245, "step": 14587 }, { "epoch": 0.9884138491767734, "grad_norm": 7.538825988769531, "learning_rate": 8.105688274351428e-05, "loss": 1.0365, "step": 14588 }, { "epoch": 0.9884816044447455, "grad_norm": 6.961862564086914, "learning_rate": 8.105551372441646e-05, "loss": 0.688, "step": 14589 }, { "epoch": 0.9885493597127176, "grad_norm": 4.587418556213379, "learning_rate": 8.105414470531865e-05, "loss": 0.565, "step": 14590 }, { "epoch": 0.9886171149806897, "grad_norm": 6.72902774810791, "learning_rate": 8.105277568622083e-05, "loss": 0.7931, "step": 14591 }, { "epoch": 0.9886848702486618, "grad_norm": 5.065114974975586, "learning_rate": 8.105140666712301e-05, "loss": 0.6943, "step": 14592 }, { "epoch": 0.9887526255166339, "grad_norm": 4.853360652923584, "learning_rate": 8.105003764802519e-05, "loss": 0.6457, "step": 14593 }, { "epoch": 0.988820380784606, "grad_norm": 8.033453941345215, "learning_rate": 8.104866862892739e-05, "loss": 0.7982, "step": 14594 }, { "epoch": 0.9888881360525781, "grad_norm": 5.311244487762451, "learning_rate": 8.104729960982957e-05, "loss": 0.8363, "step": 14595 }, { "epoch": 0.9889558913205502, "grad_norm": 6.700189590454102, "learning_rate": 8.104593059073175e-05, "loss": 0.9098, "step": 14596 }, { "epoch": 0.9890236465885223, "grad_norm": 6.449888706207275, "learning_rate": 8.104456157163393e-05, "loss": 0.7272, "step": 14597 }, { "epoch": 0.9890914018564944, "grad_norm": 6.6049909591674805, "learning_rate": 8.104319255253611e-05, "loss": 0.7782, "step": 14598 }, { "epoch": 0.9891591571244664, "grad_norm": 5.873762130737305, "learning_rate": 8.10418235334383e-05, "loss": 0.7901, "step": 14599 }, { "epoch": 0.9892269123924385, "grad_norm": 5.933529376983643, "learning_rate": 8.104045451434048e-05, "loss": 0.7743, "step": 14600 }, { "epoch": 0.9892946676604106, "grad_norm": 6.905478477478027, "learning_rate": 8.103908549524266e-05, "loss": 0.7662, "step": 14601 }, { "epoch": 0.9893624229283827, "grad_norm": 5.39354944229126, "learning_rate": 8.103771647614484e-05, "loss": 0.6157, "step": 14602 }, { "epoch": 0.9894301781963548, "grad_norm": 6.440433979034424, "learning_rate": 8.103634745704704e-05, "loss": 0.8263, "step": 14603 }, { "epoch": 0.9894979334643269, "grad_norm": 5.653606414794922, "learning_rate": 8.103497843794922e-05, "loss": 0.7081, "step": 14604 }, { "epoch": 0.9895656887322989, "grad_norm": 5.021646499633789, "learning_rate": 8.10336094188514e-05, "loss": 0.7988, "step": 14605 }, { "epoch": 0.989633444000271, "grad_norm": 7.331817150115967, "learning_rate": 8.103224039975358e-05, "loss": 0.9294, "step": 14606 }, { "epoch": 0.9897011992682431, "grad_norm": 6.1719584465026855, "learning_rate": 8.103087138065576e-05, "loss": 0.7923, "step": 14607 }, { "epoch": 0.9897689545362152, "grad_norm": 7.179246425628662, "learning_rate": 8.102950236155795e-05, "loss": 0.9297, "step": 14608 }, { "epoch": 0.9898367098041873, "grad_norm": 5.388153076171875, "learning_rate": 8.102813334246013e-05, "loss": 0.7016, "step": 14609 }, { "epoch": 0.9899044650721593, "grad_norm": 5.60443115234375, "learning_rate": 8.102676432336231e-05, "loss": 0.7853, "step": 14610 }, { "epoch": 0.9899722203401314, "grad_norm": 6.330133438110352, "learning_rate": 8.102539530426449e-05, "loss": 0.6971, "step": 14611 }, { "epoch": 0.9900399756081035, "grad_norm": 8.957503318786621, "learning_rate": 8.102402628516667e-05, "loss": 0.7834, "step": 14612 }, { "epoch": 0.9901077308760756, "grad_norm": 6.6696929931640625, "learning_rate": 8.102265726606887e-05, "loss": 0.8092, "step": 14613 }, { "epoch": 0.9901754861440477, "grad_norm": 6.236315727233887, "learning_rate": 8.102128824697105e-05, "loss": 0.8904, "step": 14614 }, { "epoch": 0.9902432414120198, "grad_norm": 4.87846040725708, "learning_rate": 8.101991922787323e-05, "loss": 0.6445, "step": 14615 }, { "epoch": 0.9903109966799919, "grad_norm": 6.903649806976318, "learning_rate": 8.101855020877541e-05, "loss": 0.6749, "step": 14616 }, { "epoch": 0.990378751947964, "grad_norm": 5.653696537017822, "learning_rate": 8.10171811896776e-05, "loss": 0.4365, "step": 14617 }, { "epoch": 0.9904465072159361, "grad_norm": 6.188183784484863, "learning_rate": 8.101581217057978e-05, "loss": 0.8174, "step": 14618 }, { "epoch": 0.9905142624839082, "grad_norm": 5.165365695953369, "learning_rate": 8.101444315148196e-05, "loss": 0.6121, "step": 14619 }, { "epoch": 0.9905820177518803, "grad_norm": 7.228616237640381, "learning_rate": 8.101307413238414e-05, "loss": 0.5768, "step": 14620 }, { "epoch": 0.9906497730198522, "grad_norm": 6.415277004241943, "learning_rate": 8.101170511328632e-05, "loss": 0.67, "step": 14621 }, { "epoch": 0.9907175282878243, "grad_norm": 6.051969528198242, "learning_rate": 8.101033609418852e-05, "loss": 0.8459, "step": 14622 }, { "epoch": 0.9907852835557964, "grad_norm": 5.808727741241455, "learning_rate": 8.10089670750907e-05, "loss": 0.738, "step": 14623 }, { "epoch": 0.9908530388237685, "grad_norm": 7.00636625289917, "learning_rate": 8.100759805599288e-05, "loss": 0.6573, "step": 14624 }, { "epoch": 0.9909207940917406, "grad_norm": 6.8472900390625, "learning_rate": 8.100622903689507e-05, "loss": 1.0119, "step": 14625 }, { "epoch": 0.9909885493597127, "grad_norm": 6.012302875518799, "learning_rate": 8.100486001779725e-05, "loss": 0.5468, "step": 14626 }, { "epoch": 0.9910563046276848, "grad_norm": 4.420849323272705, "learning_rate": 8.100349099869943e-05, "loss": 0.5134, "step": 14627 }, { "epoch": 0.9911240598956569, "grad_norm": 5.315083980560303, "learning_rate": 8.100212197960163e-05, "loss": 0.803, "step": 14628 }, { "epoch": 0.991191815163629, "grad_norm": 8.18484115600586, "learning_rate": 8.10007529605038e-05, "loss": 1.0685, "step": 14629 }, { "epoch": 0.9912595704316011, "grad_norm": 6.134372711181641, "learning_rate": 8.099938394140599e-05, "loss": 0.8893, "step": 14630 }, { "epoch": 0.9913273256995732, "grad_norm": 7.056315898895264, "learning_rate": 8.099801492230818e-05, "loss": 1.0268, "step": 14631 }, { "epoch": 0.9913950809675453, "grad_norm": 7.75009298324585, "learning_rate": 8.099664590321036e-05, "loss": 0.8632, "step": 14632 }, { "epoch": 0.9914628362355173, "grad_norm": 5.707569122314453, "learning_rate": 8.099527688411254e-05, "loss": 0.7005, "step": 14633 }, { "epoch": 0.9915305915034894, "grad_norm": 5.789709091186523, "learning_rate": 8.099390786501472e-05, "loss": 0.9823, "step": 14634 }, { "epoch": 0.9915983467714615, "grad_norm": 7.6483941078186035, "learning_rate": 8.099253884591692e-05, "loss": 0.7865, "step": 14635 }, { "epoch": 0.9916661020394336, "grad_norm": 6.485616683959961, "learning_rate": 8.09911698268191e-05, "loss": 0.7863, "step": 14636 }, { "epoch": 0.9917338573074056, "grad_norm": 4.72672176361084, "learning_rate": 8.098980080772128e-05, "loss": 0.6081, "step": 14637 }, { "epoch": 0.9918016125753777, "grad_norm": 5.638705253601074, "learning_rate": 8.098843178862346e-05, "loss": 0.773, "step": 14638 }, { "epoch": 0.9918693678433498, "grad_norm": 5.565254211425781, "learning_rate": 8.098706276952564e-05, "loss": 0.7205, "step": 14639 }, { "epoch": 0.9919371231113219, "grad_norm": 5.114184379577637, "learning_rate": 8.098569375042783e-05, "loss": 0.6835, "step": 14640 }, { "epoch": 0.992004878379294, "grad_norm": 6.048708438873291, "learning_rate": 8.098432473133001e-05, "loss": 0.8163, "step": 14641 }, { "epoch": 0.9920726336472661, "grad_norm": 7.4688215255737305, "learning_rate": 8.098295571223219e-05, "loss": 0.5373, "step": 14642 }, { "epoch": 0.9921403889152381, "grad_norm": 5.477198600769043, "learning_rate": 8.098158669313437e-05, "loss": 0.8911, "step": 14643 }, { "epoch": 0.9922081441832102, "grad_norm": 5.453397274017334, "learning_rate": 8.098021767403655e-05, "loss": 0.7153, "step": 14644 }, { "epoch": 0.9922758994511823, "grad_norm": 7.451064586639404, "learning_rate": 8.097884865493875e-05, "loss": 0.7384, "step": 14645 }, { "epoch": 0.9923436547191544, "grad_norm": 6.131191253662109, "learning_rate": 8.097747963584093e-05, "loss": 0.7987, "step": 14646 }, { "epoch": 0.9924114099871265, "grad_norm": 6.377108573913574, "learning_rate": 8.09761106167431e-05, "loss": 0.7305, "step": 14647 }, { "epoch": 0.9924791652550986, "grad_norm": 7.368172645568848, "learning_rate": 8.097474159764529e-05, "loss": 0.7361, "step": 14648 }, { "epoch": 0.9925469205230707, "grad_norm": 5.075170993804932, "learning_rate": 8.097337257854748e-05, "loss": 0.8124, "step": 14649 }, { "epoch": 0.9926146757910428, "grad_norm": 4.833024501800537, "learning_rate": 8.097200355944966e-05, "loss": 0.5609, "step": 14650 }, { "epoch": 0.9926824310590149, "grad_norm": 5.358253002166748, "learning_rate": 8.097063454035184e-05, "loss": 0.6095, "step": 14651 }, { "epoch": 0.992750186326987, "grad_norm": 5.228174209594727, "learning_rate": 8.096926552125402e-05, "loss": 0.8636, "step": 14652 }, { "epoch": 0.992817941594959, "grad_norm": 5.842618465423584, "learning_rate": 8.09678965021562e-05, "loss": 0.6013, "step": 14653 }, { "epoch": 0.992885696862931, "grad_norm": 6.454665184020996, "learning_rate": 8.09665274830584e-05, "loss": 0.7059, "step": 14654 }, { "epoch": 0.9929534521309031, "grad_norm": 4.836513042449951, "learning_rate": 8.096515846396058e-05, "loss": 0.7923, "step": 14655 }, { "epoch": 0.9930212073988752, "grad_norm": 5.523789405822754, "learning_rate": 8.096378944486276e-05, "loss": 0.733, "step": 14656 }, { "epoch": 0.9930889626668473, "grad_norm": 6.529998779296875, "learning_rate": 8.096242042576494e-05, "loss": 0.7117, "step": 14657 }, { "epoch": 0.9931567179348194, "grad_norm": 4.7176408767700195, "learning_rate": 8.096105140666713e-05, "loss": 0.603, "step": 14658 }, { "epoch": 0.9932244732027915, "grad_norm": 5.4773993492126465, "learning_rate": 8.095968238756931e-05, "loss": 0.6101, "step": 14659 }, { "epoch": 0.9932922284707636, "grad_norm": 5.661228179931641, "learning_rate": 8.095831336847149e-05, "loss": 0.7999, "step": 14660 }, { "epoch": 0.9933599837387357, "grad_norm": 6.280917644500732, "learning_rate": 8.095694434937367e-05, "loss": 0.6691, "step": 14661 }, { "epoch": 0.9934277390067078, "grad_norm": 7.072371959686279, "learning_rate": 8.095557533027585e-05, "loss": 0.8523, "step": 14662 }, { "epoch": 0.9934954942746799, "grad_norm": 5.228603363037109, "learning_rate": 8.095420631117805e-05, "loss": 0.6841, "step": 14663 }, { "epoch": 0.993563249542652, "grad_norm": 6.489750862121582, "learning_rate": 8.095283729208023e-05, "loss": 0.9927, "step": 14664 }, { "epoch": 0.993631004810624, "grad_norm": 6.142980575561523, "learning_rate": 8.09514682729824e-05, "loss": 0.62, "step": 14665 }, { "epoch": 0.9936987600785961, "grad_norm": 6.388856887817383, "learning_rate": 8.095009925388459e-05, "loss": 0.6953, "step": 14666 }, { "epoch": 0.9937665153465682, "grad_norm": 6.71579647064209, "learning_rate": 8.094873023478677e-05, "loss": 0.6856, "step": 14667 }, { "epoch": 0.9938342706145403, "grad_norm": 4.24629545211792, "learning_rate": 8.094736121568896e-05, "loss": 0.9145, "step": 14668 }, { "epoch": 0.9939020258825124, "grad_norm": 5.748496055603027, "learning_rate": 8.094599219659114e-05, "loss": 0.8127, "step": 14669 }, { "epoch": 0.9939697811504844, "grad_norm": 6.51283597946167, "learning_rate": 8.094462317749332e-05, "loss": 0.9989, "step": 14670 }, { "epoch": 0.9940375364184565, "grad_norm": 5.742320537567139, "learning_rate": 8.094325415839552e-05, "loss": 0.7456, "step": 14671 }, { "epoch": 0.9941052916864286, "grad_norm": 5.277899265289307, "learning_rate": 8.09418851392977e-05, "loss": 0.7498, "step": 14672 }, { "epoch": 0.9941730469544007, "grad_norm": 5.872328281402588, "learning_rate": 8.094051612019988e-05, "loss": 0.7343, "step": 14673 }, { "epoch": 0.9942408022223728, "grad_norm": 5.946358680725098, "learning_rate": 8.093914710110207e-05, "loss": 0.7802, "step": 14674 }, { "epoch": 0.9943085574903449, "grad_norm": 6.713497161865234, "learning_rate": 8.093777808200425e-05, "loss": 0.9639, "step": 14675 }, { "epoch": 0.994376312758317, "grad_norm": 5.236899375915527, "learning_rate": 8.093640906290643e-05, "loss": 0.7111, "step": 14676 }, { "epoch": 0.994444068026289, "grad_norm": 6.121020317077637, "learning_rate": 8.093504004380862e-05, "loss": 0.8627, "step": 14677 }, { "epoch": 0.9945118232942611, "grad_norm": 6.684620380401611, "learning_rate": 8.09336710247108e-05, "loss": 0.8863, "step": 14678 }, { "epoch": 0.9945795785622332, "grad_norm": 5.249447345733643, "learning_rate": 8.093230200561299e-05, "loss": 0.6532, "step": 14679 }, { "epoch": 0.9946473338302053, "grad_norm": 5.633591651916504, "learning_rate": 8.093093298651517e-05, "loss": 0.8192, "step": 14680 }, { "epoch": 0.9947150890981774, "grad_norm": 5.300892353057861, "learning_rate": 8.092956396741736e-05, "loss": 0.6628, "step": 14681 }, { "epoch": 0.9947828443661495, "grad_norm": 6.91768741607666, "learning_rate": 8.092819494831954e-05, "loss": 0.9256, "step": 14682 }, { "epoch": 0.9948505996341216, "grad_norm": 6.193354606628418, "learning_rate": 8.092682592922172e-05, "loss": 0.8907, "step": 14683 }, { "epoch": 0.9949183549020937, "grad_norm": 5.23224401473999, "learning_rate": 8.09254569101239e-05, "loss": 0.6606, "step": 14684 }, { "epoch": 0.9949861101700658, "grad_norm": 5.615985870361328, "learning_rate": 8.092408789102608e-05, "loss": 0.7438, "step": 14685 }, { "epoch": 0.9950538654380378, "grad_norm": 4.997095584869385, "learning_rate": 8.092271887192828e-05, "loss": 0.6359, "step": 14686 }, { "epoch": 0.9951216207060098, "grad_norm": 4.995532989501953, "learning_rate": 8.092134985283046e-05, "loss": 0.7636, "step": 14687 }, { "epoch": 0.9951893759739819, "grad_norm": 5.627157688140869, "learning_rate": 8.091998083373264e-05, "loss": 0.7524, "step": 14688 }, { "epoch": 0.995257131241954, "grad_norm": 6.29923152923584, "learning_rate": 8.091861181463482e-05, "loss": 0.8923, "step": 14689 }, { "epoch": 0.9953248865099261, "grad_norm": 5.104131698608398, "learning_rate": 8.091724279553701e-05, "loss": 0.6621, "step": 14690 }, { "epoch": 0.9953926417778982, "grad_norm": 6.44821834564209, "learning_rate": 8.091587377643919e-05, "loss": 1.0041, "step": 14691 }, { "epoch": 0.9954603970458703, "grad_norm": 4.547702789306641, "learning_rate": 8.091450475734137e-05, "loss": 0.7067, "step": 14692 }, { "epoch": 0.9955281523138424, "grad_norm": 5.443078517913818, "learning_rate": 8.091313573824355e-05, "loss": 0.7664, "step": 14693 }, { "epoch": 0.9955959075818145, "grad_norm": 5.759512901306152, "learning_rate": 8.091176671914573e-05, "loss": 0.7241, "step": 14694 }, { "epoch": 0.9956636628497866, "grad_norm": 4.9956746101379395, "learning_rate": 8.091039770004793e-05, "loss": 0.6777, "step": 14695 }, { "epoch": 0.9957314181177587, "grad_norm": 6.713644027709961, "learning_rate": 8.09090286809501e-05, "loss": 0.7727, "step": 14696 }, { "epoch": 0.9957991733857308, "grad_norm": 5.988772869110107, "learning_rate": 8.090765966185229e-05, "loss": 0.6803, "step": 14697 }, { "epoch": 0.9958669286537029, "grad_norm": 5.6772966384887695, "learning_rate": 8.090629064275447e-05, "loss": 0.733, "step": 14698 }, { "epoch": 0.995934683921675, "grad_norm": 5.93316125869751, "learning_rate": 8.090492162365665e-05, "loss": 0.7119, "step": 14699 }, { "epoch": 0.996002439189647, "grad_norm": 5.35646915435791, "learning_rate": 8.090355260455884e-05, "loss": 0.9498, "step": 14700 }, { "epoch": 0.9960701944576191, "grad_norm": 6.932188510894775, "learning_rate": 8.090218358546102e-05, "loss": 0.7779, "step": 14701 }, { "epoch": 0.9961379497255911, "grad_norm": 4.6098856925964355, "learning_rate": 8.09008145663632e-05, "loss": 0.7614, "step": 14702 }, { "epoch": 0.9962057049935632, "grad_norm": 6.402628421783447, "learning_rate": 8.089944554726538e-05, "loss": 0.7212, "step": 14703 }, { "epoch": 0.9962734602615353, "grad_norm": 5.7452473640441895, "learning_rate": 8.089807652816758e-05, "loss": 0.9323, "step": 14704 }, { "epoch": 0.9963412155295074, "grad_norm": 8.710042953491211, "learning_rate": 8.089670750906976e-05, "loss": 0.7616, "step": 14705 }, { "epoch": 0.9964089707974795, "grad_norm": 6.53500509262085, "learning_rate": 8.089533848997194e-05, "loss": 0.8054, "step": 14706 }, { "epoch": 0.9964767260654516, "grad_norm": 6.090463161468506, "learning_rate": 8.089396947087412e-05, "loss": 0.8993, "step": 14707 }, { "epoch": 0.9965444813334237, "grad_norm": 4.988658428192139, "learning_rate": 8.08926004517763e-05, "loss": 0.6045, "step": 14708 }, { "epoch": 0.9966122366013958, "grad_norm": 5.254270553588867, "learning_rate": 8.089123143267849e-05, "loss": 0.7135, "step": 14709 }, { "epoch": 0.9966799918693678, "grad_norm": 6.4775848388671875, "learning_rate": 8.088986241358067e-05, "loss": 0.8647, "step": 14710 }, { "epoch": 0.9967477471373399, "grad_norm": 8.165122985839844, "learning_rate": 8.088849339448285e-05, "loss": 0.5061, "step": 14711 }, { "epoch": 0.996815502405312, "grad_norm": 6.5195159912109375, "learning_rate": 8.088712437538503e-05, "loss": 1.0305, "step": 14712 }, { "epoch": 0.9968832576732841, "grad_norm": 4.683186054229736, "learning_rate": 8.088575535628723e-05, "loss": 0.696, "step": 14713 }, { "epoch": 0.9969510129412562, "grad_norm": 8.721222877502441, "learning_rate": 8.08843863371894e-05, "loss": 0.7871, "step": 14714 }, { "epoch": 0.9970187682092283, "grad_norm": 4.208126544952393, "learning_rate": 8.088301731809159e-05, "loss": 0.4982, "step": 14715 }, { "epoch": 0.9970865234772004, "grad_norm": 8.587434768676758, "learning_rate": 8.088164829899377e-05, "loss": 0.7851, "step": 14716 }, { "epoch": 0.9971542787451725, "grad_norm": 6.511172294616699, "learning_rate": 8.088027927989596e-05, "loss": 0.6705, "step": 14717 }, { "epoch": 0.9972220340131446, "grad_norm": 9.866682052612305, "learning_rate": 8.087891026079814e-05, "loss": 0.7538, "step": 14718 }, { "epoch": 0.9972897892811166, "grad_norm": 5.130730628967285, "learning_rate": 8.087754124170032e-05, "loss": 0.5205, "step": 14719 }, { "epoch": 0.9973575445490886, "grad_norm": 6.190724849700928, "learning_rate": 8.087617222260252e-05, "loss": 0.8204, "step": 14720 }, { "epoch": 0.9974252998170607, "grad_norm": 6.667228698730469, "learning_rate": 8.08748032035047e-05, "loss": 0.8681, "step": 14721 }, { "epoch": 0.9974930550850328, "grad_norm": 5.230597019195557, "learning_rate": 8.087343418440688e-05, "loss": 0.6311, "step": 14722 }, { "epoch": 0.9975608103530049, "grad_norm": 4.286562919616699, "learning_rate": 8.087206516530907e-05, "loss": 0.5128, "step": 14723 }, { "epoch": 0.997628565620977, "grad_norm": 8.196592330932617, "learning_rate": 8.087069614621125e-05, "loss": 1.0953, "step": 14724 }, { "epoch": 0.9976963208889491, "grad_norm": 5.942284107208252, "learning_rate": 8.086932712711343e-05, "loss": 0.8896, "step": 14725 }, { "epoch": 0.9977640761569212, "grad_norm": 7.276721477508545, "learning_rate": 8.086795810801561e-05, "loss": 0.8503, "step": 14726 }, { "epoch": 0.9978318314248933, "grad_norm": 6.109722137451172, "learning_rate": 8.08665890889178e-05, "loss": 0.563, "step": 14727 }, { "epoch": 0.9978995866928654, "grad_norm": 7.156121253967285, "learning_rate": 8.086522006981998e-05, "loss": 0.6586, "step": 14728 }, { "epoch": 0.9979673419608375, "grad_norm": 5.004053592681885, "learning_rate": 8.086385105072217e-05, "loss": 0.7652, "step": 14729 }, { "epoch": 0.9980350972288096, "grad_norm": 6.767992973327637, "learning_rate": 8.086248203162435e-05, "loss": 0.9194, "step": 14730 }, { "epoch": 0.9981028524967817, "grad_norm": 4.350346565246582, "learning_rate": 8.086111301252653e-05, "loss": 0.5671, "step": 14731 }, { "epoch": 0.9981706077647537, "grad_norm": 4.5186052322387695, "learning_rate": 8.085974399342872e-05, "loss": 0.7229, "step": 14732 }, { "epoch": 0.9982383630327258, "grad_norm": 4.803741931915283, "learning_rate": 8.08583749743309e-05, "loss": 0.5626, "step": 14733 }, { "epoch": 0.9983061183006979, "grad_norm": 5.849275588989258, "learning_rate": 8.085700595523308e-05, "loss": 0.7053, "step": 14734 }, { "epoch": 0.9983738735686699, "grad_norm": 6.240585803985596, "learning_rate": 8.085563693613526e-05, "loss": 1.0339, "step": 14735 }, { "epoch": 0.998441628836642, "grad_norm": 5.102994441986084, "learning_rate": 8.085426791703745e-05, "loss": 0.6594, "step": 14736 }, { "epoch": 0.9985093841046141, "grad_norm": 7.48859977722168, "learning_rate": 8.085289889793964e-05, "loss": 0.7227, "step": 14737 }, { "epoch": 0.9985771393725862, "grad_norm": 4.829802513122559, "learning_rate": 8.085152987884182e-05, "loss": 0.6739, "step": 14738 }, { "epoch": 0.9986448946405583, "grad_norm": 5.353672504425049, "learning_rate": 8.0850160859744e-05, "loss": 0.8112, "step": 14739 }, { "epoch": 0.9987126499085304, "grad_norm": 5.165256023406982, "learning_rate": 8.084879184064618e-05, "loss": 0.7894, "step": 14740 }, { "epoch": 0.9987804051765025, "grad_norm": 6.365817070007324, "learning_rate": 8.084742282154837e-05, "loss": 0.6763, "step": 14741 }, { "epoch": 0.9988481604444746, "grad_norm": 5.996483325958252, "learning_rate": 8.084605380245055e-05, "loss": 0.6717, "step": 14742 }, { "epoch": 0.9989159157124466, "grad_norm": 5.232832431793213, "learning_rate": 8.084468478335273e-05, "loss": 0.779, "step": 14743 }, { "epoch": 0.9989836709804187, "grad_norm": 7.0810089111328125, "learning_rate": 8.084331576425491e-05, "loss": 1.0872, "step": 14744 }, { "epoch": 0.9990514262483908, "grad_norm": 6.616766452789307, "learning_rate": 8.084194674515709e-05, "loss": 0.8411, "step": 14745 }, { "epoch": 0.9991191815163629, "grad_norm": 6.06799840927124, "learning_rate": 8.084057772605929e-05, "loss": 0.6083, "step": 14746 }, { "epoch": 0.999186936784335, "grad_norm": 6.634158134460449, "learning_rate": 8.083920870696147e-05, "loss": 0.627, "step": 14747 }, { "epoch": 0.9992546920523071, "grad_norm": 6.214893817901611, "learning_rate": 8.083783968786365e-05, "loss": 1.025, "step": 14748 }, { "epoch": 0.9993224473202792, "grad_norm": 7.438349723815918, "learning_rate": 8.083647066876583e-05, "loss": 0.8148, "step": 14749 }, { "epoch": 0.9993902025882513, "grad_norm": 5.228566646575928, "learning_rate": 8.083510164966802e-05, "loss": 0.741, "step": 14750 }, { "epoch": 0.9994579578562233, "grad_norm": 6.1171064376831055, "learning_rate": 8.08337326305702e-05, "loss": 0.6582, "step": 14751 }, { "epoch": 0.9995257131241954, "grad_norm": 5.6771368980407715, "learning_rate": 8.083236361147238e-05, "loss": 0.7143, "step": 14752 }, { "epoch": 0.9995934683921675, "grad_norm": 6.686731815338135, "learning_rate": 8.083099459237456e-05, "loss": 0.9172, "step": 14753 }, { "epoch": 0.9996612236601395, "grad_norm": 5.874754905700684, "learning_rate": 8.082962557327674e-05, "loss": 0.8786, "step": 14754 }, { "epoch": 0.9997289789281116, "grad_norm": 7.658037185668945, "learning_rate": 8.082825655417894e-05, "loss": 0.4877, "step": 14755 }, { "epoch": 0.9997967341960837, "grad_norm": 7.869183540344238, "learning_rate": 8.082688753508112e-05, "loss": 0.811, "step": 14756 }, { "epoch": 0.9997967341960837, "eval_loss": 0.7405052185058594, "eval_noise_accuracy": 0.0, "eval_runtime": 1547.7438, "eval_samples_per_second": 3.32, "eval_steps_per_second": 0.208, "eval_wer": 78.51462535428304, "step": 14756 }, { "epoch": 0.9998644894640558, "grad_norm": 6.051870822906494, "learning_rate": 8.08255185159833e-05, "loss": 0.9022, "step": 14757 }, { "epoch": 0.9999322447320279, "grad_norm": 5.921606063842773, "learning_rate": 8.082414949688548e-05, "loss": 0.8192, "step": 14758 }, { "epoch": 1.0, "grad_norm": 8.148659706115723, "learning_rate": 8.082278047778767e-05, "loss": 0.7852, "step": 14759 }, { "epoch": 1.000067755267972, "grad_norm": 5.229124069213867, "learning_rate": 8.082141145868985e-05, "loss": 0.8277, "step": 14760 }, { "epoch": 1.0001355105359442, "grad_norm": 5.082587242126465, "learning_rate": 8.082004243959203e-05, "loss": 0.7213, "step": 14761 }, { "epoch": 1.0002032658039162, "grad_norm": 6.1493024826049805, "learning_rate": 8.081867342049421e-05, "loss": 0.6404, "step": 14762 }, { "epoch": 1.0002710210718884, "grad_norm": 6.247502326965332, "learning_rate": 8.08173044013964e-05, "loss": 1.1216, "step": 14763 }, { "epoch": 1.0003387763398603, "grad_norm": 6.559756755828857, "learning_rate": 8.081593538229859e-05, "loss": 0.5577, "step": 14764 }, { "epoch": 1.0004065316078325, "grad_norm": 4.563822269439697, "learning_rate": 8.081456636320077e-05, "loss": 0.6457, "step": 14765 }, { "epoch": 1.0004742868758045, "grad_norm": 4.815116882324219, "learning_rate": 8.081319734410296e-05, "loss": 0.8247, "step": 14766 }, { "epoch": 1.0005420421437767, "grad_norm": 4.924731731414795, "learning_rate": 8.081182832500514e-05, "loss": 0.8842, "step": 14767 }, { "epoch": 1.0006097974117487, "grad_norm": 6.610191822052002, "learning_rate": 8.081045930590732e-05, "loss": 0.7282, "step": 14768 }, { "epoch": 1.000677552679721, "grad_norm": 6.175731658935547, "learning_rate": 8.080909028680951e-05, "loss": 0.7621, "step": 14769 }, { "epoch": 1.000745307947693, "grad_norm": 5.9794111251831055, "learning_rate": 8.08077212677117e-05, "loss": 0.7258, "step": 14770 }, { "epoch": 1.000813063215665, "grad_norm": 13.052425384521484, "learning_rate": 8.080635224861388e-05, "loss": 0.8328, "step": 14771 }, { "epoch": 1.000880818483637, "grad_norm": 6.011143684387207, "learning_rate": 8.080498322951606e-05, "loss": 0.62, "step": 14772 }, { "epoch": 1.0009485737516093, "grad_norm": 7.789141654968262, "learning_rate": 8.080361421041825e-05, "loss": 0.7544, "step": 14773 }, { "epoch": 1.0010163290195813, "grad_norm": 5.623871326446533, "learning_rate": 8.080224519132043e-05, "loss": 0.5816, "step": 14774 }, { "epoch": 1.0010840842875535, "grad_norm": 6.538607597351074, "learning_rate": 8.080087617222261e-05, "loss": 0.8612, "step": 14775 }, { "epoch": 1.0011518395555254, "grad_norm": 5.990743160247803, "learning_rate": 8.079950715312479e-05, "loss": 0.7108, "step": 14776 }, { "epoch": 1.0012195948234974, "grad_norm": 5.186371326446533, "learning_rate": 8.079813813402697e-05, "loss": 0.5226, "step": 14777 }, { "epoch": 1.0012873500914696, "grad_norm": 8.262914657592773, "learning_rate": 8.079676911492916e-05, "loss": 0.5101, "step": 14778 }, { "epoch": 1.0013551053594416, "grad_norm": 7.4032769203186035, "learning_rate": 8.079540009583134e-05, "loss": 0.579, "step": 14779 }, { "epoch": 1.0014228606274138, "grad_norm": 5.568051815032959, "learning_rate": 8.079403107673353e-05, "loss": 0.7007, "step": 14780 }, { "epoch": 1.0014906158953858, "grad_norm": 4.613144397735596, "learning_rate": 8.07926620576357e-05, "loss": 0.6206, "step": 14781 }, { "epoch": 1.001558371163358, "grad_norm": 5.073683261871338, "learning_rate": 8.07912930385379e-05, "loss": 0.4866, "step": 14782 }, { "epoch": 1.00162612643133, "grad_norm": 4.251949310302734, "learning_rate": 8.078992401944008e-05, "loss": 0.5746, "step": 14783 }, { "epoch": 1.0016938816993022, "grad_norm": 6.774303436279297, "learning_rate": 8.078855500034226e-05, "loss": 0.81, "step": 14784 }, { "epoch": 1.0017616369672742, "grad_norm": 7.1045918464660645, "learning_rate": 8.078718598124444e-05, "loss": 0.7654, "step": 14785 }, { "epoch": 1.0018293922352464, "grad_norm": 4.904873371124268, "learning_rate": 8.078581696214662e-05, "loss": 0.8256, "step": 14786 }, { "epoch": 1.0018971475032183, "grad_norm": 6.918723106384277, "learning_rate": 8.078444794304881e-05, "loss": 0.7021, "step": 14787 }, { "epoch": 1.0019649027711905, "grad_norm": 6.581482887268066, "learning_rate": 8.0783078923951e-05, "loss": 0.6402, "step": 14788 }, { "epoch": 1.0020326580391625, "grad_norm": 5.610997200012207, "learning_rate": 8.078170990485318e-05, "loss": 0.7777, "step": 14789 }, { "epoch": 1.0021004133071347, "grad_norm": 6.357514381408691, "learning_rate": 8.078034088575536e-05, "loss": 0.7679, "step": 14790 }, { "epoch": 1.0021681685751067, "grad_norm": 6.965141296386719, "learning_rate": 8.077897186665755e-05, "loss": 0.4878, "step": 14791 }, { "epoch": 1.0022359238430787, "grad_norm": 6.122716426849365, "learning_rate": 8.077760284755973e-05, "loss": 0.6921, "step": 14792 }, { "epoch": 1.002303679111051, "grad_norm": 4.804755210876465, "learning_rate": 8.077623382846191e-05, "loss": 0.6617, "step": 14793 }, { "epoch": 1.0023714343790229, "grad_norm": 5.5113844871521, "learning_rate": 8.077486480936409e-05, "loss": 0.7798, "step": 14794 }, { "epoch": 1.002439189646995, "grad_norm": 6.461663246154785, "learning_rate": 8.077349579026627e-05, "loss": 0.7959, "step": 14795 }, { "epoch": 1.002506944914967, "grad_norm": 6.894604206085205, "learning_rate": 8.077212677116846e-05, "loss": 0.7367, "step": 14796 }, { "epoch": 1.0025747001829393, "grad_norm": 5.337347984313965, "learning_rate": 8.077075775207065e-05, "loss": 0.5788, "step": 14797 }, { "epoch": 1.0026424554509112, "grad_norm": 6.755460262298584, "learning_rate": 8.076938873297283e-05, "loss": 0.8379, "step": 14798 }, { "epoch": 1.0027102107188834, "grad_norm": 5.582238674163818, "learning_rate": 8.0768019713875e-05, "loss": 0.6805, "step": 14799 }, { "epoch": 1.0027779659868554, "grad_norm": 5.460515975952148, "learning_rate": 8.076665069477719e-05, "loss": 0.7466, "step": 14800 }, { "epoch": 1.0028457212548276, "grad_norm": 7.810371398925781, "learning_rate": 8.076528167567938e-05, "loss": 0.743, "step": 14801 }, { "epoch": 1.0029134765227996, "grad_norm": 5.568098545074463, "learning_rate": 8.076391265658156e-05, "loss": 0.7604, "step": 14802 }, { "epoch": 1.0029812317907718, "grad_norm": 4.993529319763184, "learning_rate": 8.076254363748374e-05, "loss": 0.5721, "step": 14803 }, { "epoch": 1.0030489870587438, "grad_norm": 4.659602642059326, "learning_rate": 8.076117461838592e-05, "loss": 0.5619, "step": 14804 }, { "epoch": 1.003116742326716, "grad_norm": 4.9584221839904785, "learning_rate": 8.075980559928812e-05, "loss": 0.6407, "step": 14805 }, { "epoch": 1.003184497594688, "grad_norm": 6.771777629852295, "learning_rate": 8.07584365801903e-05, "loss": 0.6165, "step": 14806 }, { "epoch": 1.0032522528626602, "grad_norm": 6.287032604217529, "learning_rate": 8.075706756109248e-05, "loss": 0.8346, "step": 14807 }, { "epoch": 1.0033200081306322, "grad_norm": 5.438133239746094, "learning_rate": 8.075569854199466e-05, "loss": 0.6621, "step": 14808 }, { "epoch": 1.0033877633986041, "grad_norm": 5.8191633224487305, "learning_rate": 8.075432952289685e-05, "loss": 0.6837, "step": 14809 }, { "epoch": 1.0034555186665763, "grad_norm": 5.39967679977417, "learning_rate": 8.075296050379903e-05, "loss": 0.6687, "step": 14810 }, { "epoch": 1.0035232739345483, "grad_norm": 6.197573184967041, "learning_rate": 8.075159148470121e-05, "loss": 0.6243, "step": 14811 }, { "epoch": 1.0035910292025205, "grad_norm": 5.200948715209961, "learning_rate": 8.07502224656034e-05, "loss": 0.6812, "step": 14812 }, { "epoch": 1.0036587844704925, "grad_norm": 4.868316650390625, "learning_rate": 8.074885344650558e-05, "loss": 0.6131, "step": 14813 }, { "epoch": 1.0037265397384647, "grad_norm": 4.966255187988281, "learning_rate": 8.074748442740777e-05, "loss": 0.7787, "step": 14814 }, { "epoch": 1.0037942950064367, "grad_norm": 6.747678756713867, "learning_rate": 8.074611540830996e-05, "loss": 0.6185, "step": 14815 }, { "epoch": 1.003862050274409, "grad_norm": 6.168325424194336, "learning_rate": 8.074474638921214e-05, "loss": 0.7458, "step": 14816 }, { "epoch": 1.0039298055423809, "grad_norm": 4.884849548339844, "learning_rate": 8.074337737011432e-05, "loss": 0.604, "step": 14817 }, { "epoch": 1.003997560810353, "grad_norm": 4.729106903076172, "learning_rate": 8.07420083510165e-05, "loss": 0.6351, "step": 14818 }, { "epoch": 1.004065316078325, "grad_norm": 6.679864406585693, "learning_rate": 8.07406393319187e-05, "loss": 0.8324, "step": 14819 }, { "epoch": 1.0041330713462973, "grad_norm": 5.62283182144165, "learning_rate": 8.073927031282087e-05, "loss": 0.8227, "step": 14820 }, { "epoch": 1.0042008266142692, "grad_norm": 8.426139831542969, "learning_rate": 8.073790129372305e-05, "loss": 0.6578, "step": 14821 }, { "epoch": 1.0042685818822414, "grad_norm": 6.8322038650512695, "learning_rate": 8.073653227462524e-05, "loss": 0.7437, "step": 14822 }, { "epoch": 1.0043363371502134, "grad_norm": 8.170849800109863, "learning_rate": 8.073516325552743e-05, "loss": 0.7791, "step": 14823 }, { "epoch": 1.0044040924181856, "grad_norm": 6.2695441246032715, "learning_rate": 8.073379423642961e-05, "loss": 0.7124, "step": 14824 }, { "epoch": 1.0044718476861576, "grad_norm": 6.771942615509033, "learning_rate": 8.073242521733179e-05, "loss": 0.7781, "step": 14825 }, { "epoch": 1.0045396029541296, "grad_norm": 5.956495761871338, "learning_rate": 8.073105619823397e-05, "loss": 0.6476, "step": 14826 }, { "epoch": 1.0046073582221018, "grad_norm": 7.166341304779053, "learning_rate": 8.072968717913615e-05, "loss": 0.8113, "step": 14827 }, { "epoch": 1.0046751134900738, "grad_norm": 5.417853832244873, "learning_rate": 8.072831816003834e-05, "loss": 0.5846, "step": 14828 }, { "epoch": 1.004742868758046, "grad_norm": 5.144546031951904, "learning_rate": 8.072694914094052e-05, "loss": 0.693, "step": 14829 }, { "epoch": 1.004810624026018, "grad_norm": 4.3079094886779785, "learning_rate": 8.07255801218427e-05, "loss": 0.7312, "step": 14830 }, { "epoch": 1.0048783792939902, "grad_norm": 5.390209674835205, "learning_rate": 8.072421110274489e-05, "loss": 0.6347, "step": 14831 }, { "epoch": 1.0049461345619621, "grad_norm": 5.938960552215576, "learning_rate": 8.072284208364707e-05, "loss": 0.76, "step": 14832 }, { "epoch": 1.0050138898299343, "grad_norm": 5.8623247146606445, "learning_rate": 8.072147306454926e-05, "loss": 0.8752, "step": 14833 }, { "epoch": 1.0050816450979063, "grad_norm": 6.3055219650268555, "learning_rate": 8.072010404545144e-05, "loss": 0.783, "step": 14834 }, { "epoch": 1.0051494003658785, "grad_norm": 6.132151126861572, "learning_rate": 8.071873502635362e-05, "loss": 0.6859, "step": 14835 }, { "epoch": 1.0052171556338505, "grad_norm": 4.891525745391846, "learning_rate": 8.07173660072558e-05, "loss": 0.5293, "step": 14836 }, { "epoch": 1.0052849109018227, "grad_norm": 6.941277980804443, "learning_rate": 8.0715996988158e-05, "loss": 0.5511, "step": 14837 }, { "epoch": 1.0053526661697947, "grad_norm": 7.667896270751953, "learning_rate": 8.071462796906017e-05, "loss": 0.6837, "step": 14838 }, { "epoch": 1.0054204214377669, "grad_norm": 4.408740043640137, "learning_rate": 8.071325894996236e-05, "loss": 0.5733, "step": 14839 }, { "epoch": 1.0054881767057389, "grad_norm": 7.213569164276123, "learning_rate": 8.071188993086454e-05, "loss": 0.6838, "step": 14840 }, { "epoch": 1.0055559319737108, "grad_norm": 6.651516914367676, "learning_rate": 8.071052091176672e-05, "loss": 0.8254, "step": 14841 }, { "epoch": 1.005623687241683, "grad_norm": 5.002716541290283, "learning_rate": 8.070915189266891e-05, "loss": 0.5535, "step": 14842 }, { "epoch": 1.005691442509655, "grad_norm": 7.2078728675842285, "learning_rate": 8.070778287357109e-05, "loss": 0.7439, "step": 14843 }, { "epoch": 1.0057591977776272, "grad_norm": 4.879020690917969, "learning_rate": 8.070641385447327e-05, "loss": 0.7649, "step": 14844 }, { "epoch": 1.0058269530455992, "grad_norm": 7.177512168884277, "learning_rate": 8.070504483537545e-05, "loss": 0.599, "step": 14845 }, { "epoch": 1.0058947083135714, "grad_norm": 7.1817708015441895, "learning_rate": 8.070367581627764e-05, "loss": 0.6034, "step": 14846 }, { "epoch": 1.0059624635815434, "grad_norm": 11.819538116455078, "learning_rate": 8.070230679717982e-05, "loss": 0.6347, "step": 14847 }, { "epoch": 1.0060302188495156, "grad_norm": 5.775310516357422, "learning_rate": 8.0700937778082e-05, "loss": 0.577, "step": 14848 }, { "epoch": 1.0060979741174876, "grad_norm": 6.324820518493652, "learning_rate": 8.069956875898419e-05, "loss": 0.7638, "step": 14849 }, { "epoch": 1.0061657293854598, "grad_norm": 5.748277187347412, "learning_rate": 8.069819973988637e-05, "loss": 0.6587, "step": 14850 }, { "epoch": 1.0062334846534318, "grad_norm": 5.290812015533447, "learning_rate": 8.069683072078856e-05, "loss": 0.5606, "step": 14851 }, { "epoch": 1.006301239921404, "grad_norm": 5.5858941078186035, "learning_rate": 8.069546170169074e-05, "loss": 0.4992, "step": 14852 }, { "epoch": 1.006368995189376, "grad_norm": 5.581445693969727, "learning_rate": 8.069409268259292e-05, "loss": 0.5722, "step": 14853 }, { "epoch": 1.0064367504573482, "grad_norm": 6.363956451416016, "learning_rate": 8.06927236634951e-05, "loss": 0.5024, "step": 14854 }, { "epoch": 1.0065045057253201, "grad_norm": 6.256494998931885, "learning_rate": 8.069135464439728e-05, "loss": 0.4683, "step": 14855 }, { "epoch": 1.0065722609932923, "grad_norm": 5.651278972625732, "learning_rate": 8.068998562529948e-05, "loss": 0.6379, "step": 14856 }, { "epoch": 1.0066400162612643, "grad_norm": 7.8025712966918945, "learning_rate": 8.068861660620166e-05, "loss": 0.7948, "step": 14857 }, { "epoch": 1.0067077715292363, "grad_norm": 6.639724254608154, "learning_rate": 8.068724758710384e-05, "loss": 0.8023, "step": 14858 }, { "epoch": 1.0067755267972085, "grad_norm": 3.8018884658813477, "learning_rate": 8.068587856800603e-05, "loss": 0.5517, "step": 14859 }, { "epoch": 1.0068432820651805, "grad_norm": 5.326318740844727, "learning_rate": 8.068450954890821e-05, "loss": 0.611, "step": 14860 }, { "epoch": 1.0069110373331527, "grad_norm": 6.377422332763672, "learning_rate": 8.068314052981039e-05, "loss": 0.6394, "step": 14861 }, { "epoch": 1.0069787926011247, "grad_norm": 5.33223295211792, "learning_rate": 8.068177151071258e-05, "loss": 0.6427, "step": 14862 }, { "epoch": 1.0070465478690969, "grad_norm": 6.365980625152588, "learning_rate": 8.068040249161476e-05, "loss": 0.7221, "step": 14863 }, { "epoch": 1.0071143031370688, "grad_norm": 6.96981143951416, "learning_rate": 8.067903347251694e-05, "loss": 0.9021, "step": 14864 }, { "epoch": 1.007182058405041, "grad_norm": 8.809569358825684, "learning_rate": 8.067766445341914e-05, "loss": 0.8451, "step": 14865 }, { "epoch": 1.007249813673013, "grad_norm": 6.544775485992432, "learning_rate": 8.067629543432132e-05, "loss": 0.6729, "step": 14866 }, { "epoch": 1.0073175689409852, "grad_norm": 4.71762228012085, "learning_rate": 8.06749264152235e-05, "loss": 0.6192, "step": 14867 }, { "epoch": 1.0073853242089572, "grad_norm": 5.027548313140869, "learning_rate": 8.067355739612568e-05, "loss": 0.6514, "step": 14868 }, { "epoch": 1.0074530794769294, "grad_norm": 6.541376113891602, "learning_rate": 8.067218837702787e-05, "loss": 0.7091, "step": 14869 }, { "epoch": 1.0075208347449014, "grad_norm": 4.369992733001709, "learning_rate": 8.067081935793005e-05, "loss": 0.5207, "step": 14870 }, { "epoch": 1.0075885900128736, "grad_norm": 6.614040851593018, "learning_rate": 8.066945033883223e-05, "loss": 0.4269, "step": 14871 }, { "epoch": 1.0076563452808456, "grad_norm": 5.504267692565918, "learning_rate": 8.066808131973441e-05, "loss": 0.867, "step": 14872 }, { "epoch": 1.0077241005488178, "grad_norm": 5.660549640655518, "learning_rate": 8.06667123006366e-05, "loss": 0.7685, "step": 14873 }, { "epoch": 1.0077918558167898, "grad_norm": 6.468850612640381, "learning_rate": 8.066534328153879e-05, "loss": 0.7089, "step": 14874 }, { "epoch": 1.0078596110847617, "grad_norm": 5.692544460296631, "learning_rate": 8.066397426244097e-05, "loss": 0.6021, "step": 14875 }, { "epoch": 1.007927366352734, "grad_norm": 9.963805198669434, "learning_rate": 8.066260524334315e-05, "loss": 0.5066, "step": 14876 }, { "epoch": 1.007995121620706, "grad_norm": 6.6660308837890625, "learning_rate": 8.066123622424533e-05, "loss": 0.5434, "step": 14877 }, { "epoch": 1.0080628768886781, "grad_norm": 5.235628604888916, "learning_rate": 8.065986720514751e-05, "loss": 0.7677, "step": 14878 }, { "epoch": 1.00813063215665, "grad_norm": 3.8803293704986572, "learning_rate": 8.06584981860497e-05, "loss": 0.5942, "step": 14879 }, { "epoch": 1.0081983874246223, "grad_norm": 6.2981390953063965, "learning_rate": 8.065712916695188e-05, "loss": 0.7267, "step": 14880 }, { "epoch": 1.0082661426925943, "grad_norm": 5.843595504760742, "learning_rate": 8.065576014785406e-05, "loss": 0.6444, "step": 14881 }, { "epoch": 1.0083338979605665, "grad_norm": 4.541314601898193, "learning_rate": 8.065439112875625e-05, "loss": 0.5712, "step": 14882 }, { "epoch": 1.0084016532285385, "grad_norm": 6.081702709197998, "learning_rate": 8.065302210965844e-05, "loss": 0.5991, "step": 14883 }, { "epoch": 1.0084694084965107, "grad_norm": 6.3184943199157715, "learning_rate": 8.065165309056062e-05, "loss": 0.6225, "step": 14884 }, { "epoch": 1.0085371637644827, "grad_norm": 4.152682304382324, "learning_rate": 8.06502840714628e-05, "loss": 0.6958, "step": 14885 }, { "epoch": 1.0086049190324549, "grad_norm": 5.545871257781982, "learning_rate": 8.064891505236498e-05, "loss": 0.6238, "step": 14886 }, { "epoch": 1.0086726743004268, "grad_norm": 5.534969329833984, "learning_rate": 8.064754603326716e-05, "loss": 0.6741, "step": 14887 }, { "epoch": 1.008740429568399, "grad_norm": 5.907772064208984, "learning_rate": 8.064617701416935e-05, "loss": 0.6827, "step": 14888 }, { "epoch": 1.008808184836371, "grad_norm": 6.224740028381348, "learning_rate": 8.064480799507153e-05, "loss": 0.7227, "step": 14889 }, { "epoch": 1.008875940104343, "grad_norm": 5.671234607696533, "learning_rate": 8.064343897597372e-05, "loss": 0.6511, "step": 14890 }, { "epoch": 1.0089436953723152, "grad_norm": 4.597881317138672, "learning_rate": 8.06420699568759e-05, "loss": 0.5303, "step": 14891 }, { "epoch": 1.0090114506402872, "grad_norm": 7.3619561195373535, "learning_rate": 8.064070093777809e-05, "loss": 0.6853, "step": 14892 }, { "epoch": 1.0090792059082594, "grad_norm": 5.377906322479248, "learning_rate": 8.063933191868027e-05, "loss": 0.6067, "step": 14893 }, { "epoch": 1.0091469611762314, "grad_norm": 4.887160778045654, "learning_rate": 8.063796289958245e-05, "loss": 0.7488, "step": 14894 }, { "epoch": 1.0092147164442036, "grad_norm": 6.19053316116333, "learning_rate": 8.063659388048463e-05, "loss": 0.6629, "step": 14895 }, { "epoch": 1.0092824717121756, "grad_norm": 5.102216720581055, "learning_rate": 8.063522486138681e-05, "loss": 0.765, "step": 14896 }, { "epoch": 1.0093502269801478, "grad_norm": 8.829859733581543, "learning_rate": 8.0633855842289e-05, "loss": 0.5257, "step": 14897 }, { "epoch": 1.0094179822481197, "grad_norm": 11.013011932373047, "learning_rate": 8.063248682319118e-05, "loss": 0.7178, "step": 14898 }, { "epoch": 1.009485737516092, "grad_norm": 6.39233922958374, "learning_rate": 8.063111780409337e-05, "loss": 0.7438, "step": 14899 }, { "epoch": 1.009553492784064, "grad_norm": 5.155503749847412, "learning_rate": 8.062974878499555e-05, "loss": 0.5149, "step": 14900 }, { "epoch": 1.0096212480520361, "grad_norm": 5.446846961975098, "learning_rate": 8.062837976589774e-05, "loss": 0.5956, "step": 14901 }, { "epoch": 1.009689003320008, "grad_norm": 4.781370639801025, "learning_rate": 8.062701074679992e-05, "loss": 0.7076, "step": 14902 }, { "epoch": 1.0097567585879803, "grad_norm": 5.34435510635376, "learning_rate": 8.06256417277021e-05, "loss": 0.6906, "step": 14903 }, { "epoch": 1.0098245138559523, "grad_norm": 9.561019897460938, "learning_rate": 8.062427270860428e-05, "loss": 0.6931, "step": 14904 }, { "epoch": 1.0098922691239245, "grad_norm": 5.806305408477783, "learning_rate": 8.062290368950647e-05, "loss": 0.9629, "step": 14905 }, { "epoch": 1.0099600243918965, "grad_norm": 6.2731032371521, "learning_rate": 8.062153467040865e-05, "loss": 0.8255, "step": 14906 }, { "epoch": 1.0100277796598685, "grad_norm": 5.084347248077393, "learning_rate": 8.062016565131084e-05, "loss": 0.756, "step": 14907 }, { "epoch": 1.0100955349278407, "grad_norm": 5.779279708862305, "learning_rate": 8.061879663221303e-05, "loss": 0.8397, "step": 14908 }, { "epoch": 1.0101632901958126, "grad_norm": 8.087071418762207, "learning_rate": 8.061742761311521e-05, "loss": 0.7148, "step": 14909 }, { "epoch": 1.0102310454637848, "grad_norm": 7.416251182556152, "learning_rate": 8.061605859401739e-05, "loss": 0.5286, "step": 14910 }, { "epoch": 1.0102988007317568, "grad_norm": 7.369309425354004, "learning_rate": 8.061468957491958e-05, "loss": 0.6396, "step": 14911 }, { "epoch": 1.010366555999729, "grad_norm": 5.095036029815674, "learning_rate": 8.061332055582176e-05, "loss": 0.5745, "step": 14912 }, { "epoch": 1.010434311267701, "grad_norm": 5.032590866088867, "learning_rate": 8.061195153672394e-05, "loss": 0.8218, "step": 14913 }, { "epoch": 1.0105020665356732, "grad_norm": 5.7722039222717285, "learning_rate": 8.061058251762612e-05, "loss": 0.5213, "step": 14914 }, { "epoch": 1.0105698218036452, "grad_norm": 5.260948181152344, "learning_rate": 8.060921349852832e-05, "loss": 0.755, "step": 14915 }, { "epoch": 1.0106375770716174, "grad_norm": 6.892032623291016, "learning_rate": 8.06078444794305e-05, "loss": 0.7952, "step": 14916 }, { "epoch": 1.0107053323395894, "grad_norm": 6.230368137359619, "learning_rate": 8.060647546033268e-05, "loss": 0.5725, "step": 14917 }, { "epoch": 1.0107730876075616, "grad_norm": 6.269092559814453, "learning_rate": 8.060510644123486e-05, "loss": 0.4972, "step": 14918 }, { "epoch": 1.0108408428755336, "grad_norm": 6.031118869781494, "learning_rate": 8.060373742213704e-05, "loss": 0.8361, "step": 14919 }, { "epoch": 1.0109085981435058, "grad_norm": 8.682488441467285, "learning_rate": 8.060236840303923e-05, "loss": 0.6005, "step": 14920 }, { "epoch": 1.0109763534114777, "grad_norm": 6.82516622543335, "learning_rate": 8.060099938394141e-05, "loss": 0.7075, "step": 14921 }, { "epoch": 1.01104410867945, "grad_norm": 6.251338958740234, "learning_rate": 8.05996303648436e-05, "loss": 0.7116, "step": 14922 }, { "epoch": 1.011111863947422, "grad_norm": 7.739006519317627, "learning_rate": 8.059826134574577e-05, "loss": 0.9093, "step": 14923 }, { "epoch": 1.011179619215394, "grad_norm": 6.702469825744629, "learning_rate": 8.059689232664797e-05, "loss": 0.5837, "step": 14924 }, { "epoch": 1.011247374483366, "grad_norm": 6.131649494171143, "learning_rate": 8.059552330755015e-05, "loss": 0.5026, "step": 14925 }, { "epoch": 1.011315129751338, "grad_norm": 4.714535713195801, "learning_rate": 8.059415428845233e-05, "loss": 0.5705, "step": 14926 }, { "epoch": 1.0113828850193103, "grad_norm": 5.460876941680908, "learning_rate": 8.059278526935451e-05, "loss": 0.9523, "step": 14927 }, { "epoch": 1.0114506402872823, "grad_norm": 5.706765174865723, "learning_rate": 8.059141625025669e-05, "loss": 0.7936, "step": 14928 }, { "epoch": 1.0115183955552545, "grad_norm": 6.572486877441406, "learning_rate": 8.059004723115888e-05, "loss": 0.8149, "step": 14929 }, { "epoch": 1.0115861508232264, "grad_norm": 9.406861305236816, "learning_rate": 8.058867821206106e-05, "loss": 0.5752, "step": 14930 }, { "epoch": 1.0116539060911987, "grad_norm": 5.10825252532959, "learning_rate": 8.058730919296324e-05, "loss": 0.4359, "step": 14931 }, { "epoch": 1.0117216613591706, "grad_norm": 6.5198516845703125, "learning_rate": 8.058594017386542e-05, "loss": 0.7252, "step": 14932 }, { "epoch": 1.0117894166271428, "grad_norm": 5.7710981369018555, "learning_rate": 8.05845711547676e-05, "loss": 0.6666, "step": 14933 }, { "epoch": 1.0118571718951148, "grad_norm": 5.927985191345215, "learning_rate": 8.05832021356698e-05, "loss": 0.7755, "step": 14934 }, { "epoch": 1.011924927163087, "grad_norm": 7.585999488830566, "learning_rate": 8.058183311657198e-05, "loss": 0.7536, "step": 14935 }, { "epoch": 1.011992682431059, "grad_norm": 4.760716915130615, "learning_rate": 8.058046409747416e-05, "loss": 0.6324, "step": 14936 }, { "epoch": 1.0120604376990312, "grad_norm": 5.466569423675537, "learning_rate": 8.057909507837634e-05, "loss": 0.7267, "step": 14937 }, { "epoch": 1.0121281929670032, "grad_norm": 4.7150797843933105, "learning_rate": 8.057772605927853e-05, "loss": 0.6393, "step": 14938 }, { "epoch": 1.0121959482349752, "grad_norm": 4.741540908813477, "learning_rate": 8.057635704018071e-05, "loss": 0.4859, "step": 14939 }, { "epoch": 1.0122637035029474, "grad_norm": 5.607858657836914, "learning_rate": 8.05749880210829e-05, "loss": 0.738, "step": 14940 }, { "epoch": 1.0123314587709193, "grad_norm": 6.989736080169678, "learning_rate": 8.057361900198508e-05, "loss": 0.6129, "step": 14941 }, { "epoch": 1.0123992140388915, "grad_norm": 9.055098533630371, "learning_rate": 8.057224998288726e-05, "loss": 0.7514, "step": 14942 }, { "epoch": 1.0124669693068635, "grad_norm": 5.395411014556885, "learning_rate": 8.057088096378945e-05, "loss": 0.817, "step": 14943 }, { "epoch": 1.0125347245748357, "grad_norm": 5.7741851806640625, "learning_rate": 8.056951194469163e-05, "loss": 0.5735, "step": 14944 }, { "epoch": 1.0126024798428077, "grad_norm": 6.427124500274658, "learning_rate": 8.056814292559381e-05, "loss": 0.613, "step": 14945 }, { "epoch": 1.01267023511078, "grad_norm": 4.106490135192871, "learning_rate": 8.056677390649599e-05, "loss": 0.6788, "step": 14946 }, { "epoch": 1.012737990378752, "grad_norm": 5.7934370040893555, "learning_rate": 8.056540488739818e-05, "loss": 0.7784, "step": 14947 }, { "epoch": 1.012805745646724, "grad_norm": 7.501285076141357, "learning_rate": 8.056403586830036e-05, "loss": 0.77, "step": 14948 }, { "epoch": 1.012873500914696, "grad_norm": 4.6254472732543945, "learning_rate": 8.056266684920254e-05, "loss": 0.5918, "step": 14949 }, { "epoch": 1.0129412561826683, "grad_norm": 5.470935821533203, "learning_rate": 8.056129783010473e-05, "loss": 0.6471, "step": 14950 }, { "epoch": 1.0130090114506403, "grad_norm": 4.878664970397949, "learning_rate": 8.055992881100692e-05, "loss": 0.5186, "step": 14951 }, { "epoch": 1.0130767667186125, "grad_norm": 5.364741802215576, "learning_rate": 8.05585597919091e-05, "loss": 0.7577, "step": 14952 }, { "epoch": 1.0131445219865844, "grad_norm": 5.699579238891602, "learning_rate": 8.055719077281128e-05, "loss": 0.6527, "step": 14953 }, { "epoch": 1.0132122772545566, "grad_norm": 5.406833648681641, "learning_rate": 8.055582175371347e-05, "loss": 0.7961, "step": 14954 }, { "epoch": 1.0132800325225286, "grad_norm": 7.075387954711914, "learning_rate": 8.055445273461565e-05, "loss": 0.5801, "step": 14955 }, { "epoch": 1.0133477877905006, "grad_norm": 7.106254577636719, "learning_rate": 8.055308371551783e-05, "loss": 0.7384, "step": 14956 }, { "epoch": 1.0134155430584728, "grad_norm": 5.849175453186035, "learning_rate": 8.055171469642003e-05, "loss": 0.5413, "step": 14957 }, { "epoch": 1.0134832983264448, "grad_norm": 10.159880638122559, "learning_rate": 8.055034567732221e-05, "loss": 0.6702, "step": 14958 }, { "epoch": 1.013551053594417, "grad_norm": 5.166466236114502, "learning_rate": 8.054897665822439e-05, "loss": 0.8039, "step": 14959 }, { "epoch": 1.013618808862389, "grad_norm": 5.084167957305908, "learning_rate": 8.054760763912657e-05, "loss": 0.6388, "step": 14960 }, { "epoch": 1.0136865641303612, "grad_norm": 7.328620433807373, "learning_rate": 8.054623862002876e-05, "loss": 0.7152, "step": 14961 }, { "epoch": 1.0137543193983332, "grad_norm": 6.941154479980469, "learning_rate": 8.054486960093094e-05, "loss": 0.744, "step": 14962 }, { "epoch": 1.0138220746663054, "grad_norm": 5.957236289978027, "learning_rate": 8.054350058183312e-05, "loss": 0.5999, "step": 14963 }, { "epoch": 1.0138898299342773, "grad_norm": 5.381267547607422, "learning_rate": 8.05421315627353e-05, "loss": 0.8635, "step": 14964 }, { "epoch": 1.0139575852022495, "grad_norm": 5.371838569641113, "learning_rate": 8.054076254363748e-05, "loss": 0.6552, "step": 14965 }, { "epoch": 1.0140253404702215, "grad_norm": 5.056934356689453, "learning_rate": 8.053939352453968e-05, "loss": 0.5843, "step": 14966 }, { "epoch": 1.0140930957381937, "grad_norm": 5.23961877822876, "learning_rate": 8.053802450544186e-05, "loss": 0.8306, "step": 14967 }, { "epoch": 1.0141608510061657, "grad_norm": 6.583441734313965, "learning_rate": 8.053665548634404e-05, "loss": 0.686, "step": 14968 }, { "epoch": 1.014228606274138, "grad_norm": 7.94297981262207, "learning_rate": 8.053528646724622e-05, "loss": 0.7668, "step": 14969 }, { "epoch": 1.01429636154211, "grad_norm": 5.7727885246276855, "learning_rate": 8.053391744814841e-05, "loss": 0.7958, "step": 14970 }, { "epoch": 1.014364116810082, "grad_norm": 4.770255088806152, "learning_rate": 8.05325484290506e-05, "loss": 0.8039, "step": 14971 }, { "epoch": 1.014431872078054, "grad_norm": 6.65435266494751, "learning_rate": 8.053117940995277e-05, "loss": 0.7278, "step": 14972 }, { "epoch": 1.014499627346026, "grad_norm": 5.573418140411377, "learning_rate": 8.052981039085495e-05, "loss": 0.8147, "step": 14973 }, { "epoch": 1.0145673826139983, "grad_norm": 6.491858005523682, "learning_rate": 8.052844137175713e-05, "loss": 0.6853, "step": 14974 }, { "epoch": 1.0146351378819702, "grad_norm": 5.823346138000488, "learning_rate": 8.052707235265933e-05, "loss": 0.6468, "step": 14975 }, { "epoch": 1.0147028931499424, "grad_norm": 7.527129650115967, "learning_rate": 8.052570333356151e-05, "loss": 0.7114, "step": 14976 }, { "epoch": 1.0147706484179144, "grad_norm": 5.738404273986816, "learning_rate": 8.052433431446369e-05, "loss": 0.584, "step": 14977 }, { "epoch": 1.0148384036858866, "grad_norm": 5.364207744598389, "learning_rate": 8.052296529536587e-05, "loss": 0.7407, "step": 14978 }, { "epoch": 1.0149061589538586, "grad_norm": 5.664304256439209, "learning_rate": 8.052159627626806e-05, "loss": 0.5753, "step": 14979 }, { "epoch": 1.0149739142218308, "grad_norm": 8.1001558303833, "learning_rate": 8.052022725717024e-05, "loss": 0.6435, "step": 14980 }, { "epoch": 1.0150416694898028, "grad_norm": 6.447721481323242, "learning_rate": 8.051885823807242e-05, "loss": 0.7063, "step": 14981 }, { "epoch": 1.015109424757775, "grad_norm": 5.5510969161987305, "learning_rate": 8.05174892189746e-05, "loss": 0.5911, "step": 14982 }, { "epoch": 1.015177180025747, "grad_norm": 5.534966468811035, "learning_rate": 8.051612019987678e-05, "loss": 0.5952, "step": 14983 }, { "epoch": 1.0152449352937192, "grad_norm": 5.641912460327148, "learning_rate": 8.051475118077898e-05, "loss": 0.8201, "step": 14984 }, { "epoch": 1.0153126905616912, "grad_norm": 5.643457412719727, "learning_rate": 8.051338216168116e-05, "loss": 0.5825, "step": 14985 }, { "epoch": 1.0153804458296634, "grad_norm": 5.161952972412109, "learning_rate": 8.051201314258334e-05, "loss": 0.5534, "step": 14986 }, { "epoch": 1.0154482010976353, "grad_norm": 6.023798942565918, "learning_rate": 8.051064412348552e-05, "loss": 0.7081, "step": 14987 }, { "epoch": 1.0155159563656073, "grad_norm": 8.365532875061035, "learning_rate": 8.05092751043877e-05, "loss": 0.6779, "step": 14988 }, { "epoch": 1.0155837116335795, "grad_norm": 6.577813625335693, "learning_rate": 8.05079060852899e-05, "loss": 0.6511, "step": 14989 }, { "epoch": 1.0156514669015515, "grad_norm": 6.61271858215332, "learning_rate": 8.050653706619207e-05, "loss": 0.6625, "step": 14990 }, { "epoch": 1.0157192221695237, "grad_norm": 5.8450517654418945, "learning_rate": 8.050516804709425e-05, "loss": 0.6154, "step": 14991 }, { "epoch": 1.0157869774374957, "grad_norm": 6.10211181640625, "learning_rate": 8.050379902799644e-05, "loss": 0.7207, "step": 14992 }, { "epoch": 1.0158547327054679, "grad_norm": 6.336976528167725, "learning_rate": 8.050243000889863e-05, "loss": 0.7093, "step": 14993 }, { "epoch": 1.0159224879734399, "grad_norm": 5.03262996673584, "learning_rate": 8.050106098980081e-05, "loss": 0.5328, "step": 14994 }, { "epoch": 1.015990243241412, "grad_norm": 6.129831314086914, "learning_rate": 8.049969197070299e-05, "loss": 1.0295, "step": 14995 }, { "epoch": 1.016057998509384, "grad_norm": 5.919708728790283, "learning_rate": 8.049832295160517e-05, "loss": 0.8243, "step": 14996 }, { "epoch": 1.0161257537773563, "grad_norm": 11.239713668823242, "learning_rate": 8.049695393250736e-05, "loss": 0.6003, "step": 14997 }, { "epoch": 1.0161935090453282, "grad_norm": 4.68793249130249, "learning_rate": 8.049558491340954e-05, "loss": 0.64, "step": 14998 }, { "epoch": 1.0162612643133004, "grad_norm": 6.901069164276123, "learning_rate": 8.049421589431172e-05, "loss": 0.7363, "step": 14999 }, { "epoch": 1.0163290195812724, "grad_norm": 5.286013603210449, "learning_rate": 8.049284687521392e-05, "loss": 0.5766, "step": 15000 }, { "epoch": 1.0163967748492446, "grad_norm": 9.495491027832031, "learning_rate": 8.04914778561161e-05, "loss": 0.5364, "step": 15001 }, { "epoch": 1.0164645301172166, "grad_norm": 5.1024932861328125, "learning_rate": 8.049010883701828e-05, "loss": 0.7218, "step": 15002 }, { "epoch": 1.0165322853851888, "grad_norm": 6.650432586669922, "learning_rate": 8.048873981792047e-05, "loss": 0.8232, "step": 15003 }, { "epoch": 1.0166000406531608, "grad_norm": 6.3598313331604, "learning_rate": 8.048737079882265e-05, "loss": 0.5487, "step": 15004 }, { "epoch": 1.0166677959211328, "grad_norm": 7.906015872955322, "learning_rate": 8.048600177972483e-05, "loss": 0.7754, "step": 15005 }, { "epoch": 1.016735551189105, "grad_norm": 8.92300033569336, "learning_rate": 8.048463276062701e-05, "loss": 0.8194, "step": 15006 }, { "epoch": 1.016803306457077, "grad_norm": 5.348727703094482, "learning_rate": 8.048326374152921e-05, "loss": 0.7689, "step": 15007 }, { "epoch": 1.0168710617250492, "grad_norm": 4.177212715148926, "learning_rate": 8.048189472243139e-05, "loss": 0.5612, "step": 15008 }, { "epoch": 1.0169388169930211, "grad_norm": 5.465108871459961, "learning_rate": 8.048052570333357e-05, "loss": 0.7477, "step": 15009 }, { "epoch": 1.0170065722609933, "grad_norm": 6.203986644744873, "learning_rate": 8.047915668423575e-05, "loss": 0.6927, "step": 15010 }, { "epoch": 1.0170743275289653, "grad_norm": 5.149818420410156, "learning_rate": 8.047778766513793e-05, "loss": 0.6977, "step": 15011 }, { "epoch": 1.0171420827969375, "grad_norm": 8.325011253356934, "learning_rate": 8.047641864604012e-05, "loss": 0.6829, "step": 15012 }, { "epoch": 1.0172098380649095, "grad_norm": 5.652045726776123, "learning_rate": 8.04750496269423e-05, "loss": 0.7819, "step": 15013 }, { "epoch": 1.0172775933328817, "grad_norm": 5.549155235290527, "learning_rate": 8.047368060784448e-05, "loss": 0.6268, "step": 15014 }, { "epoch": 1.0173453486008537, "grad_norm": 6.955049991607666, "learning_rate": 8.047231158874666e-05, "loss": 0.8359, "step": 15015 }, { "epoch": 1.0174131038688259, "grad_norm": 5.063580513000488, "learning_rate": 8.047094256964886e-05, "loss": 0.6845, "step": 15016 }, { "epoch": 1.0174808591367979, "grad_norm": 4.619150638580322, "learning_rate": 8.046957355055104e-05, "loss": 0.5725, "step": 15017 }, { "epoch": 1.01754861440477, "grad_norm": 5.2023820877075195, "learning_rate": 8.046820453145322e-05, "loss": 0.8112, "step": 15018 }, { "epoch": 1.017616369672742, "grad_norm": 6.25310754776001, "learning_rate": 8.04668355123554e-05, "loss": 0.772, "step": 15019 }, { "epoch": 1.0176841249407143, "grad_norm": 6.057315349578857, "learning_rate": 8.046546649325758e-05, "loss": 0.726, "step": 15020 }, { "epoch": 1.0177518802086862, "grad_norm": 6.200272560119629, "learning_rate": 8.046409747415977e-05, "loss": 0.8038, "step": 15021 }, { "epoch": 1.0178196354766582, "grad_norm": 6.192685604095459, "learning_rate": 8.046272845506195e-05, "loss": 0.8221, "step": 15022 }, { "epoch": 1.0178873907446304, "grad_norm": 6.599914073944092, "learning_rate": 8.046135943596413e-05, "loss": 0.8947, "step": 15023 }, { "epoch": 1.0179551460126024, "grad_norm": 5.081368923187256, "learning_rate": 8.045999041686631e-05, "loss": 0.5658, "step": 15024 }, { "epoch": 1.0180229012805746, "grad_norm": 4.629168510437012, "learning_rate": 8.045862139776851e-05, "loss": 0.4918, "step": 15025 }, { "epoch": 1.0180906565485466, "grad_norm": 5.531469345092773, "learning_rate": 8.045725237867069e-05, "loss": 0.9338, "step": 15026 }, { "epoch": 1.0181584118165188, "grad_norm": 5.594359874725342, "learning_rate": 8.045588335957287e-05, "loss": 0.8965, "step": 15027 }, { "epoch": 1.0182261670844908, "grad_norm": 5.746640205383301, "learning_rate": 8.045451434047505e-05, "loss": 0.8001, "step": 15028 }, { "epoch": 1.018293922352463, "grad_norm": 6.410648822784424, "learning_rate": 8.045314532137723e-05, "loss": 0.6035, "step": 15029 }, { "epoch": 1.018361677620435, "grad_norm": 5.184842109680176, "learning_rate": 8.045177630227942e-05, "loss": 0.6867, "step": 15030 }, { "epoch": 1.0184294328884071, "grad_norm": 4.650674343109131, "learning_rate": 8.04504072831816e-05, "loss": 0.5872, "step": 15031 }, { "epoch": 1.0184971881563791, "grad_norm": 5.6024346351623535, "learning_rate": 8.044903826408378e-05, "loss": 0.7804, "step": 15032 }, { "epoch": 1.0185649434243513, "grad_norm": 4.8978047370910645, "learning_rate": 8.044766924498596e-05, "loss": 0.6067, "step": 15033 }, { "epoch": 1.0186326986923233, "grad_norm": 5.678053379058838, "learning_rate": 8.044630022588816e-05, "loss": 0.7362, "step": 15034 }, { "epoch": 1.0187004539602955, "grad_norm": 5.044686317443848, "learning_rate": 8.044493120679034e-05, "loss": 0.9308, "step": 15035 }, { "epoch": 1.0187682092282675, "grad_norm": 6.241668701171875, "learning_rate": 8.044356218769252e-05, "loss": 0.7732, "step": 15036 }, { "epoch": 1.0188359644962395, "grad_norm": 5.17287540435791, "learning_rate": 8.04421931685947e-05, "loss": 0.6823, "step": 15037 }, { "epoch": 1.0189037197642117, "grad_norm": 5.231973648071289, "learning_rate": 8.044082414949688e-05, "loss": 0.5864, "step": 15038 }, { "epoch": 1.0189714750321837, "grad_norm": 5.848363876342773, "learning_rate": 8.043945513039907e-05, "loss": 0.6496, "step": 15039 }, { "epoch": 1.0190392303001559, "grad_norm": 5.491754531860352, "learning_rate": 8.043808611130125e-05, "loss": 0.6945, "step": 15040 }, { "epoch": 1.0191069855681278, "grad_norm": 5.215052604675293, "learning_rate": 8.043671709220343e-05, "loss": 0.7234, "step": 15041 }, { "epoch": 1.0191747408361, "grad_norm": 4.157303810119629, "learning_rate": 8.043534807310561e-05, "loss": 0.5688, "step": 15042 }, { "epoch": 1.019242496104072, "grad_norm": 5.955345630645752, "learning_rate": 8.043397905400781e-05, "loss": 0.7193, "step": 15043 }, { "epoch": 1.0193102513720442, "grad_norm": 5.868755340576172, "learning_rate": 8.043261003490999e-05, "loss": 0.7041, "step": 15044 }, { "epoch": 1.0193780066400162, "grad_norm": 5.522411823272705, "learning_rate": 8.043124101581217e-05, "loss": 0.6228, "step": 15045 }, { "epoch": 1.0194457619079884, "grad_norm": 5.676867961883545, "learning_rate": 8.042987199671436e-05, "loss": 0.6802, "step": 15046 }, { "epoch": 1.0195135171759604, "grad_norm": 5.678533554077148, "learning_rate": 8.042850297761654e-05, "loss": 0.8044, "step": 15047 }, { "epoch": 1.0195812724439326, "grad_norm": 6.804957866668701, "learning_rate": 8.042713395851872e-05, "loss": 0.8231, "step": 15048 }, { "epoch": 1.0196490277119046, "grad_norm": 4.685696601867676, "learning_rate": 8.042576493942092e-05, "loss": 0.5464, "step": 15049 }, { "epoch": 1.0197167829798768, "grad_norm": 5.141258239746094, "learning_rate": 8.04243959203231e-05, "loss": 0.5144, "step": 15050 }, { "epoch": 1.0197845382478488, "grad_norm": 5.105460166931152, "learning_rate": 8.042302690122528e-05, "loss": 0.6002, "step": 15051 }, { "epoch": 1.019852293515821, "grad_norm": 6.820575714111328, "learning_rate": 8.042165788212746e-05, "loss": 0.5949, "step": 15052 }, { "epoch": 1.019920048783793, "grad_norm": 6.264902114868164, "learning_rate": 8.042028886302965e-05, "loss": 0.8404, "step": 15053 }, { "epoch": 1.019987804051765, "grad_norm": 6.1790385246276855, "learning_rate": 8.041891984393183e-05, "loss": 0.5916, "step": 15054 }, { "epoch": 1.0200555593197371, "grad_norm": 5.779554843902588, "learning_rate": 8.041755082483401e-05, "loss": 0.6559, "step": 15055 }, { "epoch": 1.020123314587709, "grad_norm": 5.888326168060303, "learning_rate": 8.04161818057362e-05, "loss": 0.5994, "step": 15056 }, { "epoch": 1.0201910698556813, "grad_norm": 5.410882949829102, "learning_rate": 8.041481278663839e-05, "loss": 0.7267, "step": 15057 }, { "epoch": 1.0202588251236533, "grad_norm": 5.042235374450684, "learning_rate": 8.041344376754057e-05, "loss": 0.543, "step": 15058 }, { "epoch": 1.0203265803916255, "grad_norm": 5.355964183807373, "learning_rate": 8.041207474844275e-05, "loss": 0.7188, "step": 15059 }, { "epoch": 1.0203943356595975, "grad_norm": 4.4993510246276855, "learning_rate": 8.041070572934493e-05, "loss": 0.5713, "step": 15060 }, { "epoch": 1.0204620909275697, "grad_norm": 6.155606746673584, "learning_rate": 8.040933671024711e-05, "loss": 0.7806, "step": 15061 }, { "epoch": 1.0205298461955417, "grad_norm": 6.213912010192871, "learning_rate": 8.04079676911493e-05, "loss": 0.662, "step": 15062 }, { "epoch": 1.0205976014635139, "grad_norm": 4.450127124786377, "learning_rate": 8.040659867205148e-05, "loss": 0.7145, "step": 15063 }, { "epoch": 1.0206653567314858, "grad_norm": 5.589287757873535, "learning_rate": 8.040522965295366e-05, "loss": 0.7071, "step": 15064 }, { "epoch": 1.020733111999458, "grad_norm": 6.013401985168457, "learning_rate": 8.040386063385584e-05, "loss": 0.5325, "step": 15065 }, { "epoch": 1.02080086726743, "grad_norm": 6.579004287719727, "learning_rate": 8.040249161475802e-05, "loss": 0.6771, "step": 15066 }, { "epoch": 1.0208686225354022, "grad_norm": 6.377874374389648, "learning_rate": 8.040112259566022e-05, "loss": 0.9557, "step": 15067 }, { "epoch": 1.0209363778033742, "grad_norm": 6.904638767242432, "learning_rate": 8.03997535765624e-05, "loss": 0.8783, "step": 15068 }, { "epoch": 1.0210041330713464, "grad_norm": 6.453176975250244, "learning_rate": 8.039838455746458e-05, "loss": 0.675, "step": 15069 }, { "epoch": 1.0210718883393184, "grad_norm": 4.14583158493042, "learning_rate": 8.039701553836676e-05, "loss": 0.6133, "step": 15070 }, { "epoch": 1.0211396436072904, "grad_norm": 9.864669799804688, "learning_rate": 8.039564651926895e-05, "loss": 0.515, "step": 15071 }, { "epoch": 1.0212073988752626, "grad_norm": 13.798471450805664, "learning_rate": 8.039427750017113e-05, "loss": 0.7913, "step": 15072 }, { "epoch": 1.0212751541432346, "grad_norm": 6.997447967529297, "learning_rate": 8.039290848107331e-05, "loss": 0.7798, "step": 15073 }, { "epoch": 1.0213429094112068, "grad_norm": 7.579859256744385, "learning_rate": 8.03915394619755e-05, "loss": 0.4848, "step": 15074 }, { "epoch": 1.0214106646791787, "grad_norm": 6.31156063079834, "learning_rate": 8.039017044287767e-05, "loss": 0.8056, "step": 15075 }, { "epoch": 1.021478419947151, "grad_norm": 10.114006042480469, "learning_rate": 8.038880142377987e-05, "loss": 0.7082, "step": 15076 }, { "epoch": 1.021546175215123, "grad_norm": 4.31792688369751, "learning_rate": 8.038743240468205e-05, "loss": 0.4963, "step": 15077 }, { "epoch": 1.0216139304830951, "grad_norm": 8.745311737060547, "learning_rate": 8.038606338558423e-05, "loss": 0.6986, "step": 15078 }, { "epoch": 1.021681685751067, "grad_norm": 6.955810070037842, "learning_rate": 8.038469436648641e-05, "loss": 0.6558, "step": 15079 }, { "epoch": 1.0217494410190393, "grad_norm": 5.911043167114258, "learning_rate": 8.03833253473886e-05, "loss": 0.8858, "step": 15080 }, { "epoch": 1.0218171962870113, "grad_norm": 5.10263204574585, "learning_rate": 8.038195632829078e-05, "loss": 0.5157, "step": 15081 }, { "epoch": 1.0218849515549835, "grad_norm": 5.808590888977051, "learning_rate": 8.038058730919296e-05, "loss": 0.7309, "step": 15082 }, { "epoch": 1.0219527068229555, "grad_norm": 5.918943405151367, "learning_rate": 8.037921829009514e-05, "loss": 0.6521, "step": 15083 }, { "epoch": 1.0220204620909277, "grad_norm": 10.792163848876953, "learning_rate": 8.037784927099732e-05, "loss": 0.8682, "step": 15084 }, { "epoch": 1.0220882173588997, "grad_norm": 6.594424724578857, "learning_rate": 8.037648025189952e-05, "loss": 0.567, "step": 15085 }, { "epoch": 1.0221559726268716, "grad_norm": 7.634060382843018, "learning_rate": 8.03751112328017e-05, "loss": 0.6299, "step": 15086 }, { "epoch": 1.0222237278948438, "grad_norm": 13.075284004211426, "learning_rate": 8.037374221370388e-05, "loss": 0.6774, "step": 15087 }, { "epoch": 1.0222914831628158, "grad_norm": 5.656576156616211, "learning_rate": 8.037237319460606e-05, "loss": 0.7564, "step": 15088 }, { "epoch": 1.022359238430788, "grad_norm": 4.356131553649902, "learning_rate": 8.037100417550825e-05, "loss": 0.6635, "step": 15089 }, { "epoch": 1.02242699369876, "grad_norm": 8.44076919555664, "learning_rate": 8.036963515641043e-05, "loss": 0.8179, "step": 15090 }, { "epoch": 1.0224947489667322, "grad_norm": 6.754337310791016, "learning_rate": 8.036826613731261e-05, "loss": 0.6055, "step": 15091 }, { "epoch": 1.0225625042347042, "grad_norm": 4.849721908569336, "learning_rate": 8.036689711821481e-05, "loss": 0.5342, "step": 15092 }, { "epoch": 1.0226302595026764, "grad_norm": 6.975347518920898, "learning_rate": 8.036552809911699e-05, "loss": 0.7692, "step": 15093 }, { "epoch": 1.0226980147706484, "grad_norm": 5.742364406585693, "learning_rate": 8.036415908001917e-05, "loss": 0.605, "step": 15094 }, { "epoch": 1.0227657700386206, "grad_norm": 7.3798909187316895, "learning_rate": 8.036279006092136e-05, "loss": 0.7643, "step": 15095 }, { "epoch": 1.0228335253065926, "grad_norm": 6.8255510330200195, "learning_rate": 8.036142104182354e-05, "loss": 0.6628, "step": 15096 }, { "epoch": 1.0229012805745648, "grad_norm": 6.594735145568848, "learning_rate": 8.036005202272572e-05, "loss": 0.7144, "step": 15097 }, { "epoch": 1.0229690358425367, "grad_norm": 5.225235462188721, "learning_rate": 8.03586830036279e-05, "loss": 0.8352, "step": 15098 }, { "epoch": 1.023036791110509, "grad_norm": 6.080996036529541, "learning_rate": 8.03573139845301e-05, "loss": 0.8509, "step": 15099 }, { "epoch": 1.023104546378481, "grad_norm": 9.194795608520508, "learning_rate": 8.035594496543228e-05, "loss": 0.7135, "step": 15100 }, { "epoch": 1.0231723016464531, "grad_norm": 5.869052410125732, "learning_rate": 8.035457594633446e-05, "loss": 0.5557, "step": 15101 }, { "epoch": 1.023240056914425, "grad_norm": 5.882430076599121, "learning_rate": 8.035320692723664e-05, "loss": 0.6686, "step": 15102 }, { "epoch": 1.023307812182397, "grad_norm": 5.315283298492432, "learning_rate": 8.035183790813883e-05, "loss": 0.5841, "step": 15103 }, { "epoch": 1.0233755674503693, "grad_norm": 4.697854042053223, "learning_rate": 8.035046888904101e-05, "loss": 0.6619, "step": 15104 }, { "epoch": 1.0234433227183413, "grad_norm": 5.584891319274902, "learning_rate": 8.034909986994319e-05, "loss": 0.7641, "step": 15105 }, { "epoch": 1.0235110779863135, "grad_norm": 6.978733539581299, "learning_rate": 8.034773085084537e-05, "loss": 0.7094, "step": 15106 }, { "epoch": 1.0235788332542854, "grad_norm": 8.549322128295898, "learning_rate": 8.034636183174755e-05, "loss": 0.6725, "step": 15107 }, { "epoch": 1.0236465885222576, "grad_norm": 5.663527488708496, "learning_rate": 8.034499281264975e-05, "loss": 0.609, "step": 15108 }, { "epoch": 1.0237143437902296, "grad_norm": 5.843603610992432, "learning_rate": 8.034362379355193e-05, "loss": 0.7198, "step": 15109 }, { "epoch": 1.0237820990582018, "grad_norm": 6.912360668182373, "learning_rate": 8.034225477445411e-05, "loss": 0.728, "step": 15110 }, { "epoch": 1.0238498543261738, "grad_norm": 4.6928205490112305, "learning_rate": 8.034088575535629e-05, "loss": 0.8542, "step": 15111 }, { "epoch": 1.023917609594146, "grad_norm": 5.714449405670166, "learning_rate": 8.033951673625848e-05, "loss": 0.8885, "step": 15112 }, { "epoch": 1.023985364862118, "grad_norm": 9.74325180053711, "learning_rate": 8.033814771716066e-05, "loss": 0.7175, "step": 15113 }, { "epoch": 1.0240531201300902, "grad_norm": 6.803297519683838, "learning_rate": 8.033677869806284e-05, "loss": 0.8037, "step": 15114 }, { "epoch": 1.0241208753980622, "grad_norm": 5.692788600921631, "learning_rate": 8.033540967896502e-05, "loss": 0.6073, "step": 15115 }, { "epoch": 1.0241886306660344, "grad_norm": 4.766846179962158, "learning_rate": 8.03340406598672e-05, "loss": 0.687, "step": 15116 }, { "epoch": 1.0242563859340064, "grad_norm": 7.894632816314697, "learning_rate": 8.03326716407694e-05, "loss": 0.6866, "step": 15117 }, { "epoch": 1.0243241412019786, "grad_norm": 8.924421310424805, "learning_rate": 8.033130262167158e-05, "loss": 0.6033, "step": 15118 }, { "epoch": 1.0243918964699505, "grad_norm": 4.983919620513916, "learning_rate": 8.032993360257376e-05, "loss": 0.7635, "step": 15119 }, { "epoch": 1.0244596517379225, "grad_norm": 6.151333808898926, "learning_rate": 8.032856458347594e-05, "loss": 0.5241, "step": 15120 }, { "epoch": 1.0245274070058947, "grad_norm": 5.959475517272949, "learning_rate": 8.032719556437812e-05, "loss": 0.801, "step": 15121 }, { "epoch": 1.0245951622738667, "grad_norm": 6.7095255851745605, "learning_rate": 8.032582654528031e-05, "loss": 0.5638, "step": 15122 }, { "epoch": 1.024662917541839, "grad_norm": 6.7581939697265625, "learning_rate": 8.03244575261825e-05, "loss": 0.7744, "step": 15123 }, { "epoch": 1.024730672809811, "grad_norm": 4.929287433624268, "learning_rate": 8.032308850708467e-05, "loss": 0.5911, "step": 15124 }, { "epoch": 1.024798428077783, "grad_norm": 7.830074310302734, "learning_rate": 8.032171948798685e-05, "loss": 0.5202, "step": 15125 }, { "epoch": 1.024866183345755, "grad_norm": 5.351446628570557, "learning_rate": 8.032035046888905e-05, "loss": 0.5243, "step": 15126 }, { "epoch": 1.0249339386137273, "grad_norm": 6.004148960113525, "learning_rate": 8.031898144979123e-05, "loss": 0.7672, "step": 15127 }, { "epoch": 1.0250016938816993, "grad_norm": 5.840296745300293, "learning_rate": 8.031761243069341e-05, "loss": 0.6149, "step": 15128 }, { "epoch": 1.0250694491496715, "grad_norm": 5.449799537658691, "learning_rate": 8.031624341159559e-05, "loss": 0.9104, "step": 15129 }, { "epoch": 1.0251372044176434, "grad_norm": 4.739322662353516, "learning_rate": 8.031487439249777e-05, "loss": 0.586, "step": 15130 }, { "epoch": 1.0252049596856156, "grad_norm": 6.538787841796875, "learning_rate": 8.031350537339996e-05, "loss": 0.8415, "step": 15131 }, { "epoch": 1.0252727149535876, "grad_norm": 6.406269550323486, "learning_rate": 8.031213635430214e-05, "loss": 0.8221, "step": 15132 }, { "epoch": 1.0253404702215598, "grad_norm": 5.359891891479492, "learning_rate": 8.031076733520432e-05, "loss": 0.7246, "step": 15133 }, { "epoch": 1.0254082254895318, "grad_norm": 6.871723175048828, "learning_rate": 8.03093983161065e-05, "loss": 0.8127, "step": 15134 }, { "epoch": 1.0254759807575038, "grad_norm": 7.51058292388916, "learning_rate": 8.03080292970087e-05, "loss": 0.8611, "step": 15135 }, { "epoch": 1.025543736025476, "grad_norm": 6.180769920349121, "learning_rate": 8.030666027791088e-05, "loss": 0.6036, "step": 15136 }, { "epoch": 1.025611491293448, "grad_norm": 4.803264141082764, "learning_rate": 8.030529125881306e-05, "loss": 0.6907, "step": 15137 }, { "epoch": 1.0256792465614202, "grad_norm": 4.692986965179443, "learning_rate": 8.030392223971524e-05, "loss": 0.5427, "step": 15138 }, { "epoch": 1.0257470018293922, "grad_norm": 4.468636512756348, "learning_rate": 8.030255322061743e-05, "loss": 0.5688, "step": 15139 }, { "epoch": 1.0258147570973644, "grad_norm": 5.465978622436523, "learning_rate": 8.030118420151961e-05, "loss": 0.6728, "step": 15140 }, { "epoch": 1.0258825123653363, "grad_norm": 7.363076686859131, "learning_rate": 8.02998151824218e-05, "loss": 0.608, "step": 15141 }, { "epoch": 1.0259502676333085, "grad_norm": 6.24710750579834, "learning_rate": 8.029844616332399e-05, "loss": 0.7095, "step": 15142 }, { "epoch": 1.0260180229012805, "grad_norm": 5.708677291870117, "learning_rate": 8.029707714422617e-05, "loss": 0.7028, "step": 15143 }, { "epoch": 1.0260857781692527, "grad_norm": 6.651005744934082, "learning_rate": 8.029570812512835e-05, "loss": 0.8753, "step": 15144 }, { "epoch": 1.0261535334372247, "grad_norm": 7.2024688720703125, "learning_rate": 8.029433910603054e-05, "loss": 0.6307, "step": 15145 }, { "epoch": 1.026221288705197, "grad_norm": 8.108603477478027, "learning_rate": 8.029297008693272e-05, "loss": 0.7454, "step": 15146 }, { "epoch": 1.026289043973169, "grad_norm": 8.093022346496582, "learning_rate": 8.02916010678349e-05, "loss": 0.6492, "step": 15147 }, { "epoch": 1.026356799241141, "grad_norm": 6.092846393585205, "learning_rate": 8.029023204873708e-05, "loss": 0.8324, "step": 15148 }, { "epoch": 1.026424554509113, "grad_norm": 7.56104040145874, "learning_rate": 8.028886302963928e-05, "loss": 0.7282, "step": 15149 }, { "epoch": 1.0264923097770853, "grad_norm": 5.667774677276611, "learning_rate": 8.028749401054146e-05, "loss": 0.9169, "step": 15150 }, { "epoch": 1.0265600650450573, "grad_norm": 6.644709587097168, "learning_rate": 8.028612499144364e-05, "loss": 0.6861, "step": 15151 }, { "epoch": 1.0266278203130292, "grad_norm": 5.4562578201293945, "learning_rate": 8.028475597234582e-05, "loss": 0.8854, "step": 15152 }, { "epoch": 1.0266955755810014, "grad_norm": 4.384959697723389, "learning_rate": 8.0283386953248e-05, "loss": 0.6736, "step": 15153 }, { "epoch": 1.0267633308489734, "grad_norm": 5.349459648132324, "learning_rate": 8.028201793415019e-05, "loss": 0.704, "step": 15154 }, { "epoch": 1.0268310861169456, "grad_norm": 4.929141521453857, "learning_rate": 8.028064891505237e-05, "loss": 0.6705, "step": 15155 }, { "epoch": 1.0268988413849176, "grad_norm": 5.738053798675537, "learning_rate": 8.027927989595455e-05, "loss": 0.7195, "step": 15156 }, { "epoch": 1.0269665966528898, "grad_norm": 5.528050899505615, "learning_rate": 8.027791087685673e-05, "loss": 0.707, "step": 15157 }, { "epoch": 1.0270343519208618, "grad_norm": 5.940127372741699, "learning_rate": 8.027654185775893e-05, "loss": 0.7448, "step": 15158 }, { "epoch": 1.027102107188834, "grad_norm": 5.716156482696533, "learning_rate": 8.027517283866111e-05, "loss": 0.5385, "step": 15159 }, { "epoch": 1.027169862456806, "grad_norm": 6.133981227874756, "learning_rate": 8.027380381956329e-05, "loss": 0.459, "step": 15160 }, { "epoch": 1.0272376177247782, "grad_norm": 4.139076232910156, "learning_rate": 8.027243480046547e-05, "loss": 0.5117, "step": 15161 }, { "epoch": 1.0273053729927502, "grad_norm": 5.8663482666015625, "learning_rate": 8.027106578136765e-05, "loss": 0.6677, "step": 15162 }, { "epoch": 1.0273731282607224, "grad_norm": 6.346996784210205, "learning_rate": 8.026969676226984e-05, "loss": 0.794, "step": 15163 }, { "epoch": 1.0274408835286943, "grad_norm": 6.667228698730469, "learning_rate": 8.026832774317202e-05, "loss": 0.6352, "step": 15164 }, { "epoch": 1.0275086387966665, "grad_norm": 4.740386962890625, "learning_rate": 8.02669587240742e-05, "loss": 0.5717, "step": 15165 }, { "epoch": 1.0275763940646385, "grad_norm": 7.014326095581055, "learning_rate": 8.026558970497638e-05, "loss": 0.831, "step": 15166 }, { "epoch": 1.0276441493326105, "grad_norm": 5.337719440460205, "learning_rate": 8.026422068587858e-05, "loss": 0.7297, "step": 15167 }, { "epoch": 1.0277119046005827, "grad_norm": 5.5287957191467285, "learning_rate": 8.026285166678076e-05, "loss": 0.8606, "step": 15168 }, { "epoch": 1.0277796598685547, "grad_norm": 3.9618964195251465, "learning_rate": 8.026148264768294e-05, "loss": 0.4601, "step": 15169 }, { "epoch": 1.0278474151365269, "grad_norm": 6.047200679779053, "learning_rate": 8.026011362858512e-05, "loss": 0.7246, "step": 15170 }, { "epoch": 1.0279151704044989, "grad_norm": 6.743053913116455, "learning_rate": 8.02587446094873e-05, "loss": 0.7362, "step": 15171 }, { "epoch": 1.027982925672471, "grad_norm": 6.415504455566406, "learning_rate": 8.025737559038949e-05, "loss": 0.8653, "step": 15172 }, { "epoch": 1.028050680940443, "grad_norm": 5.755765438079834, "learning_rate": 8.025600657129167e-05, "loss": 0.7041, "step": 15173 }, { "epoch": 1.0281184362084153, "grad_norm": 6.556794166564941, "learning_rate": 8.025463755219385e-05, "loss": 0.8511, "step": 15174 }, { "epoch": 1.0281861914763872, "grad_norm": 5.8607025146484375, "learning_rate": 8.025326853309603e-05, "loss": 0.6352, "step": 15175 }, { "epoch": 1.0282539467443594, "grad_norm": 5.547766208648682, "learning_rate": 8.025189951399821e-05, "loss": 0.7237, "step": 15176 }, { "epoch": 1.0283217020123314, "grad_norm": 6.3242363929748535, "learning_rate": 8.025053049490041e-05, "loss": 0.7579, "step": 15177 }, { "epoch": 1.0283894572803036, "grad_norm": 5.0050764083862305, "learning_rate": 8.024916147580259e-05, "loss": 0.765, "step": 15178 }, { "epoch": 1.0284572125482756, "grad_norm": 6.55527400970459, "learning_rate": 8.024779245670477e-05, "loss": 0.7241, "step": 15179 }, { "epoch": 1.0285249678162478, "grad_norm": 6.457221984863281, "learning_rate": 8.024642343760695e-05, "loss": 0.6609, "step": 15180 }, { "epoch": 1.0285927230842198, "grad_norm": 6.385996341705322, "learning_rate": 8.024505441850914e-05, "loss": 0.8659, "step": 15181 }, { "epoch": 1.028660478352192, "grad_norm": 6.647210121154785, "learning_rate": 8.024368539941132e-05, "loss": 0.7662, "step": 15182 }, { "epoch": 1.028728233620164, "grad_norm": 5.438072204589844, "learning_rate": 8.02423163803135e-05, "loss": 0.5482, "step": 15183 }, { "epoch": 1.028795988888136, "grad_norm": 7.4278998374938965, "learning_rate": 8.024094736121568e-05, "loss": 0.8363, "step": 15184 }, { "epoch": 1.0288637441561082, "grad_norm": 5.1344451904296875, "learning_rate": 8.023957834211788e-05, "loss": 0.7989, "step": 15185 }, { "epoch": 1.0289314994240801, "grad_norm": 4.938210487365723, "learning_rate": 8.023820932302006e-05, "loss": 0.6469, "step": 15186 }, { "epoch": 1.0289992546920523, "grad_norm": 5.672119617462158, "learning_rate": 8.023684030392224e-05, "loss": 0.9169, "step": 15187 }, { "epoch": 1.0290670099600243, "grad_norm": 6.35474157333374, "learning_rate": 8.023547128482443e-05, "loss": 0.6798, "step": 15188 }, { "epoch": 1.0291347652279965, "grad_norm": 10.651616096496582, "learning_rate": 8.023410226572661e-05, "loss": 0.8043, "step": 15189 }, { "epoch": 1.0292025204959685, "grad_norm": 5.502874851226807, "learning_rate": 8.023273324662879e-05, "loss": 0.8463, "step": 15190 }, { "epoch": 1.0292702757639407, "grad_norm": 4.313845634460449, "learning_rate": 8.023136422753099e-05, "loss": 0.6796, "step": 15191 }, { "epoch": 1.0293380310319127, "grad_norm": 6.374643802642822, "learning_rate": 8.022999520843317e-05, "loss": 0.7316, "step": 15192 }, { "epoch": 1.0294057862998849, "grad_norm": 6.878230094909668, "learning_rate": 8.022862618933535e-05, "loss": 0.9329, "step": 15193 }, { "epoch": 1.0294735415678569, "grad_norm": 5.371943950653076, "learning_rate": 8.022725717023753e-05, "loss": 0.5674, "step": 15194 }, { "epoch": 1.029541296835829, "grad_norm": 5.623109340667725, "learning_rate": 8.022588815113972e-05, "loss": 0.7014, "step": 15195 }, { "epoch": 1.029609052103801, "grad_norm": 6.517233848571777, "learning_rate": 8.02245191320419e-05, "loss": 0.8067, "step": 15196 }, { "epoch": 1.0296768073717733, "grad_norm": 6.380861759185791, "learning_rate": 8.022315011294408e-05, "loss": 0.7762, "step": 15197 }, { "epoch": 1.0297445626397452, "grad_norm": 5.307709693908691, "learning_rate": 8.022178109384626e-05, "loss": 0.8181, "step": 15198 }, { "epoch": 1.0298123179077174, "grad_norm": 5.426553249359131, "learning_rate": 8.022041207474844e-05, "loss": 0.8538, "step": 15199 }, { "epoch": 1.0298800731756894, "grad_norm": 5.571786880493164, "learning_rate": 8.021904305565064e-05, "loss": 0.6279, "step": 15200 }, { "epoch": 1.0299478284436614, "grad_norm": 5.296565055847168, "learning_rate": 8.021767403655282e-05, "loss": 0.7286, "step": 15201 }, { "epoch": 1.0300155837116336, "grad_norm": 5.196515083312988, "learning_rate": 8.0216305017455e-05, "loss": 0.5467, "step": 15202 }, { "epoch": 1.0300833389796056, "grad_norm": 5.717817783355713, "learning_rate": 8.021493599835718e-05, "loss": 0.5466, "step": 15203 }, { "epoch": 1.0301510942475778, "grad_norm": 4.812506675720215, "learning_rate": 8.021356697925937e-05, "loss": 0.4824, "step": 15204 }, { "epoch": 1.0302188495155498, "grad_norm": 5.952337741851807, "learning_rate": 8.021219796016155e-05, "loss": 0.6964, "step": 15205 }, { "epoch": 1.030286604783522, "grad_norm": 6.307968616485596, "learning_rate": 8.021082894106373e-05, "loss": 0.804, "step": 15206 }, { "epoch": 1.030354360051494, "grad_norm": 6.3860626220703125, "learning_rate": 8.020945992196591e-05, "loss": 0.5327, "step": 15207 }, { "epoch": 1.0304221153194661, "grad_norm": 6.184783935546875, "learning_rate": 8.02080909028681e-05, "loss": 0.754, "step": 15208 }, { "epoch": 1.0304898705874381, "grad_norm": 5.628595352172852, "learning_rate": 8.020672188377029e-05, "loss": 0.6584, "step": 15209 }, { "epoch": 1.0305576258554103, "grad_norm": 6.341923713684082, "learning_rate": 8.020535286467247e-05, "loss": 0.7252, "step": 15210 }, { "epoch": 1.0306253811233823, "grad_norm": 7.7430853843688965, "learning_rate": 8.020398384557465e-05, "loss": 0.7014, "step": 15211 }, { "epoch": 1.0306931363913545, "grad_norm": 5.762584686279297, "learning_rate": 8.020261482647683e-05, "loss": 0.7568, "step": 15212 }, { "epoch": 1.0307608916593265, "grad_norm": 6.523187637329102, "learning_rate": 8.020124580737902e-05, "loss": 0.7841, "step": 15213 }, { "epoch": 1.0308286469272987, "grad_norm": 6.489431858062744, "learning_rate": 8.01998767882812e-05, "loss": 0.6339, "step": 15214 }, { "epoch": 1.0308964021952707, "grad_norm": 6.271862983703613, "learning_rate": 8.019850776918338e-05, "loss": 0.85, "step": 15215 }, { "epoch": 1.0309641574632427, "grad_norm": 5.647708415985107, "learning_rate": 8.019713875008556e-05, "loss": 0.7023, "step": 15216 }, { "epoch": 1.0310319127312149, "grad_norm": 4.607950210571289, "learning_rate": 8.019576973098774e-05, "loss": 0.7162, "step": 15217 }, { "epoch": 1.0310996679991868, "grad_norm": 6.540196895599365, "learning_rate": 8.019440071188994e-05, "loss": 0.7398, "step": 15218 }, { "epoch": 1.031167423267159, "grad_norm": 6.469182014465332, "learning_rate": 8.019303169279212e-05, "loss": 0.6353, "step": 15219 }, { "epoch": 1.031235178535131, "grad_norm": 6.716250896453857, "learning_rate": 8.01916626736943e-05, "loss": 0.8495, "step": 15220 }, { "epoch": 1.0313029338031032, "grad_norm": 5.285309791564941, "learning_rate": 8.019029365459648e-05, "loss": 0.5978, "step": 15221 }, { "epoch": 1.0313706890710752, "grad_norm": 5.688589572906494, "learning_rate": 8.018892463549867e-05, "loss": 0.8367, "step": 15222 }, { "epoch": 1.0314384443390474, "grad_norm": 6.630375385284424, "learning_rate": 8.018755561640085e-05, "loss": 0.772, "step": 15223 }, { "epoch": 1.0315061996070194, "grad_norm": 5.191856861114502, "learning_rate": 8.018618659730303e-05, "loss": 0.6048, "step": 15224 }, { "epoch": 1.0315739548749916, "grad_norm": 5.3768839836120605, "learning_rate": 8.018481757820521e-05, "loss": 0.5963, "step": 15225 }, { "epoch": 1.0316417101429636, "grad_norm": 6.222872734069824, "learning_rate": 8.01834485591074e-05, "loss": 0.6568, "step": 15226 }, { "epoch": 1.0317094654109358, "grad_norm": 4.519933700561523, "learning_rate": 8.018207954000959e-05, "loss": 0.5297, "step": 15227 }, { "epoch": 1.0317772206789078, "grad_norm": 5.497208595275879, "learning_rate": 8.018071052091177e-05, "loss": 0.723, "step": 15228 }, { "epoch": 1.03184497594688, "grad_norm": 5.850059509277344, "learning_rate": 8.017934150181395e-05, "loss": 0.5526, "step": 15229 }, { "epoch": 1.031912731214852, "grad_norm": 5.479531288146973, "learning_rate": 8.017797248271613e-05, "loss": 0.6473, "step": 15230 }, { "epoch": 1.0319804864828241, "grad_norm": 6.393281936645508, "learning_rate": 8.017660346361832e-05, "loss": 0.8833, "step": 15231 }, { "epoch": 1.0320482417507961, "grad_norm": 5.936949253082275, "learning_rate": 8.01752344445205e-05, "loss": 0.96, "step": 15232 }, { "epoch": 1.032115997018768, "grad_norm": 6.389046669006348, "learning_rate": 8.017386542542268e-05, "loss": 0.8469, "step": 15233 }, { "epoch": 1.0321837522867403, "grad_norm": 5.648378849029541, "learning_rate": 8.017249640632488e-05, "loss": 0.5647, "step": 15234 }, { "epoch": 1.0322515075547123, "grad_norm": 6.237347602844238, "learning_rate": 8.017112738722706e-05, "loss": 0.6116, "step": 15235 }, { "epoch": 1.0323192628226845, "grad_norm": 6.752171993255615, "learning_rate": 8.016975836812924e-05, "loss": 0.7117, "step": 15236 }, { "epoch": 1.0323870180906565, "grad_norm": 7.579265594482422, "learning_rate": 8.016838934903143e-05, "loss": 0.7308, "step": 15237 }, { "epoch": 1.0324547733586287, "grad_norm": 5.256562232971191, "learning_rate": 8.016702032993361e-05, "loss": 0.6299, "step": 15238 }, { "epoch": 1.0325225286266007, "grad_norm": 4.109334468841553, "learning_rate": 8.016565131083579e-05, "loss": 0.3859, "step": 15239 }, { "epoch": 1.0325902838945729, "grad_norm": 4.160473823547363, "learning_rate": 8.016428229173797e-05, "loss": 0.5872, "step": 15240 }, { "epoch": 1.0326580391625448, "grad_norm": 5.3488898277282715, "learning_rate": 8.016291327264017e-05, "loss": 0.6311, "step": 15241 }, { "epoch": 1.032725794430517, "grad_norm": 10.214828491210938, "learning_rate": 8.016154425354235e-05, "loss": 0.8249, "step": 15242 }, { "epoch": 1.032793549698489, "grad_norm": 6.101986885070801, "learning_rate": 8.016017523444453e-05, "loss": 0.7061, "step": 15243 }, { "epoch": 1.0328613049664612, "grad_norm": 4.178615570068359, "learning_rate": 8.015880621534671e-05, "loss": 0.5615, "step": 15244 }, { "epoch": 1.0329290602344332, "grad_norm": 5.862407207489014, "learning_rate": 8.01574371962489e-05, "loss": 0.6751, "step": 15245 }, { "epoch": 1.0329968155024054, "grad_norm": 9.34377670288086, "learning_rate": 8.015606817715108e-05, "loss": 0.9418, "step": 15246 }, { "epoch": 1.0330645707703774, "grad_norm": 5.592286109924316, "learning_rate": 8.015469915805326e-05, "loss": 0.8633, "step": 15247 }, { "epoch": 1.0331323260383494, "grad_norm": 5.773770332336426, "learning_rate": 8.015333013895544e-05, "loss": 0.6302, "step": 15248 }, { "epoch": 1.0332000813063216, "grad_norm": 5.281434535980225, "learning_rate": 8.015196111985762e-05, "loss": 0.6259, "step": 15249 }, { "epoch": 1.0332678365742936, "grad_norm": 5.689375877380371, "learning_rate": 8.015059210075982e-05, "loss": 0.6735, "step": 15250 }, { "epoch": 1.0333355918422658, "grad_norm": 6.111639976501465, "learning_rate": 8.0149223081662e-05, "loss": 0.8211, "step": 15251 }, { "epoch": 1.0334033471102377, "grad_norm": 5.1067328453063965, "learning_rate": 8.014785406256418e-05, "loss": 0.5156, "step": 15252 }, { "epoch": 1.03347110237821, "grad_norm": 6.3971638679504395, "learning_rate": 8.014648504346636e-05, "loss": 0.7773, "step": 15253 }, { "epoch": 1.033538857646182, "grad_norm": 7.105903148651123, "learning_rate": 8.014511602436854e-05, "loss": 0.8546, "step": 15254 }, { "epoch": 1.0336066129141541, "grad_norm": 5.161197185516357, "learning_rate": 8.014374700527073e-05, "loss": 0.7245, "step": 15255 }, { "epoch": 1.033674368182126, "grad_norm": 5.202775001525879, "learning_rate": 8.014237798617291e-05, "loss": 0.7735, "step": 15256 }, { "epoch": 1.0337421234500983, "grad_norm": 5.7077131271362305, "learning_rate": 8.014100896707509e-05, "loss": 0.6378, "step": 15257 }, { "epoch": 1.0338098787180703, "grad_norm": 6.216683864593506, "learning_rate": 8.013963994797727e-05, "loss": 0.8665, "step": 15258 }, { "epoch": 1.0338776339860425, "grad_norm": 6.291334629058838, "learning_rate": 8.013827092887947e-05, "loss": 0.6189, "step": 15259 }, { "epoch": 1.0339453892540145, "grad_norm": 6.946064472198486, "learning_rate": 8.013690190978165e-05, "loss": 0.7349, "step": 15260 }, { "epoch": 1.0340131445219867, "grad_norm": 5.636563301086426, "learning_rate": 8.013553289068383e-05, "loss": 0.5853, "step": 15261 }, { "epoch": 1.0340808997899587, "grad_norm": 6.100147724151611, "learning_rate": 8.013416387158601e-05, "loss": 0.8702, "step": 15262 }, { "epoch": 1.0341486550579309, "grad_norm": 6.185873985290527, "learning_rate": 8.013279485248819e-05, "loss": 0.6916, "step": 15263 }, { "epoch": 1.0342164103259028, "grad_norm": 7.08696174621582, "learning_rate": 8.013142583339038e-05, "loss": 0.6231, "step": 15264 }, { "epoch": 1.034284165593875, "grad_norm": 5.164997100830078, "learning_rate": 8.013005681429256e-05, "loss": 0.5656, "step": 15265 }, { "epoch": 1.034351920861847, "grad_norm": 5.154972553253174, "learning_rate": 8.012868779519474e-05, "loss": 0.5931, "step": 15266 }, { "epoch": 1.034419676129819, "grad_norm": 5.627068996429443, "learning_rate": 8.012731877609692e-05, "loss": 0.7211, "step": 15267 }, { "epoch": 1.0344874313977912, "grad_norm": 4.955535888671875, "learning_rate": 8.012594975699912e-05, "loss": 0.5718, "step": 15268 }, { "epoch": 1.0345551866657632, "grad_norm": 5.209494113922119, "learning_rate": 8.01245807379013e-05, "loss": 0.5789, "step": 15269 }, { "epoch": 1.0346229419337354, "grad_norm": 6.1501312255859375, "learning_rate": 8.012321171880348e-05, "loss": 0.7794, "step": 15270 }, { "epoch": 1.0346906972017074, "grad_norm": 5.943906307220459, "learning_rate": 8.012184269970566e-05, "loss": 0.6906, "step": 15271 }, { "epoch": 1.0347584524696796, "grad_norm": 6.266148567199707, "learning_rate": 8.012047368060784e-05, "loss": 0.6289, "step": 15272 }, { "epoch": 1.0348262077376515, "grad_norm": 5.710838794708252, "learning_rate": 8.011910466151003e-05, "loss": 0.8653, "step": 15273 }, { "epoch": 1.0348939630056238, "grad_norm": 4.5793585777282715, "learning_rate": 8.011773564241221e-05, "loss": 0.6731, "step": 15274 }, { "epoch": 1.0349617182735957, "grad_norm": 5.686688423156738, "learning_rate": 8.011636662331439e-05, "loss": 0.6466, "step": 15275 }, { "epoch": 1.035029473541568, "grad_norm": 6.409125804901123, "learning_rate": 8.011499760421657e-05, "loss": 0.8089, "step": 15276 }, { "epoch": 1.03509722880954, "grad_norm": 9.235058784484863, "learning_rate": 8.011362858511877e-05, "loss": 0.6794, "step": 15277 }, { "epoch": 1.0351649840775121, "grad_norm": 5.486425876617432, "learning_rate": 8.011225956602095e-05, "loss": 0.6879, "step": 15278 }, { "epoch": 1.035232739345484, "grad_norm": 5.220149040222168, "learning_rate": 8.011089054692313e-05, "loss": 0.5114, "step": 15279 }, { "epoch": 1.0353004946134563, "grad_norm": 4.368312835693359, "learning_rate": 8.010952152782532e-05, "loss": 0.455, "step": 15280 }, { "epoch": 1.0353682498814283, "grad_norm": 7.039102554321289, "learning_rate": 8.01081525087275e-05, "loss": 0.9742, "step": 15281 }, { "epoch": 1.0354360051494003, "grad_norm": 5.032625675201416, "learning_rate": 8.010678348962968e-05, "loss": 0.5802, "step": 15282 }, { "epoch": 1.0355037604173725, "grad_norm": 5.188567638397217, "learning_rate": 8.010541447053188e-05, "loss": 0.5383, "step": 15283 }, { "epoch": 1.0355715156853444, "grad_norm": 6.444438934326172, "learning_rate": 8.010404545143406e-05, "loss": 0.6963, "step": 15284 }, { "epoch": 1.0356392709533166, "grad_norm": 4.651839733123779, "learning_rate": 8.010267643233624e-05, "loss": 0.5967, "step": 15285 }, { "epoch": 1.0357070262212886, "grad_norm": 5.508388996124268, "learning_rate": 8.010130741323842e-05, "loss": 0.7505, "step": 15286 }, { "epoch": 1.0357747814892608, "grad_norm": 6.142048358917236, "learning_rate": 8.009993839414061e-05, "loss": 0.7186, "step": 15287 }, { "epoch": 1.0358425367572328, "grad_norm": 5.3503899574279785, "learning_rate": 8.009856937504279e-05, "loss": 0.6446, "step": 15288 }, { "epoch": 1.035910292025205, "grad_norm": 5.894726753234863, "learning_rate": 8.009720035594497e-05, "loss": 0.6653, "step": 15289 }, { "epoch": 1.035978047293177, "grad_norm": 6.199258804321289, "learning_rate": 8.009583133684715e-05, "loss": 0.5282, "step": 15290 }, { "epoch": 1.0360458025611492, "grad_norm": 7.289583683013916, "learning_rate": 8.009446231774935e-05, "loss": 0.6126, "step": 15291 }, { "epoch": 1.0361135578291212, "grad_norm": 5.015061378479004, "learning_rate": 8.009309329865153e-05, "loss": 0.6272, "step": 15292 }, { "epoch": 1.0361813130970934, "grad_norm": 8.497147560119629, "learning_rate": 8.009172427955371e-05, "loss": 0.7675, "step": 15293 }, { "epoch": 1.0362490683650654, "grad_norm": 7.492831230163574, "learning_rate": 8.009035526045589e-05, "loss": 0.8589, "step": 15294 }, { "epoch": 1.0363168236330376, "grad_norm": 8.964046478271484, "learning_rate": 8.008898624135807e-05, "loss": 0.8056, "step": 15295 }, { "epoch": 1.0363845789010095, "grad_norm": 6.669633388519287, "learning_rate": 8.008761722226026e-05, "loss": 0.7112, "step": 15296 }, { "epoch": 1.0364523341689815, "grad_norm": 6.454007148742676, "learning_rate": 8.008624820316244e-05, "loss": 0.6977, "step": 15297 }, { "epoch": 1.0365200894369537, "grad_norm": 5.101663112640381, "learning_rate": 8.008487918406462e-05, "loss": 0.6409, "step": 15298 }, { "epoch": 1.0365878447049257, "grad_norm": 6.301394462585449, "learning_rate": 8.00835101649668e-05, "loss": 0.7558, "step": 15299 }, { "epoch": 1.036655599972898, "grad_norm": 4.543341636657715, "learning_rate": 8.0082141145869e-05, "loss": 0.6226, "step": 15300 }, { "epoch": 1.03672335524087, "grad_norm": 6.267812728881836, "learning_rate": 8.008077212677118e-05, "loss": 0.7714, "step": 15301 }, { "epoch": 1.036791110508842, "grad_norm": 5.134308338165283, "learning_rate": 8.007940310767336e-05, "loss": 0.5897, "step": 15302 }, { "epoch": 1.036858865776814, "grad_norm": 6.004744052886963, "learning_rate": 8.007803408857554e-05, "loss": 0.6461, "step": 15303 }, { "epoch": 1.0369266210447863, "grad_norm": 6.2730631828308105, "learning_rate": 8.007666506947772e-05, "loss": 0.7272, "step": 15304 }, { "epoch": 1.0369943763127583, "grad_norm": 7.032886981964111, "learning_rate": 8.007529605037991e-05, "loss": 0.708, "step": 15305 }, { "epoch": 1.0370621315807305, "grad_norm": 7.366089344024658, "learning_rate": 8.007392703128209e-05, "loss": 0.5729, "step": 15306 }, { "epoch": 1.0371298868487024, "grad_norm": 5.599564075469971, "learning_rate": 8.007255801218427e-05, "loss": 0.7446, "step": 15307 }, { "epoch": 1.0371976421166746, "grad_norm": 4.722618103027344, "learning_rate": 8.007118899308645e-05, "loss": 0.571, "step": 15308 }, { "epoch": 1.0372653973846466, "grad_norm": 5.094542026519775, "learning_rate": 8.006981997398863e-05, "loss": 0.8405, "step": 15309 }, { "epoch": 1.0373331526526188, "grad_norm": 6.558629989624023, "learning_rate": 8.006845095489083e-05, "loss": 0.6621, "step": 15310 }, { "epoch": 1.0374009079205908, "grad_norm": 7.169748306274414, "learning_rate": 8.006708193579301e-05, "loss": 0.7468, "step": 15311 }, { "epoch": 1.037468663188563, "grad_norm": 7.064498424530029, "learning_rate": 8.006571291669519e-05, "loss": 0.9875, "step": 15312 }, { "epoch": 1.037536418456535, "grad_norm": 8.31985092163086, "learning_rate": 8.006434389759737e-05, "loss": 0.7591, "step": 15313 }, { "epoch": 1.037604173724507, "grad_norm": 8.058642387390137, "learning_rate": 8.006297487849956e-05, "loss": 0.7805, "step": 15314 }, { "epoch": 1.0376719289924792, "grad_norm": 7.940720081329346, "learning_rate": 8.006160585940174e-05, "loss": 0.5895, "step": 15315 }, { "epoch": 1.0377396842604512, "grad_norm": 4.879898548126221, "learning_rate": 8.006023684030392e-05, "loss": 0.6924, "step": 15316 }, { "epoch": 1.0378074395284234, "grad_norm": 4.655125617980957, "learning_rate": 8.00588678212061e-05, "loss": 0.6833, "step": 15317 }, { "epoch": 1.0378751947963953, "grad_norm": 6.936232566833496, "learning_rate": 8.005749880210828e-05, "loss": 0.7441, "step": 15318 }, { "epoch": 1.0379429500643675, "grad_norm": 9.707219123840332, "learning_rate": 8.005612978301048e-05, "loss": 0.8677, "step": 15319 }, { "epoch": 1.0380107053323395, "grad_norm": 6.091515064239502, "learning_rate": 8.005476076391266e-05, "loss": 0.8092, "step": 15320 }, { "epoch": 1.0380784606003117, "grad_norm": 8.209754943847656, "learning_rate": 8.005339174481484e-05, "loss": 0.5553, "step": 15321 }, { "epoch": 1.0381462158682837, "grad_norm": 6.475118160247803, "learning_rate": 8.005202272571702e-05, "loss": 0.6681, "step": 15322 }, { "epoch": 1.038213971136256, "grad_norm": 8.269380569458008, "learning_rate": 8.005065370661921e-05, "loss": 0.7209, "step": 15323 }, { "epoch": 1.038281726404228, "grad_norm": 6.947020053863525, "learning_rate": 8.004928468752139e-05, "loss": 0.8772, "step": 15324 }, { "epoch": 1.0383494816722, "grad_norm": 4.593833923339844, "learning_rate": 8.004791566842357e-05, "loss": 0.5989, "step": 15325 }, { "epoch": 1.038417236940172, "grad_norm": 5.82660436630249, "learning_rate": 8.004654664932577e-05, "loss": 0.7262, "step": 15326 }, { "epoch": 1.0384849922081443, "grad_norm": 4.537695407867432, "learning_rate": 8.004517763022795e-05, "loss": 0.4765, "step": 15327 }, { "epoch": 1.0385527474761163, "grad_norm": 7.33888053894043, "learning_rate": 8.004380861113013e-05, "loss": 0.6138, "step": 15328 }, { "epoch": 1.0386205027440885, "grad_norm": 4.84438943862915, "learning_rate": 8.004243959203232e-05, "loss": 0.7464, "step": 15329 }, { "epoch": 1.0386882580120604, "grad_norm": 8.118237495422363, "learning_rate": 8.00410705729345e-05, "loss": 0.4538, "step": 15330 }, { "epoch": 1.0387560132800324, "grad_norm": 6.666751384735107, "learning_rate": 8.003970155383668e-05, "loss": 0.6187, "step": 15331 }, { "epoch": 1.0388237685480046, "grad_norm": 6.705092906951904, "learning_rate": 8.003833253473886e-05, "loss": 0.8744, "step": 15332 }, { "epoch": 1.0388915238159766, "grad_norm": 6.252575874328613, "learning_rate": 8.003696351564106e-05, "loss": 0.654, "step": 15333 }, { "epoch": 1.0389592790839488, "grad_norm": 4.896333694458008, "learning_rate": 8.003559449654324e-05, "loss": 0.6385, "step": 15334 }, { "epoch": 1.0390270343519208, "grad_norm": 6.1699957847595215, "learning_rate": 8.003422547744542e-05, "loss": 0.7625, "step": 15335 }, { "epoch": 1.039094789619893, "grad_norm": 5.307643413543701, "learning_rate": 8.00328564583476e-05, "loss": 0.7154, "step": 15336 }, { "epoch": 1.039162544887865, "grad_norm": 5.892039775848389, "learning_rate": 8.003148743924979e-05, "loss": 0.8284, "step": 15337 }, { "epoch": 1.0392303001558372, "grad_norm": 5.1963372230529785, "learning_rate": 8.003011842015197e-05, "loss": 0.6057, "step": 15338 }, { "epoch": 1.0392980554238092, "grad_norm": 5.593263626098633, "learning_rate": 8.002874940105415e-05, "loss": 0.7043, "step": 15339 }, { "epoch": 1.0393658106917814, "grad_norm": 4.905797481536865, "learning_rate": 8.002738038195633e-05, "loss": 0.6327, "step": 15340 }, { "epoch": 1.0394335659597533, "grad_norm": 7.234451770782471, "learning_rate": 8.002601136285851e-05, "loss": 0.5896, "step": 15341 }, { "epoch": 1.0395013212277255, "grad_norm": 6.471969127655029, "learning_rate": 8.00246423437607e-05, "loss": 0.8219, "step": 15342 }, { "epoch": 1.0395690764956975, "grad_norm": 6.145739555358887, "learning_rate": 8.002327332466289e-05, "loss": 0.6992, "step": 15343 }, { "epoch": 1.0396368317636697, "grad_norm": 4.9458842277526855, "learning_rate": 8.002190430556507e-05, "loss": 0.5296, "step": 15344 }, { "epoch": 1.0397045870316417, "grad_norm": 6.5056562423706055, "learning_rate": 8.002053528646725e-05, "loss": 0.7327, "step": 15345 }, { "epoch": 1.0397723422996137, "grad_norm": 5.6960320472717285, "learning_rate": 8.001916626736944e-05, "loss": 0.5978, "step": 15346 }, { "epoch": 1.0398400975675859, "grad_norm": 5.816061496734619, "learning_rate": 8.001779724827162e-05, "loss": 0.677, "step": 15347 }, { "epoch": 1.0399078528355579, "grad_norm": 5.3048577308654785, "learning_rate": 8.00164282291738e-05, "loss": 0.5438, "step": 15348 }, { "epoch": 1.03997560810353, "grad_norm": 4.549619197845459, "learning_rate": 8.001505921007598e-05, "loss": 0.3974, "step": 15349 }, { "epoch": 1.040043363371502, "grad_norm": 7.864236831665039, "learning_rate": 8.001369019097816e-05, "loss": 0.7221, "step": 15350 }, { "epoch": 1.0401111186394743, "grad_norm": 6.662132740020752, "learning_rate": 8.001232117188036e-05, "loss": 0.7352, "step": 15351 }, { "epoch": 1.0401788739074462, "grad_norm": 5.497745037078857, "learning_rate": 8.001095215278254e-05, "loss": 0.6255, "step": 15352 }, { "epoch": 1.0402466291754184, "grad_norm": 10.039993286132812, "learning_rate": 8.000958313368472e-05, "loss": 0.5771, "step": 15353 }, { "epoch": 1.0403143844433904, "grad_norm": 5.307958126068115, "learning_rate": 8.00082141145869e-05, "loss": 0.5976, "step": 15354 }, { "epoch": 1.0403821397113626, "grad_norm": 5.4257917404174805, "learning_rate": 8.000684509548909e-05, "loss": 0.7848, "step": 15355 }, { "epoch": 1.0404498949793346, "grad_norm": 7.313519477844238, "learning_rate": 8.000547607639127e-05, "loss": 0.9212, "step": 15356 }, { "epoch": 1.0405176502473068, "grad_norm": 6.327539443969727, "learning_rate": 8.000410705729345e-05, "loss": 0.6187, "step": 15357 }, { "epoch": 1.0405854055152788, "grad_norm": 5.164039134979248, "learning_rate": 8.000273803819563e-05, "loss": 0.5013, "step": 15358 }, { "epoch": 1.040653160783251, "grad_norm": 5.942699909210205, "learning_rate": 8.000136901909781e-05, "loss": 0.6973, "step": 15359 }, { "epoch": 1.040720916051223, "grad_norm": 4.6758527755737305, "learning_rate": 8e-05, "loss": 0.6294, "step": 15360 }, { "epoch": 1.0407886713191952, "grad_norm": 7.673499584197998, "learning_rate": 7.999863098090219e-05, "loss": 0.4294, "step": 15361 }, { "epoch": 1.0408564265871671, "grad_norm": 6.975398063659668, "learning_rate": 7.999726196180437e-05, "loss": 1.0246, "step": 15362 }, { "epoch": 1.0409241818551391, "grad_norm": 5.054098606109619, "learning_rate": 7.999589294270655e-05, "loss": 0.6756, "step": 15363 }, { "epoch": 1.0409919371231113, "grad_norm": 5.625120639801025, "learning_rate": 7.999452392360873e-05, "loss": 0.6515, "step": 15364 }, { "epoch": 1.0410596923910833, "grad_norm": 5.1921820640563965, "learning_rate": 7.999315490451092e-05, "loss": 0.707, "step": 15365 }, { "epoch": 1.0411274476590555, "grad_norm": 11.847484588623047, "learning_rate": 7.99917858854131e-05, "loss": 0.641, "step": 15366 }, { "epoch": 1.0411952029270275, "grad_norm": 5.949629783630371, "learning_rate": 7.999041686631528e-05, "loss": 0.6544, "step": 15367 }, { "epoch": 1.0412629581949997, "grad_norm": 5.2862138748168945, "learning_rate": 7.998904784721746e-05, "loss": 0.5595, "step": 15368 }, { "epoch": 1.0413307134629717, "grad_norm": 6.709194660186768, "learning_rate": 7.998767882811966e-05, "loss": 0.6076, "step": 15369 }, { "epoch": 1.0413984687309439, "grad_norm": 5.2147088050842285, "learning_rate": 7.998630980902184e-05, "loss": 0.3128, "step": 15370 }, { "epoch": 1.0414662239989159, "grad_norm": 6.64967155456543, "learning_rate": 7.998494078992402e-05, "loss": 0.6488, "step": 15371 }, { "epoch": 1.041533979266888, "grad_norm": 6.93261194229126, "learning_rate": 7.99835717708262e-05, "loss": 0.802, "step": 15372 }, { "epoch": 1.04160173453486, "grad_norm": 4.657343864440918, "learning_rate": 7.998220275172839e-05, "loss": 0.6395, "step": 15373 }, { "epoch": 1.0416694898028322, "grad_norm": 7.187932968139648, "learning_rate": 7.998083373263057e-05, "loss": 0.6507, "step": 15374 }, { "epoch": 1.0417372450708042, "grad_norm": 7.809886455535889, "learning_rate": 7.997946471353275e-05, "loss": 0.72, "step": 15375 }, { "epoch": 1.0418050003387764, "grad_norm": 7.339730262756348, "learning_rate": 7.997809569443495e-05, "loss": 0.7263, "step": 15376 }, { "epoch": 1.0418727556067484, "grad_norm": 6.153295516967773, "learning_rate": 7.997672667533713e-05, "loss": 0.6731, "step": 15377 }, { "epoch": 1.0419405108747206, "grad_norm": 5.918948650360107, "learning_rate": 7.997535765623931e-05, "loss": 0.6788, "step": 15378 }, { "epoch": 1.0420082661426926, "grad_norm": 7.921314716339111, "learning_rate": 7.99739886371415e-05, "loss": 0.7971, "step": 15379 }, { "epoch": 1.0420760214106646, "grad_norm": 5.347783088684082, "learning_rate": 7.997261961804368e-05, "loss": 0.5056, "step": 15380 }, { "epoch": 1.0421437766786368, "grad_norm": 4.612505912780762, "learning_rate": 7.997125059894586e-05, "loss": 0.6825, "step": 15381 }, { "epoch": 1.0422115319466088, "grad_norm": 8.001241683959961, "learning_rate": 7.996988157984804e-05, "loss": 0.5893, "step": 15382 }, { "epoch": 1.042279287214581, "grad_norm": 5.214502811431885, "learning_rate": 7.996851256075024e-05, "loss": 0.6628, "step": 15383 }, { "epoch": 1.042347042482553, "grad_norm": 8.609155654907227, "learning_rate": 7.996714354165242e-05, "loss": 0.9572, "step": 15384 }, { "epoch": 1.0424147977505251, "grad_norm": 4.416464328765869, "learning_rate": 7.99657745225546e-05, "loss": 0.6881, "step": 15385 }, { "epoch": 1.0424825530184971, "grad_norm": 6.996601104736328, "learning_rate": 7.996440550345678e-05, "loss": 0.8354, "step": 15386 }, { "epoch": 1.0425503082864693, "grad_norm": 7.7891035079956055, "learning_rate": 7.996303648435896e-05, "loss": 0.5579, "step": 15387 }, { "epoch": 1.0426180635544413, "grad_norm": 5.543262004852295, "learning_rate": 7.996166746526115e-05, "loss": 0.7516, "step": 15388 }, { "epoch": 1.0426858188224135, "grad_norm": 5.837127685546875, "learning_rate": 7.996029844616333e-05, "loss": 0.7111, "step": 15389 }, { "epoch": 1.0427535740903855, "grad_norm": 7.789334774017334, "learning_rate": 7.995892942706551e-05, "loss": 0.7419, "step": 15390 }, { "epoch": 1.0428213293583577, "grad_norm": 5.3756022453308105, "learning_rate": 7.995756040796769e-05, "loss": 0.7475, "step": 15391 }, { "epoch": 1.0428890846263297, "grad_norm": 4.391982555389404, "learning_rate": 7.995619138886989e-05, "loss": 0.6616, "step": 15392 }, { "epoch": 1.0429568398943019, "grad_norm": 5.609358787536621, "learning_rate": 7.995482236977207e-05, "loss": 0.817, "step": 15393 }, { "epoch": 1.0430245951622739, "grad_norm": 4.2053046226501465, "learning_rate": 7.995345335067425e-05, "loss": 0.5884, "step": 15394 }, { "epoch": 1.0430923504302458, "grad_norm": 7.365318775177002, "learning_rate": 7.995208433157643e-05, "loss": 0.7841, "step": 15395 }, { "epoch": 1.043160105698218, "grad_norm": 5.268418312072754, "learning_rate": 7.995071531247861e-05, "loss": 0.7165, "step": 15396 }, { "epoch": 1.04322786096619, "grad_norm": 4.734410762786865, "learning_rate": 7.99493462933808e-05, "loss": 0.5459, "step": 15397 }, { "epoch": 1.0432956162341622, "grad_norm": 6.027469635009766, "learning_rate": 7.994797727428298e-05, "loss": 0.7033, "step": 15398 }, { "epoch": 1.0433633715021342, "grad_norm": 5.068356037139893, "learning_rate": 7.994660825518516e-05, "loss": 0.7333, "step": 15399 }, { "epoch": 1.0434311267701064, "grad_norm": 5.465489387512207, "learning_rate": 7.994523923608734e-05, "loss": 0.6387, "step": 15400 }, { "epoch": 1.0434988820380784, "grad_norm": 6.087520122528076, "learning_rate": 7.994387021698954e-05, "loss": 0.6957, "step": 15401 }, { "epoch": 1.0435666373060506, "grad_norm": 6.731315612792969, "learning_rate": 7.994250119789172e-05, "loss": 0.6397, "step": 15402 }, { "epoch": 1.0436343925740226, "grad_norm": 5.57260274887085, "learning_rate": 7.99411321787939e-05, "loss": 0.6853, "step": 15403 }, { "epoch": 1.0437021478419948, "grad_norm": 5.95422887802124, "learning_rate": 7.993976315969608e-05, "loss": 0.6726, "step": 15404 }, { "epoch": 1.0437699031099668, "grad_norm": 5.570609092712402, "learning_rate": 7.993839414059826e-05, "loss": 0.7738, "step": 15405 }, { "epoch": 1.043837658377939, "grad_norm": 5.522849082946777, "learning_rate": 7.993702512150045e-05, "loss": 0.6653, "step": 15406 }, { "epoch": 1.043905413645911, "grad_norm": 5.089807987213135, "learning_rate": 7.993565610240263e-05, "loss": 0.5422, "step": 15407 }, { "epoch": 1.0439731689138831, "grad_norm": 5.952882766723633, "learning_rate": 7.993428708330481e-05, "loss": 0.714, "step": 15408 }, { "epoch": 1.0440409241818551, "grad_norm": 5.655484199523926, "learning_rate": 7.993291806420699e-05, "loss": 0.461, "step": 15409 }, { "epoch": 1.0441086794498273, "grad_norm": 6.936872959136963, "learning_rate": 7.993154904510919e-05, "loss": 0.6569, "step": 15410 }, { "epoch": 1.0441764347177993, "grad_norm": 6.010931968688965, "learning_rate": 7.993018002601137e-05, "loss": 0.7323, "step": 15411 }, { "epoch": 1.0442441899857713, "grad_norm": 7.187067985534668, "learning_rate": 7.992881100691355e-05, "loss": 0.9217, "step": 15412 }, { "epoch": 1.0443119452537435, "grad_norm": 5.070352554321289, "learning_rate": 7.992744198781573e-05, "loss": 0.6433, "step": 15413 }, { "epoch": 1.0443797005217155, "grad_norm": 6.546175479888916, "learning_rate": 7.992607296871791e-05, "loss": 0.671, "step": 15414 }, { "epoch": 1.0444474557896877, "grad_norm": 6.2877349853515625, "learning_rate": 7.99247039496201e-05, "loss": 0.8915, "step": 15415 }, { "epoch": 1.0445152110576597, "grad_norm": 5.39522647857666, "learning_rate": 7.992333493052228e-05, "loss": 0.7388, "step": 15416 }, { "epoch": 1.0445829663256319, "grad_norm": 6.453690052032471, "learning_rate": 7.992196591142446e-05, "loss": 0.7892, "step": 15417 }, { "epoch": 1.0446507215936038, "grad_norm": 6.763200759887695, "learning_rate": 7.992059689232664e-05, "loss": 0.6219, "step": 15418 }, { "epoch": 1.044718476861576, "grad_norm": 5.610644817352295, "learning_rate": 7.991922787322884e-05, "loss": 0.7382, "step": 15419 }, { "epoch": 1.044786232129548, "grad_norm": 6.314178466796875, "learning_rate": 7.991785885413102e-05, "loss": 0.7147, "step": 15420 }, { "epoch": 1.0448539873975202, "grad_norm": 5.987586498260498, "learning_rate": 7.99164898350332e-05, "loss": 0.537, "step": 15421 }, { "epoch": 1.0449217426654922, "grad_norm": 7.661067008972168, "learning_rate": 7.991512081593539e-05, "loss": 0.9005, "step": 15422 }, { "epoch": 1.0449894979334644, "grad_norm": 5.166024684906006, "learning_rate": 7.991375179683757e-05, "loss": 0.755, "step": 15423 }, { "epoch": 1.0450572532014364, "grad_norm": 6.560528755187988, "learning_rate": 7.991238277773975e-05, "loss": 0.7315, "step": 15424 }, { "epoch": 1.0451250084694086, "grad_norm": 6.162966728210449, "learning_rate": 7.991101375864195e-05, "loss": 0.5871, "step": 15425 }, { "epoch": 1.0451927637373806, "grad_norm": 6.7889628410339355, "learning_rate": 7.990964473954413e-05, "loss": 0.7381, "step": 15426 }, { "epoch": 1.0452605190053528, "grad_norm": 5.342012405395508, "learning_rate": 7.99082757204463e-05, "loss": 0.7028, "step": 15427 }, { "epoch": 1.0453282742733248, "grad_norm": 5.235086441040039, "learning_rate": 7.990690670134849e-05, "loss": 0.5145, "step": 15428 }, { "epoch": 1.0453960295412967, "grad_norm": 5.529267311096191, "learning_rate": 7.990553768225068e-05, "loss": 0.5921, "step": 15429 }, { "epoch": 1.045463784809269, "grad_norm": 5.2520880699157715, "learning_rate": 7.990416866315286e-05, "loss": 0.7905, "step": 15430 }, { "epoch": 1.045531540077241, "grad_norm": 5.065273761749268, "learning_rate": 7.990279964405504e-05, "loss": 0.6968, "step": 15431 }, { "epoch": 1.0455992953452131, "grad_norm": 5.409516334533691, "learning_rate": 7.990143062495722e-05, "loss": 0.6831, "step": 15432 }, { "epoch": 1.045667050613185, "grad_norm": 5.545697212219238, "learning_rate": 7.990006160585942e-05, "loss": 0.6522, "step": 15433 }, { "epoch": 1.0457348058811573, "grad_norm": 5.714067459106445, "learning_rate": 7.98986925867616e-05, "loss": 0.5569, "step": 15434 }, { "epoch": 1.0458025611491293, "grad_norm": 7.723472595214844, "learning_rate": 7.989732356766378e-05, "loss": 0.6271, "step": 15435 }, { "epoch": 1.0458703164171015, "grad_norm": 7.13986349105835, "learning_rate": 7.989595454856596e-05, "loss": 0.8158, "step": 15436 }, { "epoch": 1.0459380716850735, "grad_norm": 6.393859386444092, "learning_rate": 7.989458552946814e-05, "loss": 0.575, "step": 15437 }, { "epoch": 1.0460058269530457, "grad_norm": 8.466290473937988, "learning_rate": 7.989321651037033e-05, "loss": 0.6657, "step": 15438 }, { "epoch": 1.0460735822210177, "grad_norm": 4.97887659072876, "learning_rate": 7.989184749127251e-05, "loss": 0.6869, "step": 15439 }, { "epoch": 1.0461413374889899, "grad_norm": 6.120355129241943, "learning_rate": 7.989047847217469e-05, "loss": 0.7158, "step": 15440 }, { "epoch": 1.0462090927569618, "grad_norm": 5.353936672210693, "learning_rate": 7.988910945307687e-05, "loss": 0.6081, "step": 15441 }, { "epoch": 1.046276848024934, "grad_norm": 5.116786956787109, "learning_rate": 7.988774043397905e-05, "loss": 0.725, "step": 15442 }, { "epoch": 1.046344603292906, "grad_norm": 5.482798099517822, "learning_rate": 7.988637141488125e-05, "loss": 0.7154, "step": 15443 }, { "epoch": 1.046412358560878, "grad_norm": 6.663843631744385, "learning_rate": 7.988500239578343e-05, "loss": 0.7235, "step": 15444 }, { "epoch": 1.0464801138288502, "grad_norm": 5.938141822814941, "learning_rate": 7.98836333766856e-05, "loss": 0.5438, "step": 15445 }, { "epoch": 1.0465478690968222, "grad_norm": 4.406172752380371, "learning_rate": 7.988226435758779e-05, "loss": 0.6383, "step": 15446 }, { "epoch": 1.0466156243647944, "grad_norm": 4.991530895233154, "learning_rate": 7.988089533848998e-05, "loss": 0.6158, "step": 15447 }, { "epoch": 1.0466833796327664, "grad_norm": 5.490728378295898, "learning_rate": 7.987952631939216e-05, "loss": 0.6724, "step": 15448 }, { "epoch": 1.0467511349007386, "grad_norm": 6.0448384284973145, "learning_rate": 7.987815730029434e-05, "loss": 0.5423, "step": 15449 }, { "epoch": 1.0468188901687105, "grad_norm": 5.0079731941223145, "learning_rate": 7.987678828119652e-05, "loss": 0.7416, "step": 15450 }, { "epoch": 1.0468866454366828, "grad_norm": 6.357697486877441, "learning_rate": 7.98754192620987e-05, "loss": 0.5595, "step": 15451 }, { "epoch": 1.0469544007046547, "grad_norm": 5.9440226554870605, "learning_rate": 7.98740502430009e-05, "loss": 0.6615, "step": 15452 }, { "epoch": 1.047022155972627, "grad_norm": 5.571059226989746, "learning_rate": 7.987268122390308e-05, "loss": 0.6681, "step": 15453 }, { "epoch": 1.047089911240599, "grad_norm": 5.801420211791992, "learning_rate": 7.987131220480526e-05, "loss": 0.6817, "step": 15454 }, { "epoch": 1.0471576665085711, "grad_norm": 5.256460666656494, "learning_rate": 7.986994318570744e-05, "loss": 0.5682, "step": 15455 }, { "epoch": 1.047225421776543, "grad_norm": 5.437204360961914, "learning_rate": 7.986857416660963e-05, "loss": 0.8709, "step": 15456 }, { "epoch": 1.0472931770445153, "grad_norm": 4.7969651222229, "learning_rate": 7.986720514751181e-05, "loss": 0.5018, "step": 15457 }, { "epoch": 1.0473609323124873, "grad_norm": 8.146381378173828, "learning_rate": 7.986583612841399e-05, "loss": 0.6968, "step": 15458 }, { "epoch": 1.0474286875804595, "grad_norm": 7.133126258850098, "learning_rate": 7.986446710931617e-05, "loss": 0.7164, "step": 15459 }, { "epoch": 1.0474964428484315, "grad_norm": 8.4725980758667, "learning_rate": 7.986309809021835e-05, "loss": 0.543, "step": 15460 }, { "epoch": 1.0475641981164034, "grad_norm": 6.91729211807251, "learning_rate": 7.986172907112055e-05, "loss": 0.4047, "step": 15461 }, { "epoch": 1.0476319533843756, "grad_norm": 5.977921009063721, "learning_rate": 7.986036005202273e-05, "loss": 0.4638, "step": 15462 }, { "epoch": 1.0476997086523476, "grad_norm": 5.780356407165527, "learning_rate": 7.985899103292491e-05, "loss": 0.6565, "step": 15463 }, { "epoch": 1.0477674639203198, "grad_norm": 5.508984088897705, "learning_rate": 7.985762201382709e-05, "loss": 0.6388, "step": 15464 }, { "epoch": 1.0478352191882918, "grad_norm": 7.178645610809326, "learning_rate": 7.985625299472928e-05, "loss": 0.6638, "step": 15465 }, { "epoch": 1.047902974456264, "grad_norm": 5.548744201660156, "learning_rate": 7.985488397563146e-05, "loss": 0.5982, "step": 15466 }, { "epoch": 1.047970729724236, "grad_norm": 6.19132661819458, "learning_rate": 7.985351495653364e-05, "loss": 0.6313, "step": 15467 }, { "epoch": 1.0480384849922082, "grad_norm": 5.953742504119873, "learning_rate": 7.985214593743584e-05, "loss": 0.9055, "step": 15468 }, { "epoch": 1.0481062402601802, "grad_norm": 6.040510177612305, "learning_rate": 7.985077691833802e-05, "loss": 0.8007, "step": 15469 }, { "epoch": 1.0481739955281524, "grad_norm": 6.583937168121338, "learning_rate": 7.98494078992402e-05, "loss": 0.5396, "step": 15470 }, { "epoch": 1.0482417507961244, "grad_norm": 6.282310485839844, "learning_rate": 7.984803888014239e-05, "loss": 0.7954, "step": 15471 }, { "epoch": 1.0483095060640966, "grad_norm": 5.081226348876953, "learning_rate": 7.984666986104457e-05, "loss": 0.753, "step": 15472 }, { "epoch": 1.0483772613320685, "grad_norm": 7.40177583694458, "learning_rate": 7.984530084194675e-05, "loss": 0.8124, "step": 15473 }, { "epoch": 1.0484450166000407, "grad_norm": 5.868223667144775, "learning_rate": 7.984393182284893e-05, "loss": 0.651, "step": 15474 }, { "epoch": 1.0485127718680127, "grad_norm": 4.443064212799072, "learning_rate": 7.984256280375113e-05, "loss": 0.5547, "step": 15475 }, { "epoch": 1.048580527135985, "grad_norm": 6.508821964263916, "learning_rate": 7.98411937846533e-05, "loss": 0.8823, "step": 15476 }, { "epoch": 1.048648282403957, "grad_norm": 8.762590408325195, "learning_rate": 7.983982476555549e-05, "loss": 0.7839, "step": 15477 }, { "epoch": 1.048716037671929, "grad_norm": 6.139303684234619, "learning_rate": 7.983845574645767e-05, "loss": 0.6002, "step": 15478 }, { "epoch": 1.048783792939901, "grad_norm": 5.247832775115967, "learning_rate": 7.983708672735986e-05, "loss": 0.6179, "step": 15479 }, { "epoch": 1.048851548207873, "grad_norm": 6.066807270050049, "learning_rate": 7.983571770826204e-05, "loss": 0.585, "step": 15480 }, { "epoch": 1.0489193034758453, "grad_norm": 4.7573747634887695, "learning_rate": 7.983434868916422e-05, "loss": 0.6707, "step": 15481 }, { "epoch": 1.0489870587438173, "grad_norm": 7.490664958953857, "learning_rate": 7.98329796700664e-05, "loss": 0.7973, "step": 15482 }, { "epoch": 1.0490548140117895, "grad_norm": 4.913379669189453, "learning_rate": 7.983161065096858e-05, "loss": 0.6074, "step": 15483 }, { "epoch": 1.0491225692797614, "grad_norm": 5.44326639175415, "learning_rate": 7.983024163187078e-05, "loss": 0.5872, "step": 15484 }, { "epoch": 1.0491903245477336, "grad_norm": 6.307222843170166, "learning_rate": 7.982887261277296e-05, "loss": 0.7191, "step": 15485 }, { "epoch": 1.0492580798157056, "grad_norm": 7.862999439239502, "learning_rate": 7.982750359367514e-05, "loss": 0.6033, "step": 15486 }, { "epoch": 1.0493258350836778, "grad_norm": 7.700031757354736, "learning_rate": 7.982613457457732e-05, "loss": 0.8108, "step": 15487 }, { "epoch": 1.0493935903516498, "grad_norm": 4.863072872161865, "learning_rate": 7.982476555547951e-05, "loss": 0.6379, "step": 15488 }, { "epoch": 1.049461345619622, "grad_norm": 6.879638671875, "learning_rate": 7.982339653638169e-05, "loss": 0.6425, "step": 15489 }, { "epoch": 1.049529100887594, "grad_norm": 5.878018379211426, "learning_rate": 7.982202751728387e-05, "loss": 0.62, "step": 15490 }, { "epoch": 1.0495968561555662, "grad_norm": 5.8812055587768555, "learning_rate": 7.982065849818605e-05, "loss": 0.5707, "step": 15491 }, { "epoch": 1.0496646114235382, "grad_norm": 5.584284782409668, "learning_rate": 7.981928947908823e-05, "loss": 0.5941, "step": 15492 }, { "epoch": 1.0497323666915102, "grad_norm": 11.247712135314941, "learning_rate": 7.981792045999043e-05, "loss": 0.9765, "step": 15493 }, { "epoch": 1.0498001219594824, "grad_norm": 5.628141403198242, "learning_rate": 7.98165514408926e-05, "loss": 0.5359, "step": 15494 }, { "epoch": 1.0498678772274543, "grad_norm": 6.264639377593994, "learning_rate": 7.981518242179479e-05, "loss": 0.7608, "step": 15495 }, { "epoch": 1.0499356324954265, "grad_norm": 7.300719261169434, "learning_rate": 7.981381340269697e-05, "loss": 0.7166, "step": 15496 }, { "epoch": 1.0500033877633985, "grad_norm": 5.170079231262207, "learning_rate": 7.981244438359915e-05, "loss": 0.5827, "step": 15497 }, { "epoch": 1.0500711430313707, "grad_norm": 6.566158294677734, "learning_rate": 7.981107536450134e-05, "loss": 0.6063, "step": 15498 }, { "epoch": 1.0501388982993427, "grad_norm": 7.797521114349365, "learning_rate": 7.980970634540352e-05, "loss": 0.4916, "step": 15499 }, { "epoch": 1.050206653567315, "grad_norm": 6.960193157196045, "learning_rate": 7.98083373263057e-05, "loss": 0.7103, "step": 15500 }, { "epoch": 1.0502744088352869, "grad_norm": 7.210864543914795, "learning_rate": 7.980696830720788e-05, "loss": 0.7338, "step": 15501 }, { "epoch": 1.050342164103259, "grad_norm": 7.08119535446167, "learning_rate": 7.980559928811008e-05, "loss": 0.6056, "step": 15502 }, { "epoch": 1.050409919371231, "grad_norm": 6.606025695800781, "learning_rate": 7.980423026901226e-05, "loss": 0.7785, "step": 15503 }, { "epoch": 1.0504776746392033, "grad_norm": 7.610130786895752, "learning_rate": 7.980286124991444e-05, "loss": 0.7014, "step": 15504 }, { "epoch": 1.0505454299071753, "grad_norm": 6.512502670288086, "learning_rate": 7.980149223081662e-05, "loss": 0.606, "step": 15505 }, { "epoch": 1.0506131851751475, "grad_norm": 6.440487861633301, "learning_rate": 7.98001232117188e-05, "loss": 0.6465, "step": 15506 }, { "epoch": 1.0506809404431194, "grad_norm": 6.003171920776367, "learning_rate": 7.979875419262099e-05, "loss": 0.5331, "step": 15507 }, { "epoch": 1.0507486957110916, "grad_norm": 5.904346466064453, "learning_rate": 7.979738517352317e-05, "loss": 0.824, "step": 15508 }, { "epoch": 1.0508164509790636, "grad_norm": 5.533935070037842, "learning_rate": 7.979601615442535e-05, "loss": 0.8511, "step": 15509 }, { "epoch": 1.0508842062470356, "grad_norm": 6.289936065673828, "learning_rate": 7.979464713532753e-05, "loss": 0.7926, "step": 15510 }, { "epoch": 1.0509519615150078, "grad_norm": 5.81295919418335, "learning_rate": 7.979327811622973e-05, "loss": 0.9554, "step": 15511 }, { "epoch": 1.0510197167829798, "grad_norm": 5.214400768280029, "learning_rate": 7.97919090971319e-05, "loss": 0.9091, "step": 15512 }, { "epoch": 1.051087472050952, "grad_norm": 7.468234539031982, "learning_rate": 7.979054007803409e-05, "loss": 0.593, "step": 15513 }, { "epoch": 1.051155227318924, "grad_norm": 6.6748366355896, "learning_rate": 7.978917105893628e-05, "loss": 0.9312, "step": 15514 }, { "epoch": 1.0512229825868962, "grad_norm": 7.021498680114746, "learning_rate": 7.978780203983846e-05, "loss": 0.5446, "step": 15515 }, { "epoch": 1.0512907378548682, "grad_norm": 7.151730060577393, "learning_rate": 7.978643302074064e-05, "loss": 0.6672, "step": 15516 }, { "epoch": 1.0513584931228404, "grad_norm": 8.595193862915039, "learning_rate": 7.978506400164284e-05, "loss": 0.8893, "step": 15517 }, { "epoch": 1.0514262483908123, "grad_norm": 6.063235282897949, "learning_rate": 7.978369498254502e-05, "loss": 0.5404, "step": 15518 }, { "epoch": 1.0514940036587845, "grad_norm": 6.944025039672852, "learning_rate": 7.97823259634472e-05, "loss": 0.4193, "step": 15519 }, { "epoch": 1.0515617589267565, "grad_norm": 6.7520976066589355, "learning_rate": 7.978095694434938e-05, "loss": 0.8591, "step": 15520 }, { "epoch": 1.0516295141947287, "grad_norm": 5.863169193267822, "learning_rate": 7.977958792525157e-05, "loss": 0.6715, "step": 15521 }, { "epoch": 1.0516972694627007, "grad_norm": 7.465962886810303, "learning_rate": 7.977821890615375e-05, "loss": 0.6657, "step": 15522 }, { "epoch": 1.051765024730673, "grad_norm": 4.659458637237549, "learning_rate": 7.977684988705593e-05, "loss": 0.8134, "step": 15523 }, { "epoch": 1.0518327799986449, "grad_norm": 7.612015247344971, "learning_rate": 7.977548086795811e-05, "loss": 0.6289, "step": 15524 }, { "epoch": 1.051900535266617, "grad_norm": 5.157435894012451, "learning_rate": 7.97741118488603e-05, "loss": 0.7039, "step": 15525 }, { "epoch": 1.051968290534589, "grad_norm": 5.263278484344482, "learning_rate": 7.977274282976249e-05, "loss": 0.5698, "step": 15526 }, { "epoch": 1.052036045802561, "grad_norm": 5.186944484710693, "learning_rate": 7.977137381066467e-05, "loss": 0.5398, "step": 15527 }, { "epoch": 1.0521038010705333, "grad_norm": 6.908531188964844, "learning_rate": 7.977000479156685e-05, "loss": 0.637, "step": 15528 }, { "epoch": 1.0521715563385052, "grad_norm": 5.298072338104248, "learning_rate": 7.976863577246903e-05, "loss": 0.55, "step": 15529 }, { "epoch": 1.0522393116064774, "grad_norm": 4.47366189956665, "learning_rate": 7.976726675337122e-05, "loss": 0.5943, "step": 15530 }, { "epoch": 1.0523070668744494, "grad_norm": 5.410435676574707, "learning_rate": 7.97658977342734e-05, "loss": 0.9073, "step": 15531 }, { "epoch": 1.0523748221424216, "grad_norm": 5.6297478675842285, "learning_rate": 7.976452871517558e-05, "loss": 0.7233, "step": 15532 }, { "epoch": 1.0524425774103936, "grad_norm": 5.776382923126221, "learning_rate": 7.976315969607776e-05, "loss": 0.6116, "step": 15533 }, { "epoch": 1.0525103326783658, "grad_norm": 7.4787774085998535, "learning_rate": 7.976179067697996e-05, "loss": 0.7542, "step": 15534 }, { "epoch": 1.0525780879463378, "grad_norm": 4.613537311553955, "learning_rate": 7.976042165788214e-05, "loss": 0.6745, "step": 15535 }, { "epoch": 1.05264584321431, "grad_norm": 6.3547282218933105, "learning_rate": 7.975905263878432e-05, "loss": 0.7607, "step": 15536 }, { "epoch": 1.052713598482282, "grad_norm": 5.2433366775512695, "learning_rate": 7.97576836196865e-05, "loss": 0.679, "step": 15537 }, { "epoch": 1.0527813537502542, "grad_norm": 6.4581522941589355, "learning_rate": 7.975631460058868e-05, "loss": 0.8027, "step": 15538 }, { "epoch": 1.0528491090182261, "grad_norm": 6.2019267082214355, "learning_rate": 7.975494558149087e-05, "loss": 0.8168, "step": 15539 }, { "epoch": 1.0529168642861984, "grad_norm": 5.71392297744751, "learning_rate": 7.975357656239305e-05, "loss": 0.6555, "step": 15540 }, { "epoch": 1.0529846195541703, "grad_norm": 6.714015007019043, "learning_rate": 7.975220754329523e-05, "loss": 0.6979, "step": 15541 }, { "epoch": 1.0530523748221423, "grad_norm": 6.717672348022461, "learning_rate": 7.975083852419741e-05, "loss": 0.8411, "step": 15542 }, { "epoch": 1.0531201300901145, "grad_norm": 7.295918941497803, "learning_rate": 7.97494695050996e-05, "loss": 0.702, "step": 15543 }, { "epoch": 1.0531878853580865, "grad_norm": 6.989544868469238, "learning_rate": 7.974810048600179e-05, "loss": 0.6153, "step": 15544 }, { "epoch": 1.0532556406260587, "grad_norm": 6.641953945159912, "learning_rate": 7.974673146690397e-05, "loss": 0.4917, "step": 15545 }, { "epoch": 1.0533233958940307, "grad_norm": 7.03810977935791, "learning_rate": 7.974536244780615e-05, "loss": 0.6252, "step": 15546 }, { "epoch": 1.0533911511620029, "grad_norm": 5.024344444274902, "learning_rate": 7.974399342870833e-05, "loss": 0.7038, "step": 15547 }, { "epoch": 1.0534589064299749, "grad_norm": 6.919520854949951, "learning_rate": 7.974262440961052e-05, "loss": 0.9027, "step": 15548 }, { "epoch": 1.053526661697947, "grad_norm": 5.6881537437438965, "learning_rate": 7.97412553905127e-05, "loss": 0.6556, "step": 15549 }, { "epoch": 1.053594416965919, "grad_norm": 6.098855495452881, "learning_rate": 7.973988637141488e-05, "loss": 0.6225, "step": 15550 }, { "epoch": 1.0536621722338912, "grad_norm": 5.481220245361328, "learning_rate": 7.973851735231706e-05, "loss": 0.7031, "step": 15551 }, { "epoch": 1.0537299275018632, "grad_norm": 7.0920586585998535, "learning_rate": 7.973714833321924e-05, "loss": 0.672, "step": 15552 }, { "epoch": 1.0537976827698354, "grad_norm": 4.710086345672607, "learning_rate": 7.973577931412144e-05, "loss": 0.652, "step": 15553 }, { "epoch": 1.0538654380378074, "grad_norm": 5.764717102050781, "learning_rate": 7.973441029502362e-05, "loss": 0.7777, "step": 15554 }, { "epoch": 1.0539331933057796, "grad_norm": 6.428826332092285, "learning_rate": 7.97330412759258e-05, "loss": 0.7785, "step": 15555 }, { "epoch": 1.0540009485737516, "grad_norm": 5.741217136383057, "learning_rate": 7.973167225682798e-05, "loss": 0.6728, "step": 15556 }, { "epoch": 1.0540687038417238, "grad_norm": 7.573585510253906, "learning_rate": 7.973030323773017e-05, "loss": 0.7876, "step": 15557 }, { "epoch": 1.0541364591096958, "grad_norm": 4.414328575134277, "learning_rate": 7.972893421863235e-05, "loss": 0.556, "step": 15558 }, { "epoch": 1.0542042143776678, "grad_norm": 4.814504623413086, "learning_rate": 7.972756519953453e-05, "loss": 0.6419, "step": 15559 }, { "epoch": 1.05427196964564, "grad_norm": 6.726992130279541, "learning_rate": 7.972619618043673e-05, "loss": 0.7071, "step": 15560 }, { "epoch": 1.054339724913612, "grad_norm": 4.732487201690674, "learning_rate": 7.97248271613389e-05, "loss": 0.6135, "step": 15561 }, { "epoch": 1.0544074801815841, "grad_norm": 5.262027740478516, "learning_rate": 7.972345814224109e-05, "loss": 0.657, "step": 15562 }, { "epoch": 1.0544752354495561, "grad_norm": 7.205251693725586, "learning_rate": 7.972208912314328e-05, "loss": 0.6268, "step": 15563 }, { "epoch": 1.0545429907175283, "grad_norm": 6.2166337966918945, "learning_rate": 7.972072010404546e-05, "loss": 0.678, "step": 15564 }, { "epoch": 1.0546107459855003, "grad_norm": 5.0857343673706055, "learning_rate": 7.971935108494764e-05, "loss": 0.6984, "step": 15565 }, { "epoch": 1.0546785012534725, "grad_norm": 5.1078782081604, "learning_rate": 7.971798206584983e-05, "loss": 0.7758, "step": 15566 }, { "epoch": 1.0547462565214445, "grad_norm": 6.676407337188721, "learning_rate": 7.971661304675201e-05, "loss": 0.9758, "step": 15567 }, { "epoch": 1.0548140117894167, "grad_norm": 10.701401710510254, "learning_rate": 7.97152440276542e-05, "loss": 0.7816, "step": 15568 }, { "epoch": 1.0548817670573887, "grad_norm": 11.736532211303711, "learning_rate": 7.971387500855638e-05, "loss": 0.7215, "step": 15569 }, { "epoch": 1.0549495223253609, "grad_norm": 5.571110248565674, "learning_rate": 7.971250598945856e-05, "loss": 0.683, "step": 15570 }, { "epoch": 1.0550172775933329, "grad_norm": 6.118210315704346, "learning_rate": 7.971113697036075e-05, "loss": 0.6195, "step": 15571 }, { "epoch": 1.055085032861305, "grad_norm": 6.9824748039245605, "learning_rate": 7.970976795126293e-05, "loss": 0.6818, "step": 15572 }, { "epoch": 1.055152788129277, "grad_norm": 6.418920040130615, "learning_rate": 7.970839893216511e-05, "loss": 0.6653, "step": 15573 }, { "epoch": 1.0552205433972492, "grad_norm": 5.251111030578613, "learning_rate": 7.970702991306729e-05, "loss": 0.5911, "step": 15574 }, { "epoch": 1.0552882986652212, "grad_norm": 6.46027135848999, "learning_rate": 7.970566089396947e-05, "loss": 0.6771, "step": 15575 }, { "epoch": 1.0553560539331932, "grad_norm": 5.67859411239624, "learning_rate": 7.970429187487166e-05, "loss": 0.6342, "step": 15576 }, { "epoch": 1.0554238092011654, "grad_norm": 6.3809661865234375, "learning_rate": 7.970292285577385e-05, "loss": 0.7269, "step": 15577 }, { "epoch": 1.0554915644691374, "grad_norm": 5.144848346710205, "learning_rate": 7.970155383667603e-05, "loss": 0.7396, "step": 15578 }, { "epoch": 1.0555593197371096, "grad_norm": 6.251219749450684, "learning_rate": 7.97001848175782e-05, "loss": 0.6375, "step": 15579 }, { "epoch": 1.0556270750050816, "grad_norm": 6.255589008331299, "learning_rate": 7.96988157984804e-05, "loss": 0.5346, "step": 15580 }, { "epoch": 1.0556948302730538, "grad_norm": 7.6489481925964355, "learning_rate": 7.969744677938258e-05, "loss": 0.6735, "step": 15581 }, { "epoch": 1.0557625855410258, "grad_norm": 6.068694114685059, "learning_rate": 7.969607776028476e-05, "loss": 0.6175, "step": 15582 }, { "epoch": 1.055830340808998, "grad_norm": 6.7391815185546875, "learning_rate": 7.969470874118694e-05, "loss": 0.8585, "step": 15583 }, { "epoch": 1.05589809607697, "grad_norm": 6.22243595123291, "learning_rate": 7.969333972208912e-05, "loss": 0.5475, "step": 15584 }, { "epoch": 1.0559658513449421, "grad_norm": 5.03702449798584, "learning_rate": 7.969197070299132e-05, "loss": 0.7151, "step": 15585 }, { "epoch": 1.0560336066129141, "grad_norm": 6.093680381774902, "learning_rate": 7.96906016838935e-05, "loss": 0.5883, "step": 15586 }, { "epoch": 1.0561013618808863, "grad_norm": 7.8765034675598145, "learning_rate": 7.968923266479568e-05, "loss": 0.7901, "step": 15587 }, { "epoch": 1.0561691171488583, "grad_norm": 6.4514031410217285, "learning_rate": 7.968786364569786e-05, "loss": 0.6763, "step": 15588 }, { "epoch": 1.0562368724168305, "grad_norm": 5.650920867919922, "learning_rate": 7.968649462660005e-05, "loss": 0.74, "step": 15589 }, { "epoch": 1.0563046276848025, "grad_norm": 5.893711090087891, "learning_rate": 7.968512560750223e-05, "loss": 0.696, "step": 15590 }, { "epoch": 1.0563723829527745, "grad_norm": 5.7477240562438965, "learning_rate": 7.968375658840441e-05, "loss": 0.6714, "step": 15591 }, { "epoch": 1.0564401382207467, "grad_norm": 5.326556205749512, "learning_rate": 7.968238756930659e-05, "loss": 0.6365, "step": 15592 }, { "epoch": 1.0565078934887187, "grad_norm": 6.540344715118408, "learning_rate": 7.968101855020877e-05, "loss": 0.7155, "step": 15593 }, { "epoch": 1.0565756487566909, "grad_norm": 6.695101261138916, "learning_rate": 7.967964953111097e-05, "loss": 0.7152, "step": 15594 }, { "epoch": 1.0566434040246628, "grad_norm": 6.3795905113220215, "learning_rate": 7.967828051201315e-05, "loss": 0.6307, "step": 15595 }, { "epoch": 1.056711159292635, "grad_norm": 6.77623987197876, "learning_rate": 7.967691149291533e-05, "loss": 0.6633, "step": 15596 }, { "epoch": 1.056778914560607, "grad_norm": 7.296997547149658, "learning_rate": 7.96755424738175e-05, "loss": 0.6032, "step": 15597 }, { "epoch": 1.0568466698285792, "grad_norm": 9.698561668395996, "learning_rate": 7.967417345471969e-05, "loss": 0.7025, "step": 15598 }, { "epoch": 1.0569144250965512, "grad_norm": 6.234988212585449, "learning_rate": 7.967280443562188e-05, "loss": 0.7538, "step": 15599 }, { "epoch": 1.0569821803645234, "grad_norm": 5.491406440734863, "learning_rate": 7.967143541652406e-05, "loss": 0.923, "step": 15600 }, { "epoch": 1.0570499356324954, "grad_norm": 5.402745723724365, "learning_rate": 7.967006639742624e-05, "loss": 0.6183, "step": 15601 }, { "epoch": 1.0571176909004676, "grad_norm": 6.674182415008545, "learning_rate": 7.966869737832842e-05, "loss": 0.6448, "step": 15602 }, { "epoch": 1.0571854461684396, "grad_norm": 6.507812976837158, "learning_rate": 7.966732835923062e-05, "loss": 0.8753, "step": 15603 }, { "epoch": 1.0572532014364118, "grad_norm": 5.593377590179443, "learning_rate": 7.96659593401328e-05, "loss": 0.7686, "step": 15604 }, { "epoch": 1.0573209567043838, "grad_norm": 6.690634250640869, "learning_rate": 7.966459032103498e-05, "loss": 1.1595, "step": 15605 }, { "epoch": 1.057388711972356, "grad_norm": 6.244167804718018, "learning_rate": 7.966322130193717e-05, "loss": 0.7725, "step": 15606 }, { "epoch": 1.057456467240328, "grad_norm": 5.173239707946777, "learning_rate": 7.966185228283935e-05, "loss": 0.6084, "step": 15607 }, { "epoch": 1.0575242225083, "grad_norm": 5.758760929107666, "learning_rate": 7.966048326374153e-05, "loss": 0.7413, "step": 15608 }, { "epoch": 1.0575919777762721, "grad_norm": 4.311896800994873, "learning_rate": 7.965911424464372e-05, "loss": 0.5449, "step": 15609 }, { "epoch": 1.057659733044244, "grad_norm": 7.174588203430176, "learning_rate": 7.96577452255459e-05, "loss": 0.7287, "step": 15610 }, { "epoch": 1.0577274883122163, "grad_norm": 5.071308135986328, "learning_rate": 7.965637620644809e-05, "loss": 0.6074, "step": 15611 }, { "epoch": 1.0577952435801883, "grad_norm": 6.668962001800537, "learning_rate": 7.965500718735028e-05, "loss": 0.6961, "step": 15612 }, { "epoch": 1.0578629988481605, "grad_norm": 5.634909629821777, "learning_rate": 7.965363816825246e-05, "loss": 0.7343, "step": 15613 }, { "epoch": 1.0579307541161325, "grad_norm": 5.5863800048828125, "learning_rate": 7.965226914915464e-05, "loss": 0.5128, "step": 15614 }, { "epoch": 1.0579985093841047, "grad_norm": 7.636445045471191, "learning_rate": 7.965090013005682e-05, "loss": 0.6415, "step": 15615 }, { "epoch": 1.0580662646520766, "grad_norm": 8.801673889160156, "learning_rate": 7.9649531110959e-05, "loss": 0.6823, "step": 15616 }, { "epoch": 1.0581340199200489, "grad_norm": 5.3621721267700195, "learning_rate": 7.96481620918612e-05, "loss": 0.8068, "step": 15617 }, { "epoch": 1.0582017751880208, "grad_norm": 6.023779392242432, "learning_rate": 7.964679307276337e-05, "loss": 0.8556, "step": 15618 }, { "epoch": 1.058269530455993, "grad_norm": 5.901587963104248, "learning_rate": 7.964542405366556e-05, "loss": 0.6895, "step": 15619 }, { "epoch": 1.058337285723965, "grad_norm": 5.533476829528809, "learning_rate": 7.964405503456774e-05, "loss": 0.6305, "step": 15620 }, { "epoch": 1.0584050409919372, "grad_norm": 4.859772682189941, "learning_rate": 7.964268601546993e-05, "loss": 0.4848, "step": 15621 }, { "epoch": 1.0584727962599092, "grad_norm": 4.763238430023193, "learning_rate": 7.964131699637211e-05, "loss": 0.6645, "step": 15622 }, { "epoch": 1.0585405515278814, "grad_norm": 5.757406711578369, "learning_rate": 7.963994797727429e-05, "loss": 0.5606, "step": 15623 }, { "epoch": 1.0586083067958534, "grad_norm": 5.710381984710693, "learning_rate": 7.963857895817647e-05, "loss": 0.6795, "step": 15624 }, { "epoch": 1.0586760620638254, "grad_norm": 6.074893951416016, "learning_rate": 7.963720993907865e-05, "loss": 0.7556, "step": 15625 }, { "epoch": 1.0587438173317976, "grad_norm": 4.727344512939453, "learning_rate": 7.963584091998084e-05, "loss": 0.4786, "step": 15626 }, { "epoch": 1.0588115725997695, "grad_norm": 5.696430683135986, "learning_rate": 7.963447190088302e-05, "loss": 0.8202, "step": 15627 }, { "epoch": 1.0588793278677417, "grad_norm": 6.602227210998535, "learning_rate": 7.96331028817852e-05, "loss": 0.5754, "step": 15628 }, { "epoch": 1.0589470831357137, "grad_norm": 5.352349758148193, "learning_rate": 7.963173386268739e-05, "loss": 0.7036, "step": 15629 }, { "epoch": 1.059014838403686, "grad_norm": 4.953490734100342, "learning_rate": 7.963036484358957e-05, "loss": 0.5278, "step": 15630 }, { "epoch": 1.059082593671658, "grad_norm": 5.749557971954346, "learning_rate": 7.962899582449176e-05, "loss": 0.7663, "step": 15631 }, { "epoch": 1.0591503489396301, "grad_norm": 5.01806640625, "learning_rate": 7.962762680539394e-05, "loss": 0.699, "step": 15632 }, { "epoch": 1.059218104207602, "grad_norm": 5.559873104095459, "learning_rate": 7.962625778629612e-05, "loss": 0.8066, "step": 15633 }, { "epoch": 1.0592858594755743, "grad_norm": 7.373801231384277, "learning_rate": 7.96248887671983e-05, "loss": 0.8825, "step": 15634 }, { "epoch": 1.0593536147435463, "grad_norm": 6.330979824066162, "learning_rate": 7.96235197481005e-05, "loss": 0.8277, "step": 15635 }, { "epoch": 1.0594213700115185, "grad_norm": 4.986181735992432, "learning_rate": 7.962215072900268e-05, "loss": 0.7141, "step": 15636 }, { "epoch": 1.0594891252794905, "grad_norm": 6.320187568664551, "learning_rate": 7.962078170990486e-05, "loss": 0.7843, "step": 15637 }, { "epoch": 1.0595568805474627, "grad_norm": 4.531287670135498, "learning_rate": 7.961941269080704e-05, "loss": 0.789, "step": 15638 }, { "epoch": 1.0596246358154346, "grad_norm": 6.472817897796631, "learning_rate": 7.961804367170922e-05, "loss": 0.7853, "step": 15639 }, { "epoch": 1.0596923910834066, "grad_norm": 6.977755546569824, "learning_rate": 7.961667465261141e-05, "loss": 0.8924, "step": 15640 }, { "epoch": 1.0597601463513788, "grad_norm": 4.986513137817383, "learning_rate": 7.961530563351359e-05, "loss": 0.6245, "step": 15641 }, { "epoch": 1.0598279016193508, "grad_norm": 6.008297443389893, "learning_rate": 7.961393661441577e-05, "loss": 0.6167, "step": 15642 }, { "epoch": 1.059895656887323, "grad_norm": 6.505741596221924, "learning_rate": 7.961256759531795e-05, "loss": 0.5389, "step": 15643 }, { "epoch": 1.059963412155295, "grad_norm": 7.218502521514893, "learning_rate": 7.961119857622014e-05, "loss": 0.7364, "step": 15644 }, { "epoch": 1.0600311674232672, "grad_norm": 6.085995197296143, "learning_rate": 7.960982955712233e-05, "loss": 0.9236, "step": 15645 }, { "epoch": 1.0600989226912392, "grad_norm": 5.947787284851074, "learning_rate": 7.96084605380245e-05, "loss": 0.5488, "step": 15646 }, { "epoch": 1.0601666779592114, "grad_norm": 6.727092742919922, "learning_rate": 7.960709151892669e-05, "loss": 0.9123, "step": 15647 }, { "epoch": 1.0602344332271834, "grad_norm": 6.658313751220703, "learning_rate": 7.960572249982887e-05, "loss": 0.8878, "step": 15648 }, { "epoch": 1.0603021884951556, "grad_norm": 7.2491984367370605, "learning_rate": 7.960435348073106e-05, "loss": 0.6701, "step": 15649 }, { "epoch": 1.0603699437631275, "grad_norm": 8.330307960510254, "learning_rate": 7.960298446163324e-05, "loss": 0.8083, "step": 15650 }, { "epoch": 1.0604376990310997, "grad_norm": 6.639986038208008, "learning_rate": 7.960161544253542e-05, "loss": 0.6546, "step": 15651 }, { "epoch": 1.0605054542990717, "grad_norm": 5.291508674621582, "learning_rate": 7.96002464234376e-05, "loss": 0.4886, "step": 15652 }, { "epoch": 1.060573209567044, "grad_norm": 6.049704551696777, "learning_rate": 7.95988774043398e-05, "loss": 0.8144, "step": 15653 }, { "epoch": 1.060640964835016, "grad_norm": 4.995707988739014, "learning_rate": 7.959750838524198e-05, "loss": 0.7146, "step": 15654 }, { "epoch": 1.0607087201029881, "grad_norm": 5.3545451164245605, "learning_rate": 7.959613936614416e-05, "loss": 0.6091, "step": 15655 }, { "epoch": 1.06077647537096, "grad_norm": 5.604588508605957, "learning_rate": 7.959477034704635e-05, "loss": 0.7015, "step": 15656 }, { "epoch": 1.060844230638932, "grad_norm": 4.693150997161865, "learning_rate": 7.959340132794853e-05, "loss": 0.6435, "step": 15657 }, { "epoch": 1.0609119859069043, "grad_norm": 5.757687568664551, "learning_rate": 7.959203230885071e-05, "loss": 0.7502, "step": 15658 }, { "epoch": 1.0609797411748763, "grad_norm": 5.2874064445495605, "learning_rate": 7.95906632897529e-05, "loss": 0.5808, "step": 15659 }, { "epoch": 1.0610474964428485, "grad_norm": 5.154580116271973, "learning_rate": 7.958929427065508e-05, "loss": 0.6679, "step": 15660 }, { "epoch": 1.0611152517108204, "grad_norm": 4.7557830810546875, "learning_rate": 7.958792525155726e-05, "loss": 0.452, "step": 15661 }, { "epoch": 1.0611830069787926, "grad_norm": 4.7612833976745605, "learning_rate": 7.958655623245945e-05, "loss": 0.5888, "step": 15662 }, { "epoch": 1.0612507622467646, "grad_norm": 6.610939025878906, "learning_rate": 7.958518721336164e-05, "loss": 0.8694, "step": 15663 }, { "epoch": 1.0613185175147368, "grad_norm": 4.907252788543701, "learning_rate": 7.958381819426382e-05, "loss": 0.5352, "step": 15664 }, { "epoch": 1.0613862727827088, "grad_norm": 5.652861595153809, "learning_rate": 7.9582449175166e-05, "loss": 0.7831, "step": 15665 }, { "epoch": 1.061454028050681, "grad_norm": 6.604598045349121, "learning_rate": 7.958108015606818e-05, "loss": 0.5988, "step": 15666 }, { "epoch": 1.061521783318653, "grad_norm": 6.462950706481934, "learning_rate": 7.957971113697037e-05, "loss": 0.9984, "step": 15667 }, { "epoch": 1.0615895385866252, "grad_norm": 5.563201427459717, "learning_rate": 7.957834211787255e-05, "loss": 0.583, "step": 15668 }, { "epoch": 1.0616572938545972, "grad_norm": 8.087369918823242, "learning_rate": 7.957697309877473e-05, "loss": 0.7112, "step": 15669 }, { "epoch": 1.0617250491225694, "grad_norm": 6.019291877746582, "learning_rate": 7.957560407967691e-05, "loss": 0.4987, "step": 15670 }, { "epoch": 1.0617928043905414, "grad_norm": 5.730112075805664, "learning_rate": 7.95742350605791e-05, "loss": 0.7165, "step": 15671 }, { "epoch": 1.0618605596585136, "grad_norm": 4.978113651275635, "learning_rate": 7.957286604148129e-05, "loss": 0.6191, "step": 15672 }, { "epoch": 1.0619283149264855, "grad_norm": 14.537919998168945, "learning_rate": 7.957149702238347e-05, "loss": 0.6737, "step": 15673 }, { "epoch": 1.0619960701944575, "grad_norm": 6.772073745727539, "learning_rate": 7.957012800328565e-05, "loss": 0.6738, "step": 15674 }, { "epoch": 1.0620638254624297, "grad_norm": 7.139239311218262, "learning_rate": 7.956875898418783e-05, "loss": 0.7071, "step": 15675 }, { "epoch": 1.0621315807304017, "grad_norm": 3.958707571029663, "learning_rate": 7.956738996509002e-05, "loss": 0.5774, "step": 15676 }, { "epoch": 1.062199335998374, "grad_norm": 7.803884029388428, "learning_rate": 7.95660209459922e-05, "loss": 1.1357, "step": 15677 }, { "epoch": 1.0622670912663459, "grad_norm": 4.009269714355469, "learning_rate": 7.956465192689438e-05, "loss": 0.5232, "step": 15678 }, { "epoch": 1.062334846534318, "grad_norm": 5.269588947296143, "learning_rate": 7.956328290779657e-05, "loss": 0.6048, "step": 15679 }, { "epoch": 1.06240260180229, "grad_norm": 6.135133266448975, "learning_rate": 7.956191388869875e-05, "loss": 0.8373, "step": 15680 }, { "epoch": 1.0624703570702623, "grad_norm": 7.336065769195557, "learning_rate": 7.956054486960094e-05, "loss": 0.7269, "step": 15681 }, { "epoch": 1.0625381123382343, "grad_norm": 5.202469825744629, "learning_rate": 7.955917585050312e-05, "loss": 0.5768, "step": 15682 }, { "epoch": 1.0626058676062065, "grad_norm": 9.39901351928711, "learning_rate": 7.95578068314053e-05, "loss": 0.7882, "step": 15683 }, { "epoch": 1.0626736228741784, "grad_norm": 6.687909126281738, "learning_rate": 7.955643781230748e-05, "loss": 0.7558, "step": 15684 }, { "epoch": 1.0627413781421506, "grad_norm": 6.786401271820068, "learning_rate": 7.955506879320966e-05, "loss": 0.8085, "step": 15685 }, { "epoch": 1.0628091334101226, "grad_norm": 5.360512733459473, "learning_rate": 7.955369977411185e-05, "loss": 0.7416, "step": 15686 }, { "epoch": 1.0628768886780948, "grad_norm": 6.797042369842529, "learning_rate": 7.955233075501403e-05, "loss": 0.6518, "step": 15687 }, { "epoch": 1.0629446439460668, "grad_norm": 9.375871658325195, "learning_rate": 7.955096173591622e-05, "loss": 0.6947, "step": 15688 }, { "epoch": 1.0630123992140388, "grad_norm": 4.795276641845703, "learning_rate": 7.95495927168184e-05, "loss": 0.5838, "step": 15689 }, { "epoch": 1.063080154482011, "grad_norm": 7.5510759353637695, "learning_rate": 7.954822369772059e-05, "loss": 0.6106, "step": 15690 }, { "epoch": 1.063147909749983, "grad_norm": 5.1748175621032715, "learning_rate": 7.954685467862277e-05, "loss": 0.7426, "step": 15691 }, { "epoch": 1.0632156650179552, "grad_norm": 4.72312593460083, "learning_rate": 7.954548565952495e-05, "loss": 0.4954, "step": 15692 }, { "epoch": 1.0632834202859271, "grad_norm": 6.420324325561523, "learning_rate": 7.954411664042713e-05, "loss": 0.7084, "step": 15693 }, { "epoch": 1.0633511755538994, "grad_norm": 5.019575119018555, "learning_rate": 7.954274762132931e-05, "loss": 0.6904, "step": 15694 }, { "epoch": 1.0634189308218713, "grad_norm": 7.187556743621826, "learning_rate": 7.95413786022315e-05, "loss": 0.752, "step": 15695 }, { "epoch": 1.0634866860898435, "grad_norm": 4.740534782409668, "learning_rate": 7.954000958313369e-05, "loss": 0.5472, "step": 15696 }, { "epoch": 1.0635544413578155, "grad_norm": 4.72992467880249, "learning_rate": 7.953864056403587e-05, "loss": 0.5106, "step": 15697 }, { "epoch": 1.0636221966257877, "grad_norm": 8.433707237243652, "learning_rate": 7.953727154493805e-05, "loss": 0.552, "step": 15698 }, { "epoch": 1.0636899518937597, "grad_norm": 7.640308856964111, "learning_rate": 7.953590252584024e-05, "loss": 0.5683, "step": 15699 }, { "epoch": 1.063757707161732, "grad_norm": 5.864986419677734, "learning_rate": 7.953453350674242e-05, "loss": 0.7258, "step": 15700 }, { "epoch": 1.0638254624297039, "grad_norm": 4.7822418212890625, "learning_rate": 7.95331644876446e-05, "loss": 0.7018, "step": 15701 }, { "epoch": 1.063893217697676, "grad_norm": 7.310060501098633, "learning_rate": 7.95317954685468e-05, "loss": 0.6671, "step": 15702 }, { "epoch": 1.063960972965648, "grad_norm": 5.417652606964111, "learning_rate": 7.953042644944897e-05, "loss": 0.7866, "step": 15703 }, { "epoch": 1.06402872823362, "grad_norm": 6.251247882843018, "learning_rate": 7.952905743035115e-05, "loss": 0.7288, "step": 15704 }, { "epoch": 1.0640964835015922, "grad_norm": 4.557955741882324, "learning_rate": 7.952768841125335e-05, "loss": 0.7022, "step": 15705 }, { "epoch": 1.0641642387695642, "grad_norm": 6.946822166442871, "learning_rate": 7.952631939215553e-05, "loss": 0.7494, "step": 15706 }, { "epoch": 1.0642319940375364, "grad_norm": 5.914007663726807, "learning_rate": 7.952495037305771e-05, "loss": 0.7723, "step": 15707 }, { "epoch": 1.0642997493055084, "grad_norm": 5.465998649597168, "learning_rate": 7.952358135395989e-05, "loss": 0.5798, "step": 15708 }, { "epoch": 1.0643675045734806, "grad_norm": 6.636282920837402, "learning_rate": 7.952221233486208e-05, "loss": 0.8187, "step": 15709 }, { "epoch": 1.0644352598414526, "grad_norm": 4.796360969543457, "learning_rate": 7.952084331576426e-05, "loss": 0.4452, "step": 15710 }, { "epoch": 1.0645030151094248, "grad_norm": 6.594966888427734, "learning_rate": 7.951947429666644e-05, "loss": 0.827, "step": 15711 }, { "epoch": 1.0645707703773968, "grad_norm": 4.327628135681152, "learning_rate": 7.951810527756862e-05, "loss": 0.5833, "step": 15712 }, { "epoch": 1.064638525645369, "grad_norm": 4.537369251251221, "learning_rate": 7.951673625847082e-05, "loss": 0.6398, "step": 15713 }, { "epoch": 1.064706280913341, "grad_norm": 5.633305549621582, "learning_rate": 7.9515367239373e-05, "loss": 0.6274, "step": 15714 }, { "epoch": 1.0647740361813132, "grad_norm": 5.848304271697998, "learning_rate": 7.951399822027518e-05, "loss": 0.6827, "step": 15715 }, { "epoch": 1.0648417914492851, "grad_norm": 5.550229072570801, "learning_rate": 7.951262920117736e-05, "loss": 0.5926, "step": 15716 }, { "epoch": 1.0649095467172573, "grad_norm": 5.79759407043457, "learning_rate": 7.951126018207954e-05, "loss": 0.7239, "step": 15717 }, { "epoch": 1.0649773019852293, "grad_norm": 5.386721134185791, "learning_rate": 7.950989116298173e-05, "loss": 0.6221, "step": 15718 }, { "epoch": 1.0650450572532015, "grad_norm": 6.526064395904541, "learning_rate": 7.950852214388391e-05, "loss": 0.6978, "step": 15719 }, { "epoch": 1.0651128125211735, "grad_norm": 5.103979587554932, "learning_rate": 7.95071531247861e-05, "loss": 0.5707, "step": 15720 }, { "epoch": 1.0651805677891457, "grad_norm": 7.769407749176025, "learning_rate": 7.950578410568827e-05, "loss": 0.7698, "step": 15721 }, { "epoch": 1.0652483230571177, "grad_norm": 4.609751224517822, "learning_rate": 7.950441508659047e-05, "loss": 0.8119, "step": 15722 }, { "epoch": 1.0653160783250897, "grad_norm": 4.900935649871826, "learning_rate": 7.950304606749265e-05, "loss": 0.6217, "step": 15723 }, { "epoch": 1.0653838335930619, "grad_norm": 6.532763957977295, "learning_rate": 7.950167704839483e-05, "loss": 0.7083, "step": 15724 }, { "epoch": 1.0654515888610339, "grad_norm": 7.212814807891846, "learning_rate": 7.950030802929701e-05, "loss": 0.6279, "step": 15725 }, { "epoch": 1.065519344129006, "grad_norm": 6.178891181945801, "learning_rate": 7.949893901019919e-05, "loss": 0.7573, "step": 15726 }, { "epoch": 1.065587099396978, "grad_norm": 4.8897528648376465, "learning_rate": 7.949756999110138e-05, "loss": 0.5121, "step": 15727 }, { "epoch": 1.0656548546649502, "grad_norm": 7.742895603179932, "learning_rate": 7.949620097200356e-05, "loss": 0.7652, "step": 15728 }, { "epoch": 1.0657226099329222, "grad_norm": 6.08188533782959, "learning_rate": 7.949483195290574e-05, "loss": 0.7593, "step": 15729 }, { "epoch": 1.0657903652008944, "grad_norm": 6.451333999633789, "learning_rate": 7.949346293380793e-05, "loss": 0.5019, "step": 15730 }, { "epoch": 1.0658581204688664, "grad_norm": 5.50657844543457, "learning_rate": 7.94920939147101e-05, "loss": 0.8513, "step": 15731 }, { "epoch": 1.0659258757368386, "grad_norm": 5.822981834411621, "learning_rate": 7.94907248956123e-05, "loss": 0.673, "step": 15732 }, { "epoch": 1.0659936310048106, "grad_norm": 6.3767991065979, "learning_rate": 7.948935587651448e-05, "loss": 0.7667, "step": 15733 }, { "epoch": 1.0660613862727828, "grad_norm": 6.416679382324219, "learning_rate": 7.948798685741666e-05, "loss": 0.7679, "step": 15734 }, { "epoch": 1.0661291415407548, "grad_norm": 4.7926344871521, "learning_rate": 7.948661783831884e-05, "loss": 0.5938, "step": 15735 }, { "epoch": 1.066196896808727, "grad_norm": 5.087335586547852, "learning_rate": 7.948524881922103e-05, "loss": 0.5486, "step": 15736 }, { "epoch": 1.066264652076699, "grad_norm": 6.492136478424072, "learning_rate": 7.948387980012321e-05, "loss": 0.8351, "step": 15737 }, { "epoch": 1.066332407344671, "grad_norm": 6.300568103790283, "learning_rate": 7.94825107810254e-05, "loss": 0.7037, "step": 15738 }, { "epoch": 1.0664001626126431, "grad_norm": 5.677840709686279, "learning_rate": 7.948114176192758e-05, "loss": 0.6712, "step": 15739 }, { "epoch": 1.0664679178806151, "grad_norm": 4.923882961273193, "learning_rate": 7.947977274282976e-05, "loss": 0.7166, "step": 15740 }, { "epoch": 1.0665356731485873, "grad_norm": 8.807173728942871, "learning_rate": 7.947840372373195e-05, "loss": 0.5985, "step": 15741 }, { "epoch": 1.0666034284165593, "grad_norm": 5.597122669219971, "learning_rate": 7.947703470463413e-05, "loss": 0.5973, "step": 15742 }, { "epoch": 1.0666711836845315, "grad_norm": 6.10443639755249, "learning_rate": 7.947566568553631e-05, "loss": 0.7233, "step": 15743 }, { "epoch": 1.0667389389525035, "grad_norm": 5.447210788726807, "learning_rate": 7.947429666643849e-05, "loss": 0.7122, "step": 15744 }, { "epoch": 1.0668066942204757, "grad_norm": 7.586278438568115, "learning_rate": 7.947292764734068e-05, "loss": 0.8076, "step": 15745 }, { "epoch": 1.0668744494884477, "grad_norm": 7.775143623352051, "learning_rate": 7.947155862824286e-05, "loss": 0.7714, "step": 15746 }, { "epoch": 1.0669422047564199, "grad_norm": 6.429764270782471, "learning_rate": 7.947018960914505e-05, "loss": 0.5349, "step": 15747 }, { "epoch": 1.0670099600243919, "grad_norm": 5.419410705566406, "learning_rate": 7.946882059004724e-05, "loss": 0.6813, "step": 15748 }, { "epoch": 1.067077715292364, "grad_norm": 8.733865737915039, "learning_rate": 7.946745157094942e-05, "loss": 0.7048, "step": 15749 }, { "epoch": 1.067145470560336, "grad_norm": 5.640448093414307, "learning_rate": 7.94660825518516e-05, "loss": 0.6213, "step": 15750 }, { "epoch": 1.0672132258283082, "grad_norm": 6.9355692863464355, "learning_rate": 7.94647135327538e-05, "loss": 0.7853, "step": 15751 }, { "epoch": 1.0672809810962802, "grad_norm": 6.067697048187256, "learning_rate": 7.946334451365597e-05, "loss": 0.7184, "step": 15752 }, { "epoch": 1.0673487363642522, "grad_norm": 5.817471981048584, "learning_rate": 7.946197549455815e-05, "loss": 0.7555, "step": 15753 }, { "epoch": 1.0674164916322244, "grad_norm": 5.668905735015869, "learning_rate": 7.946060647546035e-05, "loss": 0.6692, "step": 15754 }, { "epoch": 1.0674842469001964, "grad_norm": 8.108335494995117, "learning_rate": 7.945923745636253e-05, "loss": 0.9925, "step": 15755 }, { "epoch": 1.0675520021681686, "grad_norm": 9.043440818786621, "learning_rate": 7.945786843726471e-05, "loss": 0.7492, "step": 15756 }, { "epoch": 1.0676197574361406, "grad_norm": 5.559685230255127, "learning_rate": 7.945649941816689e-05, "loss": 0.6123, "step": 15757 }, { "epoch": 1.0676875127041128, "grad_norm": 6.704611778259277, "learning_rate": 7.945513039906907e-05, "loss": 0.8296, "step": 15758 }, { "epoch": 1.0677552679720848, "grad_norm": 4.621387004852295, "learning_rate": 7.945376137997126e-05, "loss": 0.5638, "step": 15759 }, { "epoch": 1.067823023240057, "grad_norm": 7.376697063446045, "learning_rate": 7.945239236087344e-05, "loss": 0.7056, "step": 15760 }, { "epoch": 1.067890778508029, "grad_norm": 6.9395527839660645, "learning_rate": 7.945102334177562e-05, "loss": 0.9354, "step": 15761 }, { "epoch": 1.0679585337760011, "grad_norm": 6.474776268005371, "learning_rate": 7.94496543226778e-05, "loss": 0.569, "step": 15762 }, { "epoch": 1.0680262890439731, "grad_norm": 5.319557189941406, "learning_rate": 7.944828530357998e-05, "loss": 0.9097, "step": 15763 }, { "epoch": 1.0680940443119453, "grad_norm": 6.072210788726807, "learning_rate": 7.944691628448218e-05, "loss": 0.8421, "step": 15764 }, { "epoch": 1.0681617995799173, "grad_norm": 5.9819865226745605, "learning_rate": 7.944554726538436e-05, "loss": 0.8024, "step": 15765 }, { "epoch": 1.0682295548478895, "grad_norm": 4.52716588973999, "learning_rate": 7.944417824628654e-05, "loss": 0.6446, "step": 15766 }, { "epoch": 1.0682973101158615, "grad_norm": 5.481772422790527, "learning_rate": 7.944280922718872e-05, "loss": 0.5395, "step": 15767 }, { "epoch": 1.0683650653838337, "grad_norm": 5.463979244232178, "learning_rate": 7.944144020809091e-05, "loss": 0.5777, "step": 15768 }, { "epoch": 1.0684328206518057, "grad_norm": 6.096555233001709, "learning_rate": 7.94400711889931e-05, "loss": 0.6996, "step": 15769 }, { "epoch": 1.0685005759197779, "grad_norm": 4.486513137817383, "learning_rate": 7.943870216989527e-05, "loss": 0.6826, "step": 15770 }, { "epoch": 1.0685683311877499, "grad_norm": 5.200229167938232, "learning_rate": 7.943733315079745e-05, "loss": 0.6869, "step": 15771 }, { "epoch": 1.0686360864557218, "grad_norm": 7.219493389129639, "learning_rate": 7.943596413169963e-05, "loss": 0.6478, "step": 15772 }, { "epoch": 1.068703841723694, "grad_norm": 5.901253700256348, "learning_rate": 7.943459511260183e-05, "loss": 0.7505, "step": 15773 }, { "epoch": 1.068771596991666, "grad_norm": 5.0671772956848145, "learning_rate": 7.943322609350401e-05, "loss": 0.7321, "step": 15774 }, { "epoch": 1.0688393522596382, "grad_norm": 6.431093215942383, "learning_rate": 7.943185707440619e-05, "loss": 0.6674, "step": 15775 }, { "epoch": 1.0689071075276102, "grad_norm": 5.902375221252441, "learning_rate": 7.943048805530837e-05, "loss": 0.7254, "step": 15776 }, { "epoch": 1.0689748627955824, "grad_norm": 4.459395408630371, "learning_rate": 7.942911903621056e-05, "loss": 0.6371, "step": 15777 }, { "epoch": 1.0690426180635544, "grad_norm": 5.590951442718506, "learning_rate": 7.942775001711274e-05, "loss": 0.7365, "step": 15778 }, { "epoch": 1.0691103733315266, "grad_norm": 9.108878135681152, "learning_rate": 7.942638099801492e-05, "loss": 0.8241, "step": 15779 }, { "epoch": 1.0691781285994986, "grad_norm": 5.599368572235107, "learning_rate": 7.94250119789171e-05, "loss": 0.8881, "step": 15780 }, { "epoch": 1.0692458838674708, "grad_norm": 5.293349266052246, "learning_rate": 7.942364295981929e-05, "loss": 0.5408, "step": 15781 }, { "epoch": 1.0693136391354428, "grad_norm": 6.881068706512451, "learning_rate": 7.942227394072148e-05, "loss": 0.8773, "step": 15782 }, { "epoch": 1.069381394403415, "grad_norm": 7.475467205047607, "learning_rate": 7.942090492162366e-05, "loss": 0.6288, "step": 15783 }, { "epoch": 1.069449149671387, "grad_norm": 6.4415669441223145, "learning_rate": 7.941953590252584e-05, "loss": 0.7308, "step": 15784 }, { "epoch": 1.0695169049393591, "grad_norm": 4.710741996765137, "learning_rate": 7.941816688342802e-05, "loss": 0.5095, "step": 15785 }, { "epoch": 1.0695846602073311, "grad_norm": 6.763927459716797, "learning_rate": 7.94167978643302e-05, "loss": 0.7424, "step": 15786 }, { "epoch": 1.069652415475303, "grad_norm": 6.286536693572998, "learning_rate": 7.94154288452324e-05, "loss": 0.5971, "step": 15787 }, { "epoch": 1.0697201707432753, "grad_norm": 6.880483150482178, "learning_rate": 7.941405982613457e-05, "loss": 0.8842, "step": 15788 }, { "epoch": 1.0697879260112473, "grad_norm": 5.467273712158203, "learning_rate": 7.941269080703675e-05, "loss": 0.7985, "step": 15789 }, { "epoch": 1.0698556812792195, "grad_norm": 5.367607593536377, "learning_rate": 7.941132178793894e-05, "loss": 0.8427, "step": 15790 }, { "epoch": 1.0699234365471915, "grad_norm": 5.593782424926758, "learning_rate": 7.940995276884113e-05, "loss": 0.5909, "step": 15791 }, { "epoch": 1.0699911918151637, "grad_norm": 4.525633811950684, "learning_rate": 7.940858374974331e-05, "loss": 0.5472, "step": 15792 }, { "epoch": 1.0700589470831356, "grad_norm": 5.167564868927002, "learning_rate": 7.940721473064549e-05, "loss": 0.4875, "step": 15793 }, { "epoch": 1.0701267023511079, "grad_norm": 4.89332914352417, "learning_rate": 7.940584571154768e-05, "loss": 0.731, "step": 15794 }, { "epoch": 1.0701944576190798, "grad_norm": 6.20195198059082, "learning_rate": 7.940447669244986e-05, "loss": 0.6374, "step": 15795 }, { "epoch": 1.070262212887052, "grad_norm": 4.612509727478027, "learning_rate": 7.940310767335204e-05, "loss": 0.5493, "step": 15796 }, { "epoch": 1.070329968155024, "grad_norm": 7.004962921142578, "learning_rate": 7.940173865425424e-05, "loss": 0.5409, "step": 15797 }, { "epoch": 1.0703977234229962, "grad_norm": 5.540370941162109, "learning_rate": 7.940036963515642e-05, "loss": 0.6747, "step": 15798 }, { "epoch": 1.0704654786909682, "grad_norm": 8.158012390136719, "learning_rate": 7.93990006160586e-05, "loss": 0.6241, "step": 15799 }, { "epoch": 1.0705332339589404, "grad_norm": 8.461899757385254, "learning_rate": 7.939763159696079e-05, "loss": 0.5866, "step": 15800 }, { "epoch": 1.0706009892269124, "grad_norm": 6.685081958770752, "learning_rate": 7.939626257786297e-05, "loss": 0.7346, "step": 15801 }, { "epoch": 1.0706687444948844, "grad_norm": 6.544353485107422, "learning_rate": 7.939489355876515e-05, "loss": 0.6462, "step": 15802 }, { "epoch": 1.0707364997628566, "grad_norm": 5.48783016204834, "learning_rate": 7.939352453966733e-05, "loss": 0.6524, "step": 15803 }, { "epoch": 1.0708042550308285, "grad_norm": 6.841857433319092, "learning_rate": 7.939215552056951e-05, "loss": 0.6979, "step": 15804 }, { "epoch": 1.0708720102988007, "grad_norm": 8.2971773147583, "learning_rate": 7.939078650147171e-05, "loss": 0.9404, "step": 15805 }, { "epoch": 1.0709397655667727, "grad_norm": 6.574617385864258, "learning_rate": 7.938941748237389e-05, "loss": 0.699, "step": 15806 }, { "epoch": 1.071007520834745, "grad_norm": 4.680912017822266, "learning_rate": 7.938804846327607e-05, "loss": 0.6734, "step": 15807 }, { "epoch": 1.071075276102717, "grad_norm": 6.463378429412842, "learning_rate": 7.938667944417825e-05, "loss": 0.596, "step": 15808 }, { "epoch": 1.0711430313706891, "grad_norm": 5.354882717132568, "learning_rate": 7.938531042508044e-05, "loss": 0.6923, "step": 15809 }, { "epoch": 1.071210786638661, "grad_norm": 6.985996246337891, "learning_rate": 7.938394140598262e-05, "loss": 0.5105, "step": 15810 }, { "epoch": 1.0712785419066333, "grad_norm": 7.460660457611084, "learning_rate": 7.93825723868848e-05, "loss": 0.8449, "step": 15811 }, { "epoch": 1.0713462971746053, "grad_norm": 7.631282806396484, "learning_rate": 7.938120336778698e-05, "loss": 0.7648, "step": 15812 }, { "epoch": 1.0714140524425775, "grad_norm": 6.466649532318115, "learning_rate": 7.937983434868916e-05, "loss": 0.5677, "step": 15813 }, { "epoch": 1.0714818077105495, "grad_norm": 4.905895709991455, "learning_rate": 7.937846532959136e-05, "loss": 0.7089, "step": 15814 }, { "epoch": 1.0715495629785217, "grad_norm": 5.96923828125, "learning_rate": 7.937709631049354e-05, "loss": 0.8982, "step": 15815 }, { "epoch": 1.0716173182464936, "grad_norm": 5.1013383865356445, "learning_rate": 7.937572729139572e-05, "loss": 0.6006, "step": 15816 }, { "epoch": 1.0716850735144658, "grad_norm": 5.436969757080078, "learning_rate": 7.93743582722979e-05, "loss": 0.6946, "step": 15817 }, { "epoch": 1.0717528287824378, "grad_norm": 4.626980304718018, "learning_rate": 7.937298925320008e-05, "loss": 0.5443, "step": 15818 }, { "epoch": 1.07182058405041, "grad_norm": 4.790196418762207, "learning_rate": 7.937162023410227e-05, "loss": 0.6067, "step": 15819 }, { "epoch": 1.071888339318382, "grad_norm": 7.302325248718262, "learning_rate": 7.937025121500445e-05, "loss": 0.7859, "step": 15820 }, { "epoch": 1.071956094586354, "grad_norm": 9.226067543029785, "learning_rate": 7.936888219590663e-05, "loss": 0.6142, "step": 15821 }, { "epoch": 1.0720238498543262, "grad_norm": 7.070141792297363, "learning_rate": 7.936751317680881e-05, "loss": 0.9445, "step": 15822 }, { "epoch": 1.0720916051222982, "grad_norm": 5.061405658721924, "learning_rate": 7.936614415771101e-05, "loss": 0.5768, "step": 15823 }, { "epoch": 1.0721593603902704, "grad_norm": 4.736205101013184, "learning_rate": 7.936477513861319e-05, "loss": 0.5623, "step": 15824 }, { "epoch": 1.0722271156582424, "grad_norm": 5.544649124145508, "learning_rate": 7.936340611951537e-05, "loss": 0.6344, "step": 15825 }, { "epoch": 1.0722948709262146, "grad_norm": 5.628857135772705, "learning_rate": 7.936203710041755e-05, "loss": 0.7983, "step": 15826 }, { "epoch": 1.0723626261941865, "grad_norm": 7.328362464904785, "learning_rate": 7.936066808131973e-05, "loss": 0.6597, "step": 15827 }, { "epoch": 1.0724303814621587, "grad_norm": 4.993453025817871, "learning_rate": 7.935929906222192e-05, "loss": 0.6878, "step": 15828 }, { "epoch": 1.0724981367301307, "grad_norm": 4.596197605133057, "learning_rate": 7.93579300431241e-05, "loss": 0.7162, "step": 15829 }, { "epoch": 1.072565891998103, "grad_norm": 7.738362789154053, "learning_rate": 7.935656102402628e-05, "loss": 0.644, "step": 15830 }, { "epoch": 1.072633647266075, "grad_norm": 6.615604877471924, "learning_rate": 7.935519200492846e-05, "loss": 0.7576, "step": 15831 }, { "epoch": 1.072701402534047, "grad_norm": 7.723992347717285, "learning_rate": 7.935382298583066e-05, "loss": 0.8502, "step": 15832 }, { "epoch": 1.072769157802019, "grad_norm": 5.472813606262207, "learning_rate": 7.935245396673284e-05, "loss": 0.6046, "step": 15833 }, { "epoch": 1.0728369130699913, "grad_norm": 5.456359386444092, "learning_rate": 7.935108494763502e-05, "loss": 0.9344, "step": 15834 }, { "epoch": 1.0729046683379633, "grad_norm": 5.542731285095215, "learning_rate": 7.93497159285372e-05, "loss": 0.7518, "step": 15835 }, { "epoch": 1.0729724236059353, "grad_norm": 5.318366050720215, "learning_rate": 7.934834690943938e-05, "loss": 0.6448, "step": 15836 }, { "epoch": 1.0730401788739075, "grad_norm": 5.1860432624816895, "learning_rate": 7.934697789034157e-05, "loss": 0.8315, "step": 15837 }, { "epoch": 1.0731079341418794, "grad_norm": 6.298211097717285, "learning_rate": 7.934560887124375e-05, "loss": 0.8517, "step": 15838 }, { "epoch": 1.0731756894098516, "grad_norm": 5.515106201171875, "learning_rate": 7.934423985214593e-05, "loss": 0.462, "step": 15839 }, { "epoch": 1.0732434446778236, "grad_norm": 5.4662394523620605, "learning_rate": 7.934287083304813e-05, "loss": 0.7735, "step": 15840 }, { "epoch": 1.0733111999457958, "grad_norm": 6.610855579376221, "learning_rate": 7.934150181395031e-05, "loss": 0.9312, "step": 15841 }, { "epoch": 1.0733789552137678, "grad_norm": 7.660674571990967, "learning_rate": 7.934013279485249e-05, "loss": 0.5592, "step": 15842 }, { "epoch": 1.07344671048174, "grad_norm": 5.828925609588623, "learning_rate": 7.933876377575468e-05, "loss": 0.7, "step": 15843 }, { "epoch": 1.073514465749712, "grad_norm": 6.494380474090576, "learning_rate": 7.933739475665686e-05, "loss": 0.6033, "step": 15844 }, { "epoch": 1.0735822210176842, "grad_norm": 7.698443412780762, "learning_rate": 7.933602573755904e-05, "loss": 0.8435, "step": 15845 }, { "epoch": 1.0736499762856562, "grad_norm": 5.353089809417725, "learning_rate": 7.933465671846124e-05, "loss": 0.9311, "step": 15846 }, { "epoch": 1.0737177315536284, "grad_norm": 6.445722579956055, "learning_rate": 7.933328769936342e-05, "loss": 0.6781, "step": 15847 }, { "epoch": 1.0737854868216004, "grad_norm": 4.490242004394531, "learning_rate": 7.93319186802656e-05, "loss": 0.8394, "step": 15848 }, { "epoch": 1.0738532420895726, "grad_norm": 4.664844989776611, "learning_rate": 7.933054966116778e-05, "loss": 0.7149, "step": 15849 }, { "epoch": 1.0739209973575445, "grad_norm": 5.997525691986084, "learning_rate": 7.932918064206996e-05, "loss": 0.6835, "step": 15850 }, { "epoch": 1.0739887526255165, "grad_norm": 8.030609130859375, "learning_rate": 7.932781162297215e-05, "loss": 0.8024, "step": 15851 }, { "epoch": 1.0740565078934887, "grad_norm": 7.785398960113525, "learning_rate": 7.932644260387433e-05, "loss": 0.7362, "step": 15852 }, { "epoch": 1.0741242631614607, "grad_norm": 4.920596122741699, "learning_rate": 7.932507358477651e-05, "loss": 0.7132, "step": 15853 }, { "epoch": 1.074192018429433, "grad_norm": 4.117366313934326, "learning_rate": 7.93237045656787e-05, "loss": 0.5307, "step": 15854 }, { "epoch": 1.0742597736974049, "grad_norm": 5.135700702667236, "learning_rate": 7.932233554658089e-05, "loss": 0.6229, "step": 15855 }, { "epoch": 1.074327528965377, "grad_norm": 6.207793712615967, "learning_rate": 7.932096652748307e-05, "loss": 0.5498, "step": 15856 }, { "epoch": 1.074395284233349, "grad_norm": 6.431981563568115, "learning_rate": 7.931959750838525e-05, "loss": 0.6798, "step": 15857 }, { "epoch": 1.0744630395013213, "grad_norm": 5.320846080780029, "learning_rate": 7.931822848928743e-05, "loss": 0.6198, "step": 15858 }, { "epoch": 1.0745307947692933, "grad_norm": 5.788670539855957, "learning_rate": 7.931685947018961e-05, "loss": 0.8206, "step": 15859 }, { "epoch": 1.0745985500372655, "grad_norm": 7.664066314697266, "learning_rate": 7.93154904510918e-05, "loss": 0.6584, "step": 15860 }, { "epoch": 1.0746663053052374, "grad_norm": 6.991944313049316, "learning_rate": 7.931412143199398e-05, "loss": 0.9677, "step": 15861 }, { "epoch": 1.0747340605732096, "grad_norm": 5.329096794128418, "learning_rate": 7.931275241289616e-05, "loss": 0.6164, "step": 15862 }, { "epoch": 1.0748018158411816, "grad_norm": 6.307762145996094, "learning_rate": 7.931138339379834e-05, "loss": 0.8304, "step": 15863 }, { "epoch": 1.0748695711091538, "grad_norm": 5.538243770599365, "learning_rate": 7.931001437470054e-05, "loss": 0.7314, "step": 15864 }, { "epoch": 1.0749373263771258, "grad_norm": 6.261715888977051, "learning_rate": 7.930864535560272e-05, "loss": 0.8585, "step": 15865 }, { "epoch": 1.075005081645098, "grad_norm": 5.783536434173584, "learning_rate": 7.93072763365049e-05, "loss": 0.6301, "step": 15866 }, { "epoch": 1.07507283691307, "grad_norm": 5.780196189880371, "learning_rate": 7.930590731740708e-05, "loss": 0.6306, "step": 15867 }, { "epoch": 1.0751405921810422, "grad_norm": 5.178442478179932, "learning_rate": 7.930453829830926e-05, "loss": 0.4359, "step": 15868 }, { "epoch": 1.0752083474490142, "grad_norm": 7.060993194580078, "learning_rate": 7.930316927921145e-05, "loss": 0.5727, "step": 15869 }, { "epoch": 1.0752761027169861, "grad_norm": 5.602401256561279, "learning_rate": 7.930180026011363e-05, "loss": 0.5486, "step": 15870 }, { "epoch": 1.0753438579849584, "grad_norm": 6.208281993865967, "learning_rate": 7.930043124101581e-05, "loss": 0.7401, "step": 15871 }, { "epoch": 1.0754116132529303, "grad_norm": 10.482538223266602, "learning_rate": 7.9299062221918e-05, "loss": 0.8773, "step": 15872 }, { "epoch": 1.0754793685209025, "grad_norm": 5.301812171936035, "learning_rate": 7.929769320282017e-05, "loss": 0.6965, "step": 15873 }, { "epoch": 1.0755471237888745, "grad_norm": 10.284536361694336, "learning_rate": 7.929632418372237e-05, "loss": 0.7072, "step": 15874 }, { "epoch": 1.0756148790568467, "grad_norm": 5.5219550132751465, "learning_rate": 7.929495516462455e-05, "loss": 0.6087, "step": 15875 }, { "epoch": 1.0756826343248187, "grad_norm": 5.441367149353027, "learning_rate": 7.929358614552673e-05, "loss": 0.7554, "step": 15876 }, { "epoch": 1.075750389592791, "grad_norm": 5.82017707824707, "learning_rate": 7.929221712642891e-05, "loss": 0.6087, "step": 15877 }, { "epoch": 1.0758181448607629, "grad_norm": 5.13921594619751, "learning_rate": 7.92908481073311e-05, "loss": 0.791, "step": 15878 }, { "epoch": 1.075885900128735, "grad_norm": 5.5391926765441895, "learning_rate": 7.928947908823328e-05, "loss": 0.6649, "step": 15879 }, { "epoch": 1.075953655396707, "grad_norm": 6.536075115203857, "learning_rate": 7.928811006913546e-05, "loss": 0.6299, "step": 15880 }, { "epoch": 1.0760214106646793, "grad_norm": 5.445075511932373, "learning_rate": 7.928674105003764e-05, "loss": 0.7024, "step": 15881 }, { "epoch": 1.0760891659326512, "grad_norm": 5.205681800842285, "learning_rate": 7.928537203093982e-05, "loss": 0.4982, "step": 15882 }, { "epoch": 1.0761569212006235, "grad_norm": 4.551524639129639, "learning_rate": 7.928400301184202e-05, "loss": 0.643, "step": 15883 }, { "epoch": 1.0762246764685954, "grad_norm": 5.783960819244385, "learning_rate": 7.92826339927442e-05, "loss": 0.6593, "step": 15884 }, { "epoch": 1.0762924317365674, "grad_norm": 5.964574337005615, "learning_rate": 7.928126497364638e-05, "loss": 0.6811, "step": 15885 }, { "epoch": 1.0763601870045396, "grad_norm": 7.782623767852783, "learning_rate": 7.927989595454856e-05, "loss": 0.7981, "step": 15886 }, { "epoch": 1.0764279422725116, "grad_norm": 6.336511135101318, "learning_rate": 7.927852693545075e-05, "loss": 0.9382, "step": 15887 }, { "epoch": 1.0764956975404838, "grad_norm": 4.654166221618652, "learning_rate": 7.927715791635293e-05, "loss": 0.5157, "step": 15888 }, { "epoch": 1.0765634528084558, "grad_norm": 4.949594974517822, "learning_rate": 7.927578889725511e-05, "loss": 0.662, "step": 15889 }, { "epoch": 1.076631208076428, "grad_norm": 6.451333999633789, "learning_rate": 7.927441987815731e-05, "loss": 0.7434, "step": 15890 }, { "epoch": 1.0766989633444, "grad_norm": 6.374203681945801, "learning_rate": 7.927305085905949e-05, "loss": 0.7248, "step": 15891 }, { "epoch": 1.0767667186123722, "grad_norm": 8.0396089553833, "learning_rate": 7.927168183996167e-05, "loss": 0.589, "step": 15892 }, { "epoch": 1.0768344738803441, "grad_norm": 9.502918243408203, "learning_rate": 7.927031282086386e-05, "loss": 0.9467, "step": 15893 }, { "epoch": 1.0769022291483163, "grad_norm": 5.921857833862305, "learning_rate": 7.926894380176604e-05, "loss": 0.8063, "step": 15894 }, { "epoch": 1.0769699844162883, "grad_norm": 6.634448051452637, "learning_rate": 7.926757478266822e-05, "loss": 0.8519, "step": 15895 }, { "epoch": 1.0770377396842605, "grad_norm": 4.550981044769287, "learning_rate": 7.92662057635704e-05, "loss": 0.6937, "step": 15896 }, { "epoch": 1.0771054949522325, "grad_norm": 6.369821071624756, "learning_rate": 7.92648367444726e-05, "loss": 0.7513, "step": 15897 }, { "epoch": 1.0771732502202047, "grad_norm": 6.474839210510254, "learning_rate": 7.926346772537478e-05, "loss": 0.8707, "step": 15898 }, { "epoch": 1.0772410054881767, "grad_norm": 5.136695861816406, "learning_rate": 7.926209870627696e-05, "loss": 0.5493, "step": 15899 }, { "epoch": 1.0773087607561487, "grad_norm": 5.588931560516357, "learning_rate": 7.926072968717914e-05, "loss": 0.7134, "step": 15900 }, { "epoch": 1.0773765160241209, "grad_norm": 6.223124980926514, "learning_rate": 7.925936066808133e-05, "loss": 0.7647, "step": 15901 }, { "epoch": 1.0774442712920929, "grad_norm": 4.788752555847168, "learning_rate": 7.925799164898351e-05, "loss": 0.439, "step": 15902 }, { "epoch": 1.077512026560065, "grad_norm": 5.260895252227783, "learning_rate": 7.92566226298857e-05, "loss": 0.6204, "step": 15903 }, { "epoch": 1.077579781828037, "grad_norm": 5.823332786560059, "learning_rate": 7.925525361078787e-05, "loss": 0.7579, "step": 15904 }, { "epoch": 1.0776475370960092, "grad_norm": 5.55746603012085, "learning_rate": 7.925388459169005e-05, "loss": 0.6343, "step": 15905 }, { "epoch": 1.0777152923639812, "grad_norm": 5.603518962860107, "learning_rate": 7.925251557259225e-05, "loss": 0.7849, "step": 15906 }, { "epoch": 1.0777830476319534, "grad_norm": 5.885906219482422, "learning_rate": 7.925114655349443e-05, "loss": 0.713, "step": 15907 }, { "epoch": 1.0778508028999254, "grad_norm": 9.000563621520996, "learning_rate": 7.924977753439661e-05, "loss": 0.8531, "step": 15908 }, { "epoch": 1.0779185581678976, "grad_norm": 5.798495769500732, "learning_rate": 7.924840851529879e-05, "loss": 0.7047, "step": 15909 }, { "epoch": 1.0779863134358696, "grad_norm": 5.180234432220459, "learning_rate": 7.924703949620098e-05, "loss": 0.4218, "step": 15910 }, { "epoch": 1.0780540687038418, "grad_norm": 5.090123653411865, "learning_rate": 7.924567047710316e-05, "loss": 0.7473, "step": 15911 }, { "epoch": 1.0781218239718138, "grad_norm": 4.858051300048828, "learning_rate": 7.924430145800534e-05, "loss": 0.5706, "step": 15912 }, { "epoch": 1.078189579239786, "grad_norm": 5.382134914398193, "learning_rate": 7.924293243890752e-05, "loss": 0.7633, "step": 15913 }, { "epoch": 1.078257334507758, "grad_norm": 6.212465763092041, "learning_rate": 7.92415634198097e-05, "loss": 0.6788, "step": 15914 }, { "epoch": 1.0783250897757302, "grad_norm": 6.3222880363464355, "learning_rate": 7.92401944007119e-05, "loss": 0.7848, "step": 15915 }, { "epoch": 1.0783928450437021, "grad_norm": 6.96838903427124, "learning_rate": 7.923882538161408e-05, "loss": 0.8652, "step": 15916 }, { "epoch": 1.0784606003116743, "grad_norm": 4.135616302490234, "learning_rate": 7.923745636251626e-05, "loss": 0.4712, "step": 15917 }, { "epoch": 1.0785283555796463, "grad_norm": 6.912881374359131, "learning_rate": 7.923608734341844e-05, "loss": 0.7325, "step": 15918 }, { "epoch": 1.0785961108476183, "grad_norm": 5.951952934265137, "learning_rate": 7.923471832432062e-05, "loss": 0.645, "step": 15919 }, { "epoch": 1.0786638661155905, "grad_norm": 5.720697402954102, "learning_rate": 7.923334930522281e-05, "loss": 0.751, "step": 15920 }, { "epoch": 1.0787316213835625, "grad_norm": 6.125636577606201, "learning_rate": 7.9231980286125e-05, "loss": 0.7128, "step": 15921 }, { "epoch": 1.0787993766515347, "grad_norm": 7.798324108123779, "learning_rate": 7.923061126702717e-05, "loss": 1.0655, "step": 15922 }, { "epoch": 1.0788671319195067, "grad_norm": 7.423648834228516, "learning_rate": 7.922924224792935e-05, "loss": 0.8611, "step": 15923 }, { "epoch": 1.0789348871874789, "grad_norm": 4.67471170425415, "learning_rate": 7.922787322883155e-05, "loss": 0.4459, "step": 15924 }, { "epoch": 1.0790026424554509, "grad_norm": 6.07832670211792, "learning_rate": 7.922650420973373e-05, "loss": 0.706, "step": 15925 }, { "epoch": 1.079070397723423, "grad_norm": 6.1671624183654785, "learning_rate": 7.922513519063591e-05, "loss": 0.8027, "step": 15926 }, { "epoch": 1.079138152991395, "grad_norm": 6.181910991668701, "learning_rate": 7.922376617153809e-05, "loss": 0.726, "step": 15927 }, { "epoch": 1.0792059082593672, "grad_norm": 6.463537693023682, "learning_rate": 7.922239715244027e-05, "loss": 0.6578, "step": 15928 }, { "epoch": 1.0792736635273392, "grad_norm": 6.259862422943115, "learning_rate": 7.922102813334246e-05, "loss": 0.6437, "step": 15929 }, { "epoch": 1.0793414187953114, "grad_norm": 6.758423805236816, "learning_rate": 7.921965911424464e-05, "loss": 0.6028, "step": 15930 }, { "epoch": 1.0794091740632834, "grad_norm": 5.508148193359375, "learning_rate": 7.921829009514682e-05, "loss": 0.6397, "step": 15931 }, { "epoch": 1.0794769293312556, "grad_norm": 4.803408145904541, "learning_rate": 7.9216921076049e-05, "loss": 0.8471, "step": 15932 }, { "epoch": 1.0795446845992276, "grad_norm": 5.470789432525635, "learning_rate": 7.92155520569512e-05, "loss": 0.9083, "step": 15933 }, { "epoch": 1.0796124398671996, "grad_norm": 5.594363212585449, "learning_rate": 7.921418303785338e-05, "loss": 0.6955, "step": 15934 }, { "epoch": 1.0796801951351718, "grad_norm": 6.420119285583496, "learning_rate": 7.921281401875556e-05, "loss": 0.8706, "step": 15935 }, { "epoch": 1.0797479504031438, "grad_norm": 5.343960285186768, "learning_rate": 7.921144499965775e-05, "loss": 0.8528, "step": 15936 }, { "epoch": 1.079815705671116, "grad_norm": 6.668441295623779, "learning_rate": 7.921007598055993e-05, "loss": 0.82, "step": 15937 }, { "epoch": 1.079883460939088, "grad_norm": 7.410157203674316, "learning_rate": 7.920870696146211e-05, "loss": 0.7367, "step": 15938 }, { "epoch": 1.0799512162070601, "grad_norm": 6.100698471069336, "learning_rate": 7.920733794236431e-05, "loss": 0.862, "step": 15939 }, { "epoch": 1.0800189714750321, "grad_norm": 6.438335418701172, "learning_rate": 7.920596892326649e-05, "loss": 0.7477, "step": 15940 }, { "epoch": 1.0800867267430043, "grad_norm": 6.367231845855713, "learning_rate": 7.920459990416867e-05, "loss": 0.6993, "step": 15941 }, { "epoch": 1.0801544820109763, "grad_norm": 5.784175395965576, "learning_rate": 7.920323088507086e-05, "loss": 0.6835, "step": 15942 }, { "epoch": 1.0802222372789485, "grad_norm": 5.12135648727417, "learning_rate": 7.920186186597304e-05, "loss": 0.7518, "step": 15943 }, { "epoch": 1.0802899925469205, "grad_norm": 5.591551780700684, "learning_rate": 7.920049284687522e-05, "loss": 0.5578, "step": 15944 }, { "epoch": 1.0803577478148927, "grad_norm": 3.941267728805542, "learning_rate": 7.91991238277774e-05, "loss": 0.6835, "step": 15945 }, { "epoch": 1.0804255030828647, "grad_norm": 4.970183849334717, "learning_rate": 7.919775480867958e-05, "loss": 0.6148, "step": 15946 }, { "epoch": 1.0804932583508369, "grad_norm": 5.3328938484191895, "learning_rate": 7.919638578958178e-05, "loss": 0.5961, "step": 15947 }, { "epoch": 1.0805610136188089, "grad_norm": 5.661096096038818, "learning_rate": 7.919501677048396e-05, "loss": 0.5808, "step": 15948 }, { "epoch": 1.0806287688867808, "grad_norm": 4.3002824783325195, "learning_rate": 7.919364775138614e-05, "loss": 0.6531, "step": 15949 }, { "epoch": 1.080696524154753, "grad_norm": 5.652578830718994, "learning_rate": 7.919227873228832e-05, "loss": 0.8663, "step": 15950 }, { "epoch": 1.080764279422725, "grad_norm": 5.4440717697143555, "learning_rate": 7.91909097131905e-05, "loss": 0.8888, "step": 15951 }, { "epoch": 1.0808320346906972, "grad_norm": 5.150991439819336, "learning_rate": 7.918954069409269e-05, "loss": 0.6697, "step": 15952 }, { "epoch": 1.0808997899586692, "grad_norm": 5.136131286621094, "learning_rate": 7.918817167499487e-05, "loss": 0.6577, "step": 15953 }, { "epoch": 1.0809675452266414, "grad_norm": 6.44126033782959, "learning_rate": 7.918680265589705e-05, "loss": 0.6284, "step": 15954 }, { "epoch": 1.0810353004946134, "grad_norm": 8.204652786254883, "learning_rate": 7.918543363679923e-05, "loss": 0.6879, "step": 15955 }, { "epoch": 1.0811030557625856, "grad_norm": 5.8660407066345215, "learning_rate": 7.918406461770143e-05, "loss": 0.5727, "step": 15956 }, { "epoch": 1.0811708110305576, "grad_norm": 6.534690856933594, "learning_rate": 7.918269559860361e-05, "loss": 0.697, "step": 15957 }, { "epoch": 1.0812385662985298, "grad_norm": 8.585152626037598, "learning_rate": 7.918132657950579e-05, "loss": 0.8429, "step": 15958 }, { "epoch": 1.0813063215665017, "grad_norm": 7.084420204162598, "learning_rate": 7.917995756040797e-05, "loss": 0.6254, "step": 15959 }, { "epoch": 1.081374076834474, "grad_norm": 5.689300060272217, "learning_rate": 7.917858854131015e-05, "loss": 0.5956, "step": 15960 }, { "epoch": 1.081441832102446, "grad_norm": 6.1884331703186035, "learning_rate": 7.917721952221234e-05, "loss": 0.7677, "step": 15961 }, { "epoch": 1.0815095873704181, "grad_norm": 10.952715873718262, "learning_rate": 7.917585050311452e-05, "loss": 0.783, "step": 15962 }, { "epoch": 1.0815773426383901, "grad_norm": 8.16262149810791, "learning_rate": 7.91744814840167e-05, "loss": 0.548, "step": 15963 }, { "epoch": 1.0816450979063623, "grad_norm": 5.6903157234191895, "learning_rate": 7.917311246491888e-05, "loss": 0.5904, "step": 15964 }, { "epoch": 1.0817128531743343, "grad_norm": 8.33536148071289, "learning_rate": 7.917174344582108e-05, "loss": 0.7886, "step": 15965 }, { "epoch": 1.0817806084423065, "grad_norm": 9.231719017028809, "learning_rate": 7.917037442672326e-05, "loss": 0.7876, "step": 15966 }, { "epoch": 1.0818483637102785, "grad_norm": 5.142817497253418, "learning_rate": 7.916900540762544e-05, "loss": 0.6525, "step": 15967 }, { "epoch": 1.0819161189782505, "grad_norm": 8.006011962890625, "learning_rate": 7.916763638852762e-05, "loss": 0.6731, "step": 15968 }, { "epoch": 1.0819838742462227, "grad_norm": 4.934510707855225, "learning_rate": 7.91662673694298e-05, "loss": 0.6913, "step": 15969 }, { "epoch": 1.0820516295141946, "grad_norm": 4.291750907897949, "learning_rate": 7.916489835033199e-05, "loss": 0.5819, "step": 15970 }, { "epoch": 1.0821193847821668, "grad_norm": 6.167888164520264, "learning_rate": 7.916352933123417e-05, "loss": 0.5861, "step": 15971 }, { "epoch": 1.0821871400501388, "grad_norm": 6.8970465660095215, "learning_rate": 7.916216031213635e-05, "loss": 0.4904, "step": 15972 }, { "epoch": 1.082254895318111, "grad_norm": 6.821037292480469, "learning_rate": 7.916079129303853e-05, "loss": 0.7735, "step": 15973 }, { "epoch": 1.082322650586083, "grad_norm": 6.066835403442383, "learning_rate": 7.915942227394071e-05, "loss": 0.5559, "step": 15974 }, { "epoch": 1.0823904058540552, "grad_norm": 5.0264716148376465, "learning_rate": 7.915805325484291e-05, "loss": 0.8914, "step": 15975 }, { "epoch": 1.0824581611220272, "grad_norm": 6.044068336486816, "learning_rate": 7.915668423574509e-05, "loss": 0.5114, "step": 15976 }, { "epoch": 1.0825259163899994, "grad_norm": 5.62042236328125, "learning_rate": 7.915531521664727e-05, "loss": 0.6438, "step": 15977 }, { "epoch": 1.0825936716579714, "grad_norm": 5.779903411865234, "learning_rate": 7.915394619754945e-05, "loss": 0.5855, "step": 15978 }, { "epoch": 1.0826614269259436, "grad_norm": 7.046977996826172, "learning_rate": 7.915257717845164e-05, "loss": 0.7729, "step": 15979 }, { "epoch": 1.0827291821939156, "grad_norm": 10.05582332611084, "learning_rate": 7.915120815935382e-05, "loss": 0.7776, "step": 15980 }, { "epoch": 1.0827969374618878, "grad_norm": 6.933775901794434, "learning_rate": 7.9149839140256e-05, "loss": 0.7646, "step": 15981 }, { "epoch": 1.0828646927298597, "grad_norm": 5.1650543212890625, "learning_rate": 7.91484701211582e-05, "loss": 0.5793, "step": 15982 }, { "epoch": 1.0829324479978317, "grad_norm": 6.840242385864258, "learning_rate": 7.914710110206038e-05, "loss": 0.7227, "step": 15983 }, { "epoch": 1.083000203265804, "grad_norm": 4.679685592651367, "learning_rate": 7.914573208296256e-05, "loss": 0.6102, "step": 15984 }, { "epoch": 1.083067958533776, "grad_norm": 6.447381496429443, "learning_rate": 7.914436306386475e-05, "loss": 0.9207, "step": 15985 }, { "epoch": 1.0831357138017481, "grad_norm": 7.533105850219727, "learning_rate": 7.914299404476693e-05, "loss": 0.7438, "step": 15986 }, { "epoch": 1.08320346906972, "grad_norm": 4.451360702514648, "learning_rate": 7.914162502566911e-05, "loss": 0.5668, "step": 15987 }, { "epoch": 1.0832712243376923, "grad_norm": 7.082450866699219, "learning_rate": 7.914025600657131e-05, "loss": 0.6538, "step": 15988 }, { "epoch": 1.0833389796056643, "grad_norm": 5.413089275360107, "learning_rate": 7.913888698747349e-05, "loss": 0.7967, "step": 15989 }, { "epoch": 1.0834067348736365, "grad_norm": 5.9802985191345215, "learning_rate": 7.913751796837567e-05, "loss": 0.7933, "step": 15990 }, { "epoch": 1.0834744901416085, "grad_norm": 6.097925662994385, "learning_rate": 7.913614894927785e-05, "loss": 0.602, "step": 15991 }, { "epoch": 1.0835422454095807, "grad_norm": 5.050368309020996, "learning_rate": 7.913477993018003e-05, "loss": 0.5664, "step": 15992 }, { "epoch": 1.0836100006775526, "grad_norm": 4.976052761077881, "learning_rate": 7.913341091108222e-05, "loss": 0.6543, "step": 15993 }, { "epoch": 1.0836777559455248, "grad_norm": 6.06764554977417, "learning_rate": 7.91320418919844e-05, "loss": 0.6747, "step": 15994 }, { "epoch": 1.0837455112134968, "grad_norm": 8.071044921875, "learning_rate": 7.913067287288658e-05, "loss": 0.6247, "step": 15995 }, { "epoch": 1.083813266481469, "grad_norm": 5.97242546081543, "learning_rate": 7.912930385378876e-05, "loss": 0.6158, "step": 15996 }, { "epoch": 1.083881021749441, "grad_norm": 5.717789173126221, "learning_rate": 7.912793483469096e-05, "loss": 0.7819, "step": 15997 }, { "epoch": 1.083948777017413, "grad_norm": 6.379135608673096, "learning_rate": 7.912656581559314e-05, "loss": 0.8008, "step": 15998 }, { "epoch": 1.0840165322853852, "grad_norm": 5.400828838348389, "learning_rate": 7.912519679649532e-05, "loss": 0.6621, "step": 15999 }, { "epoch": 1.0840842875533572, "grad_norm": 7.098434925079346, "learning_rate": 7.91238277773975e-05, "loss": 0.8617, "step": 16000 }, { "epoch": 1.0841520428213294, "grad_norm": 5.690536975860596, "learning_rate": 7.912245875829968e-05, "loss": 0.7622, "step": 16001 }, { "epoch": 1.0842197980893014, "grad_norm": 5.485647201538086, "learning_rate": 7.912108973920187e-05, "loss": 0.6856, "step": 16002 }, { "epoch": 1.0842875533572736, "grad_norm": 7.707883358001709, "learning_rate": 7.911972072010405e-05, "loss": 0.7483, "step": 16003 }, { "epoch": 1.0843553086252455, "grad_norm": 5.487302303314209, "learning_rate": 7.911835170100623e-05, "loss": 0.6216, "step": 16004 }, { "epoch": 1.0844230638932177, "grad_norm": 5.43400239944458, "learning_rate": 7.911698268190841e-05, "loss": 0.6424, "step": 16005 }, { "epoch": 1.0844908191611897, "grad_norm": 5.037356853485107, "learning_rate": 7.91156136628106e-05, "loss": 0.5823, "step": 16006 }, { "epoch": 1.084558574429162, "grad_norm": 6.4713215827941895, "learning_rate": 7.911424464371279e-05, "loss": 0.6753, "step": 16007 }, { "epoch": 1.084626329697134, "grad_norm": 5.2635393142700195, "learning_rate": 7.911287562461497e-05, "loss": 0.8342, "step": 16008 }, { "epoch": 1.084694084965106, "grad_norm": 5.816706657409668, "learning_rate": 7.911150660551715e-05, "loss": 0.8517, "step": 16009 }, { "epoch": 1.084761840233078, "grad_norm": 5.9537458419799805, "learning_rate": 7.911013758641933e-05, "loss": 0.6601, "step": 16010 }, { "epoch": 1.0848295955010503, "grad_norm": 4.499695301055908, "learning_rate": 7.910876856732152e-05, "loss": 0.5232, "step": 16011 }, { "epoch": 1.0848973507690223, "grad_norm": 5.2069091796875, "learning_rate": 7.91073995482237e-05, "loss": 0.5803, "step": 16012 }, { "epoch": 1.0849651060369945, "grad_norm": 4.306644916534424, "learning_rate": 7.910603052912588e-05, "loss": 0.6398, "step": 16013 }, { "epoch": 1.0850328613049665, "grad_norm": 5.758792400360107, "learning_rate": 7.910466151002806e-05, "loss": 0.7415, "step": 16014 }, { "epoch": 1.0851006165729387, "grad_norm": 6.12398099899292, "learning_rate": 7.910329249093024e-05, "loss": 0.7178, "step": 16015 }, { "epoch": 1.0851683718409106, "grad_norm": 7.941034317016602, "learning_rate": 7.910192347183244e-05, "loss": 0.6802, "step": 16016 }, { "epoch": 1.0852361271088826, "grad_norm": 5.2251176834106445, "learning_rate": 7.910055445273462e-05, "loss": 0.6264, "step": 16017 }, { "epoch": 1.0853038823768548, "grad_norm": 5.844374179840088, "learning_rate": 7.90991854336368e-05, "loss": 0.6761, "step": 16018 }, { "epoch": 1.0853716376448268, "grad_norm": 4.731149196624756, "learning_rate": 7.909781641453898e-05, "loss": 0.7042, "step": 16019 }, { "epoch": 1.085439392912799, "grad_norm": 5.9580206871032715, "learning_rate": 7.909644739544117e-05, "loss": 0.6448, "step": 16020 }, { "epoch": 1.085507148180771, "grad_norm": 6.342482089996338, "learning_rate": 7.909507837634335e-05, "loss": 0.6347, "step": 16021 }, { "epoch": 1.0855749034487432, "grad_norm": 5.999290943145752, "learning_rate": 7.909370935724553e-05, "loss": 0.7697, "step": 16022 }, { "epoch": 1.0856426587167152, "grad_norm": 6.850386142730713, "learning_rate": 7.909234033814771e-05, "loss": 0.6584, "step": 16023 }, { "epoch": 1.0857104139846874, "grad_norm": 6.658447265625, "learning_rate": 7.90909713190499e-05, "loss": 0.7402, "step": 16024 }, { "epoch": 1.0857781692526594, "grad_norm": 5.117592811584473, "learning_rate": 7.908960229995209e-05, "loss": 0.7171, "step": 16025 }, { "epoch": 1.0858459245206316, "grad_norm": 4.4139909744262695, "learning_rate": 7.908823328085427e-05, "loss": 0.5602, "step": 16026 }, { "epoch": 1.0859136797886035, "grad_norm": 8.113272666931152, "learning_rate": 7.908686426175645e-05, "loss": 0.5116, "step": 16027 }, { "epoch": 1.0859814350565757, "grad_norm": 6.033690452575684, "learning_rate": 7.908549524265864e-05, "loss": 0.6415, "step": 16028 }, { "epoch": 1.0860491903245477, "grad_norm": 5.521602630615234, "learning_rate": 7.908412622356082e-05, "loss": 0.7071, "step": 16029 }, { "epoch": 1.08611694559252, "grad_norm": 9.671419143676758, "learning_rate": 7.9082757204463e-05, "loss": 0.746, "step": 16030 }, { "epoch": 1.086184700860492, "grad_norm": 12.12438678741455, "learning_rate": 7.90813881853652e-05, "loss": 0.5135, "step": 16031 }, { "epoch": 1.0862524561284639, "grad_norm": 6.55898904800415, "learning_rate": 7.908001916626738e-05, "loss": 0.6438, "step": 16032 }, { "epoch": 1.086320211396436, "grad_norm": 6.704573154449463, "learning_rate": 7.907865014716956e-05, "loss": 0.5559, "step": 16033 }, { "epoch": 1.086387966664408, "grad_norm": 6.356927871704102, "learning_rate": 7.907728112807175e-05, "loss": 0.7004, "step": 16034 }, { "epoch": 1.0864557219323803, "grad_norm": 6.504284381866455, "learning_rate": 7.907591210897393e-05, "loss": 0.7731, "step": 16035 }, { "epoch": 1.0865234772003523, "grad_norm": 4.6852874755859375, "learning_rate": 7.907454308987611e-05, "loss": 0.5772, "step": 16036 }, { "epoch": 1.0865912324683245, "grad_norm": 5.674740791320801, "learning_rate": 7.907317407077829e-05, "loss": 0.6515, "step": 16037 }, { "epoch": 1.0866589877362964, "grad_norm": 11.06320571899414, "learning_rate": 7.907180505168047e-05, "loss": 0.7314, "step": 16038 }, { "epoch": 1.0867267430042686, "grad_norm": 7.077748775482178, "learning_rate": 7.907043603258267e-05, "loss": 0.7779, "step": 16039 }, { "epoch": 1.0867944982722406, "grad_norm": 4.752398490905762, "learning_rate": 7.906906701348485e-05, "loss": 0.6754, "step": 16040 }, { "epoch": 1.0868622535402128, "grad_norm": 6.530601978302002, "learning_rate": 7.906769799438703e-05, "loss": 0.7912, "step": 16041 }, { "epoch": 1.0869300088081848, "grad_norm": 7.028359889984131, "learning_rate": 7.906632897528921e-05, "loss": 0.7639, "step": 16042 }, { "epoch": 1.086997764076157, "grad_norm": 6.825929164886475, "learning_rate": 7.90649599561914e-05, "loss": 0.5188, "step": 16043 }, { "epoch": 1.087065519344129, "grad_norm": 5.377181053161621, "learning_rate": 7.906359093709358e-05, "loss": 0.7833, "step": 16044 }, { "epoch": 1.0871332746121012, "grad_norm": 5.946323394775391, "learning_rate": 7.906222191799576e-05, "loss": 0.9365, "step": 16045 }, { "epoch": 1.0872010298800732, "grad_norm": 6.080692768096924, "learning_rate": 7.906085289889794e-05, "loss": 0.8694, "step": 16046 }, { "epoch": 1.0872687851480451, "grad_norm": 5.918422698974609, "learning_rate": 7.905948387980012e-05, "loss": 0.8249, "step": 16047 }, { "epoch": 1.0873365404160173, "grad_norm": 8.938027381896973, "learning_rate": 7.905811486070232e-05, "loss": 0.6464, "step": 16048 }, { "epoch": 1.0874042956839893, "grad_norm": 5.869150638580322, "learning_rate": 7.90567458416045e-05, "loss": 0.849, "step": 16049 }, { "epoch": 1.0874720509519615, "grad_norm": 6.127419948577881, "learning_rate": 7.905537682250668e-05, "loss": 0.5813, "step": 16050 }, { "epoch": 1.0875398062199335, "grad_norm": 5.159786701202393, "learning_rate": 7.905400780340886e-05, "loss": 0.5756, "step": 16051 }, { "epoch": 1.0876075614879057, "grad_norm": 6.768173694610596, "learning_rate": 7.905263878431104e-05, "loss": 0.9234, "step": 16052 }, { "epoch": 1.0876753167558777, "grad_norm": 5.480606555938721, "learning_rate": 7.905126976521323e-05, "loss": 0.6028, "step": 16053 }, { "epoch": 1.08774307202385, "grad_norm": 6.204960823059082, "learning_rate": 7.904990074611541e-05, "loss": 0.6402, "step": 16054 }, { "epoch": 1.0878108272918219, "grad_norm": 5.574785232543945, "learning_rate": 7.904853172701759e-05, "loss": 0.6907, "step": 16055 }, { "epoch": 1.087878582559794, "grad_norm": 6.4214043617248535, "learning_rate": 7.904716270791977e-05, "loss": 1.0685, "step": 16056 }, { "epoch": 1.087946337827766, "grad_norm": 6.589137077331543, "learning_rate": 7.904579368882197e-05, "loss": 0.6175, "step": 16057 }, { "epoch": 1.0880140930957383, "grad_norm": 5.765407085418701, "learning_rate": 7.904442466972415e-05, "loss": 0.696, "step": 16058 }, { "epoch": 1.0880818483637102, "grad_norm": 4.7705278396606445, "learning_rate": 7.904305565062633e-05, "loss": 0.6874, "step": 16059 }, { "epoch": 1.0881496036316824, "grad_norm": 6.676177978515625, "learning_rate": 7.904168663152851e-05, "loss": 0.6891, "step": 16060 }, { "epoch": 1.0882173588996544, "grad_norm": 4.436383247375488, "learning_rate": 7.904031761243069e-05, "loss": 0.6336, "step": 16061 }, { "epoch": 1.0882851141676266, "grad_norm": 8.813880920410156, "learning_rate": 7.903894859333288e-05, "loss": 0.7391, "step": 16062 }, { "epoch": 1.0883528694355986, "grad_norm": 5.455590724945068, "learning_rate": 7.903757957423506e-05, "loss": 0.6368, "step": 16063 }, { "epoch": 1.0884206247035708, "grad_norm": 6.536738872528076, "learning_rate": 7.903621055513724e-05, "loss": 0.756, "step": 16064 }, { "epoch": 1.0884883799715428, "grad_norm": 4.461001873016357, "learning_rate": 7.903484153603942e-05, "loss": 0.5279, "step": 16065 }, { "epoch": 1.0885561352395148, "grad_norm": 5.54738187789917, "learning_rate": 7.903347251694162e-05, "loss": 0.5467, "step": 16066 }, { "epoch": 1.088623890507487, "grad_norm": 4.30350923538208, "learning_rate": 7.90321034978438e-05, "loss": 0.5577, "step": 16067 }, { "epoch": 1.088691645775459, "grad_norm": 6.150055885314941, "learning_rate": 7.903073447874598e-05, "loss": 0.6348, "step": 16068 }, { "epoch": 1.0887594010434312, "grad_norm": 4.178039073944092, "learning_rate": 7.902936545964816e-05, "loss": 0.5737, "step": 16069 }, { "epoch": 1.0888271563114031, "grad_norm": 4.776052474975586, "learning_rate": 7.902799644055034e-05, "loss": 0.6767, "step": 16070 }, { "epoch": 1.0888949115793753, "grad_norm": 4.931454658508301, "learning_rate": 7.902662742145253e-05, "loss": 0.6867, "step": 16071 }, { "epoch": 1.0889626668473473, "grad_norm": 5.556695461273193, "learning_rate": 7.902525840235471e-05, "loss": 0.902, "step": 16072 }, { "epoch": 1.0890304221153195, "grad_norm": 6.187075138092041, "learning_rate": 7.90238893832569e-05, "loss": 0.7161, "step": 16073 }, { "epoch": 1.0890981773832915, "grad_norm": 6.942097187042236, "learning_rate": 7.902252036415909e-05, "loss": 0.9483, "step": 16074 }, { "epoch": 1.0891659326512637, "grad_norm": 7.454192161560059, "learning_rate": 7.902115134506127e-05, "loss": 0.768, "step": 16075 }, { "epoch": 1.0892336879192357, "grad_norm": 4.790287494659424, "learning_rate": 7.901978232596345e-05, "loss": 0.7202, "step": 16076 }, { "epoch": 1.089301443187208, "grad_norm": 4.767421722412109, "learning_rate": 7.901841330686564e-05, "loss": 0.7903, "step": 16077 }, { "epoch": 1.0893691984551799, "grad_norm": 5.385760307312012, "learning_rate": 7.901704428776782e-05, "loss": 0.653, "step": 16078 }, { "epoch": 1.089436953723152, "grad_norm": 4.6891655921936035, "learning_rate": 7.901567526867e-05, "loss": 0.5077, "step": 16079 }, { "epoch": 1.089504708991124, "grad_norm": 6.615968704223633, "learning_rate": 7.90143062495722e-05, "loss": 0.6641, "step": 16080 }, { "epoch": 1.089572464259096, "grad_norm": 8.83869457244873, "learning_rate": 7.901293723047438e-05, "loss": 0.7476, "step": 16081 }, { "epoch": 1.0896402195270682, "grad_norm": 5.717437267303467, "learning_rate": 7.901156821137656e-05, "loss": 0.5911, "step": 16082 }, { "epoch": 1.0897079747950402, "grad_norm": 3.9267828464508057, "learning_rate": 7.901019919227874e-05, "loss": 0.4863, "step": 16083 }, { "epoch": 1.0897757300630124, "grad_norm": 6.119167327880859, "learning_rate": 7.900883017318092e-05, "loss": 0.7104, "step": 16084 }, { "epoch": 1.0898434853309844, "grad_norm": 8.956766128540039, "learning_rate": 7.900746115408311e-05, "loss": 0.61, "step": 16085 }, { "epoch": 1.0899112405989566, "grad_norm": 5.749386787414551, "learning_rate": 7.900609213498529e-05, "loss": 0.7368, "step": 16086 }, { "epoch": 1.0899789958669286, "grad_norm": 7.877694606781006, "learning_rate": 7.900472311588747e-05, "loss": 0.5073, "step": 16087 }, { "epoch": 1.0900467511349008, "grad_norm": 6.475144386291504, "learning_rate": 7.900335409678965e-05, "loss": 0.8651, "step": 16088 }, { "epoch": 1.0901145064028728, "grad_norm": 4.912156581878662, "learning_rate": 7.900198507769185e-05, "loss": 0.6296, "step": 16089 }, { "epoch": 1.090182261670845, "grad_norm": 6.647600173950195, "learning_rate": 7.900061605859403e-05, "loss": 0.7773, "step": 16090 }, { "epoch": 1.090250016938817, "grad_norm": 5.223667621612549, "learning_rate": 7.899924703949621e-05, "loss": 0.6258, "step": 16091 }, { "epoch": 1.0903177722067892, "grad_norm": 5.760304927825928, "learning_rate": 7.899787802039839e-05, "loss": 0.6562, "step": 16092 }, { "epoch": 1.0903855274747611, "grad_norm": 6.270285129547119, "learning_rate": 7.899650900130057e-05, "loss": 0.7241, "step": 16093 }, { "epoch": 1.0904532827427333, "grad_norm": 6.746170997619629, "learning_rate": 7.899513998220276e-05, "loss": 0.6451, "step": 16094 }, { "epoch": 1.0905210380107053, "grad_norm": 6.292099952697754, "learning_rate": 7.899377096310494e-05, "loss": 0.4791, "step": 16095 }, { "epoch": 1.0905887932786773, "grad_norm": 7.886358261108398, "learning_rate": 7.899240194400712e-05, "loss": 0.5683, "step": 16096 }, { "epoch": 1.0906565485466495, "grad_norm": 5.620253562927246, "learning_rate": 7.89910329249093e-05, "loss": 0.5844, "step": 16097 }, { "epoch": 1.0907243038146215, "grad_norm": 13.192233085632324, "learning_rate": 7.89896639058115e-05, "loss": 0.8562, "step": 16098 }, { "epoch": 1.0907920590825937, "grad_norm": 4.923586845397949, "learning_rate": 7.898829488671368e-05, "loss": 0.6513, "step": 16099 }, { "epoch": 1.0908598143505657, "grad_norm": 6.001354217529297, "learning_rate": 7.898692586761586e-05, "loss": 0.6313, "step": 16100 }, { "epoch": 1.0909275696185379, "grad_norm": 6.142572402954102, "learning_rate": 7.898555684851804e-05, "loss": 0.5797, "step": 16101 }, { "epoch": 1.0909953248865099, "grad_norm": 5.978823661804199, "learning_rate": 7.898418782942022e-05, "loss": 0.7278, "step": 16102 }, { "epoch": 1.091063080154482, "grad_norm": 6.031432151794434, "learning_rate": 7.898281881032241e-05, "loss": 0.4503, "step": 16103 }, { "epoch": 1.091130835422454, "grad_norm": 6.397644996643066, "learning_rate": 7.898144979122459e-05, "loss": 0.8417, "step": 16104 }, { "epoch": 1.0911985906904262, "grad_norm": 4.83469295501709, "learning_rate": 7.898008077212677e-05, "loss": 0.5679, "step": 16105 }, { "epoch": 1.0912663459583982, "grad_norm": 5.136175155639648, "learning_rate": 7.897871175302895e-05, "loss": 0.7549, "step": 16106 }, { "epoch": 1.0913341012263704, "grad_norm": 5.254212856292725, "learning_rate": 7.897734273393113e-05, "loss": 0.5692, "step": 16107 }, { "epoch": 1.0914018564943424, "grad_norm": 8.869996070861816, "learning_rate": 7.897597371483333e-05, "loss": 0.8202, "step": 16108 }, { "epoch": 1.0914696117623146, "grad_norm": 6.127236843109131, "learning_rate": 7.897460469573551e-05, "loss": 0.5936, "step": 16109 }, { "epoch": 1.0915373670302866, "grad_norm": 6.347018241882324, "learning_rate": 7.897323567663769e-05, "loss": 0.5723, "step": 16110 }, { "epoch": 1.0916051222982588, "grad_norm": 4.602900981903076, "learning_rate": 7.897186665753987e-05, "loss": 0.555, "step": 16111 }, { "epoch": 1.0916728775662308, "grad_norm": 5.458276271820068, "learning_rate": 7.897049763844206e-05, "loss": 0.6425, "step": 16112 }, { "epoch": 1.091740632834203, "grad_norm": 4.358611583709717, "learning_rate": 7.896912861934424e-05, "loss": 0.506, "step": 16113 }, { "epoch": 1.091808388102175, "grad_norm": 5.602118015289307, "learning_rate": 7.896775960024642e-05, "loss": 0.7078, "step": 16114 }, { "epoch": 1.091876143370147, "grad_norm": 5.718192100524902, "learning_rate": 7.89663905811486e-05, "loss": 0.6152, "step": 16115 }, { "epoch": 1.0919438986381191, "grad_norm": 7.62349796295166, "learning_rate": 7.896502156205078e-05, "loss": 0.8543, "step": 16116 }, { "epoch": 1.0920116539060911, "grad_norm": 5.8089189529418945, "learning_rate": 7.896365254295298e-05, "loss": 0.6322, "step": 16117 }, { "epoch": 1.0920794091740633, "grad_norm": 4.909200191497803, "learning_rate": 7.896228352385516e-05, "loss": 0.6035, "step": 16118 }, { "epoch": 1.0921471644420353, "grad_norm": 6.314395427703857, "learning_rate": 7.896091450475734e-05, "loss": 0.7228, "step": 16119 }, { "epoch": 1.0922149197100075, "grad_norm": 4.429898738861084, "learning_rate": 7.895954548565953e-05, "loss": 0.7112, "step": 16120 }, { "epoch": 1.0922826749779795, "grad_norm": 5.228384017944336, "learning_rate": 7.895817646656171e-05, "loss": 0.6499, "step": 16121 }, { "epoch": 1.0923504302459517, "grad_norm": 5.266185760498047, "learning_rate": 7.895680744746389e-05, "loss": 0.6539, "step": 16122 }, { "epoch": 1.0924181855139237, "grad_norm": 5.5866379737854, "learning_rate": 7.895543842836609e-05, "loss": 0.5946, "step": 16123 }, { "epoch": 1.0924859407818959, "grad_norm": 5.785736083984375, "learning_rate": 7.895406940926827e-05, "loss": 0.6127, "step": 16124 }, { "epoch": 1.0925536960498679, "grad_norm": 4.491186141967773, "learning_rate": 7.895270039017045e-05, "loss": 0.3924, "step": 16125 }, { "epoch": 1.09262145131784, "grad_norm": 5.7343831062316895, "learning_rate": 7.895133137107264e-05, "loss": 0.5891, "step": 16126 }, { "epoch": 1.092689206585812, "grad_norm": 5.105741500854492, "learning_rate": 7.894996235197482e-05, "loss": 0.6144, "step": 16127 }, { "epoch": 1.0927569618537842, "grad_norm": 6.519714832305908, "learning_rate": 7.8948593332877e-05, "loss": 0.6786, "step": 16128 }, { "epoch": 1.0928247171217562, "grad_norm": 6.885262489318848, "learning_rate": 7.894722431377918e-05, "loss": 0.7443, "step": 16129 }, { "epoch": 1.0928924723897282, "grad_norm": 7.912429332733154, "learning_rate": 7.894585529468138e-05, "loss": 0.851, "step": 16130 }, { "epoch": 1.0929602276577004, "grad_norm": 8.219959259033203, "learning_rate": 7.894448627558356e-05, "loss": 0.6724, "step": 16131 }, { "epoch": 1.0930279829256724, "grad_norm": 5.2340545654296875, "learning_rate": 7.894311725648574e-05, "loss": 0.8001, "step": 16132 }, { "epoch": 1.0930957381936446, "grad_norm": 5.254332542419434, "learning_rate": 7.894174823738792e-05, "loss": 0.735, "step": 16133 }, { "epoch": 1.0931634934616166, "grad_norm": 6.4383864402771, "learning_rate": 7.89403792182901e-05, "loss": 0.7279, "step": 16134 }, { "epoch": 1.0932312487295888, "grad_norm": 5.197512626647949, "learning_rate": 7.893901019919229e-05, "loss": 0.5941, "step": 16135 }, { "epoch": 1.0932990039975607, "grad_norm": 8.93490982055664, "learning_rate": 7.893764118009447e-05, "loss": 0.9157, "step": 16136 }, { "epoch": 1.093366759265533, "grad_norm": 5.834399223327637, "learning_rate": 7.893627216099665e-05, "loss": 0.6377, "step": 16137 }, { "epoch": 1.093434514533505, "grad_norm": 7.221981048583984, "learning_rate": 7.893490314189883e-05, "loss": 0.8778, "step": 16138 }, { "epoch": 1.0935022698014771, "grad_norm": 8.59212875366211, "learning_rate": 7.893353412280101e-05, "loss": 0.6377, "step": 16139 }, { "epoch": 1.0935700250694491, "grad_norm": 5.231501579284668, "learning_rate": 7.89321651037032e-05, "loss": 0.609, "step": 16140 }, { "epoch": 1.0936377803374213, "grad_norm": 6.0066447257995605, "learning_rate": 7.893079608460539e-05, "loss": 0.6786, "step": 16141 }, { "epoch": 1.0937055356053933, "grad_norm": 5.8601861000061035, "learning_rate": 7.892942706550757e-05, "loss": 0.7069, "step": 16142 }, { "epoch": 1.0937732908733655, "grad_norm": 5.470623016357422, "learning_rate": 7.892805804640975e-05, "loss": 0.6973, "step": 16143 }, { "epoch": 1.0938410461413375, "grad_norm": 6.8463454246521, "learning_rate": 7.892668902731194e-05, "loss": 0.6182, "step": 16144 }, { "epoch": 1.0939088014093095, "grad_norm": 4.355232238769531, "learning_rate": 7.892532000821412e-05, "loss": 0.6908, "step": 16145 }, { "epoch": 1.0939765566772817, "grad_norm": 7.3813676834106445, "learning_rate": 7.89239509891163e-05, "loss": 0.6964, "step": 16146 }, { "epoch": 1.0940443119452536, "grad_norm": 5.615843296051025, "learning_rate": 7.892258197001848e-05, "loss": 0.7703, "step": 16147 }, { "epoch": 1.0941120672132258, "grad_norm": 4.897951602935791, "learning_rate": 7.892121295092066e-05, "loss": 0.7212, "step": 16148 }, { "epoch": 1.0941798224811978, "grad_norm": 6.01582670211792, "learning_rate": 7.891984393182286e-05, "loss": 0.6612, "step": 16149 }, { "epoch": 1.09424757774917, "grad_norm": 6.209997653961182, "learning_rate": 7.891847491272504e-05, "loss": 0.7842, "step": 16150 }, { "epoch": 1.094315333017142, "grad_norm": 5.317853927612305, "learning_rate": 7.891710589362722e-05, "loss": 0.7604, "step": 16151 }, { "epoch": 1.0943830882851142, "grad_norm": 4.876063823699951, "learning_rate": 7.89157368745294e-05, "loss": 0.6274, "step": 16152 }, { "epoch": 1.0944508435530862, "grad_norm": 5.609501361846924, "learning_rate": 7.891436785543159e-05, "loss": 0.7662, "step": 16153 }, { "epoch": 1.0945185988210584, "grad_norm": 5.9666852951049805, "learning_rate": 7.891299883633377e-05, "loss": 0.7864, "step": 16154 }, { "epoch": 1.0945863540890304, "grad_norm": 4.681065559387207, "learning_rate": 7.891162981723595e-05, "loss": 0.727, "step": 16155 }, { "epoch": 1.0946541093570026, "grad_norm": 6.903472423553467, "learning_rate": 7.891026079813813e-05, "loss": 0.894, "step": 16156 }, { "epoch": 1.0947218646249746, "grad_norm": 6.2425408363342285, "learning_rate": 7.890889177904031e-05, "loss": 0.658, "step": 16157 }, { "epoch": 1.0947896198929468, "grad_norm": 5.621673583984375, "learning_rate": 7.890752275994251e-05, "loss": 0.675, "step": 16158 }, { "epoch": 1.0948573751609187, "grad_norm": 6.081178665161133, "learning_rate": 7.890615374084469e-05, "loss": 0.5955, "step": 16159 }, { "epoch": 1.094925130428891, "grad_norm": 6.864749431610107, "learning_rate": 7.890478472174687e-05, "loss": 0.5638, "step": 16160 }, { "epoch": 1.094992885696863, "grad_norm": 5.215126037597656, "learning_rate": 7.890341570264905e-05, "loss": 0.6291, "step": 16161 }, { "epoch": 1.0950606409648351, "grad_norm": 7.269073963165283, "learning_rate": 7.890204668355123e-05, "loss": 0.9472, "step": 16162 }, { "epoch": 1.095128396232807, "grad_norm": 6.205016613006592, "learning_rate": 7.890067766445342e-05, "loss": 0.5892, "step": 16163 }, { "epoch": 1.095196151500779, "grad_norm": 5.993995189666748, "learning_rate": 7.88993086453556e-05, "loss": 0.6912, "step": 16164 }, { "epoch": 1.0952639067687513, "grad_norm": 4.524845600128174, "learning_rate": 7.889793962625778e-05, "loss": 0.5425, "step": 16165 }, { "epoch": 1.0953316620367233, "grad_norm": 6.5391364097595215, "learning_rate": 7.889657060715996e-05, "loss": 0.7429, "step": 16166 }, { "epoch": 1.0953994173046955, "grad_norm": 6.908164978027344, "learning_rate": 7.889520158806216e-05, "loss": 0.6332, "step": 16167 }, { "epoch": 1.0954671725726675, "grad_norm": 6.190462112426758, "learning_rate": 7.889383256896434e-05, "loss": 0.8123, "step": 16168 }, { "epoch": 1.0955349278406397, "grad_norm": 5.4493207931518555, "learning_rate": 7.889246354986652e-05, "loss": 0.6539, "step": 16169 }, { "epoch": 1.0956026831086116, "grad_norm": 5.316275119781494, "learning_rate": 7.889109453076871e-05, "loss": 0.5694, "step": 16170 }, { "epoch": 1.0956704383765838, "grad_norm": 8.243550300598145, "learning_rate": 7.888972551167089e-05, "loss": 1.0688, "step": 16171 }, { "epoch": 1.0957381936445558, "grad_norm": 5.587428092956543, "learning_rate": 7.888835649257307e-05, "loss": 0.7348, "step": 16172 }, { "epoch": 1.095805948912528, "grad_norm": 6.552480697631836, "learning_rate": 7.888698747347527e-05, "loss": 0.5606, "step": 16173 }, { "epoch": 1.0958737041805, "grad_norm": 5.394407272338867, "learning_rate": 7.888561845437745e-05, "loss": 0.6762, "step": 16174 }, { "epoch": 1.0959414594484722, "grad_norm": 9.323013305664062, "learning_rate": 7.888424943527963e-05, "loss": 0.6987, "step": 16175 }, { "epoch": 1.0960092147164442, "grad_norm": 4.646063327789307, "learning_rate": 7.888288041618182e-05, "loss": 0.4748, "step": 16176 }, { "epoch": 1.0960769699844164, "grad_norm": 8.358293533325195, "learning_rate": 7.8881511397084e-05, "loss": 0.6064, "step": 16177 }, { "epoch": 1.0961447252523884, "grad_norm": 8.130820274353027, "learning_rate": 7.888014237798618e-05, "loss": 0.7405, "step": 16178 }, { "epoch": 1.0962124805203604, "grad_norm": 5.757419109344482, "learning_rate": 7.887877335888836e-05, "loss": 0.7595, "step": 16179 }, { "epoch": 1.0962802357883326, "grad_norm": 4.9174580574035645, "learning_rate": 7.887740433979054e-05, "loss": 0.5362, "step": 16180 }, { "epoch": 1.0963479910563045, "grad_norm": 6.0252604484558105, "learning_rate": 7.887603532069274e-05, "loss": 0.6579, "step": 16181 }, { "epoch": 1.0964157463242767, "grad_norm": 6.813238620758057, "learning_rate": 7.887466630159492e-05, "loss": 0.9755, "step": 16182 }, { "epoch": 1.0964835015922487, "grad_norm": 6.028433322906494, "learning_rate": 7.88732972824971e-05, "loss": 0.789, "step": 16183 }, { "epoch": 1.096551256860221, "grad_norm": 4.869696617126465, "learning_rate": 7.887192826339928e-05, "loss": 0.6194, "step": 16184 }, { "epoch": 1.096619012128193, "grad_norm": 5.043181419372559, "learning_rate": 7.887055924430146e-05, "loss": 0.515, "step": 16185 }, { "epoch": 1.096686767396165, "grad_norm": 5.320845127105713, "learning_rate": 7.886919022520365e-05, "loss": 0.5807, "step": 16186 }, { "epoch": 1.096754522664137, "grad_norm": 6.676725387573242, "learning_rate": 7.886782120610583e-05, "loss": 0.8363, "step": 16187 }, { "epoch": 1.0968222779321093, "grad_norm": 5.5581464767456055, "learning_rate": 7.886645218700801e-05, "loss": 0.7815, "step": 16188 }, { "epoch": 1.0968900332000813, "grad_norm": 6.747057914733887, "learning_rate": 7.886508316791019e-05, "loss": 0.781, "step": 16189 }, { "epoch": 1.0969577884680535, "grad_norm": 5.670258522033691, "learning_rate": 7.886371414881239e-05, "loss": 0.7192, "step": 16190 }, { "epoch": 1.0970255437360255, "grad_norm": 6.418565273284912, "learning_rate": 7.886234512971457e-05, "loss": 0.7151, "step": 16191 }, { "epoch": 1.0970932990039977, "grad_norm": 6.145472526550293, "learning_rate": 7.886097611061675e-05, "loss": 0.738, "step": 16192 }, { "epoch": 1.0971610542719696, "grad_norm": 6.247579574584961, "learning_rate": 7.885960709151893e-05, "loss": 0.7107, "step": 16193 }, { "epoch": 1.0972288095399416, "grad_norm": 6.627540111541748, "learning_rate": 7.885823807242111e-05, "loss": 0.6838, "step": 16194 }, { "epoch": 1.0972965648079138, "grad_norm": 5.86225700378418, "learning_rate": 7.88568690533233e-05, "loss": 0.7547, "step": 16195 }, { "epoch": 1.0973643200758858, "grad_norm": 5.816222667694092, "learning_rate": 7.885550003422548e-05, "loss": 0.5167, "step": 16196 }, { "epoch": 1.097432075343858, "grad_norm": 4.933455467224121, "learning_rate": 7.885413101512766e-05, "loss": 0.6946, "step": 16197 }, { "epoch": 1.09749983061183, "grad_norm": 4.601310729980469, "learning_rate": 7.885276199602984e-05, "loss": 0.6676, "step": 16198 }, { "epoch": 1.0975675858798022, "grad_norm": 6.967305660247803, "learning_rate": 7.885139297693204e-05, "loss": 0.7414, "step": 16199 }, { "epoch": 1.0976353411477742, "grad_norm": 5.442778587341309, "learning_rate": 7.885002395783422e-05, "loss": 0.5816, "step": 16200 }, { "epoch": 1.0977030964157464, "grad_norm": 6.803948879241943, "learning_rate": 7.88486549387364e-05, "loss": 0.6741, "step": 16201 }, { "epoch": 1.0977708516837184, "grad_norm": 4.722171783447266, "learning_rate": 7.884728591963858e-05, "loss": 0.7071, "step": 16202 }, { "epoch": 1.0978386069516906, "grad_norm": 6.338668346405029, "learning_rate": 7.884591690054076e-05, "loss": 0.7145, "step": 16203 }, { "epoch": 1.0979063622196625, "grad_norm": 6.372475624084473, "learning_rate": 7.884454788144295e-05, "loss": 0.8761, "step": 16204 }, { "epoch": 1.0979741174876347, "grad_norm": 5.98175048828125, "learning_rate": 7.884317886234513e-05, "loss": 0.7052, "step": 16205 }, { "epoch": 1.0980418727556067, "grad_norm": 7.220608711242676, "learning_rate": 7.884180984324731e-05, "loss": 0.7702, "step": 16206 }, { "epoch": 1.098109628023579, "grad_norm": 4.378786563873291, "learning_rate": 7.884044082414949e-05, "loss": 0.5409, "step": 16207 }, { "epoch": 1.098177383291551, "grad_norm": 4.2132978439331055, "learning_rate": 7.883907180505169e-05, "loss": 0.6008, "step": 16208 }, { "epoch": 1.098245138559523, "grad_norm": 5.9434590339660645, "learning_rate": 7.883770278595387e-05, "loss": 0.6274, "step": 16209 }, { "epoch": 1.098312893827495, "grad_norm": 4.109735012054443, "learning_rate": 7.883633376685605e-05, "loss": 0.7516, "step": 16210 }, { "epoch": 1.0983806490954673, "grad_norm": 5.5988006591796875, "learning_rate": 7.883496474775823e-05, "loss": 0.6469, "step": 16211 }, { "epoch": 1.0984484043634393, "grad_norm": 5.006146430969238, "learning_rate": 7.883359572866041e-05, "loss": 0.7183, "step": 16212 }, { "epoch": 1.0985161596314112, "grad_norm": 5.305348873138428, "learning_rate": 7.88322267095626e-05, "loss": 0.6361, "step": 16213 }, { "epoch": 1.0985839148993835, "grad_norm": 5.728710174560547, "learning_rate": 7.883085769046478e-05, "loss": 0.8569, "step": 16214 }, { "epoch": 1.0986516701673554, "grad_norm": 11.328938484191895, "learning_rate": 7.882948867136696e-05, "loss": 0.5037, "step": 16215 }, { "epoch": 1.0987194254353276, "grad_norm": 4.791298866271973, "learning_rate": 7.882811965226916e-05, "loss": 0.6317, "step": 16216 }, { "epoch": 1.0987871807032996, "grad_norm": 4.028521537780762, "learning_rate": 7.882675063317134e-05, "loss": 0.5422, "step": 16217 }, { "epoch": 1.0988549359712718, "grad_norm": 5.723876953125, "learning_rate": 7.882538161407352e-05, "loss": 0.8359, "step": 16218 }, { "epoch": 1.0989226912392438, "grad_norm": 4.189463138580322, "learning_rate": 7.882401259497571e-05, "loss": 0.5966, "step": 16219 }, { "epoch": 1.098990446507216, "grad_norm": 5.877222061157227, "learning_rate": 7.882264357587789e-05, "loss": 0.5115, "step": 16220 }, { "epoch": 1.099058201775188, "grad_norm": 6.062419891357422, "learning_rate": 7.882127455678007e-05, "loss": 0.6709, "step": 16221 }, { "epoch": 1.0991259570431602, "grad_norm": 8.064435958862305, "learning_rate": 7.881990553768227e-05, "loss": 0.8518, "step": 16222 }, { "epoch": 1.0991937123111322, "grad_norm": 5.81643533706665, "learning_rate": 7.881853651858445e-05, "loss": 0.7286, "step": 16223 }, { "epoch": 1.0992614675791044, "grad_norm": 5.727938652038574, "learning_rate": 7.881716749948663e-05, "loss": 0.8333, "step": 16224 }, { "epoch": 1.0993292228470763, "grad_norm": 8.810190200805664, "learning_rate": 7.88157984803888e-05, "loss": 0.9136, "step": 16225 }, { "epoch": 1.0993969781150486, "grad_norm": 6.392600059509277, "learning_rate": 7.881442946129099e-05, "loss": 0.8322, "step": 16226 }, { "epoch": 1.0994647333830205, "grad_norm": 6.082714080810547, "learning_rate": 7.881306044219318e-05, "loss": 0.649, "step": 16227 }, { "epoch": 1.0995324886509925, "grad_norm": 6.840407848358154, "learning_rate": 7.881169142309536e-05, "loss": 0.5934, "step": 16228 }, { "epoch": 1.0996002439189647, "grad_norm": 6.920519828796387, "learning_rate": 7.881032240399754e-05, "loss": 0.9076, "step": 16229 }, { "epoch": 1.0996679991869367, "grad_norm": 5.431446075439453, "learning_rate": 7.880895338489972e-05, "loss": 0.6394, "step": 16230 }, { "epoch": 1.099735754454909, "grad_norm": 7.608739376068115, "learning_rate": 7.880758436580192e-05, "loss": 0.7753, "step": 16231 }, { "epoch": 1.0998035097228809, "grad_norm": 6.12460470199585, "learning_rate": 7.88062153467041e-05, "loss": 0.7162, "step": 16232 }, { "epoch": 1.099871264990853, "grad_norm": 6.770415782928467, "learning_rate": 7.880484632760628e-05, "loss": 0.842, "step": 16233 }, { "epoch": 1.099939020258825, "grad_norm": 5.251020908355713, "learning_rate": 7.880347730850846e-05, "loss": 0.5031, "step": 16234 }, { "epoch": 1.1000067755267973, "grad_norm": 6.684656620025635, "learning_rate": 7.880210828941064e-05, "loss": 0.6369, "step": 16235 }, { "epoch": 1.1000745307947692, "grad_norm": 6.140908241271973, "learning_rate": 7.880073927031283e-05, "loss": 0.6889, "step": 16236 }, { "epoch": 1.1001422860627414, "grad_norm": 6.55711555480957, "learning_rate": 7.879937025121501e-05, "loss": 0.6736, "step": 16237 }, { "epoch": 1.1002100413307134, "grad_norm": 5.948589324951172, "learning_rate": 7.879800123211719e-05, "loss": 0.6249, "step": 16238 }, { "epoch": 1.1002777965986856, "grad_norm": 5.505045413970947, "learning_rate": 7.879663221301937e-05, "loss": 0.7244, "step": 16239 }, { "epoch": 1.1003455518666576, "grad_norm": 4.607968330383301, "learning_rate": 7.879526319392155e-05, "loss": 0.9175, "step": 16240 }, { "epoch": 1.1004133071346298, "grad_norm": 6.939621448516846, "learning_rate": 7.879389417482375e-05, "loss": 0.7586, "step": 16241 }, { "epoch": 1.1004810624026018, "grad_norm": 6.131583213806152, "learning_rate": 7.879252515572593e-05, "loss": 0.6007, "step": 16242 }, { "epoch": 1.1005488176705738, "grad_norm": 5.63719367980957, "learning_rate": 7.879115613662811e-05, "loss": 0.9402, "step": 16243 }, { "epoch": 1.100616572938546, "grad_norm": 6.878543376922607, "learning_rate": 7.878978711753029e-05, "loss": 0.5446, "step": 16244 }, { "epoch": 1.100684328206518, "grad_norm": 5.816869735717773, "learning_rate": 7.878841809843248e-05, "loss": 0.5093, "step": 16245 }, { "epoch": 1.1007520834744902, "grad_norm": 7.57546329498291, "learning_rate": 7.878704907933466e-05, "loss": 0.8181, "step": 16246 }, { "epoch": 1.1008198387424621, "grad_norm": 6.603987216949463, "learning_rate": 7.878568006023684e-05, "loss": 0.9014, "step": 16247 }, { "epoch": 1.1008875940104343, "grad_norm": 5.182919025421143, "learning_rate": 7.878431104113902e-05, "loss": 0.5691, "step": 16248 }, { "epoch": 1.1009553492784063, "grad_norm": 4.985978126525879, "learning_rate": 7.87829420220412e-05, "loss": 0.6697, "step": 16249 }, { "epoch": 1.1010231045463785, "grad_norm": 5.756229400634766, "learning_rate": 7.87815730029434e-05, "loss": 0.7705, "step": 16250 }, { "epoch": 1.1010908598143505, "grad_norm": 5.34890079498291, "learning_rate": 7.878020398384558e-05, "loss": 0.8372, "step": 16251 }, { "epoch": 1.1011586150823227, "grad_norm": 5.433262825012207, "learning_rate": 7.877883496474776e-05, "loss": 0.8024, "step": 16252 }, { "epoch": 1.1012263703502947, "grad_norm": 6.8542256355285645, "learning_rate": 7.877746594564994e-05, "loss": 0.6715, "step": 16253 }, { "epoch": 1.101294125618267, "grad_norm": 6.2984843254089355, "learning_rate": 7.877609692655213e-05, "loss": 0.9062, "step": 16254 }, { "epoch": 1.1013618808862389, "grad_norm": 3.8605449199676514, "learning_rate": 7.877472790745431e-05, "loss": 0.4797, "step": 16255 }, { "epoch": 1.101429636154211, "grad_norm": 6.800119400024414, "learning_rate": 7.877335888835649e-05, "loss": 0.7336, "step": 16256 }, { "epoch": 1.101497391422183, "grad_norm": 5.705935478210449, "learning_rate": 7.877198986925867e-05, "loss": 0.7437, "step": 16257 }, { "epoch": 1.1015651466901553, "grad_norm": 5.928062915802002, "learning_rate": 7.877062085016085e-05, "loss": 0.7036, "step": 16258 }, { "epoch": 1.1016329019581272, "grad_norm": 5.317257881164551, "learning_rate": 7.876925183106305e-05, "loss": 0.6604, "step": 16259 }, { "epoch": 1.1017006572260994, "grad_norm": 7.342348575592041, "learning_rate": 7.876788281196523e-05, "loss": 0.8121, "step": 16260 }, { "epoch": 1.1017684124940714, "grad_norm": 6.198090553283691, "learning_rate": 7.876651379286741e-05, "loss": 0.6446, "step": 16261 }, { "epoch": 1.1018361677620434, "grad_norm": 6.285484790802002, "learning_rate": 7.87651447737696e-05, "loss": 0.6862, "step": 16262 }, { "epoch": 1.1019039230300156, "grad_norm": 6.3264994621276855, "learning_rate": 7.876377575467178e-05, "loss": 0.5654, "step": 16263 }, { "epoch": 1.1019716782979876, "grad_norm": 4.603121280670166, "learning_rate": 7.876240673557396e-05, "loss": 0.799, "step": 16264 }, { "epoch": 1.1020394335659598, "grad_norm": 8.007383346557617, "learning_rate": 7.876103771647616e-05, "loss": 0.8962, "step": 16265 }, { "epoch": 1.1021071888339318, "grad_norm": 5.806975841522217, "learning_rate": 7.875966869737834e-05, "loss": 0.7357, "step": 16266 }, { "epoch": 1.102174944101904, "grad_norm": 5.703726291656494, "learning_rate": 7.875829967828052e-05, "loss": 0.7392, "step": 16267 }, { "epoch": 1.102242699369876, "grad_norm": 5.793273448944092, "learning_rate": 7.875693065918271e-05, "loss": 0.7173, "step": 16268 }, { "epoch": 1.1023104546378482, "grad_norm": 6.515778541564941, "learning_rate": 7.875556164008489e-05, "loss": 0.6591, "step": 16269 }, { "epoch": 1.1023782099058201, "grad_norm": 7.7042059898376465, "learning_rate": 7.875419262098707e-05, "loss": 0.9228, "step": 16270 }, { "epoch": 1.1024459651737923, "grad_norm": 4.594401836395264, "learning_rate": 7.875282360188925e-05, "loss": 0.5748, "step": 16271 }, { "epoch": 1.1025137204417643, "grad_norm": 8.685628890991211, "learning_rate": 7.875145458279143e-05, "loss": 0.7174, "step": 16272 }, { "epoch": 1.1025814757097365, "grad_norm": 5.300126075744629, "learning_rate": 7.875008556369363e-05, "loss": 0.6349, "step": 16273 }, { "epoch": 1.1026492309777085, "grad_norm": 5.215007305145264, "learning_rate": 7.87487165445958e-05, "loss": 0.5969, "step": 16274 }, { "epoch": 1.1027169862456807, "grad_norm": 6.813204765319824, "learning_rate": 7.874734752549799e-05, "loss": 0.7632, "step": 16275 }, { "epoch": 1.1027847415136527, "grad_norm": 5.5026631355285645, "learning_rate": 7.874597850640017e-05, "loss": 0.6924, "step": 16276 }, { "epoch": 1.1028524967816247, "grad_norm": 7.050729274749756, "learning_rate": 7.874460948730236e-05, "loss": 0.8821, "step": 16277 }, { "epoch": 1.1029202520495969, "grad_norm": 6.8287811279296875, "learning_rate": 7.874324046820454e-05, "loss": 0.7282, "step": 16278 }, { "epoch": 1.1029880073175689, "grad_norm": 5.6110687255859375, "learning_rate": 7.874187144910672e-05, "loss": 0.5387, "step": 16279 }, { "epoch": 1.103055762585541, "grad_norm": 6.597656726837158, "learning_rate": 7.87405024300089e-05, "loss": 0.9386, "step": 16280 }, { "epoch": 1.103123517853513, "grad_norm": 6.492720603942871, "learning_rate": 7.873913341091108e-05, "loss": 0.5659, "step": 16281 }, { "epoch": 1.1031912731214852, "grad_norm": 5.881611347198486, "learning_rate": 7.873776439181328e-05, "loss": 0.5639, "step": 16282 }, { "epoch": 1.1032590283894572, "grad_norm": 5.611779689788818, "learning_rate": 7.873639537271546e-05, "loss": 0.6953, "step": 16283 }, { "epoch": 1.1033267836574294, "grad_norm": 7.153480529785156, "learning_rate": 7.873502635361764e-05, "loss": 0.9283, "step": 16284 }, { "epoch": 1.1033945389254014, "grad_norm": 6.058135986328125, "learning_rate": 7.873365733451982e-05, "loss": 0.7362, "step": 16285 }, { "epoch": 1.1034622941933736, "grad_norm": 5.711484432220459, "learning_rate": 7.873228831542201e-05, "loss": 0.7727, "step": 16286 }, { "epoch": 1.1035300494613456, "grad_norm": 4.450558662414551, "learning_rate": 7.873091929632419e-05, "loss": 0.687, "step": 16287 }, { "epoch": 1.1035978047293178, "grad_norm": 6.392914772033691, "learning_rate": 7.872955027722637e-05, "loss": 0.7404, "step": 16288 }, { "epoch": 1.1036655599972898, "grad_norm": 7.236051082611084, "learning_rate": 7.872818125812855e-05, "loss": 0.8772, "step": 16289 }, { "epoch": 1.103733315265262, "grad_norm": 5.396810054779053, "learning_rate": 7.872681223903073e-05, "loss": 0.7059, "step": 16290 }, { "epoch": 1.103801070533234, "grad_norm": 6.475565433502197, "learning_rate": 7.872544321993293e-05, "loss": 0.7454, "step": 16291 }, { "epoch": 1.103868825801206, "grad_norm": 7.763959884643555, "learning_rate": 7.87240742008351e-05, "loss": 0.8434, "step": 16292 }, { "epoch": 1.1039365810691781, "grad_norm": 5.478339195251465, "learning_rate": 7.872270518173729e-05, "loss": 0.665, "step": 16293 }, { "epoch": 1.1040043363371501, "grad_norm": 6.564233779907227, "learning_rate": 7.872133616263947e-05, "loss": 0.8856, "step": 16294 }, { "epoch": 1.1040720916051223, "grad_norm": 5.313308238983154, "learning_rate": 7.871996714354165e-05, "loss": 0.5264, "step": 16295 }, { "epoch": 1.1041398468730943, "grad_norm": 5.785020351409912, "learning_rate": 7.871859812444384e-05, "loss": 0.5262, "step": 16296 }, { "epoch": 1.1042076021410665, "grad_norm": 4.397270679473877, "learning_rate": 7.871722910534602e-05, "loss": 0.5755, "step": 16297 }, { "epoch": 1.1042753574090385, "grad_norm": 5.507979393005371, "learning_rate": 7.87158600862482e-05, "loss": 0.6667, "step": 16298 }, { "epoch": 1.1043431126770107, "grad_norm": 6.59631872177124, "learning_rate": 7.871449106715038e-05, "loss": 0.6135, "step": 16299 }, { "epoch": 1.1044108679449827, "grad_norm": 5.889465808868408, "learning_rate": 7.871312204805258e-05, "loss": 0.6328, "step": 16300 }, { "epoch": 1.1044786232129549, "grad_norm": 5.357670307159424, "learning_rate": 7.871175302895476e-05, "loss": 0.8888, "step": 16301 }, { "epoch": 1.1045463784809268, "grad_norm": 5.813202381134033, "learning_rate": 7.871038400985694e-05, "loss": 0.8759, "step": 16302 }, { "epoch": 1.104614133748899, "grad_norm": 5.886719226837158, "learning_rate": 7.870901499075912e-05, "loss": 0.8037, "step": 16303 }, { "epoch": 1.104681889016871, "grad_norm": 5.701767444610596, "learning_rate": 7.87076459716613e-05, "loss": 0.7458, "step": 16304 }, { "epoch": 1.1047496442848432, "grad_norm": 4.6818413734436035, "learning_rate": 7.870627695256349e-05, "loss": 0.5317, "step": 16305 }, { "epoch": 1.1048173995528152, "grad_norm": 5.039698123931885, "learning_rate": 7.870490793346567e-05, "loss": 0.6784, "step": 16306 }, { "epoch": 1.1048851548207874, "grad_norm": 8.11133861541748, "learning_rate": 7.870353891436785e-05, "loss": 0.5449, "step": 16307 }, { "epoch": 1.1049529100887594, "grad_norm": 6.868621826171875, "learning_rate": 7.870216989527005e-05, "loss": 0.6633, "step": 16308 }, { "epoch": 1.1050206653567316, "grad_norm": 5.920335292816162, "learning_rate": 7.870080087617223e-05, "loss": 0.7819, "step": 16309 }, { "epoch": 1.1050884206247036, "grad_norm": 5.8382978439331055, "learning_rate": 7.86994318570744e-05, "loss": 0.7254, "step": 16310 }, { "epoch": 1.1051561758926756, "grad_norm": 6.541285514831543, "learning_rate": 7.86980628379766e-05, "loss": 0.6517, "step": 16311 }, { "epoch": 1.1052239311606478, "grad_norm": 6.541675567626953, "learning_rate": 7.869669381887878e-05, "loss": 0.5173, "step": 16312 }, { "epoch": 1.1052916864286197, "grad_norm": 8.50218677520752, "learning_rate": 7.869532479978096e-05, "loss": 0.6266, "step": 16313 }, { "epoch": 1.105359441696592, "grad_norm": 4.665585517883301, "learning_rate": 7.869395578068315e-05, "loss": 0.6517, "step": 16314 }, { "epoch": 1.105427196964564, "grad_norm": 6.082878112792969, "learning_rate": 7.869258676158534e-05, "loss": 0.9384, "step": 16315 }, { "epoch": 1.1054949522325361, "grad_norm": 6.00462532043457, "learning_rate": 7.869121774248752e-05, "loss": 0.7451, "step": 16316 }, { "epoch": 1.1055627075005081, "grad_norm": 6.826976776123047, "learning_rate": 7.86898487233897e-05, "loss": 0.8636, "step": 16317 }, { "epoch": 1.1056304627684803, "grad_norm": 6.036766052246094, "learning_rate": 7.868847970429188e-05, "loss": 0.6864, "step": 16318 }, { "epoch": 1.1056982180364523, "grad_norm": 5.662924766540527, "learning_rate": 7.868711068519407e-05, "loss": 0.6582, "step": 16319 }, { "epoch": 1.1057659733044245, "grad_norm": 5.307290077209473, "learning_rate": 7.868574166609625e-05, "loss": 0.6985, "step": 16320 }, { "epoch": 1.1058337285723965, "grad_norm": 6.92588472366333, "learning_rate": 7.868437264699843e-05, "loss": 0.799, "step": 16321 }, { "epoch": 1.1059014838403687, "grad_norm": 7.620028972625732, "learning_rate": 7.868300362790061e-05, "loss": 0.7356, "step": 16322 }, { "epoch": 1.1059692391083407, "grad_norm": 8.529465675354004, "learning_rate": 7.86816346088028e-05, "loss": 0.5915, "step": 16323 }, { "epoch": 1.1060369943763129, "grad_norm": 5.126700401306152, "learning_rate": 7.868026558970499e-05, "loss": 0.66, "step": 16324 }, { "epoch": 1.1061047496442848, "grad_norm": 5.512331962585449, "learning_rate": 7.867889657060717e-05, "loss": 0.804, "step": 16325 }, { "epoch": 1.1061725049122568, "grad_norm": 5.728443145751953, "learning_rate": 7.867752755150935e-05, "loss": 0.6654, "step": 16326 }, { "epoch": 1.106240260180229, "grad_norm": 4.493624210357666, "learning_rate": 7.867615853241153e-05, "loss": 0.5286, "step": 16327 }, { "epoch": 1.106308015448201, "grad_norm": 5.167482376098633, "learning_rate": 7.867478951331372e-05, "loss": 0.6084, "step": 16328 }, { "epoch": 1.1063757707161732, "grad_norm": 5.0305657386779785, "learning_rate": 7.86734204942159e-05, "loss": 0.8362, "step": 16329 }, { "epoch": 1.1064435259841452, "grad_norm": 5.162072658538818, "learning_rate": 7.867205147511808e-05, "loss": 0.8681, "step": 16330 }, { "epoch": 1.1065112812521174, "grad_norm": 6.101891040802002, "learning_rate": 7.867068245602026e-05, "loss": 0.7332, "step": 16331 }, { "epoch": 1.1065790365200894, "grad_norm": 6.320982456207275, "learning_rate": 7.866931343692246e-05, "loss": 0.9075, "step": 16332 }, { "epoch": 1.1066467917880616, "grad_norm": 5.854201316833496, "learning_rate": 7.866794441782464e-05, "loss": 0.5638, "step": 16333 }, { "epoch": 1.1067145470560336, "grad_norm": 5.276032447814941, "learning_rate": 7.866657539872682e-05, "loss": 0.6531, "step": 16334 }, { "epoch": 1.1067823023240058, "grad_norm": 8.056017875671387, "learning_rate": 7.8665206379629e-05, "loss": 0.5859, "step": 16335 }, { "epoch": 1.1068500575919777, "grad_norm": 5.445096492767334, "learning_rate": 7.866383736053118e-05, "loss": 0.8072, "step": 16336 }, { "epoch": 1.10691781285995, "grad_norm": 9.058344841003418, "learning_rate": 7.866246834143337e-05, "loss": 0.9842, "step": 16337 }, { "epoch": 1.106985568127922, "grad_norm": 5.209854602813721, "learning_rate": 7.866109932233555e-05, "loss": 0.6562, "step": 16338 }, { "epoch": 1.1070533233958941, "grad_norm": 4.583771705627441, "learning_rate": 7.865973030323773e-05, "loss": 0.6274, "step": 16339 }, { "epoch": 1.107121078663866, "grad_norm": 5.5320940017700195, "learning_rate": 7.865836128413991e-05, "loss": 0.6254, "step": 16340 }, { "epoch": 1.107188833931838, "grad_norm": 5.773438930511475, "learning_rate": 7.86569922650421e-05, "loss": 0.8155, "step": 16341 }, { "epoch": 1.1072565891998103, "grad_norm": 5.824255466461182, "learning_rate": 7.865562324594429e-05, "loss": 0.6921, "step": 16342 }, { "epoch": 1.1073243444677823, "grad_norm": 7.974730014801025, "learning_rate": 7.865425422684647e-05, "loss": 0.7548, "step": 16343 }, { "epoch": 1.1073920997357545, "grad_norm": 8.403234481811523, "learning_rate": 7.865288520774865e-05, "loss": 0.6378, "step": 16344 }, { "epoch": 1.1074598550037265, "grad_norm": 6.035255432128906, "learning_rate": 7.865151618865083e-05, "loss": 0.9632, "step": 16345 }, { "epoch": 1.1075276102716987, "grad_norm": 4.658270835876465, "learning_rate": 7.865014716955302e-05, "loss": 0.7039, "step": 16346 }, { "epoch": 1.1075953655396706, "grad_norm": 4.916011333465576, "learning_rate": 7.86487781504552e-05, "loss": 0.5733, "step": 16347 }, { "epoch": 1.1076631208076428, "grad_norm": 5.327943801879883, "learning_rate": 7.864740913135738e-05, "loss": 0.6256, "step": 16348 }, { "epoch": 1.1077308760756148, "grad_norm": 5.153133392333984, "learning_rate": 7.864604011225956e-05, "loss": 0.5463, "step": 16349 }, { "epoch": 1.107798631343587, "grad_norm": 4.027024745941162, "learning_rate": 7.864467109316174e-05, "loss": 0.5824, "step": 16350 }, { "epoch": 1.107866386611559, "grad_norm": 5.886280536651611, "learning_rate": 7.864330207406394e-05, "loss": 0.758, "step": 16351 }, { "epoch": 1.1079341418795312, "grad_norm": 10.420184135437012, "learning_rate": 7.864193305496612e-05, "loss": 0.7657, "step": 16352 }, { "epoch": 1.1080018971475032, "grad_norm": 6.934298515319824, "learning_rate": 7.86405640358683e-05, "loss": 0.579, "step": 16353 }, { "epoch": 1.1080696524154754, "grad_norm": 5.0468573570251465, "learning_rate": 7.863919501677049e-05, "loss": 0.6553, "step": 16354 }, { "epoch": 1.1081374076834474, "grad_norm": 4.991286754608154, "learning_rate": 7.863782599767267e-05, "loss": 0.6482, "step": 16355 }, { "epoch": 1.1082051629514196, "grad_norm": 5.812962055206299, "learning_rate": 7.863645697857485e-05, "loss": 0.7708, "step": 16356 }, { "epoch": 1.1082729182193916, "grad_norm": 6.6987104415893555, "learning_rate": 7.863508795947705e-05, "loss": 0.807, "step": 16357 }, { "epoch": 1.1083406734873638, "grad_norm": 4.859643459320068, "learning_rate": 7.863371894037923e-05, "loss": 0.7066, "step": 16358 }, { "epoch": 1.1084084287553357, "grad_norm": 8.146595001220703, "learning_rate": 7.86323499212814e-05, "loss": 0.7213, "step": 16359 }, { "epoch": 1.1084761840233077, "grad_norm": 5.962488174438477, "learning_rate": 7.86309809021836e-05, "loss": 0.9372, "step": 16360 }, { "epoch": 1.10854393929128, "grad_norm": 6.733558177947998, "learning_rate": 7.862961188308578e-05, "loss": 0.7216, "step": 16361 }, { "epoch": 1.108611694559252, "grad_norm": 6.295005798339844, "learning_rate": 7.862824286398796e-05, "loss": 0.6771, "step": 16362 }, { "epoch": 1.108679449827224, "grad_norm": 5.711909770965576, "learning_rate": 7.862687384489014e-05, "loss": 0.6979, "step": 16363 }, { "epoch": 1.108747205095196, "grad_norm": 7.529008865356445, "learning_rate": 7.862550482579233e-05, "loss": 0.8168, "step": 16364 }, { "epoch": 1.1088149603631683, "grad_norm": 6.682563304901123, "learning_rate": 7.862413580669451e-05, "loss": 0.7012, "step": 16365 }, { "epoch": 1.1088827156311403, "grad_norm": 5.381031513214111, "learning_rate": 7.86227667875967e-05, "loss": 0.6594, "step": 16366 }, { "epoch": 1.1089504708991125, "grad_norm": 5.263582229614258, "learning_rate": 7.862139776849888e-05, "loss": 0.6086, "step": 16367 }, { "epoch": 1.1090182261670845, "grad_norm": 4.777389049530029, "learning_rate": 7.862002874940106e-05, "loss": 0.563, "step": 16368 }, { "epoch": 1.1090859814350567, "grad_norm": 4.717031478881836, "learning_rate": 7.861865973030325e-05, "loss": 0.7422, "step": 16369 }, { "epoch": 1.1091537367030286, "grad_norm": 6.94821310043335, "learning_rate": 7.861729071120543e-05, "loss": 0.8252, "step": 16370 }, { "epoch": 1.1092214919710008, "grad_norm": 6.443717956542969, "learning_rate": 7.861592169210761e-05, "loss": 0.616, "step": 16371 }, { "epoch": 1.1092892472389728, "grad_norm": 5.591299533843994, "learning_rate": 7.861455267300979e-05, "loss": 0.7008, "step": 16372 }, { "epoch": 1.109357002506945, "grad_norm": 6.745190143585205, "learning_rate": 7.861318365391197e-05, "loss": 0.4351, "step": 16373 }, { "epoch": 1.109424757774917, "grad_norm": 4.188071250915527, "learning_rate": 7.861181463481417e-05, "loss": 0.644, "step": 16374 }, { "epoch": 1.109492513042889, "grad_norm": 9.427136421203613, "learning_rate": 7.861044561571635e-05, "loss": 0.6456, "step": 16375 }, { "epoch": 1.1095602683108612, "grad_norm": 6.370525360107422, "learning_rate": 7.860907659661853e-05, "loss": 0.8236, "step": 16376 }, { "epoch": 1.1096280235788332, "grad_norm": 5.648697376251221, "learning_rate": 7.86077075775207e-05, "loss": 0.7623, "step": 16377 }, { "epoch": 1.1096957788468054, "grad_norm": 4.926682472229004, "learning_rate": 7.86063385584229e-05, "loss": 0.5091, "step": 16378 }, { "epoch": 1.1097635341147774, "grad_norm": 5.184731483459473, "learning_rate": 7.860496953932508e-05, "loss": 0.6228, "step": 16379 }, { "epoch": 1.1098312893827496, "grad_norm": 4.412238597869873, "learning_rate": 7.860360052022726e-05, "loss": 0.4859, "step": 16380 }, { "epoch": 1.1098990446507215, "grad_norm": 5.148406505584717, "learning_rate": 7.860223150112944e-05, "loss": 0.6026, "step": 16381 }, { "epoch": 1.1099667999186937, "grad_norm": 6.479032039642334, "learning_rate": 7.860086248203162e-05, "loss": 0.6879, "step": 16382 }, { "epoch": 1.1100345551866657, "grad_norm": 5.107529163360596, "learning_rate": 7.859949346293382e-05, "loss": 0.8316, "step": 16383 }, { "epoch": 1.110102310454638, "grad_norm": 6.092916011810303, "learning_rate": 7.8598124443836e-05, "loss": 0.5405, "step": 16384 }, { "epoch": 1.11017006572261, "grad_norm": 6.275728702545166, "learning_rate": 7.859675542473818e-05, "loss": 0.6238, "step": 16385 }, { "epoch": 1.110237820990582, "grad_norm": 5.813026428222656, "learning_rate": 7.859538640564036e-05, "loss": 0.7023, "step": 16386 }, { "epoch": 1.110305576258554, "grad_norm": 5.524173259735107, "learning_rate": 7.859401738654255e-05, "loss": 0.6626, "step": 16387 }, { "epoch": 1.1103733315265263, "grad_norm": 7.234399318695068, "learning_rate": 7.859264836744473e-05, "loss": 0.5818, "step": 16388 }, { "epoch": 1.1104410867944983, "grad_norm": 5.309559345245361, "learning_rate": 7.859127934834691e-05, "loss": 0.7264, "step": 16389 }, { "epoch": 1.1105088420624702, "grad_norm": 6.291361331939697, "learning_rate": 7.858991032924909e-05, "loss": 0.8169, "step": 16390 }, { "epoch": 1.1105765973304424, "grad_norm": 5.197052955627441, "learning_rate": 7.858854131015127e-05, "loss": 0.7933, "step": 16391 }, { "epoch": 1.1106443525984144, "grad_norm": 6.224545955657959, "learning_rate": 7.858717229105347e-05, "loss": 0.9598, "step": 16392 }, { "epoch": 1.1107121078663866, "grad_norm": 7.769322395324707, "learning_rate": 7.858580327195565e-05, "loss": 0.8685, "step": 16393 }, { "epoch": 1.1107798631343586, "grad_norm": 6.402470588684082, "learning_rate": 7.858443425285783e-05, "loss": 0.8052, "step": 16394 }, { "epoch": 1.1108476184023308, "grad_norm": 5.41104793548584, "learning_rate": 7.858306523376e-05, "loss": 0.6345, "step": 16395 }, { "epoch": 1.1109153736703028, "grad_norm": 6.050509452819824, "learning_rate": 7.85816962146622e-05, "loss": 0.5625, "step": 16396 }, { "epoch": 1.110983128938275, "grad_norm": 5.332095146179199, "learning_rate": 7.858032719556438e-05, "loss": 0.5443, "step": 16397 }, { "epoch": 1.111050884206247, "grad_norm": 5.569375991821289, "learning_rate": 7.857895817646656e-05, "loss": 0.5195, "step": 16398 }, { "epoch": 1.1111186394742192, "grad_norm": 5.020506381988525, "learning_rate": 7.857758915736874e-05, "loss": 0.6129, "step": 16399 }, { "epoch": 1.1111863947421912, "grad_norm": 4.502058029174805, "learning_rate": 7.857622013827092e-05, "loss": 0.6652, "step": 16400 }, { "epoch": 1.1112541500101634, "grad_norm": 5.6798481941223145, "learning_rate": 7.857485111917312e-05, "loss": 0.7755, "step": 16401 }, { "epoch": 1.1113219052781353, "grad_norm": 9.84770679473877, "learning_rate": 7.85734821000753e-05, "loss": 0.786, "step": 16402 }, { "epoch": 1.1113896605461075, "grad_norm": 6.1381378173828125, "learning_rate": 7.857211308097748e-05, "loss": 0.8187, "step": 16403 }, { "epoch": 1.1114574158140795, "grad_norm": 5.588286399841309, "learning_rate": 7.857074406187967e-05, "loss": 0.6508, "step": 16404 }, { "epoch": 1.1115251710820517, "grad_norm": 5.41074275970459, "learning_rate": 7.856937504278185e-05, "loss": 0.6348, "step": 16405 }, { "epoch": 1.1115929263500237, "grad_norm": 6.4471235275268555, "learning_rate": 7.856800602368403e-05, "loss": 0.9603, "step": 16406 }, { "epoch": 1.111660681617996, "grad_norm": 6.701603412628174, "learning_rate": 7.856663700458622e-05, "loss": 0.7937, "step": 16407 }, { "epoch": 1.111728436885968, "grad_norm": 7.06520414352417, "learning_rate": 7.85652679854884e-05, "loss": 0.6763, "step": 16408 }, { "epoch": 1.1117961921539399, "grad_norm": 4.8184404373168945, "learning_rate": 7.856389896639059e-05, "loss": 0.7191, "step": 16409 }, { "epoch": 1.111863947421912, "grad_norm": 5.840653896331787, "learning_rate": 7.856252994729278e-05, "loss": 0.7294, "step": 16410 }, { "epoch": 1.111931702689884, "grad_norm": 5.681267261505127, "learning_rate": 7.856116092819496e-05, "loss": 0.8226, "step": 16411 }, { "epoch": 1.1119994579578563, "grad_norm": 5.182020664215088, "learning_rate": 7.855979190909714e-05, "loss": 0.6697, "step": 16412 }, { "epoch": 1.1120672132258282, "grad_norm": 7.743770599365234, "learning_rate": 7.855842288999932e-05, "loss": 0.8648, "step": 16413 }, { "epoch": 1.1121349684938004, "grad_norm": 4.801437854766846, "learning_rate": 7.85570538709015e-05, "loss": 0.611, "step": 16414 }, { "epoch": 1.1122027237617724, "grad_norm": 4.749513626098633, "learning_rate": 7.85556848518037e-05, "loss": 0.696, "step": 16415 }, { "epoch": 1.1122704790297446, "grad_norm": 6.828696250915527, "learning_rate": 7.855431583270587e-05, "loss": 0.5749, "step": 16416 }, { "epoch": 1.1123382342977166, "grad_norm": 5.405619144439697, "learning_rate": 7.855294681360806e-05, "loss": 0.5387, "step": 16417 }, { "epoch": 1.1124059895656888, "grad_norm": 5.546106338500977, "learning_rate": 7.855157779451024e-05, "loss": 0.6858, "step": 16418 }, { "epoch": 1.1124737448336608, "grad_norm": 5.9285712242126465, "learning_rate": 7.855020877541243e-05, "loss": 0.8244, "step": 16419 }, { "epoch": 1.112541500101633, "grad_norm": 5.90974760055542, "learning_rate": 7.854883975631461e-05, "loss": 0.6853, "step": 16420 }, { "epoch": 1.112609255369605, "grad_norm": 11.681938171386719, "learning_rate": 7.854747073721679e-05, "loss": 0.7719, "step": 16421 }, { "epoch": 1.1126770106375772, "grad_norm": 5.424148082733154, "learning_rate": 7.854610171811897e-05, "loss": 0.6133, "step": 16422 }, { "epoch": 1.1127447659055492, "grad_norm": 6.452503204345703, "learning_rate": 7.854473269902115e-05, "loss": 0.523, "step": 16423 }, { "epoch": 1.1128125211735211, "grad_norm": 11.88731861114502, "learning_rate": 7.854336367992334e-05, "loss": 0.9297, "step": 16424 }, { "epoch": 1.1128802764414933, "grad_norm": 6.110743522644043, "learning_rate": 7.854199466082553e-05, "loss": 0.5357, "step": 16425 }, { "epoch": 1.1129480317094653, "grad_norm": 5.067276477813721, "learning_rate": 7.85406256417277e-05, "loss": 0.5576, "step": 16426 }, { "epoch": 1.1130157869774375, "grad_norm": 6.3760552406311035, "learning_rate": 7.853925662262989e-05, "loss": 0.6453, "step": 16427 }, { "epoch": 1.1130835422454095, "grad_norm": 7.936148166656494, "learning_rate": 7.853788760353207e-05, "loss": 0.8744, "step": 16428 }, { "epoch": 1.1131512975133817, "grad_norm": 4.137428283691406, "learning_rate": 7.853651858443426e-05, "loss": 0.5353, "step": 16429 }, { "epoch": 1.1132190527813537, "grad_norm": 6.208136081695557, "learning_rate": 7.853514956533644e-05, "loss": 0.7925, "step": 16430 }, { "epoch": 1.113286808049326, "grad_norm": 7.687192440032959, "learning_rate": 7.853378054623862e-05, "loss": 0.9704, "step": 16431 }, { "epoch": 1.1133545633172979, "grad_norm": 4.639840602874756, "learning_rate": 7.85324115271408e-05, "loss": 0.6534, "step": 16432 }, { "epoch": 1.11342231858527, "grad_norm": 6.294159889221191, "learning_rate": 7.8531042508043e-05, "loss": 0.8825, "step": 16433 }, { "epoch": 1.113490073853242, "grad_norm": 7.234892845153809, "learning_rate": 7.852967348894518e-05, "loss": 0.6695, "step": 16434 }, { "epoch": 1.1135578291212143, "grad_norm": 7.041029453277588, "learning_rate": 7.852830446984736e-05, "loss": 0.8467, "step": 16435 }, { "epoch": 1.1136255843891862, "grad_norm": 8.180865287780762, "learning_rate": 7.852693545074954e-05, "loss": 0.5544, "step": 16436 }, { "epoch": 1.1136933396571584, "grad_norm": 5.615832328796387, "learning_rate": 7.852556643165172e-05, "loss": 0.788, "step": 16437 }, { "epoch": 1.1137610949251304, "grad_norm": 5.674948692321777, "learning_rate": 7.852419741255391e-05, "loss": 0.6344, "step": 16438 }, { "epoch": 1.1138288501931024, "grad_norm": 5.536764621734619, "learning_rate": 7.852282839345609e-05, "loss": 0.6614, "step": 16439 }, { "epoch": 1.1138966054610746, "grad_norm": 5.49966287612915, "learning_rate": 7.852145937435827e-05, "loss": 0.5425, "step": 16440 }, { "epoch": 1.1139643607290466, "grad_norm": 5.57595157623291, "learning_rate": 7.852009035526045e-05, "loss": 0.5386, "step": 16441 }, { "epoch": 1.1140321159970188, "grad_norm": 4.964458465576172, "learning_rate": 7.851872133616265e-05, "loss": 0.5351, "step": 16442 }, { "epoch": 1.1140998712649908, "grad_norm": 6.3265509605407715, "learning_rate": 7.851735231706483e-05, "loss": 0.6693, "step": 16443 }, { "epoch": 1.114167626532963, "grad_norm": 5.302865505218506, "learning_rate": 7.8515983297967e-05, "loss": 0.6336, "step": 16444 }, { "epoch": 1.114235381800935, "grad_norm": 4.544825553894043, "learning_rate": 7.851461427886919e-05, "loss": 0.5943, "step": 16445 }, { "epoch": 1.1143031370689072, "grad_norm": 5.0601630210876465, "learning_rate": 7.851324525977137e-05, "loss": 0.714, "step": 16446 }, { "epoch": 1.1143708923368791, "grad_norm": 4.90870475769043, "learning_rate": 7.851187624067356e-05, "loss": 0.6078, "step": 16447 }, { "epoch": 1.1144386476048513, "grad_norm": 5.746411323547363, "learning_rate": 7.851050722157574e-05, "loss": 0.5527, "step": 16448 }, { "epoch": 1.1145064028728233, "grad_norm": 5.825742244720459, "learning_rate": 7.850913820247792e-05, "loss": 0.5043, "step": 16449 }, { "epoch": 1.1145741581407955, "grad_norm": 6.97534704208374, "learning_rate": 7.850776918338011e-05, "loss": 0.8069, "step": 16450 }, { "epoch": 1.1146419134087675, "grad_norm": 6.477380752563477, "learning_rate": 7.85064001642823e-05, "loss": 0.6987, "step": 16451 }, { "epoch": 1.1147096686767397, "grad_norm": 5.29969596862793, "learning_rate": 7.850503114518448e-05, "loss": 0.7252, "step": 16452 }, { "epoch": 1.1147774239447117, "grad_norm": 5.611807823181152, "learning_rate": 7.850366212608667e-05, "loss": 0.6814, "step": 16453 }, { "epoch": 1.114845179212684, "grad_norm": 6.4802350997924805, "learning_rate": 7.850229310698885e-05, "loss": 0.4992, "step": 16454 }, { "epoch": 1.1149129344806559, "grad_norm": 5.166659832000732, "learning_rate": 7.850092408789103e-05, "loss": 0.708, "step": 16455 }, { "epoch": 1.114980689748628, "grad_norm": 4.782804012298584, "learning_rate": 7.849955506879322e-05, "loss": 0.7891, "step": 16456 }, { "epoch": 1.1150484450166, "grad_norm": 6.390671730041504, "learning_rate": 7.84981860496954e-05, "loss": 0.731, "step": 16457 }, { "epoch": 1.115116200284572, "grad_norm": 5.91715145111084, "learning_rate": 7.849681703059758e-05, "loss": 0.6395, "step": 16458 }, { "epoch": 1.1151839555525442, "grad_norm": 6.513552665710449, "learning_rate": 7.849544801149977e-05, "loss": 0.809, "step": 16459 }, { "epoch": 1.1152517108205162, "grad_norm": 6.119845390319824, "learning_rate": 7.849407899240195e-05, "loss": 0.6678, "step": 16460 }, { "epoch": 1.1153194660884884, "grad_norm": 4.332753658294678, "learning_rate": 7.849270997330414e-05, "loss": 0.6528, "step": 16461 }, { "epoch": 1.1153872213564604, "grad_norm": 5.871253490447998, "learning_rate": 7.849134095420632e-05, "loss": 0.6437, "step": 16462 }, { "epoch": 1.1154549766244326, "grad_norm": 4.9431047439575195, "learning_rate": 7.84899719351085e-05, "loss": 0.4917, "step": 16463 }, { "epoch": 1.1155227318924046, "grad_norm": 5.625153064727783, "learning_rate": 7.848860291601068e-05, "loss": 0.7017, "step": 16464 }, { "epoch": 1.1155904871603768, "grad_norm": 6.678959369659424, "learning_rate": 7.848723389691287e-05, "loss": 0.7541, "step": 16465 }, { "epoch": 1.1156582424283488, "grad_norm": 5.871247291564941, "learning_rate": 7.848586487781505e-05, "loss": 0.564, "step": 16466 }, { "epoch": 1.115725997696321, "grad_norm": 4.651440143585205, "learning_rate": 7.848449585871723e-05, "loss": 0.5407, "step": 16467 }, { "epoch": 1.115793752964293, "grad_norm": 6.9464521408081055, "learning_rate": 7.848312683961942e-05, "loss": 0.7581, "step": 16468 }, { "epoch": 1.1158615082322652, "grad_norm": 6.172132968902588, "learning_rate": 7.84817578205216e-05, "loss": 0.6855, "step": 16469 }, { "epoch": 1.1159292635002371, "grad_norm": 6.238826274871826, "learning_rate": 7.848038880142379e-05, "loss": 0.5436, "step": 16470 }, { "epoch": 1.1159970187682093, "grad_norm": 7.121433734893799, "learning_rate": 7.847901978232597e-05, "loss": 0.962, "step": 16471 }, { "epoch": 1.1160647740361813, "grad_norm": 6.923501014709473, "learning_rate": 7.847765076322815e-05, "loss": 0.6176, "step": 16472 }, { "epoch": 1.1161325293041533, "grad_norm": 5.844583511352539, "learning_rate": 7.847628174413033e-05, "loss": 0.8946, "step": 16473 }, { "epoch": 1.1162002845721255, "grad_norm": 6.436974048614502, "learning_rate": 7.847491272503252e-05, "loss": 0.7809, "step": 16474 }, { "epoch": 1.1162680398400975, "grad_norm": 5.668829441070557, "learning_rate": 7.84735437059347e-05, "loss": 0.7343, "step": 16475 }, { "epoch": 1.1163357951080697, "grad_norm": 4.371751308441162, "learning_rate": 7.847217468683689e-05, "loss": 0.5913, "step": 16476 }, { "epoch": 1.1164035503760417, "grad_norm": 5.785346031188965, "learning_rate": 7.847080566773907e-05, "loss": 0.5453, "step": 16477 }, { "epoch": 1.1164713056440139, "grad_norm": 6.657867431640625, "learning_rate": 7.846943664864125e-05, "loss": 0.9195, "step": 16478 }, { "epoch": 1.1165390609119858, "grad_norm": 7.955598831176758, "learning_rate": 7.846806762954344e-05, "loss": 0.7671, "step": 16479 }, { "epoch": 1.116606816179958, "grad_norm": 6.41574764251709, "learning_rate": 7.846669861044562e-05, "loss": 0.6941, "step": 16480 }, { "epoch": 1.11667457144793, "grad_norm": 5.509103775024414, "learning_rate": 7.84653295913478e-05, "loss": 0.6792, "step": 16481 }, { "epoch": 1.1167423267159022, "grad_norm": 7.486603260040283, "learning_rate": 7.846396057224998e-05, "loss": 0.6783, "step": 16482 }, { "epoch": 1.1168100819838742, "grad_norm": 5.092047214508057, "learning_rate": 7.846259155315216e-05, "loss": 0.5798, "step": 16483 }, { "epoch": 1.1168778372518464, "grad_norm": 6.867835998535156, "learning_rate": 7.846122253405435e-05, "loss": 0.6381, "step": 16484 }, { "epoch": 1.1169455925198184, "grad_norm": 4.400048732757568, "learning_rate": 7.845985351495654e-05, "loss": 0.5484, "step": 16485 }, { "epoch": 1.1170133477877906, "grad_norm": 5.648205757141113, "learning_rate": 7.845848449585872e-05, "loss": 0.7163, "step": 16486 }, { "epoch": 1.1170811030557626, "grad_norm": 5.113688945770264, "learning_rate": 7.84571154767609e-05, "loss": 0.6432, "step": 16487 }, { "epoch": 1.1171488583237346, "grad_norm": 9.245594024658203, "learning_rate": 7.845574645766309e-05, "loss": 1.042, "step": 16488 }, { "epoch": 1.1172166135917068, "grad_norm": 5.310211181640625, "learning_rate": 7.845437743856527e-05, "loss": 0.7199, "step": 16489 }, { "epoch": 1.1172843688596787, "grad_norm": 5.872876167297363, "learning_rate": 7.845300841946745e-05, "loss": 0.6653, "step": 16490 }, { "epoch": 1.117352124127651, "grad_norm": 6.042059421539307, "learning_rate": 7.845163940036963e-05, "loss": 0.7398, "step": 16491 }, { "epoch": 1.117419879395623, "grad_norm": 6.278406143188477, "learning_rate": 7.845027038127181e-05, "loss": 0.6938, "step": 16492 }, { "epoch": 1.1174876346635951, "grad_norm": 4.715973854064941, "learning_rate": 7.8448901362174e-05, "loss": 0.6145, "step": 16493 }, { "epoch": 1.1175553899315671, "grad_norm": 5.362488269805908, "learning_rate": 7.844753234307619e-05, "loss": 0.7129, "step": 16494 }, { "epoch": 1.1176231451995393, "grad_norm": 5.5275187492370605, "learning_rate": 7.844616332397837e-05, "loss": 0.519, "step": 16495 }, { "epoch": 1.1176909004675113, "grad_norm": 4.727498531341553, "learning_rate": 7.844479430488056e-05, "loss": 0.6799, "step": 16496 }, { "epoch": 1.1177586557354835, "grad_norm": 5.0060200691223145, "learning_rate": 7.844342528578274e-05, "loss": 0.6559, "step": 16497 }, { "epoch": 1.1178264110034555, "grad_norm": 8.323198318481445, "learning_rate": 7.844205626668492e-05, "loss": 0.574, "step": 16498 }, { "epoch": 1.1178941662714277, "grad_norm": 5.619205951690674, "learning_rate": 7.844068724758711e-05, "loss": 0.7148, "step": 16499 }, { "epoch": 1.1179619215393997, "grad_norm": 6.951564788818359, "learning_rate": 7.84393182284893e-05, "loss": 0.6269, "step": 16500 }, { "epoch": 1.1180296768073719, "grad_norm": 5.056802272796631, "learning_rate": 7.843794920939147e-05, "loss": 0.8339, "step": 16501 }, { "epoch": 1.1180974320753438, "grad_norm": 6.4941792488098145, "learning_rate": 7.843658019029367e-05, "loss": 0.7636, "step": 16502 }, { "epoch": 1.1181651873433158, "grad_norm": 6.141055583953857, "learning_rate": 7.843521117119585e-05, "loss": 0.7537, "step": 16503 }, { "epoch": 1.118232942611288, "grad_norm": 7.453089237213135, "learning_rate": 7.843384215209803e-05, "loss": 0.8046, "step": 16504 }, { "epoch": 1.1183006978792602, "grad_norm": 7.412580490112305, "learning_rate": 7.843247313300021e-05, "loss": 0.6542, "step": 16505 }, { "epoch": 1.1183684531472322, "grad_norm": 6.80420446395874, "learning_rate": 7.843110411390239e-05, "loss": 0.6983, "step": 16506 }, { "epoch": 1.1184362084152042, "grad_norm": 5.158776760101318, "learning_rate": 7.842973509480458e-05, "loss": 0.5782, "step": 16507 }, { "epoch": 1.1185039636831764, "grad_norm": 6.552058219909668, "learning_rate": 7.842836607570676e-05, "loss": 0.9792, "step": 16508 }, { "epoch": 1.1185717189511484, "grad_norm": 4.8856024742126465, "learning_rate": 7.842699705660894e-05, "loss": 0.6559, "step": 16509 }, { "epoch": 1.1186394742191206, "grad_norm": 5.895238399505615, "learning_rate": 7.842562803751113e-05, "loss": 0.6988, "step": 16510 }, { "epoch": 1.1187072294870926, "grad_norm": 10.207904815673828, "learning_rate": 7.842425901841332e-05, "loss": 0.9168, "step": 16511 }, { "epoch": 1.1187749847550648, "grad_norm": 4.816634654998779, "learning_rate": 7.84228899993155e-05, "loss": 0.7268, "step": 16512 }, { "epoch": 1.1188427400230367, "grad_norm": 7.246109485626221, "learning_rate": 7.842152098021768e-05, "loss": 0.8063, "step": 16513 }, { "epoch": 1.118910495291009, "grad_norm": 7.619381427764893, "learning_rate": 7.842015196111986e-05, "loss": 0.8026, "step": 16514 }, { "epoch": 1.118978250558981, "grad_norm": 8.938972473144531, "learning_rate": 7.841878294202204e-05, "loss": 1.2743, "step": 16515 }, { "epoch": 1.1190460058269531, "grad_norm": 7.479329586029053, "learning_rate": 7.841741392292423e-05, "loss": 0.6163, "step": 16516 }, { "epoch": 1.119113761094925, "grad_norm": 5.318511009216309, "learning_rate": 7.841604490382641e-05, "loss": 0.7075, "step": 16517 }, { "epoch": 1.1191815163628973, "grad_norm": 6.804056644439697, "learning_rate": 7.84146758847286e-05, "loss": 0.81, "step": 16518 }, { "epoch": 1.1192492716308693, "grad_norm": 5.385700702667236, "learning_rate": 7.841330686563078e-05, "loss": 0.7378, "step": 16519 }, { "epoch": 1.1193170268988415, "grad_norm": 7.53215217590332, "learning_rate": 7.841193784653297e-05, "loss": 0.6082, "step": 16520 }, { "epoch": 1.1193847821668135, "grad_norm": 8.614946365356445, "learning_rate": 7.841056882743515e-05, "loss": 0.6866, "step": 16521 }, { "epoch": 1.1194525374347855, "grad_norm": 6.260163307189941, "learning_rate": 7.840919980833733e-05, "loss": 0.6287, "step": 16522 }, { "epoch": 1.1195202927027577, "grad_norm": 9.336182594299316, "learning_rate": 7.840783078923951e-05, "loss": 0.7944, "step": 16523 }, { "epoch": 1.1195880479707296, "grad_norm": 10.182073593139648, "learning_rate": 7.840646177014169e-05, "loss": 0.6368, "step": 16524 }, { "epoch": 1.1196558032387018, "grad_norm": 5.3863420486450195, "learning_rate": 7.840509275104388e-05, "loss": 0.7848, "step": 16525 }, { "epoch": 1.1197235585066738, "grad_norm": 4.769381999969482, "learning_rate": 7.840372373194606e-05, "loss": 0.5335, "step": 16526 }, { "epoch": 1.119791313774646, "grad_norm": 8.063030242919922, "learning_rate": 7.840235471284825e-05, "loss": 0.7534, "step": 16527 }, { "epoch": 1.119859069042618, "grad_norm": 6.1403045654296875, "learning_rate": 7.840098569375043e-05, "loss": 0.8089, "step": 16528 }, { "epoch": 1.1199268243105902, "grad_norm": 7.606939792633057, "learning_rate": 7.839961667465262e-05, "loss": 0.7578, "step": 16529 }, { "epoch": 1.1199945795785622, "grad_norm": 10.877296447753906, "learning_rate": 7.83982476555548e-05, "loss": 0.54, "step": 16530 }, { "epoch": 1.1200623348465344, "grad_norm": 5.781667709350586, "learning_rate": 7.839687863645698e-05, "loss": 0.8624, "step": 16531 }, { "epoch": 1.1201300901145064, "grad_norm": 4.3829827308654785, "learning_rate": 7.839550961735916e-05, "loss": 0.4979, "step": 16532 }, { "epoch": 1.1201978453824786, "grad_norm": 4.735124111175537, "learning_rate": 7.839414059826134e-05, "loss": 0.6497, "step": 16533 }, { "epoch": 1.1202656006504506, "grad_norm": 5.546881198883057, "learning_rate": 7.839277157916353e-05, "loss": 0.6842, "step": 16534 }, { "epoch": 1.1203333559184228, "grad_norm": 4.9232177734375, "learning_rate": 7.839140256006571e-05, "loss": 0.5805, "step": 16535 }, { "epoch": 1.1204011111863947, "grad_norm": 4.378359794616699, "learning_rate": 7.83900335409679e-05, "loss": 0.6249, "step": 16536 }, { "epoch": 1.1204688664543667, "grad_norm": 5.407273769378662, "learning_rate": 7.838866452187008e-05, "loss": 0.8445, "step": 16537 }, { "epoch": 1.120536621722339, "grad_norm": 4.610024452209473, "learning_rate": 7.838729550277226e-05, "loss": 0.518, "step": 16538 }, { "epoch": 1.120604376990311, "grad_norm": 11.108832359313965, "learning_rate": 7.838592648367445e-05, "loss": 1.08, "step": 16539 }, { "epoch": 1.120672132258283, "grad_norm": 4.610969066619873, "learning_rate": 7.838455746457663e-05, "loss": 0.5164, "step": 16540 }, { "epoch": 1.120739887526255, "grad_norm": 8.567065238952637, "learning_rate": 7.838318844547881e-05, "loss": 1.0346, "step": 16541 }, { "epoch": 1.1208076427942273, "grad_norm": 6.773820877075195, "learning_rate": 7.8381819426381e-05, "loss": 0.7441, "step": 16542 }, { "epoch": 1.1208753980621993, "grad_norm": 9.476489067077637, "learning_rate": 7.838045040728318e-05, "loss": 0.5177, "step": 16543 }, { "epoch": 1.1209431533301715, "grad_norm": 6.598116397857666, "learning_rate": 7.837908138818537e-05, "loss": 0.6661, "step": 16544 }, { "epoch": 1.1210109085981435, "grad_norm": 6.180941581726074, "learning_rate": 7.837771236908756e-05, "loss": 0.8152, "step": 16545 }, { "epoch": 1.1210786638661157, "grad_norm": 6.241885662078857, "learning_rate": 7.837634334998974e-05, "loss": 0.8264, "step": 16546 }, { "epoch": 1.1211464191340876, "grad_norm": 5.314054489135742, "learning_rate": 7.837497433089192e-05, "loss": 0.6123, "step": 16547 }, { "epoch": 1.1212141744020598, "grad_norm": 4.802424430847168, "learning_rate": 7.837360531179411e-05, "loss": 0.6321, "step": 16548 }, { "epoch": 1.1212819296700318, "grad_norm": 5.131801605224609, "learning_rate": 7.83722362926963e-05, "loss": 0.5852, "step": 16549 }, { "epoch": 1.121349684938004, "grad_norm": 4.5471391677856445, "learning_rate": 7.837086727359847e-05, "loss": 0.419, "step": 16550 }, { "epoch": 1.121417440205976, "grad_norm": 6.3551225662231445, "learning_rate": 7.836949825450065e-05, "loss": 0.6822, "step": 16551 }, { "epoch": 1.121485195473948, "grad_norm": 6.698050022125244, "learning_rate": 7.836812923540285e-05, "loss": 0.7381, "step": 16552 }, { "epoch": 1.1215529507419202, "grad_norm": 5.779103755950928, "learning_rate": 7.836676021630503e-05, "loss": 0.7189, "step": 16553 }, { "epoch": 1.1216207060098922, "grad_norm": 5.6808552742004395, "learning_rate": 7.836539119720721e-05, "loss": 0.6718, "step": 16554 }, { "epoch": 1.1216884612778644, "grad_norm": 5.681619644165039, "learning_rate": 7.836402217810939e-05, "loss": 0.4688, "step": 16555 }, { "epoch": 1.1217562165458363, "grad_norm": 5.486327171325684, "learning_rate": 7.836265315901157e-05, "loss": 0.7598, "step": 16556 }, { "epoch": 1.1218239718138086, "grad_norm": 5.492034435272217, "learning_rate": 7.836128413991376e-05, "loss": 0.8031, "step": 16557 }, { "epoch": 1.1218917270817805, "grad_norm": 5.560188293457031, "learning_rate": 7.835991512081594e-05, "loss": 0.7689, "step": 16558 }, { "epoch": 1.1219594823497527, "grad_norm": 6.246037006378174, "learning_rate": 7.835854610171812e-05, "loss": 0.7454, "step": 16559 }, { "epoch": 1.1220272376177247, "grad_norm": 4.9940409660339355, "learning_rate": 7.83571770826203e-05, "loss": 0.5191, "step": 16560 }, { "epoch": 1.122094992885697, "grad_norm": 6.49337100982666, "learning_rate": 7.835580806352249e-05, "loss": 0.732, "step": 16561 }, { "epoch": 1.122162748153669, "grad_norm": 12.41988468170166, "learning_rate": 7.835443904442468e-05, "loss": 0.7257, "step": 16562 }, { "epoch": 1.122230503421641, "grad_norm": 6.870421886444092, "learning_rate": 7.835307002532686e-05, "loss": 0.6377, "step": 16563 }, { "epoch": 1.122298258689613, "grad_norm": 6.671525478363037, "learning_rate": 7.835170100622904e-05, "loss": 0.6479, "step": 16564 }, { "epoch": 1.1223660139575853, "grad_norm": 5.417186260223389, "learning_rate": 7.835033198713122e-05, "loss": 0.9996, "step": 16565 }, { "epoch": 1.1224337692255573, "grad_norm": 8.18811321258545, "learning_rate": 7.834896296803341e-05, "loss": 0.8235, "step": 16566 }, { "epoch": 1.1225015244935295, "grad_norm": 6.063117027282715, "learning_rate": 7.83475939489356e-05, "loss": 0.777, "step": 16567 }, { "epoch": 1.1225692797615014, "grad_norm": 4.244666576385498, "learning_rate": 7.834622492983777e-05, "loss": 0.4565, "step": 16568 }, { "epoch": 1.1226370350294737, "grad_norm": 5.733912467956543, "learning_rate": 7.834485591073995e-05, "loss": 0.6999, "step": 16569 }, { "epoch": 1.1227047902974456, "grad_norm": 5.080024719238281, "learning_rate": 7.834348689164214e-05, "loss": 0.5927, "step": 16570 }, { "epoch": 1.1227725455654176, "grad_norm": 5.610750198364258, "learning_rate": 7.834211787254433e-05, "loss": 0.6637, "step": 16571 }, { "epoch": 1.1228403008333898, "grad_norm": 5.9720635414123535, "learning_rate": 7.834074885344651e-05, "loss": 0.8085, "step": 16572 }, { "epoch": 1.1229080561013618, "grad_norm": 5.229776859283447, "learning_rate": 7.833937983434869e-05, "loss": 0.5649, "step": 16573 }, { "epoch": 1.122975811369334, "grad_norm": 5.457815647125244, "learning_rate": 7.833801081525087e-05, "loss": 0.6228, "step": 16574 }, { "epoch": 1.123043566637306, "grad_norm": 5.012820243835449, "learning_rate": 7.833664179615306e-05, "loss": 0.6635, "step": 16575 }, { "epoch": 1.1231113219052782, "grad_norm": 5.759067535400391, "learning_rate": 7.833527277705524e-05, "loss": 0.634, "step": 16576 }, { "epoch": 1.1231790771732502, "grad_norm": 9.28831958770752, "learning_rate": 7.833390375795742e-05, "loss": 0.7987, "step": 16577 }, { "epoch": 1.1232468324412224, "grad_norm": 6.013178825378418, "learning_rate": 7.83325347388596e-05, "loss": 0.6997, "step": 16578 }, { "epoch": 1.1233145877091943, "grad_norm": 5.971577167510986, "learning_rate": 7.833116571976179e-05, "loss": 0.6976, "step": 16579 }, { "epoch": 1.1233823429771665, "grad_norm": 7.694915771484375, "learning_rate": 7.832979670066398e-05, "loss": 0.9614, "step": 16580 }, { "epoch": 1.1234500982451385, "grad_norm": 5.692512512207031, "learning_rate": 7.832842768156616e-05, "loss": 0.7804, "step": 16581 }, { "epoch": 1.1235178535131107, "grad_norm": 4.4728312492370605, "learning_rate": 7.832705866246834e-05, "loss": 0.5845, "step": 16582 }, { "epoch": 1.1235856087810827, "grad_norm": 5.194689750671387, "learning_rate": 7.832568964337052e-05, "loss": 0.5488, "step": 16583 }, { "epoch": 1.123653364049055, "grad_norm": 5.546979904174805, "learning_rate": 7.832432062427271e-05, "loss": 0.6886, "step": 16584 }, { "epoch": 1.123721119317027, "grad_norm": 5.987720489501953, "learning_rate": 7.83229516051749e-05, "loss": 0.8281, "step": 16585 }, { "epoch": 1.1237888745849989, "grad_norm": 6.79714822769165, "learning_rate": 7.832158258607707e-05, "loss": 0.7205, "step": 16586 }, { "epoch": 1.123856629852971, "grad_norm": 5.197304725646973, "learning_rate": 7.832021356697926e-05, "loss": 0.5863, "step": 16587 }, { "epoch": 1.123924385120943, "grad_norm": 5.661016941070557, "learning_rate": 7.831884454788145e-05, "loss": 0.7591, "step": 16588 }, { "epoch": 1.1239921403889153, "grad_norm": 7.9939494132995605, "learning_rate": 7.831747552878363e-05, "loss": 0.5234, "step": 16589 }, { "epoch": 1.1240598956568872, "grad_norm": 4.955355644226074, "learning_rate": 7.831610650968581e-05, "loss": 0.5613, "step": 16590 }, { "epoch": 1.1241276509248594, "grad_norm": 6.7157883644104, "learning_rate": 7.8314737490588e-05, "loss": 0.9179, "step": 16591 }, { "epoch": 1.1241954061928314, "grad_norm": 5.742237091064453, "learning_rate": 7.831336847149018e-05, "loss": 0.5604, "step": 16592 }, { "epoch": 1.1242631614608036, "grad_norm": 5.890753746032715, "learning_rate": 7.831199945239236e-05, "loss": 0.6248, "step": 16593 }, { "epoch": 1.1243309167287756, "grad_norm": 6.5256242752075195, "learning_rate": 7.831063043329456e-05, "loss": 0.9446, "step": 16594 }, { "epoch": 1.1243986719967478, "grad_norm": 7.787291526794434, "learning_rate": 7.830926141419674e-05, "loss": 0.7736, "step": 16595 }, { "epoch": 1.1244664272647198, "grad_norm": 7.017754077911377, "learning_rate": 7.830789239509892e-05, "loss": 0.4302, "step": 16596 }, { "epoch": 1.124534182532692, "grad_norm": 5.535482406616211, "learning_rate": 7.83065233760011e-05, "loss": 0.8716, "step": 16597 }, { "epoch": 1.124601937800664, "grad_norm": 4.967796802520752, "learning_rate": 7.83051543569033e-05, "loss": 0.639, "step": 16598 }, { "epoch": 1.1246696930686362, "grad_norm": 11.574756622314453, "learning_rate": 7.830378533780547e-05, "loss": 0.6214, "step": 16599 }, { "epoch": 1.1247374483366082, "grad_norm": 5.107860088348389, "learning_rate": 7.830241631870765e-05, "loss": 0.5664, "step": 16600 }, { "epoch": 1.1248052036045801, "grad_norm": 6.146525859832764, "learning_rate": 7.830104729960983e-05, "loss": 0.7694, "step": 16601 }, { "epoch": 1.1248729588725523, "grad_norm": 5.966607570648193, "learning_rate": 7.829967828051201e-05, "loss": 0.6571, "step": 16602 }, { "epoch": 1.1249407141405243, "grad_norm": 6.187711238861084, "learning_rate": 7.829830926141421e-05, "loss": 1.0278, "step": 16603 }, { "epoch": 1.1250084694084965, "grad_norm": 7.682037353515625, "learning_rate": 7.829694024231639e-05, "loss": 0.6538, "step": 16604 }, { "epoch": 1.1250762246764685, "grad_norm": 7.3382415771484375, "learning_rate": 7.829557122321857e-05, "loss": 0.6901, "step": 16605 }, { "epoch": 1.1251439799444407, "grad_norm": 6.714434623718262, "learning_rate": 7.829420220412075e-05, "loss": 0.5718, "step": 16606 }, { "epoch": 1.1252117352124127, "grad_norm": 6.157127857208252, "learning_rate": 7.829283318502294e-05, "loss": 0.7647, "step": 16607 }, { "epoch": 1.125279490480385, "grad_norm": 5.902122497558594, "learning_rate": 7.829146416592512e-05, "loss": 0.7683, "step": 16608 }, { "epoch": 1.1253472457483569, "grad_norm": 5.676656723022461, "learning_rate": 7.82900951468273e-05, "loss": 0.6912, "step": 16609 }, { "epoch": 1.125415001016329, "grad_norm": 5.85725736618042, "learning_rate": 7.828872612772948e-05, "loss": 0.5421, "step": 16610 }, { "epoch": 1.125482756284301, "grad_norm": 6.861623287200928, "learning_rate": 7.828735710863166e-05, "loss": 0.5735, "step": 16611 }, { "epoch": 1.1255505115522733, "grad_norm": 4.622976779937744, "learning_rate": 7.828598808953386e-05, "loss": 0.6974, "step": 16612 }, { "epoch": 1.1256182668202452, "grad_norm": 5.978318691253662, "learning_rate": 7.828461907043604e-05, "loss": 0.7616, "step": 16613 }, { "epoch": 1.1256860220882174, "grad_norm": 8.068818092346191, "learning_rate": 7.828325005133822e-05, "loss": 0.8741, "step": 16614 }, { "epoch": 1.1257537773561894, "grad_norm": 5.705319404602051, "learning_rate": 7.82818810322404e-05, "loss": 0.604, "step": 16615 }, { "epoch": 1.1258215326241616, "grad_norm": 6.627946853637695, "learning_rate": 7.828051201314258e-05, "loss": 0.8083, "step": 16616 }, { "epoch": 1.1258892878921336, "grad_norm": 5.951031684875488, "learning_rate": 7.827914299404477e-05, "loss": 0.7051, "step": 16617 }, { "epoch": 1.1259570431601058, "grad_norm": 5.129096031188965, "learning_rate": 7.827777397494695e-05, "loss": 0.7117, "step": 16618 }, { "epoch": 1.1260247984280778, "grad_norm": 7.520353317260742, "learning_rate": 7.827640495584913e-05, "loss": 0.6925, "step": 16619 }, { "epoch": 1.1260925536960498, "grad_norm": 6.071813106536865, "learning_rate": 7.827503593675131e-05, "loss": 0.6316, "step": 16620 }, { "epoch": 1.126160308964022, "grad_norm": 5.8903679847717285, "learning_rate": 7.827366691765351e-05, "loss": 0.6882, "step": 16621 }, { "epoch": 1.126228064231994, "grad_norm": 5.252291202545166, "learning_rate": 7.827229789855569e-05, "loss": 0.7473, "step": 16622 }, { "epoch": 1.1262958194999662, "grad_norm": 6.164458274841309, "learning_rate": 7.827092887945787e-05, "loss": 0.7696, "step": 16623 }, { "epoch": 1.1263635747679381, "grad_norm": 5.674738883972168, "learning_rate": 7.826955986036005e-05, "loss": 0.6838, "step": 16624 }, { "epoch": 1.1264313300359103, "grad_norm": 6.459506511688232, "learning_rate": 7.826819084126223e-05, "loss": 0.5691, "step": 16625 }, { "epoch": 1.1264990853038823, "grad_norm": 8.24375057220459, "learning_rate": 7.826682182216442e-05, "loss": 0.5813, "step": 16626 }, { "epoch": 1.1265668405718545, "grad_norm": 6.915633201599121, "learning_rate": 7.82654528030666e-05, "loss": 0.6045, "step": 16627 }, { "epoch": 1.1266345958398265, "grad_norm": 7.0347394943237305, "learning_rate": 7.826408378396878e-05, "loss": 1.0053, "step": 16628 }, { "epoch": 1.1267023511077987, "grad_norm": 9.013284683227539, "learning_rate": 7.826271476487097e-05, "loss": 0.5846, "step": 16629 }, { "epoch": 1.1267701063757707, "grad_norm": 5.217116832733154, "learning_rate": 7.826134574577316e-05, "loss": 0.5151, "step": 16630 }, { "epoch": 1.1268378616437429, "grad_norm": 4.9722466468811035, "learning_rate": 7.825997672667534e-05, "loss": 0.6434, "step": 16631 }, { "epoch": 1.1269056169117149, "grad_norm": 4.9107666015625, "learning_rate": 7.825860770757752e-05, "loss": 0.5564, "step": 16632 }, { "epoch": 1.126973372179687, "grad_norm": 5.44745397567749, "learning_rate": 7.82572386884797e-05, "loss": 0.5663, "step": 16633 }, { "epoch": 1.127041127447659, "grad_norm": 5.827592849731445, "learning_rate": 7.825586966938188e-05, "loss": 0.5999, "step": 16634 }, { "epoch": 1.127108882715631, "grad_norm": 6.206888198852539, "learning_rate": 7.825450065028407e-05, "loss": 0.6755, "step": 16635 }, { "epoch": 1.1271766379836032, "grad_norm": 6.200521469116211, "learning_rate": 7.825313163118625e-05, "loss": 0.5529, "step": 16636 }, { "epoch": 1.1272443932515752, "grad_norm": 5.36111307144165, "learning_rate": 7.825176261208845e-05, "loss": 0.762, "step": 16637 }, { "epoch": 1.1273121485195474, "grad_norm": 4.9850664138793945, "learning_rate": 7.825039359299063e-05, "loss": 0.818, "step": 16638 }, { "epoch": 1.1273799037875194, "grad_norm": 5.960310459136963, "learning_rate": 7.824902457389281e-05, "loss": 0.8029, "step": 16639 }, { "epoch": 1.1274476590554916, "grad_norm": 4.687396049499512, "learning_rate": 7.8247655554795e-05, "loss": 0.5797, "step": 16640 }, { "epoch": 1.1275154143234636, "grad_norm": 4.676320552825928, "learning_rate": 7.824628653569718e-05, "loss": 0.6945, "step": 16641 }, { "epoch": 1.1275831695914358, "grad_norm": 6.536602020263672, "learning_rate": 7.824491751659936e-05, "loss": 0.7977, "step": 16642 }, { "epoch": 1.1276509248594078, "grad_norm": 6.302074432373047, "learning_rate": 7.824354849750154e-05, "loss": 0.9523, "step": 16643 }, { "epoch": 1.12771868012738, "grad_norm": 7.156180381774902, "learning_rate": 7.824217947840374e-05, "loss": 0.6981, "step": 16644 }, { "epoch": 1.127786435395352, "grad_norm": 7.256894588470459, "learning_rate": 7.824081045930592e-05, "loss": 0.8173, "step": 16645 }, { "epoch": 1.1278541906633242, "grad_norm": 9.1759033203125, "learning_rate": 7.82394414402081e-05, "loss": 0.6992, "step": 16646 }, { "epoch": 1.1279219459312961, "grad_norm": 6.0825886726379395, "learning_rate": 7.823807242111028e-05, "loss": 0.837, "step": 16647 }, { "epoch": 1.1279897011992683, "grad_norm": 7.240145683288574, "learning_rate": 7.823670340201246e-05, "loss": 0.6456, "step": 16648 }, { "epoch": 1.1280574564672403, "grad_norm": 4.927544116973877, "learning_rate": 7.823533438291465e-05, "loss": 0.7218, "step": 16649 }, { "epoch": 1.1281252117352123, "grad_norm": 9.792190551757812, "learning_rate": 7.823396536381683e-05, "loss": 0.7131, "step": 16650 }, { "epoch": 1.1281929670031845, "grad_norm": 6.260951042175293, "learning_rate": 7.823259634471901e-05, "loss": 0.6938, "step": 16651 }, { "epoch": 1.1282607222711567, "grad_norm": 5.710566997528076, "learning_rate": 7.82312273256212e-05, "loss": 0.6371, "step": 16652 }, { "epoch": 1.1283284775391287, "grad_norm": 5.180080413818359, "learning_rate": 7.822985830652339e-05, "loss": 0.7467, "step": 16653 }, { "epoch": 1.1283962328071007, "grad_norm": 5.3070149421691895, "learning_rate": 7.822848928742557e-05, "loss": 0.5804, "step": 16654 }, { "epoch": 1.1284639880750729, "grad_norm": 5.685241222381592, "learning_rate": 7.822712026832775e-05, "loss": 0.74, "step": 16655 }, { "epoch": 1.1285317433430448, "grad_norm": 5.124477386474609, "learning_rate": 7.822575124922993e-05, "loss": 0.56, "step": 16656 }, { "epoch": 1.128599498611017, "grad_norm": 4.9993577003479, "learning_rate": 7.822438223013211e-05, "loss": 0.628, "step": 16657 }, { "epoch": 1.128667253878989, "grad_norm": 5.491091251373291, "learning_rate": 7.82230132110343e-05, "loss": 0.7368, "step": 16658 }, { "epoch": 1.1287350091469612, "grad_norm": 5.759190559387207, "learning_rate": 7.822164419193648e-05, "loss": 0.6428, "step": 16659 }, { "epoch": 1.1288027644149332, "grad_norm": 6.618605613708496, "learning_rate": 7.822027517283866e-05, "loss": 0.695, "step": 16660 }, { "epoch": 1.1288705196829054, "grad_norm": 5.8363118171691895, "learning_rate": 7.821890615374084e-05, "loss": 0.6375, "step": 16661 }, { "epoch": 1.1289382749508774, "grad_norm": 6.037831783294678, "learning_rate": 7.821753713464304e-05, "loss": 0.6392, "step": 16662 }, { "epoch": 1.1290060302188496, "grad_norm": 6.488530158996582, "learning_rate": 7.821616811554522e-05, "loss": 0.6329, "step": 16663 }, { "epoch": 1.1290737854868216, "grad_norm": 6.927813529968262, "learning_rate": 7.82147990964474e-05, "loss": 0.7017, "step": 16664 }, { "epoch": 1.1291415407547938, "grad_norm": 7.420223712921143, "learning_rate": 7.821343007734958e-05, "loss": 0.9947, "step": 16665 }, { "epoch": 1.1292092960227658, "grad_norm": 6.57767915725708, "learning_rate": 7.821206105825176e-05, "loss": 0.7923, "step": 16666 }, { "epoch": 1.129277051290738, "grad_norm": 5.805924892425537, "learning_rate": 7.821069203915395e-05, "loss": 0.7343, "step": 16667 }, { "epoch": 1.12934480655871, "grad_norm": 5.582193374633789, "learning_rate": 7.820932302005613e-05, "loss": 0.647, "step": 16668 }, { "epoch": 1.129412561826682, "grad_norm": 5.6104607582092285, "learning_rate": 7.820795400095831e-05, "loss": 0.673, "step": 16669 }, { "epoch": 1.1294803170946541, "grad_norm": 6.4317522048950195, "learning_rate": 7.82065849818605e-05, "loss": 0.8305, "step": 16670 }, { "epoch": 1.129548072362626, "grad_norm": 6.044923305511475, "learning_rate": 7.820521596276267e-05, "loss": 0.6202, "step": 16671 }, { "epoch": 1.1296158276305983, "grad_norm": 5.044763088226318, "learning_rate": 7.820384694366487e-05, "loss": 0.644, "step": 16672 }, { "epoch": 1.1296835828985703, "grad_norm": 7.138949871063232, "learning_rate": 7.820247792456705e-05, "loss": 0.8183, "step": 16673 }, { "epoch": 1.1297513381665425, "grad_norm": 6.46617317199707, "learning_rate": 7.820110890546923e-05, "loss": 0.6277, "step": 16674 }, { "epoch": 1.1298190934345145, "grad_norm": 7.636822700500488, "learning_rate": 7.819973988637141e-05, "loss": 0.6938, "step": 16675 }, { "epoch": 1.1298868487024867, "grad_norm": 7.69808292388916, "learning_rate": 7.81983708672736e-05, "loss": 0.7331, "step": 16676 }, { "epoch": 1.1299546039704587, "grad_norm": 6.143553733825684, "learning_rate": 7.819700184817578e-05, "loss": 0.8705, "step": 16677 }, { "epoch": 1.1300223592384309, "grad_norm": 6.685749530792236, "learning_rate": 7.819563282907796e-05, "loss": 0.7717, "step": 16678 }, { "epoch": 1.1300901145064028, "grad_norm": 6.317489147186279, "learning_rate": 7.819426380998014e-05, "loss": 0.6593, "step": 16679 }, { "epoch": 1.130157869774375, "grad_norm": 5.497630596160889, "learning_rate": 7.819289479088233e-05, "loss": 0.7693, "step": 16680 }, { "epoch": 1.130225625042347, "grad_norm": 7.087355136871338, "learning_rate": 7.819152577178452e-05, "loss": 0.5709, "step": 16681 }, { "epoch": 1.1302933803103192, "grad_norm": 9.138280868530273, "learning_rate": 7.81901567526867e-05, "loss": 0.9426, "step": 16682 }, { "epoch": 1.1303611355782912, "grad_norm": 8.925271987915039, "learning_rate": 7.818878773358888e-05, "loss": 0.7462, "step": 16683 }, { "epoch": 1.1304288908462632, "grad_norm": 5.355180263519287, "learning_rate": 7.818741871449107e-05, "loss": 0.6806, "step": 16684 }, { "epoch": 1.1304966461142354, "grad_norm": 4.724582195281982, "learning_rate": 7.818604969539325e-05, "loss": 0.6157, "step": 16685 }, { "epoch": 1.1305644013822074, "grad_norm": 7.584688186645508, "learning_rate": 7.818468067629543e-05, "loss": 0.6655, "step": 16686 }, { "epoch": 1.1306321566501796, "grad_norm": 5.311779022216797, "learning_rate": 7.818331165719763e-05, "loss": 0.6513, "step": 16687 }, { "epoch": 1.1306999119181516, "grad_norm": 5.313599586486816, "learning_rate": 7.818194263809981e-05, "loss": 0.6251, "step": 16688 }, { "epoch": 1.1307676671861238, "grad_norm": 4.748965740203857, "learning_rate": 7.818057361900199e-05, "loss": 0.5648, "step": 16689 }, { "epoch": 1.1308354224540957, "grad_norm": 5.075737476348877, "learning_rate": 7.817920459990418e-05, "loss": 0.6007, "step": 16690 }, { "epoch": 1.130903177722068, "grad_norm": 6.251148700714111, "learning_rate": 7.817783558080636e-05, "loss": 0.635, "step": 16691 }, { "epoch": 1.13097093299004, "grad_norm": 8.291831970214844, "learning_rate": 7.817646656170854e-05, "loss": 0.7627, "step": 16692 }, { "epoch": 1.1310386882580121, "grad_norm": 6.881071090698242, "learning_rate": 7.817509754261072e-05, "loss": 0.7503, "step": 16693 }, { "epoch": 1.131106443525984, "grad_norm": 7.16835355758667, "learning_rate": 7.81737285235129e-05, "loss": 0.7823, "step": 16694 }, { "epoch": 1.1311741987939563, "grad_norm": 4.8891096115112305, "learning_rate": 7.81723595044151e-05, "loss": 0.6679, "step": 16695 }, { "epoch": 1.1312419540619283, "grad_norm": 5.029726505279541, "learning_rate": 7.817099048531728e-05, "loss": 0.6262, "step": 16696 }, { "epoch": 1.1313097093299005, "grad_norm": 6.097971439361572, "learning_rate": 7.816962146621946e-05, "loss": 0.6416, "step": 16697 }, { "epoch": 1.1313774645978725, "grad_norm": 6.032967567443848, "learning_rate": 7.816825244712164e-05, "loss": 0.7701, "step": 16698 }, { "epoch": 1.1314452198658445, "grad_norm": 7.488419055938721, "learning_rate": 7.816688342802383e-05, "loss": 0.8387, "step": 16699 }, { "epoch": 1.1315129751338167, "grad_norm": 6.614134788513184, "learning_rate": 7.816551440892601e-05, "loss": 0.7412, "step": 16700 }, { "epoch": 1.1315807304017889, "grad_norm": 4.813358306884766, "learning_rate": 7.81641453898282e-05, "loss": 0.7146, "step": 16701 }, { "epoch": 1.1316484856697608, "grad_norm": 5.225235939025879, "learning_rate": 7.816277637073037e-05, "loss": 0.704, "step": 16702 }, { "epoch": 1.1317162409377328, "grad_norm": 6.127220630645752, "learning_rate": 7.816140735163255e-05, "loss": 0.7934, "step": 16703 }, { "epoch": 1.131783996205705, "grad_norm": 5.334741592407227, "learning_rate": 7.816003833253475e-05, "loss": 0.5111, "step": 16704 }, { "epoch": 1.131851751473677, "grad_norm": 5.337317943572998, "learning_rate": 7.815866931343693e-05, "loss": 0.908, "step": 16705 }, { "epoch": 1.1319195067416492, "grad_norm": 7.072608470916748, "learning_rate": 7.815730029433911e-05, "loss": 0.884, "step": 16706 }, { "epoch": 1.1319872620096212, "grad_norm": 6.734743595123291, "learning_rate": 7.815593127524129e-05, "loss": 0.6501, "step": 16707 }, { "epoch": 1.1320550172775934, "grad_norm": 6.5684814453125, "learning_rate": 7.815456225614348e-05, "loss": 0.9087, "step": 16708 }, { "epoch": 1.1321227725455654, "grad_norm": 5.775663375854492, "learning_rate": 7.815319323704566e-05, "loss": 0.6729, "step": 16709 }, { "epoch": 1.1321905278135376, "grad_norm": 4.196002960205078, "learning_rate": 7.815182421794784e-05, "loss": 0.6439, "step": 16710 }, { "epoch": 1.1322582830815096, "grad_norm": 9.43163776397705, "learning_rate": 7.815045519885002e-05, "loss": 0.72, "step": 16711 }, { "epoch": 1.1323260383494818, "grad_norm": 5.663300514221191, "learning_rate": 7.81490861797522e-05, "loss": 0.6189, "step": 16712 }, { "epoch": 1.1323937936174537, "grad_norm": 5.103667736053467, "learning_rate": 7.81477171606544e-05, "loss": 0.9228, "step": 16713 }, { "epoch": 1.1324615488854257, "grad_norm": 11.563446044921875, "learning_rate": 7.814634814155658e-05, "loss": 0.6651, "step": 16714 }, { "epoch": 1.132529304153398, "grad_norm": 6.818861961364746, "learning_rate": 7.814497912245876e-05, "loss": 0.6716, "step": 16715 }, { "epoch": 1.1325970594213701, "grad_norm": 8.50759506225586, "learning_rate": 7.814361010336094e-05, "loss": 0.5372, "step": 16716 }, { "epoch": 1.132664814689342, "grad_norm": 5.753479957580566, "learning_rate": 7.814224108426313e-05, "loss": 0.7041, "step": 16717 }, { "epoch": 1.132732569957314, "grad_norm": 6.033994197845459, "learning_rate": 7.814087206516531e-05, "loss": 0.6911, "step": 16718 }, { "epoch": 1.1328003252252863, "grad_norm": 5.7770771980285645, "learning_rate": 7.81395030460675e-05, "loss": 0.6144, "step": 16719 }, { "epoch": 1.1328680804932583, "grad_norm": 6.466103553771973, "learning_rate": 7.813813402696967e-05, "loss": 0.822, "step": 16720 }, { "epoch": 1.1329358357612305, "grad_norm": 6.284421920776367, "learning_rate": 7.813676500787185e-05, "loss": 0.715, "step": 16721 }, { "epoch": 1.1330035910292025, "grad_norm": 5.6262407302856445, "learning_rate": 7.813539598877405e-05, "loss": 0.7364, "step": 16722 }, { "epoch": 1.1330713462971747, "grad_norm": 4.806675910949707, "learning_rate": 7.813402696967623e-05, "loss": 0.4433, "step": 16723 }, { "epoch": 1.1331391015651466, "grad_norm": 5.149235248565674, "learning_rate": 7.813265795057841e-05, "loss": 0.4811, "step": 16724 }, { "epoch": 1.1332068568331188, "grad_norm": 5.818775653839111, "learning_rate": 7.813128893148059e-05, "loss": 0.5279, "step": 16725 }, { "epoch": 1.1332746121010908, "grad_norm": 5.656182289123535, "learning_rate": 7.812991991238277e-05, "loss": 0.6816, "step": 16726 }, { "epoch": 1.133342367369063, "grad_norm": 4.287816524505615, "learning_rate": 7.812855089328496e-05, "loss": 0.6402, "step": 16727 }, { "epoch": 1.133410122637035, "grad_norm": 6.147930145263672, "learning_rate": 7.812718187418714e-05, "loss": 0.6251, "step": 16728 }, { "epoch": 1.1334778779050072, "grad_norm": 5.128780364990234, "learning_rate": 7.812581285508932e-05, "loss": 0.7653, "step": 16729 }, { "epoch": 1.1335456331729792, "grad_norm": 4.769107341766357, "learning_rate": 7.812444383599152e-05, "loss": 0.5929, "step": 16730 }, { "epoch": 1.1336133884409514, "grad_norm": 4.782686233520508, "learning_rate": 7.81230748168937e-05, "loss": 0.5916, "step": 16731 }, { "epoch": 1.1336811437089234, "grad_norm": 5.834915637969971, "learning_rate": 7.812170579779588e-05, "loss": 0.6614, "step": 16732 }, { "epoch": 1.1337488989768953, "grad_norm": 5.408773899078369, "learning_rate": 7.812033677869807e-05, "loss": 0.7269, "step": 16733 }, { "epoch": 1.1338166542448676, "grad_norm": 6.599340438842773, "learning_rate": 7.811896775960025e-05, "loss": 0.4876, "step": 16734 }, { "epoch": 1.1338844095128395, "grad_norm": 4.747675895690918, "learning_rate": 7.811759874050243e-05, "loss": 0.674, "step": 16735 }, { "epoch": 1.1339521647808117, "grad_norm": 5.954026222229004, "learning_rate": 7.811622972140463e-05, "loss": 0.6632, "step": 16736 }, { "epoch": 1.1340199200487837, "grad_norm": 4.728879928588867, "learning_rate": 7.811486070230681e-05, "loss": 0.6869, "step": 16737 }, { "epoch": 1.134087675316756, "grad_norm": 5.576781749725342, "learning_rate": 7.811349168320899e-05, "loss": 0.6638, "step": 16738 }, { "epoch": 1.134155430584728, "grad_norm": 5.9168782234191895, "learning_rate": 7.811212266411117e-05, "loss": 0.759, "step": 16739 }, { "epoch": 1.1342231858527, "grad_norm": 5.378051280975342, "learning_rate": 7.811075364501336e-05, "loss": 0.5783, "step": 16740 }, { "epoch": 1.134290941120672, "grad_norm": 7.849323749542236, "learning_rate": 7.810938462591554e-05, "loss": 0.7713, "step": 16741 }, { "epoch": 1.1343586963886443, "grad_norm": 5.6644415855407715, "learning_rate": 7.810801560681772e-05, "loss": 0.5597, "step": 16742 }, { "epoch": 1.1344264516566163, "grad_norm": 6.990406513214111, "learning_rate": 7.81066465877199e-05, "loss": 0.5516, "step": 16743 }, { "epoch": 1.1344942069245885, "grad_norm": 6.040196418762207, "learning_rate": 7.810527756862208e-05, "loss": 0.7269, "step": 16744 }, { "epoch": 1.1345619621925604, "grad_norm": 6.549674034118652, "learning_rate": 7.810390854952428e-05, "loss": 0.797, "step": 16745 }, { "epoch": 1.1346297174605326, "grad_norm": 5.927361011505127, "learning_rate": 7.810253953042646e-05, "loss": 0.6387, "step": 16746 }, { "epoch": 1.1346974727285046, "grad_norm": 5.803552150726318, "learning_rate": 7.810117051132864e-05, "loss": 0.7569, "step": 16747 }, { "epoch": 1.1347652279964766, "grad_norm": 6.477768898010254, "learning_rate": 7.809980149223082e-05, "loss": 0.8403, "step": 16748 }, { "epoch": 1.1348329832644488, "grad_norm": 6.080210208892822, "learning_rate": 7.8098432473133e-05, "loss": 0.8066, "step": 16749 }, { "epoch": 1.134900738532421, "grad_norm": 7.755611896514893, "learning_rate": 7.809706345403519e-05, "loss": 0.7821, "step": 16750 }, { "epoch": 1.134968493800393, "grad_norm": 5.807285308837891, "learning_rate": 7.809569443493737e-05, "loss": 0.7686, "step": 16751 }, { "epoch": 1.135036249068365, "grad_norm": 4.9453020095825195, "learning_rate": 7.809432541583955e-05, "loss": 0.5902, "step": 16752 }, { "epoch": 1.1351040043363372, "grad_norm": 5.776493072509766, "learning_rate": 7.809295639674173e-05, "loss": 0.568, "step": 16753 }, { "epoch": 1.1351717596043092, "grad_norm": 8.138739585876465, "learning_rate": 7.809158737764393e-05, "loss": 0.9117, "step": 16754 }, { "epoch": 1.1352395148722814, "grad_norm": 4.630499839782715, "learning_rate": 7.809021835854611e-05, "loss": 0.7508, "step": 16755 }, { "epoch": 1.1353072701402533, "grad_norm": 4.743904113769531, "learning_rate": 7.808884933944829e-05, "loss": 0.6519, "step": 16756 }, { "epoch": 1.1353750254082255, "grad_norm": 6.640937328338623, "learning_rate": 7.808748032035047e-05, "loss": 0.6931, "step": 16757 }, { "epoch": 1.1354427806761975, "grad_norm": 6.060274600982666, "learning_rate": 7.808611130125265e-05, "loss": 0.7295, "step": 16758 }, { "epoch": 1.1355105359441697, "grad_norm": 5.2093000411987305, "learning_rate": 7.808474228215484e-05, "loss": 0.629, "step": 16759 }, { "epoch": 1.1355782912121417, "grad_norm": 5.407078266143799, "learning_rate": 7.808337326305702e-05, "loss": 0.6372, "step": 16760 }, { "epoch": 1.135646046480114, "grad_norm": 6.483882904052734, "learning_rate": 7.80820042439592e-05, "loss": 0.6638, "step": 16761 }, { "epoch": 1.135713801748086, "grad_norm": 5.735899925231934, "learning_rate": 7.808063522486138e-05, "loss": 0.9567, "step": 16762 }, { "epoch": 1.1357815570160579, "grad_norm": 6.978238105773926, "learning_rate": 7.807926620576358e-05, "loss": 0.6355, "step": 16763 }, { "epoch": 1.13584931228403, "grad_norm": 6.08992338180542, "learning_rate": 7.807789718666576e-05, "loss": 0.8777, "step": 16764 }, { "epoch": 1.1359170675520023, "grad_norm": 6.668683052062988, "learning_rate": 7.807652816756794e-05, "loss": 0.5351, "step": 16765 }, { "epoch": 1.1359848228199743, "grad_norm": 5.498349666595459, "learning_rate": 7.807515914847012e-05, "loss": 0.7362, "step": 16766 }, { "epoch": 1.1360525780879462, "grad_norm": 5.9772868156433105, "learning_rate": 7.80737901293723e-05, "loss": 0.7146, "step": 16767 }, { "epoch": 1.1361203333559184, "grad_norm": 6.178906440734863, "learning_rate": 7.80724211102745e-05, "loss": 0.6685, "step": 16768 }, { "epoch": 1.1361880886238904, "grad_norm": 5.166858196258545, "learning_rate": 7.807105209117667e-05, "loss": 0.7369, "step": 16769 }, { "epoch": 1.1362558438918626, "grad_norm": 5.280497074127197, "learning_rate": 7.806968307207885e-05, "loss": 0.6309, "step": 16770 }, { "epoch": 1.1363235991598346, "grad_norm": 4.654086112976074, "learning_rate": 7.806831405298103e-05, "loss": 0.6645, "step": 16771 }, { "epoch": 1.1363913544278068, "grad_norm": 5.552098751068115, "learning_rate": 7.806694503388321e-05, "loss": 0.6494, "step": 16772 }, { "epoch": 1.1364591096957788, "grad_norm": 5.359954833984375, "learning_rate": 7.806557601478541e-05, "loss": 0.8581, "step": 16773 }, { "epoch": 1.136526864963751, "grad_norm": 5.112419128417969, "learning_rate": 7.806420699568759e-05, "loss": 0.6707, "step": 16774 }, { "epoch": 1.136594620231723, "grad_norm": 5.130734443664551, "learning_rate": 7.806283797658977e-05, "loss": 0.5441, "step": 16775 }, { "epoch": 1.1366623754996952, "grad_norm": 4.156247615814209, "learning_rate": 7.806146895749196e-05, "loss": 0.6177, "step": 16776 }, { "epoch": 1.1367301307676672, "grad_norm": 7.025331020355225, "learning_rate": 7.806009993839414e-05, "loss": 0.7531, "step": 16777 }, { "epoch": 1.1367978860356394, "grad_norm": 6.272400856018066, "learning_rate": 7.805873091929632e-05, "loss": 0.5415, "step": 16778 }, { "epoch": 1.1368656413036113, "grad_norm": 5.994542121887207, "learning_rate": 7.805736190019852e-05, "loss": 0.5739, "step": 16779 }, { "epoch": 1.1369333965715835, "grad_norm": 7.8840718269348145, "learning_rate": 7.80559928811007e-05, "loss": 0.7447, "step": 16780 }, { "epoch": 1.1370011518395555, "grad_norm": 5.8447651863098145, "learning_rate": 7.805462386200288e-05, "loss": 0.5919, "step": 16781 }, { "epoch": 1.1370689071075275, "grad_norm": 7.363594055175781, "learning_rate": 7.805325484290507e-05, "loss": 0.7014, "step": 16782 }, { "epoch": 1.1371366623754997, "grad_norm": 5.474704265594482, "learning_rate": 7.805188582380725e-05, "loss": 0.6021, "step": 16783 }, { "epoch": 1.1372044176434717, "grad_norm": 6.830411434173584, "learning_rate": 7.805051680470943e-05, "loss": 0.6739, "step": 16784 }, { "epoch": 1.137272172911444, "grad_norm": 5.505611896514893, "learning_rate": 7.804914778561161e-05, "loss": 0.5898, "step": 16785 }, { "epoch": 1.1373399281794159, "grad_norm": 9.52569580078125, "learning_rate": 7.804777876651381e-05, "loss": 0.6024, "step": 16786 }, { "epoch": 1.137407683447388, "grad_norm": 6.93997049331665, "learning_rate": 7.804640974741599e-05, "loss": 0.6355, "step": 16787 }, { "epoch": 1.13747543871536, "grad_norm": 5.036192893981934, "learning_rate": 7.804504072831817e-05, "loss": 0.7398, "step": 16788 }, { "epoch": 1.1375431939833323, "grad_norm": 5.485294818878174, "learning_rate": 7.804367170922035e-05, "loss": 0.7825, "step": 16789 }, { "epoch": 1.1376109492513042, "grad_norm": 5.600368976593018, "learning_rate": 7.804230269012253e-05, "loss": 0.7205, "step": 16790 }, { "epoch": 1.1376787045192764, "grad_norm": 5.522429943084717, "learning_rate": 7.804093367102472e-05, "loss": 0.7872, "step": 16791 }, { "epoch": 1.1377464597872484, "grad_norm": 5.987887382507324, "learning_rate": 7.80395646519269e-05, "loss": 0.6782, "step": 16792 }, { "epoch": 1.1378142150552206, "grad_norm": 5.354010105133057, "learning_rate": 7.803819563282908e-05, "loss": 0.6363, "step": 16793 }, { "epoch": 1.1378819703231926, "grad_norm": 6.39069938659668, "learning_rate": 7.803682661373126e-05, "loss": 0.7089, "step": 16794 }, { "epoch": 1.1379497255911648, "grad_norm": 5.727047443389893, "learning_rate": 7.803545759463346e-05, "loss": 0.7008, "step": 16795 }, { "epoch": 1.1380174808591368, "grad_norm": 7.178287506103516, "learning_rate": 7.803408857553564e-05, "loss": 0.7488, "step": 16796 }, { "epoch": 1.1380852361271088, "grad_norm": 6.201870441436768, "learning_rate": 7.803271955643782e-05, "loss": 0.6254, "step": 16797 }, { "epoch": 1.138152991395081, "grad_norm": 6.881004810333252, "learning_rate": 7.803135053734e-05, "loss": 0.6558, "step": 16798 }, { "epoch": 1.1382207466630532, "grad_norm": 6.2866291999816895, "learning_rate": 7.802998151824218e-05, "loss": 0.6537, "step": 16799 }, { "epoch": 1.1382885019310252, "grad_norm": 5.333223342895508, "learning_rate": 7.802861249914437e-05, "loss": 0.6693, "step": 16800 }, { "epoch": 1.1383562571989971, "grad_norm": 6.103747367858887, "learning_rate": 7.802724348004655e-05, "loss": 0.6822, "step": 16801 }, { "epoch": 1.1384240124669693, "grad_norm": 10.445125579833984, "learning_rate": 7.802587446094873e-05, "loss": 0.7734, "step": 16802 }, { "epoch": 1.1384917677349413, "grad_norm": 5.043206691741943, "learning_rate": 7.802450544185091e-05, "loss": 0.6898, "step": 16803 }, { "epoch": 1.1385595230029135, "grad_norm": 7.022032260894775, "learning_rate": 7.80231364227531e-05, "loss": 0.6542, "step": 16804 }, { "epoch": 1.1386272782708855, "grad_norm": 7.176365852355957, "learning_rate": 7.802176740365529e-05, "loss": 0.7367, "step": 16805 }, { "epoch": 1.1386950335388577, "grad_norm": 4.644754409790039, "learning_rate": 7.802039838455747e-05, "loss": 0.6602, "step": 16806 }, { "epoch": 1.1387627888068297, "grad_norm": 5.330687522888184, "learning_rate": 7.801902936545965e-05, "loss": 0.5775, "step": 16807 }, { "epoch": 1.1388305440748019, "grad_norm": 5.8319501876831055, "learning_rate": 7.801766034636183e-05, "loss": 0.599, "step": 16808 }, { "epoch": 1.1388982993427739, "grad_norm": 6.8199286460876465, "learning_rate": 7.801629132726402e-05, "loss": 0.8001, "step": 16809 }, { "epoch": 1.138966054610746, "grad_norm": 6.324723720550537, "learning_rate": 7.80149223081662e-05, "loss": 0.773, "step": 16810 }, { "epoch": 1.139033809878718, "grad_norm": 8.192002296447754, "learning_rate": 7.801355328906838e-05, "loss": 0.647, "step": 16811 }, { "epoch": 1.13910156514669, "grad_norm": 7.2895050048828125, "learning_rate": 7.801218426997056e-05, "loss": 0.8682, "step": 16812 }, { "epoch": 1.1391693204146622, "grad_norm": 6.973208904266357, "learning_rate": 7.801081525087274e-05, "loss": 0.667, "step": 16813 }, { "epoch": 1.1392370756826344, "grad_norm": 6.551448345184326, "learning_rate": 7.800944623177494e-05, "loss": 0.7502, "step": 16814 }, { "epoch": 1.1393048309506064, "grad_norm": 5.396692752838135, "learning_rate": 7.800807721267712e-05, "loss": 0.9984, "step": 16815 }, { "epoch": 1.1393725862185784, "grad_norm": 4.447607040405273, "learning_rate": 7.80067081935793e-05, "loss": 0.6526, "step": 16816 }, { "epoch": 1.1394403414865506, "grad_norm": 8.116009712219238, "learning_rate": 7.800533917448148e-05, "loss": 0.6774, "step": 16817 }, { "epoch": 1.1395080967545226, "grad_norm": 5.555187225341797, "learning_rate": 7.800397015538367e-05, "loss": 0.7358, "step": 16818 }, { "epoch": 1.1395758520224948, "grad_norm": 7.893671035766602, "learning_rate": 7.800260113628585e-05, "loss": 0.4427, "step": 16819 }, { "epoch": 1.1396436072904668, "grad_norm": 6.3167853355407715, "learning_rate": 7.800123211718803e-05, "loss": 0.7659, "step": 16820 }, { "epoch": 1.139711362558439, "grad_norm": 7.062374591827393, "learning_rate": 7.799986309809021e-05, "loss": 0.8105, "step": 16821 }, { "epoch": 1.139779117826411, "grad_norm": 6.286994457244873, "learning_rate": 7.799849407899241e-05, "loss": 0.6914, "step": 16822 }, { "epoch": 1.1398468730943832, "grad_norm": 5.267313480377197, "learning_rate": 7.799712505989459e-05, "loss": 0.6803, "step": 16823 }, { "epoch": 1.1399146283623551, "grad_norm": 5.873160362243652, "learning_rate": 7.799575604079677e-05, "loss": 0.8037, "step": 16824 }, { "epoch": 1.1399823836303273, "grad_norm": 6.283346176147461, "learning_rate": 7.799438702169896e-05, "loss": 0.6624, "step": 16825 }, { "epoch": 1.1400501388982993, "grad_norm": 6.988799571990967, "learning_rate": 7.799301800260114e-05, "loss": 0.7537, "step": 16826 }, { "epoch": 1.1401178941662715, "grad_norm": 6.114986419677734, "learning_rate": 7.799164898350332e-05, "loss": 0.7707, "step": 16827 }, { "epoch": 1.1401856494342435, "grad_norm": 5.79031229019165, "learning_rate": 7.799027996440552e-05, "loss": 0.5132, "step": 16828 }, { "epoch": 1.1402534047022157, "grad_norm": 6.05492639541626, "learning_rate": 7.79889109453077e-05, "loss": 0.604, "step": 16829 }, { "epoch": 1.1403211599701877, "grad_norm": 5.041617393493652, "learning_rate": 7.798754192620988e-05, "loss": 0.7133, "step": 16830 }, { "epoch": 1.1403889152381597, "grad_norm": 6.444592475891113, "learning_rate": 7.798617290711206e-05, "loss": 0.653, "step": 16831 }, { "epoch": 1.1404566705061319, "grad_norm": 6.009650230407715, "learning_rate": 7.798480388801425e-05, "loss": 0.6469, "step": 16832 }, { "epoch": 1.1405244257741038, "grad_norm": 8.08381462097168, "learning_rate": 7.798343486891643e-05, "loss": 0.7966, "step": 16833 }, { "epoch": 1.140592181042076, "grad_norm": 5.3437676429748535, "learning_rate": 7.798206584981861e-05, "loss": 0.7883, "step": 16834 }, { "epoch": 1.140659936310048, "grad_norm": 5.706287860870361, "learning_rate": 7.798069683072079e-05, "loss": 0.8088, "step": 16835 }, { "epoch": 1.1407276915780202, "grad_norm": 5.616438865661621, "learning_rate": 7.797932781162297e-05, "loss": 0.5782, "step": 16836 }, { "epoch": 1.1407954468459922, "grad_norm": 5.1152777671813965, "learning_rate": 7.797795879252517e-05, "loss": 0.8015, "step": 16837 }, { "epoch": 1.1408632021139644, "grad_norm": 5.392358303070068, "learning_rate": 7.797658977342735e-05, "loss": 0.6961, "step": 16838 }, { "epoch": 1.1409309573819364, "grad_norm": 5.752062797546387, "learning_rate": 7.797522075432953e-05, "loss": 0.7793, "step": 16839 }, { "epoch": 1.1409987126499086, "grad_norm": 6.018402576446533, "learning_rate": 7.797385173523171e-05, "loss": 0.6414, "step": 16840 }, { "epoch": 1.1410664679178806, "grad_norm": 4.536129951477051, "learning_rate": 7.79724827161339e-05, "loss": 0.4998, "step": 16841 }, { "epoch": 1.1411342231858528, "grad_norm": 5.575126647949219, "learning_rate": 7.797111369703608e-05, "loss": 0.6466, "step": 16842 }, { "epoch": 1.1412019784538248, "grad_norm": 8.243531227111816, "learning_rate": 7.796974467793826e-05, "loss": 0.9981, "step": 16843 }, { "epoch": 1.141269733721797, "grad_norm": 6.683679580688477, "learning_rate": 7.796837565884044e-05, "loss": 0.8015, "step": 16844 }, { "epoch": 1.141337488989769, "grad_norm": 7.662330627441406, "learning_rate": 7.796700663974262e-05, "loss": 0.6639, "step": 16845 }, { "epoch": 1.141405244257741, "grad_norm": 4.948780536651611, "learning_rate": 7.796563762064482e-05, "loss": 0.6174, "step": 16846 }, { "epoch": 1.1414729995257131, "grad_norm": 6.792630195617676, "learning_rate": 7.7964268601547e-05, "loss": 0.7958, "step": 16847 }, { "epoch": 1.1415407547936853, "grad_norm": 5.603211879730225, "learning_rate": 7.796289958244918e-05, "loss": 0.8995, "step": 16848 }, { "epoch": 1.1416085100616573, "grad_norm": 7.316423416137695, "learning_rate": 7.796153056335136e-05, "loss": 0.592, "step": 16849 }, { "epoch": 1.1416762653296293, "grad_norm": 5.690333843231201, "learning_rate": 7.796016154425355e-05, "loss": 0.568, "step": 16850 }, { "epoch": 1.1417440205976015, "grad_norm": 5.481085777282715, "learning_rate": 7.795879252515573e-05, "loss": 0.8911, "step": 16851 }, { "epoch": 1.1418117758655735, "grad_norm": 5.878414154052734, "learning_rate": 7.795742350605791e-05, "loss": 0.5673, "step": 16852 }, { "epoch": 1.1418795311335457, "grad_norm": 6.474081993103027, "learning_rate": 7.79560544869601e-05, "loss": 0.6036, "step": 16853 }, { "epoch": 1.1419472864015177, "grad_norm": 5.981825351715088, "learning_rate": 7.795468546786227e-05, "loss": 0.7612, "step": 16854 }, { "epoch": 1.1420150416694899, "grad_norm": 5.728933811187744, "learning_rate": 7.795331644876447e-05, "loss": 0.7179, "step": 16855 }, { "epoch": 1.1420827969374618, "grad_norm": 5.504831790924072, "learning_rate": 7.795194742966665e-05, "loss": 0.9218, "step": 16856 }, { "epoch": 1.142150552205434, "grad_norm": 6.834359169006348, "learning_rate": 7.795057841056883e-05, "loss": 0.7309, "step": 16857 }, { "epoch": 1.142218307473406, "grad_norm": 6.295196056365967, "learning_rate": 7.794920939147101e-05, "loss": 0.8664, "step": 16858 }, { "epoch": 1.1422860627413782, "grad_norm": 6.097269058227539, "learning_rate": 7.794784037237319e-05, "loss": 0.6478, "step": 16859 }, { "epoch": 1.1423538180093502, "grad_norm": 6.314675331115723, "learning_rate": 7.794647135327538e-05, "loss": 0.8395, "step": 16860 }, { "epoch": 1.1424215732773222, "grad_norm": 4.988530158996582, "learning_rate": 7.794510233417756e-05, "loss": 0.5958, "step": 16861 }, { "epoch": 1.1424893285452944, "grad_norm": 5.622345924377441, "learning_rate": 7.794373331507974e-05, "loss": 0.4794, "step": 16862 }, { "epoch": 1.1425570838132666, "grad_norm": 5.737193584442139, "learning_rate": 7.794236429598192e-05, "loss": 0.7704, "step": 16863 }, { "epoch": 1.1426248390812386, "grad_norm": 6.432234764099121, "learning_rate": 7.794099527688412e-05, "loss": 0.5991, "step": 16864 }, { "epoch": 1.1426925943492106, "grad_norm": 6.44136381149292, "learning_rate": 7.79396262577863e-05, "loss": 0.7507, "step": 16865 }, { "epoch": 1.1427603496171828, "grad_norm": 4.948369979858398, "learning_rate": 7.793825723868848e-05, "loss": 0.7673, "step": 16866 }, { "epoch": 1.1428281048851547, "grad_norm": 5.5636162757873535, "learning_rate": 7.793688821959066e-05, "loss": 0.4776, "step": 16867 }, { "epoch": 1.142895860153127, "grad_norm": 7.1905741691589355, "learning_rate": 7.793551920049285e-05, "loss": 0.8011, "step": 16868 }, { "epoch": 1.142963615421099, "grad_norm": 6.490988254547119, "learning_rate": 7.793415018139503e-05, "loss": 0.8343, "step": 16869 }, { "epoch": 1.1430313706890711, "grad_norm": 5.666626453399658, "learning_rate": 7.793278116229721e-05, "loss": 0.6194, "step": 16870 }, { "epoch": 1.143099125957043, "grad_norm": 7.139801025390625, "learning_rate": 7.793141214319941e-05, "loss": 0.6305, "step": 16871 }, { "epoch": 1.1431668812250153, "grad_norm": 5.280755043029785, "learning_rate": 7.793004312410159e-05, "loss": 0.69, "step": 16872 }, { "epoch": 1.1432346364929873, "grad_norm": 6.890513896942139, "learning_rate": 7.792867410500377e-05, "loss": 0.9433, "step": 16873 }, { "epoch": 1.1433023917609595, "grad_norm": 7.743908882141113, "learning_rate": 7.792730508590596e-05, "loss": 0.6906, "step": 16874 }, { "epoch": 1.1433701470289315, "grad_norm": 9.331748962402344, "learning_rate": 7.792593606680814e-05, "loss": 0.581, "step": 16875 }, { "epoch": 1.1434379022969037, "grad_norm": 5.105838775634766, "learning_rate": 7.792456704771032e-05, "loss": 0.6817, "step": 16876 }, { "epoch": 1.1435056575648757, "grad_norm": 7.092813014984131, "learning_rate": 7.79231980286125e-05, "loss": 0.5524, "step": 16877 }, { "epoch": 1.1435734128328479, "grad_norm": 5.3407673835754395, "learning_rate": 7.79218290095147e-05, "loss": 0.6377, "step": 16878 }, { "epoch": 1.1436411681008198, "grad_norm": 8.154608726501465, "learning_rate": 7.792045999041688e-05, "loss": 0.7605, "step": 16879 }, { "epoch": 1.1437089233687918, "grad_norm": 6.083822250366211, "learning_rate": 7.791909097131906e-05, "loss": 0.6825, "step": 16880 }, { "epoch": 1.143776678636764, "grad_norm": 5.946974754333496, "learning_rate": 7.791772195222124e-05, "loss": 0.6463, "step": 16881 }, { "epoch": 1.143844433904736, "grad_norm": 4.58415412902832, "learning_rate": 7.791635293312342e-05, "loss": 0.5366, "step": 16882 }, { "epoch": 1.1439121891727082, "grad_norm": 5.008115768432617, "learning_rate": 7.791498391402561e-05, "loss": 0.6225, "step": 16883 }, { "epoch": 1.1439799444406802, "grad_norm": 6.249969005584717, "learning_rate": 7.791361489492779e-05, "loss": 0.6601, "step": 16884 }, { "epoch": 1.1440476997086524, "grad_norm": 5.142440319061279, "learning_rate": 7.791224587582997e-05, "loss": 0.5248, "step": 16885 }, { "epoch": 1.1441154549766244, "grad_norm": 4.58303165435791, "learning_rate": 7.791087685673215e-05, "loss": 0.6253, "step": 16886 }, { "epoch": 1.1441832102445966, "grad_norm": 5.830782413482666, "learning_rate": 7.790950783763435e-05, "loss": 0.7958, "step": 16887 }, { "epoch": 1.1442509655125686, "grad_norm": 6.463974952697754, "learning_rate": 7.790813881853653e-05, "loss": 0.7663, "step": 16888 }, { "epoch": 1.1443187207805408, "grad_norm": 7.233205318450928, "learning_rate": 7.790676979943871e-05, "loss": 0.7953, "step": 16889 }, { "epoch": 1.1443864760485127, "grad_norm": 5.574187278747559, "learning_rate": 7.790540078034089e-05, "loss": 0.5744, "step": 16890 }, { "epoch": 1.144454231316485, "grad_norm": 6.045961856842041, "learning_rate": 7.790403176124307e-05, "loss": 0.6247, "step": 16891 }, { "epoch": 1.144521986584457, "grad_norm": 5.329028606414795, "learning_rate": 7.790266274214526e-05, "loss": 0.6026, "step": 16892 }, { "epoch": 1.1445897418524291, "grad_norm": 6.763497829437256, "learning_rate": 7.790129372304744e-05, "loss": 0.6519, "step": 16893 }, { "epoch": 1.144657497120401, "grad_norm": 5.743830680847168, "learning_rate": 7.789992470394962e-05, "loss": 0.6935, "step": 16894 }, { "epoch": 1.144725252388373, "grad_norm": 4.693161487579346, "learning_rate": 7.78985556848518e-05, "loss": 0.5889, "step": 16895 }, { "epoch": 1.1447930076563453, "grad_norm": 6.895607948303223, "learning_rate": 7.7897186665754e-05, "loss": 0.561, "step": 16896 }, { "epoch": 1.1448607629243175, "grad_norm": 5.308940410614014, "learning_rate": 7.789581764665618e-05, "loss": 0.8614, "step": 16897 }, { "epoch": 1.1449285181922895, "grad_norm": 6.690733909606934, "learning_rate": 7.789444862755836e-05, "loss": 0.5136, "step": 16898 }, { "epoch": 1.1449962734602614, "grad_norm": 4.7946624755859375, "learning_rate": 7.789307960846054e-05, "loss": 0.5016, "step": 16899 }, { "epoch": 1.1450640287282337, "grad_norm": 5.833794593811035, "learning_rate": 7.789171058936272e-05, "loss": 0.5225, "step": 16900 }, { "epoch": 1.1451317839962056, "grad_norm": 6.518613338470459, "learning_rate": 7.789034157026491e-05, "loss": 0.7277, "step": 16901 }, { "epoch": 1.1451995392641778, "grad_norm": 5.891839981079102, "learning_rate": 7.788897255116709e-05, "loss": 0.5277, "step": 16902 }, { "epoch": 1.1452672945321498, "grad_norm": 7.279749393463135, "learning_rate": 7.788760353206927e-05, "loss": 0.6505, "step": 16903 }, { "epoch": 1.145335049800122, "grad_norm": 5.246143341064453, "learning_rate": 7.788623451297145e-05, "loss": 0.6035, "step": 16904 }, { "epoch": 1.145402805068094, "grad_norm": 6.899950981140137, "learning_rate": 7.788486549387363e-05, "loss": 0.6674, "step": 16905 }, { "epoch": 1.1454705603360662, "grad_norm": 5.364004611968994, "learning_rate": 7.788349647477583e-05, "loss": 0.6255, "step": 16906 }, { "epoch": 1.1455383156040382, "grad_norm": 6.842263698577881, "learning_rate": 7.788212745567801e-05, "loss": 0.6159, "step": 16907 }, { "epoch": 1.1456060708720104, "grad_norm": 6.661463260650635, "learning_rate": 7.788075843658019e-05, "loss": 0.6893, "step": 16908 }, { "epoch": 1.1456738261399824, "grad_norm": 7.597821235656738, "learning_rate": 7.787938941748237e-05, "loss": 0.666, "step": 16909 }, { "epoch": 1.1457415814079543, "grad_norm": 6.1869354248046875, "learning_rate": 7.787802039838456e-05, "loss": 0.5579, "step": 16910 }, { "epoch": 1.1458093366759265, "grad_norm": 5.620526313781738, "learning_rate": 7.787665137928674e-05, "loss": 0.6303, "step": 16911 }, { "epoch": 1.1458770919438988, "grad_norm": 6.049895286560059, "learning_rate": 7.787528236018892e-05, "loss": 0.5657, "step": 16912 }, { "epoch": 1.1459448472118707, "grad_norm": 5.7843756675720215, "learning_rate": 7.78739133410911e-05, "loss": 0.6183, "step": 16913 }, { "epoch": 1.1460126024798427, "grad_norm": 7.550663471221924, "learning_rate": 7.787254432199328e-05, "loss": 0.8373, "step": 16914 }, { "epoch": 1.146080357747815, "grad_norm": 6.411697864532471, "learning_rate": 7.787117530289548e-05, "loss": 0.7271, "step": 16915 }, { "epoch": 1.146148113015787, "grad_norm": 5.198284149169922, "learning_rate": 7.786980628379766e-05, "loss": 0.7704, "step": 16916 }, { "epoch": 1.146215868283759, "grad_norm": 7.581902980804443, "learning_rate": 7.786843726469984e-05, "loss": 0.8091, "step": 16917 }, { "epoch": 1.146283623551731, "grad_norm": 6.926025390625, "learning_rate": 7.786706824560203e-05, "loss": 0.5879, "step": 16918 }, { "epoch": 1.1463513788197033, "grad_norm": 6.271979808807373, "learning_rate": 7.786569922650421e-05, "loss": 0.8076, "step": 16919 }, { "epoch": 1.1464191340876753, "grad_norm": 5.187648296356201, "learning_rate": 7.786433020740639e-05, "loss": 0.6618, "step": 16920 }, { "epoch": 1.1464868893556475, "grad_norm": 5.481020450592041, "learning_rate": 7.786296118830859e-05, "loss": 0.6813, "step": 16921 }, { "epoch": 1.1465546446236194, "grad_norm": 5.956170082092285, "learning_rate": 7.786159216921077e-05, "loss": 0.6529, "step": 16922 }, { "epoch": 1.1466223998915916, "grad_norm": 6.283894062042236, "learning_rate": 7.786022315011295e-05, "loss": 0.6642, "step": 16923 }, { "epoch": 1.1466901551595636, "grad_norm": 6.540280342102051, "learning_rate": 7.785885413101514e-05, "loss": 0.9651, "step": 16924 }, { "epoch": 1.1467579104275358, "grad_norm": 5.8760528564453125, "learning_rate": 7.785748511191732e-05, "loss": 0.5935, "step": 16925 }, { "epoch": 1.1468256656955078, "grad_norm": 6.100462436676025, "learning_rate": 7.78561160928195e-05, "loss": 0.6853, "step": 16926 }, { "epoch": 1.14689342096348, "grad_norm": 8.684874534606934, "learning_rate": 7.785474707372168e-05, "loss": 0.8358, "step": 16927 }, { "epoch": 1.146961176231452, "grad_norm": 9.009882926940918, "learning_rate": 7.785337805462388e-05, "loss": 0.6265, "step": 16928 }, { "epoch": 1.147028931499424, "grad_norm": 5.696036338806152, "learning_rate": 7.785200903552606e-05, "loss": 0.6748, "step": 16929 }, { "epoch": 1.1470966867673962, "grad_norm": 5.4167866706848145, "learning_rate": 7.785064001642824e-05, "loss": 0.7013, "step": 16930 }, { "epoch": 1.1471644420353682, "grad_norm": 9.469526290893555, "learning_rate": 7.784927099733042e-05, "loss": 0.9617, "step": 16931 }, { "epoch": 1.1472321973033404, "grad_norm": 7.8615593910217285, "learning_rate": 7.78479019782326e-05, "loss": 0.6682, "step": 16932 }, { "epoch": 1.1472999525713123, "grad_norm": 5.507148742675781, "learning_rate": 7.784653295913479e-05, "loss": 0.5723, "step": 16933 }, { "epoch": 1.1473677078392845, "grad_norm": 7.456667423248291, "learning_rate": 7.784516394003697e-05, "loss": 0.8836, "step": 16934 }, { "epoch": 1.1474354631072565, "grad_norm": 6.641378402709961, "learning_rate": 7.784379492093915e-05, "loss": 0.8819, "step": 16935 }, { "epoch": 1.1475032183752287, "grad_norm": 7.157406330108643, "learning_rate": 7.784242590184133e-05, "loss": 0.6635, "step": 16936 }, { "epoch": 1.1475709736432007, "grad_norm": 5.309220314025879, "learning_rate": 7.784105688274351e-05, "loss": 0.5876, "step": 16937 }, { "epoch": 1.147638728911173, "grad_norm": 6.855804920196533, "learning_rate": 7.783968786364571e-05, "loss": 0.7916, "step": 16938 }, { "epoch": 1.147706484179145, "grad_norm": 6.300415992736816, "learning_rate": 7.783831884454789e-05, "loss": 0.7407, "step": 16939 }, { "epoch": 1.147774239447117, "grad_norm": 5.024459362030029, "learning_rate": 7.783694982545007e-05, "loss": 0.669, "step": 16940 }, { "epoch": 1.147841994715089, "grad_norm": 5.556980609893799, "learning_rate": 7.783558080635225e-05, "loss": 0.6507, "step": 16941 }, { "epoch": 1.1479097499830613, "grad_norm": 6.7080535888671875, "learning_rate": 7.783421178725444e-05, "loss": 0.8388, "step": 16942 }, { "epoch": 1.1479775052510333, "grad_norm": 5.859105110168457, "learning_rate": 7.783284276815662e-05, "loss": 0.7661, "step": 16943 }, { "epoch": 1.1480452605190052, "grad_norm": 5.6181159019470215, "learning_rate": 7.78314737490588e-05, "loss": 0.7629, "step": 16944 }, { "epoch": 1.1481130157869774, "grad_norm": 5.8782854080200195, "learning_rate": 7.783010472996098e-05, "loss": 0.7253, "step": 16945 }, { "epoch": 1.1481807710549496, "grad_norm": 4.898889064788818, "learning_rate": 7.782873571086316e-05, "loss": 0.6607, "step": 16946 }, { "epoch": 1.1482485263229216, "grad_norm": 6.8231329917907715, "learning_rate": 7.782736669176536e-05, "loss": 0.6647, "step": 16947 }, { "epoch": 1.1483162815908936, "grad_norm": 4.969480514526367, "learning_rate": 7.782599767266754e-05, "loss": 0.5449, "step": 16948 }, { "epoch": 1.1483840368588658, "grad_norm": 7.149383544921875, "learning_rate": 7.782462865356972e-05, "loss": 0.6182, "step": 16949 }, { "epoch": 1.1484517921268378, "grad_norm": 6.851714134216309, "learning_rate": 7.78232596344719e-05, "loss": 0.7021, "step": 16950 }, { "epoch": 1.14851954739481, "grad_norm": 6.9529924392700195, "learning_rate": 7.782189061537409e-05, "loss": 0.7937, "step": 16951 }, { "epoch": 1.148587302662782, "grad_norm": 5.88994026184082, "learning_rate": 7.782052159627627e-05, "loss": 0.9483, "step": 16952 }, { "epoch": 1.1486550579307542, "grad_norm": 5.6566643714904785, "learning_rate": 7.781915257717845e-05, "loss": 0.5805, "step": 16953 }, { "epoch": 1.1487228131987262, "grad_norm": 6.150807857513428, "learning_rate": 7.781778355808063e-05, "loss": 0.7529, "step": 16954 }, { "epoch": 1.1487905684666984, "grad_norm": 5.5446038246154785, "learning_rate": 7.781641453898281e-05, "loss": 0.6351, "step": 16955 }, { "epoch": 1.1488583237346703, "grad_norm": 5.691783428192139, "learning_rate": 7.781504551988501e-05, "loss": 0.5886, "step": 16956 }, { "epoch": 1.1489260790026425, "grad_norm": 5.392401218414307, "learning_rate": 7.781367650078719e-05, "loss": 0.5915, "step": 16957 }, { "epoch": 1.1489938342706145, "grad_norm": 7.51052713394165, "learning_rate": 7.781230748168937e-05, "loss": 0.713, "step": 16958 }, { "epoch": 1.1490615895385865, "grad_norm": 6.7278523445129395, "learning_rate": 7.781093846259155e-05, "loss": 0.6796, "step": 16959 }, { "epoch": 1.1491293448065587, "grad_norm": 8.327152252197266, "learning_rate": 7.780956944349373e-05, "loss": 0.6551, "step": 16960 }, { "epoch": 1.149197100074531, "grad_norm": 7.18127965927124, "learning_rate": 7.780820042439592e-05, "loss": 0.6148, "step": 16961 }, { "epoch": 1.149264855342503, "grad_norm": 4.836153030395508, "learning_rate": 7.78068314052981e-05, "loss": 0.5284, "step": 16962 }, { "epoch": 1.1493326106104749, "grad_norm": 5.184370994567871, "learning_rate": 7.780546238620028e-05, "loss": 0.5887, "step": 16963 }, { "epoch": 1.149400365878447, "grad_norm": 7.310936450958252, "learning_rate": 7.780409336710248e-05, "loss": 0.6862, "step": 16964 }, { "epoch": 1.149468121146419, "grad_norm": 5.159470081329346, "learning_rate": 7.780272434800466e-05, "loss": 0.6848, "step": 16965 }, { "epoch": 1.1495358764143913, "grad_norm": 5.7770161628723145, "learning_rate": 7.780135532890684e-05, "loss": 0.9589, "step": 16966 }, { "epoch": 1.1496036316823632, "grad_norm": 6.089867115020752, "learning_rate": 7.779998630980903e-05, "loss": 0.6904, "step": 16967 }, { "epoch": 1.1496713869503354, "grad_norm": 4.882379531860352, "learning_rate": 7.779861729071121e-05, "loss": 0.5668, "step": 16968 }, { "epoch": 1.1497391422183074, "grad_norm": 6.222308158874512, "learning_rate": 7.779724827161339e-05, "loss": 0.8463, "step": 16969 }, { "epoch": 1.1498068974862796, "grad_norm": 7.721385478973389, "learning_rate": 7.779587925251559e-05, "loss": 0.869, "step": 16970 }, { "epoch": 1.1498746527542516, "grad_norm": 4.819281101226807, "learning_rate": 7.779451023341777e-05, "loss": 0.5424, "step": 16971 }, { "epoch": 1.1499424080222238, "grad_norm": 5.347133636474609, "learning_rate": 7.779314121431995e-05, "loss": 0.5816, "step": 16972 }, { "epoch": 1.1500101632901958, "grad_norm": 6.357661724090576, "learning_rate": 7.779177219522213e-05, "loss": 0.7384, "step": 16973 }, { "epoch": 1.150077918558168, "grad_norm": 6.177036762237549, "learning_rate": 7.779040317612432e-05, "loss": 0.6792, "step": 16974 }, { "epoch": 1.15014567382614, "grad_norm": 5.559142589569092, "learning_rate": 7.77890341570265e-05, "loss": 0.6939, "step": 16975 }, { "epoch": 1.1502134290941122, "grad_norm": 4.8017473220825195, "learning_rate": 7.778766513792868e-05, "loss": 0.7028, "step": 16976 }, { "epoch": 1.1502811843620842, "grad_norm": 5.330672264099121, "learning_rate": 7.778629611883086e-05, "loss": 0.7333, "step": 16977 }, { "epoch": 1.1503489396300561, "grad_norm": 7.376459121704102, "learning_rate": 7.778492709973304e-05, "loss": 0.6522, "step": 16978 }, { "epoch": 1.1504166948980283, "grad_norm": 4.467447280883789, "learning_rate": 7.778355808063524e-05, "loss": 0.5219, "step": 16979 }, { "epoch": 1.1504844501660003, "grad_norm": 5.761083126068115, "learning_rate": 7.778218906153742e-05, "loss": 0.6823, "step": 16980 }, { "epoch": 1.1505522054339725, "grad_norm": 7.13974666595459, "learning_rate": 7.77808200424396e-05, "loss": 0.6415, "step": 16981 }, { "epoch": 1.1506199607019445, "grad_norm": 5.323427200317383, "learning_rate": 7.777945102334178e-05, "loss": 0.7416, "step": 16982 }, { "epoch": 1.1506877159699167, "grad_norm": 8.04574203491211, "learning_rate": 7.777808200424397e-05, "loss": 0.7871, "step": 16983 }, { "epoch": 1.1507554712378887, "grad_norm": 7.360369682312012, "learning_rate": 7.777671298514615e-05, "loss": 0.7206, "step": 16984 }, { "epoch": 1.1508232265058609, "grad_norm": 6.681160926818848, "learning_rate": 7.777534396604833e-05, "loss": 0.7463, "step": 16985 }, { "epoch": 1.1508909817738329, "grad_norm": 7.055415630340576, "learning_rate": 7.777397494695051e-05, "loss": 0.8558, "step": 16986 }, { "epoch": 1.150958737041805, "grad_norm": 6.905728340148926, "learning_rate": 7.777260592785269e-05, "loss": 0.5685, "step": 16987 }, { "epoch": 1.151026492309777, "grad_norm": 5.242982387542725, "learning_rate": 7.777123690875489e-05, "loss": 0.4737, "step": 16988 }, { "epoch": 1.1510942475777493, "grad_norm": 4.060085773468018, "learning_rate": 7.776986788965707e-05, "loss": 0.6109, "step": 16989 }, { "epoch": 1.1511620028457212, "grad_norm": 6.4642229080200195, "learning_rate": 7.776849887055925e-05, "loss": 0.9386, "step": 16990 }, { "epoch": 1.1512297581136934, "grad_norm": 4.4571123123168945, "learning_rate": 7.776712985146143e-05, "loss": 0.6162, "step": 16991 }, { "epoch": 1.1512975133816654, "grad_norm": 4.490910530090332, "learning_rate": 7.776576083236361e-05, "loss": 0.6082, "step": 16992 }, { "epoch": 1.1513652686496374, "grad_norm": 7.802062034606934, "learning_rate": 7.77643918132658e-05, "loss": 0.7308, "step": 16993 }, { "epoch": 1.1514330239176096, "grad_norm": 4.726348876953125, "learning_rate": 7.776302279416798e-05, "loss": 0.7596, "step": 16994 }, { "epoch": 1.1515007791855818, "grad_norm": 7.226054668426514, "learning_rate": 7.776165377507016e-05, "loss": 0.8948, "step": 16995 }, { "epoch": 1.1515685344535538, "grad_norm": 4.567721843719482, "learning_rate": 7.776028475597234e-05, "loss": 0.5063, "step": 16996 }, { "epoch": 1.1516362897215258, "grad_norm": 8.410115242004395, "learning_rate": 7.775891573687454e-05, "loss": 0.6893, "step": 16997 }, { "epoch": 1.151704044989498, "grad_norm": 5.118034839630127, "learning_rate": 7.775754671777672e-05, "loss": 0.6749, "step": 16998 }, { "epoch": 1.15177180025747, "grad_norm": 6.0118184089660645, "learning_rate": 7.77561776986789e-05, "loss": 0.6228, "step": 16999 }, { "epoch": 1.1518395555254421, "grad_norm": 4.889934062957764, "learning_rate": 7.775480867958108e-05, "loss": 0.7567, "step": 17000 }, { "epoch": 1.1519073107934141, "grad_norm": 4.716844081878662, "learning_rate": 7.775343966048326e-05, "loss": 0.6083, "step": 17001 }, { "epoch": 1.1519750660613863, "grad_norm": 6.07877254486084, "learning_rate": 7.775207064138545e-05, "loss": 0.7686, "step": 17002 }, { "epoch": 1.1520428213293583, "grad_norm": 6.810544013977051, "learning_rate": 7.775070162228763e-05, "loss": 0.5769, "step": 17003 }, { "epoch": 1.1521105765973305, "grad_norm": 6.12678337097168, "learning_rate": 7.774933260318981e-05, "loss": 0.5156, "step": 17004 }, { "epoch": 1.1521783318653025, "grad_norm": 5.999094009399414, "learning_rate": 7.774796358409199e-05, "loss": 0.5038, "step": 17005 }, { "epoch": 1.1522460871332747, "grad_norm": 5.4058332443237305, "learning_rate": 7.774659456499419e-05, "loss": 0.7177, "step": 17006 }, { "epoch": 1.1523138424012467, "grad_norm": 4.426954746246338, "learning_rate": 7.774522554589637e-05, "loss": 0.4639, "step": 17007 }, { "epoch": 1.1523815976692187, "grad_norm": 5.5401530265808105, "learning_rate": 7.774385652679855e-05, "loss": 0.818, "step": 17008 }, { "epoch": 1.1524493529371909, "grad_norm": 5.004909038543701, "learning_rate": 7.774248750770073e-05, "loss": 0.8341, "step": 17009 }, { "epoch": 1.152517108205163, "grad_norm": 8.242889404296875, "learning_rate": 7.774111848860292e-05, "loss": 0.8211, "step": 17010 }, { "epoch": 1.152584863473135, "grad_norm": 5.827528476715088, "learning_rate": 7.77397494695051e-05, "loss": 0.6921, "step": 17011 }, { "epoch": 1.152652618741107, "grad_norm": 8.284795761108398, "learning_rate": 7.773838045040728e-05, "loss": 0.7018, "step": 17012 }, { "epoch": 1.1527203740090792, "grad_norm": 8.282230377197266, "learning_rate": 7.773701143130948e-05, "loss": 0.7349, "step": 17013 }, { "epoch": 1.1527881292770512, "grad_norm": 6.315917015075684, "learning_rate": 7.773564241221166e-05, "loss": 0.7272, "step": 17014 }, { "epoch": 1.1528558845450234, "grad_norm": 5.585415363311768, "learning_rate": 7.773427339311384e-05, "loss": 0.567, "step": 17015 }, { "epoch": 1.1529236398129954, "grad_norm": 4.947295665740967, "learning_rate": 7.773290437401603e-05, "loss": 0.5206, "step": 17016 }, { "epoch": 1.1529913950809676, "grad_norm": 5.741033554077148, "learning_rate": 7.773153535491821e-05, "loss": 0.651, "step": 17017 }, { "epoch": 1.1530591503489396, "grad_norm": 5.564809322357178, "learning_rate": 7.773016633582039e-05, "loss": 0.7332, "step": 17018 }, { "epoch": 1.1531269056169118, "grad_norm": 6.336185932159424, "learning_rate": 7.772879731672257e-05, "loss": 0.8473, "step": 17019 }, { "epoch": 1.1531946608848838, "grad_norm": 10.212484359741211, "learning_rate": 7.772742829762477e-05, "loss": 0.598, "step": 17020 }, { "epoch": 1.153262416152856, "grad_norm": 5.718522548675537, "learning_rate": 7.772605927852695e-05, "loss": 0.7474, "step": 17021 }, { "epoch": 1.153330171420828, "grad_norm": 5.485384464263916, "learning_rate": 7.772469025942913e-05, "loss": 0.8378, "step": 17022 }, { "epoch": 1.1533979266888001, "grad_norm": 4.341707229614258, "learning_rate": 7.77233212403313e-05, "loss": 0.6496, "step": 17023 }, { "epoch": 1.1534656819567721, "grad_norm": 5.866021633148193, "learning_rate": 7.772195222123349e-05, "loss": 0.611, "step": 17024 }, { "epoch": 1.1535334372247443, "grad_norm": 5.35130500793457, "learning_rate": 7.772058320213568e-05, "loss": 0.7405, "step": 17025 }, { "epoch": 1.1536011924927163, "grad_norm": 5.8092780113220215, "learning_rate": 7.771921418303786e-05, "loss": 0.7732, "step": 17026 }, { "epoch": 1.1536689477606883, "grad_norm": 4.408275604248047, "learning_rate": 7.771784516394004e-05, "loss": 0.7354, "step": 17027 }, { "epoch": 1.1537367030286605, "grad_norm": 5.520874500274658, "learning_rate": 7.771647614484222e-05, "loss": 0.57, "step": 17028 }, { "epoch": 1.1538044582966325, "grad_norm": 6.835943698883057, "learning_rate": 7.771510712574442e-05, "loss": 0.8913, "step": 17029 }, { "epoch": 1.1538722135646047, "grad_norm": 6.106658458709717, "learning_rate": 7.77137381066466e-05, "loss": 0.7279, "step": 17030 }, { "epoch": 1.1539399688325767, "grad_norm": 4.718100547790527, "learning_rate": 7.771236908754878e-05, "loss": 0.5556, "step": 17031 }, { "epoch": 1.1540077241005489, "grad_norm": 6.6284871101379395, "learning_rate": 7.771100006845096e-05, "loss": 0.8279, "step": 17032 }, { "epoch": 1.1540754793685208, "grad_norm": 6.730345726013184, "learning_rate": 7.770963104935314e-05, "loss": 0.5271, "step": 17033 }, { "epoch": 1.154143234636493, "grad_norm": 8.18601131439209, "learning_rate": 7.770826203025533e-05, "loss": 0.7659, "step": 17034 }, { "epoch": 1.154210989904465, "grad_norm": 5.387197017669678, "learning_rate": 7.770689301115751e-05, "loss": 0.5248, "step": 17035 }, { "epoch": 1.1542787451724372, "grad_norm": 7.745261192321777, "learning_rate": 7.770552399205969e-05, "loss": 0.9322, "step": 17036 }, { "epoch": 1.1543465004404092, "grad_norm": 5.017677307128906, "learning_rate": 7.770415497296187e-05, "loss": 0.6844, "step": 17037 }, { "epoch": 1.1544142557083814, "grad_norm": 5.272484302520752, "learning_rate": 7.770278595386407e-05, "loss": 0.9975, "step": 17038 }, { "epoch": 1.1544820109763534, "grad_norm": 6.785526752471924, "learning_rate": 7.770141693476625e-05, "loss": 0.8657, "step": 17039 }, { "epoch": 1.1545497662443256, "grad_norm": 6.357969284057617, "learning_rate": 7.770004791566843e-05, "loss": 0.9176, "step": 17040 }, { "epoch": 1.1546175215122976, "grad_norm": 5.590163707733154, "learning_rate": 7.769867889657061e-05, "loss": 0.6711, "step": 17041 }, { "epoch": 1.1546852767802696, "grad_norm": 6.2367377281188965, "learning_rate": 7.769730987747279e-05, "loss": 0.767, "step": 17042 }, { "epoch": 1.1547530320482418, "grad_norm": 5.25771951675415, "learning_rate": 7.769594085837498e-05, "loss": 0.7303, "step": 17043 }, { "epoch": 1.154820787316214, "grad_norm": 5.044614791870117, "learning_rate": 7.769457183927716e-05, "loss": 0.5922, "step": 17044 }, { "epoch": 1.154888542584186, "grad_norm": 7.982863426208496, "learning_rate": 7.769320282017934e-05, "loss": 1.0373, "step": 17045 }, { "epoch": 1.154956297852158, "grad_norm": 5.957779884338379, "learning_rate": 7.769183380108152e-05, "loss": 0.6403, "step": 17046 }, { "epoch": 1.1550240531201301, "grad_norm": 5.288189888000488, "learning_rate": 7.76904647819837e-05, "loss": 0.6481, "step": 17047 }, { "epoch": 1.155091808388102, "grad_norm": 6.193086624145508, "learning_rate": 7.76890957628859e-05, "loss": 0.9038, "step": 17048 }, { "epoch": 1.1551595636560743, "grad_norm": 8.397180557250977, "learning_rate": 7.768772674378808e-05, "loss": 0.689, "step": 17049 }, { "epoch": 1.1552273189240463, "grad_norm": 5.024189472198486, "learning_rate": 7.768635772469026e-05, "loss": 0.7721, "step": 17050 }, { "epoch": 1.1552950741920185, "grad_norm": 5.071702480316162, "learning_rate": 7.768498870559244e-05, "loss": 0.9724, "step": 17051 }, { "epoch": 1.1553628294599905, "grad_norm": 6.766203880310059, "learning_rate": 7.768361968649463e-05, "loss": 0.8014, "step": 17052 }, { "epoch": 1.1554305847279627, "grad_norm": 7.111300945281982, "learning_rate": 7.768225066739681e-05, "loss": 0.6352, "step": 17053 }, { "epoch": 1.1554983399959347, "grad_norm": 7.011511325836182, "learning_rate": 7.768088164829899e-05, "loss": 0.8808, "step": 17054 }, { "epoch": 1.1555660952639069, "grad_norm": 5.766463756561279, "learning_rate": 7.767951262920117e-05, "loss": 0.7257, "step": 17055 }, { "epoch": 1.1556338505318788, "grad_norm": 5.269811153411865, "learning_rate": 7.767814361010337e-05, "loss": 0.557, "step": 17056 }, { "epoch": 1.1557016057998508, "grad_norm": 5.217252254486084, "learning_rate": 7.767677459100555e-05, "loss": 0.6047, "step": 17057 }, { "epoch": 1.155769361067823, "grad_norm": 5.220236778259277, "learning_rate": 7.767540557190773e-05, "loss": 0.6139, "step": 17058 }, { "epoch": 1.1558371163357952, "grad_norm": 5.2868499755859375, "learning_rate": 7.767403655280992e-05, "loss": 0.5907, "step": 17059 }, { "epoch": 1.1559048716037672, "grad_norm": 5.082064151763916, "learning_rate": 7.76726675337121e-05, "loss": 0.6459, "step": 17060 }, { "epoch": 1.1559726268717392, "grad_norm": 5.143309116363525, "learning_rate": 7.767129851461428e-05, "loss": 0.7064, "step": 17061 }, { "epoch": 1.1560403821397114, "grad_norm": 5.849390029907227, "learning_rate": 7.766992949551648e-05, "loss": 0.632, "step": 17062 }, { "epoch": 1.1561081374076834, "grad_norm": 5.502965927124023, "learning_rate": 7.766856047641866e-05, "loss": 0.5645, "step": 17063 }, { "epoch": 1.1561758926756556, "grad_norm": 7.527248382568359, "learning_rate": 7.766719145732084e-05, "loss": 0.707, "step": 17064 }, { "epoch": 1.1562436479436276, "grad_norm": 11.085021018981934, "learning_rate": 7.766582243822302e-05, "loss": 0.5026, "step": 17065 }, { "epoch": 1.1563114032115998, "grad_norm": 4.332429885864258, "learning_rate": 7.766445341912521e-05, "loss": 0.4451, "step": 17066 }, { "epoch": 1.1563791584795717, "grad_norm": 6.877386569976807, "learning_rate": 7.766308440002739e-05, "loss": 0.6717, "step": 17067 }, { "epoch": 1.156446913747544, "grad_norm": 8.853668212890625, "learning_rate": 7.766171538092957e-05, "loss": 0.7166, "step": 17068 }, { "epoch": 1.156514669015516, "grad_norm": 6.745375156402588, "learning_rate": 7.766034636183175e-05, "loss": 0.7458, "step": 17069 }, { "epoch": 1.1565824242834881, "grad_norm": 4.851019859313965, "learning_rate": 7.765897734273393e-05, "loss": 0.5425, "step": 17070 }, { "epoch": 1.15665017955146, "grad_norm": 8.129114151000977, "learning_rate": 7.765760832363613e-05, "loss": 0.7745, "step": 17071 }, { "epoch": 1.1567179348194323, "grad_norm": 7.2463603019714355, "learning_rate": 7.76562393045383e-05, "loss": 0.7822, "step": 17072 }, { "epoch": 1.1567856900874043, "grad_norm": 6.648437023162842, "learning_rate": 7.765487028544049e-05, "loss": 0.7371, "step": 17073 }, { "epoch": 1.1568534453553765, "grad_norm": 4.990374565124512, "learning_rate": 7.765350126634267e-05, "loss": 0.6751, "step": 17074 }, { "epoch": 1.1569212006233485, "grad_norm": 11.30855655670166, "learning_rate": 7.765213224724486e-05, "loss": 0.5861, "step": 17075 }, { "epoch": 1.1569889558913204, "grad_norm": 6.271945953369141, "learning_rate": 7.765076322814704e-05, "loss": 0.6633, "step": 17076 }, { "epoch": 1.1570567111592927, "grad_norm": 6.8997392654418945, "learning_rate": 7.764939420904922e-05, "loss": 0.6447, "step": 17077 }, { "epoch": 1.1571244664272646, "grad_norm": 7.4019389152526855, "learning_rate": 7.76480251899514e-05, "loss": 0.7981, "step": 17078 }, { "epoch": 1.1571922216952368, "grad_norm": 8.452839851379395, "learning_rate": 7.764665617085358e-05, "loss": 0.9539, "step": 17079 }, { "epoch": 1.1572599769632088, "grad_norm": 5.080465793609619, "learning_rate": 7.764528715175578e-05, "loss": 0.6904, "step": 17080 }, { "epoch": 1.157327732231181, "grad_norm": 5.674568176269531, "learning_rate": 7.764391813265796e-05, "loss": 0.5814, "step": 17081 }, { "epoch": 1.157395487499153, "grad_norm": 5.936946392059326, "learning_rate": 7.764254911356014e-05, "loss": 0.7214, "step": 17082 }, { "epoch": 1.1574632427671252, "grad_norm": 5.97898530960083, "learning_rate": 7.764118009446232e-05, "loss": 0.8698, "step": 17083 }, { "epoch": 1.1575309980350972, "grad_norm": 6.230330467224121, "learning_rate": 7.763981107536451e-05, "loss": 0.7555, "step": 17084 }, { "epoch": 1.1575987533030694, "grad_norm": 4.197784900665283, "learning_rate": 7.763844205626669e-05, "loss": 0.5187, "step": 17085 }, { "epoch": 1.1576665085710414, "grad_norm": 7.4203643798828125, "learning_rate": 7.763707303716887e-05, "loss": 0.9264, "step": 17086 }, { "epoch": 1.1577342638390136, "grad_norm": 8.698983192443848, "learning_rate": 7.763570401807105e-05, "loss": 0.605, "step": 17087 }, { "epoch": 1.1578020191069855, "grad_norm": 6.924211502075195, "learning_rate": 7.763433499897323e-05, "loss": 0.7919, "step": 17088 }, { "epoch": 1.1578697743749577, "grad_norm": 4.469345569610596, "learning_rate": 7.763296597987543e-05, "loss": 0.4836, "step": 17089 }, { "epoch": 1.1579375296429297, "grad_norm": 6.712977409362793, "learning_rate": 7.76315969607776e-05, "loss": 0.7123, "step": 17090 }, { "epoch": 1.1580052849109017, "grad_norm": 5.67714786529541, "learning_rate": 7.763022794167979e-05, "loss": 0.7613, "step": 17091 }, { "epoch": 1.158073040178874, "grad_norm": 6.554959297180176, "learning_rate": 7.762885892258197e-05, "loss": 0.7535, "step": 17092 }, { "epoch": 1.1581407954468461, "grad_norm": 6.625911235809326, "learning_rate": 7.762748990348415e-05, "loss": 0.8759, "step": 17093 }, { "epoch": 1.158208550714818, "grad_norm": 6.199068069458008, "learning_rate": 7.762612088438634e-05, "loss": 1.0106, "step": 17094 }, { "epoch": 1.15827630598279, "grad_norm": 7.14901876449585, "learning_rate": 7.762475186528852e-05, "loss": 0.665, "step": 17095 }, { "epoch": 1.1583440612507623, "grad_norm": 8.458894729614258, "learning_rate": 7.76233828461907e-05, "loss": 0.8775, "step": 17096 }, { "epoch": 1.1584118165187343, "grad_norm": 5.443024635314941, "learning_rate": 7.762201382709288e-05, "loss": 0.6247, "step": 17097 }, { "epoch": 1.1584795717867065, "grad_norm": 5.4433817863464355, "learning_rate": 7.762064480799508e-05, "loss": 0.8298, "step": 17098 }, { "epoch": 1.1585473270546784, "grad_norm": 8.201144218444824, "learning_rate": 7.761927578889726e-05, "loss": 0.5813, "step": 17099 }, { "epoch": 1.1586150823226506, "grad_norm": 5.2088236808776855, "learning_rate": 7.761790676979944e-05, "loss": 0.7396, "step": 17100 }, { "epoch": 1.1586828375906226, "grad_norm": 7.154380798339844, "learning_rate": 7.761653775070162e-05, "loss": 0.6321, "step": 17101 }, { "epoch": 1.1587505928585948, "grad_norm": 7.217487812042236, "learning_rate": 7.761516873160381e-05, "loss": 0.4882, "step": 17102 }, { "epoch": 1.1588183481265668, "grad_norm": 5.230490684509277, "learning_rate": 7.761379971250599e-05, "loss": 0.6101, "step": 17103 }, { "epoch": 1.158886103394539, "grad_norm": 8.068559646606445, "learning_rate": 7.761243069340817e-05, "loss": 0.594, "step": 17104 }, { "epoch": 1.158953858662511, "grad_norm": 4.883063316345215, "learning_rate": 7.761106167431037e-05, "loss": 0.6828, "step": 17105 }, { "epoch": 1.159021613930483, "grad_norm": 6.669475078582764, "learning_rate": 7.760969265521255e-05, "loss": 0.8075, "step": 17106 }, { "epoch": 1.1590893691984552, "grad_norm": 7.147501468658447, "learning_rate": 7.760832363611473e-05, "loss": 0.6016, "step": 17107 }, { "epoch": 1.1591571244664274, "grad_norm": 5.705437660217285, "learning_rate": 7.760695461701692e-05, "loss": 0.7725, "step": 17108 }, { "epoch": 1.1592248797343994, "grad_norm": 7.769946098327637, "learning_rate": 7.76055855979191e-05, "loss": 0.6734, "step": 17109 }, { "epoch": 1.1592926350023713, "grad_norm": 5.215763092041016, "learning_rate": 7.760421657882128e-05, "loss": 0.659, "step": 17110 }, { "epoch": 1.1593603902703435, "grad_norm": 6.284592628479004, "learning_rate": 7.760284755972346e-05, "loss": 0.7078, "step": 17111 }, { "epoch": 1.1594281455383155, "grad_norm": 6.298258304595947, "learning_rate": 7.760147854062566e-05, "loss": 0.4822, "step": 17112 }, { "epoch": 1.1594959008062877, "grad_norm": 6.485654830932617, "learning_rate": 7.760010952152784e-05, "loss": 0.7425, "step": 17113 }, { "epoch": 1.1595636560742597, "grad_norm": 4.581310272216797, "learning_rate": 7.759874050243002e-05, "loss": 0.5134, "step": 17114 }, { "epoch": 1.159631411342232, "grad_norm": 5.878685474395752, "learning_rate": 7.75973714833322e-05, "loss": 0.71, "step": 17115 }, { "epoch": 1.159699166610204, "grad_norm": 5.818406581878662, "learning_rate": 7.759600246423439e-05, "loss": 0.705, "step": 17116 }, { "epoch": 1.159766921878176, "grad_norm": 6.247715950012207, "learning_rate": 7.759463344513657e-05, "loss": 0.747, "step": 17117 }, { "epoch": 1.159834677146148, "grad_norm": 7.777171611785889, "learning_rate": 7.759326442603875e-05, "loss": 0.8842, "step": 17118 }, { "epoch": 1.1599024324141203, "grad_norm": 4.369725704193115, "learning_rate": 7.759189540694093e-05, "loss": 0.5713, "step": 17119 }, { "epoch": 1.1599701876820923, "grad_norm": 5.170098304748535, "learning_rate": 7.759052638784311e-05, "loss": 0.7709, "step": 17120 }, { "epoch": 1.1600379429500645, "grad_norm": 7.886532306671143, "learning_rate": 7.75891573687453e-05, "loss": 0.5762, "step": 17121 }, { "epoch": 1.1601056982180364, "grad_norm": 6.549617290496826, "learning_rate": 7.758778834964749e-05, "loss": 0.597, "step": 17122 }, { "epoch": 1.1601734534860086, "grad_norm": 8.460987091064453, "learning_rate": 7.758641933054967e-05, "loss": 0.7194, "step": 17123 }, { "epoch": 1.1602412087539806, "grad_norm": 8.659561157226562, "learning_rate": 7.758505031145185e-05, "loss": 0.845, "step": 17124 }, { "epoch": 1.1603089640219526, "grad_norm": 6.515053749084473, "learning_rate": 7.758368129235403e-05, "loss": 0.9518, "step": 17125 }, { "epoch": 1.1603767192899248, "grad_norm": 14.59138011932373, "learning_rate": 7.758231227325622e-05, "loss": 0.6451, "step": 17126 }, { "epoch": 1.1604444745578968, "grad_norm": 5.754178047180176, "learning_rate": 7.75809432541584e-05, "loss": 0.6652, "step": 17127 }, { "epoch": 1.160512229825869, "grad_norm": 5.252496242523193, "learning_rate": 7.757957423506058e-05, "loss": 0.6412, "step": 17128 }, { "epoch": 1.160579985093841, "grad_norm": 7.788862228393555, "learning_rate": 7.757820521596276e-05, "loss": 0.7068, "step": 17129 }, { "epoch": 1.1606477403618132, "grad_norm": 5.3729143142700195, "learning_rate": 7.757683619686496e-05, "loss": 0.6523, "step": 17130 }, { "epoch": 1.1607154956297852, "grad_norm": 4.968121528625488, "learning_rate": 7.757546717776714e-05, "loss": 0.5823, "step": 17131 }, { "epoch": 1.1607832508977574, "grad_norm": 6.920864105224609, "learning_rate": 7.757409815866932e-05, "loss": 0.8567, "step": 17132 }, { "epoch": 1.1608510061657293, "grad_norm": 8.479312896728516, "learning_rate": 7.75727291395715e-05, "loss": 0.8317, "step": 17133 }, { "epoch": 1.1609187614337015, "grad_norm": 8.27676010131836, "learning_rate": 7.757136012047368e-05, "loss": 0.7197, "step": 17134 }, { "epoch": 1.1609865167016735, "grad_norm": 4.960324764251709, "learning_rate": 7.756999110137587e-05, "loss": 0.6568, "step": 17135 }, { "epoch": 1.1610542719696457, "grad_norm": 5.801924705505371, "learning_rate": 7.756862208227805e-05, "loss": 0.7134, "step": 17136 }, { "epoch": 1.1611220272376177, "grad_norm": 7.101438045501709, "learning_rate": 7.756725306318023e-05, "loss": 0.7979, "step": 17137 }, { "epoch": 1.16118978250559, "grad_norm": 3.8948190212249756, "learning_rate": 7.756588404408241e-05, "loss": 0.6097, "step": 17138 }, { "epoch": 1.1612575377735619, "grad_norm": 5.797446250915527, "learning_rate": 7.75645150249846e-05, "loss": 0.68, "step": 17139 }, { "epoch": 1.1613252930415339, "grad_norm": 7.5896525382995605, "learning_rate": 7.756314600588679e-05, "loss": 0.7619, "step": 17140 }, { "epoch": 1.161393048309506, "grad_norm": 4.365643501281738, "learning_rate": 7.756177698678897e-05, "loss": 0.7374, "step": 17141 }, { "epoch": 1.1614608035774783, "grad_norm": 5.378994941711426, "learning_rate": 7.756040796769115e-05, "loss": 0.7987, "step": 17142 }, { "epoch": 1.1615285588454503, "grad_norm": 5.658888339996338, "learning_rate": 7.755903894859333e-05, "loss": 0.6007, "step": 17143 }, { "epoch": 1.1615963141134222, "grad_norm": 4.8605170249938965, "learning_rate": 7.755766992949552e-05, "loss": 0.5425, "step": 17144 }, { "epoch": 1.1616640693813944, "grad_norm": 6.8122100830078125, "learning_rate": 7.75563009103977e-05, "loss": 0.5716, "step": 17145 }, { "epoch": 1.1617318246493664, "grad_norm": 5.280952453613281, "learning_rate": 7.755493189129988e-05, "loss": 0.8245, "step": 17146 }, { "epoch": 1.1617995799173386, "grad_norm": 6.752953052520752, "learning_rate": 7.755356287220206e-05, "loss": 0.7489, "step": 17147 }, { "epoch": 1.1618673351853106, "grad_norm": 5.453287124633789, "learning_rate": 7.755219385310424e-05, "loss": 0.6679, "step": 17148 }, { "epoch": 1.1619350904532828, "grad_norm": 5.091671466827393, "learning_rate": 7.755082483400644e-05, "loss": 0.5512, "step": 17149 }, { "epoch": 1.1620028457212548, "grad_norm": 7.830700397491455, "learning_rate": 7.754945581490862e-05, "loss": 0.7014, "step": 17150 }, { "epoch": 1.162070600989227, "grad_norm": 6.83212947845459, "learning_rate": 7.75480867958108e-05, "loss": 0.4873, "step": 17151 }, { "epoch": 1.162138356257199, "grad_norm": 6.0037841796875, "learning_rate": 7.754671777671299e-05, "loss": 0.6945, "step": 17152 }, { "epoch": 1.1622061115251712, "grad_norm": 5.744686603546143, "learning_rate": 7.754534875761517e-05, "loss": 0.805, "step": 17153 }, { "epoch": 1.1622738667931432, "grad_norm": 8.571599960327148, "learning_rate": 7.754397973851737e-05, "loss": 0.8147, "step": 17154 }, { "epoch": 1.1623416220611151, "grad_norm": 5.277167320251465, "learning_rate": 7.754261071941955e-05, "loss": 0.6513, "step": 17155 }, { "epoch": 1.1624093773290873, "grad_norm": 5.50898551940918, "learning_rate": 7.754124170032173e-05, "loss": 0.7769, "step": 17156 }, { "epoch": 1.1624771325970595, "grad_norm": 6.313873767852783, "learning_rate": 7.75398726812239e-05, "loss": 0.7234, "step": 17157 }, { "epoch": 1.1625448878650315, "grad_norm": 5.537959575653076, "learning_rate": 7.75385036621261e-05, "loss": 0.7568, "step": 17158 }, { "epoch": 1.1626126431330035, "grad_norm": 5.766448497772217, "learning_rate": 7.753713464302828e-05, "loss": 0.6761, "step": 17159 }, { "epoch": 1.1626803984009757, "grad_norm": 6.09381628036499, "learning_rate": 7.753576562393046e-05, "loss": 0.8554, "step": 17160 }, { "epoch": 1.1627481536689477, "grad_norm": 5.5013251304626465, "learning_rate": 7.753439660483264e-05, "loss": 0.6297, "step": 17161 }, { "epoch": 1.1628159089369199, "grad_norm": 6.226048946380615, "learning_rate": 7.753302758573483e-05, "loss": 0.7102, "step": 17162 }, { "epoch": 1.1628836642048919, "grad_norm": 5.695722579956055, "learning_rate": 7.753165856663702e-05, "loss": 0.5396, "step": 17163 }, { "epoch": 1.162951419472864, "grad_norm": 6.318460941314697, "learning_rate": 7.75302895475392e-05, "loss": 0.7032, "step": 17164 }, { "epoch": 1.163019174740836, "grad_norm": 6.319543361663818, "learning_rate": 7.752892052844138e-05, "loss": 0.5056, "step": 17165 }, { "epoch": 1.1630869300088083, "grad_norm": 5.164156913757324, "learning_rate": 7.752755150934356e-05, "loss": 0.5995, "step": 17166 }, { "epoch": 1.1631546852767802, "grad_norm": 5.994420051574707, "learning_rate": 7.752618249024575e-05, "loss": 1.1241, "step": 17167 }, { "epoch": 1.1632224405447524, "grad_norm": 5.081625461578369, "learning_rate": 7.752481347114793e-05, "loss": 0.6978, "step": 17168 }, { "epoch": 1.1632901958127244, "grad_norm": 5.624451637268066, "learning_rate": 7.752344445205011e-05, "loss": 0.7173, "step": 17169 }, { "epoch": 1.1633579510806966, "grad_norm": 4.424026966094971, "learning_rate": 7.752207543295229e-05, "loss": 0.4382, "step": 17170 }, { "epoch": 1.1634257063486686, "grad_norm": 6.661045551300049, "learning_rate": 7.752070641385449e-05, "loss": 0.6755, "step": 17171 }, { "epoch": 1.1634934616166408, "grad_norm": 5.17708158493042, "learning_rate": 7.751933739475667e-05, "loss": 0.4775, "step": 17172 }, { "epoch": 1.1635612168846128, "grad_norm": 6.10673713684082, "learning_rate": 7.751796837565885e-05, "loss": 0.7051, "step": 17173 }, { "epoch": 1.1636289721525848, "grad_norm": 5.64744234085083, "learning_rate": 7.751659935656103e-05, "loss": 0.7049, "step": 17174 }, { "epoch": 1.163696727420557, "grad_norm": 7.636764049530029, "learning_rate": 7.75152303374632e-05, "loss": 0.7381, "step": 17175 }, { "epoch": 1.163764482688529, "grad_norm": 10.210199356079102, "learning_rate": 7.75138613183654e-05, "loss": 0.8437, "step": 17176 }, { "epoch": 1.1638322379565011, "grad_norm": 4.762551784515381, "learning_rate": 7.751249229926758e-05, "loss": 0.5048, "step": 17177 }, { "epoch": 1.1638999932244731, "grad_norm": 5.5700883865356445, "learning_rate": 7.751112328016976e-05, "loss": 0.8869, "step": 17178 }, { "epoch": 1.1639677484924453, "grad_norm": 5.91666316986084, "learning_rate": 7.750975426107194e-05, "loss": 0.6451, "step": 17179 }, { "epoch": 1.1640355037604173, "grad_norm": 5.347517013549805, "learning_rate": 7.750838524197412e-05, "loss": 0.6594, "step": 17180 }, { "epoch": 1.1641032590283895, "grad_norm": 5.739136219024658, "learning_rate": 7.750701622287632e-05, "loss": 0.6144, "step": 17181 }, { "epoch": 1.1641710142963615, "grad_norm": 5.545490264892578, "learning_rate": 7.75056472037785e-05, "loss": 0.5842, "step": 17182 }, { "epoch": 1.1642387695643337, "grad_norm": 7.26037073135376, "learning_rate": 7.750427818468068e-05, "loss": 0.7446, "step": 17183 }, { "epoch": 1.1643065248323057, "grad_norm": 8.13321590423584, "learning_rate": 7.750290916558286e-05, "loss": 0.7403, "step": 17184 }, { "epoch": 1.1643742801002779, "grad_norm": 5.851143836975098, "learning_rate": 7.750154014648505e-05, "loss": 0.6878, "step": 17185 }, { "epoch": 1.1644420353682499, "grad_norm": 5.682823181152344, "learning_rate": 7.750017112738723e-05, "loss": 0.7803, "step": 17186 }, { "epoch": 1.164509790636222, "grad_norm": 7.069244861602783, "learning_rate": 7.749880210828941e-05, "loss": 1.0644, "step": 17187 }, { "epoch": 1.164577545904194, "grad_norm": 4.923807144165039, "learning_rate": 7.749743308919159e-05, "loss": 0.5087, "step": 17188 }, { "epoch": 1.164645301172166, "grad_norm": 7.384223937988281, "learning_rate": 7.749606407009377e-05, "loss": 0.6694, "step": 17189 }, { "epoch": 1.1647130564401382, "grad_norm": 4.950394630432129, "learning_rate": 7.749469505099597e-05, "loss": 0.5682, "step": 17190 }, { "epoch": 1.1647808117081102, "grad_norm": 3.849876642227173, "learning_rate": 7.749332603189815e-05, "loss": 0.4433, "step": 17191 }, { "epoch": 1.1648485669760824, "grad_norm": 6.983705520629883, "learning_rate": 7.749195701280033e-05, "loss": 0.9134, "step": 17192 }, { "epoch": 1.1649163222440544, "grad_norm": 5.343315601348877, "learning_rate": 7.74905879937025e-05, "loss": 0.6362, "step": 17193 }, { "epoch": 1.1649840775120266, "grad_norm": 8.281697273254395, "learning_rate": 7.74892189746047e-05, "loss": 0.8319, "step": 17194 }, { "epoch": 1.1650518327799986, "grad_norm": 7.667835712432861, "learning_rate": 7.748784995550688e-05, "loss": 0.6045, "step": 17195 }, { "epoch": 1.1651195880479708, "grad_norm": 7.925049304962158, "learning_rate": 7.748648093640906e-05, "loss": 0.7674, "step": 17196 }, { "epoch": 1.1651873433159428, "grad_norm": 6.60289192199707, "learning_rate": 7.748511191731124e-05, "loss": 0.7478, "step": 17197 }, { "epoch": 1.165255098583915, "grad_norm": 4.811013698577881, "learning_rate": 7.748374289821344e-05, "loss": 0.5568, "step": 17198 }, { "epoch": 1.165322853851887, "grad_norm": 5.416989803314209, "learning_rate": 7.748237387911562e-05, "loss": 0.667, "step": 17199 }, { "epoch": 1.1653906091198591, "grad_norm": 5.876220703125, "learning_rate": 7.74810048600178e-05, "loss": 0.8068, "step": 17200 }, { "epoch": 1.1654583643878311, "grad_norm": 5.452373504638672, "learning_rate": 7.747963584091999e-05, "loss": 0.7206, "step": 17201 }, { "epoch": 1.1655261196558033, "grad_norm": 5.000153064727783, "learning_rate": 7.747826682182217e-05, "loss": 0.8598, "step": 17202 }, { "epoch": 1.1655938749237753, "grad_norm": 4.371356010437012, "learning_rate": 7.747689780272435e-05, "loss": 0.6802, "step": 17203 }, { "epoch": 1.1656616301917473, "grad_norm": 5.595484733581543, "learning_rate": 7.747552878362654e-05, "loss": 0.71, "step": 17204 }, { "epoch": 1.1657293854597195, "grad_norm": 6.505746364593506, "learning_rate": 7.747415976452873e-05, "loss": 0.7463, "step": 17205 }, { "epoch": 1.1657971407276917, "grad_norm": 8.62131404876709, "learning_rate": 7.74727907454309e-05, "loss": 0.5391, "step": 17206 }, { "epoch": 1.1658648959956637, "grad_norm": 4.648898601531982, "learning_rate": 7.747142172633309e-05, "loss": 0.5731, "step": 17207 }, { "epoch": 1.1659326512636357, "grad_norm": 4.276782989501953, "learning_rate": 7.747005270723528e-05, "loss": 0.5733, "step": 17208 }, { "epoch": 1.1660004065316079, "grad_norm": 7.70058012008667, "learning_rate": 7.746868368813746e-05, "loss": 0.655, "step": 17209 }, { "epoch": 1.1660681617995798, "grad_norm": 6.740431785583496, "learning_rate": 7.746731466903964e-05, "loss": 0.782, "step": 17210 }, { "epoch": 1.166135917067552, "grad_norm": 5.23295783996582, "learning_rate": 7.746594564994182e-05, "loss": 0.7198, "step": 17211 }, { "epoch": 1.166203672335524, "grad_norm": 4.6036458015441895, "learning_rate": 7.7464576630844e-05, "loss": 0.7787, "step": 17212 }, { "epoch": 1.1662714276034962, "grad_norm": 4.967904567718506, "learning_rate": 7.74632076117462e-05, "loss": 0.6894, "step": 17213 }, { "epoch": 1.1663391828714682, "grad_norm": 6.81717586517334, "learning_rate": 7.746183859264838e-05, "loss": 0.6447, "step": 17214 }, { "epoch": 1.1664069381394404, "grad_norm": 5.5367112159729, "learning_rate": 7.746046957355056e-05, "loss": 0.6741, "step": 17215 }, { "epoch": 1.1664746934074124, "grad_norm": 6.124912261962891, "learning_rate": 7.745910055445274e-05, "loss": 0.8282, "step": 17216 }, { "epoch": 1.1665424486753846, "grad_norm": 7.484246253967285, "learning_rate": 7.745773153535493e-05, "loss": 0.8897, "step": 17217 }, { "epoch": 1.1666102039433566, "grad_norm": 5.709711074829102, "learning_rate": 7.745636251625711e-05, "loss": 0.7469, "step": 17218 }, { "epoch": 1.1666779592113288, "grad_norm": 5.799917221069336, "learning_rate": 7.745499349715929e-05, "loss": 0.6627, "step": 17219 }, { "epoch": 1.1667457144793008, "grad_norm": 8.688015937805176, "learning_rate": 7.745362447806147e-05, "loss": 0.8772, "step": 17220 }, { "epoch": 1.166813469747273, "grad_norm": 5.448634147644043, "learning_rate": 7.745225545896365e-05, "loss": 0.5945, "step": 17221 }, { "epoch": 1.166881225015245, "grad_norm": 5.151609420776367, "learning_rate": 7.745088643986585e-05, "loss": 0.557, "step": 17222 }, { "epoch": 1.166948980283217, "grad_norm": 7.523448467254639, "learning_rate": 7.744951742076803e-05, "loss": 0.7231, "step": 17223 }, { "epoch": 1.1670167355511891, "grad_norm": 6.95708703994751, "learning_rate": 7.74481484016702e-05, "loss": 0.9136, "step": 17224 }, { "epoch": 1.167084490819161, "grad_norm": 4.518056869506836, "learning_rate": 7.744677938257239e-05, "loss": 0.6328, "step": 17225 }, { "epoch": 1.1671522460871333, "grad_norm": 5.213376522064209, "learning_rate": 7.744541036347457e-05, "loss": 0.5484, "step": 17226 }, { "epoch": 1.1672200013551053, "grad_norm": 6.388365268707275, "learning_rate": 7.744404134437676e-05, "loss": 0.6401, "step": 17227 }, { "epoch": 1.1672877566230775, "grad_norm": 5.914669990539551, "learning_rate": 7.744267232527894e-05, "loss": 0.7474, "step": 17228 }, { "epoch": 1.1673555118910495, "grad_norm": 7.565332889556885, "learning_rate": 7.744130330618112e-05, "loss": 0.637, "step": 17229 }, { "epoch": 1.1674232671590217, "grad_norm": 4.865163326263428, "learning_rate": 7.74399342870833e-05, "loss": 0.6212, "step": 17230 }, { "epoch": 1.1674910224269937, "grad_norm": 4.324069976806641, "learning_rate": 7.74385652679855e-05, "loss": 0.533, "step": 17231 }, { "epoch": 1.1675587776949659, "grad_norm": 5.414638519287109, "learning_rate": 7.743719624888768e-05, "loss": 0.4943, "step": 17232 }, { "epoch": 1.1676265329629378, "grad_norm": 6.219473838806152, "learning_rate": 7.743582722978986e-05, "loss": 0.5288, "step": 17233 }, { "epoch": 1.16769428823091, "grad_norm": 6.105441093444824, "learning_rate": 7.743445821069204e-05, "loss": 0.5303, "step": 17234 }, { "epoch": 1.167762043498882, "grad_norm": 5.521098613739014, "learning_rate": 7.743308919159422e-05, "loss": 0.6609, "step": 17235 }, { "epoch": 1.1678297987668542, "grad_norm": 5.3490424156188965, "learning_rate": 7.743172017249641e-05, "loss": 0.5023, "step": 17236 }, { "epoch": 1.1678975540348262, "grad_norm": 7.1635541915893555, "learning_rate": 7.743035115339859e-05, "loss": 0.8012, "step": 17237 }, { "epoch": 1.1679653093027982, "grad_norm": 6.178504467010498, "learning_rate": 7.742898213430077e-05, "loss": 0.8079, "step": 17238 }, { "epoch": 1.1680330645707704, "grad_norm": 4.957045078277588, "learning_rate": 7.742761311520295e-05, "loss": 0.6104, "step": 17239 }, { "epoch": 1.1681008198387424, "grad_norm": 8.841684341430664, "learning_rate": 7.742624409610515e-05, "loss": 0.551, "step": 17240 }, { "epoch": 1.1681685751067146, "grad_norm": 5.315309524536133, "learning_rate": 7.742487507700733e-05, "loss": 0.6352, "step": 17241 }, { "epoch": 1.1682363303746865, "grad_norm": 7.584867477416992, "learning_rate": 7.74235060579095e-05, "loss": 0.6937, "step": 17242 }, { "epoch": 1.1683040856426588, "grad_norm": 4.14179801940918, "learning_rate": 7.742213703881169e-05, "loss": 0.5933, "step": 17243 }, { "epoch": 1.1683718409106307, "grad_norm": 6.462168216705322, "learning_rate": 7.742076801971388e-05, "loss": 0.6561, "step": 17244 }, { "epoch": 1.168439596178603, "grad_norm": 6.578998565673828, "learning_rate": 7.741939900061606e-05, "loss": 0.7552, "step": 17245 }, { "epoch": 1.168507351446575, "grad_norm": 6.081613540649414, "learning_rate": 7.741802998151824e-05, "loss": 0.5051, "step": 17246 }, { "epoch": 1.1685751067145471, "grad_norm": 7.021066188812256, "learning_rate": 7.741666096242043e-05, "loss": 0.7581, "step": 17247 }, { "epoch": 1.168642861982519, "grad_norm": 6.672050476074219, "learning_rate": 7.741529194332262e-05, "loss": 0.832, "step": 17248 }, { "epoch": 1.1687106172504913, "grad_norm": 6.199688911437988, "learning_rate": 7.74139229242248e-05, "loss": 0.8048, "step": 17249 }, { "epoch": 1.1687783725184633, "grad_norm": 5.789974212646484, "learning_rate": 7.741255390512699e-05, "loss": 0.8967, "step": 17250 }, { "epoch": 1.1688461277864355, "grad_norm": 4.966305732727051, "learning_rate": 7.741118488602917e-05, "loss": 0.778, "step": 17251 }, { "epoch": 1.1689138830544075, "grad_norm": 4.729147434234619, "learning_rate": 7.740981586693135e-05, "loss": 0.4176, "step": 17252 }, { "epoch": 1.1689816383223794, "grad_norm": 5.638984203338623, "learning_rate": 7.740844684783353e-05, "loss": 0.6909, "step": 17253 }, { "epoch": 1.1690493935903516, "grad_norm": 5.719832420349121, "learning_rate": 7.740707782873572e-05, "loss": 0.5901, "step": 17254 }, { "epoch": 1.1691171488583239, "grad_norm": 5.108078956604004, "learning_rate": 7.74057088096379e-05, "loss": 0.5979, "step": 17255 }, { "epoch": 1.1691849041262958, "grad_norm": 5.3545732498168945, "learning_rate": 7.740433979054009e-05, "loss": 0.5158, "step": 17256 }, { "epoch": 1.1692526593942678, "grad_norm": 5.225886344909668, "learning_rate": 7.740297077144227e-05, "loss": 0.7653, "step": 17257 }, { "epoch": 1.16932041466224, "grad_norm": 5.831471920013428, "learning_rate": 7.740160175234445e-05, "loss": 0.7989, "step": 17258 }, { "epoch": 1.169388169930212, "grad_norm": 8.2451810836792, "learning_rate": 7.740023273324664e-05, "loss": 0.6536, "step": 17259 }, { "epoch": 1.1694559251981842, "grad_norm": 6.761453151702881, "learning_rate": 7.739886371414882e-05, "loss": 0.6763, "step": 17260 }, { "epoch": 1.1695236804661562, "grad_norm": 5.2614240646362305, "learning_rate": 7.7397494695051e-05, "loss": 0.7062, "step": 17261 }, { "epoch": 1.1695914357341284, "grad_norm": 6.049597263336182, "learning_rate": 7.739612567595318e-05, "loss": 0.5726, "step": 17262 }, { "epoch": 1.1696591910021004, "grad_norm": 6.359036922454834, "learning_rate": 7.739475665685537e-05, "loss": 0.8202, "step": 17263 }, { "epoch": 1.1697269462700726, "grad_norm": 5.320462703704834, "learning_rate": 7.739338763775755e-05, "loss": 0.5932, "step": 17264 }, { "epoch": 1.1697947015380445, "grad_norm": 6.084722518920898, "learning_rate": 7.739201861865974e-05, "loss": 0.5786, "step": 17265 }, { "epoch": 1.1698624568060167, "grad_norm": 7.363154888153076, "learning_rate": 7.739064959956192e-05, "loss": 0.8631, "step": 17266 }, { "epoch": 1.1699302120739887, "grad_norm": 7.075015544891357, "learning_rate": 7.73892805804641e-05, "loss": 0.8241, "step": 17267 }, { "epoch": 1.169997967341961, "grad_norm": 5.505892276763916, "learning_rate": 7.738791156136629e-05, "loss": 0.6984, "step": 17268 }, { "epoch": 1.170065722609933, "grad_norm": 6.062048435211182, "learning_rate": 7.738654254226847e-05, "loss": 0.5967, "step": 17269 }, { "epoch": 1.1701334778779051, "grad_norm": 5.961355686187744, "learning_rate": 7.738517352317065e-05, "loss": 0.8252, "step": 17270 }, { "epoch": 1.170201233145877, "grad_norm": 5.551196098327637, "learning_rate": 7.738380450407283e-05, "loss": 0.7744, "step": 17271 }, { "epoch": 1.170268988413849, "grad_norm": 5.391801357269287, "learning_rate": 7.738243548497502e-05, "loss": 0.5997, "step": 17272 }, { "epoch": 1.1703367436818213, "grad_norm": 6.983804702758789, "learning_rate": 7.73810664658772e-05, "loss": 0.5914, "step": 17273 }, { "epoch": 1.1704044989497933, "grad_norm": 5.468769550323486, "learning_rate": 7.737969744677939e-05, "loss": 0.7202, "step": 17274 }, { "epoch": 1.1704722542177655, "grad_norm": 5.046876907348633, "learning_rate": 7.737832842768157e-05, "loss": 0.5294, "step": 17275 }, { "epoch": 1.1705400094857374, "grad_norm": 6.133605480194092, "learning_rate": 7.737695940858375e-05, "loss": 0.7646, "step": 17276 }, { "epoch": 1.1706077647537096, "grad_norm": 8.28398323059082, "learning_rate": 7.737559038948594e-05, "loss": 0.7566, "step": 17277 }, { "epoch": 1.1706755200216816, "grad_norm": 6.076024532318115, "learning_rate": 7.737422137038812e-05, "loss": 0.6782, "step": 17278 }, { "epoch": 1.1707432752896538, "grad_norm": 6.060565948486328, "learning_rate": 7.73728523512903e-05, "loss": 0.7511, "step": 17279 }, { "epoch": 1.1708110305576258, "grad_norm": 7.766849517822266, "learning_rate": 7.737148333219248e-05, "loss": 0.6535, "step": 17280 }, { "epoch": 1.170878785825598, "grad_norm": 5.996023654937744, "learning_rate": 7.737011431309466e-05, "loss": 0.7026, "step": 17281 }, { "epoch": 1.17094654109357, "grad_norm": 6.4893646240234375, "learning_rate": 7.736874529399686e-05, "loss": 0.8587, "step": 17282 }, { "epoch": 1.1710142963615422, "grad_norm": 5.484610557556152, "learning_rate": 7.736737627489904e-05, "loss": 0.5746, "step": 17283 }, { "epoch": 1.1710820516295142, "grad_norm": 7.2935309410095215, "learning_rate": 7.736600725580122e-05, "loss": 0.7526, "step": 17284 }, { "epoch": 1.1711498068974864, "grad_norm": 6.469519138336182, "learning_rate": 7.73646382367034e-05, "loss": 0.6975, "step": 17285 }, { "epoch": 1.1712175621654584, "grad_norm": 6.044117450714111, "learning_rate": 7.736326921760559e-05, "loss": 0.694, "step": 17286 }, { "epoch": 1.1712853174334303, "grad_norm": 5.34628963470459, "learning_rate": 7.736190019850777e-05, "loss": 0.7829, "step": 17287 }, { "epoch": 1.1713530727014025, "grad_norm": 6.716300010681152, "learning_rate": 7.736053117940995e-05, "loss": 0.6242, "step": 17288 }, { "epoch": 1.1714208279693745, "grad_norm": 6.748067378997803, "learning_rate": 7.735916216031213e-05, "loss": 0.7363, "step": 17289 }, { "epoch": 1.1714885832373467, "grad_norm": 5.220841407775879, "learning_rate": 7.735779314121433e-05, "loss": 0.5735, "step": 17290 }, { "epoch": 1.1715563385053187, "grad_norm": 5.8814496994018555, "learning_rate": 7.73564241221165e-05, "loss": 0.7137, "step": 17291 }, { "epoch": 1.171624093773291, "grad_norm": 7.384030342102051, "learning_rate": 7.735505510301869e-05, "loss": 0.6728, "step": 17292 }, { "epoch": 1.171691849041263, "grad_norm": 4.854050636291504, "learning_rate": 7.735368608392088e-05, "loss": 0.8363, "step": 17293 }, { "epoch": 1.171759604309235, "grad_norm": 6.271542072296143, "learning_rate": 7.735231706482306e-05, "loss": 0.6886, "step": 17294 }, { "epoch": 1.171827359577207, "grad_norm": 8.313980102539062, "learning_rate": 7.735094804572524e-05, "loss": 0.6262, "step": 17295 }, { "epoch": 1.1718951148451793, "grad_norm": 5.353496551513672, "learning_rate": 7.734957902662743e-05, "loss": 0.7738, "step": 17296 }, { "epoch": 1.1719628701131513, "grad_norm": 5.875715732574463, "learning_rate": 7.734821000752961e-05, "loss": 0.6644, "step": 17297 }, { "epoch": 1.1720306253811235, "grad_norm": 5.350170135498047, "learning_rate": 7.73468409884318e-05, "loss": 0.5832, "step": 17298 }, { "epoch": 1.1720983806490954, "grad_norm": 4.67872953414917, "learning_rate": 7.734547196933398e-05, "loss": 0.6281, "step": 17299 }, { "epoch": 1.1721661359170676, "grad_norm": 6.967146396636963, "learning_rate": 7.734410295023617e-05, "loss": 0.5029, "step": 17300 }, { "epoch": 1.1722338911850396, "grad_norm": 5.5732221603393555, "learning_rate": 7.734273393113835e-05, "loss": 0.7479, "step": 17301 }, { "epoch": 1.1723016464530116, "grad_norm": 5.325867176055908, "learning_rate": 7.734136491204053e-05, "loss": 0.5204, "step": 17302 }, { "epoch": 1.1723694017209838, "grad_norm": 6.015812873840332, "learning_rate": 7.733999589294271e-05, "loss": 0.7403, "step": 17303 }, { "epoch": 1.172437156988956, "grad_norm": 7.1903581619262695, "learning_rate": 7.73386268738449e-05, "loss": 0.6715, "step": 17304 }, { "epoch": 1.172504912256928, "grad_norm": 5.407469272613525, "learning_rate": 7.733725785474708e-05, "loss": 0.8218, "step": 17305 }, { "epoch": 1.1725726675249, "grad_norm": 5.6599249839782715, "learning_rate": 7.733588883564926e-05, "loss": 0.7434, "step": 17306 }, { "epoch": 1.1726404227928722, "grad_norm": 5.750308036804199, "learning_rate": 7.733451981655145e-05, "loss": 0.8658, "step": 17307 }, { "epoch": 1.1727081780608442, "grad_norm": 5.779434680938721, "learning_rate": 7.733315079745363e-05, "loss": 0.5016, "step": 17308 }, { "epoch": 1.1727759333288164, "grad_norm": 4.432728290557861, "learning_rate": 7.733178177835582e-05, "loss": 0.5855, "step": 17309 }, { "epoch": 1.1728436885967883, "grad_norm": 5.154073715209961, "learning_rate": 7.7330412759258e-05, "loss": 0.6117, "step": 17310 }, { "epoch": 1.1729114438647605, "grad_norm": 5.967854022979736, "learning_rate": 7.732904374016018e-05, "loss": 0.8827, "step": 17311 }, { "epoch": 1.1729791991327325, "grad_norm": 7.544610500335693, "learning_rate": 7.732767472106236e-05, "loss": 0.8073, "step": 17312 }, { "epoch": 1.1730469544007047, "grad_norm": 6.066003322601318, "learning_rate": 7.732630570196454e-05, "loss": 0.9099, "step": 17313 }, { "epoch": 1.1731147096686767, "grad_norm": 6.47094202041626, "learning_rate": 7.732493668286673e-05, "loss": 0.7394, "step": 17314 }, { "epoch": 1.173182464936649, "grad_norm": 6.1977925300598145, "learning_rate": 7.732356766376891e-05, "loss": 0.5849, "step": 17315 }, { "epoch": 1.1732502202046209, "grad_norm": 6.772620677947998, "learning_rate": 7.73221986446711e-05, "loss": 0.8209, "step": 17316 }, { "epoch": 1.173317975472593, "grad_norm": 5.0960693359375, "learning_rate": 7.732082962557328e-05, "loss": 0.6163, "step": 17317 }, { "epoch": 1.173385730740565, "grad_norm": 8.258513450622559, "learning_rate": 7.731946060647547e-05, "loss": 0.7963, "step": 17318 }, { "epoch": 1.1734534860085373, "grad_norm": 5.403371810913086, "learning_rate": 7.731809158737765e-05, "loss": 0.5509, "step": 17319 }, { "epoch": 1.1735212412765093, "grad_norm": 4.698668956756592, "learning_rate": 7.731672256827983e-05, "loss": 0.712, "step": 17320 }, { "epoch": 1.1735889965444812, "grad_norm": 5.766746997833252, "learning_rate": 7.731535354918201e-05, "loss": 0.7452, "step": 17321 }, { "epoch": 1.1736567518124534, "grad_norm": 5.701009750366211, "learning_rate": 7.731398453008419e-05, "loss": 1.0187, "step": 17322 }, { "epoch": 1.1737245070804254, "grad_norm": 4.641021251678467, "learning_rate": 7.731261551098638e-05, "loss": 0.5497, "step": 17323 }, { "epoch": 1.1737922623483976, "grad_norm": 6.468865871429443, "learning_rate": 7.731124649188857e-05, "loss": 0.9236, "step": 17324 }, { "epoch": 1.1738600176163696, "grad_norm": 4.343420505523682, "learning_rate": 7.730987747279075e-05, "loss": 0.4876, "step": 17325 }, { "epoch": 1.1739277728843418, "grad_norm": 6.445211887359619, "learning_rate": 7.730850845369293e-05, "loss": 0.7793, "step": 17326 }, { "epoch": 1.1739955281523138, "grad_norm": 5.686356544494629, "learning_rate": 7.730713943459512e-05, "loss": 0.6946, "step": 17327 }, { "epoch": 1.174063283420286, "grad_norm": 7.529437065124512, "learning_rate": 7.73057704154973e-05, "loss": 0.7395, "step": 17328 }, { "epoch": 1.174131038688258, "grad_norm": 8.172284126281738, "learning_rate": 7.730440139639948e-05, "loss": 0.6705, "step": 17329 }, { "epoch": 1.1741987939562302, "grad_norm": 8.046764373779297, "learning_rate": 7.730303237730166e-05, "loss": 0.8011, "step": 17330 }, { "epoch": 1.1742665492242021, "grad_norm": 4.910728454589844, "learning_rate": 7.730166335820384e-05, "loss": 0.7334, "step": 17331 }, { "epoch": 1.1743343044921744, "grad_norm": 6.085247993469238, "learning_rate": 7.730029433910603e-05, "loss": 0.5327, "step": 17332 }, { "epoch": 1.1744020597601463, "grad_norm": 4.2402520179748535, "learning_rate": 7.729892532000822e-05, "loss": 0.7252, "step": 17333 }, { "epoch": 1.1744698150281185, "grad_norm": 7.234801769256592, "learning_rate": 7.72975563009104e-05, "loss": 0.9923, "step": 17334 }, { "epoch": 1.1745375702960905, "grad_norm": 7.858911991119385, "learning_rate": 7.729618728181258e-05, "loss": 0.6549, "step": 17335 }, { "epoch": 1.1746053255640625, "grad_norm": 4.676433563232422, "learning_rate": 7.729481826271477e-05, "loss": 0.7768, "step": 17336 }, { "epoch": 1.1746730808320347, "grad_norm": 4.907873630523682, "learning_rate": 7.729344924361695e-05, "loss": 0.6704, "step": 17337 }, { "epoch": 1.1747408361000067, "grad_norm": 5.7177839279174805, "learning_rate": 7.729208022451913e-05, "loss": 0.713, "step": 17338 }, { "epoch": 1.1748085913679789, "grad_norm": 5.389528751373291, "learning_rate": 7.729071120542132e-05, "loss": 0.6746, "step": 17339 }, { "epoch": 1.1748763466359509, "grad_norm": 7.09976863861084, "learning_rate": 7.72893421863235e-05, "loss": 0.6885, "step": 17340 }, { "epoch": 1.174944101903923, "grad_norm": 7.006227493286133, "learning_rate": 7.728797316722569e-05, "loss": 0.9149, "step": 17341 }, { "epoch": 1.175011857171895, "grad_norm": 6.186046600341797, "learning_rate": 7.728660414812788e-05, "loss": 0.7265, "step": 17342 }, { "epoch": 1.1750796124398672, "grad_norm": 6.1935834884643555, "learning_rate": 7.728523512903006e-05, "loss": 0.8993, "step": 17343 }, { "epoch": 1.1751473677078392, "grad_norm": 5.259952545166016, "learning_rate": 7.728386610993224e-05, "loss": 0.7487, "step": 17344 }, { "epoch": 1.1752151229758114, "grad_norm": 7.327434539794922, "learning_rate": 7.728249709083442e-05, "loss": 1.0751, "step": 17345 }, { "epoch": 1.1752828782437834, "grad_norm": 5.855953216552734, "learning_rate": 7.728112807173661e-05, "loss": 0.6628, "step": 17346 }, { "epoch": 1.1753506335117556, "grad_norm": 5.843200206756592, "learning_rate": 7.72797590526388e-05, "loss": 0.8061, "step": 17347 }, { "epoch": 1.1754183887797276, "grad_norm": 5.277237892150879, "learning_rate": 7.727839003354097e-05, "loss": 0.7312, "step": 17348 }, { "epoch": 1.1754861440476998, "grad_norm": 5.734367847442627, "learning_rate": 7.727702101444315e-05, "loss": 0.6302, "step": 17349 }, { "epoch": 1.1755538993156718, "grad_norm": 4.641081809997559, "learning_rate": 7.727565199534535e-05, "loss": 0.5845, "step": 17350 }, { "epoch": 1.1756216545836438, "grad_norm": 5.636540412902832, "learning_rate": 7.727428297624753e-05, "loss": 0.7319, "step": 17351 }, { "epoch": 1.175689409851616, "grad_norm": 5.056623458862305, "learning_rate": 7.727291395714971e-05, "loss": 0.6003, "step": 17352 }, { "epoch": 1.1757571651195882, "grad_norm": 8.262724876403809, "learning_rate": 7.727154493805189e-05, "loss": 0.7032, "step": 17353 }, { "epoch": 1.1758249203875601, "grad_norm": 5.76828145980835, "learning_rate": 7.727017591895407e-05, "loss": 0.6717, "step": 17354 }, { "epoch": 1.1758926756555321, "grad_norm": 4.917673587799072, "learning_rate": 7.726880689985626e-05, "loss": 0.8381, "step": 17355 }, { "epoch": 1.1759604309235043, "grad_norm": 5.0473127365112305, "learning_rate": 7.726743788075844e-05, "loss": 0.7817, "step": 17356 }, { "epoch": 1.1760281861914763, "grad_norm": 6.269956588745117, "learning_rate": 7.726606886166062e-05, "loss": 0.7135, "step": 17357 }, { "epoch": 1.1760959414594485, "grad_norm": 5.429281711578369, "learning_rate": 7.72646998425628e-05, "loss": 0.5025, "step": 17358 }, { "epoch": 1.1761636967274205, "grad_norm": 6.20123815536499, "learning_rate": 7.726333082346499e-05, "loss": 0.618, "step": 17359 }, { "epoch": 1.1762314519953927, "grad_norm": 7.61653995513916, "learning_rate": 7.726196180436718e-05, "loss": 0.6468, "step": 17360 }, { "epoch": 1.1762992072633647, "grad_norm": 6.944809913635254, "learning_rate": 7.726059278526936e-05, "loss": 0.6998, "step": 17361 }, { "epoch": 1.1763669625313369, "grad_norm": 5.237542629241943, "learning_rate": 7.725922376617154e-05, "loss": 0.6578, "step": 17362 }, { "epoch": 1.1764347177993089, "grad_norm": 4.988475799560547, "learning_rate": 7.725785474707372e-05, "loss": 0.5578, "step": 17363 }, { "epoch": 1.176502473067281, "grad_norm": 6.360912322998047, "learning_rate": 7.725648572797591e-05, "loss": 0.6835, "step": 17364 }, { "epoch": 1.176570228335253, "grad_norm": 5.701276779174805, "learning_rate": 7.72551167088781e-05, "loss": 0.6863, "step": 17365 }, { "epoch": 1.1766379836032252, "grad_norm": 5.497084140777588, "learning_rate": 7.725374768978027e-05, "loss": 0.6515, "step": 17366 }, { "epoch": 1.1767057388711972, "grad_norm": 5.4620771408081055, "learning_rate": 7.725237867068246e-05, "loss": 0.6462, "step": 17367 }, { "epoch": 1.1767734941391694, "grad_norm": 5.820620059967041, "learning_rate": 7.725100965158464e-05, "loss": 0.5846, "step": 17368 }, { "epoch": 1.1768412494071414, "grad_norm": 5.675363540649414, "learning_rate": 7.724964063248683e-05, "loss": 0.8976, "step": 17369 }, { "epoch": 1.1769090046751134, "grad_norm": 6.178694248199463, "learning_rate": 7.724827161338901e-05, "loss": 0.7486, "step": 17370 }, { "epoch": 1.1769767599430856, "grad_norm": 7.601099014282227, "learning_rate": 7.724690259429119e-05, "loss": 0.6428, "step": 17371 }, { "epoch": 1.1770445152110576, "grad_norm": 5.221908092498779, "learning_rate": 7.724553357519337e-05, "loss": 0.5738, "step": 17372 }, { "epoch": 1.1771122704790298, "grad_norm": 8.009245872497559, "learning_rate": 7.724416455609556e-05, "loss": 0.8131, "step": 17373 }, { "epoch": 1.1771800257470018, "grad_norm": 4.87775182723999, "learning_rate": 7.724279553699774e-05, "loss": 0.8272, "step": 17374 }, { "epoch": 1.177247781014974, "grad_norm": 6.45685338973999, "learning_rate": 7.724142651789993e-05, "loss": 0.7668, "step": 17375 }, { "epoch": 1.177315536282946, "grad_norm": 6.50517463684082, "learning_rate": 7.72400574988021e-05, "loss": 1.0197, "step": 17376 }, { "epoch": 1.1773832915509181, "grad_norm": 8.832415580749512, "learning_rate": 7.723868847970429e-05, "loss": 0.5662, "step": 17377 }, { "epoch": 1.1774510468188901, "grad_norm": 4.831343650817871, "learning_rate": 7.723731946060648e-05, "loss": 0.5595, "step": 17378 }, { "epoch": 1.1775188020868623, "grad_norm": 5.61880350112915, "learning_rate": 7.723595044150866e-05, "loss": 0.7912, "step": 17379 }, { "epoch": 1.1775865573548343, "grad_norm": 6.002482891082764, "learning_rate": 7.723458142241084e-05, "loss": 0.6987, "step": 17380 }, { "epoch": 1.1776543126228065, "grad_norm": 8.140533447265625, "learning_rate": 7.723321240331302e-05, "loss": 0.863, "step": 17381 }, { "epoch": 1.1777220678907785, "grad_norm": 6.138432502746582, "learning_rate": 7.723184338421521e-05, "loss": 0.8564, "step": 17382 }, { "epoch": 1.1777898231587507, "grad_norm": 5.278548717498779, "learning_rate": 7.72304743651174e-05, "loss": 0.6709, "step": 17383 }, { "epoch": 1.1778575784267227, "grad_norm": 5.611672401428223, "learning_rate": 7.722910534601958e-05, "loss": 0.8298, "step": 17384 }, { "epoch": 1.1779253336946947, "grad_norm": 5.170347690582275, "learning_rate": 7.722773632692177e-05, "loss": 1.0291, "step": 17385 }, { "epoch": 1.1779930889626669, "grad_norm": 6.799687385559082, "learning_rate": 7.722636730782395e-05, "loss": 0.6598, "step": 17386 }, { "epoch": 1.1780608442306388, "grad_norm": 8.000127792358398, "learning_rate": 7.722499828872613e-05, "loss": 0.6322, "step": 17387 }, { "epoch": 1.178128599498611, "grad_norm": 6.476943492889404, "learning_rate": 7.722362926962832e-05, "loss": 0.6098, "step": 17388 }, { "epoch": 1.178196354766583, "grad_norm": 8.889633178710938, "learning_rate": 7.72222602505305e-05, "loss": 0.9242, "step": 17389 }, { "epoch": 1.1782641100345552, "grad_norm": 5.422774791717529, "learning_rate": 7.722089123143268e-05, "loss": 0.6973, "step": 17390 }, { "epoch": 1.1783318653025272, "grad_norm": 5.284212112426758, "learning_rate": 7.721952221233486e-05, "loss": 0.5893, "step": 17391 }, { "epoch": 1.1783996205704994, "grad_norm": 5.717512607574463, "learning_rate": 7.721815319323706e-05, "loss": 0.6507, "step": 17392 }, { "epoch": 1.1784673758384714, "grad_norm": 5.993809700012207, "learning_rate": 7.721678417413924e-05, "loss": 0.7245, "step": 17393 }, { "epoch": 1.1785351311064436, "grad_norm": 5.827890872955322, "learning_rate": 7.721541515504142e-05, "loss": 0.8094, "step": 17394 }, { "epoch": 1.1786028863744156, "grad_norm": 5.711609363555908, "learning_rate": 7.72140461359436e-05, "loss": 0.6623, "step": 17395 }, { "epoch": 1.1786706416423878, "grad_norm": 4.487983226776123, "learning_rate": 7.72126771168458e-05, "loss": 0.6765, "step": 17396 }, { "epoch": 1.1787383969103598, "grad_norm": 6.461648464202881, "learning_rate": 7.721130809774797e-05, "loss": 0.7344, "step": 17397 }, { "epoch": 1.178806152178332, "grad_norm": 5.570317268371582, "learning_rate": 7.720993907865015e-05, "loss": 0.674, "step": 17398 }, { "epoch": 1.178873907446304, "grad_norm": 6.213165760040283, "learning_rate": 7.720857005955233e-05, "loss": 0.6197, "step": 17399 }, { "epoch": 1.178941662714276, "grad_norm": 6.4340500831604, "learning_rate": 7.720720104045451e-05, "loss": 0.7551, "step": 17400 }, { "epoch": 1.1790094179822481, "grad_norm": 9.295565605163574, "learning_rate": 7.720583202135671e-05, "loss": 0.8534, "step": 17401 }, { "epoch": 1.1790771732502203, "grad_norm": 5.942518711090088, "learning_rate": 7.720446300225889e-05, "loss": 0.5783, "step": 17402 }, { "epoch": 1.1791449285181923, "grad_norm": 6.883828163146973, "learning_rate": 7.720309398316107e-05, "loss": 0.8772, "step": 17403 }, { "epoch": 1.1792126837861643, "grad_norm": 5.310081958770752, "learning_rate": 7.720172496406325e-05, "loss": 0.685, "step": 17404 }, { "epoch": 1.1792804390541365, "grad_norm": 5.378293037414551, "learning_rate": 7.720035594496544e-05, "loss": 0.7761, "step": 17405 }, { "epoch": 1.1793481943221085, "grad_norm": 5.762744426727295, "learning_rate": 7.719898692586762e-05, "loss": 0.7877, "step": 17406 }, { "epoch": 1.1794159495900807, "grad_norm": 6.828834533691406, "learning_rate": 7.71976179067698e-05, "loss": 0.7059, "step": 17407 }, { "epoch": 1.1794837048580527, "grad_norm": 5.380794525146484, "learning_rate": 7.719624888767198e-05, "loss": 0.8572, "step": 17408 }, { "epoch": 1.1795514601260249, "grad_norm": 5.599371910095215, "learning_rate": 7.719487986857417e-05, "loss": 0.6622, "step": 17409 }, { "epoch": 1.1796192153939968, "grad_norm": 6.256725311279297, "learning_rate": 7.719351084947636e-05, "loss": 0.6448, "step": 17410 }, { "epoch": 1.179686970661969, "grad_norm": 5.341912746429443, "learning_rate": 7.719214183037854e-05, "loss": 0.5639, "step": 17411 }, { "epoch": 1.179754725929941, "grad_norm": 10.262722969055176, "learning_rate": 7.719077281128072e-05, "loss": 0.6745, "step": 17412 }, { "epoch": 1.1798224811979132, "grad_norm": 6.846330642700195, "learning_rate": 7.71894037921829e-05, "loss": 0.6262, "step": 17413 }, { "epoch": 1.1798902364658852, "grad_norm": 5.529928684234619, "learning_rate": 7.718803477308508e-05, "loss": 0.8384, "step": 17414 }, { "epoch": 1.1799579917338574, "grad_norm": 5.655308723449707, "learning_rate": 7.718666575398727e-05, "loss": 0.701, "step": 17415 }, { "epoch": 1.1800257470018294, "grad_norm": 5.416240692138672, "learning_rate": 7.718529673488945e-05, "loss": 0.5655, "step": 17416 }, { "epoch": 1.1800935022698016, "grad_norm": 6.7942705154418945, "learning_rate": 7.718392771579163e-05, "loss": 0.7546, "step": 17417 }, { "epoch": 1.1801612575377736, "grad_norm": 6.258289813995361, "learning_rate": 7.718255869669382e-05, "loss": 0.8086, "step": 17418 }, { "epoch": 1.1802290128057455, "grad_norm": 7.271621227264404, "learning_rate": 7.718118967759601e-05, "loss": 0.7594, "step": 17419 }, { "epoch": 1.1802967680737178, "grad_norm": 7.857482433319092, "learning_rate": 7.717982065849819e-05, "loss": 0.7947, "step": 17420 }, { "epoch": 1.1803645233416897, "grad_norm": 5.672320365905762, "learning_rate": 7.717845163940037e-05, "loss": 0.9218, "step": 17421 }, { "epoch": 1.180432278609662, "grad_norm": 7.510291576385498, "learning_rate": 7.717708262030255e-05, "loss": 0.8078, "step": 17422 }, { "epoch": 1.180500033877634, "grad_norm": 7.348386287689209, "learning_rate": 7.717571360120473e-05, "loss": 1.0443, "step": 17423 }, { "epoch": 1.1805677891456061, "grad_norm": 5.457333087921143, "learning_rate": 7.717434458210692e-05, "loss": 0.7723, "step": 17424 }, { "epoch": 1.180635544413578, "grad_norm": 6.572354793548584, "learning_rate": 7.71729755630091e-05, "loss": 0.5917, "step": 17425 }, { "epoch": 1.1807032996815503, "grad_norm": 5.724460601806641, "learning_rate": 7.717160654391129e-05, "loss": 0.7562, "step": 17426 }, { "epoch": 1.1807710549495223, "grad_norm": 6.148141860961914, "learning_rate": 7.717023752481347e-05, "loss": 0.8323, "step": 17427 }, { "epoch": 1.1808388102174945, "grad_norm": 5.528962135314941, "learning_rate": 7.716886850571566e-05, "loss": 0.6606, "step": 17428 }, { "epoch": 1.1809065654854665, "grad_norm": 7.883500099182129, "learning_rate": 7.716749948661784e-05, "loss": 0.8106, "step": 17429 }, { "epoch": 1.1809743207534387, "grad_norm": 4.686129093170166, "learning_rate": 7.716613046752002e-05, "loss": 0.6902, "step": 17430 }, { "epoch": 1.1810420760214106, "grad_norm": 6.630269527435303, "learning_rate": 7.71647614484222e-05, "loss": 0.6001, "step": 17431 }, { "epoch": 1.1811098312893828, "grad_norm": 6.239437580108643, "learning_rate": 7.71633924293244e-05, "loss": 0.5679, "step": 17432 }, { "epoch": 1.1811775865573548, "grad_norm": 5.275092124938965, "learning_rate": 7.716202341022657e-05, "loss": 0.6574, "step": 17433 }, { "epoch": 1.1812453418253268, "grad_norm": 4.501307487487793, "learning_rate": 7.716065439112875e-05, "loss": 0.6631, "step": 17434 }, { "epoch": 1.181313097093299, "grad_norm": 4.495173931121826, "learning_rate": 7.715928537203095e-05, "loss": 0.5067, "step": 17435 }, { "epoch": 1.181380852361271, "grad_norm": 5.3275275230407715, "learning_rate": 7.715791635293313e-05, "loss": 0.5386, "step": 17436 }, { "epoch": 1.1814486076292432, "grad_norm": 9.05092716217041, "learning_rate": 7.715654733383531e-05, "loss": 0.7015, "step": 17437 }, { "epoch": 1.1815163628972152, "grad_norm": 6.003642559051514, "learning_rate": 7.71551783147375e-05, "loss": 0.7313, "step": 17438 }, { "epoch": 1.1815841181651874, "grad_norm": 4.795654296875, "learning_rate": 7.715380929563968e-05, "loss": 0.6542, "step": 17439 }, { "epoch": 1.1816518734331594, "grad_norm": 7.203171730041504, "learning_rate": 7.715244027654186e-05, "loss": 0.7249, "step": 17440 }, { "epoch": 1.1817196287011316, "grad_norm": 6.126667022705078, "learning_rate": 7.715107125744404e-05, "loss": 0.7768, "step": 17441 }, { "epoch": 1.1817873839691035, "grad_norm": 8.417376518249512, "learning_rate": 7.714970223834624e-05, "loss": 0.5883, "step": 17442 }, { "epoch": 1.1818551392370757, "grad_norm": 10.3145112991333, "learning_rate": 7.714833321924842e-05, "loss": 0.7328, "step": 17443 }, { "epoch": 1.1819228945050477, "grad_norm": 6.670629024505615, "learning_rate": 7.71469642001506e-05, "loss": 0.8297, "step": 17444 }, { "epoch": 1.18199064977302, "grad_norm": 6.059311389923096, "learning_rate": 7.714559518105278e-05, "loss": 0.7134, "step": 17445 }, { "epoch": 1.182058405040992, "grad_norm": 5.818484306335449, "learning_rate": 7.714422616195496e-05, "loss": 0.5291, "step": 17446 }, { "epoch": 1.1821261603089641, "grad_norm": 5.98686408996582, "learning_rate": 7.714285714285715e-05, "loss": 0.5893, "step": 17447 }, { "epoch": 1.182193915576936, "grad_norm": 5.623051643371582, "learning_rate": 7.714148812375933e-05, "loss": 0.6766, "step": 17448 }, { "epoch": 1.182261670844908, "grad_norm": 6.6503586769104, "learning_rate": 7.714011910466151e-05, "loss": 0.551, "step": 17449 }, { "epoch": 1.1823294261128803, "grad_norm": 8.11122989654541, "learning_rate": 7.71387500855637e-05, "loss": 0.6561, "step": 17450 }, { "epoch": 1.1823971813808525, "grad_norm": 6.778097152709961, "learning_rate": 7.713738106646589e-05, "loss": 0.8429, "step": 17451 }, { "epoch": 1.1824649366488245, "grad_norm": 4.3668107986450195, "learning_rate": 7.713601204736807e-05, "loss": 0.6024, "step": 17452 }, { "epoch": 1.1825326919167964, "grad_norm": 10.554072380065918, "learning_rate": 7.713464302827025e-05, "loss": 0.5214, "step": 17453 }, { "epoch": 1.1826004471847686, "grad_norm": 6.878169059753418, "learning_rate": 7.713327400917243e-05, "loss": 0.727, "step": 17454 }, { "epoch": 1.1826682024527406, "grad_norm": 5.530303001403809, "learning_rate": 7.713190499007461e-05, "loss": 0.6007, "step": 17455 }, { "epoch": 1.1827359577207128, "grad_norm": 6.1936354637146, "learning_rate": 7.71305359709768e-05, "loss": 0.6258, "step": 17456 }, { "epoch": 1.1828037129886848, "grad_norm": 8.023333549499512, "learning_rate": 7.712916695187898e-05, "loss": 0.6559, "step": 17457 }, { "epoch": 1.182871468256657, "grad_norm": 5.9769110679626465, "learning_rate": 7.712779793278116e-05, "loss": 0.6817, "step": 17458 }, { "epoch": 1.182939223524629, "grad_norm": 6.784766674041748, "learning_rate": 7.712642891368334e-05, "loss": 0.6251, "step": 17459 }, { "epoch": 1.1830069787926012, "grad_norm": 5.5998382568359375, "learning_rate": 7.712505989458554e-05, "loss": 0.7717, "step": 17460 }, { "epoch": 1.1830747340605732, "grad_norm": 5.586696147918701, "learning_rate": 7.712369087548772e-05, "loss": 0.6819, "step": 17461 }, { "epoch": 1.1831424893285454, "grad_norm": 5.863603591918945, "learning_rate": 7.71223218563899e-05, "loss": 0.6594, "step": 17462 }, { "epoch": 1.1832102445965174, "grad_norm": 9.297795295715332, "learning_rate": 7.712095283729208e-05, "loss": 0.7023, "step": 17463 }, { "epoch": 1.1832779998644896, "grad_norm": 4.512932300567627, "learning_rate": 7.711958381819426e-05, "loss": 0.5351, "step": 17464 }, { "epoch": 1.1833457551324615, "grad_norm": 5.763913631439209, "learning_rate": 7.711821479909645e-05, "loss": 0.5338, "step": 17465 }, { "epoch": 1.1834135104004337, "grad_norm": 6.064705848693848, "learning_rate": 7.711684577999863e-05, "loss": 0.6775, "step": 17466 }, { "epoch": 1.1834812656684057, "grad_norm": 7.101622581481934, "learning_rate": 7.711547676090081e-05, "loss": 0.8101, "step": 17467 }, { "epoch": 1.1835490209363777, "grad_norm": 5.599690914154053, "learning_rate": 7.7114107741803e-05, "loss": 0.7406, "step": 17468 }, { "epoch": 1.18361677620435, "grad_norm": 6.1384124755859375, "learning_rate": 7.711273872270518e-05, "loss": 0.8194, "step": 17469 }, { "epoch": 1.1836845314723219, "grad_norm": 4.1809611320495605, "learning_rate": 7.711136970360737e-05, "loss": 0.672, "step": 17470 }, { "epoch": 1.183752286740294, "grad_norm": 6.009151935577393, "learning_rate": 7.711000068450955e-05, "loss": 0.9584, "step": 17471 }, { "epoch": 1.183820042008266, "grad_norm": 5.323096752166748, "learning_rate": 7.710863166541173e-05, "loss": 0.51, "step": 17472 }, { "epoch": 1.1838877972762383, "grad_norm": 5.447154998779297, "learning_rate": 7.710726264631391e-05, "loss": 0.5468, "step": 17473 }, { "epoch": 1.1839555525442103, "grad_norm": 8.218817710876465, "learning_rate": 7.71058936272161e-05, "loss": 0.7344, "step": 17474 }, { "epoch": 1.1840233078121825, "grad_norm": 5.56463098526001, "learning_rate": 7.710452460811828e-05, "loss": 0.5611, "step": 17475 }, { "epoch": 1.1840910630801544, "grad_norm": 6.369572639465332, "learning_rate": 7.710315558902046e-05, "loss": 0.9165, "step": 17476 }, { "epoch": 1.1841588183481266, "grad_norm": 7.159587860107422, "learning_rate": 7.710178656992265e-05, "loss": 0.8848, "step": 17477 }, { "epoch": 1.1842265736160986, "grad_norm": 5.673058986663818, "learning_rate": 7.710041755082484e-05, "loss": 0.7024, "step": 17478 }, { "epoch": 1.1842943288840708, "grad_norm": 4.88538122177124, "learning_rate": 7.709904853172702e-05, "loss": 0.7115, "step": 17479 }, { "epoch": 1.1843620841520428, "grad_norm": 4.682442665100098, "learning_rate": 7.70976795126292e-05, "loss": 0.5612, "step": 17480 }, { "epoch": 1.184429839420015, "grad_norm": 5.996410369873047, "learning_rate": 7.70963104935314e-05, "loss": 0.6594, "step": 17481 }, { "epoch": 1.184497594687987, "grad_norm": 7.457950115203857, "learning_rate": 7.709494147443357e-05, "loss": 0.7196, "step": 17482 }, { "epoch": 1.184565349955959, "grad_norm": 9.7376127243042, "learning_rate": 7.709357245533575e-05, "loss": 0.5854, "step": 17483 }, { "epoch": 1.1846331052239312, "grad_norm": 7.527082920074463, "learning_rate": 7.709220343623795e-05, "loss": 0.6919, "step": 17484 }, { "epoch": 1.1847008604919032, "grad_norm": 6.091211795806885, "learning_rate": 7.709083441714013e-05, "loss": 1.0379, "step": 17485 }, { "epoch": 1.1847686157598754, "grad_norm": 5.3425750732421875, "learning_rate": 7.708946539804231e-05, "loss": 0.8544, "step": 17486 }, { "epoch": 1.1848363710278473, "grad_norm": 5.463139057159424, "learning_rate": 7.708809637894449e-05, "loss": 0.5774, "step": 17487 }, { "epoch": 1.1849041262958195, "grad_norm": 4.83765983581543, "learning_rate": 7.708672735984668e-05, "loss": 0.6807, "step": 17488 }, { "epoch": 1.1849718815637915, "grad_norm": 6.051468372344971, "learning_rate": 7.708535834074886e-05, "loss": 0.7163, "step": 17489 }, { "epoch": 1.1850396368317637, "grad_norm": 5.282536506652832, "learning_rate": 7.708398932165104e-05, "loss": 0.7486, "step": 17490 }, { "epoch": 1.1851073920997357, "grad_norm": 6.964774131774902, "learning_rate": 7.708262030255322e-05, "loss": 0.8596, "step": 17491 }, { "epoch": 1.185175147367708, "grad_norm": 5.607154846191406, "learning_rate": 7.70812512834554e-05, "loss": 0.5895, "step": 17492 }, { "epoch": 1.1852429026356799, "grad_norm": 6.054999351501465, "learning_rate": 7.70798822643576e-05, "loss": 0.7251, "step": 17493 }, { "epoch": 1.185310657903652, "grad_norm": 5.619368553161621, "learning_rate": 7.707851324525978e-05, "loss": 0.4792, "step": 17494 }, { "epoch": 1.185378413171624, "grad_norm": 5.682523250579834, "learning_rate": 7.707714422616196e-05, "loss": 0.6317, "step": 17495 }, { "epoch": 1.1854461684395963, "grad_norm": 5.336386680603027, "learning_rate": 7.707577520706414e-05, "loss": 0.7298, "step": 17496 }, { "epoch": 1.1855139237075683, "grad_norm": 4.904666900634766, "learning_rate": 7.707440618796633e-05, "loss": 0.5701, "step": 17497 }, { "epoch": 1.1855816789755402, "grad_norm": 7.6780571937561035, "learning_rate": 7.707303716886851e-05, "loss": 0.8559, "step": 17498 }, { "epoch": 1.1856494342435124, "grad_norm": 6.725180149078369, "learning_rate": 7.70716681497707e-05, "loss": 0.7298, "step": 17499 }, { "epoch": 1.1857171895114846, "grad_norm": 6.419045925140381, "learning_rate": 7.707029913067287e-05, "loss": 0.5927, "step": 17500 }, { "epoch": 1.1857849447794566, "grad_norm": 5.36964225769043, "learning_rate": 7.706893011157505e-05, "loss": 0.5406, "step": 17501 }, { "epoch": 1.1858527000474286, "grad_norm": 4.675073623657227, "learning_rate": 7.706756109247725e-05, "loss": 0.4895, "step": 17502 }, { "epoch": 1.1859204553154008, "grad_norm": 4.49945592880249, "learning_rate": 7.706619207337943e-05, "loss": 0.5266, "step": 17503 }, { "epoch": 1.1859882105833728, "grad_norm": 5.286568641662598, "learning_rate": 7.706482305428161e-05, "loss": 0.8502, "step": 17504 }, { "epoch": 1.186055965851345, "grad_norm": 6.318417549133301, "learning_rate": 7.706345403518379e-05, "loss": 0.9895, "step": 17505 }, { "epoch": 1.186123721119317, "grad_norm": 6.430423259735107, "learning_rate": 7.706208501608598e-05, "loss": 0.7683, "step": 17506 }, { "epoch": 1.1861914763872892, "grad_norm": 5.740241050720215, "learning_rate": 7.706071599698816e-05, "loss": 0.9374, "step": 17507 }, { "epoch": 1.1862592316552611, "grad_norm": 12.021029472351074, "learning_rate": 7.705934697789034e-05, "loss": 0.5365, "step": 17508 }, { "epoch": 1.1863269869232334, "grad_norm": 7.397732257843018, "learning_rate": 7.705797795879252e-05, "loss": 0.5, "step": 17509 }, { "epoch": 1.1863947421912053, "grad_norm": 6.771727561950684, "learning_rate": 7.70566089396947e-05, "loss": 0.545, "step": 17510 }, { "epoch": 1.1864624974591775, "grad_norm": 6.742783069610596, "learning_rate": 7.70552399205969e-05, "loss": 0.9527, "step": 17511 }, { "epoch": 1.1865302527271495, "grad_norm": 5.033138275146484, "learning_rate": 7.705387090149908e-05, "loss": 0.5438, "step": 17512 }, { "epoch": 1.1865980079951217, "grad_norm": 7.680271148681641, "learning_rate": 7.705250188240126e-05, "loss": 0.543, "step": 17513 }, { "epoch": 1.1866657632630937, "grad_norm": 6.076967716217041, "learning_rate": 7.705113286330344e-05, "loss": 0.8015, "step": 17514 }, { "epoch": 1.186733518531066, "grad_norm": 5.922079086303711, "learning_rate": 7.704976384420563e-05, "loss": 0.6701, "step": 17515 }, { "epoch": 1.1868012737990379, "grad_norm": 6.352739334106445, "learning_rate": 7.704839482510781e-05, "loss": 0.6318, "step": 17516 }, { "epoch": 1.1868690290670099, "grad_norm": 7.644301414489746, "learning_rate": 7.704702580601e-05, "loss": 0.6471, "step": 17517 }, { "epoch": 1.186936784334982, "grad_norm": 5.560800552368164, "learning_rate": 7.704565678691217e-05, "loss": 0.5093, "step": 17518 }, { "epoch": 1.187004539602954, "grad_norm": 8.711373329162598, "learning_rate": 7.704428776781435e-05, "loss": 0.6588, "step": 17519 }, { "epoch": 1.1870722948709262, "grad_norm": 5.718215465545654, "learning_rate": 7.704291874871655e-05, "loss": 0.8604, "step": 17520 }, { "epoch": 1.1871400501388982, "grad_norm": 6.022330284118652, "learning_rate": 7.704154972961873e-05, "loss": 0.7205, "step": 17521 }, { "epoch": 1.1872078054068704, "grad_norm": 7.659083366394043, "learning_rate": 7.704018071052091e-05, "loss": 0.4675, "step": 17522 }, { "epoch": 1.1872755606748424, "grad_norm": 6.095610618591309, "learning_rate": 7.703881169142309e-05, "loss": 0.6772, "step": 17523 }, { "epoch": 1.1873433159428146, "grad_norm": 5.625333786010742, "learning_rate": 7.703744267232528e-05, "loss": 0.618, "step": 17524 }, { "epoch": 1.1874110712107866, "grad_norm": 7.851975440979004, "learning_rate": 7.703607365322746e-05, "loss": 0.763, "step": 17525 }, { "epoch": 1.1874788264787588, "grad_norm": 9.398256301879883, "learning_rate": 7.703470463412964e-05, "loss": 0.6771, "step": 17526 }, { "epoch": 1.1875465817467308, "grad_norm": 9.461082458496094, "learning_rate": 7.703333561503184e-05, "loss": 0.9216, "step": 17527 }, { "epoch": 1.187614337014703, "grad_norm": 7.010152339935303, "learning_rate": 7.703196659593402e-05, "loss": 0.8298, "step": 17528 }, { "epoch": 1.187682092282675, "grad_norm": 5.272936820983887, "learning_rate": 7.70305975768362e-05, "loss": 0.7349, "step": 17529 }, { "epoch": 1.1877498475506472, "grad_norm": 6.14058780670166, "learning_rate": 7.702922855773839e-05, "loss": 0.6735, "step": 17530 }, { "epoch": 1.1878176028186191, "grad_norm": 8.246026039123535, "learning_rate": 7.702785953864057e-05, "loss": 0.6286, "step": 17531 }, { "epoch": 1.1878853580865911, "grad_norm": 8.610701560974121, "learning_rate": 7.702649051954275e-05, "loss": 0.5919, "step": 17532 }, { "epoch": 1.1879531133545633, "grad_norm": 8.594643592834473, "learning_rate": 7.702512150044493e-05, "loss": 0.4835, "step": 17533 }, { "epoch": 1.1880208686225353, "grad_norm": 4.968395709991455, "learning_rate": 7.702375248134713e-05, "loss": 0.5888, "step": 17534 }, { "epoch": 1.1880886238905075, "grad_norm": 6.422661781311035, "learning_rate": 7.702238346224931e-05, "loss": 0.5625, "step": 17535 }, { "epoch": 1.1881563791584795, "grad_norm": 9.77650260925293, "learning_rate": 7.702101444315149e-05, "loss": 0.7993, "step": 17536 }, { "epoch": 1.1882241344264517, "grad_norm": 5.65856409072876, "learning_rate": 7.701964542405367e-05, "loss": 0.5667, "step": 17537 }, { "epoch": 1.1882918896944237, "grad_norm": 6.888467311859131, "learning_rate": 7.701827640495586e-05, "loss": 0.9473, "step": 17538 }, { "epoch": 1.1883596449623959, "grad_norm": 5.9407548904418945, "learning_rate": 7.701690738585804e-05, "loss": 0.888, "step": 17539 }, { "epoch": 1.1884274002303679, "grad_norm": 4.671491622924805, "learning_rate": 7.701553836676022e-05, "loss": 0.5841, "step": 17540 }, { "epoch": 1.18849515549834, "grad_norm": 6.472855567932129, "learning_rate": 7.70141693476624e-05, "loss": 0.5864, "step": 17541 }, { "epoch": 1.188562910766312, "grad_norm": 6.266822338104248, "learning_rate": 7.701280032856458e-05, "loss": 0.6501, "step": 17542 }, { "epoch": 1.1886306660342842, "grad_norm": 5.629112243652344, "learning_rate": 7.701143130946678e-05, "loss": 0.5439, "step": 17543 }, { "epoch": 1.1886984213022562, "grad_norm": 4.828508377075195, "learning_rate": 7.701006229036896e-05, "loss": 0.5442, "step": 17544 }, { "epoch": 1.1887661765702284, "grad_norm": 5.5869059562683105, "learning_rate": 7.700869327127114e-05, "loss": 0.8221, "step": 17545 }, { "epoch": 1.1888339318382004, "grad_norm": 6.538342475891113, "learning_rate": 7.700732425217332e-05, "loss": 0.6933, "step": 17546 }, { "epoch": 1.1889016871061724, "grad_norm": 6.4926981925964355, "learning_rate": 7.70059552330755e-05, "loss": 0.8851, "step": 17547 }, { "epoch": 1.1889694423741446, "grad_norm": 6.433711528778076, "learning_rate": 7.700458621397769e-05, "loss": 0.7035, "step": 17548 }, { "epoch": 1.1890371976421168, "grad_norm": 9.490755081176758, "learning_rate": 7.700321719487987e-05, "loss": 0.6435, "step": 17549 }, { "epoch": 1.1891049529100888, "grad_norm": 5.095461845397949, "learning_rate": 7.700184817578205e-05, "loss": 0.4888, "step": 17550 }, { "epoch": 1.1891727081780608, "grad_norm": 6.992951393127441, "learning_rate": 7.700047915668423e-05, "loss": 0.6976, "step": 17551 }, { "epoch": 1.189240463446033, "grad_norm": 5.5607709884643555, "learning_rate": 7.699911013758643e-05, "loss": 0.5983, "step": 17552 }, { "epoch": 1.189308218714005, "grad_norm": 5.8281402587890625, "learning_rate": 7.699774111848861e-05, "loss": 0.9768, "step": 17553 }, { "epoch": 1.1893759739819771, "grad_norm": 6.718166351318359, "learning_rate": 7.699637209939079e-05, "loss": 0.7467, "step": 17554 }, { "epoch": 1.1894437292499491, "grad_norm": 5.299029350280762, "learning_rate": 7.699500308029297e-05, "loss": 0.4338, "step": 17555 }, { "epoch": 1.1895114845179213, "grad_norm": 7.574790954589844, "learning_rate": 7.699363406119515e-05, "loss": 0.5508, "step": 17556 }, { "epoch": 1.1895792397858933, "grad_norm": 5.672619819641113, "learning_rate": 7.699226504209734e-05, "loss": 0.8091, "step": 17557 }, { "epoch": 1.1896469950538655, "grad_norm": 4.973649978637695, "learning_rate": 7.699089602299952e-05, "loss": 0.5796, "step": 17558 }, { "epoch": 1.1897147503218375, "grad_norm": 6.112509250640869, "learning_rate": 7.69895270039017e-05, "loss": 0.7684, "step": 17559 }, { "epoch": 1.1897825055898097, "grad_norm": 5.4346723556518555, "learning_rate": 7.698815798480388e-05, "loss": 0.5285, "step": 17560 }, { "epoch": 1.1898502608577817, "grad_norm": 5.107007026672363, "learning_rate": 7.698678896570608e-05, "loss": 0.6484, "step": 17561 }, { "epoch": 1.1899180161257537, "grad_norm": 4.733124732971191, "learning_rate": 7.698541994660826e-05, "loss": 0.658, "step": 17562 }, { "epoch": 1.1899857713937259, "grad_norm": 3.8034684658050537, "learning_rate": 7.698405092751044e-05, "loss": 0.507, "step": 17563 }, { "epoch": 1.190053526661698, "grad_norm": 5.694223880767822, "learning_rate": 7.698268190841262e-05, "loss": 0.622, "step": 17564 }, { "epoch": 1.19012128192967, "grad_norm": 5.963348865509033, "learning_rate": 7.69813128893148e-05, "loss": 0.5774, "step": 17565 }, { "epoch": 1.190189037197642, "grad_norm": 6.509255886077881, "learning_rate": 7.6979943870217e-05, "loss": 0.6859, "step": 17566 }, { "epoch": 1.1902567924656142, "grad_norm": 5.054556369781494, "learning_rate": 7.697857485111917e-05, "loss": 0.7494, "step": 17567 }, { "epoch": 1.1903245477335862, "grad_norm": 6.527555465698242, "learning_rate": 7.697720583202135e-05, "loss": 1.0898, "step": 17568 }, { "epoch": 1.1903923030015584, "grad_norm": 4.6289896965026855, "learning_rate": 7.697583681292353e-05, "loss": 0.5725, "step": 17569 }, { "epoch": 1.1904600582695304, "grad_norm": 6.46843147277832, "learning_rate": 7.697446779382573e-05, "loss": 0.7417, "step": 17570 }, { "epoch": 1.1905278135375026, "grad_norm": 6.553291320800781, "learning_rate": 7.697309877472791e-05, "loss": 0.7798, "step": 17571 }, { "epoch": 1.1905955688054746, "grad_norm": 4.56556510925293, "learning_rate": 7.697172975563009e-05, "loss": 0.5706, "step": 17572 }, { "epoch": 1.1906633240734468, "grad_norm": 6.320700645446777, "learning_rate": 7.697036073653228e-05, "loss": 0.6367, "step": 17573 }, { "epoch": 1.1907310793414188, "grad_norm": 7.7214226722717285, "learning_rate": 7.696899171743446e-05, "loss": 0.8209, "step": 17574 }, { "epoch": 1.190798834609391, "grad_norm": 5.526475429534912, "learning_rate": 7.696762269833664e-05, "loss": 0.7385, "step": 17575 }, { "epoch": 1.190866589877363, "grad_norm": 5.44613790512085, "learning_rate": 7.696625367923884e-05, "loss": 0.608, "step": 17576 }, { "epoch": 1.1909343451453351, "grad_norm": 5.303778648376465, "learning_rate": 7.696488466014102e-05, "loss": 0.7954, "step": 17577 }, { "epoch": 1.1910021004133071, "grad_norm": 5.654077529907227, "learning_rate": 7.69635156410432e-05, "loss": 0.8997, "step": 17578 }, { "epoch": 1.1910698556812793, "grad_norm": 5.673882961273193, "learning_rate": 7.696214662194538e-05, "loss": 0.6305, "step": 17579 }, { "epoch": 1.1911376109492513, "grad_norm": 6.7035675048828125, "learning_rate": 7.696077760284757e-05, "loss": 0.6158, "step": 17580 }, { "epoch": 1.1912053662172233, "grad_norm": 5.399929046630859, "learning_rate": 7.695940858374975e-05, "loss": 0.6918, "step": 17581 }, { "epoch": 1.1912731214851955, "grad_norm": 6.211287975311279, "learning_rate": 7.695803956465193e-05, "loss": 0.7219, "step": 17582 }, { "epoch": 1.1913408767531675, "grad_norm": 6.3117876052856445, "learning_rate": 7.695667054555411e-05, "loss": 0.7323, "step": 17583 }, { "epoch": 1.1914086320211397, "grad_norm": 5.704460144042969, "learning_rate": 7.695530152645631e-05, "loss": 0.7325, "step": 17584 }, { "epoch": 1.1914763872891116, "grad_norm": 6.604908466339111, "learning_rate": 7.695393250735849e-05, "loss": 0.6055, "step": 17585 }, { "epoch": 1.1915441425570839, "grad_norm": 5.135843276977539, "learning_rate": 7.695256348826067e-05, "loss": 0.7543, "step": 17586 }, { "epoch": 1.1916118978250558, "grad_norm": 6.044817924499512, "learning_rate": 7.695119446916285e-05, "loss": 0.6591, "step": 17587 }, { "epoch": 1.191679653093028, "grad_norm": 10.055615425109863, "learning_rate": 7.694982545006503e-05, "loss": 0.6775, "step": 17588 }, { "epoch": 1.191747408361, "grad_norm": 5.508429050445557, "learning_rate": 7.694845643096722e-05, "loss": 0.8237, "step": 17589 }, { "epoch": 1.1918151636289722, "grad_norm": 5.428280353546143, "learning_rate": 7.69470874118694e-05, "loss": 0.5512, "step": 17590 }, { "epoch": 1.1918829188969442, "grad_norm": 6.358389377593994, "learning_rate": 7.694571839277158e-05, "loss": 0.9419, "step": 17591 }, { "epoch": 1.1919506741649164, "grad_norm": 4.868531227111816, "learning_rate": 7.694434937367376e-05, "loss": 0.6449, "step": 17592 }, { "epoch": 1.1920184294328884, "grad_norm": 4.3719000816345215, "learning_rate": 7.694298035457596e-05, "loss": 0.6134, "step": 17593 }, { "epoch": 1.1920861847008606, "grad_norm": 12.582817077636719, "learning_rate": 7.694161133547814e-05, "loss": 0.5755, "step": 17594 }, { "epoch": 1.1921539399688326, "grad_norm": 5.3732781410217285, "learning_rate": 7.694024231638032e-05, "loss": 0.732, "step": 17595 }, { "epoch": 1.1922216952368045, "grad_norm": 5.49517822265625, "learning_rate": 7.69388732972825e-05, "loss": 0.5249, "step": 17596 }, { "epoch": 1.1922894505047767, "grad_norm": 4.9107584953308105, "learning_rate": 7.693750427818468e-05, "loss": 0.5645, "step": 17597 }, { "epoch": 1.192357205772749, "grad_norm": 6.208014965057373, "learning_rate": 7.693613525908687e-05, "loss": 0.7483, "step": 17598 }, { "epoch": 1.192424961040721, "grad_norm": 5.240930080413818, "learning_rate": 7.693476623998905e-05, "loss": 0.5941, "step": 17599 }, { "epoch": 1.192492716308693, "grad_norm": 5.509008884429932, "learning_rate": 7.693339722089123e-05, "loss": 0.6262, "step": 17600 }, { "epoch": 1.1925604715766651, "grad_norm": 4.510783672332764, "learning_rate": 7.693202820179341e-05, "loss": 0.6087, "step": 17601 }, { "epoch": 1.192628226844637, "grad_norm": 7.165205478668213, "learning_rate": 7.69306591826956e-05, "loss": 0.7364, "step": 17602 }, { "epoch": 1.1926959821126093, "grad_norm": 5.687561511993408, "learning_rate": 7.692929016359779e-05, "loss": 0.8353, "step": 17603 }, { "epoch": 1.1927637373805813, "grad_norm": 7.553353786468506, "learning_rate": 7.692792114449997e-05, "loss": 1.0092, "step": 17604 }, { "epoch": 1.1928314926485535, "grad_norm": 5.063547611236572, "learning_rate": 7.692655212540215e-05, "loss": 0.7275, "step": 17605 }, { "epoch": 1.1928992479165255, "grad_norm": 5.614800930023193, "learning_rate": 7.692518310630433e-05, "loss": 0.628, "step": 17606 }, { "epoch": 1.1929670031844977, "grad_norm": 6.790213584899902, "learning_rate": 7.692381408720652e-05, "loss": 0.7567, "step": 17607 }, { "epoch": 1.1930347584524696, "grad_norm": 6.226934432983398, "learning_rate": 7.69224450681087e-05, "loss": 0.7992, "step": 17608 }, { "epoch": 1.1931025137204418, "grad_norm": 5.4407525062561035, "learning_rate": 7.692107604901088e-05, "loss": 0.59, "step": 17609 }, { "epoch": 1.1931702689884138, "grad_norm": 4.665156841278076, "learning_rate": 7.691970702991306e-05, "loss": 0.5459, "step": 17610 }, { "epoch": 1.1932380242563858, "grad_norm": 6.73590612411499, "learning_rate": 7.691833801081524e-05, "loss": 0.6124, "step": 17611 }, { "epoch": 1.193305779524358, "grad_norm": 5.0771074295043945, "learning_rate": 7.691696899171744e-05, "loss": 0.579, "step": 17612 }, { "epoch": 1.1933735347923302, "grad_norm": 9.059715270996094, "learning_rate": 7.691559997261962e-05, "loss": 0.8435, "step": 17613 }, { "epoch": 1.1934412900603022, "grad_norm": 6.360510349273682, "learning_rate": 7.69142309535218e-05, "loss": 0.7151, "step": 17614 }, { "epoch": 1.1935090453282742, "grad_norm": 4.936126232147217, "learning_rate": 7.691286193442398e-05, "loss": 0.6126, "step": 17615 }, { "epoch": 1.1935768005962464, "grad_norm": 5.501605987548828, "learning_rate": 7.691149291532617e-05, "loss": 0.6608, "step": 17616 }, { "epoch": 1.1936445558642184, "grad_norm": 5.651332855224609, "learning_rate": 7.691012389622835e-05, "loss": 0.5896, "step": 17617 }, { "epoch": 1.1937123111321906, "grad_norm": 5.945789813995361, "learning_rate": 7.690875487713053e-05, "loss": 0.9682, "step": 17618 }, { "epoch": 1.1937800664001625, "grad_norm": 6.9971604347229, "learning_rate": 7.690738585803273e-05, "loss": 0.6775, "step": 17619 }, { "epoch": 1.1938478216681347, "grad_norm": 5.1300506591796875, "learning_rate": 7.690601683893491e-05, "loss": 0.6251, "step": 17620 }, { "epoch": 1.1939155769361067, "grad_norm": 6.18295431137085, "learning_rate": 7.690464781983709e-05, "loss": 0.7284, "step": 17621 }, { "epoch": 1.193983332204079, "grad_norm": 5.232485294342041, "learning_rate": 7.690327880073928e-05, "loss": 0.6627, "step": 17622 }, { "epoch": 1.194051087472051, "grad_norm": 4.987381458282471, "learning_rate": 7.690190978164146e-05, "loss": 0.5891, "step": 17623 }, { "epoch": 1.1941188427400231, "grad_norm": 5.091625213623047, "learning_rate": 7.690054076254364e-05, "loss": 0.6001, "step": 17624 }, { "epoch": 1.194186598007995, "grad_norm": 5.142812252044678, "learning_rate": 7.689917174344582e-05, "loss": 0.4461, "step": 17625 }, { "epoch": 1.1942543532759673, "grad_norm": 5.461329460144043, "learning_rate": 7.689780272434802e-05, "loss": 0.7192, "step": 17626 }, { "epoch": 1.1943221085439393, "grad_norm": 5.987907886505127, "learning_rate": 7.68964337052502e-05, "loss": 0.7833, "step": 17627 }, { "epoch": 1.1943898638119115, "grad_norm": 6.716150760650635, "learning_rate": 7.689506468615238e-05, "loss": 0.6963, "step": 17628 }, { "epoch": 1.1944576190798835, "grad_norm": 7.355491638183594, "learning_rate": 7.689369566705456e-05, "loss": 0.483, "step": 17629 }, { "epoch": 1.1945253743478554, "grad_norm": 6.228071689605713, "learning_rate": 7.689232664795675e-05, "loss": 0.677, "step": 17630 }, { "epoch": 1.1945931296158276, "grad_norm": 6.996883869171143, "learning_rate": 7.689095762885893e-05, "loss": 0.8552, "step": 17631 }, { "epoch": 1.1946608848837996, "grad_norm": 4.602889537811279, "learning_rate": 7.688958860976111e-05, "loss": 0.5121, "step": 17632 }, { "epoch": 1.1947286401517718, "grad_norm": 5.40228271484375, "learning_rate": 7.688821959066329e-05, "loss": 0.5775, "step": 17633 }, { "epoch": 1.1947963954197438, "grad_norm": 5.13663387298584, "learning_rate": 7.688685057156547e-05, "loss": 0.7632, "step": 17634 }, { "epoch": 1.194864150687716, "grad_norm": 9.4126558303833, "learning_rate": 7.688548155246767e-05, "loss": 0.8483, "step": 17635 }, { "epoch": 1.194931905955688, "grad_norm": 5.480384349822998, "learning_rate": 7.688411253336985e-05, "loss": 0.6682, "step": 17636 }, { "epoch": 1.1949996612236602, "grad_norm": 5.476341247558594, "learning_rate": 7.688274351427203e-05, "loss": 0.6641, "step": 17637 }, { "epoch": 1.1950674164916322, "grad_norm": 4.595249652862549, "learning_rate": 7.688137449517421e-05, "loss": 0.5387, "step": 17638 }, { "epoch": 1.1951351717596044, "grad_norm": 5.596693515777588, "learning_rate": 7.68800054760764e-05, "loss": 0.7083, "step": 17639 }, { "epoch": 1.1952029270275764, "grad_norm": 6.919038772583008, "learning_rate": 7.687863645697858e-05, "loss": 0.6535, "step": 17640 }, { "epoch": 1.1952706822955486, "grad_norm": 5.938333988189697, "learning_rate": 7.687726743788076e-05, "loss": 0.8527, "step": 17641 }, { "epoch": 1.1953384375635205, "grad_norm": 5.0389299392700195, "learning_rate": 7.687589841878294e-05, "loss": 0.7157, "step": 17642 }, { "epoch": 1.1954061928314927, "grad_norm": 8.325102806091309, "learning_rate": 7.687452939968512e-05, "loss": 0.7156, "step": 17643 }, { "epoch": 1.1954739480994647, "grad_norm": 6.028964519500732, "learning_rate": 7.687316038058732e-05, "loss": 0.7134, "step": 17644 }, { "epoch": 1.1955417033674367, "grad_norm": 7.535099983215332, "learning_rate": 7.68717913614895e-05, "loss": 0.7695, "step": 17645 }, { "epoch": 1.195609458635409, "grad_norm": 5.430510520935059, "learning_rate": 7.687042234239168e-05, "loss": 0.8219, "step": 17646 }, { "epoch": 1.195677213903381, "grad_norm": 5.3170013427734375, "learning_rate": 7.686905332329386e-05, "loss": 0.9118, "step": 17647 }, { "epoch": 1.195744969171353, "grad_norm": 8.100552558898926, "learning_rate": 7.686768430419605e-05, "loss": 0.8288, "step": 17648 }, { "epoch": 1.195812724439325, "grad_norm": 6.240118980407715, "learning_rate": 7.686631528509823e-05, "loss": 0.8284, "step": 17649 }, { "epoch": 1.1958804797072973, "grad_norm": 6.110140323638916, "learning_rate": 7.686494626600041e-05, "loss": 0.7722, "step": 17650 }, { "epoch": 1.1959482349752693, "grad_norm": 6.435049057006836, "learning_rate": 7.68635772469026e-05, "loss": 0.6591, "step": 17651 }, { "epoch": 1.1960159902432415, "grad_norm": 5.954352855682373, "learning_rate": 7.686220822780477e-05, "loss": 0.6074, "step": 17652 }, { "epoch": 1.1960837455112134, "grad_norm": 5.165590763092041, "learning_rate": 7.686083920870697e-05, "loss": 0.7499, "step": 17653 }, { "epoch": 1.1961515007791856, "grad_norm": 7.155406475067139, "learning_rate": 7.685947018960915e-05, "loss": 0.8336, "step": 17654 }, { "epoch": 1.1962192560471576, "grad_norm": 5.120672702789307, "learning_rate": 7.685810117051133e-05, "loss": 0.6934, "step": 17655 }, { "epoch": 1.1962870113151298, "grad_norm": 6.173689365386963, "learning_rate": 7.685673215141351e-05, "loss": 0.4323, "step": 17656 }, { "epoch": 1.1963547665831018, "grad_norm": 4.619163513183594, "learning_rate": 7.685536313231569e-05, "loss": 0.4938, "step": 17657 }, { "epoch": 1.196422521851074, "grad_norm": 8.75632381439209, "learning_rate": 7.685399411321788e-05, "loss": 1.0098, "step": 17658 }, { "epoch": 1.196490277119046, "grad_norm": 6.584256172180176, "learning_rate": 7.685262509412006e-05, "loss": 0.5902, "step": 17659 }, { "epoch": 1.196558032387018, "grad_norm": 6.670119762420654, "learning_rate": 7.685125607502224e-05, "loss": 0.8715, "step": 17660 }, { "epoch": 1.1966257876549902, "grad_norm": 8.594388008117676, "learning_rate": 7.684988705592442e-05, "loss": 0.7048, "step": 17661 }, { "epoch": 1.1966935429229624, "grad_norm": 7.816542148590088, "learning_rate": 7.684851803682662e-05, "loss": 0.9348, "step": 17662 }, { "epoch": 1.1967612981909344, "grad_norm": 4.398709774017334, "learning_rate": 7.68471490177288e-05, "loss": 0.5985, "step": 17663 }, { "epoch": 1.1968290534589063, "grad_norm": 7.980318069458008, "learning_rate": 7.684577999863098e-05, "loss": 0.8392, "step": 17664 }, { "epoch": 1.1968968087268785, "grad_norm": 7.779125213623047, "learning_rate": 7.684441097953316e-05, "loss": 0.6489, "step": 17665 }, { "epoch": 1.1969645639948505, "grad_norm": 6.15852165222168, "learning_rate": 7.684304196043535e-05, "loss": 0.8325, "step": 17666 }, { "epoch": 1.1970323192628227, "grad_norm": 5.582149982452393, "learning_rate": 7.684167294133753e-05, "loss": 0.6112, "step": 17667 }, { "epoch": 1.1971000745307947, "grad_norm": 6.231848239898682, "learning_rate": 7.684030392223973e-05, "loss": 0.704, "step": 17668 }, { "epoch": 1.197167829798767, "grad_norm": 8.48556900024414, "learning_rate": 7.683893490314191e-05, "loss": 0.8127, "step": 17669 }, { "epoch": 1.1972355850667389, "grad_norm": 5.312507152557373, "learning_rate": 7.683756588404409e-05, "loss": 0.5856, "step": 17670 }, { "epoch": 1.197303340334711, "grad_norm": 4.0072407722473145, "learning_rate": 7.683619686494628e-05, "loss": 0.6048, "step": 17671 }, { "epoch": 1.197371095602683, "grad_norm": 4.753046989440918, "learning_rate": 7.683482784584846e-05, "loss": 0.7208, "step": 17672 }, { "epoch": 1.1974388508706553, "grad_norm": 4.263588905334473, "learning_rate": 7.683345882675064e-05, "loss": 0.6492, "step": 17673 }, { "epoch": 1.1975066061386272, "grad_norm": 5.775005340576172, "learning_rate": 7.683208980765282e-05, "loss": 0.8061, "step": 17674 }, { "epoch": 1.1975743614065995, "grad_norm": 6.587451457977295, "learning_rate": 7.6830720788555e-05, "loss": 0.7187, "step": 17675 }, { "epoch": 1.1976421166745714, "grad_norm": 7.55980110168457, "learning_rate": 7.68293517694572e-05, "loss": 0.7459, "step": 17676 }, { "epoch": 1.1977098719425436, "grad_norm": 5.851051330566406, "learning_rate": 7.682798275035938e-05, "loss": 0.7314, "step": 17677 }, { "epoch": 1.1977776272105156, "grad_norm": 6.21124792098999, "learning_rate": 7.682661373126156e-05, "loss": 0.6496, "step": 17678 }, { "epoch": 1.1978453824784876, "grad_norm": 6.034067630767822, "learning_rate": 7.682524471216374e-05, "loss": 0.7823, "step": 17679 }, { "epoch": 1.1979131377464598, "grad_norm": 5.624752521514893, "learning_rate": 7.682387569306592e-05, "loss": 0.9108, "step": 17680 }, { "epoch": 1.1979808930144318, "grad_norm": 7.310678005218506, "learning_rate": 7.682250667396811e-05, "loss": 0.7976, "step": 17681 }, { "epoch": 1.198048648282404, "grad_norm": 5.958901405334473, "learning_rate": 7.682113765487029e-05, "loss": 0.6814, "step": 17682 }, { "epoch": 1.198116403550376, "grad_norm": 5.8540120124816895, "learning_rate": 7.681976863577247e-05, "loss": 0.9074, "step": 17683 }, { "epoch": 1.1981841588183482, "grad_norm": 11.885223388671875, "learning_rate": 7.681839961667465e-05, "loss": 0.6314, "step": 17684 }, { "epoch": 1.1982519140863201, "grad_norm": 5.285735607147217, "learning_rate": 7.681703059757685e-05, "loss": 0.7848, "step": 17685 }, { "epoch": 1.1983196693542923, "grad_norm": 6.814820289611816, "learning_rate": 7.681566157847903e-05, "loss": 0.7201, "step": 17686 }, { "epoch": 1.1983874246222643, "grad_norm": 7.1838788986206055, "learning_rate": 7.681429255938121e-05, "loss": 0.8404, "step": 17687 }, { "epoch": 1.1984551798902365, "grad_norm": 5.949917793273926, "learning_rate": 7.681292354028339e-05, "loss": 0.7767, "step": 17688 }, { "epoch": 1.1985229351582085, "grad_norm": 6.235685348510742, "learning_rate": 7.681155452118557e-05, "loss": 0.717, "step": 17689 }, { "epoch": 1.1985906904261807, "grad_norm": 5.224851131439209, "learning_rate": 7.681018550208776e-05, "loss": 0.7371, "step": 17690 }, { "epoch": 1.1986584456941527, "grad_norm": 5.613080978393555, "learning_rate": 7.680881648298994e-05, "loss": 0.5199, "step": 17691 }, { "epoch": 1.198726200962125, "grad_norm": 8.915862083435059, "learning_rate": 7.680744746389212e-05, "loss": 0.717, "step": 17692 }, { "epoch": 1.1987939562300969, "grad_norm": 5.137214660644531, "learning_rate": 7.68060784447943e-05, "loss": 0.6165, "step": 17693 }, { "epoch": 1.1988617114980689, "grad_norm": 4.481411933898926, "learning_rate": 7.68047094256965e-05, "loss": 0.7008, "step": 17694 }, { "epoch": 1.198929466766041, "grad_norm": 4.9299798011779785, "learning_rate": 7.680334040659868e-05, "loss": 0.6167, "step": 17695 }, { "epoch": 1.1989972220340133, "grad_norm": 6.480248928070068, "learning_rate": 7.680197138750086e-05, "loss": 0.6755, "step": 17696 }, { "epoch": 1.1990649773019852, "grad_norm": 7.132851600646973, "learning_rate": 7.680060236840304e-05, "loss": 0.8065, "step": 17697 }, { "epoch": 1.1991327325699572, "grad_norm": 6.641706943511963, "learning_rate": 7.679923334930522e-05, "loss": 0.7251, "step": 17698 }, { "epoch": 1.1992004878379294, "grad_norm": 10.569890022277832, "learning_rate": 7.679786433020741e-05, "loss": 0.6988, "step": 17699 }, { "epoch": 1.1992682431059014, "grad_norm": 5.934963226318359, "learning_rate": 7.679649531110959e-05, "loss": 0.8192, "step": 17700 }, { "epoch": 1.1993359983738736, "grad_norm": 7.19936466217041, "learning_rate": 7.679512629201177e-05, "loss": 0.6698, "step": 17701 }, { "epoch": 1.1994037536418456, "grad_norm": 4.598423957824707, "learning_rate": 7.679375727291395e-05, "loss": 0.6782, "step": 17702 }, { "epoch": 1.1994715089098178, "grad_norm": 6.056293487548828, "learning_rate": 7.679238825381615e-05, "loss": 1.0353, "step": 17703 }, { "epoch": 1.1995392641777898, "grad_norm": 4.992430210113525, "learning_rate": 7.679101923471833e-05, "loss": 0.51, "step": 17704 }, { "epoch": 1.199607019445762, "grad_norm": 6.1313886642456055, "learning_rate": 7.678965021562051e-05, "loss": 0.6802, "step": 17705 }, { "epoch": 1.199674774713734, "grad_norm": 8.208282470703125, "learning_rate": 7.678828119652269e-05, "loss": 0.7752, "step": 17706 }, { "epoch": 1.1997425299817062, "grad_norm": 8.525566101074219, "learning_rate": 7.678691217742487e-05, "loss": 0.7824, "step": 17707 }, { "epoch": 1.1998102852496781, "grad_norm": 9.573042869567871, "learning_rate": 7.678554315832706e-05, "loss": 0.8599, "step": 17708 }, { "epoch": 1.1998780405176501, "grad_norm": 6.6548871994018555, "learning_rate": 7.678417413922924e-05, "loss": 0.5811, "step": 17709 }, { "epoch": 1.1999457957856223, "grad_norm": 7.299272537231445, "learning_rate": 7.678280512013142e-05, "loss": 0.7162, "step": 17710 }, { "epoch": 1.2000135510535945, "grad_norm": 6.244133949279785, "learning_rate": 7.67814361010336e-05, "loss": 0.8619, "step": 17711 }, { "epoch": 1.2000813063215665, "grad_norm": 5.655728816986084, "learning_rate": 7.67800670819358e-05, "loss": 0.5859, "step": 17712 }, { "epoch": 1.2001490615895385, "grad_norm": 6.566688060760498, "learning_rate": 7.677869806283798e-05, "loss": 0.7493, "step": 17713 }, { "epoch": 1.2002168168575107, "grad_norm": 7.644930839538574, "learning_rate": 7.677732904374016e-05, "loss": 0.7606, "step": 17714 }, { "epoch": 1.2002845721254827, "grad_norm": 6.584826946258545, "learning_rate": 7.677596002464235e-05, "loss": 0.816, "step": 17715 }, { "epoch": 1.2003523273934549, "grad_norm": 10.395920753479004, "learning_rate": 7.677459100554453e-05, "loss": 0.9352, "step": 17716 }, { "epoch": 1.2004200826614269, "grad_norm": 6.787402629852295, "learning_rate": 7.677322198644671e-05, "loss": 0.7819, "step": 17717 }, { "epoch": 1.200487837929399, "grad_norm": 5.836369037628174, "learning_rate": 7.67718529673489e-05, "loss": 0.7411, "step": 17718 }, { "epoch": 1.200555593197371, "grad_norm": 5.070427417755127, "learning_rate": 7.677048394825109e-05, "loss": 0.6364, "step": 17719 }, { "epoch": 1.2006233484653432, "grad_norm": 5.135974407196045, "learning_rate": 7.676911492915327e-05, "loss": 0.6565, "step": 17720 }, { "epoch": 1.2006911037333152, "grad_norm": 3.663120746612549, "learning_rate": 7.676774591005545e-05, "loss": 0.4482, "step": 17721 }, { "epoch": 1.2007588590012874, "grad_norm": 4.547859191894531, "learning_rate": 7.676637689095764e-05, "loss": 0.5517, "step": 17722 }, { "epoch": 1.2008266142692594, "grad_norm": 5.410218238830566, "learning_rate": 7.676500787185982e-05, "loss": 0.6991, "step": 17723 }, { "epoch": 1.2008943695372316, "grad_norm": 5.378064155578613, "learning_rate": 7.6763638852762e-05, "loss": 0.6361, "step": 17724 }, { "epoch": 1.2009621248052036, "grad_norm": 4.933994770050049, "learning_rate": 7.676226983366418e-05, "loss": 0.587, "step": 17725 }, { "epoch": 1.2010298800731758, "grad_norm": 5.668514728546143, "learning_rate": 7.676090081456638e-05, "loss": 0.5187, "step": 17726 }, { "epoch": 1.2010976353411478, "grad_norm": 6.826091289520264, "learning_rate": 7.675953179546856e-05, "loss": 0.656, "step": 17727 }, { "epoch": 1.2011653906091198, "grad_norm": 7.071187496185303, "learning_rate": 7.675816277637074e-05, "loss": 0.5693, "step": 17728 }, { "epoch": 1.201233145877092, "grad_norm": 7.474967002868652, "learning_rate": 7.675679375727292e-05, "loss": 0.4735, "step": 17729 }, { "epoch": 1.201300901145064, "grad_norm": 6.620955467224121, "learning_rate": 7.67554247381751e-05, "loss": 0.8567, "step": 17730 }, { "epoch": 1.2013686564130361, "grad_norm": 5.957265853881836, "learning_rate": 7.675405571907729e-05, "loss": 0.5876, "step": 17731 }, { "epoch": 1.2014364116810081, "grad_norm": 6.6812028884887695, "learning_rate": 7.675268669997947e-05, "loss": 0.7244, "step": 17732 }, { "epoch": 1.2015041669489803, "grad_norm": 6.950610160827637, "learning_rate": 7.675131768088165e-05, "loss": 0.7549, "step": 17733 }, { "epoch": 1.2015719222169523, "grad_norm": 6.661945343017578, "learning_rate": 7.674994866178383e-05, "loss": 0.8251, "step": 17734 }, { "epoch": 1.2016396774849245, "grad_norm": 6.50548791885376, "learning_rate": 7.674857964268601e-05, "loss": 0.7072, "step": 17735 }, { "epoch": 1.2017074327528965, "grad_norm": 10.191153526306152, "learning_rate": 7.674721062358821e-05, "loss": 0.5644, "step": 17736 }, { "epoch": 1.2017751880208687, "grad_norm": 5.519306659698486, "learning_rate": 7.674584160449039e-05, "loss": 0.649, "step": 17737 }, { "epoch": 1.2018429432888407, "grad_norm": 8.362431526184082, "learning_rate": 7.674447258539257e-05, "loss": 0.8843, "step": 17738 }, { "epoch": 1.2019106985568129, "grad_norm": 6.758439540863037, "learning_rate": 7.674310356629475e-05, "loss": 0.6952, "step": 17739 }, { "epoch": 1.2019784538247849, "grad_norm": 5.742153167724609, "learning_rate": 7.674173454719694e-05, "loss": 0.7082, "step": 17740 }, { "epoch": 1.202046209092757, "grad_norm": 6.582305431365967, "learning_rate": 7.674036552809912e-05, "loss": 0.734, "step": 17741 }, { "epoch": 1.202113964360729, "grad_norm": 5.384820938110352, "learning_rate": 7.67389965090013e-05, "loss": 0.7698, "step": 17742 }, { "epoch": 1.202181719628701, "grad_norm": 7.0777082443237305, "learning_rate": 7.673762748990348e-05, "loss": 0.5331, "step": 17743 }, { "epoch": 1.2022494748966732, "grad_norm": 6.164592742919922, "learning_rate": 7.673625847080566e-05, "loss": 0.6242, "step": 17744 }, { "epoch": 1.2023172301646454, "grad_norm": 6.713252544403076, "learning_rate": 7.673488945170786e-05, "loss": 0.5903, "step": 17745 }, { "epoch": 1.2023849854326174, "grad_norm": 5.783125877380371, "learning_rate": 7.673352043261004e-05, "loss": 0.7877, "step": 17746 }, { "epoch": 1.2024527407005894, "grad_norm": 8.683629989624023, "learning_rate": 7.673215141351222e-05, "loss": 0.6339, "step": 17747 }, { "epoch": 1.2025204959685616, "grad_norm": 8.471028327941895, "learning_rate": 7.67307823944144e-05, "loss": 0.7041, "step": 17748 }, { "epoch": 1.2025882512365336, "grad_norm": 5.869851112365723, "learning_rate": 7.672941337531659e-05, "loss": 0.7802, "step": 17749 }, { "epoch": 1.2026560065045058, "grad_norm": 5.1376776695251465, "learning_rate": 7.672804435621877e-05, "loss": 0.637, "step": 17750 }, { "epoch": 1.2027237617724778, "grad_norm": 4.885056972503662, "learning_rate": 7.672667533712095e-05, "loss": 0.6424, "step": 17751 }, { "epoch": 1.20279151704045, "grad_norm": 5.53432035446167, "learning_rate": 7.672530631802313e-05, "loss": 0.7047, "step": 17752 }, { "epoch": 1.202859272308422, "grad_norm": 8.100170135498047, "learning_rate": 7.672393729892531e-05, "loss": 0.6902, "step": 17753 }, { "epoch": 1.2029270275763941, "grad_norm": 6.803896903991699, "learning_rate": 7.672256827982751e-05, "loss": 0.6291, "step": 17754 }, { "epoch": 1.2029947828443661, "grad_norm": 4.285371780395508, "learning_rate": 7.672119926072969e-05, "loss": 0.6842, "step": 17755 }, { "epoch": 1.2030625381123383, "grad_norm": 5.140682697296143, "learning_rate": 7.671983024163187e-05, "loss": 0.7823, "step": 17756 }, { "epoch": 1.2031302933803103, "grad_norm": 6.522716045379639, "learning_rate": 7.671846122253405e-05, "loss": 0.6746, "step": 17757 }, { "epoch": 1.2031980486482823, "grad_norm": 7.171764850616455, "learning_rate": 7.671709220343624e-05, "loss": 0.7369, "step": 17758 }, { "epoch": 1.2032658039162545, "grad_norm": 8.315983772277832, "learning_rate": 7.671572318433842e-05, "loss": 0.9706, "step": 17759 }, { "epoch": 1.2033335591842267, "grad_norm": 5.393362998962402, "learning_rate": 7.67143541652406e-05, "loss": 0.5944, "step": 17760 }, { "epoch": 1.2034013144521987, "grad_norm": 7.974836826324463, "learning_rate": 7.67129851461428e-05, "loss": 0.7715, "step": 17761 }, { "epoch": 1.2034690697201706, "grad_norm": 8.586780548095703, "learning_rate": 7.671161612704498e-05, "loss": 0.7409, "step": 17762 }, { "epoch": 1.2035368249881429, "grad_norm": 5.896920204162598, "learning_rate": 7.671024710794716e-05, "loss": 0.9213, "step": 17763 }, { "epoch": 1.2036045802561148, "grad_norm": 6.252879619598389, "learning_rate": 7.670887808884935e-05, "loss": 0.6313, "step": 17764 }, { "epoch": 1.203672335524087, "grad_norm": 5.003981113433838, "learning_rate": 7.670750906975153e-05, "loss": 0.8074, "step": 17765 }, { "epoch": 1.203740090792059, "grad_norm": 6.52988862991333, "learning_rate": 7.670614005065371e-05, "loss": 0.6897, "step": 17766 }, { "epoch": 1.2038078460600312, "grad_norm": 5.87505578994751, "learning_rate": 7.670477103155589e-05, "loss": 0.6301, "step": 17767 }, { "epoch": 1.2038756013280032, "grad_norm": 5.153764724731445, "learning_rate": 7.670340201245809e-05, "loss": 0.8281, "step": 17768 }, { "epoch": 1.2039433565959754, "grad_norm": 5.710607051849365, "learning_rate": 7.670203299336027e-05, "loss": 0.6222, "step": 17769 }, { "epoch": 1.2040111118639474, "grad_norm": 6.170762538909912, "learning_rate": 7.670066397426245e-05, "loss": 0.7103, "step": 17770 }, { "epoch": 1.2040788671319196, "grad_norm": 6.375565052032471, "learning_rate": 7.669929495516463e-05, "loss": 0.6191, "step": 17771 }, { "epoch": 1.2041466223998916, "grad_norm": 4.824178218841553, "learning_rate": 7.669792593606682e-05, "loss": 0.7482, "step": 17772 }, { "epoch": 1.2042143776678638, "grad_norm": 5.683229446411133, "learning_rate": 7.6696556916969e-05, "loss": 0.6528, "step": 17773 }, { "epoch": 1.2042821329358357, "grad_norm": 7.511515140533447, "learning_rate": 7.669518789787118e-05, "loss": 0.7132, "step": 17774 }, { "epoch": 1.204349888203808, "grad_norm": 4.747464179992676, "learning_rate": 7.669381887877336e-05, "loss": 0.6468, "step": 17775 }, { "epoch": 1.20441764347178, "grad_norm": 6.176259517669678, "learning_rate": 7.669244985967554e-05, "loss": 0.8254, "step": 17776 }, { "epoch": 1.204485398739752, "grad_norm": 6.217640399932861, "learning_rate": 7.669108084057774e-05, "loss": 0.7821, "step": 17777 }, { "epoch": 1.2045531540077241, "grad_norm": 7.800964832305908, "learning_rate": 7.668971182147992e-05, "loss": 0.8139, "step": 17778 }, { "epoch": 1.204620909275696, "grad_norm": 6.084750175476074, "learning_rate": 7.66883428023821e-05, "loss": 0.5719, "step": 17779 }, { "epoch": 1.2046886645436683, "grad_norm": 6.607977390289307, "learning_rate": 7.668697378328428e-05, "loss": 0.7541, "step": 17780 }, { "epoch": 1.2047564198116403, "grad_norm": 5.644943714141846, "learning_rate": 7.668560476418647e-05, "loss": 0.7759, "step": 17781 }, { "epoch": 1.2048241750796125, "grad_norm": 10.316941261291504, "learning_rate": 7.668423574508865e-05, "loss": 0.6069, "step": 17782 }, { "epoch": 1.2048919303475845, "grad_norm": 7.116324424743652, "learning_rate": 7.668286672599083e-05, "loss": 0.7976, "step": 17783 }, { "epoch": 1.2049596856155567, "grad_norm": 4.7204365730285645, "learning_rate": 7.668149770689301e-05, "loss": 0.6528, "step": 17784 }, { "epoch": 1.2050274408835286, "grad_norm": 4.601452350616455, "learning_rate": 7.668012868779519e-05, "loss": 0.7411, "step": 17785 }, { "epoch": 1.2050951961515008, "grad_norm": 5.289638519287109, "learning_rate": 7.667875966869739e-05, "loss": 0.7751, "step": 17786 }, { "epoch": 1.2051629514194728, "grad_norm": 4.839632987976074, "learning_rate": 7.667739064959957e-05, "loss": 0.5549, "step": 17787 }, { "epoch": 1.205230706687445, "grad_norm": 5.583590030670166, "learning_rate": 7.667602163050175e-05, "loss": 0.6464, "step": 17788 }, { "epoch": 1.205298461955417, "grad_norm": 7.026573181152344, "learning_rate": 7.667465261140393e-05, "loss": 0.764, "step": 17789 }, { "epoch": 1.2053662172233892, "grad_norm": 6.217905044555664, "learning_rate": 7.667328359230611e-05, "loss": 0.6443, "step": 17790 }, { "epoch": 1.2054339724913612, "grad_norm": 6.040329933166504, "learning_rate": 7.66719145732083e-05, "loss": 0.6784, "step": 17791 }, { "epoch": 1.2055017277593332, "grad_norm": 9.230742454528809, "learning_rate": 7.667054555411048e-05, "loss": 0.6793, "step": 17792 }, { "epoch": 1.2055694830273054, "grad_norm": 8.235578536987305, "learning_rate": 7.666917653501266e-05, "loss": 0.8269, "step": 17793 }, { "epoch": 1.2056372382952776, "grad_norm": 6.351599216461182, "learning_rate": 7.666780751591484e-05, "loss": 0.7285, "step": 17794 }, { "epoch": 1.2057049935632496, "grad_norm": 7.1128973960876465, "learning_rate": 7.666643849681704e-05, "loss": 0.8007, "step": 17795 }, { "epoch": 1.2057727488312215, "grad_norm": 6.171035289764404, "learning_rate": 7.666506947771922e-05, "loss": 0.7373, "step": 17796 }, { "epoch": 1.2058405040991937, "grad_norm": 5.534034729003906, "learning_rate": 7.66637004586214e-05, "loss": 0.6877, "step": 17797 }, { "epoch": 1.2059082593671657, "grad_norm": 9.175405502319336, "learning_rate": 7.666233143952358e-05, "loss": 0.6967, "step": 17798 }, { "epoch": 1.205976014635138, "grad_norm": 5.125663757324219, "learning_rate": 7.666096242042576e-05, "loss": 0.5005, "step": 17799 }, { "epoch": 1.20604376990311, "grad_norm": 6.7034687995910645, "learning_rate": 7.665959340132795e-05, "loss": 0.8659, "step": 17800 }, { "epoch": 1.206111525171082, "grad_norm": 5.710980415344238, "learning_rate": 7.665822438223013e-05, "loss": 0.6413, "step": 17801 }, { "epoch": 1.206179280439054, "grad_norm": 4.896183967590332, "learning_rate": 7.665685536313231e-05, "loss": 0.5167, "step": 17802 }, { "epoch": 1.2062470357070263, "grad_norm": 6.997365474700928, "learning_rate": 7.665548634403449e-05, "loss": 0.743, "step": 17803 }, { "epoch": 1.2063147909749983, "grad_norm": 5.428713798522949, "learning_rate": 7.665411732493669e-05, "loss": 0.7474, "step": 17804 }, { "epoch": 1.2063825462429705, "grad_norm": 4.988559722900391, "learning_rate": 7.665274830583887e-05, "loss": 0.5755, "step": 17805 }, { "epoch": 1.2064503015109425, "grad_norm": 5.562001705169678, "learning_rate": 7.665137928674105e-05, "loss": 0.64, "step": 17806 }, { "epoch": 1.2065180567789144, "grad_norm": 6.794801235198975, "learning_rate": 7.665001026764324e-05, "loss": 0.6635, "step": 17807 }, { "epoch": 1.2065858120468866, "grad_norm": 7.743435382843018, "learning_rate": 7.664864124854542e-05, "loss": 0.6183, "step": 17808 }, { "epoch": 1.2066535673148588, "grad_norm": 5.286825180053711, "learning_rate": 7.66472722294476e-05, "loss": 0.7029, "step": 17809 }, { "epoch": 1.2067213225828308, "grad_norm": 3.8852932453155518, "learning_rate": 7.66459032103498e-05, "loss": 0.5791, "step": 17810 }, { "epoch": 1.2067890778508028, "grad_norm": 10.570182800292969, "learning_rate": 7.664453419125198e-05, "loss": 0.7256, "step": 17811 }, { "epoch": 1.206856833118775, "grad_norm": 6.794580936431885, "learning_rate": 7.664316517215416e-05, "loss": 0.6337, "step": 17812 }, { "epoch": 1.206924588386747, "grad_norm": 6.657121658325195, "learning_rate": 7.664179615305634e-05, "loss": 0.8513, "step": 17813 }, { "epoch": 1.2069923436547192, "grad_norm": 5.735461235046387, "learning_rate": 7.664042713395853e-05, "loss": 0.7064, "step": 17814 }, { "epoch": 1.2070600989226912, "grad_norm": 5.639031887054443, "learning_rate": 7.663905811486071e-05, "loss": 0.7369, "step": 17815 }, { "epoch": 1.2071278541906634, "grad_norm": 7.826430797576904, "learning_rate": 7.663768909576289e-05, "loss": 0.6114, "step": 17816 }, { "epoch": 1.2071956094586354, "grad_norm": 5.544642448425293, "learning_rate": 7.663632007666507e-05, "loss": 0.543, "step": 17817 }, { "epoch": 1.2072633647266076, "grad_norm": 6.61638069152832, "learning_rate": 7.663495105756727e-05, "loss": 0.6926, "step": 17818 }, { "epoch": 1.2073311199945795, "grad_norm": 4.663098335266113, "learning_rate": 7.663358203846945e-05, "loss": 0.7931, "step": 17819 }, { "epoch": 1.2073988752625517, "grad_norm": 6.7196478843688965, "learning_rate": 7.663221301937163e-05, "loss": 0.8295, "step": 17820 }, { "epoch": 1.2074666305305237, "grad_norm": 5.3182196617126465, "learning_rate": 7.663084400027381e-05, "loss": 0.624, "step": 17821 }, { "epoch": 1.207534385798496, "grad_norm": 5.531083106994629, "learning_rate": 7.662947498117599e-05, "loss": 0.4976, "step": 17822 }, { "epoch": 1.207602141066468, "grad_norm": 5.298411846160889, "learning_rate": 7.662810596207818e-05, "loss": 0.6012, "step": 17823 }, { "epoch": 1.20766989633444, "grad_norm": 4.497094631195068, "learning_rate": 7.662673694298036e-05, "loss": 0.6826, "step": 17824 }, { "epoch": 1.207737651602412, "grad_norm": 5.269436836242676, "learning_rate": 7.662536792388254e-05, "loss": 0.6327, "step": 17825 }, { "epoch": 1.207805406870384, "grad_norm": 5.4644646644592285, "learning_rate": 7.662399890478472e-05, "loss": 0.71, "step": 17826 }, { "epoch": 1.2078731621383563, "grad_norm": 5.628913879394531, "learning_rate": 7.662262988568692e-05, "loss": 0.7919, "step": 17827 }, { "epoch": 1.2079409174063283, "grad_norm": 5.46560525894165, "learning_rate": 7.66212608665891e-05, "loss": 0.8439, "step": 17828 }, { "epoch": 1.2080086726743005, "grad_norm": 5.631601810455322, "learning_rate": 7.661989184749128e-05, "loss": 0.6336, "step": 17829 }, { "epoch": 1.2080764279422724, "grad_norm": 5.358332633972168, "learning_rate": 7.661852282839346e-05, "loss": 0.9302, "step": 17830 }, { "epoch": 1.2081441832102446, "grad_norm": 6.63771915435791, "learning_rate": 7.661715380929564e-05, "loss": 0.7401, "step": 17831 }, { "epoch": 1.2082119384782166, "grad_norm": 6.354868412017822, "learning_rate": 7.661578479019783e-05, "loss": 0.7275, "step": 17832 }, { "epoch": 1.2082796937461888, "grad_norm": 7.358850479125977, "learning_rate": 7.661441577110001e-05, "loss": 0.7485, "step": 17833 }, { "epoch": 1.2083474490141608, "grad_norm": 5.550168991088867, "learning_rate": 7.661304675200219e-05, "loss": 0.7116, "step": 17834 }, { "epoch": 1.208415204282133, "grad_norm": 6.618070125579834, "learning_rate": 7.661167773290437e-05, "loss": 0.6558, "step": 17835 }, { "epoch": 1.208482959550105, "grad_norm": 7.177542209625244, "learning_rate": 7.661030871380657e-05, "loss": 0.6931, "step": 17836 }, { "epoch": 1.2085507148180772, "grad_norm": 5.309508800506592, "learning_rate": 7.660893969470875e-05, "loss": 0.6781, "step": 17837 }, { "epoch": 1.2086184700860492, "grad_norm": 5.462770462036133, "learning_rate": 7.660757067561093e-05, "loss": 0.6715, "step": 17838 }, { "epoch": 1.2086862253540214, "grad_norm": 5.621263027191162, "learning_rate": 7.660620165651311e-05, "loss": 0.6802, "step": 17839 }, { "epoch": 1.2087539806219934, "grad_norm": 6.031620979309082, "learning_rate": 7.660483263741529e-05, "loss": 0.708, "step": 17840 }, { "epoch": 1.2088217358899653, "grad_norm": 5.407814979553223, "learning_rate": 7.660346361831748e-05, "loss": 0.6426, "step": 17841 }, { "epoch": 1.2088894911579375, "grad_norm": 5.029693603515625, "learning_rate": 7.660209459921966e-05, "loss": 0.7588, "step": 17842 }, { "epoch": 1.2089572464259097, "grad_norm": 5.002652645111084, "learning_rate": 7.660072558012184e-05, "loss": 0.6632, "step": 17843 }, { "epoch": 1.2090250016938817, "grad_norm": 5.994513988494873, "learning_rate": 7.659935656102402e-05, "loss": 0.8487, "step": 17844 }, { "epoch": 1.2090927569618537, "grad_norm": 6.450139045715332, "learning_rate": 7.65979875419262e-05, "loss": 0.5194, "step": 17845 }, { "epoch": 1.209160512229826, "grad_norm": 6.206781387329102, "learning_rate": 7.65966185228284e-05, "loss": 0.6804, "step": 17846 }, { "epoch": 1.2092282674977979, "grad_norm": 6.784191608428955, "learning_rate": 7.659524950373058e-05, "loss": 0.8282, "step": 17847 }, { "epoch": 1.20929602276577, "grad_norm": 7.1671671867370605, "learning_rate": 7.659388048463276e-05, "loss": 0.8137, "step": 17848 }, { "epoch": 1.209363778033742, "grad_norm": 5.292774200439453, "learning_rate": 7.659251146553494e-05, "loss": 0.5384, "step": 17849 }, { "epoch": 1.2094315333017143, "grad_norm": 6.33740758895874, "learning_rate": 7.659114244643713e-05, "loss": 0.6687, "step": 17850 }, { "epoch": 1.2094992885696862, "grad_norm": 8.038208961486816, "learning_rate": 7.658977342733931e-05, "loss": 0.8412, "step": 17851 }, { "epoch": 1.2095670438376585, "grad_norm": 5.751485347747803, "learning_rate": 7.658840440824149e-05, "loss": 0.845, "step": 17852 }, { "epoch": 1.2096347991056304, "grad_norm": 5.004260540008545, "learning_rate": 7.658703538914369e-05, "loss": 0.594, "step": 17853 }, { "epoch": 1.2097025543736026, "grad_norm": 6.479980945587158, "learning_rate": 7.658566637004587e-05, "loss": 0.7178, "step": 17854 }, { "epoch": 1.2097703096415746, "grad_norm": 5.524226665496826, "learning_rate": 7.658429735094805e-05, "loss": 0.5855, "step": 17855 }, { "epoch": 1.2098380649095466, "grad_norm": 6.297536849975586, "learning_rate": 7.658292833185024e-05, "loss": 0.5762, "step": 17856 }, { "epoch": 1.2099058201775188, "grad_norm": 5.452970504760742, "learning_rate": 7.658155931275242e-05, "loss": 0.6021, "step": 17857 }, { "epoch": 1.209973575445491, "grad_norm": 6.164029121398926, "learning_rate": 7.65801902936546e-05, "loss": 0.834, "step": 17858 }, { "epoch": 1.210041330713463, "grad_norm": 6.116921901702881, "learning_rate": 7.65788212745568e-05, "loss": 0.6033, "step": 17859 }, { "epoch": 1.210109085981435, "grad_norm": 4.469160556793213, "learning_rate": 7.657745225545898e-05, "loss": 0.8679, "step": 17860 }, { "epoch": 1.2101768412494072, "grad_norm": 6.041546821594238, "learning_rate": 7.657608323636116e-05, "loss": 0.5729, "step": 17861 }, { "epoch": 1.2102445965173791, "grad_norm": 5.222907066345215, "learning_rate": 7.657471421726334e-05, "loss": 0.596, "step": 17862 }, { "epoch": 1.2103123517853513, "grad_norm": 6.178247451782227, "learning_rate": 7.657334519816552e-05, "loss": 0.571, "step": 17863 }, { "epoch": 1.2103801070533233, "grad_norm": 5.390204429626465, "learning_rate": 7.657197617906771e-05, "loss": 0.8544, "step": 17864 }, { "epoch": 1.2104478623212955, "grad_norm": 5.556325435638428, "learning_rate": 7.657060715996989e-05, "loss": 0.5408, "step": 17865 }, { "epoch": 1.2105156175892675, "grad_norm": 6.003518581390381, "learning_rate": 7.656923814087207e-05, "loss": 0.9889, "step": 17866 }, { "epoch": 1.2105833728572397, "grad_norm": 8.722765922546387, "learning_rate": 7.656786912177425e-05, "loss": 0.5056, "step": 17867 }, { "epoch": 1.2106511281252117, "grad_norm": 5.819286823272705, "learning_rate": 7.656650010267643e-05, "loss": 0.6311, "step": 17868 }, { "epoch": 1.210718883393184, "grad_norm": 5.321593761444092, "learning_rate": 7.656513108357863e-05, "loss": 0.5492, "step": 17869 }, { "epoch": 1.2107866386611559, "grad_norm": 5.966612339019775, "learning_rate": 7.65637620644808e-05, "loss": 0.801, "step": 17870 }, { "epoch": 1.210854393929128, "grad_norm": 5.289181232452393, "learning_rate": 7.656239304538299e-05, "loss": 0.6989, "step": 17871 }, { "epoch": 1.2109221491971, "grad_norm": 5.957951545715332, "learning_rate": 7.656102402628517e-05, "loss": 0.7726, "step": 17872 }, { "epoch": 1.2109899044650723, "grad_norm": 6.526004314422607, "learning_rate": 7.655965500718736e-05, "loss": 0.8481, "step": 17873 }, { "epoch": 1.2110576597330442, "grad_norm": 8.721756935119629, "learning_rate": 7.655828598808954e-05, "loss": 0.681, "step": 17874 }, { "epoch": 1.2111254150010162, "grad_norm": 5.119085311889648, "learning_rate": 7.655691696899172e-05, "loss": 0.3988, "step": 17875 }, { "epoch": 1.2111931702689884, "grad_norm": 4.622032165527344, "learning_rate": 7.65555479498939e-05, "loss": 0.7325, "step": 17876 }, { "epoch": 1.2112609255369604, "grad_norm": 5.131158828735352, "learning_rate": 7.655417893079608e-05, "loss": 0.58, "step": 17877 }, { "epoch": 1.2113286808049326, "grad_norm": 6.189968585968018, "learning_rate": 7.655280991169828e-05, "loss": 0.9961, "step": 17878 }, { "epoch": 1.2113964360729046, "grad_norm": 5.269293785095215, "learning_rate": 7.655144089260046e-05, "loss": 0.705, "step": 17879 }, { "epoch": 1.2114641913408768, "grad_norm": 4.969742774963379, "learning_rate": 7.655007187350264e-05, "loss": 0.8844, "step": 17880 }, { "epoch": 1.2115319466088488, "grad_norm": 6.767873287200928, "learning_rate": 7.654870285440482e-05, "loss": 1.0077, "step": 17881 }, { "epoch": 1.211599701876821, "grad_norm": 8.03074836730957, "learning_rate": 7.654733383530701e-05, "loss": 0.9055, "step": 17882 }, { "epoch": 1.211667457144793, "grad_norm": 7.549478054046631, "learning_rate": 7.654596481620919e-05, "loss": 0.8107, "step": 17883 }, { "epoch": 1.2117352124127652, "grad_norm": 7.665398597717285, "learning_rate": 7.654459579711137e-05, "loss": 0.5934, "step": 17884 }, { "epoch": 1.2118029676807371, "grad_norm": 7.51274299621582, "learning_rate": 7.654322677801355e-05, "loss": 0.6872, "step": 17885 }, { "epoch": 1.2118707229487093, "grad_norm": 6.046478748321533, "learning_rate": 7.654185775891573e-05, "loss": 0.6458, "step": 17886 }, { "epoch": 1.2119384782166813, "grad_norm": 5.589632511138916, "learning_rate": 7.654048873981793e-05, "loss": 0.7045, "step": 17887 }, { "epoch": 1.2120062334846535, "grad_norm": 7.263500690460205, "learning_rate": 7.65391197207201e-05, "loss": 0.7436, "step": 17888 }, { "epoch": 1.2120739887526255, "grad_norm": 6.018463611602783, "learning_rate": 7.653775070162229e-05, "loss": 0.7441, "step": 17889 }, { "epoch": 1.2121417440205975, "grad_norm": 11.365656852722168, "learning_rate": 7.653638168252447e-05, "loss": 0.7303, "step": 17890 }, { "epoch": 1.2122094992885697, "grad_norm": 6.8662109375, "learning_rate": 7.653501266342666e-05, "loss": 0.9822, "step": 17891 }, { "epoch": 1.212277254556542, "grad_norm": 5.432251930236816, "learning_rate": 7.653364364432884e-05, "loss": 0.718, "step": 17892 }, { "epoch": 1.2123450098245139, "grad_norm": 5.230556488037109, "learning_rate": 7.653227462523102e-05, "loss": 0.5133, "step": 17893 }, { "epoch": 1.2124127650924859, "grad_norm": 5.820934295654297, "learning_rate": 7.65309056061332e-05, "loss": 0.7827, "step": 17894 }, { "epoch": 1.212480520360458, "grad_norm": 7.665424823760986, "learning_rate": 7.652953658703538e-05, "loss": 0.871, "step": 17895 }, { "epoch": 1.21254827562843, "grad_norm": 6.874349594116211, "learning_rate": 7.652816756793758e-05, "loss": 0.7607, "step": 17896 }, { "epoch": 1.2126160308964022, "grad_norm": 4.565303802490234, "learning_rate": 7.652679854883976e-05, "loss": 0.4878, "step": 17897 }, { "epoch": 1.2126837861643742, "grad_norm": 8.851542472839355, "learning_rate": 7.652542952974194e-05, "loss": 0.7029, "step": 17898 }, { "epoch": 1.2127515414323464, "grad_norm": 5.475307464599609, "learning_rate": 7.652406051064413e-05, "loss": 0.7158, "step": 17899 }, { "epoch": 1.2128192967003184, "grad_norm": 5.020805835723877, "learning_rate": 7.652269149154631e-05, "loss": 0.607, "step": 17900 }, { "epoch": 1.2128870519682906, "grad_norm": 4.76190185546875, "learning_rate": 7.652132247244849e-05, "loss": 0.682, "step": 17901 }, { "epoch": 1.2129548072362626, "grad_norm": 5.650835037231445, "learning_rate": 7.651995345335069e-05, "loss": 0.6631, "step": 17902 }, { "epoch": 1.2130225625042348, "grad_norm": 5.15869140625, "learning_rate": 7.651858443425287e-05, "loss": 0.7104, "step": 17903 }, { "epoch": 1.2130903177722068, "grad_norm": 6.4718804359436035, "learning_rate": 7.651721541515505e-05, "loss": 0.828, "step": 17904 }, { "epoch": 1.2131580730401788, "grad_norm": 6.307829856872559, "learning_rate": 7.651584639605724e-05, "loss": 0.9308, "step": 17905 }, { "epoch": 1.213225828308151, "grad_norm": 4.885651111602783, "learning_rate": 7.651447737695942e-05, "loss": 0.8558, "step": 17906 }, { "epoch": 1.2132935835761232, "grad_norm": 7.297476768493652, "learning_rate": 7.65131083578616e-05, "loss": 0.5909, "step": 17907 }, { "epoch": 1.2133613388440951, "grad_norm": 6.080501079559326, "learning_rate": 7.651173933876378e-05, "loss": 0.7439, "step": 17908 }, { "epoch": 1.2134290941120671, "grad_norm": 6.017581939697266, "learning_rate": 7.651037031966596e-05, "loss": 0.6583, "step": 17909 }, { "epoch": 1.2134968493800393, "grad_norm": 7.0170464515686035, "learning_rate": 7.650900130056816e-05, "loss": 0.7311, "step": 17910 }, { "epoch": 1.2135646046480113, "grad_norm": 5.122740268707275, "learning_rate": 7.650763228147034e-05, "loss": 0.7785, "step": 17911 }, { "epoch": 1.2136323599159835, "grad_norm": 4.886419773101807, "learning_rate": 7.650626326237252e-05, "loss": 0.6322, "step": 17912 }, { "epoch": 1.2137001151839555, "grad_norm": 5.377499103546143, "learning_rate": 7.65048942432747e-05, "loss": 0.8079, "step": 17913 }, { "epoch": 1.2137678704519277, "grad_norm": 5.473203182220459, "learning_rate": 7.650352522417689e-05, "loss": 0.6221, "step": 17914 }, { "epoch": 1.2138356257198997, "grad_norm": 6.620959758758545, "learning_rate": 7.650215620507907e-05, "loss": 0.6327, "step": 17915 }, { "epoch": 1.2139033809878719, "grad_norm": 5.501951694488525, "learning_rate": 7.650078718598125e-05, "loss": 0.6191, "step": 17916 }, { "epoch": 1.2139711362558439, "grad_norm": 5.863165378570557, "learning_rate": 7.649941816688343e-05, "loss": 0.4766, "step": 17917 }, { "epoch": 1.214038891523816, "grad_norm": 4.651849269866943, "learning_rate": 7.649804914778561e-05, "loss": 0.8014, "step": 17918 }, { "epoch": 1.214106646791788, "grad_norm": 5.458044528961182, "learning_rate": 7.64966801286878e-05, "loss": 0.6545, "step": 17919 }, { "epoch": 1.2141744020597602, "grad_norm": 6.30871057510376, "learning_rate": 7.649531110958999e-05, "loss": 0.8052, "step": 17920 }, { "epoch": 1.2142421573277322, "grad_norm": 6.08293342590332, "learning_rate": 7.649394209049217e-05, "loss": 0.863, "step": 17921 }, { "epoch": 1.2143099125957044, "grad_norm": 4.689571857452393, "learning_rate": 7.649257307139435e-05, "loss": 0.7528, "step": 17922 }, { "epoch": 1.2143776678636764, "grad_norm": 5.698482513427734, "learning_rate": 7.649120405229653e-05, "loss": 0.7365, "step": 17923 }, { "epoch": 1.2144454231316484, "grad_norm": 6.945766925811768, "learning_rate": 7.648983503319872e-05, "loss": 0.7323, "step": 17924 }, { "epoch": 1.2145131783996206, "grad_norm": 4.9405670166015625, "learning_rate": 7.64884660141009e-05, "loss": 0.5537, "step": 17925 }, { "epoch": 1.2145809336675926, "grad_norm": 6.1244001388549805, "learning_rate": 7.648709699500308e-05, "loss": 0.9281, "step": 17926 }, { "epoch": 1.2146486889355648, "grad_norm": 5.485476970672607, "learning_rate": 7.648572797590526e-05, "loss": 0.7444, "step": 17927 }, { "epoch": 1.2147164442035367, "grad_norm": 6.439229965209961, "learning_rate": 7.648435895680746e-05, "loss": 0.6663, "step": 17928 }, { "epoch": 1.214784199471509, "grad_norm": 4.058414936065674, "learning_rate": 7.648298993770964e-05, "loss": 0.6314, "step": 17929 }, { "epoch": 1.214851954739481, "grad_norm": 4.499625205993652, "learning_rate": 7.648162091861182e-05, "loss": 0.6212, "step": 17930 }, { "epoch": 1.2149197100074531, "grad_norm": 6.286862850189209, "learning_rate": 7.6480251899514e-05, "loss": 0.795, "step": 17931 }, { "epoch": 1.2149874652754251, "grad_norm": 5.967475414276123, "learning_rate": 7.647888288041618e-05, "loss": 0.6406, "step": 17932 }, { "epoch": 1.2150552205433973, "grad_norm": 4.926750659942627, "learning_rate": 7.647751386131837e-05, "loss": 0.8561, "step": 17933 }, { "epoch": 1.2151229758113693, "grad_norm": 4.823459625244141, "learning_rate": 7.647614484222055e-05, "loss": 0.6308, "step": 17934 }, { "epoch": 1.2151907310793415, "grad_norm": 6.4650797843933105, "learning_rate": 7.647477582312273e-05, "loss": 0.9417, "step": 17935 }, { "epoch": 1.2152584863473135, "grad_norm": 6.535785675048828, "learning_rate": 7.647340680402491e-05, "loss": 0.6846, "step": 17936 }, { "epoch": 1.2153262416152857, "grad_norm": 5.6134819984436035, "learning_rate": 7.64720377849271e-05, "loss": 0.8102, "step": 17937 }, { "epoch": 1.2153939968832577, "grad_norm": 8.289133071899414, "learning_rate": 7.647066876582929e-05, "loss": 0.8256, "step": 17938 }, { "epoch": 1.2154617521512296, "grad_norm": 5.382526397705078, "learning_rate": 7.646929974673147e-05, "loss": 0.5605, "step": 17939 }, { "epoch": 1.2155295074192018, "grad_norm": 4.763079643249512, "learning_rate": 7.646793072763365e-05, "loss": 0.4803, "step": 17940 }, { "epoch": 1.215597262687174, "grad_norm": 4.224546432495117, "learning_rate": 7.646656170853583e-05, "loss": 0.4886, "step": 17941 }, { "epoch": 1.215665017955146, "grad_norm": 7.985034465789795, "learning_rate": 7.646519268943802e-05, "loss": 0.6896, "step": 17942 }, { "epoch": 1.215732773223118, "grad_norm": 7.068222522735596, "learning_rate": 7.64638236703402e-05, "loss": 0.7415, "step": 17943 }, { "epoch": 1.2158005284910902, "grad_norm": 5.369699001312256, "learning_rate": 7.646245465124238e-05, "loss": 0.7562, "step": 17944 }, { "epoch": 1.2158682837590622, "grad_norm": 5.549074172973633, "learning_rate": 7.646108563214456e-05, "loss": 0.6803, "step": 17945 }, { "epoch": 1.2159360390270344, "grad_norm": 5.730227947235107, "learning_rate": 7.645971661304676e-05, "loss": 0.5862, "step": 17946 }, { "epoch": 1.2160037942950064, "grad_norm": 5.7196478843688965, "learning_rate": 7.645834759394894e-05, "loss": 0.4924, "step": 17947 }, { "epoch": 1.2160715495629786, "grad_norm": 6.75718355178833, "learning_rate": 7.645697857485112e-05, "loss": 0.7233, "step": 17948 }, { "epoch": 1.2161393048309506, "grad_norm": 8.140740394592285, "learning_rate": 7.645560955575331e-05, "loss": 0.706, "step": 17949 }, { "epoch": 1.2162070600989228, "grad_norm": 9.06883716583252, "learning_rate": 7.645424053665549e-05, "loss": 0.5888, "step": 17950 }, { "epoch": 1.2162748153668947, "grad_norm": 6.231507301330566, "learning_rate": 7.645287151755767e-05, "loss": 0.6705, "step": 17951 }, { "epoch": 1.216342570634867, "grad_norm": 6.362677097320557, "learning_rate": 7.645150249845987e-05, "loss": 0.696, "step": 17952 }, { "epoch": 1.216410325902839, "grad_norm": 5.278102397918701, "learning_rate": 7.645013347936205e-05, "loss": 0.612, "step": 17953 }, { "epoch": 1.216478081170811, "grad_norm": 5.913932800292969, "learning_rate": 7.644876446026423e-05, "loss": 0.8063, "step": 17954 }, { "epoch": 1.2165458364387831, "grad_norm": 6.743002891540527, "learning_rate": 7.64473954411664e-05, "loss": 0.6755, "step": 17955 }, { "epoch": 1.2166135917067553, "grad_norm": 6.559525012969971, "learning_rate": 7.64460264220686e-05, "loss": 0.7889, "step": 17956 }, { "epoch": 1.2166813469747273, "grad_norm": 7.0902228355407715, "learning_rate": 7.644465740297078e-05, "loss": 0.5968, "step": 17957 }, { "epoch": 1.2167491022426993, "grad_norm": 5.9385576248168945, "learning_rate": 7.644328838387296e-05, "loss": 0.7367, "step": 17958 }, { "epoch": 1.2168168575106715, "grad_norm": 5.945615768432617, "learning_rate": 7.644191936477514e-05, "loss": 0.6871, "step": 17959 }, { "epoch": 1.2168846127786435, "grad_norm": 5.105138778686523, "learning_rate": 7.644055034567734e-05, "loss": 0.6814, "step": 17960 }, { "epoch": 1.2169523680466157, "grad_norm": 7.892396926879883, "learning_rate": 7.643918132657952e-05, "loss": 0.7212, "step": 17961 }, { "epoch": 1.2170201233145876, "grad_norm": 7.983614444732666, "learning_rate": 7.64378123074817e-05, "loss": 0.6391, "step": 17962 }, { "epoch": 1.2170878785825598, "grad_norm": 5.618093013763428, "learning_rate": 7.643644328838388e-05, "loss": 0.7687, "step": 17963 }, { "epoch": 1.2171556338505318, "grad_norm": 7.337813854217529, "learning_rate": 7.643507426928606e-05, "loss": 0.6703, "step": 17964 }, { "epoch": 1.217223389118504, "grad_norm": 4.311225891113281, "learning_rate": 7.643370525018825e-05, "loss": 0.6361, "step": 17965 }, { "epoch": 1.217291144386476, "grad_norm": 6.524477958679199, "learning_rate": 7.643233623109043e-05, "loss": 0.7418, "step": 17966 }, { "epoch": 1.2173588996544482, "grad_norm": 6.941540241241455, "learning_rate": 7.643096721199261e-05, "loss": 0.5442, "step": 17967 }, { "epoch": 1.2174266549224202, "grad_norm": 5.137349605560303, "learning_rate": 7.642959819289479e-05, "loss": 0.7523, "step": 17968 }, { "epoch": 1.2174944101903924, "grad_norm": 5.285187721252441, "learning_rate": 7.642822917379699e-05, "loss": 0.5724, "step": 17969 }, { "epoch": 1.2175621654583644, "grad_norm": 6.266340255737305, "learning_rate": 7.642686015469917e-05, "loss": 0.6546, "step": 17970 }, { "epoch": 1.2176299207263366, "grad_norm": 5.239572525024414, "learning_rate": 7.642549113560135e-05, "loss": 0.6406, "step": 17971 }, { "epoch": 1.2176976759943086, "grad_norm": 6.736250400543213, "learning_rate": 7.642412211650353e-05, "loss": 1.0865, "step": 17972 }, { "epoch": 1.2177654312622805, "grad_norm": 8.947715759277344, "learning_rate": 7.64227530974057e-05, "loss": 0.5183, "step": 17973 }, { "epoch": 1.2178331865302527, "grad_norm": 8.303337097167969, "learning_rate": 7.64213840783079e-05, "loss": 1.1634, "step": 17974 }, { "epoch": 1.2179009417982247, "grad_norm": 6.613880634307861, "learning_rate": 7.642001505921008e-05, "loss": 0.7042, "step": 17975 }, { "epoch": 1.217968697066197, "grad_norm": 6.535386085510254, "learning_rate": 7.641864604011226e-05, "loss": 0.7257, "step": 17976 }, { "epoch": 1.218036452334169, "grad_norm": 6.180886745452881, "learning_rate": 7.641727702101444e-05, "loss": 0.6752, "step": 17977 }, { "epoch": 1.218104207602141, "grad_norm": 5.949535369873047, "learning_rate": 7.641590800191662e-05, "loss": 0.7168, "step": 17978 }, { "epoch": 1.218171962870113, "grad_norm": 7.5198283195495605, "learning_rate": 7.641453898281882e-05, "loss": 0.8332, "step": 17979 }, { "epoch": 1.2182397181380853, "grad_norm": 6.259788990020752, "learning_rate": 7.6413169963721e-05, "loss": 0.7097, "step": 17980 }, { "epoch": 1.2183074734060573, "grad_norm": 5.875858306884766, "learning_rate": 7.641180094462318e-05, "loss": 0.6978, "step": 17981 }, { "epoch": 1.2183752286740295, "grad_norm": 7.679528713226318, "learning_rate": 7.641043192552536e-05, "loss": 0.6878, "step": 17982 }, { "epoch": 1.2184429839420015, "grad_norm": 5.411177635192871, "learning_rate": 7.640906290642755e-05, "loss": 0.6506, "step": 17983 }, { "epoch": 1.2185107392099737, "grad_norm": 5.739083766937256, "learning_rate": 7.640769388732973e-05, "loss": 0.689, "step": 17984 }, { "epoch": 1.2185784944779456, "grad_norm": 6.422370910644531, "learning_rate": 7.640632486823191e-05, "loss": 0.7599, "step": 17985 }, { "epoch": 1.2186462497459178, "grad_norm": 5.531982421875, "learning_rate": 7.640495584913409e-05, "loss": 0.7083, "step": 17986 }, { "epoch": 1.2187140050138898, "grad_norm": 9.015816688537598, "learning_rate": 7.640358683003627e-05, "loss": 0.8653, "step": 17987 }, { "epoch": 1.2187817602818618, "grad_norm": 7.057861804962158, "learning_rate": 7.640221781093847e-05, "loss": 0.5141, "step": 17988 }, { "epoch": 1.218849515549834, "grad_norm": 7.158889293670654, "learning_rate": 7.640084879184065e-05, "loss": 0.6141, "step": 17989 }, { "epoch": 1.2189172708178062, "grad_norm": 7.1165771484375, "learning_rate": 7.639947977274283e-05, "loss": 0.7902, "step": 17990 }, { "epoch": 1.2189850260857782, "grad_norm": 6.267188549041748, "learning_rate": 7.639811075364501e-05, "loss": 0.5597, "step": 17991 }, { "epoch": 1.2190527813537502, "grad_norm": 5.786649227142334, "learning_rate": 7.63967417345472e-05, "loss": 0.6786, "step": 17992 }, { "epoch": 1.2191205366217224, "grad_norm": 4.65104341506958, "learning_rate": 7.639537271544938e-05, "loss": 0.5413, "step": 17993 }, { "epoch": 1.2191882918896944, "grad_norm": 6.078935623168945, "learning_rate": 7.639400369635156e-05, "loss": 0.7329, "step": 17994 }, { "epoch": 1.2192560471576666, "grad_norm": 5.8307342529296875, "learning_rate": 7.639263467725376e-05, "loss": 0.7237, "step": 17995 }, { "epoch": 1.2193238024256385, "grad_norm": 5.22831392288208, "learning_rate": 7.639126565815594e-05, "loss": 0.6605, "step": 17996 }, { "epoch": 1.2193915576936107, "grad_norm": 6.974266529083252, "learning_rate": 7.638989663905812e-05, "loss": 0.7335, "step": 17997 }, { "epoch": 1.2194593129615827, "grad_norm": 5.890809059143066, "learning_rate": 7.638852761996031e-05, "loss": 0.8327, "step": 17998 }, { "epoch": 1.219527068229555, "grad_norm": 7.678559303283691, "learning_rate": 7.638715860086249e-05, "loss": 0.7564, "step": 17999 }, { "epoch": 1.219594823497527, "grad_norm": 4.4165191650390625, "learning_rate": 7.638578958176467e-05, "loss": 0.6001, "step": 18000 }, { "epoch": 1.219662578765499, "grad_norm": 7.2417731285095215, "learning_rate": 7.638442056266685e-05, "loss": 0.8368, "step": 18001 }, { "epoch": 1.219730334033471, "grad_norm": 5.695639133453369, "learning_rate": 7.638305154356905e-05, "loss": 0.7352, "step": 18002 }, { "epoch": 1.219798089301443, "grad_norm": 9.774930953979492, "learning_rate": 7.638168252447123e-05, "loss": 0.6938, "step": 18003 }, { "epoch": 1.2198658445694153, "grad_norm": 7.178581714630127, "learning_rate": 7.63803135053734e-05, "loss": 0.5617, "step": 18004 }, { "epoch": 1.2199335998373875, "grad_norm": 4.547571182250977, "learning_rate": 7.637894448627559e-05, "loss": 0.6759, "step": 18005 }, { "epoch": 1.2200013551053595, "grad_norm": 6.263281345367432, "learning_rate": 7.637757546717778e-05, "loss": 0.7855, "step": 18006 }, { "epoch": 1.2200691103733314, "grad_norm": 6.402713298797607, "learning_rate": 7.637620644807996e-05, "loss": 0.6332, "step": 18007 }, { "epoch": 1.2201368656413036, "grad_norm": 7.917328834533691, "learning_rate": 7.637483742898214e-05, "loss": 0.8464, "step": 18008 }, { "epoch": 1.2202046209092756, "grad_norm": 6.9080891609191895, "learning_rate": 7.637346840988432e-05, "loss": 0.6566, "step": 18009 }, { "epoch": 1.2202723761772478, "grad_norm": 5.2827467918396, "learning_rate": 7.63720993907865e-05, "loss": 0.8047, "step": 18010 }, { "epoch": 1.2203401314452198, "grad_norm": 8.067967414855957, "learning_rate": 7.63707303716887e-05, "loss": 0.6424, "step": 18011 }, { "epoch": 1.220407886713192, "grad_norm": 5.451672554016113, "learning_rate": 7.636936135259088e-05, "loss": 0.671, "step": 18012 }, { "epoch": 1.220475641981164, "grad_norm": 8.954378128051758, "learning_rate": 7.636799233349306e-05, "loss": 0.8216, "step": 18013 }, { "epoch": 1.2205433972491362, "grad_norm": 4.8422651290893555, "learning_rate": 7.636662331439524e-05, "loss": 0.4488, "step": 18014 }, { "epoch": 1.2206111525171082, "grad_norm": 7.705546855926514, "learning_rate": 7.636525429529743e-05, "loss": 0.7685, "step": 18015 }, { "epoch": 1.2206789077850804, "grad_norm": 7.396956920623779, "learning_rate": 7.636388527619961e-05, "loss": 0.6365, "step": 18016 }, { "epoch": 1.2207466630530523, "grad_norm": 5.974605083465576, "learning_rate": 7.636251625710179e-05, "loss": 0.5927, "step": 18017 }, { "epoch": 1.2208144183210246, "grad_norm": 5.919744968414307, "learning_rate": 7.636114723800397e-05, "loss": 0.5308, "step": 18018 }, { "epoch": 1.2208821735889965, "grad_norm": 5.136290550231934, "learning_rate": 7.635977821890615e-05, "loss": 0.5941, "step": 18019 }, { "epoch": 1.2209499288569687, "grad_norm": 4.4463210105896, "learning_rate": 7.635840919980835e-05, "loss": 0.4817, "step": 18020 }, { "epoch": 1.2210176841249407, "grad_norm": 6.176875114440918, "learning_rate": 7.635704018071053e-05, "loss": 0.6398, "step": 18021 }, { "epoch": 1.2210854393929127, "grad_norm": 5.770840167999268, "learning_rate": 7.63556711616127e-05, "loss": 0.6786, "step": 18022 }, { "epoch": 1.221153194660885, "grad_norm": 6.276705741882324, "learning_rate": 7.635430214251489e-05, "loss": 0.8613, "step": 18023 }, { "epoch": 1.2212209499288569, "grad_norm": 4.476863384246826, "learning_rate": 7.635293312341708e-05, "loss": 0.6629, "step": 18024 }, { "epoch": 1.221288705196829, "grad_norm": 6.741992950439453, "learning_rate": 7.635156410431926e-05, "loss": 0.8168, "step": 18025 }, { "epoch": 1.221356460464801, "grad_norm": 6.626770973205566, "learning_rate": 7.635019508522144e-05, "loss": 0.6608, "step": 18026 }, { "epoch": 1.2214242157327733, "grad_norm": 6.388763427734375, "learning_rate": 7.634882606612362e-05, "loss": 0.7622, "step": 18027 }, { "epoch": 1.2214919710007452, "grad_norm": 7.007261276245117, "learning_rate": 7.63474570470258e-05, "loss": 0.8817, "step": 18028 }, { "epoch": 1.2215597262687174, "grad_norm": 4.994833469390869, "learning_rate": 7.6346088027928e-05, "loss": 0.525, "step": 18029 }, { "epoch": 1.2216274815366894, "grad_norm": 5.362818717956543, "learning_rate": 7.634471900883018e-05, "loss": 0.6027, "step": 18030 }, { "epoch": 1.2216952368046616, "grad_norm": 6.882465362548828, "learning_rate": 7.634334998973236e-05, "loss": 0.476, "step": 18031 }, { "epoch": 1.2217629920726336, "grad_norm": 8.69393253326416, "learning_rate": 7.634198097063454e-05, "loss": 0.6321, "step": 18032 }, { "epoch": 1.2218307473406058, "grad_norm": 5.9786529541015625, "learning_rate": 7.634061195153672e-05, "loss": 0.7933, "step": 18033 }, { "epoch": 1.2218985026085778, "grad_norm": 5.898993968963623, "learning_rate": 7.633924293243891e-05, "loss": 0.6235, "step": 18034 }, { "epoch": 1.22196625787655, "grad_norm": 4.783423900604248, "learning_rate": 7.633787391334109e-05, "loss": 0.5224, "step": 18035 }, { "epoch": 1.222034013144522, "grad_norm": 7.560459613800049, "learning_rate": 7.633650489424327e-05, "loss": 0.8104, "step": 18036 }, { "epoch": 1.222101768412494, "grad_norm": 4.811002731323242, "learning_rate": 7.633513587514545e-05, "loss": 0.7236, "step": 18037 }, { "epoch": 1.2221695236804662, "grad_norm": 6.19532585144043, "learning_rate": 7.633376685604765e-05, "loss": 0.8387, "step": 18038 }, { "epoch": 1.2222372789484384, "grad_norm": 5.872559547424316, "learning_rate": 7.633239783694983e-05, "loss": 0.7077, "step": 18039 }, { "epoch": 1.2223050342164103, "grad_norm": 4.4188232421875, "learning_rate": 7.6331028817852e-05, "loss": 0.5792, "step": 18040 }, { "epoch": 1.2223727894843823, "grad_norm": 5.051517009735107, "learning_rate": 7.63296597987542e-05, "loss": 0.6725, "step": 18041 }, { "epoch": 1.2224405447523545, "grad_norm": 5.528559684753418, "learning_rate": 7.632829077965638e-05, "loss": 0.8271, "step": 18042 }, { "epoch": 1.2225083000203265, "grad_norm": 6.110957145690918, "learning_rate": 7.632692176055856e-05, "loss": 0.7396, "step": 18043 }, { "epoch": 1.2225760552882987, "grad_norm": 9.123190879821777, "learning_rate": 7.632555274146075e-05, "loss": 0.636, "step": 18044 }, { "epoch": 1.2226438105562707, "grad_norm": 6.206221103668213, "learning_rate": 7.632418372236294e-05, "loss": 0.645, "step": 18045 }, { "epoch": 1.222711565824243, "grad_norm": 8.248867988586426, "learning_rate": 7.632281470326512e-05, "loss": 0.7355, "step": 18046 }, { "epoch": 1.2227793210922149, "grad_norm": 5.231109619140625, "learning_rate": 7.632144568416731e-05, "loss": 0.5745, "step": 18047 }, { "epoch": 1.222847076360187, "grad_norm": 5.008121967315674, "learning_rate": 7.632007666506949e-05, "loss": 0.6544, "step": 18048 }, { "epoch": 1.222914831628159, "grad_norm": 5.843269348144531, "learning_rate": 7.631870764597167e-05, "loss": 0.6308, "step": 18049 }, { "epoch": 1.2229825868961313, "grad_norm": 4.995326042175293, "learning_rate": 7.631733862687385e-05, "loss": 0.7161, "step": 18050 }, { "epoch": 1.2230503421641032, "grad_norm": 5.701000213623047, "learning_rate": 7.631596960777603e-05, "loss": 0.6209, "step": 18051 }, { "epoch": 1.2231180974320752, "grad_norm": 5.274313926696777, "learning_rate": 7.631460058867822e-05, "loss": 0.6137, "step": 18052 }, { "epoch": 1.2231858527000474, "grad_norm": 4.914982318878174, "learning_rate": 7.63132315695804e-05, "loss": 0.7078, "step": 18053 }, { "epoch": 1.2232536079680196, "grad_norm": 4.857698917388916, "learning_rate": 7.631186255048259e-05, "loss": 0.717, "step": 18054 }, { "epoch": 1.2233213632359916, "grad_norm": 7.02639627456665, "learning_rate": 7.631049353138477e-05, "loss": 0.8204, "step": 18055 }, { "epoch": 1.2233891185039636, "grad_norm": 7.193076133728027, "learning_rate": 7.630912451228695e-05, "loss": 0.6439, "step": 18056 }, { "epoch": 1.2234568737719358, "grad_norm": 5.445677757263184, "learning_rate": 7.630775549318914e-05, "loss": 0.656, "step": 18057 }, { "epoch": 1.2235246290399078, "grad_norm": 9.708181381225586, "learning_rate": 7.630638647409132e-05, "loss": 0.5955, "step": 18058 }, { "epoch": 1.22359238430788, "grad_norm": 6.708062171936035, "learning_rate": 7.63050174549935e-05, "loss": 0.7779, "step": 18059 }, { "epoch": 1.223660139575852, "grad_norm": 4.14343786239624, "learning_rate": 7.630364843589568e-05, "loss": 0.6535, "step": 18060 }, { "epoch": 1.2237278948438242, "grad_norm": 5.566597938537598, "learning_rate": 7.630227941679787e-05, "loss": 0.6028, "step": 18061 }, { "epoch": 1.2237956501117961, "grad_norm": 4.511200904846191, "learning_rate": 7.630091039770006e-05, "loss": 0.6257, "step": 18062 }, { "epoch": 1.2238634053797683, "grad_norm": 5.272220611572266, "learning_rate": 7.629954137860224e-05, "loss": 0.7351, "step": 18063 }, { "epoch": 1.2239311606477403, "grad_norm": 5.659876823425293, "learning_rate": 7.629817235950442e-05, "loss": 0.7477, "step": 18064 }, { "epoch": 1.2239989159157125, "grad_norm": 7.069100379943848, "learning_rate": 7.62968033404066e-05, "loss": 0.73, "step": 18065 }, { "epoch": 1.2240666711836845, "grad_norm": 5.5793304443359375, "learning_rate": 7.629543432130879e-05, "loss": 0.8045, "step": 18066 }, { "epoch": 1.2241344264516567, "grad_norm": 5.751017093658447, "learning_rate": 7.629406530221097e-05, "loss": 0.6574, "step": 18067 }, { "epoch": 1.2242021817196287, "grad_norm": 4.8076701164245605, "learning_rate": 7.629269628311315e-05, "loss": 0.6907, "step": 18068 }, { "epoch": 1.224269936987601, "grad_norm": 7.757662773132324, "learning_rate": 7.629132726401533e-05, "loss": 0.7141, "step": 18069 }, { "epoch": 1.2243376922555729, "grad_norm": 5.999941349029541, "learning_rate": 7.628995824491753e-05, "loss": 0.808, "step": 18070 }, { "epoch": 1.2244054475235449, "grad_norm": 5.947088718414307, "learning_rate": 7.62885892258197e-05, "loss": 0.6211, "step": 18071 }, { "epoch": 1.224473202791517, "grad_norm": 4.921745300292969, "learning_rate": 7.628722020672189e-05, "loss": 0.7553, "step": 18072 }, { "epoch": 1.224540958059489, "grad_norm": 4.209071636199951, "learning_rate": 7.628585118762407e-05, "loss": 0.4931, "step": 18073 }, { "epoch": 1.2246087133274612, "grad_norm": 4.7868170738220215, "learning_rate": 7.628448216852625e-05, "loss": 0.5719, "step": 18074 }, { "epoch": 1.2246764685954332, "grad_norm": 5.3280158042907715, "learning_rate": 7.628311314942844e-05, "loss": 0.573, "step": 18075 }, { "epoch": 1.2247442238634054, "grad_norm": 5.4629716873168945, "learning_rate": 7.628174413033062e-05, "loss": 0.5231, "step": 18076 }, { "epoch": 1.2248119791313774, "grad_norm": 4.5787787437438965, "learning_rate": 7.62803751112328e-05, "loss": 0.5587, "step": 18077 }, { "epoch": 1.2248797343993496, "grad_norm": 4.548202037811279, "learning_rate": 7.627900609213498e-05, "loss": 0.5094, "step": 18078 }, { "epoch": 1.2249474896673216, "grad_norm": 6.067834377288818, "learning_rate": 7.627763707303716e-05, "loss": 0.6347, "step": 18079 }, { "epoch": 1.2250152449352938, "grad_norm": 6.163285255432129, "learning_rate": 7.627626805393936e-05, "loss": 0.8042, "step": 18080 }, { "epoch": 1.2250830002032658, "grad_norm": 6.52103853225708, "learning_rate": 7.627489903484154e-05, "loss": 0.7237, "step": 18081 }, { "epoch": 1.225150755471238, "grad_norm": 5.5198893547058105, "learning_rate": 7.627353001574372e-05, "loss": 0.7413, "step": 18082 }, { "epoch": 1.22521851073921, "grad_norm": 5.332125186920166, "learning_rate": 7.62721609966459e-05, "loss": 0.7945, "step": 18083 }, { "epoch": 1.2252862660071822, "grad_norm": 6.892205238342285, "learning_rate": 7.627079197754809e-05, "loss": 0.7906, "step": 18084 }, { "epoch": 1.2253540212751541, "grad_norm": 4.1680908203125, "learning_rate": 7.626942295845027e-05, "loss": 0.6408, "step": 18085 }, { "epoch": 1.2254217765431261, "grad_norm": 8.176078796386719, "learning_rate": 7.626805393935245e-05, "loss": 0.6947, "step": 18086 }, { "epoch": 1.2254895318110983, "grad_norm": 6.414756774902344, "learning_rate": 7.626668492025465e-05, "loss": 0.7691, "step": 18087 }, { "epoch": 1.2255572870790703, "grad_norm": 5.7930474281311035, "learning_rate": 7.626531590115683e-05, "loss": 0.7177, "step": 18088 }, { "epoch": 1.2256250423470425, "grad_norm": 5.2728271484375, "learning_rate": 7.6263946882059e-05, "loss": 0.6034, "step": 18089 }, { "epoch": 1.2256927976150145, "grad_norm": 5.549907684326172, "learning_rate": 7.62625778629612e-05, "loss": 0.6401, "step": 18090 }, { "epoch": 1.2257605528829867, "grad_norm": 6.938556671142578, "learning_rate": 7.626120884386338e-05, "loss": 0.5395, "step": 18091 }, { "epoch": 1.2258283081509587, "grad_norm": 5.350551128387451, "learning_rate": 7.625983982476556e-05, "loss": 0.716, "step": 18092 }, { "epoch": 1.2258960634189309, "grad_norm": 4.515890598297119, "learning_rate": 7.625847080566775e-05, "loss": 0.5628, "step": 18093 }, { "epoch": 1.2259638186869029, "grad_norm": 5.260360240936279, "learning_rate": 7.625710178656993e-05, "loss": 0.7814, "step": 18094 }, { "epoch": 1.226031573954875, "grad_norm": 4.987314224243164, "learning_rate": 7.625573276747211e-05, "loss": 0.9556, "step": 18095 }, { "epoch": 1.226099329222847, "grad_norm": 6.4795241355896, "learning_rate": 7.62543637483743e-05, "loss": 0.7433, "step": 18096 }, { "epoch": 1.2261670844908192, "grad_norm": 4.585717678070068, "learning_rate": 7.625299472927648e-05, "loss": 0.5516, "step": 18097 }, { "epoch": 1.2262348397587912, "grad_norm": 5.807083606719971, "learning_rate": 7.625162571017867e-05, "loss": 1.0551, "step": 18098 }, { "epoch": 1.2263025950267634, "grad_norm": 7.647436141967773, "learning_rate": 7.625025669108085e-05, "loss": 0.5785, "step": 18099 }, { "epoch": 1.2263703502947354, "grad_norm": 5.377509117126465, "learning_rate": 7.624888767198303e-05, "loss": 0.5938, "step": 18100 }, { "epoch": 1.2264381055627074, "grad_norm": 7.228111743927002, "learning_rate": 7.624751865288521e-05, "loss": 0.6299, "step": 18101 }, { "epoch": 1.2265058608306796, "grad_norm": 6.495123386383057, "learning_rate": 7.62461496337874e-05, "loss": 0.7236, "step": 18102 }, { "epoch": 1.2265736160986518, "grad_norm": 5.612201690673828, "learning_rate": 7.624478061468958e-05, "loss": 0.8444, "step": 18103 }, { "epoch": 1.2266413713666238, "grad_norm": 6.513780117034912, "learning_rate": 7.624341159559177e-05, "loss": 0.8117, "step": 18104 }, { "epoch": 1.2267091266345957, "grad_norm": 6.759029388427734, "learning_rate": 7.624204257649395e-05, "loss": 0.9207, "step": 18105 }, { "epoch": 1.226776881902568, "grad_norm": 5.229630470275879, "learning_rate": 7.624067355739613e-05, "loss": 0.718, "step": 18106 }, { "epoch": 1.22684463717054, "grad_norm": 5.184760570526123, "learning_rate": 7.623930453829832e-05, "loss": 0.5565, "step": 18107 }, { "epoch": 1.2269123924385121, "grad_norm": 5.323483943939209, "learning_rate": 7.62379355192005e-05, "loss": 0.5649, "step": 18108 }, { "epoch": 1.2269801477064841, "grad_norm": 5.713914394378662, "learning_rate": 7.623656650010268e-05, "loss": 1.0169, "step": 18109 }, { "epoch": 1.2270479029744563, "grad_norm": 6.060333728790283, "learning_rate": 7.623519748100486e-05, "loss": 0.5474, "step": 18110 }, { "epoch": 1.2271156582424283, "grad_norm": 7.35471248626709, "learning_rate": 7.623382846190704e-05, "loss": 0.9568, "step": 18111 }, { "epoch": 1.2271834135104005, "grad_norm": 5.508981227874756, "learning_rate": 7.623245944280923e-05, "loss": 0.8809, "step": 18112 }, { "epoch": 1.2272511687783725, "grad_norm": 6.507081031799316, "learning_rate": 7.623109042371142e-05, "loss": 0.6141, "step": 18113 }, { "epoch": 1.2273189240463447, "grad_norm": 4.551576614379883, "learning_rate": 7.62297214046136e-05, "loss": 0.5581, "step": 18114 }, { "epoch": 1.2273866793143167, "grad_norm": 5.739194393157959, "learning_rate": 7.622835238551578e-05, "loss": 0.563, "step": 18115 }, { "epoch": 1.2274544345822889, "grad_norm": 7.448077201843262, "learning_rate": 7.622698336641797e-05, "loss": 0.8644, "step": 18116 }, { "epoch": 1.2275221898502608, "grad_norm": 4.7695770263671875, "learning_rate": 7.622561434732015e-05, "loss": 0.6372, "step": 18117 }, { "epoch": 1.227589945118233, "grad_norm": 5.259091854095459, "learning_rate": 7.622424532822233e-05, "loss": 0.7068, "step": 18118 }, { "epoch": 1.227657700386205, "grad_norm": 4.840086936950684, "learning_rate": 7.622287630912451e-05, "loss": 0.668, "step": 18119 }, { "epoch": 1.227725455654177, "grad_norm": 7.2845282554626465, "learning_rate": 7.622150729002669e-05, "loss": 0.5841, "step": 18120 }, { "epoch": 1.2277932109221492, "grad_norm": 5.2438249588012695, "learning_rate": 7.622013827092889e-05, "loss": 0.5784, "step": 18121 }, { "epoch": 1.2278609661901212, "grad_norm": 5.425882816314697, "learning_rate": 7.621876925183107e-05, "loss": 0.4798, "step": 18122 }, { "epoch": 1.2279287214580934, "grad_norm": 4.423764705657959, "learning_rate": 7.621740023273325e-05, "loss": 0.5684, "step": 18123 }, { "epoch": 1.2279964767260654, "grad_norm": 6.794591903686523, "learning_rate": 7.621603121363543e-05, "loss": 0.3746, "step": 18124 }, { "epoch": 1.2280642319940376, "grad_norm": 4.884415626525879, "learning_rate": 7.621466219453762e-05, "loss": 0.6326, "step": 18125 }, { "epoch": 1.2281319872620096, "grad_norm": 4.998366832733154, "learning_rate": 7.62132931754398e-05, "loss": 0.6967, "step": 18126 }, { "epoch": 1.2281997425299818, "grad_norm": 5.243693828582764, "learning_rate": 7.621192415634198e-05, "loss": 0.8389, "step": 18127 }, { "epoch": 1.2282674977979537, "grad_norm": 5.258869171142578, "learning_rate": 7.621055513724416e-05, "loss": 0.6824, "step": 18128 }, { "epoch": 1.228335253065926, "grad_norm": 6.289614677429199, "learning_rate": 7.620918611814634e-05, "loss": 0.9113, "step": 18129 }, { "epoch": 1.228403008333898, "grad_norm": 4.787515163421631, "learning_rate": 7.620781709904854e-05, "loss": 0.6163, "step": 18130 }, { "epoch": 1.2284707636018701, "grad_norm": 6.306053161621094, "learning_rate": 7.620644807995072e-05, "loss": 0.6851, "step": 18131 }, { "epoch": 1.2285385188698421, "grad_norm": 5.287718772888184, "learning_rate": 7.62050790608529e-05, "loss": 0.4938, "step": 18132 }, { "epoch": 1.2286062741378143, "grad_norm": 7.48237943649292, "learning_rate": 7.620371004175509e-05, "loss": 0.7323, "step": 18133 }, { "epoch": 1.2286740294057863, "grad_norm": 5.759404182434082, "learning_rate": 7.620234102265727e-05, "loss": 0.6837, "step": 18134 }, { "epoch": 1.2287417846737583, "grad_norm": 6.255051136016846, "learning_rate": 7.620097200355945e-05, "loss": 0.7534, "step": 18135 }, { "epoch": 1.2288095399417305, "grad_norm": 6.460224628448486, "learning_rate": 7.619960298446164e-05, "loss": 0.813, "step": 18136 }, { "epoch": 1.2288772952097025, "grad_norm": 6.245341777801514, "learning_rate": 7.619823396536382e-05, "loss": 0.8548, "step": 18137 }, { "epoch": 1.2289450504776747, "grad_norm": 7.165752410888672, "learning_rate": 7.6196864946266e-05, "loss": 0.7247, "step": 18138 }, { "epoch": 1.2290128057456466, "grad_norm": 5.944732666015625, "learning_rate": 7.61954959271682e-05, "loss": 0.7436, "step": 18139 }, { "epoch": 1.2290805610136188, "grad_norm": 5.928808212280273, "learning_rate": 7.619412690807038e-05, "loss": 0.6332, "step": 18140 }, { "epoch": 1.2291483162815908, "grad_norm": 7.637009143829346, "learning_rate": 7.619275788897256e-05, "loss": 0.6698, "step": 18141 }, { "epoch": 1.229216071549563, "grad_norm": 6.397332668304443, "learning_rate": 7.619138886987474e-05, "loss": 0.5942, "step": 18142 }, { "epoch": 1.229283826817535, "grad_norm": 7.642723560333252, "learning_rate": 7.619001985077692e-05, "loss": 0.6061, "step": 18143 }, { "epoch": 1.2293515820855072, "grad_norm": 4.819983959197998, "learning_rate": 7.618865083167911e-05, "loss": 0.6466, "step": 18144 }, { "epoch": 1.2294193373534792, "grad_norm": 4.973183631896973, "learning_rate": 7.61872818125813e-05, "loss": 0.5931, "step": 18145 }, { "epoch": 1.2294870926214514, "grad_norm": 5.909261703491211, "learning_rate": 7.618591279348347e-05, "loss": 0.5146, "step": 18146 }, { "epoch": 1.2295548478894234, "grad_norm": 4.776844501495361, "learning_rate": 7.618454377438566e-05, "loss": 0.5519, "step": 18147 }, { "epoch": 1.2296226031573956, "grad_norm": 4.408102512359619, "learning_rate": 7.618317475528785e-05, "loss": 0.488, "step": 18148 }, { "epoch": 1.2296903584253676, "grad_norm": 4.527730464935303, "learning_rate": 7.618180573619003e-05, "loss": 0.6212, "step": 18149 }, { "epoch": 1.2297581136933395, "grad_norm": 5.151172161102295, "learning_rate": 7.618043671709221e-05, "loss": 0.751, "step": 18150 }, { "epoch": 1.2298258689613117, "grad_norm": 4.503907680511475, "learning_rate": 7.617906769799439e-05, "loss": 0.675, "step": 18151 }, { "epoch": 1.229893624229284, "grad_norm": 6.000171661376953, "learning_rate": 7.617769867889657e-05, "loss": 0.5313, "step": 18152 }, { "epoch": 1.229961379497256, "grad_norm": 5.874682903289795, "learning_rate": 7.617632965979876e-05, "loss": 0.8739, "step": 18153 }, { "epoch": 1.230029134765228, "grad_norm": 5.422181129455566, "learning_rate": 7.617496064070094e-05, "loss": 0.7197, "step": 18154 }, { "epoch": 1.2300968900332, "grad_norm": 5.414138317108154, "learning_rate": 7.617359162160313e-05, "loss": 0.6733, "step": 18155 }, { "epoch": 1.230164645301172, "grad_norm": 6.205505847930908, "learning_rate": 7.61722226025053e-05, "loss": 0.8054, "step": 18156 }, { "epoch": 1.2302324005691443, "grad_norm": 7.225019454956055, "learning_rate": 7.61708535834075e-05, "loss": 0.6045, "step": 18157 }, { "epoch": 1.2303001558371163, "grad_norm": 5.018001079559326, "learning_rate": 7.616948456430968e-05, "loss": 0.573, "step": 18158 }, { "epoch": 1.2303679111050885, "grad_norm": 5.88112735748291, "learning_rate": 7.616811554521186e-05, "loss": 0.7554, "step": 18159 }, { "epoch": 1.2304356663730605, "grad_norm": 7.625991344451904, "learning_rate": 7.616674652611404e-05, "loss": 0.9717, "step": 18160 }, { "epoch": 1.2305034216410327, "grad_norm": 5.763574123382568, "learning_rate": 7.616537750701622e-05, "loss": 0.9212, "step": 18161 }, { "epoch": 1.2305711769090046, "grad_norm": 4.682827472686768, "learning_rate": 7.616400848791841e-05, "loss": 0.4873, "step": 18162 }, { "epoch": 1.2306389321769768, "grad_norm": 5.930944442749023, "learning_rate": 7.61626394688206e-05, "loss": 0.6912, "step": 18163 }, { "epoch": 1.2307066874449488, "grad_norm": 6.0003814697265625, "learning_rate": 7.616127044972278e-05, "loss": 0.6373, "step": 18164 }, { "epoch": 1.230774442712921, "grad_norm": 6.296213626861572, "learning_rate": 7.615990143062496e-05, "loss": 0.7201, "step": 18165 }, { "epoch": 1.230842197980893, "grad_norm": 5.7189249992370605, "learning_rate": 7.615853241152714e-05, "loss": 0.7472, "step": 18166 }, { "epoch": 1.2309099532488652, "grad_norm": 6.441144943237305, "learning_rate": 7.615716339242933e-05, "loss": 0.9438, "step": 18167 }, { "epoch": 1.2309777085168372, "grad_norm": 6.301687717437744, "learning_rate": 7.615579437333151e-05, "loss": 0.7934, "step": 18168 }, { "epoch": 1.2310454637848092, "grad_norm": 5.537693023681641, "learning_rate": 7.615442535423369e-05, "loss": 0.7587, "step": 18169 }, { "epoch": 1.2311132190527814, "grad_norm": 9.502969741821289, "learning_rate": 7.615305633513587e-05, "loss": 0.6527, "step": 18170 }, { "epoch": 1.2311809743207534, "grad_norm": 5.102190971374512, "learning_rate": 7.615168731603806e-05, "loss": 0.6343, "step": 18171 }, { "epoch": 1.2312487295887256, "grad_norm": 4.21937370300293, "learning_rate": 7.615031829694025e-05, "loss": 0.4586, "step": 18172 }, { "epoch": 1.2313164848566975, "grad_norm": 6.393093585968018, "learning_rate": 7.614894927784243e-05, "loss": 0.7663, "step": 18173 }, { "epoch": 1.2313842401246697, "grad_norm": 4.278507232666016, "learning_rate": 7.61475802587446e-05, "loss": 0.5946, "step": 18174 }, { "epoch": 1.2314519953926417, "grad_norm": 7.228772163391113, "learning_rate": 7.614621123964679e-05, "loss": 0.8254, "step": 18175 }, { "epoch": 1.231519750660614, "grad_norm": 7.127139091491699, "learning_rate": 7.614484222054898e-05, "loss": 0.9439, "step": 18176 }, { "epoch": 1.231587505928586, "grad_norm": 5.133269786834717, "learning_rate": 7.614347320145116e-05, "loss": 0.5107, "step": 18177 }, { "epoch": 1.231655261196558, "grad_norm": 7.366678714752197, "learning_rate": 7.614210418235334e-05, "loss": 0.7128, "step": 18178 }, { "epoch": 1.23172301646453, "grad_norm": 5.290058135986328, "learning_rate": 7.614073516325552e-05, "loss": 0.7522, "step": 18179 }, { "epoch": 1.2317907717325023, "grad_norm": 8.452763557434082, "learning_rate": 7.613936614415771e-05, "loss": 0.6214, "step": 18180 }, { "epoch": 1.2318585270004743, "grad_norm": 5.031332492828369, "learning_rate": 7.61379971250599e-05, "loss": 0.6009, "step": 18181 }, { "epoch": 1.2319262822684465, "grad_norm": 4.337656497955322, "learning_rate": 7.613662810596208e-05, "loss": 0.4222, "step": 18182 }, { "epoch": 1.2319940375364185, "grad_norm": 7.403090000152588, "learning_rate": 7.613525908686427e-05, "loss": 0.6843, "step": 18183 }, { "epoch": 1.2320617928043904, "grad_norm": 5.537214279174805, "learning_rate": 7.613389006776645e-05, "loss": 0.6674, "step": 18184 }, { "epoch": 1.2321295480723626, "grad_norm": 5.237438201904297, "learning_rate": 7.613252104866864e-05, "loss": 0.7471, "step": 18185 }, { "epoch": 1.2321973033403346, "grad_norm": 4.745239734649658, "learning_rate": 7.613115202957082e-05, "loss": 0.5399, "step": 18186 }, { "epoch": 1.2322650586083068, "grad_norm": 5.67287015914917, "learning_rate": 7.6129783010473e-05, "loss": 0.6627, "step": 18187 }, { "epoch": 1.2323328138762788, "grad_norm": 6.220743179321289, "learning_rate": 7.612841399137518e-05, "loss": 0.6627, "step": 18188 }, { "epoch": 1.232400569144251, "grad_norm": 7.604142189025879, "learning_rate": 7.612704497227737e-05, "loss": 0.6789, "step": 18189 }, { "epoch": 1.232468324412223, "grad_norm": 5.372503280639648, "learning_rate": 7.612567595317956e-05, "loss": 0.7404, "step": 18190 }, { "epoch": 1.2325360796801952, "grad_norm": 5.212665557861328, "learning_rate": 7.612430693408174e-05, "loss": 0.6296, "step": 18191 }, { "epoch": 1.2326038349481672, "grad_norm": 7.466536521911621, "learning_rate": 7.612293791498392e-05, "loss": 0.7127, "step": 18192 }, { "epoch": 1.2326715902161394, "grad_norm": 6.254476070404053, "learning_rate": 7.61215688958861e-05, "loss": 0.849, "step": 18193 }, { "epoch": 1.2327393454841113, "grad_norm": 7.161530494689941, "learning_rate": 7.61201998767883e-05, "loss": 0.4574, "step": 18194 }, { "epoch": 1.2328071007520836, "grad_norm": 7.048676013946533, "learning_rate": 7.611883085769047e-05, "loss": 0.537, "step": 18195 }, { "epoch": 1.2328748560200555, "grad_norm": 4.868967533111572, "learning_rate": 7.611746183859265e-05, "loss": 0.6313, "step": 18196 }, { "epoch": 1.2329426112880277, "grad_norm": 4.794018745422363, "learning_rate": 7.611609281949483e-05, "loss": 0.6326, "step": 18197 }, { "epoch": 1.2330103665559997, "grad_norm": 7.3240556716918945, "learning_rate": 7.611472380039702e-05, "loss": 0.6995, "step": 18198 }, { "epoch": 1.2330781218239717, "grad_norm": 6.228128910064697, "learning_rate": 7.611335478129921e-05, "loss": 0.5818, "step": 18199 }, { "epoch": 1.233145877091944, "grad_norm": 9.069717407226562, "learning_rate": 7.611198576220139e-05, "loss": 0.718, "step": 18200 }, { "epoch": 1.233213632359916, "grad_norm": 5.857358932495117, "learning_rate": 7.611061674310357e-05, "loss": 0.7181, "step": 18201 }, { "epoch": 1.233281387627888, "grad_norm": 9.045475959777832, "learning_rate": 7.610924772400575e-05, "loss": 0.7415, "step": 18202 }, { "epoch": 1.23334914289586, "grad_norm": 6.362603664398193, "learning_rate": 7.610787870490794e-05, "loss": 0.6241, "step": 18203 }, { "epoch": 1.2334168981638323, "grad_norm": 7.243282318115234, "learning_rate": 7.610650968581012e-05, "loss": 0.7114, "step": 18204 }, { "epoch": 1.2334846534318042, "grad_norm": 5.471545219421387, "learning_rate": 7.61051406667123e-05, "loss": 0.8035, "step": 18205 }, { "epoch": 1.2335524086997764, "grad_norm": 6.288739204406738, "learning_rate": 7.610377164761449e-05, "loss": 0.7507, "step": 18206 }, { "epoch": 1.2336201639677484, "grad_norm": 6.829304218292236, "learning_rate": 7.610240262851667e-05, "loss": 0.7863, "step": 18207 }, { "epoch": 1.2336879192357206, "grad_norm": 5.105554580688477, "learning_rate": 7.610103360941886e-05, "loss": 0.7707, "step": 18208 }, { "epoch": 1.2337556745036926, "grad_norm": 7.09926176071167, "learning_rate": 7.609966459032104e-05, "loss": 0.8251, "step": 18209 }, { "epoch": 1.2338234297716648, "grad_norm": 4.862083911895752, "learning_rate": 7.609829557122322e-05, "loss": 0.5155, "step": 18210 }, { "epoch": 1.2338911850396368, "grad_norm": 5.866125583648682, "learning_rate": 7.60969265521254e-05, "loss": 0.6442, "step": 18211 }, { "epoch": 1.233958940307609, "grad_norm": 6.490697860717773, "learning_rate": 7.609555753302758e-05, "loss": 0.7055, "step": 18212 }, { "epoch": 1.234026695575581, "grad_norm": 6.739508152008057, "learning_rate": 7.609418851392977e-05, "loss": 0.8067, "step": 18213 }, { "epoch": 1.2340944508435532, "grad_norm": 7.74035120010376, "learning_rate": 7.609281949483195e-05, "loss": 0.8372, "step": 18214 }, { "epoch": 1.2341622061115252, "grad_norm": 6.445488929748535, "learning_rate": 7.609145047573414e-05, "loss": 0.6183, "step": 18215 }, { "epoch": 1.2342299613794974, "grad_norm": 6.993857383728027, "learning_rate": 7.609008145663632e-05, "loss": 0.6401, "step": 18216 }, { "epoch": 1.2342977166474693, "grad_norm": 5.35559606552124, "learning_rate": 7.608871243753851e-05, "loss": 0.5708, "step": 18217 }, { "epoch": 1.2343654719154413, "grad_norm": 8.084784507751465, "learning_rate": 7.608734341844069e-05, "loss": 0.5623, "step": 18218 }, { "epoch": 1.2344332271834135, "grad_norm": 7.187109470367432, "learning_rate": 7.608597439934287e-05, "loss": 0.7409, "step": 18219 }, { "epoch": 1.2345009824513855, "grad_norm": 6.2048563957214355, "learning_rate": 7.608460538024505e-05, "loss": 0.7278, "step": 18220 }, { "epoch": 1.2345687377193577, "grad_norm": 8.183573722839355, "learning_rate": 7.608323636114723e-05, "loss": 0.8511, "step": 18221 }, { "epoch": 1.2346364929873297, "grad_norm": 8.200891494750977, "learning_rate": 7.608186734204942e-05, "loss": 1.0603, "step": 18222 }, { "epoch": 1.234704248255302, "grad_norm": 4.761917591094971, "learning_rate": 7.60804983229516e-05, "loss": 0.7157, "step": 18223 }, { "epoch": 1.2347720035232739, "grad_norm": 8.363795280456543, "learning_rate": 7.607912930385379e-05, "loss": 0.6852, "step": 18224 }, { "epoch": 1.234839758791246, "grad_norm": 5.754380702972412, "learning_rate": 7.607776028475597e-05, "loss": 0.6346, "step": 18225 }, { "epoch": 1.234907514059218, "grad_norm": 5.477109909057617, "learning_rate": 7.607639126565816e-05, "loss": 0.7659, "step": 18226 }, { "epoch": 1.2349752693271903, "grad_norm": 7.943727493286133, "learning_rate": 7.607502224656034e-05, "loss": 0.63, "step": 18227 }, { "epoch": 1.2350430245951622, "grad_norm": 4.878358364105225, "learning_rate": 7.607365322746252e-05, "loss": 0.6735, "step": 18228 }, { "epoch": 1.2351107798631344, "grad_norm": 5.543867111206055, "learning_rate": 7.607228420836471e-05, "loss": 0.689, "step": 18229 }, { "epoch": 1.2351785351311064, "grad_norm": 5.050788402557373, "learning_rate": 7.60709151892669e-05, "loss": 0.7141, "step": 18230 }, { "epoch": 1.2352462903990786, "grad_norm": 11.664437294006348, "learning_rate": 7.606954617016907e-05, "loss": 0.5473, "step": 18231 }, { "epoch": 1.2353140456670506, "grad_norm": 4.634623050689697, "learning_rate": 7.606817715107127e-05, "loss": 0.7636, "step": 18232 }, { "epoch": 1.2353818009350226, "grad_norm": 7.5991339683532715, "learning_rate": 7.606680813197345e-05, "loss": 0.6636, "step": 18233 }, { "epoch": 1.2354495562029948, "grad_norm": 5.919665336608887, "learning_rate": 7.606543911287563e-05, "loss": 0.6424, "step": 18234 }, { "epoch": 1.2355173114709668, "grad_norm": 6.050735950469971, "learning_rate": 7.606407009377782e-05, "loss": 0.7623, "step": 18235 }, { "epoch": 1.235585066738939, "grad_norm": 5.501585483551025, "learning_rate": 7.606270107468e-05, "loss": 0.5839, "step": 18236 }, { "epoch": 1.235652822006911, "grad_norm": 5.354194164276123, "learning_rate": 7.606133205558218e-05, "loss": 0.8305, "step": 18237 }, { "epoch": 1.2357205772748832, "grad_norm": 4.940706729888916, "learning_rate": 7.605996303648436e-05, "loss": 0.6149, "step": 18238 }, { "epoch": 1.2357883325428551, "grad_norm": 7.159313678741455, "learning_rate": 7.605859401738654e-05, "loss": 0.7691, "step": 18239 }, { "epoch": 1.2358560878108273, "grad_norm": 6.895895481109619, "learning_rate": 7.605722499828874e-05, "loss": 0.6597, "step": 18240 }, { "epoch": 1.2359238430787993, "grad_norm": 6.15907621383667, "learning_rate": 7.605585597919092e-05, "loss": 0.7909, "step": 18241 }, { "epoch": 1.2359915983467715, "grad_norm": 7.679660797119141, "learning_rate": 7.60544869600931e-05, "loss": 0.8247, "step": 18242 }, { "epoch": 1.2360593536147435, "grad_norm": 4.731861114501953, "learning_rate": 7.605311794099528e-05, "loss": 0.7492, "step": 18243 }, { "epoch": 1.2361271088827157, "grad_norm": 5.705376625061035, "learning_rate": 7.605174892189746e-05, "loss": 0.7343, "step": 18244 }, { "epoch": 1.2361948641506877, "grad_norm": 7.357006072998047, "learning_rate": 7.605037990279965e-05, "loss": 0.6074, "step": 18245 }, { "epoch": 1.23626261941866, "grad_norm": 7.494833469390869, "learning_rate": 7.604901088370183e-05, "loss": 0.7074, "step": 18246 }, { "epoch": 1.2363303746866319, "grad_norm": 5.229856967926025, "learning_rate": 7.604764186460401e-05, "loss": 0.7422, "step": 18247 }, { "epoch": 1.2363981299546039, "grad_norm": 4.943398475646973, "learning_rate": 7.60462728455062e-05, "loss": 0.5172, "step": 18248 }, { "epoch": 1.236465885222576, "grad_norm": 6.097332954406738, "learning_rate": 7.604490382640839e-05, "loss": 0.7787, "step": 18249 }, { "epoch": 1.2365336404905483, "grad_norm": 5.074896812438965, "learning_rate": 7.604353480731057e-05, "loss": 0.631, "step": 18250 }, { "epoch": 1.2366013957585202, "grad_norm": 6.022866725921631, "learning_rate": 7.604216578821275e-05, "loss": 0.9236, "step": 18251 }, { "epoch": 1.2366691510264922, "grad_norm": 5.703517436981201, "learning_rate": 7.604079676911493e-05, "loss": 0.66, "step": 18252 }, { "epoch": 1.2367369062944644, "grad_norm": 4.512523651123047, "learning_rate": 7.603942775001711e-05, "loss": 0.7138, "step": 18253 }, { "epoch": 1.2368046615624364, "grad_norm": 4.882974624633789, "learning_rate": 7.60380587309193e-05, "loss": 0.7239, "step": 18254 }, { "epoch": 1.2368724168304086, "grad_norm": 5.847234725952148, "learning_rate": 7.603668971182148e-05, "loss": 0.699, "step": 18255 }, { "epoch": 1.2369401720983806, "grad_norm": 7.679762840270996, "learning_rate": 7.603532069272366e-05, "loss": 0.7491, "step": 18256 }, { "epoch": 1.2370079273663528, "grad_norm": 5.321573257446289, "learning_rate": 7.603395167362585e-05, "loss": 0.4653, "step": 18257 }, { "epoch": 1.2370756826343248, "grad_norm": 5.447080612182617, "learning_rate": 7.603258265452804e-05, "loss": 0.6317, "step": 18258 }, { "epoch": 1.237143437902297, "grad_norm": 7.135063648223877, "learning_rate": 7.603121363543022e-05, "loss": 0.7021, "step": 18259 }, { "epoch": 1.237211193170269, "grad_norm": 4.839768886566162, "learning_rate": 7.60298446163324e-05, "loss": 0.5687, "step": 18260 }, { "epoch": 1.2372789484382412, "grad_norm": 7.4310078620910645, "learning_rate": 7.602847559723458e-05, "loss": 0.811, "step": 18261 }, { "epoch": 1.2373467037062131, "grad_norm": 8.49348258972168, "learning_rate": 7.602710657813676e-05, "loss": 0.8388, "step": 18262 }, { "epoch": 1.2374144589741853, "grad_norm": 7.248007297515869, "learning_rate": 7.602573755903895e-05, "loss": 0.6113, "step": 18263 }, { "epoch": 1.2374822142421573, "grad_norm": 4.8018598556518555, "learning_rate": 7.602436853994113e-05, "loss": 0.5807, "step": 18264 }, { "epoch": 1.2375499695101295, "grad_norm": 6.000351428985596, "learning_rate": 7.602299952084331e-05, "loss": 0.7009, "step": 18265 }, { "epoch": 1.2376177247781015, "grad_norm": 6.559912204742432, "learning_rate": 7.60216305017455e-05, "loss": 0.6852, "step": 18266 }, { "epoch": 1.2376854800460735, "grad_norm": 6.837792873382568, "learning_rate": 7.602026148264768e-05, "loss": 1.0492, "step": 18267 }, { "epoch": 1.2377532353140457, "grad_norm": 4.829981803894043, "learning_rate": 7.601889246354987e-05, "loss": 0.566, "step": 18268 }, { "epoch": 1.2378209905820177, "grad_norm": 7.107032299041748, "learning_rate": 7.601752344445205e-05, "loss": 0.6397, "step": 18269 }, { "epoch": 1.2378887458499899, "grad_norm": 6.705806255340576, "learning_rate": 7.601615442535423e-05, "loss": 0.7072, "step": 18270 }, { "epoch": 1.2379565011179618, "grad_norm": 7.603937149047852, "learning_rate": 7.601478540625641e-05, "loss": 0.7151, "step": 18271 }, { "epoch": 1.238024256385934, "grad_norm": 8.943765640258789, "learning_rate": 7.60134163871586e-05, "loss": 0.5127, "step": 18272 }, { "epoch": 1.238092011653906, "grad_norm": 5.192441463470459, "learning_rate": 7.601204736806078e-05, "loss": 0.5709, "step": 18273 }, { "epoch": 1.2381597669218782, "grad_norm": 7.020391941070557, "learning_rate": 7.601067834896297e-05, "loss": 0.7772, "step": 18274 }, { "epoch": 1.2382275221898502, "grad_norm": 7.921938419342041, "learning_rate": 7.600930932986516e-05, "loss": 0.5601, "step": 18275 }, { "epoch": 1.2382952774578224, "grad_norm": 5.29582405090332, "learning_rate": 7.600794031076734e-05, "loss": 0.7777, "step": 18276 }, { "epoch": 1.2383630327257944, "grad_norm": 6.641912937164307, "learning_rate": 7.600657129166952e-05, "loss": 0.9943, "step": 18277 }, { "epoch": 1.2384307879937666, "grad_norm": 5.117624282836914, "learning_rate": 7.600520227257171e-05, "loss": 0.6456, "step": 18278 }, { "epoch": 1.2384985432617386, "grad_norm": 7.091884136199951, "learning_rate": 7.60038332534739e-05, "loss": 0.7467, "step": 18279 }, { "epoch": 1.2385662985297108, "grad_norm": 7.4439005851745605, "learning_rate": 7.600246423437607e-05, "loss": 0.6892, "step": 18280 }, { "epoch": 1.2386340537976828, "grad_norm": 5.143298149108887, "learning_rate": 7.600109521527827e-05, "loss": 0.6211, "step": 18281 }, { "epoch": 1.2387018090656547, "grad_norm": 4.91877555847168, "learning_rate": 7.599972619618045e-05, "loss": 0.635, "step": 18282 }, { "epoch": 1.238769564333627, "grad_norm": 6.771152496337891, "learning_rate": 7.599835717708263e-05, "loss": 0.8141, "step": 18283 }, { "epoch": 1.238837319601599, "grad_norm": 6.854776859283447, "learning_rate": 7.599698815798481e-05, "loss": 0.7984, "step": 18284 }, { "epoch": 1.2389050748695711, "grad_norm": 5.357391357421875, "learning_rate": 7.599561913888699e-05, "loss": 0.6077, "step": 18285 }, { "epoch": 1.2389728301375431, "grad_norm": 7.865238189697266, "learning_rate": 7.599425011978918e-05, "loss": 0.8676, "step": 18286 }, { "epoch": 1.2390405854055153, "grad_norm": 6.712452411651611, "learning_rate": 7.599288110069136e-05, "loss": 0.9323, "step": 18287 }, { "epoch": 1.2391083406734873, "grad_norm": 5.430103778839111, "learning_rate": 7.599151208159354e-05, "loss": 0.5833, "step": 18288 }, { "epoch": 1.2391760959414595, "grad_norm": 5.1226959228515625, "learning_rate": 7.599014306249572e-05, "loss": 0.5945, "step": 18289 }, { "epoch": 1.2392438512094315, "grad_norm": 5.239380836486816, "learning_rate": 7.598877404339792e-05, "loss": 0.5955, "step": 18290 }, { "epoch": 1.2393116064774037, "grad_norm": 5.962562561035156, "learning_rate": 7.59874050243001e-05, "loss": 0.5801, "step": 18291 }, { "epoch": 1.2393793617453757, "grad_norm": 5.283496856689453, "learning_rate": 7.598603600520228e-05, "loss": 0.7513, "step": 18292 }, { "epoch": 1.2394471170133479, "grad_norm": 5.144606113433838, "learning_rate": 7.598466698610446e-05, "loss": 0.6134, "step": 18293 }, { "epoch": 1.2395148722813198, "grad_norm": 5.917706489562988, "learning_rate": 7.598329796700664e-05, "loss": 0.6122, "step": 18294 }, { "epoch": 1.239582627549292, "grad_norm": 6.156318187713623, "learning_rate": 7.598192894790883e-05, "loss": 0.7378, "step": 18295 }, { "epoch": 1.239650382817264, "grad_norm": 5.589748859405518, "learning_rate": 7.598055992881101e-05, "loss": 0.7661, "step": 18296 }, { "epoch": 1.239718138085236, "grad_norm": 5.590287685394287, "learning_rate": 7.59791909097132e-05, "loss": 0.6075, "step": 18297 }, { "epoch": 1.2397858933532082, "grad_norm": 5.150818824768066, "learning_rate": 7.597782189061537e-05, "loss": 0.5828, "step": 18298 }, { "epoch": 1.2398536486211804, "grad_norm": 6.356131553649902, "learning_rate": 7.597645287151755e-05, "loss": 0.735, "step": 18299 }, { "epoch": 1.2399214038891524, "grad_norm": 6.432243824005127, "learning_rate": 7.597508385241975e-05, "loss": 0.7147, "step": 18300 }, { "epoch": 1.2399891591571244, "grad_norm": 6.962221622467041, "learning_rate": 7.597371483332193e-05, "loss": 0.6691, "step": 18301 }, { "epoch": 1.2400569144250966, "grad_norm": 5.084456443786621, "learning_rate": 7.597234581422411e-05, "loss": 0.5687, "step": 18302 }, { "epoch": 1.2401246696930686, "grad_norm": 5.874925136566162, "learning_rate": 7.597097679512629e-05, "loss": 0.7533, "step": 18303 }, { "epoch": 1.2401924249610408, "grad_norm": 7.665027618408203, "learning_rate": 7.596960777602848e-05, "loss": 0.706, "step": 18304 }, { "epoch": 1.2402601802290127, "grad_norm": 6.6125030517578125, "learning_rate": 7.596823875693066e-05, "loss": 0.6882, "step": 18305 }, { "epoch": 1.240327935496985, "grad_norm": 5.991425514221191, "learning_rate": 7.596686973783284e-05, "loss": 0.6645, "step": 18306 }, { "epoch": 1.240395690764957, "grad_norm": 7.070034980773926, "learning_rate": 7.596550071873502e-05, "loss": 0.7398, "step": 18307 }, { "epoch": 1.2404634460329291, "grad_norm": 4.521208763122559, "learning_rate": 7.59641316996372e-05, "loss": 0.498, "step": 18308 }, { "epoch": 1.240531201300901, "grad_norm": 5.832149982452393, "learning_rate": 7.59627626805394e-05, "loss": 0.5592, "step": 18309 }, { "epoch": 1.2405989565688733, "grad_norm": 6.671751976013184, "learning_rate": 7.596139366144158e-05, "loss": 0.6682, "step": 18310 }, { "epoch": 1.2406667118368453, "grad_norm": 5.849062442779541, "learning_rate": 7.596002464234376e-05, "loss": 0.779, "step": 18311 }, { "epoch": 1.2407344671048175, "grad_norm": 7.465084552764893, "learning_rate": 7.595865562324594e-05, "loss": 0.5536, "step": 18312 }, { "epoch": 1.2408022223727895, "grad_norm": 5.540762901306152, "learning_rate": 7.595728660414813e-05, "loss": 0.7038, "step": 18313 }, { "epoch": 1.2408699776407617, "grad_norm": 6.382152080535889, "learning_rate": 7.595591758505031e-05, "loss": 0.7971, "step": 18314 }, { "epoch": 1.2409377329087337, "grad_norm": 4.793679237365723, "learning_rate": 7.59545485659525e-05, "loss": 0.7614, "step": 18315 }, { "epoch": 1.2410054881767056, "grad_norm": 6.767176628112793, "learning_rate": 7.595317954685467e-05, "loss": 0.7421, "step": 18316 }, { "epoch": 1.2410732434446778, "grad_norm": 6.34992790222168, "learning_rate": 7.595181052775686e-05, "loss": 0.7134, "step": 18317 }, { "epoch": 1.2411409987126498, "grad_norm": 8.232148170471191, "learning_rate": 7.595044150865905e-05, "loss": 0.6995, "step": 18318 }, { "epoch": 1.241208753980622, "grad_norm": 4.682238578796387, "learning_rate": 7.594907248956123e-05, "loss": 0.7234, "step": 18319 }, { "epoch": 1.241276509248594, "grad_norm": 6.7042555809021, "learning_rate": 7.594770347046341e-05, "loss": 0.6465, "step": 18320 }, { "epoch": 1.2413442645165662, "grad_norm": 4.933433532714844, "learning_rate": 7.59463344513656e-05, "loss": 0.5732, "step": 18321 }, { "epoch": 1.2414120197845382, "grad_norm": 6.0281171798706055, "learning_rate": 7.594496543226778e-05, "loss": 0.8858, "step": 18322 }, { "epoch": 1.2414797750525104, "grad_norm": 6.028160095214844, "learning_rate": 7.594359641316996e-05, "loss": 0.6607, "step": 18323 }, { "epoch": 1.2415475303204824, "grad_norm": 5.355819225311279, "learning_rate": 7.594222739407216e-05, "loss": 0.5502, "step": 18324 }, { "epoch": 1.2416152855884546, "grad_norm": 4.877525329589844, "learning_rate": 7.594085837497434e-05, "loss": 0.6762, "step": 18325 }, { "epoch": 1.2416830408564266, "grad_norm": 7.059868335723877, "learning_rate": 7.593948935587652e-05, "loss": 0.7334, "step": 18326 }, { "epoch": 1.2417507961243988, "grad_norm": 7.107666015625, "learning_rate": 7.593812033677871e-05, "loss": 0.7463, "step": 18327 }, { "epoch": 1.2418185513923707, "grad_norm": 4.644011497497559, "learning_rate": 7.593675131768089e-05, "loss": 0.61, "step": 18328 }, { "epoch": 1.241886306660343, "grad_norm": 4.502552509307861, "learning_rate": 7.593538229858307e-05, "loss": 0.527, "step": 18329 }, { "epoch": 1.241954061928315, "grad_norm": 5.403695583343506, "learning_rate": 7.593401327948525e-05, "loss": 0.7903, "step": 18330 }, { "epoch": 1.242021817196287, "grad_norm": 8.830596923828125, "learning_rate": 7.593264426038743e-05, "loss": 0.6086, "step": 18331 }, { "epoch": 1.242089572464259, "grad_norm": 6.801039695739746, "learning_rate": 7.593127524128963e-05, "loss": 0.583, "step": 18332 }, { "epoch": 1.242157327732231, "grad_norm": 6.03915548324585, "learning_rate": 7.592990622219181e-05, "loss": 0.8215, "step": 18333 }, { "epoch": 1.2422250830002033, "grad_norm": 7.676496505737305, "learning_rate": 7.592853720309399e-05, "loss": 0.6051, "step": 18334 }, { "epoch": 1.2422928382681753, "grad_norm": 5.050236701965332, "learning_rate": 7.592716818399617e-05, "loss": 0.5496, "step": 18335 }, { "epoch": 1.2423605935361475, "grad_norm": 6.2748212814331055, "learning_rate": 7.592579916489836e-05, "loss": 0.9785, "step": 18336 }, { "epoch": 1.2424283488041195, "grad_norm": 7.210531711578369, "learning_rate": 7.592443014580054e-05, "loss": 0.936, "step": 18337 }, { "epoch": 1.2424961040720917, "grad_norm": 5.671183109283447, "learning_rate": 7.592306112670272e-05, "loss": 0.6438, "step": 18338 }, { "epoch": 1.2425638593400636, "grad_norm": 5.4255690574646, "learning_rate": 7.59216921076049e-05, "loss": 0.614, "step": 18339 }, { "epoch": 1.2426316146080358, "grad_norm": 9.136259078979492, "learning_rate": 7.592032308850708e-05, "loss": 0.863, "step": 18340 }, { "epoch": 1.2426993698760078, "grad_norm": 9.439860343933105, "learning_rate": 7.591895406940928e-05, "loss": 0.5626, "step": 18341 }, { "epoch": 1.24276712514398, "grad_norm": 5.82816743850708, "learning_rate": 7.591758505031146e-05, "loss": 0.8564, "step": 18342 }, { "epoch": 1.242834880411952, "grad_norm": 8.167750358581543, "learning_rate": 7.591621603121364e-05, "loss": 0.5207, "step": 18343 }, { "epoch": 1.2429026356799242, "grad_norm": 6.8161940574646, "learning_rate": 7.591484701211582e-05, "loss": 0.7033, "step": 18344 }, { "epoch": 1.2429703909478962, "grad_norm": 6.037930965423584, "learning_rate": 7.591347799301801e-05, "loss": 0.9543, "step": 18345 }, { "epoch": 1.2430381462158682, "grad_norm": 6.356756687164307, "learning_rate": 7.59121089739202e-05, "loss": 0.426, "step": 18346 }, { "epoch": 1.2431059014838404, "grad_norm": 6.194242477416992, "learning_rate": 7.591073995482237e-05, "loss": 0.6365, "step": 18347 }, { "epoch": 1.2431736567518126, "grad_norm": 6.263245105743408, "learning_rate": 7.590937093572455e-05, "loss": 0.7125, "step": 18348 }, { "epoch": 1.2432414120197846, "grad_norm": 8.249999046325684, "learning_rate": 7.590800191662673e-05, "loss": 0.7486, "step": 18349 }, { "epoch": 1.2433091672877565, "grad_norm": 6.465185165405273, "learning_rate": 7.590663289752893e-05, "loss": 0.588, "step": 18350 }, { "epoch": 1.2433769225557287, "grad_norm": 6.615595817565918, "learning_rate": 7.590526387843111e-05, "loss": 0.8067, "step": 18351 }, { "epoch": 1.2434446778237007, "grad_norm": 5.744843482971191, "learning_rate": 7.590389485933329e-05, "loss": 0.7306, "step": 18352 }, { "epoch": 1.243512433091673, "grad_norm": 5.4917144775390625, "learning_rate": 7.590252584023547e-05, "loss": 0.6532, "step": 18353 }, { "epoch": 1.243580188359645, "grad_norm": 4.359947204589844, "learning_rate": 7.590115682113765e-05, "loss": 0.7198, "step": 18354 }, { "epoch": 1.243647943627617, "grad_norm": 5.287740707397461, "learning_rate": 7.589978780203984e-05, "loss": 0.6558, "step": 18355 }, { "epoch": 1.243715698895589, "grad_norm": 5.208303928375244, "learning_rate": 7.589841878294202e-05, "loss": 0.7272, "step": 18356 }, { "epoch": 1.2437834541635613, "grad_norm": 5.912323474884033, "learning_rate": 7.58970497638442e-05, "loss": 0.6226, "step": 18357 }, { "epoch": 1.2438512094315333, "grad_norm": 4.994621753692627, "learning_rate": 7.589568074474638e-05, "loss": 0.6522, "step": 18358 }, { "epoch": 1.2439189646995055, "grad_norm": 5.531447410583496, "learning_rate": 7.589431172564858e-05, "loss": 0.6913, "step": 18359 }, { "epoch": 1.2439867199674775, "grad_norm": 6.614006996154785, "learning_rate": 7.589294270655076e-05, "loss": 0.5982, "step": 18360 }, { "epoch": 1.2440544752354497, "grad_norm": 6.066467761993408, "learning_rate": 7.589157368745294e-05, "loss": 0.7662, "step": 18361 }, { "epoch": 1.2441222305034216, "grad_norm": 4.1904826164245605, "learning_rate": 7.589020466835512e-05, "loss": 0.3991, "step": 18362 }, { "epoch": 1.2441899857713938, "grad_norm": 6.30812931060791, "learning_rate": 7.58888356492573e-05, "loss": 0.6071, "step": 18363 }, { "epoch": 1.2442577410393658, "grad_norm": 4.418458461761475, "learning_rate": 7.58874666301595e-05, "loss": 0.661, "step": 18364 }, { "epoch": 1.2443254963073378, "grad_norm": 6.085812568664551, "learning_rate": 7.588609761106167e-05, "loss": 0.5891, "step": 18365 }, { "epoch": 1.24439325157531, "grad_norm": 5.265291690826416, "learning_rate": 7.588472859196385e-05, "loss": 0.6936, "step": 18366 }, { "epoch": 1.244461006843282, "grad_norm": 5.662139892578125, "learning_rate": 7.588335957286605e-05, "loss": 0.6085, "step": 18367 }, { "epoch": 1.2445287621112542, "grad_norm": 4.7674241065979, "learning_rate": 7.588199055376823e-05, "loss": 0.5873, "step": 18368 }, { "epoch": 1.2445965173792262, "grad_norm": 5.590237617492676, "learning_rate": 7.588062153467041e-05, "loss": 0.603, "step": 18369 }, { "epoch": 1.2446642726471984, "grad_norm": 5.36829137802124, "learning_rate": 7.58792525155726e-05, "loss": 0.5697, "step": 18370 }, { "epoch": 1.2447320279151703, "grad_norm": 4.962731838226318, "learning_rate": 7.587788349647478e-05, "loss": 0.5829, "step": 18371 }, { "epoch": 1.2447997831831425, "grad_norm": 5.7452802658081055, "learning_rate": 7.587651447737696e-05, "loss": 0.6695, "step": 18372 }, { "epoch": 1.2448675384511145, "grad_norm": 7.008640766143799, "learning_rate": 7.587514545827916e-05, "loss": 0.834, "step": 18373 }, { "epoch": 1.2449352937190867, "grad_norm": 9.434866905212402, "learning_rate": 7.587377643918134e-05, "loss": 0.618, "step": 18374 }, { "epoch": 1.2450030489870587, "grad_norm": 5.060585975646973, "learning_rate": 7.587240742008352e-05, "loss": 0.618, "step": 18375 }, { "epoch": 1.245070804255031, "grad_norm": 5.655799865722656, "learning_rate": 7.58710384009857e-05, "loss": 0.8284, "step": 18376 }, { "epoch": 1.245138559523003, "grad_norm": 7.925520420074463, "learning_rate": 7.586966938188788e-05, "loss": 0.7431, "step": 18377 }, { "epoch": 1.245206314790975, "grad_norm": 5.230562686920166, "learning_rate": 7.586830036279007e-05, "loss": 0.761, "step": 18378 }, { "epoch": 1.245274070058947, "grad_norm": 5.60720157623291, "learning_rate": 7.586693134369225e-05, "loss": 0.5065, "step": 18379 }, { "epoch": 1.245341825326919, "grad_norm": 6.819098472595215, "learning_rate": 7.586556232459443e-05, "loss": 0.6631, "step": 18380 }, { "epoch": 1.2454095805948913, "grad_norm": 7.610952377319336, "learning_rate": 7.586419330549661e-05, "loss": 0.8025, "step": 18381 }, { "epoch": 1.2454773358628632, "grad_norm": 5.915439605712891, "learning_rate": 7.586282428639881e-05, "loss": 0.781, "step": 18382 }, { "epoch": 1.2455450911308354, "grad_norm": 4.912126541137695, "learning_rate": 7.586145526730099e-05, "loss": 0.5188, "step": 18383 }, { "epoch": 1.2456128463988074, "grad_norm": 5.676177024841309, "learning_rate": 7.586008624820317e-05, "loss": 0.5475, "step": 18384 }, { "epoch": 1.2456806016667796, "grad_norm": 4.516726493835449, "learning_rate": 7.585871722910535e-05, "loss": 0.6143, "step": 18385 }, { "epoch": 1.2457483569347516, "grad_norm": 8.950729370117188, "learning_rate": 7.585734821000753e-05, "loss": 0.5928, "step": 18386 }, { "epoch": 1.2458161122027238, "grad_norm": 7.224480628967285, "learning_rate": 7.585597919090972e-05, "loss": 0.7095, "step": 18387 }, { "epoch": 1.2458838674706958, "grad_norm": 6.877074241638184, "learning_rate": 7.58546101718119e-05, "loss": 0.7253, "step": 18388 }, { "epoch": 1.245951622738668, "grad_norm": 7.589900970458984, "learning_rate": 7.585324115271408e-05, "loss": 0.8208, "step": 18389 }, { "epoch": 1.24601937800664, "grad_norm": 6.306529998779297, "learning_rate": 7.585187213361626e-05, "loss": 0.5556, "step": 18390 }, { "epoch": 1.2460871332746122, "grad_norm": 5.165809631347656, "learning_rate": 7.585050311451846e-05, "loss": 0.8348, "step": 18391 }, { "epoch": 1.2461548885425842, "grad_norm": 6.17510986328125, "learning_rate": 7.584913409542064e-05, "loss": 0.7496, "step": 18392 }, { "epoch": 1.2462226438105564, "grad_norm": 6.940537929534912, "learning_rate": 7.584776507632282e-05, "loss": 0.9307, "step": 18393 }, { "epoch": 1.2462903990785283, "grad_norm": 5.258039951324463, "learning_rate": 7.5846396057225e-05, "loss": 0.5341, "step": 18394 }, { "epoch": 1.2463581543465003, "grad_norm": 5.3323774337768555, "learning_rate": 7.584502703812718e-05, "loss": 0.5748, "step": 18395 }, { "epoch": 1.2464259096144725, "grad_norm": 6.254161834716797, "learning_rate": 7.584365801902937e-05, "loss": 0.7838, "step": 18396 }, { "epoch": 1.2464936648824447, "grad_norm": 6.0442214012146, "learning_rate": 7.584228899993155e-05, "loss": 0.7778, "step": 18397 }, { "epoch": 1.2465614201504167, "grad_norm": 6.557648658752441, "learning_rate": 7.584091998083373e-05, "loss": 0.7761, "step": 18398 }, { "epoch": 1.2466291754183887, "grad_norm": 6.024246692657471, "learning_rate": 7.583955096173591e-05, "loss": 0.8143, "step": 18399 }, { "epoch": 1.246696930686361, "grad_norm": 6.548332691192627, "learning_rate": 7.58381819426381e-05, "loss": 0.7228, "step": 18400 }, { "epoch": 1.2467646859543329, "grad_norm": 8.239317893981934, "learning_rate": 7.583681292354029e-05, "loss": 0.681, "step": 18401 }, { "epoch": 1.246832441222305, "grad_norm": 6.653371810913086, "learning_rate": 7.583544390444247e-05, "loss": 0.5783, "step": 18402 }, { "epoch": 1.246900196490277, "grad_norm": 5.489253044128418, "learning_rate": 7.583407488534465e-05, "loss": 0.7057, "step": 18403 }, { "epoch": 1.2469679517582493, "grad_norm": 10.217595100402832, "learning_rate": 7.583270586624683e-05, "loss": 0.821, "step": 18404 }, { "epoch": 1.2470357070262212, "grad_norm": 5.781734466552734, "learning_rate": 7.583133684714902e-05, "loss": 0.8124, "step": 18405 }, { "epoch": 1.2471034622941934, "grad_norm": 5.485482692718506, "learning_rate": 7.58299678280512e-05, "loss": 0.3988, "step": 18406 }, { "epoch": 1.2471712175621654, "grad_norm": 5.147676944732666, "learning_rate": 7.582859880895338e-05, "loss": 0.6227, "step": 18407 }, { "epoch": 1.2472389728301376, "grad_norm": 5.972175121307373, "learning_rate": 7.582722978985556e-05, "loss": 0.7966, "step": 18408 }, { "epoch": 1.2473067280981096, "grad_norm": 4.4100728034973145, "learning_rate": 7.582586077075774e-05, "loss": 0.5352, "step": 18409 }, { "epoch": 1.2473744833660818, "grad_norm": 8.67353343963623, "learning_rate": 7.582449175165994e-05, "loss": 0.7738, "step": 18410 }, { "epoch": 1.2474422386340538, "grad_norm": 7.108034133911133, "learning_rate": 7.582312273256212e-05, "loss": 0.5752, "step": 18411 }, { "epoch": 1.247509993902026, "grad_norm": 5.655974388122559, "learning_rate": 7.58217537134643e-05, "loss": 0.7622, "step": 18412 }, { "epoch": 1.247577749169998, "grad_norm": 6.0239996910095215, "learning_rate": 7.582038469436649e-05, "loss": 0.4806, "step": 18413 }, { "epoch": 1.24764550443797, "grad_norm": 7.3847527503967285, "learning_rate": 7.581901567526867e-05, "loss": 0.8353, "step": 18414 }, { "epoch": 1.2477132597059422, "grad_norm": 5.64340353012085, "learning_rate": 7.581764665617085e-05, "loss": 0.6279, "step": 18415 }, { "epoch": 1.2477810149739141, "grad_norm": 6.056209564208984, "learning_rate": 7.581627763707305e-05, "loss": 0.7538, "step": 18416 }, { "epoch": 1.2478487702418863, "grad_norm": 5.869894981384277, "learning_rate": 7.581490861797523e-05, "loss": 0.6976, "step": 18417 }, { "epoch": 1.2479165255098583, "grad_norm": 5.403618335723877, "learning_rate": 7.581353959887741e-05, "loss": 0.618, "step": 18418 }, { "epoch": 1.2479842807778305, "grad_norm": 4.804281234741211, "learning_rate": 7.58121705797796e-05, "loss": 0.4805, "step": 18419 }, { "epoch": 1.2480520360458025, "grad_norm": 5.022444725036621, "learning_rate": 7.581080156068178e-05, "loss": 0.9231, "step": 18420 }, { "epoch": 1.2481197913137747, "grad_norm": 5.900055885314941, "learning_rate": 7.580943254158396e-05, "loss": 0.6079, "step": 18421 }, { "epoch": 1.2481875465817467, "grad_norm": 5.404339790344238, "learning_rate": 7.580806352248614e-05, "loss": 0.6443, "step": 18422 }, { "epoch": 1.248255301849719, "grad_norm": 4.7197184562683105, "learning_rate": 7.580669450338834e-05, "loss": 0.6525, "step": 18423 }, { "epoch": 1.2483230571176909, "grad_norm": 6.294169902801514, "learning_rate": 7.580532548429052e-05, "loss": 0.6796, "step": 18424 }, { "epoch": 1.248390812385663, "grad_norm": 5.794606685638428, "learning_rate": 7.58039564651927e-05, "loss": 0.5467, "step": 18425 }, { "epoch": 1.248458567653635, "grad_norm": 4.591580867767334, "learning_rate": 7.580258744609488e-05, "loss": 0.7601, "step": 18426 }, { "epoch": 1.2485263229216073, "grad_norm": 4.168753147125244, "learning_rate": 7.580121842699706e-05, "loss": 0.5565, "step": 18427 }, { "epoch": 1.2485940781895792, "grad_norm": 7.54253625869751, "learning_rate": 7.579984940789925e-05, "loss": 0.6358, "step": 18428 }, { "epoch": 1.2486618334575512, "grad_norm": 4.858458518981934, "learning_rate": 7.579848038880143e-05, "loss": 0.5466, "step": 18429 }, { "epoch": 1.2487295887255234, "grad_norm": 7.536377429962158, "learning_rate": 7.579711136970361e-05, "loss": 0.6683, "step": 18430 }, { "epoch": 1.2487973439934954, "grad_norm": 6.619974613189697, "learning_rate": 7.57957423506058e-05, "loss": 0.5221, "step": 18431 }, { "epoch": 1.2488650992614676, "grad_norm": 9.248383522033691, "learning_rate": 7.579437333150797e-05, "loss": 0.5517, "step": 18432 }, { "epoch": 1.2489328545294396, "grad_norm": 7.319504261016846, "learning_rate": 7.579300431241017e-05, "loss": 0.7803, "step": 18433 }, { "epoch": 1.2490006097974118, "grad_norm": 6.124282360076904, "learning_rate": 7.579163529331235e-05, "loss": 0.6649, "step": 18434 }, { "epoch": 1.2490683650653838, "grad_norm": 4.615629196166992, "learning_rate": 7.579026627421453e-05, "loss": 0.6239, "step": 18435 }, { "epoch": 1.249136120333356, "grad_norm": 7.09864616394043, "learning_rate": 7.578889725511671e-05, "loss": 0.7135, "step": 18436 }, { "epoch": 1.249203875601328, "grad_norm": 5.639509677886963, "learning_rate": 7.57875282360189e-05, "loss": 0.8308, "step": 18437 }, { "epoch": 1.2492716308693002, "grad_norm": 5.514218807220459, "learning_rate": 7.578615921692108e-05, "loss": 0.51, "step": 18438 }, { "epoch": 1.2493393861372721, "grad_norm": 5.990070343017578, "learning_rate": 7.578479019782326e-05, "loss": 0.5471, "step": 18439 }, { "epoch": 1.2494071414052443, "grad_norm": 5.667049407958984, "learning_rate": 7.578342117872544e-05, "loss": 0.5892, "step": 18440 }, { "epoch": 1.2494748966732163, "grad_norm": 7.905735015869141, "learning_rate": 7.578205215962762e-05, "loss": 0.8953, "step": 18441 }, { "epoch": 1.2495426519411885, "grad_norm": 6.189059734344482, "learning_rate": 7.578068314052982e-05, "loss": 1.0339, "step": 18442 }, { "epoch": 1.2496104072091605, "grad_norm": 5.740088939666748, "learning_rate": 7.5779314121432e-05, "loss": 0.7114, "step": 18443 }, { "epoch": 1.2496781624771325, "grad_norm": 6.6557698249816895, "learning_rate": 7.577794510233418e-05, "loss": 0.9342, "step": 18444 }, { "epoch": 1.2497459177451047, "grad_norm": 4.974968910217285, "learning_rate": 7.577657608323636e-05, "loss": 0.736, "step": 18445 }, { "epoch": 1.2497459177451047, "eval_loss": 0.7137033939361572, "eval_noise_accuracy": 0.0, "eval_runtime": 1472.2965, "eval_samples_per_second": 3.49, "eval_steps_per_second": 0.219, "eval_wer": 67.47337717729287, "step": 18445 }, { "epoch": 1.2498136730130769, "grad_norm": 5.765774250030518, "learning_rate": 7.577520706413855e-05, "loss": 0.6947, "step": 18446 }, { "epoch": 1.2498814282810489, "grad_norm": 5.928546905517578, "learning_rate": 7.577383804504073e-05, "loss": 0.7455, "step": 18447 }, { "epoch": 1.2499491835490208, "grad_norm": 6.987960338592529, "learning_rate": 7.577246902594291e-05, "loss": 0.7321, "step": 18448 }, { "epoch": 1.250016938816993, "grad_norm": 5.092806339263916, "learning_rate": 7.57711000068451e-05, "loss": 0.5406, "step": 18449 }, { "epoch": 1.250084694084965, "grad_norm": 6.543668270111084, "learning_rate": 7.576973098774727e-05, "loss": 0.8091, "step": 18450 }, { "epoch": 1.2501524493529372, "grad_norm": 7.305642127990723, "learning_rate": 7.576836196864947e-05, "loss": 0.6787, "step": 18451 }, { "epoch": 1.2502202046209092, "grad_norm": 5.901629447937012, "learning_rate": 7.576699294955165e-05, "loss": 0.5149, "step": 18452 }, { "epoch": 1.2502879598888814, "grad_norm": 7.56804895401001, "learning_rate": 7.576562393045383e-05, "loss": 0.6372, "step": 18453 }, { "epoch": 1.2503557151568534, "grad_norm": 5.676199913024902, "learning_rate": 7.576425491135601e-05, "loss": 0.6373, "step": 18454 }, { "epoch": 1.2504234704248256, "grad_norm": 6.032554626464844, "learning_rate": 7.576288589225819e-05, "loss": 0.6024, "step": 18455 }, { "epoch": 1.2504912256927976, "grad_norm": 5.8432488441467285, "learning_rate": 7.576151687316038e-05, "loss": 0.7547, "step": 18456 }, { "epoch": 1.2505589809607698, "grad_norm": 4.738234996795654, "learning_rate": 7.576014785406256e-05, "loss": 0.7022, "step": 18457 }, { "epoch": 1.2506267362287418, "grad_norm": 6.379268169403076, "learning_rate": 7.575877883496474e-05, "loss": 0.7208, "step": 18458 }, { "epoch": 1.2506944914967137, "grad_norm": 5.782483100891113, "learning_rate": 7.575740981586692e-05, "loss": 0.8258, "step": 18459 }, { "epoch": 1.250762246764686, "grad_norm": 5.850151062011719, "learning_rate": 7.575604079676912e-05, "loss": 0.7966, "step": 18460 }, { "epoch": 1.2508300020326582, "grad_norm": 6.330451488494873, "learning_rate": 7.57546717776713e-05, "loss": 0.6553, "step": 18461 }, { "epoch": 1.2508977573006301, "grad_norm": 8.971338272094727, "learning_rate": 7.575330275857348e-05, "loss": 0.7667, "step": 18462 }, { "epoch": 1.2509655125686021, "grad_norm": 5.894313812255859, "learning_rate": 7.575193373947567e-05, "loss": 0.7369, "step": 18463 }, { "epoch": 1.2510332678365743, "grad_norm": 6.110507965087891, "learning_rate": 7.575056472037785e-05, "loss": 0.6186, "step": 18464 }, { "epoch": 1.2511010231045465, "grad_norm": 6.35048246383667, "learning_rate": 7.574919570128003e-05, "loss": 0.8074, "step": 18465 }, { "epoch": 1.2511687783725185, "grad_norm": 5.195486545562744, "learning_rate": 7.574782668218223e-05, "loss": 0.7819, "step": 18466 }, { "epoch": 1.2512365336404905, "grad_norm": 4.7817888259887695, "learning_rate": 7.574645766308441e-05, "loss": 0.7666, "step": 18467 }, { "epoch": 1.2513042889084627, "grad_norm": 6.538143157958984, "learning_rate": 7.574508864398659e-05, "loss": 0.7804, "step": 18468 }, { "epoch": 1.2513720441764347, "grad_norm": 5.508159637451172, "learning_rate": 7.574371962488878e-05, "loss": 0.6687, "step": 18469 }, { "epoch": 1.2514397994444069, "grad_norm": 4.448439121246338, "learning_rate": 7.574235060579096e-05, "loss": 0.7121, "step": 18470 }, { "epoch": 1.2515075547123788, "grad_norm": 7.669184684753418, "learning_rate": 7.574098158669314e-05, "loss": 0.5168, "step": 18471 }, { "epoch": 1.251575309980351, "grad_norm": 5.563172817230225, "learning_rate": 7.573961256759532e-05, "loss": 0.6821, "step": 18472 }, { "epoch": 1.251643065248323, "grad_norm": 5.528843879699707, "learning_rate": 7.57382435484975e-05, "loss": 0.716, "step": 18473 }, { "epoch": 1.251710820516295, "grad_norm": 6.016157150268555, "learning_rate": 7.57368745293997e-05, "loss": 0.6406, "step": 18474 }, { "epoch": 1.2517785757842672, "grad_norm": 6.181151866912842, "learning_rate": 7.573550551030188e-05, "loss": 0.6313, "step": 18475 }, { "epoch": 1.2518463310522394, "grad_norm": 5.659000396728516, "learning_rate": 7.573413649120406e-05, "loss": 0.7512, "step": 18476 }, { "epoch": 1.2519140863202114, "grad_norm": 4.974128723144531, "learning_rate": 7.573276747210624e-05, "loss": 0.6549, "step": 18477 }, { "epoch": 1.2519818415881834, "grad_norm": 6.594817638397217, "learning_rate": 7.573139845300843e-05, "loss": 0.9165, "step": 18478 }, { "epoch": 1.2520495968561556, "grad_norm": 6.796396732330322, "learning_rate": 7.573002943391061e-05, "loss": 0.7955, "step": 18479 }, { "epoch": 1.2521173521241278, "grad_norm": 4.470376968383789, "learning_rate": 7.572866041481279e-05, "loss": 0.5418, "step": 18480 }, { "epoch": 1.2521851073920998, "grad_norm": 8.240352630615234, "learning_rate": 7.572729139571497e-05, "loss": 0.594, "step": 18481 }, { "epoch": 1.2522528626600717, "grad_norm": 5.189418315887451, "learning_rate": 7.572592237661715e-05, "loss": 0.6964, "step": 18482 }, { "epoch": 1.252320617928044, "grad_norm": 5.715917110443115, "learning_rate": 7.572455335751935e-05, "loss": 0.6072, "step": 18483 }, { "epoch": 1.252388373196016, "grad_norm": 5.18189001083374, "learning_rate": 7.572318433842153e-05, "loss": 0.5861, "step": 18484 }, { "epoch": 1.2524561284639881, "grad_norm": 6.857985496520996, "learning_rate": 7.572181531932371e-05, "loss": 0.7787, "step": 18485 }, { "epoch": 1.25252388373196, "grad_norm": 5.915340900421143, "learning_rate": 7.572044630022589e-05, "loss": 0.7984, "step": 18486 }, { "epoch": 1.2525916389999323, "grad_norm": 5.554129123687744, "learning_rate": 7.571907728112807e-05, "loss": 0.6809, "step": 18487 }, { "epoch": 1.2526593942679043, "grad_norm": 5.325961589813232, "learning_rate": 7.571770826203026e-05, "loss": 0.6008, "step": 18488 }, { "epoch": 1.2527271495358765, "grad_norm": 6.199392795562744, "learning_rate": 7.571633924293244e-05, "loss": 0.6453, "step": 18489 }, { "epoch": 1.2527949048038485, "grad_norm": 7.090646266937256, "learning_rate": 7.571497022383462e-05, "loss": 0.6128, "step": 18490 }, { "epoch": 1.2528626600718207, "grad_norm": 6.836106777191162, "learning_rate": 7.57136012047368e-05, "loss": 0.6919, "step": 18491 }, { "epoch": 1.2529304153397927, "grad_norm": 8.539996147155762, "learning_rate": 7.5712232185639e-05, "loss": 0.6941, "step": 18492 }, { "epoch": 1.2529981706077646, "grad_norm": 6.330958366394043, "learning_rate": 7.571086316654118e-05, "loss": 0.5077, "step": 18493 }, { "epoch": 1.2530659258757368, "grad_norm": 5.8012237548828125, "learning_rate": 7.570949414744336e-05, "loss": 0.7444, "step": 18494 }, { "epoch": 1.253133681143709, "grad_norm": 5.817990779876709, "learning_rate": 7.570812512834554e-05, "loss": 0.6091, "step": 18495 }, { "epoch": 1.253201436411681, "grad_norm": 5.869943618774414, "learning_rate": 7.570675610924772e-05, "loss": 0.9666, "step": 18496 }, { "epoch": 1.253269191679653, "grad_norm": 5.181975841522217, "learning_rate": 7.570538709014991e-05, "loss": 0.7059, "step": 18497 }, { "epoch": 1.2533369469476252, "grad_norm": 5.238135814666748, "learning_rate": 7.570401807105209e-05, "loss": 0.7596, "step": 18498 }, { "epoch": 1.2534047022155972, "grad_norm": 7.823385715484619, "learning_rate": 7.570264905195427e-05, "loss": 0.8832, "step": 18499 }, { "epoch": 1.2534724574835694, "grad_norm": 5.989696025848389, "learning_rate": 7.570128003285645e-05, "loss": 0.7683, "step": 18500 }, { "epoch": 1.2535402127515414, "grad_norm": 6.299252510070801, "learning_rate": 7.569991101375865e-05, "loss": 0.7224, "step": 18501 }, { "epoch": 1.2536079680195136, "grad_norm": 7.141812801361084, "learning_rate": 7.569854199466083e-05, "loss": 0.8333, "step": 18502 }, { "epoch": 1.2536757232874856, "grad_norm": 6.017572402954102, "learning_rate": 7.569717297556301e-05, "loss": 0.9219, "step": 18503 }, { "epoch": 1.2537434785554578, "grad_norm": 6.288930416107178, "learning_rate": 7.569580395646519e-05, "loss": 0.6977, "step": 18504 }, { "epoch": 1.2538112338234297, "grad_norm": 5.960664749145508, "learning_rate": 7.569443493736737e-05, "loss": 0.6247, "step": 18505 }, { "epoch": 1.253878989091402, "grad_norm": 5.1620612144470215, "learning_rate": 7.569306591826956e-05, "loss": 0.7044, "step": 18506 }, { "epoch": 1.253946744359374, "grad_norm": 6.695984840393066, "learning_rate": 7.569169689917174e-05, "loss": 0.6635, "step": 18507 }, { "epoch": 1.254014499627346, "grad_norm": 6.664060592651367, "learning_rate": 7.569032788007392e-05, "loss": 0.5532, "step": 18508 }, { "epoch": 1.254082254895318, "grad_norm": 5.855518341064453, "learning_rate": 7.568895886097612e-05, "loss": 0.6359, "step": 18509 }, { "epoch": 1.2541500101632903, "grad_norm": 5.479102611541748, "learning_rate": 7.56875898418783e-05, "loss": 0.723, "step": 18510 }, { "epoch": 1.2542177654312623, "grad_norm": 4.955291748046875, "learning_rate": 7.568622082278048e-05, "loss": 0.6165, "step": 18511 }, { "epoch": 1.2542855206992343, "grad_norm": 5.175276279449463, "learning_rate": 7.568485180368267e-05, "loss": 0.8386, "step": 18512 }, { "epoch": 1.2543532759672065, "grad_norm": 5.586778163909912, "learning_rate": 7.568348278458485e-05, "loss": 0.6178, "step": 18513 }, { "epoch": 1.2544210312351785, "grad_norm": 4.956143379211426, "learning_rate": 7.568211376548703e-05, "loss": 0.6645, "step": 18514 }, { "epoch": 1.2544887865031507, "grad_norm": 4.254742622375488, "learning_rate": 7.568074474638923e-05, "loss": 0.7241, "step": 18515 }, { "epoch": 1.2545565417711226, "grad_norm": 5.6016950607299805, "learning_rate": 7.567937572729141e-05, "loss": 0.7738, "step": 18516 }, { "epoch": 1.2546242970390948, "grad_norm": 7.074273109436035, "learning_rate": 7.567800670819359e-05, "loss": 0.6681, "step": 18517 }, { "epoch": 1.2546920523070668, "grad_norm": 6.391725540161133, "learning_rate": 7.567663768909577e-05, "loss": 0.5549, "step": 18518 }, { "epoch": 1.254759807575039, "grad_norm": 6.349266529083252, "learning_rate": 7.567526866999795e-05, "loss": 0.7118, "step": 18519 }, { "epoch": 1.254827562843011, "grad_norm": 5.164762496948242, "learning_rate": 7.567389965090014e-05, "loss": 0.8626, "step": 18520 }, { "epoch": 1.2548953181109832, "grad_norm": 5.601468086242676, "learning_rate": 7.567253063180232e-05, "loss": 0.8346, "step": 18521 }, { "epoch": 1.2549630733789552, "grad_norm": 4.642623424530029, "learning_rate": 7.56711616127045e-05, "loss": 0.6299, "step": 18522 }, { "epoch": 1.2550308286469272, "grad_norm": 5.651762962341309, "learning_rate": 7.566979259360668e-05, "loss": 0.5342, "step": 18523 }, { "epoch": 1.2550985839148994, "grad_norm": 4.839752674102783, "learning_rate": 7.566842357450888e-05, "loss": 0.6587, "step": 18524 }, { "epoch": 1.2551663391828716, "grad_norm": 5.076649188995361, "learning_rate": 7.566705455541106e-05, "loss": 0.705, "step": 18525 }, { "epoch": 1.2552340944508436, "grad_norm": 5.833838939666748, "learning_rate": 7.566568553631324e-05, "loss": 0.7622, "step": 18526 }, { "epoch": 1.2553018497188155, "grad_norm": 10.792261123657227, "learning_rate": 7.566431651721542e-05, "loss": 0.6095, "step": 18527 }, { "epoch": 1.2553696049867877, "grad_norm": 4.595198631286621, "learning_rate": 7.56629474981176e-05, "loss": 0.5988, "step": 18528 }, { "epoch": 1.25543736025476, "grad_norm": 5.7172040939331055, "learning_rate": 7.566157847901979e-05, "loss": 0.6821, "step": 18529 }, { "epoch": 1.255505115522732, "grad_norm": 5.3055315017700195, "learning_rate": 7.566020945992197e-05, "loss": 0.6005, "step": 18530 }, { "epoch": 1.255572870790704, "grad_norm": 4.874014854431152, "learning_rate": 7.565884044082415e-05, "loss": 0.5495, "step": 18531 }, { "epoch": 1.255640626058676, "grad_norm": 5.3809332847595215, "learning_rate": 7.565747142172633e-05, "loss": 0.624, "step": 18532 }, { "epoch": 1.255708381326648, "grad_norm": 6.123635292053223, "learning_rate": 7.565610240262851e-05, "loss": 0.6392, "step": 18533 }, { "epoch": 1.2557761365946203, "grad_norm": 7.112510681152344, "learning_rate": 7.565473338353071e-05, "loss": 0.6779, "step": 18534 }, { "epoch": 1.2558438918625923, "grad_norm": 5.567243576049805, "learning_rate": 7.565336436443289e-05, "loss": 0.7524, "step": 18535 }, { "epoch": 1.2559116471305645, "grad_norm": 6.924431800842285, "learning_rate": 7.565199534533507e-05, "loss": 0.6554, "step": 18536 }, { "epoch": 1.2559794023985364, "grad_norm": 6.007933139801025, "learning_rate": 7.565062632623725e-05, "loss": 0.6939, "step": 18537 }, { "epoch": 1.2560471576665087, "grad_norm": 6.062107563018799, "learning_rate": 7.564925730713944e-05, "loss": 0.5502, "step": 18538 }, { "epoch": 1.2561149129344806, "grad_norm": 5.4054694175720215, "learning_rate": 7.564788828804162e-05, "loss": 0.6591, "step": 18539 }, { "epoch": 1.2561826682024528, "grad_norm": 6.372161388397217, "learning_rate": 7.56465192689438e-05, "loss": 0.8376, "step": 18540 }, { "epoch": 1.2562504234704248, "grad_norm": 4.888607978820801, "learning_rate": 7.564515024984598e-05, "loss": 0.8645, "step": 18541 }, { "epoch": 1.2563181787383968, "grad_norm": 5.889427661895752, "learning_rate": 7.564378123074816e-05, "loss": 0.6933, "step": 18542 }, { "epoch": 1.256385934006369, "grad_norm": 5.196866512298584, "learning_rate": 7.564241221165036e-05, "loss": 0.7812, "step": 18543 }, { "epoch": 1.2564536892743412, "grad_norm": 10.582948684692383, "learning_rate": 7.564104319255254e-05, "loss": 0.7324, "step": 18544 }, { "epoch": 1.2565214445423132, "grad_norm": 4.985345840454102, "learning_rate": 7.563967417345472e-05, "loss": 0.585, "step": 18545 }, { "epoch": 1.2565891998102852, "grad_norm": 5.265500545501709, "learning_rate": 7.56383051543569e-05, "loss": 0.6476, "step": 18546 }, { "epoch": 1.2566569550782574, "grad_norm": 12.165959358215332, "learning_rate": 7.563693613525909e-05, "loss": 0.613, "step": 18547 }, { "epoch": 1.2567247103462293, "grad_norm": 6.362099647521973, "learning_rate": 7.563556711616127e-05, "loss": 0.7217, "step": 18548 }, { "epoch": 1.2567924656142015, "grad_norm": 4.63362979888916, "learning_rate": 7.563419809706345e-05, "loss": 0.6154, "step": 18549 }, { "epoch": 1.2568602208821735, "grad_norm": 5.0658464431762695, "learning_rate": 7.563282907796563e-05, "loss": 0.6214, "step": 18550 }, { "epoch": 1.2569279761501457, "grad_norm": 6.6774582862854, "learning_rate": 7.563146005886781e-05, "loss": 0.674, "step": 18551 }, { "epoch": 1.2569957314181177, "grad_norm": 5.199604034423828, "learning_rate": 7.563009103977001e-05, "loss": 0.6128, "step": 18552 }, { "epoch": 1.25706348668609, "grad_norm": 8.721567153930664, "learning_rate": 7.562872202067219e-05, "loss": 0.6065, "step": 18553 }, { "epoch": 1.257131241954062, "grad_norm": 5.043855667114258, "learning_rate": 7.562735300157437e-05, "loss": 0.7786, "step": 18554 }, { "epoch": 1.257198997222034, "grad_norm": 6.66047477722168, "learning_rate": 7.562598398247656e-05, "loss": 0.7203, "step": 18555 }, { "epoch": 1.257266752490006, "grad_norm": 4.944528102874756, "learning_rate": 7.562461496337874e-05, "loss": 0.7272, "step": 18556 }, { "epoch": 1.257334507757978, "grad_norm": 5.683043479919434, "learning_rate": 7.562324594428092e-05, "loss": 0.7337, "step": 18557 }, { "epoch": 1.2574022630259503, "grad_norm": 5.674587249755859, "learning_rate": 7.562187692518312e-05, "loss": 0.598, "step": 18558 }, { "epoch": 1.2574700182939225, "grad_norm": 5.3899245262146, "learning_rate": 7.56205079060853e-05, "loss": 0.662, "step": 18559 }, { "epoch": 1.2575377735618944, "grad_norm": 6.485472679138184, "learning_rate": 7.561913888698748e-05, "loss": 0.8906, "step": 18560 }, { "epoch": 1.2576055288298664, "grad_norm": 5.379263877868652, "learning_rate": 7.561776986788967e-05, "loss": 0.7099, "step": 18561 }, { "epoch": 1.2576732840978386, "grad_norm": 4.986139297485352, "learning_rate": 7.561640084879185e-05, "loss": 0.6209, "step": 18562 }, { "epoch": 1.2577410393658106, "grad_norm": 6.471348285675049, "learning_rate": 7.561503182969403e-05, "loss": 0.7118, "step": 18563 }, { "epoch": 1.2578087946337828, "grad_norm": 5.576128005981445, "learning_rate": 7.561366281059621e-05, "loss": 0.7865, "step": 18564 }, { "epoch": 1.2578765499017548, "grad_norm": 6.393286228179932, "learning_rate": 7.561229379149839e-05, "loss": 0.6527, "step": 18565 }, { "epoch": 1.257944305169727, "grad_norm": 6.897951602935791, "learning_rate": 7.561092477240059e-05, "loss": 0.8406, "step": 18566 }, { "epoch": 1.258012060437699, "grad_norm": 6.056159019470215, "learning_rate": 7.560955575330277e-05, "loss": 0.9668, "step": 18567 }, { "epoch": 1.2580798157056712, "grad_norm": 6.5312113761901855, "learning_rate": 7.560818673420495e-05, "loss": 0.8794, "step": 18568 }, { "epoch": 1.2581475709736432, "grad_norm": 6.969417572021484, "learning_rate": 7.560681771510713e-05, "loss": 0.8021, "step": 18569 }, { "epoch": 1.2582153262416154, "grad_norm": 5.902151107788086, "learning_rate": 7.560544869600932e-05, "loss": 0.7954, "step": 18570 }, { "epoch": 1.2582830815095873, "grad_norm": 7.758078098297119, "learning_rate": 7.56040796769115e-05, "loss": 1.1007, "step": 18571 }, { "epoch": 1.2583508367775593, "grad_norm": 5.972092628479004, "learning_rate": 7.560271065781368e-05, "loss": 0.6651, "step": 18572 }, { "epoch": 1.2584185920455315, "grad_norm": 8.309666633605957, "learning_rate": 7.560134163871586e-05, "loss": 0.6527, "step": 18573 }, { "epoch": 1.2584863473135037, "grad_norm": 8.012523651123047, "learning_rate": 7.559997261961804e-05, "loss": 0.7023, "step": 18574 }, { "epoch": 1.2585541025814757, "grad_norm": 5.024482727050781, "learning_rate": 7.559860360052024e-05, "loss": 0.6819, "step": 18575 }, { "epoch": 1.2586218578494477, "grad_norm": 7.1834893226623535, "learning_rate": 7.559723458142242e-05, "loss": 0.7986, "step": 18576 }, { "epoch": 1.25868961311742, "grad_norm": 5.639023780822754, "learning_rate": 7.55958655623246e-05, "loss": 0.6736, "step": 18577 }, { "epoch": 1.258757368385392, "grad_norm": 4.606266021728516, "learning_rate": 7.559449654322678e-05, "loss": 0.586, "step": 18578 }, { "epoch": 1.258825123653364, "grad_norm": 6.475333213806152, "learning_rate": 7.559312752412897e-05, "loss": 0.6087, "step": 18579 }, { "epoch": 1.258892878921336, "grad_norm": 6.171159744262695, "learning_rate": 7.559175850503115e-05, "loss": 0.8264, "step": 18580 }, { "epoch": 1.2589606341893083, "grad_norm": 4.428532123565674, "learning_rate": 7.559038948593333e-05, "loss": 0.6508, "step": 18581 }, { "epoch": 1.2590283894572802, "grad_norm": 7.2537102699279785, "learning_rate": 7.558902046683551e-05, "loss": 0.7325, "step": 18582 }, { "epoch": 1.2590961447252524, "grad_norm": 5.073615550994873, "learning_rate": 7.558765144773769e-05, "loss": 0.5633, "step": 18583 }, { "epoch": 1.2591638999932244, "grad_norm": 9.035654067993164, "learning_rate": 7.558628242863989e-05, "loss": 0.6906, "step": 18584 }, { "epoch": 1.2592316552611966, "grad_norm": 5.055354595184326, "learning_rate": 7.558491340954207e-05, "loss": 0.472, "step": 18585 }, { "epoch": 1.2592994105291686, "grad_norm": 7.8245649337768555, "learning_rate": 7.558354439044425e-05, "loss": 0.8155, "step": 18586 }, { "epoch": 1.2593671657971408, "grad_norm": 6.202260494232178, "learning_rate": 7.558217537134643e-05, "loss": 0.9133, "step": 18587 }, { "epoch": 1.2594349210651128, "grad_norm": 4.322682857513428, "learning_rate": 7.558080635224861e-05, "loss": 0.679, "step": 18588 }, { "epoch": 1.259502676333085, "grad_norm": 5.716592788696289, "learning_rate": 7.55794373331508e-05, "loss": 0.4453, "step": 18589 }, { "epoch": 1.259570431601057, "grad_norm": 6.840198516845703, "learning_rate": 7.557806831405298e-05, "loss": 0.7131, "step": 18590 }, { "epoch": 1.259638186869029, "grad_norm": 8.745750427246094, "learning_rate": 7.557669929495516e-05, "loss": 0.5623, "step": 18591 }, { "epoch": 1.2597059421370012, "grad_norm": 8.578254699707031, "learning_rate": 7.557533027585734e-05, "loss": 0.6782, "step": 18592 }, { "epoch": 1.2597736974049734, "grad_norm": 4.5666022300720215, "learning_rate": 7.557396125675954e-05, "loss": 0.5209, "step": 18593 }, { "epoch": 1.2598414526729453, "grad_norm": 4.5428643226623535, "learning_rate": 7.557259223766172e-05, "loss": 0.4816, "step": 18594 }, { "epoch": 1.2599092079409173, "grad_norm": 7.3000898361206055, "learning_rate": 7.55712232185639e-05, "loss": 0.8839, "step": 18595 }, { "epoch": 1.2599769632088895, "grad_norm": 5.491621971130371, "learning_rate": 7.556985419946608e-05, "loss": 0.7736, "step": 18596 }, { "epoch": 1.2600447184768615, "grad_norm": 5.579415321350098, "learning_rate": 7.556848518036826e-05, "loss": 0.8968, "step": 18597 }, { "epoch": 1.2601124737448337, "grad_norm": 6.422910690307617, "learning_rate": 7.556711616127045e-05, "loss": 0.7781, "step": 18598 }, { "epoch": 1.2601802290128057, "grad_norm": 5.6319169998168945, "learning_rate": 7.556574714217263e-05, "loss": 0.8092, "step": 18599 }, { "epoch": 1.2602479842807779, "grad_norm": 5.097975730895996, "learning_rate": 7.556437812307481e-05, "loss": 0.7693, "step": 18600 }, { "epoch": 1.2603157395487499, "grad_norm": 5.839118480682373, "learning_rate": 7.556300910397701e-05, "loss": 0.7554, "step": 18601 }, { "epoch": 1.260383494816722, "grad_norm": 6.079533100128174, "learning_rate": 7.556164008487919e-05, "loss": 0.8032, "step": 18602 }, { "epoch": 1.260451250084694, "grad_norm": 6.094401836395264, "learning_rate": 7.556027106578137e-05, "loss": 0.7168, "step": 18603 }, { "epoch": 1.2605190053526663, "grad_norm": 4.861274719238281, "learning_rate": 7.555890204668356e-05, "loss": 0.6073, "step": 18604 }, { "epoch": 1.2605867606206382, "grad_norm": 7.345917224884033, "learning_rate": 7.555753302758574e-05, "loss": 0.7244, "step": 18605 }, { "epoch": 1.2606545158886102, "grad_norm": 7.340696811676025, "learning_rate": 7.555616400848792e-05, "loss": 0.5943, "step": 18606 }, { "epoch": 1.2607222711565824, "grad_norm": 5.267749786376953, "learning_rate": 7.555479498939012e-05, "loss": 0.8116, "step": 18607 }, { "epoch": 1.2607900264245546, "grad_norm": 5.4350762367248535, "learning_rate": 7.55534259702923e-05, "loss": 0.6313, "step": 18608 }, { "epoch": 1.2608577816925266, "grad_norm": 5.079307556152344, "learning_rate": 7.555205695119448e-05, "loss": 0.6928, "step": 18609 }, { "epoch": 1.2609255369604986, "grad_norm": 5.302357196807861, "learning_rate": 7.555068793209666e-05, "loss": 0.6648, "step": 18610 }, { "epoch": 1.2609932922284708, "grad_norm": 4.778517723083496, "learning_rate": 7.554931891299885e-05, "loss": 0.8079, "step": 18611 }, { "epoch": 1.2610610474964428, "grad_norm": 5.953247547149658, "learning_rate": 7.554794989390103e-05, "loss": 0.6627, "step": 18612 }, { "epoch": 1.261128802764415, "grad_norm": 5.900984764099121, "learning_rate": 7.554658087480321e-05, "loss": 0.5109, "step": 18613 }, { "epoch": 1.261196558032387, "grad_norm": 7.358961582183838, "learning_rate": 7.554521185570539e-05, "loss": 0.6241, "step": 18614 }, { "epoch": 1.2612643133003592, "grad_norm": 5.111667633056641, "learning_rate": 7.554384283660757e-05, "loss": 0.7036, "step": 18615 }, { "epoch": 1.2613320685683311, "grad_norm": 5.966854572296143, "learning_rate": 7.554247381750977e-05, "loss": 0.8375, "step": 18616 }, { "epoch": 1.2613998238363033, "grad_norm": 7.099857807159424, "learning_rate": 7.554110479841195e-05, "loss": 0.8404, "step": 18617 }, { "epoch": 1.2614675791042753, "grad_norm": 6.744253635406494, "learning_rate": 7.553973577931413e-05, "loss": 0.8001, "step": 18618 }, { "epoch": 1.2615353343722475, "grad_norm": 7.040173053741455, "learning_rate": 7.553836676021631e-05, "loss": 0.6206, "step": 18619 }, { "epoch": 1.2616030896402195, "grad_norm": 4.291145324707031, "learning_rate": 7.553699774111849e-05, "loss": 0.4751, "step": 18620 }, { "epoch": 1.2616708449081915, "grad_norm": 6.649335861206055, "learning_rate": 7.553562872202068e-05, "loss": 0.6669, "step": 18621 }, { "epoch": 1.2617386001761637, "grad_norm": 5.786914348602295, "learning_rate": 7.553425970292286e-05, "loss": 0.6102, "step": 18622 }, { "epoch": 1.2618063554441359, "grad_norm": 7.2642974853515625, "learning_rate": 7.553289068382504e-05, "loss": 0.8635, "step": 18623 }, { "epoch": 1.2618741107121079, "grad_norm": 6.113177299499512, "learning_rate": 7.553152166472722e-05, "loss": 0.9767, "step": 18624 }, { "epoch": 1.2619418659800798, "grad_norm": 5.75513219833374, "learning_rate": 7.553015264562942e-05, "loss": 0.8082, "step": 18625 }, { "epoch": 1.262009621248052, "grad_norm": 6.72035026550293, "learning_rate": 7.55287836265316e-05, "loss": 0.5233, "step": 18626 }, { "epoch": 1.2620773765160243, "grad_norm": 7.259964942932129, "learning_rate": 7.552741460743378e-05, "loss": 0.5585, "step": 18627 }, { "epoch": 1.2621451317839962, "grad_norm": 7.402397155761719, "learning_rate": 7.552604558833596e-05, "loss": 0.7705, "step": 18628 }, { "epoch": 1.2622128870519682, "grad_norm": 6.601362228393555, "learning_rate": 7.552467656923814e-05, "loss": 0.507, "step": 18629 }, { "epoch": 1.2622806423199404, "grad_norm": 4.783417701721191, "learning_rate": 7.552330755014033e-05, "loss": 0.6551, "step": 18630 }, { "epoch": 1.2623483975879124, "grad_norm": 4.542998790740967, "learning_rate": 7.552193853104251e-05, "loss": 0.5728, "step": 18631 }, { "epoch": 1.2624161528558846, "grad_norm": 5.842750072479248, "learning_rate": 7.552056951194469e-05, "loss": 0.4854, "step": 18632 }, { "epoch": 1.2624839081238566, "grad_norm": 6.539297103881836, "learning_rate": 7.551920049284687e-05, "loss": 0.9015, "step": 18633 }, { "epoch": 1.2625516633918288, "grad_norm": 5.538585186004639, "learning_rate": 7.551783147374907e-05, "loss": 0.7525, "step": 18634 }, { "epoch": 1.2626194186598008, "grad_norm": 5.992204189300537, "learning_rate": 7.551646245465125e-05, "loss": 0.7884, "step": 18635 }, { "epoch": 1.262687173927773, "grad_norm": 7.5876617431640625, "learning_rate": 7.551509343555343e-05, "loss": 0.5644, "step": 18636 }, { "epoch": 1.262754929195745, "grad_norm": 9.88604736328125, "learning_rate": 7.551372441645561e-05, "loss": 0.8097, "step": 18637 }, { "epoch": 1.2628226844637171, "grad_norm": 6.442137718200684, "learning_rate": 7.551235539735779e-05, "loss": 0.5098, "step": 18638 }, { "epoch": 1.2628904397316891, "grad_norm": 7.304479598999023, "learning_rate": 7.551098637825998e-05, "loss": 0.4754, "step": 18639 }, { "epoch": 1.262958194999661, "grad_norm": 6.708983421325684, "learning_rate": 7.550961735916216e-05, "loss": 0.6931, "step": 18640 }, { "epoch": 1.2630259502676333, "grad_norm": 10.012980461120605, "learning_rate": 7.550824834006434e-05, "loss": 0.5175, "step": 18641 }, { "epoch": 1.2630937055356055, "grad_norm": 5.989541053771973, "learning_rate": 7.550687932096652e-05, "loss": 0.7058, "step": 18642 }, { "epoch": 1.2631614608035775, "grad_norm": 6.577622890472412, "learning_rate": 7.55055103018687e-05, "loss": 0.7162, "step": 18643 }, { "epoch": 1.2632292160715495, "grad_norm": 10.932467460632324, "learning_rate": 7.55041412827709e-05, "loss": 0.9334, "step": 18644 }, { "epoch": 1.2632969713395217, "grad_norm": 6.353855609893799, "learning_rate": 7.550277226367308e-05, "loss": 0.751, "step": 18645 }, { "epoch": 1.2633647266074937, "grad_norm": 5.465866565704346, "learning_rate": 7.550140324457526e-05, "loss": 0.7445, "step": 18646 }, { "epoch": 1.2634324818754659, "grad_norm": 6.35228967666626, "learning_rate": 7.550003422547745e-05, "loss": 0.5136, "step": 18647 }, { "epoch": 1.2635002371434378, "grad_norm": 5.699860095977783, "learning_rate": 7.549866520637963e-05, "loss": 0.5295, "step": 18648 }, { "epoch": 1.26356799241141, "grad_norm": 5.473416805267334, "learning_rate": 7.549729618728181e-05, "loss": 0.6528, "step": 18649 }, { "epoch": 1.263635747679382, "grad_norm": 5.712282657623291, "learning_rate": 7.5495927168184e-05, "loss": 0.555, "step": 18650 }, { "epoch": 1.2637035029473542, "grad_norm": 5.04739236831665, "learning_rate": 7.549455814908619e-05, "loss": 0.6002, "step": 18651 }, { "epoch": 1.2637712582153262, "grad_norm": 5.735923767089844, "learning_rate": 7.549318912998837e-05, "loss": 0.8652, "step": 18652 }, { "epoch": 1.2638390134832984, "grad_norm": 6.413249492645264, "learning_rate": 7.549182011089056e-05, "loss": 0.6395, "step": 18653 }, { "epoch": 1.2639067687512704, "grad_norm": 5.0051350593566895, "learning_rate": 7.549045109179274e-05, "loss": 0.6231, "step": 18654 }, { "epoch": 1.2639745240192424, "grad_norm": 7.132907390594482, "learning_rate": 7.548908207269492e-05, "loss": 0.8982, "step": 18655 }, { "epoch": 1.2640422792872146, "grad_norm": 5.584010124206543, "learning_rate": 7.54877130535971e-05, "loss": 0.716, "step": 18656 }, { "epoch": 1.2641100345551868, "grad_norm": 6.893798828125, "learning_rate": 7.54863440344993e-05, "loss": 0.7787, "step": 18657 }, { "epoch": 1.2641777898231588, "grad_norm": 6.842940807342529, "learning_rate": 7.548497501540148e-05, "loss": 0.8391, "step": 18658 }, { "epoch": 1.2642455450911307, "grad_norm": 7.086545467376709, "learning_rate": 7.548360599630366e-05, "loss": 0.8986, "step": 18659 }, { "epoch": 1.264313300359103, "grad_norm": 6.724247932434082, "learning_rate": 7.548223697720584e-05, "loss": 0.7217, "step": 18660 }, { "epoch": 1.264381055627075, "grad_norm": 6.771048545837402, "learning_rate": 7.548086795810802e-05, "loss": 0.5481, "step": 18661 }, { "epoch": 1.2644488108950471, "grad_norm": 4.857229232788086, "learning_rate": 7.547949893901021e-05, "loss": 0.5625, "step": 18662 }, { "epoch": 1.264516566163019, "grad_norm": 6.672415256500244, "learning_rate": 7.547812991991239e-05, "loss": 0.705, "step": 18663 }, { "epoch": 1.2645843214309913, "grad_norm": 5.948575496673584, "learning_rate": 7.547676090081457e-05, "loss": 0.6448, "step": 18664 }, { "epoch": 1.2646520766989633, "grad_norm": 5.037988662719727, "learning_rate": 7.547539188171675e-05, "loss": 0.705, "step": 18665 }, { "epoch": 1.2647198319669355, "grad_norm": 4.970808506011963, "learning_rate": 7.547402286261893e-05, "loss": 0.6353, "step": 18666 }, { "epoch": 1.2647875872349075, "grad_norm": 6.588896751403809, "learning_rate": 7.547265384352113e-05, "loss": 0.8291, "step": 18667 }, { "epoch": 1.2648553425028797, "grad_norm": 4.794162750244141, "learning_rate": 7.54712848244233e-05, "loss": 0.6412, "step": 18668 }, { "epoch": 1.2649230977708517, "grad_norm": 6.164547443389893, "learning_rate": 7.546991580532549e-05, "loss": 0.8252, "step": 18669 }, { "epoch": 1.2649908530388236, "grad_norm": 8.093086242675781, "learning_rate": 7.546854678622767e-05, "loss": 0.6457, "step": 18670 }, { "epoch": 1.2650586083067958, "grad_norm": 5.33576774597168, "learning_rate": 7.546717776712986e-05, "loss": 0.7117, "step": 18671 }, { "epoch": 1.265126363574768, "grad_norm": 6.756962299346924, "learning_rate": 7.546580874803204e-05, "loss": 0.5851, "step": 18672 }, { "epoch": 1.26519411884274, "grad_norm": 6.43195104598999, "learning_rate": 7.546443972893422e-05, "loss": 0.7082, "step": 18673 }, { "epoch": 1.265261874110712, "grad_norm": 5.0282769203186035, "learning_rate": 7.54630707098364e-05, "loss": 0.5662, "step": 18674 }, { "epoch": 1.2653296293786842, "grad_norm": 5.411833763122559, "learning_rate": 7.546170169073858e-05, "loss": 0.7744, "step": 18675 }, { "epoch": 1.2653973846466564, "grad_norm": 5.758815288543701, "learning_rate": 7.546033267164078e-05, "loss": 0.6941, "step": 18676 }, { "epoch": 1.2654651399146284, "grad_norm": 5.123340606689453, "learning_rate": 7.545896365254296e-05, "loss": 0.5436, "step": 18677 }, { "epoch": 1.2655328951826004, "grad_norm": 5.003796577453613, "learning_rate": 7.545759463344514e-05, "loss": 0.7271, "step": 18678 }, { "epoch": 1.2656006504505726, "grad_norm": 5.447422981262207, "learning_rate": 7.545622561434732e-05, "loss": 0.7889, "step": 18679 }, { "epoch": 1.2656684057185446, "grad_norm": 6.364343643188477, "learning_rate": 7.545485659524951e-05, "loss": 0.7037, "step": 18680 }, { "epoch": 1.2657361609865168, "grad_norm": 4.874630451202393, "learning_rate": 7.545348757615169e-05, "loss": 0.5631, "step": 18681 }, { "epoch": 1.2658039162544887, "grad_norm": 4.877963542938232, "learning_rate": 7.545211855705387e-05, "loss": 0.7046, "step": 18682 }, { "epoch": 1.265871671522461, "grad_norm": 7.1135406494140625, "learning_rate": 7.545074953795605e-05, "loss": 0.8417, "step": 18683 }, { "epoch": 1.265939426790433, "grad_norm": 4.576328277587891, "learning_rate": 7.544938051885823e-05, "loss": 0.4811, "step": 18684 }, { "epoch": 1.2660071820584051, "grad_norm": 4.497889995574951, "learning_rate": 7.544801149976043e-05, "loss": 0.5026, "step": 18685 }, { "epoch": 1.266074937326377, "grad_norm": 7.000176429748535, "learning_rate": 7.544664248066261e-05, "loss": 0.4834, "step": 18686 }, { "epoch": 1.2661426925943493, "grad_norm": 7.418007850646973, "learning_rate": 7.544527346156479e-05, "loss": 0.8872, "step": 18687 }, { "epoch": 1.2662104478623213, "grad_norm": 5.117703437805176, "learning_rate": 7.544390444246697e-05, "loss": 0.5762, "step": 18688 }, { "epoch": 1.2662782031302933, "grad_norm": 5.337724208831787, "learning_rate": 7.544253542336916e-05, "loss": 0.815, "step": 18689 }, { "epoch": 1.2663459583982655, "grad_norm": 8.588623046875, "learning_rate": 7.544116640427134e-05, "loss": 1.015, "step": 18690 }, { "epoch": 1.2664137136662377, "grad_norm": 6.889605522155762, "learning_rate": 7.543979738517352e-05, "loss": 0.6149, "step": 18691 }, { "epoch": 1.2664814689342097, "grad_norm": 6.725203990936279, "learning_rate": 7.54384283660757e-05, "loss": 0.724, "step": 18692 }, { "epoch": 1.2665492242021816, "grad_norm": 6.064904689788818, "learning_rate": 7.543705934697788e-05, "loss": 0.7099, "step": 18693 }, { "epoch": 1.2666169794701538, "grad_norm": 6.5061516761779785, "learning_rate": 7.543569032788008e-05, "loss": 0.6394, "step": 18694 }, { "epoch": 1.2666847347381258, "grad_norm": 5.632493019104004, "learning_rate": 7.543432130878226e-05, "loss": 0.6813, "step": 18695 }, { "epoch": 1.266752490006098, "grad_norm": 5.213821887969971, "learning_rate": 7.543295228968444e-05, "loss": 0.722, "step": 18696 }, { "epoch": 1.26682024527407, "grad_norm": 5.874366760253906, "learning_rate": 7.543158327058663e-05, "loss": 0.6142, "step": 18697 }, { "epoch": 1.2668880005420422, "grad_norm": 5.205356597900391, "learning_rate": 7.543021425148881e-05, "loss": 0.6186, "step": 18698 }, { "epoch": 1.2669557558100142, "grad_norm": 5.747596740722656, "learning_rate": 7.5428845232391e-05, "loss": 0.9889, "step": 18699 }, { "epoch": 1.2670235110779864, "grad_norm": 7.3996686935424805, "learning_rate": 7.542747621329319e-05, "loss": 0.6892, "step": 18700 }, { "epoch": 1.2670912663459584, "grad_norm": 7.774918079376221, "learning_rate": 7.542610719419537e-05, "loss": 0.7751, "step": 18701 }, { "epoch": 1.2671590216139306, "grad_norm": 4.111617088317871, "learning_rate": 7.542473817509755e-05, "loss": 0.604, "step": 18702 }, { "epoch": 1.2672267768819026, "grad_norm": 7.3278422355651855, "learning_rate": 7.542336915599974e-05, "loss": 0.6958, "step": 18703 }, { "epoch": 1.2672945321498745, "grad_norm": 6.07009744644165, "learning_rate": 7.542200013690192e-05, "loss": 0.6963, "step": 18704 }, { "epoch": 1.2673622874178467, "grad_norm": 5.076060771942139, "learning_rate": 7.54206311178041e-05, "loss": 0.7156, "step": 18705 }, { "epoch": 1.267430042685819, "grad_norm": 6.285949230194092, "learning_rate": 7.541926209870628e-05, "loss": 0.6888, "step": 18706 }, { "epoch": 1.267497797953791, "grad_norm": 6.628175258636475, "learning_rate": 7.541789307960846e-05, "loss": 0.7926, "step": 18707 }, { "epoch": 1.267565553221763, "grad_norm": 5.416219234466553, "learning_rate": 7.541652406051066e-05, "loss": 0.5316, "step": 18708 }, { "epoch": 1.267633308489735, "grad_norm": 5.582966327667236, "learning_rate": 7.541515504141284e-05, "loss": 0.7351, "step": 18709 }, { "epoch": 1.267701063757707, "grad_norm": 6.219982147216797, "learning_rate": 7.541378602231502e-05, "loss": 0.6298, "step": 18710 }, { "epoch": 1.2677688190256793, "grad_norm": 5.465405464172363, "learning_rate": 7.54124170032172e-05, "loss": 0.8407, "step": 18711 }, { "epoch": 1.2678365742936513, "grad_norm": 7.575626850128174, "learning_rate": 7.541104798411939e-05, "loss": 0.7848, "step": 18712 }, { "epoch": 1.2679043295616235, "grad_norm": 6.816934585571289, "learning_rate": 7.540967896502157e-05, "loss": 1.0209, "step": 18713 }, { "epoch": 1.2679720848295954, "grad_norm": 4.162535667419434, "learning_rate": 7.540830994592375e-05, "loss": 0.5911, "step": 18714 }, { "epoch": 1.2680398400975676, "grad_norm": 6.326861381530762, "learning_rate": 7.540694092682593e-05, "loss": 0.865, "step": 18715 }, { "epoch": 1.2681075953655396, "grad_norm": 7.262811183929443, "learning_rate": 7.540557190772811e-05, "loss": 0.8497, "step": 18716 }, { "epoch": 1.2681753506335118, "grad_norm": 6.211512565612793, "learning_rate": 7.54042028886303e-05, "loss": 0.7681, "step": 18717 }, { "epoch": 1.2682431059014838, "grad_norm": 5.747305870056152, "learning_rate": 7.540283386953249e-05, "loss": 0.7447, "step": 18718 }, { "epoch": 1.2683108611694558, "grad_norm": 6.38530969619751, "learning_rate": 7.540146485043467e-05, "loss": 0.8112, "step": 18719 }, { "epoch": 1.268378616437428, "grad_norm": 7.680963039398193, "learning_rate": 7.540009583133685e-05, "loss": 0.9147, "step": 18720 }, { "epoch": 1.2684463717054002, "grad_norm": 6.187520980834961, "learning_rate": 7.539872681223903e-05, "loss": 0.544, "step": 18721 }, { "epoch": 1.2685141269733722, "grad_norm": 7.133886337280273, "learning_rate": 7.539735779314122e-05, "loss": 0.6501, "step": 18722 }, { "epoch": 1.2685818822413442, "grad_norm": 4.940894603729248, "learning_rate": 7.53959887740434e-05, "loss": 0.5228, "step": 18723 }, { "epoch": 1.2686496375093164, "grad_norm": 5.824269771575928, "learning_rate": 7.539461975494558e-05, "loss": 0.5875, "step": 18724 }, { "epoch": 1.2687173927772886, "grad_norm": 5.165929794311523, "learning_rate": 7.539325073584776e-05, "loss": 0.5474, "step": 18725 }, { "epoch": 1.2687851480452605, "grad_norm": 6.881862640380859, "learning_rate": 7.539188171674996e-05, "loss": 0.8285, "step": 18726 }, { "epoch": 1.2688529033132325, "grad_norm": 4.256899356842041, "learning_rate": 7.539051269765214e-05, "loss": 0.6895, "step": 18727 }, { "epoch": 1.2689206585812047, "grad_norm": 6.94963264465332, "learning_rate": 7.538914367855432e-05, "loss": 0.7305, "step": 18728 }, { "epoch": 1.2689884138491767, "grad_norm": 5.503376007080078, "learning_rate": 7.53877746594565e-05, "loss": 0.8593, "step": 18729 }, { "epoch": 1.269056169117149, "grad_norm": 6.723635196685791, "learning_rate": 7.538640564035868e-05, "loss": 0.725, "step": 18730 }, { "epoch": 1.269123924385121, "grad_norm": 4.929683208465576, "learning_rate": 7.538503662126087e-05, "loss": 0.7759, "step": 18731 }, { "epoch": 1.269191679653093, "grad_norm": 5.286065578460693, "learning_rate": 7.538366760216305e-05, "loss": 0.5143, "step": 18732 }, { "epoch": 1.269259434921065, "grad_norm": 6.429125785827637, "learning_rate": 7.538229858306523e-05, "loss": 0.7311, "step": 18733 }, { "epoch": 1.2693271901890373, "grad_norm": 5.855788707733154, "learning_rate": 7.538092956396741e-05, "loss": 0.6172, "step": 18734 }, { "epoch": 1.2693949454570093, "grad_norm": 9.119575500488281, "learning_rate": 7.53795605448696e-05, "loss": 0.6225, "step": 18735 }, { "epoch": 1.2694627007249815, "grad_norm": 5.314157009124756, "learning_rate": 7.537819152577179e-05, "loss": 0.5715, "step": 18736 }, { "epoch": 1.2695304559929534, "grad_norm": 5.304906845092773, "learning_rate": 7.537682250667397e-05, "loss": 0.6502, "step": 18737 }, { "epoch": 1.2695982112609254, "grad_norm": 9.36937427520752, "learning_rate": 7.537545348757615e-05, "loss": 0.6523, "step": 18738 }, { "epoch": 1.2696659665288976, "grad_norm": 4.427879333496094, "learning_rate": 7.537408446847833e-05, "loss": 0.5489, "step": 18739 }, { "epoch": 1.2697337217968698, "grad_norm": 5.053177356719971, "learning_rate": 7.537271544938052e-05, "loss": 0.4986, "step": 18740 }, { "epoch": 1.2698014770648418, "grad_norm": 7.225897789001465, "learning_rate": 7.53713464302827e-05, "loss": 0.8408, "step": 18741 }, { "epoch": 1.2698692323328138, "grad_norm": 12.45961856842041, "learning_rate": 7.536997741118488e-05, "loss": 0.5649, "step": 18742 }, { "epoch": 1.269936987600786, "grad_norm": 7.825034141540527, "learning_rate": 7.536860839208708e-05, "loss": 0.6696, "step": 18743 }, { "epoch": 1.270004742868758, "grad_norm": 6.810579299926758, "learning_rate": 7.536723937298926e-05, "loss": 0.6444, "step": 18744 }, { "epoch": 1.2700724981367302, "grad_norm": 5.1344451904296875, "learning_rate": 7.536587035389144e-05, "loss": 0.7353, "step": 18745 }, { "epoch": 1.2701402534047022, "grad_norm": 6.530963897705078, "learning_rate": 7.536450133479363e-05, "loss": 0.5504, "step": 18746 }, { "epoch": 1.2702080086726744, "grad_norm": 6.726248741149902, "learning_rate": 7.536313231569581e-05, "loss": 0.713, "step": 18747 }, { "epoch": 1.2702757639406463, "grad_norm": 6.521676540374756, "learning_rate": 7.536176329659799e-05, "loss": 0.6072, "step": 18748 }, { "epoch": 1.2703435192086185, "grad_norm": 5.845739364624023, "learning_rate": 7.536039427750019e-05, "loss": 0.6976, "step": 18749 }, { "epoch": 1.2704112744765905, "grad_norm": 6.972343921661377, "learning_rate": 7.535902525840237e-05, "loss": 0.754, "step": 18750 }, { "epoch": 1.2704790297445627, "grad_norm": 5.711891174316406, "learning_rate": 7.535765623930455e-05, "loss": 0.74, "step": 18751 }, { "epoch": 1.2705467850125347, "grad_norm": 5.517587661743164, "learning_rate": 7.535628722020673e-05, "loss": 0.7303, "step": 18752 }, { "epoch": 1.2706145402805067, "grad_norm": 8.589468955993652, "learning_rate": 7.53549182011089e-05, "loss": 0.5891, "step": 18753 }, { "epoch": 1.270682295548479, "grad_norm": 7.845397472381592, "learning_rate": 7.53535491820111e-05, "loss": 0.688, "step": 18754 }, { "epoch": 1.270750050816451, "grad_norm": 6.336021900177002, "learning_rate": 7.535218016291328e-05, "loss": 1.0431, "step": 18755 }, { "epoch": 1.270817806084423, "grad_norm": 4.923684120178223, "learning_rate": 7.535081114381546e-05, "loss": 0.599, "step": 18756 }, { "epoch": 1.270885561352395, "grad_norm": 4.674132347106934, "learning_rate": 7.534944212471764e-05, "loss": 0.6613, "step": 18757 }, { "epoch": 1.2709533166203673, "grad_norm": 6.190471649169922, "learning_rate": 7.534807310561984e-05, "loss": 0.6995, "step": 18758 }, { "epoch": 1.2710210718883392, "grad_norm": 4.602145671844482, "learning_rate": 7.534670408652202e-05, "loss": 0.6335, "step": 18759 }, { "epoch": 1.2710888271563114, "grad_norm": 7.381703853607178, "learning_rate": 7.53453350674242e-05, "loss": 0.8018, "step": 18760 }, { "epoch": 1.2711565824242834, "grad_norm": 9.000822067260742, "learning_rate": 7.534396604832638e-05, "loss": 0.8581, "step": 18761 }, { "epoch": 1.2712243376922556, "grad_norm": 8.146454811096191, "learning_rate": 7.534259702922856e-05, "loss": 0.7054, "step": 18762 }, { "epoch": 1.2712920929602276, "grad_norm": 8.46900463104248, "learning_rate": 7.534122801013075e-05, "loss": 1.2489, "step": 18763 }, { "epoch": 1.2713598482281998, "grad_norm": 6.2840495109558105, "learning_rate": 7.533985899103293e-05, "loss": 0.4337, "step": 18764 }, { "epoch": 1.2714276034961718, "grad_norm": 5.154699325561523, "learning_rate": 7.533848997193511e-05, "loss": 0.9335, "step": 18765 }, { "epoch": 1.271495358764144, "grad_norm": 7.376125335693359, "learning_rate": 7.533712095283729e-05, "loss": 0.9432, "step": 18766 }, { "epoch": 1.271563114032116, "grad_norm": 6.189776420593262, "learning_rate": 7.533575193373949e-05, "loss": 0.7373, "step": 18767 }, { "epoch": 1.271630869300088, "grad_norm": 6.3389201164245605, "learning_rate": 7.533438291464167e-05, "loss": 0.7647, "step": 18768 }, { "epoch": 1.2716986245680602, "grad_norm": 4.66450834274292, "learning_rate": 7.533301389554385e-05, "loss": 0.662, "step": 18769 }, { "epoch": 1.2717663798360324, "grad_norm": 8.064640045166016, "learning_rate": 7.533164487644603e-05, "loss": 0.6845, "step": 18770 }, { "epoch": 1.2718341351040043, "grad_norm": 7.3907599449157715, "learning_rate": 7.533027585734821e-05, "loss": 0.9273, "step": 18771 }, { "epoch": 1.2719018903719763, "grad_norm": 5.336575984954834, "learning_rate": 7.53289068382504e-05, "loss": 0.6789, "step": 18772 }, { "epoch": 1.2719696456399485, "grad_norm": 6.4545464515686035, "learning_rate": 7.532753781915258e-05, "loss": 0.9242, "step": 18773 }, { "epoch": 1.2720374009079207, "grad_norm": 5.498749256134033, "learning_rate": 7.532616880005476e-05, "loss": 0.6835, "step": 18774 }, { "epoch": 1.2721051561758927, "grad_norm": 6.079244136810303, "learning_rate": 7.532479978095694e-05, "loss": 0.6402, "step": 18775 }, { "epoch": 1.2721729114438647, "grad_norm": 5.355831146240234, "learning_rate": 7.532343076185912e-05, "loss": 0.5447, "step": 18776 }, { "epoch": 1.2722406667118369, "grad_norm": 5.9255595207214355, "learning_rate": 7.532206174276132e-05, "loss": 0.8845, "step": 18777 }, { "epoch": 1.2723084219798089, "grad_norm": 4.958127975463867, "learning_rate": 7.53206927236635e-05, "loss": 0.7662, "step": 18778 }, { "epoch": 1.272376177247781, "grad_norm": 6.8580756187438965, "learning_rate": 7.531932370456568e-05, "loss": 0.77, "step": 18779 }, { "epoch": 1.272443932515753, "grad_norm": 5.93341064453125, "learning_rate": 7.531795468546786e-05, "loss": 0.7201, "step": 18780 }, { "epoch": 1.2725116877837253, "grad_norm": 8.533459663391113, "learning_rate": 7.531658566637005e-05, "loss": 0.6278, "step": 18781 }, { "epoch": 1.2725794430516972, "grad_norm": 7.035817623138428, "learning_rate": 7.531521664727223e-05, "loss": 0.6383, "step": 18782 }, { "epoch": 1.2726471983196694, "grad_norm": 5.18642520904541, "learning_rate": 7.531384762817441e-05, "loss": 0.7545, "step": 18783 }, { "epoch": 1.2727149535876414, "grad_norm": 5.188976287841797, "learning_rate": 7.531247860907659e-05, "loss": 0.5967, "step": 18784 }, { "epoch": 1.2727827088556136, "grad_norm": 6.648321151733398, "learning_rate": 7.531110958997877e-05, "loss": 0.6463, "step": 18785 }, { "epoch": 1.2728504641235856, "grad_norm": 4.127718448638916, "learning_rate": 7.530974057088097e-05, "loss": 0.6448, "step": 18786 }, { "epoch": 1.2729182193915576, "grad_norm": 5.507750511169434, "learning_rate": 7.530837155178315e-05, "loss": 0.6502, "step": 18787 }, { "epoch": 1.2729859746595298, "grad_norm": 4.259255409240723, "learning_rate": 7.530700253268533e-05, "loss": 0.6832, "step": 18788 }, { "epoch": 1.273053729927502, "grad_norm": 5.432353496551514, "learning_rate": 7.530563351358752e-05, "loss": 0.6964, "step": 18789 }, { "epoch": 1.273121485195474, "grad_norm": 7.876563549041748, "learning_rate": 7.53042644944897e-05, "loss": 0.6839, "step": 18790 }, { "epoch": 1.273189240463446, "grad_norm": 5.93082332611084, "learning_rate": 7.530289547539188e-05, "loss": 0.7036, "step": 18791 }, { "epoch": 1.2732569957314182, "grad_norm": 6.902464389801025, "learning_rate": 7.530152645629408e-05, "loss": 0.5999, "step": 18792 }, { "epoch": 1.2733247509993901, "grad_norm": 6.2476091384887695, "learning_rate": 7.530015743719626e-05, "loss": 0.7056, "step": 18793 }, { "epoch": 1.2733925062673623, "grad_norm": 6.089849948883057, "learning_rate": 7.529878841809844e-05, "loss": 0.5194, "step": 18794 }, { "epoch": 1.2734602615353343, "grad_norm": 6.8402323722839355, "learning_rate": 7.529741939900063e-05, "loss": 0.9667, "step": 18795 }, { "epoch": 1.2735280168033065, "grad_norm": 4.990555286407471, "learning_rate": 7.529605037990281e-05, "loss": 0.6621, "step": 18796 }, { "epoch": 1.2735957720712785, "grad_norm": 5.385702610015869, "learning_rate": 7.529468136080499e-05, "loss": 0.7561, "step": 18797 }, { "epoch": 1.2736635273392507, "grad_norm": 6.55327033996582, "learning_rate": 7.529331234170717e-05, "loss": 0.6992, "step": 18798 }, { "epoch": 1.2737312826072227, "grad_norm": 4.9786529541015625, "learning_rate": 7.529194332260935e-05, "loss": 0.423, "step": 18799 }, { "epoch": 1.2737990378751949, "grad_norm": 6.033566474914551, "learning_rate": 7.529057430351155e-05, "loss": 0.8076, "step": 18800 }, { "epoch": 1.2738667931431669, "grad_norm": 4.819602012634277, "learning_rate": 7.528920528441373e-05, "loss": 0.5846, "step": 18801 }, { "epoch": 1.2739345484111388, "grad_norm": 4.58457088470459, "learning_rate": 7.52878362653159e-05, "loss": 0.4613, "step": 18802 }, { "epoch": 1.274002303679111, "grad_norm": 6.81635046005249, "learning_rate": 7.528646724621809e-05, "loss": 0.6047, "step": 18803 }, { "epoch": 1.2740700589470833, "grad_norm": 6.048337459564209, "learning_rate": 7.528509822712028e-05, "loss": 0.7038, "step": 18804 }, { "epoch": 1.2741378142150552, "grad_norm": 5.8839497566223145, "learning_rate": 7.528372920802246e-05, "loss": 0.5125, "step": 18805 }, { "epoch": 1.2742055694830272, "grad_norm": 5.312802314758301, "learning_rate": 7.528236018892464e-05, "loss": 0.7354, "step": 18806 }, { "epoch": 1.2742733247509994, "grad_norm": 5.553534030914307, "learning_rate": 7.528099116982682e-05, "loss": 0.7618, "step": 18807 }, { "epoch": 1.2743410800189714, "grad_norm": 9.886577606201172, "learning_rate": 7.5279622150729e-05, "loss": 0.6893, "step": 18808 }, { "epoch": 1.2744088352869436, "grad_norm": 4.535064220428467, "learning_rate": 7.52782531316312e-05, "loss": 0.6792, "step": 18809 }, { "epoch": 1.2744765905549156, "grad_norm": 7.9594011306762695, "learning_rate": 7.527688411253338e-05, "loss": 0.5245, "step": 18810 }, { "epoch": 1.2745443458228878, "grad_norm": 7.513524532318115, "learning_rate": 7.527551509343556e-05, "loss": 0.8481, "step": 18811 }, { "epoch": 1.2746121010908598, "grad_norm": 6.94472599029541, "learning_rate": 7.527414607433774e-05, "loss": 0.8436, "step": 18812 }, { "epoch": 1.274679856358832, "grad_norm": 6.036854267120361, "learning_rate": 7.527277705523993e-05, "loss": 0.7955, "step": 18813 }, { "epoch": 1.274747611626804, "grad_norm": 5.6724653244018555, "learning_rate": 7.527140803614211e-05, "loss": 0.6856, "step": 18814 }, { "epoch": 1.2748153668947761, "grad_norm": 7.902639865875244, "learning_rate": 7.527003901704429e-05, "loss": 0.7921, "step": 18815 }, { "epoch": 1.2748831221627481, "grad_norm": 9.003299713134766, "learning_rate": 7.526866999794647e-05, "loss": 0.7184, "step": 18816 }, { "epoch": 1.27495087743072, "grad_norm": 4.744279861450195, "learning_rate": 7.526730097884865e-05, "loss": 0.609, "step": 18817 }, { "epoch": 1.2750186326986923, "grad_norm": 4.949873924255371, "learning_rate": 7.526593195975085e-05, "loss": 0.5167, "step": 18818 }, { "epoch": 1.2750863879666645, "grad_norm": 7.005631923675537, "learning_rate": 7.526456294065303e-05, "loss": 0.7546, "step": 18819 }, { "epoch": 1.2751541432346365, "grad_norm": 5.18028450012207, "learning_rate": 7.52631939215552e-05, "loss": 0.8859, "step": 18820 }, { "epoch": 1.2752218985026085, "grad_norm": 8.949212074279785, "learning_rate": 7.526182490245739e-05, "loss": 0.5453, "step": 18821 }, { "epoch": 1.2752896537705807, "grad_norm": 7.494017601013184, "learning_rate": 7.526045588335958e-05, "loss": 0.7327, "step": 18822 }, { "epoch": 1.2753574090385529, "grad_norm": 7.241610527038574, "learning_rate": 7.525908686426176e-05, "loss": 0.8487, "step": 18823 }, { "epoch": 1.2754251643065249, "grad_norm": 6.786823749542236, "learning_rate": 7.525771784516394e-05, "loss": 0.6919, "step": 18824 }, { "epoch": 1.2754929195744968, "grad_norm": 6.090564727783203, "learning_rate": 7.525634882606612e-05, "loss": 0.6609, "step": 18825 }, { "epoch": 1.275560674842469, "grad_norm": 5.381803035736084, "learning_rate": 7.52549798069683e-05, "loss": 0.8732, "step": 18826 }, { "epoch": 1.275628430110441, "grad_norm": 5.4159464836120605, "learning_rate": 7.52536107878705e-05, "loss": 0.8216, "step": 18827 }, { "epoch": 1.2756961853784132, "grad_norm": 6.12056303024292, "learning_rate": 7.525224176877268e-05, "loss": 0.7598, "step": 18828 }, { "epoch": 1.2757639406463852, "grad_norm": 6.235606670379639, "learning_rate": 7.525087274967486e-05, "loss": 0.7207, "step": 18829 }, { "epoch": 1.2758316959143574, "grad_norm": 6.274681091308594, "learning_rate": 7.524950373057704e-05, "loss": 0.6821, "step": 18830 }, { "epoch": 1.2758994511823294, "grad_norm": 7.6665730476379395, "learning_rate": 7.524813471147922e-05, "loss": 0.6943, "step": 18831 }, { "epoch": 1.2759672064503014, "grad_norm": 5.985592842102051, "learning_rate": 7.524676569238141e-05, "loss": 0.6707, "step": 18832 }, { "epoch": 1.2760349617182736, "grad_norm": 5.93402624130249, "learning_rate": 7.524539667328359e-05, "loss": 0.5169, "step": 18833 }, { "epoch": 1.2761027169862458, "grad_norm": 5.8805365562438965, "learning_rate": 7.524402765418577e-05, "loss": 0.7534, "step": 18834 }, { "epoch": 1.2761704722542178, "grad_norm": 5.239326477050781, "learning_rate": 7.524265863508797e-05, "loss": 0.6285, "step": 18835 }, { "epoch": 1.2762382275221897, "grad_norm": 7.4469218254089355, "learning_rate": 7.524128961599015e-05, "loss": 0.9258, "step": 18836 }, { "epoch": 1.276305982790162, "grad_norm": 6.6660308837890625, "learning_rate": 7.523992059689233e-05, "loss": 0.5781, "step": 18837 }, { "epoch": 1.2763737380581341, "grad_norm": 6.7496843338012695, "learning_rate": 7.523855157779452e-05, "loss": 0.7574, "step": 18838 }, { "epoch": 1.2764414933261061, "grad_norm": 5.500575542449951, "learning_rate": 7.52371825586967e-05, "loss": 0.6644, "step": 18839 }, { "epoch": 1.276509248594078, "grad_norm": 9.367401123046875, "learning_rate": 7.523581353959888e-05, "loss": 0.6461, "step": 18840 }, { "epoch": 1.2765770038620503, "grad_norm": 5.116668224334717, "learning_rate": 7.523444452050107e-05, "loss": 0.6969, "step": 18841 }, { "epoch": 1.2766447591300223, "grad_norm": 5.678927421569824, "learning_rate": 7.523307550140326e-05, "loss": 0.7335, "step": 18842 }, { "epoch": 1.2767125143979945, "grad_norm": 5.794867038726807, "learning_rate": 7.523170648230544e-05, "loss": 0.6325, "step": 18843 }, { "epoch": 1.2767802696659665, "grad_norm": 7.916839122772217, "learning_rate": 7.523033746320762e-05, "loss": 0.7342, "step": 18844 }, { "epoch": 1.2768480249339387, "grad_norm": 5.764396667480469, "learning_rate": 7.522896844410981e-05, "loss": 0.6167, "step": 18845 }, { "epoch": 1.2769157802019107, "grad_norm": 6.946478366851807, "learning_rate": 7.522759942501199e-05, "loss": 0.6968, "step": 18846 }, { "epoch": 1.2769835354698829, "grad_norm": 4.506878852844238, "learning_rate": 7.522623040591417e-05, "loss": 0.4669, "step": 18847 }, { "epoch": 1.2770512907378548, "grad_norm": 7.872246265411377, "learning_rate": 7.522486138681635e-05, "loss": 0.6437, "step": 18848 }, { "epoch": 1.277119046005827, "grad_norm": 5.910600185394287, "learning_rate": 7.522349236771853e-05, "loss": 0.7571, "step": 18849 }, { "epoch": 1.277186801273799, "grad_norm": 6.5720696449279785, "learning_rate": 7.522212334862072e-05, "loss": 0.5434, "step": 18850 }, { "epoch": 1.277254556541771, "grad_norm": 6.9568071365356445, "learning_rate": 7.52207543295229e-05, "loss": 0.4984, "step": 18851 }, { "epoch": 1.2773223118097432, "grad_norm": 7.4235029220581055, "learning_rate": 7.521938531042509e-05, "loss": 0.6011, "step": 18852 }, { "epoch": 1.2773900670777154, "grad_norm": 4.359251976013184, "learning_rate": 7.521801629132727e-05, "loss": 0.7338, "step": 18853 }, { "epoch": 1.2774578223456874, "grad_norm": 4.444194316864014, "learning_rate": 7.521664727222945e-05, "loss": 0.7421, "step": 18854 }, { "epoch": 1.2775255776136594, "grad_norm": 8.513532638549805, "learning_rate": 7.521527825313164e-05, "loss": 0.6509, "step": 18855 }, { "epoch": 1.2775933328816316, "grad_norm": 4.326864242553711, "learning_rate": 7.521390923403382e-05, "loss": 0.6874, "step": 18856 }, { "epoch": 1.2776610881496036, "grad_norm": 5.668485641479492, "learning_rate": 7.5212540214936e-05, "loss": 0.7196, "step": 18857 }, { "epoch": 1.2777288434175758, "grad_norm": 4.572542190551758, "learning_rate": 7.521117119583818e-05, "loss": 0.44, "step": 18858 }, { "epoch": 1.2777965986855477, "grad_norm": 5.525473594665527, "learning_rate": 7.520980217674038e-05, "loss": 0.7072, "step": 18859 }, { "epoch": 1.27786435395352, "grad_norm": 5.77551794052124, "learning_rate": 7.520843315764256e-05, "loss": 0.7568, "step": 18860 }, { "epoch": 1.277932109221492, "grad_norm": 6.813683032989502, "learning_rate": 7.520706413854474e-05, "loss": 0.5452, "step": 18861 }, { "epoch": 1.2779998644894641, "grad_norm": 5.953164577484131, "learning_rate": 7.520569511944692e-05, "loss": 0.8031, "step": 18862 }, { "epoch": 1.278067619757436, "grad_norm": 6.146420955657959, "learning_rate": 7.52043261003491e-05, "loss": 0.7288, "step": 18863 }, { "epoch": 1.2781353750254083, "grad_norm": 5.6717047691345215, "learning_rate": 7.520295708125129e-05, "loss": 0.6703, "step": 18864 }, { "epoch": 1.2782031302933803, "grad_norm": 6.754662036895752, "learning_rate": 7.520158806215347e-05, "loss": 0.8151, "step": 18865 }, { "epoch": 1.2782708855613523, "grad_norm": 5.751812934875488, "learning_rate": 7.520021904305565e-05, "loss": 0.4711, "step": 18866 }, { "epoch": 1.2783386408293245, "grad_norm": 4.40509033203125, "learning_rate": 7.519885002395783e-05, "loss": 0.6373, "step": 18867 }, { "epoch": 1.2784063960972967, "grad_norm": 5.7296624183654785, "learning_rate": 7.519748100486003e-05, "loss": 0.6872, "step": 18868 }, { "epoch": 1.2784741513652687, "grad_norm": 6.116519451141357, "learning_rate": 7.51961119857622e-05, "loss": 1.0041, "step": 18869 }, { "epoch": 1.2785419066332406, "grad_norm": 7.518869400024414, "learning_rate": 7.519474296666439e-05, "loss": 0.748, "step": 18870 }, { "epoch": 1.2786096619012128, "grad_norm": 4.801846981048584, "learning_rate": 7.519337394756657e-05, "loss": 0.5434, "step": 18871 }, { "epoch": 1.278677417169185, "grad_norm": 6.030666351318359, "learning_rate": 7.519200492846875e-05, "loss": 0.6367, "step": 18872 }, { "epoch": 1.278745172437157, "grad_norm": 4.764389991760254, "learning_rate": 7.519063590937094e-05, "loss": 0.8017, "step": 18873 }, { "epoch": 1.278812927705129, "grad_norm": 5.63907527923584, "learning_rate": 7.518926689027312e-05, "loss": 0.7555, "step": 18874 }, { "epoch": 1.2788806829731012, "grad_norm": 10.029809951782227, "learning_rate": 7.51878978711753e-05, "loss": 0.5597, "step": 18875 }, { "epoch": 1.2789484382410732, "grad_norm": 5.845201015472412, "learning_rate": 7.518652885207748e-05, "loss": 0.9033, "step": 18876 }, { "epoch": 1.2790161935090454, "grad_norm": 9.39575481414795, "learning_rate": 7.518515983297968e-05, "loss": 0.8103, "step": 18877 }, { "epoch": 1.2790839487770174, "grad_norm": 5.571200847625732, "learning_rate": 7.518379081388186e-05, "loss": 0.7806, "step": 18878 }, { "epoch": 1.2791517040449896, "grad_norm": 7.216861248016357, "learning_rate": 7.518242179478404e-05, "loss": 0.643, "step": 18879 }, { "epoch": 1.2792194593129615, "grad_norm": 4.62844181060791, "learning_rate": 7.518105277568622e-05, "loss": 0.5559, "step": 18880 }, { "epoch": 1.2792872145809335, "grad_norm": 6.89844274520874, "learning_rate": 7.517968375658841e-05, "loss": 0.7902, "step": 18881 }, { "epoch": 1.2793549698489057, "grad_norm": 6.073351860046387, "learning_rate": 7.517831473749059e-05, "loss": 0.8402, "step": 18882 }, { "epoch": 1.279422725116878, "grad_norm": 4.2992658615112305, "learning_rate": 7.517694571839277e-05, "loss": 0.645, "step": 18883 }, { "epoch": 1.27949048038485, "grad_norm": 5.582603931427002, "learning_rate": 7.517557669929496e-05, "loss": 0.7271, "step": 18884 }, { "epoch": 1.279558235652822, "grad_norm": 6.288532733917236, "learning_rate": 7.517420768019715e-05, "loss": 0.6048, "step": 18885 }, { "epoch": 1.279625990920794, "grad_norm": 5.831702709197998, "learning_rate": 7.517283866109933e-05, "loss": 0.7371, "step": 18886 }, { "epoch": 1.2796937461887663, "grad_norm": 6.60377311706543, "learning_rate": 7.517146964200152e-05, "loss": 0.8213, "step": 18887 }, { "epoch": 1.2797615014567383, "grad_norm": 6.422870635986328, "learning_rate": 7.51701006229037e-05, "loss": 0.8495, "step": 18888 }, { "epoch": 1.2798292567247103, "grad_norm": 4.115203380584717, "learning_rate": 7.516873160380588e-05, "loss": 0.6161, "step": 18889 }, { "epoch": 1.2798970119926825, "grad_norm": 6.056524276733398, "learning_rate": 7.516736258470806e-05, "loss": 0.6325, "step": 18890 }, { "epoch": 1.2799647672606544, "grad_norm": 4.64837121963501, "learning_rate": 7.516599356561025e-05, "loss": 0.7224, "step": 18891 }, { "epoch": 1.2800325225286266, "grad_norm": 5.659262657165527, "learning_rate": 7.516462454651243e-05, "loss": 0.7367, "step": 18892 }, { "epoch": 1.2801002777965986, "grad_norm": 5.922879219055176, "learning_rate": 7.516325552741462e-05, "loss": 0.8327, "step": 18893 }, { "epoch": 1.2801680330645708, "grad_norm": 5.646880626678467, "learning_rate": 7.51618865083168e-05, "loss": 0.8108, "step": 18894 }, { "epoch": 1.2802357883325428, "grad_norm": 5.094715595245361, "learning_rate": 7.516051748921898e-05, "loss": 0.6538, "step": 18895 }, { "epoch": 1.280303543600515, "grad_norm": 6.307434558868408, "learning_rate": 7.515914847012117e-05, "loss": 0.5699, "step": 18896 }, { "epoch": 1.280371298868487, "grad_norm": 6.823366165161133, "learning_rate": 7.515777945102335e-05, "loss": 0.6621, "step": 18897 }, { "epoch": 1.2804390541364592, "grad_norm": 5.032593250274658, "learning_rate": 7.515641043192553e-05, "loss": 0.7023, "step": 18898 }, { "epoch": 1.2805068094044312, "grad_norm": 5.109512805938721, "learning_rate": 7.515504141282771e-05, "loss": 0.6812, "step": 18899 }, { "epoch": 1.2805745646724032, "grad_norm": 5.409660339355469, "learning_rate": 7.51536723937299e-05, "loss": 0.8913, "step": 18900 }, { "epoch": 1.2806423199403754, "grad_norm": 7.99271297454834, "learning_rate": 7.515230337463208e-05, "loss": 0.6878, "step": 18901 }, { "epoch": 1.2807100752083476, "grad_norm": 4.207230567932129, "learning_rate": 7.515093435553427e-05, "loss": 0.5964, "step": 18902 }, { "epoch": 1.2807778304763195, "grad_norm": 6.239865779876709, "learning_rate": 7.514956533643645e-05, "loss": 0.8948, "step": 18903 }, { "epoch": 1.2808455857442915, "grad_norm": 4.550334453582764, "learning_rate": 7.514819631733863e-05, "loss": 0.7515, "step": 18904 }, { "epoch": 1.2809133410122637, "grad_norm": 4.666752338409424, "learning_rate": 7.514682729824082e-05, "loss": 0.7464, "step": 18905 }, { "epoch": 1.2809810962802357, "grad_norm": 5.829958438873291, "learning_rate": 7.5145458279143e-05, "loss": 0.6942, "step": 18906 }, { "epoch": 1.281048851548208, "grad_norm": 6.163125991821289, "learning_rate": 7.514408926004518e-05, "loss": 0.6625, "step": 18907 }, { "epoch": 1.28111660681618, "grad_norm": 5.07780647277832, "learning_rate": 7.514272024094736e-05, "loss": 0.577, "step": 18908 }, { "epoch": 1.281184362084152, "grad_norm": 4.930899143218994, "learning_rate": 7.514135122184954e-05, "loss": 0.7989, "step": 18909 }, { "epoch": 1.281252117352124, "grad_norm": 5.865635871887207, "learning_rate": 7.513998220275174e-05, "loss": 0.7121, "step": 18910 }, { "epoch": 1.2813198726200963, "grad_norm": 4.985292434692383, "learning_rate": 7.513861318365392e-05, "loss": 0.6258, "step": 18911 }, { "epoch": 1.2813876278880683, "grad_norm": 4.347504138946533, "learning_rate": 7.51372441645561e-05, "loss": 0.7127, "step": 18912 }, { "epoch": 1.2814553831560405, "grad_norm": 5.557649612426758, "learning_rate": 7.513587514545828e-05, "loss": 0.7456, "step": 18913 }, { "epoch": 1.2815231384240124, "grad_norm": 4.633983612060547, "learning_rate": 7.513450612636047e-05, "loss": 0.6507, "step": 18914 }, { "epoch": 1.2815908936919844, "grad_norm": 4.707529067993164, "learning_rate": 7.513313710726265e-05, "loss": 0.6159, "step": 18915 }, { "epoch": 1.2816586489599566, "grad_norm": 5.327187538146973, "learning_rate": 7.513176808816483e-05, "loss": 0.7239, "step": 18916 }, { "epoch": 1.2817264042279288, "grad_norm": 5.088149070739746, "learning_rate": 7.513039906906701e-05, "loss": 0.6742, "step": 18917 }, { "epoch": 1.2817941594959008, "grad_norm": 4.886707782745361, "learning_rate": 7.512903004996919e-05, "loss": 0.559, "step": 18918 }, { "epoch": 1.2818619147638728, "grad_norm": 8.560362815856934, "learning_rate": 7.512766103087139e-05, "loss": 0.5952, "step": 18919 }, { "epoch": 1.281929670031845, "grad_norm": 5.153624057769775, "learning_rate": 7.512629201177357e-05, "loss": 0.6235, "step": 18920 }, { "epoch": 1.2819974252998172, "grad_norm": 5.429281711578369, "learning_rate": 7.512492299267575e-05, "loss": 0.7461, "step": 18921 }, { "epoch": 1.2820651805677892, "grad_norm": 5.903201103210449, "learning_rate": 7.512355397357793e-05, "loss": 0.6662, "step": 18922 }, { "epoch": 1.2821329358357612, "grad_norm": 6.73254919052124, "learning_rate": 7.512218495448012e-05, "loss": 0.6163, "step": 18923 }, { "epoch": 1.2822006911037334, "grad_norm": 4.202960968017578, "learning_rate": 7.51208159353823e-05, "loss": 0.7297, "step": 18924 }, { "epoch": 1.2822684463717053, "grad_norm": 6.174546241760254, "learning_rate": 7.511944691628448e-05, "loss": 1.0266, "step": 18925 }, { "epoch": 1.2823362016396775, "grad_norm": 6.674781799316406, "learning_rate": 7.511807789718666e-05, "loss": 0.8947, "step": 18926 }, { "epoch": 1.2824039569076495, "grad_norm": 5.889008522033691, "learning_rate": 7.511670887808886e-05, "loss": 0.7084, "step": 18927 }, { "epoch": 1.2824717121756217, "grad_norm": 5.369926929473877, "learning_rate": 7.511533985899104e-05, "loss": 0.5471, "step": 18928 }, { "epoch": 1.2825394674435937, "grad_norm": 6.113325119018555, "learning_rate": 7.511397083989322e-05, "loss": 0.7868, "step": 18929 }, { "epoch": 1.2826072227115657, "grad_norm": 4.535378456115723, "learning_rate": 7.511260182079541e-05, "loss": 0.6198, "step": 18930 }, { "epoch": 1.282674977979538, "grad_norm": 5.319047927856445, "learning_rate": 7.511123280169759e-05, "loss": 0.6189, "step": 18931 }, { "epoch": 1.28274273324751, "grad_norm": 5.134969711303711, "learning_rate": 7.510986378259977e-05, "loss": 0.6798, "step": 18932 }, { "epoch": 1.282810488515482, "grad_norm": 7.2931976318359375, "learning_rate": 7.510849476350196e-05, "loss": 0.6263, "step": 18933 }, { "epoch": 1.282878243783454, "grad_norm": 4.7278642654418945, "learning_rate": 7.510712574440414e-05, "loss": 0.7181, "step": 18934 }, { "epoch": 1.2829459990514263, "grad_norm": 5.304832935333252, "learning_rate": 7.510575672530632e-05, "loss": 0.724, "step": 18935 }, { "epoch": 1.2830137543193985, "grad_norm": 5.6897430419921875, "learning_rate": 7.51043877062085e-05, "loss": 0.8844, "step": 18936 }, { "epoch": 1.2830815095873704, "grad_norm": 6.98329496383667, "learning_rate": 7.51030186871107e-05, "loss": 0.9702, "step": 18937 }, { "epoch": 1.2831492648553424, "grad_norm": 6.507364273071289, "learning_rate": 7.510164966801288e-05, "loss": 0.7699, "step": 18938 }, { "epoch": 1.2832170201233146, "grad_norm": 6.325850009918213, "learning_rate": 7.510028064891506e-05, "loss": 0.7413, "step": 18939 }, { "epoch": 1.2832847753912866, "grad_norm": 5.69012975692749, "learning_rate": 7.509891162981724e-05, "loss": 0.7384, "step": 18940 }, { "epoch": 1.2833525306592588, "grad_norm": 4.061514377593994, "learning_rate": 7.509754261071942e-05, "loss": 0.489, "step": 18941 }, { "epoch": 1.2834202859272308, "grad_norm": 5.335684776306152, "learning_rate": 7.509617359162161e-05, "loss": 0.6391, "step": 18942 }, { "epoch": 1.283488041195203, "grad_norm": 6.4478254318237305, "learning_rate": 7.50948045725238e-05, "loss": 0.6467, "step": 18943 }, { "epoch": 1.283555796463175, "grad_norm": 7.432367324829102, "learning_rate": 7.509343555342598e-05, "loss": 0.899, "step": 18944 }, { "epoch": 1.2836235517311472, "grad_norm": 5.422093868255615, "learning_rate": 7.509206653432816e-05, "loss": 0.7801, "step": 18945 }, { "epoch": 1.2836913069991192, "grad_norm": 5.186742305755615, "learning_rate": 7.509069751523035e-05, "loss": 0.5494, "step": 18946 }, { "epoch": 1.2837590622670914, "grad_norm": 6.226986408233643, "learning_rate": 7.508932849613253e-05, "loss": 0.8416, "step": 18947 }, { "epoch": 1.2838268175350633, "grad_norm": 4.487210750579834, "learning_rate": 7.508795947703471e-05, "loss": 0.5364, "step": 18948 }, { "epoch": 1.2838945728030353, "grad_norm": 7.355178356170654, "learning_rate": 7.508659045793689e-05, "loss": 0.5614, "step": 18949 }, { "epoch": 1.2839623280710075, "grad_norm": 7.658474445343018, "learning_rate": 7.508522143883907e-05, "loss": 0.8788, "step": 18950 }, { "epoch": 1.2840300833389797, "grad_norm": 5.916354656219482, "learning_rate": 7.508385241974126e-05, "loss": 0.8266, "step": 18951 }, { "epoch": 1.2840978386069517, "grad_norm": 8.505805015563965, "learning_rate": 7.508248340064344e-05, "loss": 0.6606, "step": 18952 }, { "epoch": 1.2841655938749237, "grad_norm": 5.885147571563721, "learning_rate": 7.508111438154563e-05, "loss": 0.6486, "step": 18953 }, { "epoch": 1.2842333491428959, "grad_norm": 4.990082740783691, "learning_rate": 7.50797453624478e-05, "loss": 0.6547, "step": 18954 }, { "epoch": 1.2843011044108679, "grad_norm": 6.718355178833008, "learning_rate": 7.507837634335e-05, "loss": 0.6687, "step": 18955 }, { "epoch": 1.28436885967884, "grad_norm": 4.848979473114014, "learning_rate": 7.507700732425218e-05, "loss": 0.6116, "step": 18956 }, { "epoch": 1.284436614946812, "grad_norm": 5.292390823364258, "learning_rate": 7.507563830515436e-05, "loss": 0.6961, "step": 18957 }, { "epoch": 1.2845043702147843, "grad_norm": 5.300043106079102, "learning_rate": 7.507426928605654e-05, "loss": 0.5778, "step": 18958 }, { "epoch": 1.2845721254827562, "grad_norm": 6.009559631347656, "learning_rate": 7.507290026695872e-05, "loss": 0.5248, "step": 18959 }, { "epoch": 1.2846398807507284, "grad_norm": 5.956521511077881, "learning_rate": 7.507153124786091e-05, "loss": 0.8463, "step": 18960 }, { "epoch": 1.2847076360187004, "grad_norm": 7.788699150085449, "learning_rate": 7.50701622287631e-05, "loss": 0.7121, "step": 18961 }, { "epoch": 1.2847753912866726, "grad_norm": 5.360804080963135, "learning_rate": 7.506879320966528e-05, "loss": 0.7558, "step": 18962 }, { "epoch": 1.2848431465546446, "grad_norm": 6.172602653503418, "learning_rate": 7.506742419056746e-05, "loss": 0.8419, "step": 18963 }, { "epoch": 1.2849109018226166, "grad_norm": 8.835184097290039, "learning_rate": 7.506605517146964e-05, "loss": 0.7741, "step": 18964 }, { "epoch": 1.2849786570905888, "grad_norm": 3.8804407119750977, "learning_rate": 7.506468615237183e-05, "loss": 0.457, "step": 18965 }, { "epoch": 1.285046412358561, "grad_norm": 6.400809288024902, "learning_rate": 7.506331713327401e-05, "loss": 0.6716, "step": 18966 }, { "epoch": 1.285114167626533, "grad_norm": 5.496263027191162, "learning_rate": 7.506194811417619e-05, "loss": 0.5292, "step": 18967 }, { "epoch": 1.285181922894505, "grad_norm": 9.594989776611328, "learning_rate": 7.506057909507837e-05, "loss": 0.5046, "step": 18968 }, { "epoch": 1.2852496781624771, "grad_norm": 5.41709566116333, "learning_rate": 7.505921007598056e-05, "loss": 0.5274, "step": 18969 }, { "epoch": 1.2853174334304494, "grad_norm": 7.191579341888428, "learning_rate": 7.505784105688275e-05, "loss": 0.8198, "step": 18970 }, { "epoch": 1.2853851886984213, "grad_norm": 4.957528591156006, "learning_rate": 7.505647203778493e-05, "loss": 0.5919, "step": 18971 }, { "epoch": 1.2854529439663933, "grad_norm": 5.475522518157959, "learning_rate": 7.50551030186871e-05, "loss": 0.849, "step": 18972 }, { "epoch": 1.2855206992343655, "grad_norm": 4.834624290466309, "learning_rate": 7.505373399958929e-05, "loss": 0.6045, "step": 18973 }, { "epoch": 1.2855884545023375, "grad_norm": 4.829009056091309, "learning_rate": 7.505236498049148e-05, "loss": 0.6468, "step": 18974 }, { "epoch": 1.2856562097703097, "grad_norm": 7.386830806732178, "learning_rate": 7.505099596139366e-05, "loss": 0.7796, "step": 18975 }, { "epoch": 1.2857239650382817, "grad_norm": 4.808793067932129, "learning_rate": 7.504962694229584e-05, "loss": 0.6896, "step": 18976 }, { "epoch": 1.2857917203062539, "grad_norm": 7.036888599395752, "learning_rate": 7.504825792319803e-05, "loss": 0.795, "step": 18977 }, { "epoch": 1.2858594755742259, "grad_norm": 6.848461627960205, "learning_rate": 7.504688890410022e-05, "loss": 0.7832, "step": 18978 }, { "epoch": 1.2859272308421978, "grad_norm": 6.701910495758057, "learning_rate": 7.50455198850024e-05, "loss": 0.6065, "step": 18979 }, { "epoch": 1.28599498611017, "grad_norm": 5.697361469268799, "learning_rate": 7.504415086590459e-05, "loss": 0.7513, "step": 18980 }, { "epoch": 1.2860627413781422, "grad_norm": 5.874011039733887, "learning_rate": 7.504278184680677e-05, "loss": 0.826, "step": 18981 }, { "epoch": 1.2861304966461142, "grad_norm": 5.672131061553955, "learning_rate": 7.504141282770895e-05, "loss": 0.6908, "step": 18982 }, { "epoch": 1.2861982519140862, "grad_norm": 6.700557231903076, "learning_rate": 7.504004380861114e-05, "loss": 0.4983, "step": 18983 }, { "epoch": 1.2862660071820584, "grad_norm": 6.988021373748779, "learning_rate": 7.503867478951332e-05, "loss": 0.7474, "step": 18984 }, { "epoch": 1.2863337624500306, "grad_norm": 8.243559837341309, "learning_rate": 7.50373057704155e-05, "loss": 0.7256, "step": 18985 }, { "epoch": 1.2864015177180026, "grad_norm": 6.7014312744140625, "learning_rate": 7.503593675131768e-05, "loss": 0.8513, "step": 18986 }, { "epoch": 1.2864692729859746, "grad_norm": 5.269378185272217, "learning_rate": 7.503456773221987e-05, "loss": 0.6072, "step": 18987 }, { "epoch": 1.2865370282539468, "grad_norm": 5.27901029586792, "learning_rate": 7.503319871312206e-05, "loss": 0.6553, "step": 18988 }, { "epoch": 1.2866047835219188, "grad_norm": 7.072850704193115, "learning_rate": 7.503182969402424e-05, "loss": 0.7969, "step": 18989 }, { "epoch": 1.286672538789891, "grad_norm": 7.112760066986084, "learning_rate": 7.503046067492642e-05, "loss": 0.997, "step": 18990 }, { "epoch": 1.286740294057863, "grad_norm": 7.057110786437988, "learning_rate": 7.50290916558286e-05, "loss": 0.7143, "step": 18991 }, { "epoch": 1.2868080493258351, "grad_norm": 5.904301166534424, "learning_rate": 7.50277226367308e-05, "loss": 0.6172, "step": 18992 }, { "epoch": 1.2868758045938071, "grad_norm": 8.106389999389648, "learning_rate": 7.502635361763297e-05, "loss": 0.8726, "step": 18993 }, { "epoch": 1.2869435598617793, "grad_norm": 4.713334560394287, "learning_rate": 7.502498459853515e-05, "loss": 0.6867, "step": 18994 }, { "epoch": 1.2870113151297513, "grad_norm": 5.963455677032471, "learning_rate": 7.502361557943734e-05, "loss": 0.4405, "step": 18995 }, { "epoch": 1.2870790703977235, "grad_norm": 6.356420516967773, "learning_rate": 7.502224656033952e-05, "loss": 0.6746, "step": 18996 }, { "epoch": 1.2871468256656955, "grad_norm": 4.7186126708984375, "learning_rate": 7.502087754124171e-05, "loss": 0.6257, "step": 18997 }, { "epoch": 1.2872145809336675, "grad_norm": 7.2148237228393555, "learning_rate": 7.501950852214389e-05, "loss": 0.6387, "step": 18998 }, { "epoch": 1.2872823362016397, "grad_norm": 6.3130669593811035, "learning_rate": 7.501813950304607e-05, "loss": 0.7136, "step": 18999 }, { "epoch": 1.2873500914696119, "grad_norm": 5.614752769470215, "learning_rate": 7.501677048394825e-05, "loss": 0.7034, "step": 19000 }, { "epoch": 1.2874178467375839, "grad_norm": 5.171928882598877, "learning_rate": 7.501540146485044e-05, "loss": 0.6074, "step": 19001 }, { "epoch": 1.2874856020055558, "grad_norm": 7.966459274291992, "learning_rate": 7.501403244575262e-05, "loss": 0.7485, "step": 19002 }, { "epoch": 1.287553357273528, "grad_norm": 4.990401268005371, "learning_rate": 7.50126634266548e-05, "loss": 0.7075, "step": 19003 }, { "epoch": 1.2876211125415, "grad_norm": 6.305065155029297, "learning_rate": 7.501129440755699e-05, "loss": 0.6638, "step": 19004 }, { "epoch": 1.2876888678094722, "grad_norm": 5.265984535217285, "learning_rate": 7.500992538845917e-05, "loss": 0.4996, "step": 19005 }, { "epoch": 1.2877566230774442, "grad_norm": 5.309114933013916, "learning_rate": 7.500855636936136e-05, "loss": 0.64, "step": 19006 }, { "epoch": 1.2878243783454164, "grad_norm": 6.5448689460754395, "learning_rate": 7.500718735026354e-05, "loss": 0.5986, "step": 19007 }, { "epoch": 1.2878921336133884, "grad_norm": 5.091623306274414, "learning_rate": 7.500581833116572e-05, "loss": 0.6548, "step": 19008 }, { "epoch": 1.2879598888813606, "grad_norm": 6.535802364349365, "learning_rate": 7.50044493120679e-05, "loss": 0.7007, "step": 19009 }, { "epoch": 1.2880276441493326, "grad_norm": 6.128486633300781, "learning_rate": 7.50030802929701e-05, "loss": 0.9784, "step": 19010 }, { "epoch": 1.2880953994173048, "grad_norm": 6.317168235778809, "learning_rate": 7.500171127387227e-05, "loss": 0.9048, "step": 19011 }, { "epoch": 1.2881631546852768, "grad_norm": 5.279526233673096, "learning_rate": 7.500034225477446e-05, "loss": 0.5129, "step": 19012 }, { "epoch": 1.2882309099532487, "grad_norm": 5.69022798538208, "learning_rate": 7.499897323567664e-05, "loss": 0.5757, "step": 19013 }, { "epoch": 1.288298665221221, "grad_norm": 5.739441871643066, "learning_rate": 7.499760421657882e-05, "loss": 0.4331, "step": 19014 }, { "epoch": 1.2883664204891931, "grad_norm": 7.490499019622803, "learning_rate": 7.499623519748101e-05, "loss": 0.761, "step": 19015 }, { "epoch": 1.2884341757571651, "grad_norm": 10.157821655273438, "learning_rate": 7.499486617838319e-05, "loss": 0.7003, "step": 19016 }, { "epoch": 1.288501931025137, "grad_norm": 5.9427876472473145, "learning_rate": 7.499349715928537e-05, "loss": 0.6023, "step": 19017 }, { "epoch": 1.2885696862931093, "grad_norm": 5.6554059982299805, "learning_rate": 7.499212814018755e-05, "loss": 0.6488, "step": 19018 }, { "epoch": 1.2886374415610815, "grad_norm": 6.236210823059082, "learning_rate": 7.499075912108973e-05, "loss": 0.6389, "step": 19019 }, { "epoch": 1.2887051968290535, "grad_norm": 4.029348373413086, "learning_rate": 7.498939010199192e-05, "loss": 0.5288, "step": 19020 }, { "epoch": 1.2887729520970255, "grad_norm": 8.369741439819336, "learning_rate": 7.49880210828941e-05, "loss": 0.6518, "step": 19021 }, { "epoch": 1.2888407073649977, "grad_norm": 4.985721588134766, "learning_rate": 7.498665206379629e-05, "loss": 0.6372, "step": 19022 }, { "epoch": 1.2889084626329697, "grad_norm": 8.40103530883789, "learning_rate": 7.498528304469848e-05, "loss": 0.5655, "step": 19023 }, { "epoch": 1.2889762179009419, "grad_norm": 6.697427749633789, "learning_rate": 7.498391402560066e-05, "loss": 0.6363, "step": 19024 }, { "epoch": 1.2890439731689138, "grad_norm": 4.908364772796631, "learning_rate": 7.498254500650284e-05, "loss": 0.6867, "step": 19025 }, { "epoch": 1.289111728436886, "grad_norm": 6.707319736480713, "learning_rate": 7.498117598740503e-05, "loss": 0.6754, "step": 19026 }, { "epoch": 1.289179483704858, "grad_norm": 29.607969284057617, "learning_rate": 7.497980696830721e-05, "loss": 0.7171, "step": 19027 }, { "epoch": 1.28924723897283, "grad_norm": 6.6992268562316895, "learning_rate": 7.49784379492094e-05, "loss": 0.6515, "step": 19028 }, { "epoch": 1.2893149942408022, "grad_norm": 7.497162342071533, "learning_rate": 7.497706893011159e-05, "loss": 0.558, "step": 19029 }, { "epoch": 1.2893827495087744, "grad_norm": 6.205023288726807, "learning_rate": 7.497569991101377e-05, "loss": 0.6415, "step": 19030 }, { "epoch": 1.2894505047767464, "grad_norm": 5.757179260253906, "learning_rate": 7.497433089191595e-05, "loss": 0.7391, "step": 19031 }, { "epoch": 1.2895182600447184, "grad_norm": 5.249318599700928, "learning_rate": 7.497296187281813e-05, "loss": 0.729, "step": 19032 }, { "epoch": 1.2895860153126906, "grad_norm": 4.7711944580078125, "learning_rate": 7.497159285372032e-05, "loss": 0.6849, "step": 19033 }, { "epoch": 1.2896537705806628, "grad_norm": 4.8976030349731445, "learning_rate": 7.49702238346225e-05, "loss": 0.7532, "step": 19034 }, { "epoch": 1.2897215258486348, "grad_norm": 5.8016533851623535, "learning_rate": 7.496885481552468e-05, "loss": 0.6349, "step": 19035 }, { "epoch": 1.2897892811166067, "grad_norm": 6.809314727783203, "learning_rate": 7.496748579642686e-05, "loss": 0.6976, "step": 19036 }, { "epoch": 1.289857036384579, "grad_norm": 8.582451820373535, "learning_rate": 7.496611677732904e-05, "loss": 0.7856, "step": 19037 }, { "epoch": 1.289924791652551, "grad_norm": 6.161681652069092, "learning_rate": 7.496474775823124e-05, "loss": 0.6678, "step": 19038 }, { "epoch": 1.2899925469205231, "grad_norm": 4.436268329620361, "learning_rate": 7.496337873913342e-05, "loss": 0.5384, "step": 19039 }, { "epoch": 1.290060302188495, "grad_norm": 6.227169513702393, "learning_rate": 7.49620097200356e-05, "loss": 0.858, "step": 19040 }, { "epoch": 1.2901280574564673, "grad_norm": 7.1437249183654785, "learning_rate": 7.496064070093778e-05, "loss": 0.8332, "step": 19041 }, { "epoch": 1.2901958127244393, "grad_norm": 6.581973075866699, "learning_rate": 7.495927168183996e-05, "loss": 0.6825, "step": 19042 }, { "epoch": 1.2902635679924115, "grad_norm": 8.830132484436035, "learning_rate": 7.495790266274215e-05, "loss": 0.6207, "step": 19043 }, { "epoch": 1.2903313232603835, "grad_norm": 5.445920467376709, "learning_rate": 7.495653364364433e-05, "loss": 0.5649, "step": 19044 }, { "epoch": 1.2903990785283557, "grad_norm": 6.3548359870910645, "learning_rate": 7.495516462454651e-05, "loss": 0.6226, "step": 19045 }, { "epoch": 1.2904668337963277, "grad_norm": 5.947188377380371, "learning_rate": 7.49537956054487e-05, "loss": 0.6831, "step": 19046 }, { "epoch": 1.2905345890642996, "grad_norm": 6.858307838439941, "learning_rate": 7.495242658635089e-05, "loss": 0.817, "step": 19047 }, { "epoch": 1.2906023443322718, "grad_norm": 5.442610740661621, "learning_rate": 7.495105756725307e-05, "loss": 0.8569, "step": 19048 }, { "epoch": 1.290670099600244, "grad_norm": 4.956701278686523, "learning_rate": 7.494968854815525e-05, "loss": 0.656, "step": 19049 }, { "epoch": 1.290737854868216, "grad_norm": 5.638252258300781, "learning_rate": 7.494831952905743e-05, "loss": 0.7737, "step": 19050 }, { "epoch": 1.290805610136188, "grad_norm": 5.065107822418213, "learning_rate": 7.494695050995961e-05, "loss": 0.5561, "step": 19051 }, { "epoch": 1.2908733654041602, "grad_norm": 4.787414073944092, "learning_rate": 7.49455814908618e-05, "loss": 0.6709, "step": 19052 }, { "epoch": 1.2909411206721322, "grad_norm": 5.403744697570801, "learning_rate": 7.494421247176398e-05, "loss": 0.6683, "step": 19053 }, { "epoch": 1.2910088759401044, "grad_norm": 8.315958976745605, "learning_rate": 7.494284345266616e-05, "loss": 0.683, "step": 19054 }, { "epoch": 1.2910766312080764, "grad_norm": 4.917759895324707, "learning_rate": 7.494147443356835e-05, "loss": 0.74, "step": 19055 }, { "epoch": 1.2911443864760486, "grad_norm": 5.201471328735352, "learning_rate": 7.494010541447054e-05, "loss": 0.6715, "step": 19056 }, { "epoch": 1.2912121417440205, "grad_norm": 4.8714399337768555, "learning_rate": 7.493873639537272e-05, "loss": 0.7998, "step": 19057 }, { "epoch": 1.2912798970119927, "grad_norm": 5.682001113891602, "learning_rate": 7.49373673762749e-05, "loss": 0.9011, "step": 19058 }, { "epoch": 1.2913476522799647, "grad_norm": 4.447145462036133, "learning_rate": 7.493599835717708e-05, "loss": 0.6008, "step": 19059 }, { "epoch": 1.291415407547937, "grad_norm": 5.916626453399658, "learning_rate": 7.493462933807926e-05, "loss": 0.7508, "step": 19060 }, { "epoch": 1.291483162815909, "grad_norm": 7.291490077972412, "learning_rate": 7.493326031898145e-05, "loss": 0.6759, "step": 19061 }, { "epoch": 1.291550918083881, "grad_norm": 4.439260005950928, "learning_rate": 7.493189129988363e-05, "loss": 0.5057, "step": 19062 }, { "epoch": 1.291618673351853, "grad_norm": 5.721559524536133, "learning_rate": 7.493052228078582e-05, "loss": 0.5098, "step": 19063 }, { "epoch": 1.2916864286198253, "grad_norm": 4.662031173706055, "learning_rate": 7.4929153261688e-05, "loss": 0.5689, "step": 19064 }, { "epoch": 1.2917541838877973, "grad_norm": 6.120683193206787, "learning_rate": 7.492778424259019e-05, "loss": 0.5959, "step": 19065 }, { "epoch": 1.2918219391557693, "grad_norm": 7.522911071777344, "learning_rate": 7.492641522349237e-05, "loss": 0.9768, "step": 19066 }, { "epoch": 1.2918896944237415, "grad_norm": 5.127923011779785, "learning_rate": 7.492504620439455e-05, "loss": 0.627, "step": 19067 }, { "epoch": 1.2919574496917137, "grad_norm": 10.054405212402344, "learning_rate": 7.492367718529673e-05, "loss": 0.6896, "step": 19068 }, { "epoch": 1.2920252049596856, "grad_norm": 8.326977729797363, "learning_rate": 7.492230816619892e-05, "loss": 0.6013, "step": 19069 }, { "epoch": 1.2920929602276576, "grad_norm": 4.071907997131348, "learning_rate": 7.49209391471011e-05, "loss": 0.4635, "step": 19070 }, { "epoch": 1.2921607154956298, "grad_norm": 4.721922874450684, "learning_rate": 7.491957012800328e-05, "loss": 0.6101, "step": 19071 }, { "epoch": 1.2922284707636018, "grad_norm": 5.1097822189331055, "learning_rate": 7.491820110890548e-05, "loss": 0.5978, "step": 19072 }, { "epoch": 1.292296226031574, "grad_norm": 5.518023490905762, "learning_rate": 7.491683208980766e-05, "loss": 0.5237, "step": 19073 }, { "epoch": 1.292363981299546, "grad_norm": 5.511222839355469, "learning_rate": 7.491546307070984e-05, "loss": 0.5496, "step": 19074 }, { "epoch": 1.2924317365675182, "grad_norm": 6.64237642288208, "learning_rate": 7.491409405161203e-05, "loss": 0.6374, "step": 19075 }, { "epoch": 1.2924994918354902, "grad_norm": 5.097143173217773, "learning_rate": 7.491272503251421e-05, "loss": 0.8234, "step": 19076 }, { "epoch": 1.2925672471034622, "grad_norm": 6.166132926940918, "learning_rate": 7.49113560134164e-05, "loss": 0.615, "step": 19077 }, { "epoch": 1.2926350023714344, "grad_norm": 5.8667216300964355, "learning_rate": 7.490998699431857e-05, "loss": 0.6672, "step": 19078 }, { "epoch": 1.2927027576394066, "grad_norm": 6.80618953704834, "learning_rate": 7.490861797522077e-05, "loss": 0.8106, "step": 19079 }, { "epoch": 1.2927705129073785, "grad_norm": 7.135251045227051, "learning_rate": 7.490724895612295e-05, "loss": 0.6267, "step": 19080 }, { "epoch": 1.2928382681753505, "grad_norm": 7.1912665367126465, "learning_rate": 7.490587993702513e-05, "loss": 0.8144, "step": 19081 }, { "epoch": 1.2929060234433227, "grad_norm": 8.14855670928955, "learning_rate": 7.490451091792731e-05, "loss": 0.7899, "step": 19082 }, { "epoch": 1.292973778711295, "grad_norm": 7.248320579528809, "learning_rate": 7.490314189882949e-05, "loss": 0.6009, "step": 19083 }, { "epoch": 1.293041533979267, "grad_norm": 6.335907459259033, "learning_rate": 7.490177287973168e-05, "loss": 0.569, "step": 19084 }, { "epoch": 1.293109289247239, "grad_norm": 4.860580921173096, "learning_rate": 7.490040386063386e-05, "loss": 0.5193, "step": 19085 }, { "epoch": 1.293177044515211, "grad_norm": 7.895290851593018, "learning_rate": 7.489903484153604e-05, "loss": 0.6026, "step": 19086 }, { "epoch": 1.293244799783183, "grad_norm": 5.047767162322998, "learning_rate": 7.489766582243822e-05, "loss": 0.6986, "step": 19087 }, { "epoch": 1.2933125550511553, "grad_norm": 5.926577091217041, "learning_rate": 7.489629680334042e-05, "loss": 0.5577, "step": 19088 }, { "epoch": 1.2933803103191273, "grad_norm": 5.948668479919434, "learning_rate": 7.48949277842426e-05, "loss": 0.6597, "step": 19089 }, { "epoch": 1.2934480655870995, "grad_norm": 5.912086009979248, "learning_rate": 7.489355876514478e-05, "loss": 0.6318, "step": 19090 }, { "epoch": 1.2935158208550714, "grad_norm": 4.917775630950928, "learning_rate": 7.489218974604696e-05, "loss": 0.624, "step": 19091 }, { "epoch": 1.2935835761230436, "grad_norm": 5.1207380294799805, "learning_rate": 7.489082072694914e-05, "loss": 0.7946, "step": 19092 }, { "epoch": 1.2936513313910156, "grad_norm": 3.9565823078155518, "learning_rate": 7.488945170785133e-05, "loss": 0.5253, "step": 19093 }, { "epoch": 1.2937190866589878, "grad_norm": 6.215704441070557, "learning_rate": 7.488808268875351e-05, "loss": 0.5773, "step": 19094 }, { "epoch": 1.2937868419269598, "grad_norm": 7.778109550476074, "learning_rate": 7.48867136696557e-05, "loss": 0.5588, "step": 19095 }, { "epoch": 1.2938545971949318, "grad_norm": 6.255362033843994, "learning_rate": 7.488534465055787e-05, "loss": 0.6826, "step": 19096 }, { "epoch": 1.293922352462904, "grad_norm": 6.045589923858643, "learning_rate": 7.488397563146006e-05, "loss": 0.8596, "step": 19097 }, { "epoch": 1.2939901077308762, "grad_norm": 7.053040981292725, "learning_rate": 7.488260661236225e-05, "loss": 0.4944, "step": 19098 }, { "epoch": 1.2940578629988482, "grad_norm": 4.342740535736084, "learning_rate": 7.488123759326443e-05, "loss": 0.5461, "step": 19099 }, { "epoch": 1.2941256182668202, "grad_norm": 5.933722496032715, "learning_rate": 7.487986857416661e-05, "loss": 0.5581, "step": 19100 }, { "epoch": 1.2941933735347924, "grad_norm": 7.10228967666626, "learning_rate": 7.487849955506879e-05, "loss": 0.8373, "step": 19101 }, { "epoch": 1.2942611288027643, "grad_norm": 7.499148845672607, "learning_rate": 7.487713053597098e-05, "loss": 0.7426, "step": 19102 }, { "epoch": 1.2943288840707365, "grad_norm": 6.0970354080200195, "learning_rate": 7.487576151687316e-05, "loss": 0.7959, "step": 19103 }, { "epoch": 1.2943966393387085, "grad_norm": 6.606189250946045, "learning_rate": 7.487439249777534e-05, "loss": 0.5305, "step": 19104 }, { "epoch": 1.2944643946066807, "grad_norm": 4.675085544586182, "learning_rate": 7.487302347867752e-05, "loss": 0.6525, "step": 19105 }, { "epoch": 1.2945321498746527, "grad_norm": 7.67195987701416, "learning_rate": 7.48716544595797e-05, "loss": 0.4927, "step": 19106 }, { "epoch": 1.294599905142625, "grad_norm": 5.768126964569092, "learning_rate": 7.48702854404819e-05, "loss": 0.7951, "step": 19107 }, { "epoch": 1.2946676604105969, "grad_norm": 9.432232856750488, "learning_rate": 7.486891642138408e-05, "loss": 0.6249, "step": 19108 }, { "epoch": 1.294735415678569, "grad_norm": 5.337562561035156, "learning_rate": 7.486754740228626e-05, "loss": 0.6767, "step": 19109 }, { "epoch": 1.294803170946541, "grad_norm": 5.255843639373779, "learning_rate": 7.486617838318844e-05, "loss": 0.572, "step": 19110 }, { "epoch": 1.294870926214513, "grad_norm": 5.104163646697998, "learning_rate": 7.486480936409063e-05, "loss": 0.6072, "step": 19111 }, { "epoch": 1.2949386814824853, "grad_norm": 5.880269527435303, "learning_rate": 7.486344034499281e-05, "loss": 0.7877, "step": 19112 }, { "epoch": 1.2950064367504575, "grad_norm": 6.030186176300049, "learning_rate": 7.4862071325895e-05, "loss": 0.6094, "step": 19113 }, { "epoch": 1.2950741920184294, "grad_norm": 5.60014009475708, "learning_rate": 7.486070230679718e-05, "loss": 0.6984, "step": 19114 }, { "epoch": 1.2951419472864014, "grad_norm": 6.737033843994141, "learning_rate": 7.485933328769937e-05, "loss": 0.7144, "step": 19115 }, { "epoch": 1.2952097025543736, "grad_norm": 6.1418023109436035, "learning_rate": 7.485796426860155e-05, "loss": 0.6173, "step": 19116 }, { "epoch": 1.2952774578223458, "grad_norm": 6.478549480438232, "learning_rate": 7.485659524950373e-05, "loss": 0.5839, "step": 19117 }, { "epoch": 1.2953452130903178, "grad_norm": 5.2262396812438965, "learning_rate": 7.485522623040592e-05, "loss": 0.6235, "step": 19118 }, { "epoch": 1.2954129683582898, "grad_norm": 5.0620341300964355, "learning_rate": 7.48538572113081e-05, "loss": 0.6881, "step": 19119 }, { "epoch": 1.295480723626262, "grad_norm": 6.0692057609558105, "learning_rate": 7.485248819221028e-05, "loss": 0.8118, "step": 19120 }, { "epoch": 1.295548478894234, "grad_norm": 6.242649555206299, "learning_rate": 7.485111917311248e-05, "loss": 0.9643, "step": 19121 }, { "epoch": 1.2956162341622062, "grad_norm": 6.3982062339782715, "learning_rate": 7.484975015401466e-05, "loss": 0.7003, "step": 19122 }, { "epoch": 1.2956839894301782, "grad_norm": 5.777416706085205, "learning_rate": 7.484838113491684e-05, "loss": 0.6675, "step": 19123 }, { "epoch": 1.2957517446981504, "grad_norm": 6.80368185043335, "learning_rate": 7.484701211581902e-05, "loss": 0.5221, "step": 19124 }, { "epoch": 1.2958194999661223, "grad_norm": 7.708262920379639, "learning_rate": 7.484564309672121e-05, "loss": 0.7613, "step": 19125 }, { "epoch": 1.2958872552340943, "grad_norm": 6.192023754119873, "learning_rate": 7.48442740776234e-05, "loss": 0.7431, "step": 19126 }, { "epoch": 1.2959550105020665, "grad_norm": 6.2393693923950195, "learning_rate": 7.484290505852557e-05, "loss": 0.7529, "step": 19127 }, { "epoch": 1.2960227657700387, "grad_norm": 5.543433666229248, "learning_rate": 7.484153603942775e-05, "loss": 0.5564, "step": 19128 }, { "epoch": 1.2960905210380107, "grad_norm": 9.415334701538086, "learning_rate": 7.484016702032993e-05, "loss": 0.6656, "step": 19129 }, { "epoch": 1.2961582763059827, "grad_norm": 8.343303680419922, "learning_rate": 7.483879800123213e-05, "loss": 0.7609, "step": 19130 }, { "epoch": 1.2962260315739549, "grad_norm": 6.454612731933594, "learning_rate": 7.483742898213431e-05, "loss": 0.818, "step": 19131 }, { "epoch": 1.296293786841927, "grad_norm": 5.173709392547607, "learning_rate": 7.483605996303649e-05, "loss": 0.8018, "step": 19132 }, { "epoch": 1.296361542109899, "grad_norm": 4.982776165008545, "learning_rate": 7.483469094393867e-05, "loss": 0.6752, "step": 19133 }, { "epoch": 1.296429297377871, "grad_norm": 8.027585983276367, "learning_rate": 7.483332192484086e-05, "loss": 0.8125, "step": 19134 }, { "epoch": 1.2964970526458433, "grad_norm": 7.384998321533203, "learning_rate": 7.483195290574304e-05, "loss": 0.5833, "step": 19135 }, { "epoch": 1.2965648079138152, "grad_norm": 4.902980804443359, "learning_rate": 7.483058388664522e-05, "loss": 0.7385, "step": 19136 }, { "epoch": 1.2966325631817874, "grad_norm": 5.606247425079346, "learning_rate": 7.48292148675474e-05, "loss": 0.6383, "step": 19137 }, { "epoch": 1.2967003184497594, "grad_norm": 5.878718852996826, "learning_rate": 7.482784584844958e-05, "loss": 0.7277, "step": 19138 }, { "epoch": 1.2967680737177316, "grad_norm": 5.765954971313477, "learning_rate": 7.482647682935178e-05, "loss": 0.7414, "step": 19139 }, { "epoch": 1.2968358289857036, "grad_norm": 5.570731163024902, "learning_rate": 7.482510781025396e-05, "loss": 0.7893, "step": 19140 }, { "epoch": 1.2969035842536758, "grad_norm": 5.429853439331055, "learning_rate": 7.482373879115614e-05, "loss": 0.6957, "step": 19141 }, { "epoch": 1.2969713395216478, "grad_norm": 6.646486759185791, "learning_rate": 7.482236977205832e-05, "loss": 1.0376, "step": 19142 }, { "epoch": 1.29703909478962, "grad_norm": 4.993820667266846, "learning_rate": 7.482100075296051e-05, "loss": 0.5901, "step": 19143 }, { "epoch": 1.297106850057592, "grad_norm": 4.799305438995361, "learning_rate": 7.48196317338627e-05, "loss": 0.5087, "step": 19144 }, { "epoch": 1.297174605325564, "grad_norm": 6.037576675415039, "learning_rate": 7.481826271476487e-05, "loss": 0.4785, "step": 19145 }, { "epoch": 1.2972423605935361, "grad_norm": 4.883419990539551, "learning_rate": 7.481689369566705e-05, "loss": 0.6148, "step": 19146 }, { "epoch": 1.2973101158615084, "grad_norm": 4.612204551696777, "learning_rate": 7.481552467656923e-05, "loss": 0.6114, "step": 19147 }, { "epoch": 1.2973778711294803, "grad_norm": 5.567351341247559, "learning_rate": 7.481415565747143e-05, "loss": 0.7547, "step": 19148 }, { "epoch": 1.2974456263974523, "grad_norm": 6.468573093414307, "learning_rate": 7.481278663837361e-05, "loss": 0.7149, "step": 19149 }, { "epoch": 1.2975133816654245, "grad_norm": 4.87019681930542, "learning_rate": 7.481141761927579e-05, "loss": 0.7442, "step": 19150 }, { "epoch": 1.2975811369333965, "grad_norm": 6.80837345123291, "learning_rate": 7.481004860017797e-05, "loss": 0.7891, "step": 19151 }, { "epoch": 1.2976488922013687, "grad_norm": 4.948276996612549, "learning_rate": 7.480867958108015e-05, "loss": 0.607, "step": 19152 }, { "epoch": 1.2977166474693407, "grad_norm": 9.127814292907715, "learning_rate": 7.480731056198234e-05, "loss": 0.5496, "step": 19153 }, { "epoch": 1.2977844027373129, "grad_norm": 7.154975414276123, "learning_rate": 7.480594154288452e-05, "loss": 0.9948, "step": 19154 }, { "epoch": 1.2978521580052849, "grad_norm": 7.215506076812744, "learning_rate": 7.48045725237867e-05, "loss": 0.8908, "step": 19155 }, { "epoch": 1.297919913273257, "grad_norm": 5.2011189460754395, "learning_rate": 7.480320350468888e-05, "loss": 0.6457, "step": 19156 }, { "epoch": 1.297987668541229, "grad_norm": 4.925286293029785, "learning_rate": 7.480183448559108e-05, "loss": 0.6543, "step": 19157 }, { "epoch": 1.2980554238092012, "grad_norm": 6.635340690612793, "learning_rate": 7.480046546649326e-05, "loss": 0.9229, "step": 19158 }, { "epoch": 1.2981231790771732, "grad_norm": 5.571264743804932, "learning_rate": 7.479909644739544e-05, "loss": 0.6742, "step": 19159 }, { "epoch": 1.2981909343451452, "grad_norm": 5.524951934814453, "learning_rate": 7.479772742829762e-05, "loss": 0.6031, "step": 19160 }, { "epoch": 1.2982586896131174, "grad_norm": 6.948805332183838, "learning_rate": 7.479635840919981e-05, "loss": 0.7308, "step": 19161 }, { "epoch": 1.2983264448810896, "grad_norm": 8.246399879455566, "learning_rate": 7.4794989390102e-05, "loss": 0.59, "step": 19162 }, { "epoch": 1.2983942001490616, "grad_norm": 6.494504451751709, "learning_rate": 7.479362037100417e-05, "loss": 0.692, "step": 19163 }, { "epoch": 1.2984619554170336, "grad_norm": 5.083800315856934, "learning_rate": 7.479225135190637e-05, "loss": 0.6705, "step": 19164 }, { "epoch": 1.2985297106850058, "grad_norm": 6.179067611694336, "learning_rate": 7.479088233280855e-05, "loss": 0.6867, "step": 19165 }, { "epoch": 1.298597465952978, "grad_norm": 8.640321731567383, "learning_rate": 7.478951331371073e-05, "loss": 0.7719, "step": 19166 }, { "epoch": 1.29866522122095, "grad_norm": 4.811356544494629, "learning_rate": 7.478814429461292e-05, "loss": 0.5735, "step": 19167 }, { "epoch": 1.298732976488922, "grad_norm": 4.978984355926514, "learning_rate": 7.47867752755151e-05, "loss": 0.7061, "step": 19168 }, { "epoch": 1.2988007317568941, "grad_norm": 7.419102191925049, "learning_rate": 7.478540625641728e-05, "loss": 0.8325, "step": 19169 }, { "epoch": 1.2988684870248661, "grad_norm": 5.784476280212402, "learning_rate": 7.478403723731946e-05, "loss": 0.6738, "step": 19170 }, { "epoch": 1.2989362422928383, "grad_norm": 6.706230163574219, "learning_rate": 7.478266821822166e-05, "loss": 0.6222, "step": 19171 }, { "epoch": 1.2990039975608103, "grad_norm": 6.111355781555176, "learning_rate": 7.478129919912384e-05, "loss": 0.7159, "step": 19172 }, { "epoch": 1.2990717528287825, "grad_norm": 8.37850570678711, "learning_rate": 7.477993018002602e-05, "loss": 0.718, "step": 19173 }, { "epoch": 1.2991395080967545, "grad_norm": 4.993941783905029, "learning_rate": 7.47785611609282e-05, "loss": 0.6222, "step": 19174 }, { "epoch": 1.2992072633647265, "grad_norm": 6.056105613708496, "learning_rate": 7.477719214183038e-05, "loss": 0.6241, "step": 19175 }, { "epoch": 1.2992750186326987, "grad_norm": 7.279097557067871, "learning_rate": 7.477582312273257e-05, "loss": 0.6628, "step": 19176 }, { "epoch": 1.2993427739006709, "grad_norm": 6.4105224609375, "learning_rate": 7.477445410363475e-05, "loss": 0.6014, "step": 19177 }, { "epoch": 1.2994105291686429, "grad_norm": 5.561768531799316, "learning_rate": 7.477308508453693e-05, "loss": 0.5993, "step": 19178 }, { "epoch": 1.2994782844366148, "grad_norm": 6.5868635177612305, "learning_rate": 7.477171606543911e-05, "loss": 0.6739, "step": 19179 }, { "epoch": 1.299546039704587, "grad_norm": 8.133974075317383, "learning_rate": 7.477034704634131e-05, "loss": 0.5866, "step": 19180 }, { "epoch": 1.2996137949725592, "grad_norm": 7.4204936027526855, "learning_rate": 7.476897802724349e-05, "loss": 0.7688, "step": 19181 }, { "epoch": 1.2996815502405312, "grad_norm": 6.2851881980896, "learning_rate": 7.476760900814567e-05, "loss": 0.6837, "step": 19182 }, { "epoch": 1.2997493055085032, "grad_norm": 5.351262092590332, "learning_rate": 7.476623998904785e-05, "loss": 0.5304, "step": 19183 }, { "epoch": 1.2998170607764754, "grad_norm": 5.493442058563232, "learning_rate": 7.476487096995003e-05, "loss": 0.8619, "step": 19184 }, { "epoch": 1.2998848160444474, "grad_norm": 7.936534404754639, "learning_rate": 7.476350195085222e-05, "loss": 0.7527, "step": 19185 }, { "epoch": 1.2999525713124196, "grad_norm": 8.634086608886719, "learning_rate": 7.47621329317544e-05, "loss": 0.8861, "step": 19186 }, { "epoch": 1.3000203265803916, "grad_norm": 4.095170974731445, "learning_rate": 7.476076391265658e-05, "loss": 0.4741, "step": 19187 }, { "epoch": 1.3000880818483638, "grad_norm": 5.462159633636475, "learning_rate": 7.475939489355876e-05, "loss": 0.6109, "step": 19188 }, { "epoch": 1.3001558371163358, "grad_norm": 7.525767803192139, "learning_rate": 7.475802587446096e-05, "loss": 0.6647, "step": 19189 }, { "epoch": 1.300223592384308, "grad_norm": 5.488816261291504, "learning_rate": 7.475665685536314e-05, "loss": 0.78, "step": 19190 }, { "epoch": 1.30029134765228, "grad_norm": 5.837284564971924, "learning_rate": 7.475528783626532e-05, "loss": 0.7529, "step": 19191 }, { "epoch": 1.3003591029202521, "grad_norm": 5.8723673820495605, "learning_rate": 7.47539188171675e-05, "loss": 0.6139, "step": 19192 }, { "epoch": 1.3004268581882241, "grad_norm": 7.713638782501221, "learning_rate": 7.475254979806968e-05, "loss": 0.7491, "step": 19193 }, { "epoch": 1.300494613456196, "grad_norm": 4.880240440368652, "learning_rate": 7.475118077897187e-05, "loss": 0.3861, "step": 19194 }, { "epoch": 1.3005623687241683, "grad_norm": 5.569873809814453, "learning_rate": 7.474981175987405e-05, "loss": 0.7557, "step": 19195 }, { "epoch": 1.3006301239921405, "grad_norm": 5.212553977966309, "learning_rate": 7.474844274077623e-05, "loss": 0.5967, "step": 19196 }, { "epoch": 1.3006978792601125, "grad_norm": 4.729264736175537, "learning_rate": 7.474707372167841e-05, "loss": 0.5471, "step": 19197 }, { "epoch": 1.3007656345280845, "grad_norm": 4.274075508117676, "learning_rate": 7.474570470258061e-05, "loss": 0.4707, "step": 19198 }, { "epoch": 1.3008333897960567, "grad_norm": 7.062283992767334, "learning_rate": 7.474433568348279e-05, "loss": 0.7103, "step": 19199 }, { "epoch": 1.3009011450640287, "grad_norm": 6.251115322113037, "learning_rate": 7.474296666438497e-05, "loss": 0.6342, "step": 19200 }, { "epoch": 1.3009689003320009, "grad_norm": 11.562602043151855, "learning_rate": 7.474159764528715e-05, "loss": 0.6925, "step": 19201 }, { "epoch": 1.3010366555999728, "grad_norm": 5.307293891906738, "learning_rate": 7.474022862618933e-05, "loss": 0.6351, "step": 19202 }, { "epoch": 1.301104410867945, "grad_norm": 6.55226993560791, "learning_rate": 7.473885960709152e-05, "loss": 0.736, "step": 19203 }, { "epoch": 1.301172166135917, "grad_norm": 4.6428632736206055, "learning_rate": 7.47374905879937e-05, "loss": 0.6353, "step": 19204 }, { "epoch": 1.3012399214038892, "grad_norm": 6.939322471618652, "learning_rate": 7.473612156889588e-05, "loss": 0.7603, "step": 19205 }, { "epoch": 1.3013076766718612, "grad_norm": 10.608319282531738, "learning_rate": 7.473475254979806e-05, "loss": 0.5318, "step": 19206 }, { "epoch": 1.3013754319398334, "grad_norm": 5.859227657318115, "learning_rate": 7.473338353070024e-05, "loss": 0.6618, "step": 19207 }, { "epoch": 1.3014431872078054, "grad_norm": 7.598918437957764, "learning_rate": 7.473201451160244e-05, "loss": 0.87, "step": 19208 }, { "epoch": 1.3015109424757774, "grad_norm": 4.769138336181641, "learning_rate": 7.473064549250462e-05, "loss": 0.5415, "step": 19209 }, { "epoch": 1.3015786977437496, "grad_norm": 5.071512699127197, "learning_rate": 7.47292764734068e-05, "loss": 0.6609, "step": 19210 }, { "epoch": 1.3016464530117218, "grad_norm": 4.998343467712402, "learning_rate": 7.4727907454309e-05, "loss": 0.5147, "step": 19211 }, { "epoch": 1.3017142082796938, "grad_norm": 6.552610874176025, "learning_rate": 7.472653843521117e-05, "loss": 0.7542, "step": 19212 }, { "epoch": 1.3017819635476657, "grad_norm": 6.5538649559021, "learning_rate": 7.472516941611335e-05, "loss": 0.6257, "step": 19213 }, { "epoch": 1.301849718815638, "grad_norm": 7.676652908325195, "learning_rate": 7.472380039701555e-05, "loss": 0.6259, "step": 19214 }, { "epoch": 1.3019174740836101, "grad_norm": 7.48184871673584, "learning_rate": 7.472243137791773e-05, "loss": 0.8445, "step": 19215 }, { "epoch": 1.3019852293515821, "grad_norm": 5.528251647949219, "learning_rate": 7.472106235881991e-05, "loss": 0.627, "step": 19216 }, { "epoch": 1.302052984619554, "grad_norm": 5.817585468292236, "learning_rate": 7.47196933397221e-05, "loss": 0.5198, "step": 19217 }, { "epoch": 1.3021207398875263, "grad_norm": 6.067303657531738, "learning_rate": 7.471832432062428e-05, "loss": 0.6521, "step": 19218 }, { "epoch": 1.3021884951554983, "grad_norm": 10.996803283691406, "learning_rate": 7.471695530152646e-05, "loss": 0.7708, "step": 19219 }, { "epoch": 1.3022562504234705, "grad_norm": 7.437582015991211, "learning_rate": 7.471558628242864e-05, "loss": 0.6164, "step": 19220 }, { "epoch": 1.3023240056914425, "grad_norm": 5.327712535858154, "learning_rate": 7.471421726333084e-05, "loss": 0.5831, "step": 19221 }, { "epoch": 1.3023917609594147, "grad_norm": 6.0046000480651855, "learning_rate": 7.471284824423302e-05, "loss": 0.7883, "step": 19222 }, { "epoch": 1.3024595162273866, "grad_norm": 6.560208320617676, "learning_rate": 7.47114792251352e-05, "loss": 0.8632, "step": 19223 }, { "epoch": 1.3025272714953586, "grad_norm": 6.115014553070068, "learning_rate": 7.471011020603738e-05, "loss": 0.8404, "step": 19224 }, { "epoch": 1.3025950267633308, "grad_norm": 5.94087028503418, "learning_rate": 7.470874118693956e-05, "loss": 0.7399, "step": 19225 }, { "epoch": 1.302662782031303, "grad_norm": 5.286967754364014, "learning_rate": 7.470737216784175e-05, "loss": 0.6313, "step": 19226 }, { "epoch": 1.302730537299275, "grad_norm": 5.249954700469971, "learning_rate": 7.470600314874393e-05, "loss": 0.5753, "step": 19227 }, { "epoch": 1.302798292567247, "grad_norm": 4.766334533691406, "learning_rate": 7.470463412964611e-05, "loss": 0.5336, "step": 19228 }, { "epoch": 1.3028660478352192, "grad_norm": 4.6472015380859375, "learning_rate": 7.47032651105483e-05, "loss": 0.6527, "step": 19229 }, { "epoch": 1.3029338031031914, "grad_norm": 9.291872024536133, "learning_rate": 7.470189609145047e-05, "loss": 0.6812, "step": 19230 }, { "epoch": 1.3030015583711634, "grad_norm": 10.377548217773438, "learning_rate": 7.470052707235267e-05, "loss": 0.7271, "step": 19231 }, { "epoch": 1.3030693136391354, "grad_norm": 5.028967380523682, "learning_rate": 7.469915805325485e-05, "loss": 0.6234, "step": 19232 }, { "epoch": 1.3031370689071076, "grad_norm": 6.307035446166992, "learning_rate": 7.469778903415703e-05, "loss": 0.7607, "step": 19233 }, { "epoch": 1.3032048241750795, "grad_norm": 6.983972549438477, "learning_rate": 7.469642001505921e-05, "loss": 0.6678, "step": 19234 }, { "epoch": 1.3032725794430517, "grad_norm": 8.028096199035645, "learning_rate": 7.46950509959614e-05, "loss": 0.6394, "step": 19235 }, { "epoch": 1.3033403347110237, "grad_norm": 7.099324703216553, "learning_rate": 7.469368197686358e-05, "loss": 0.6622, "step": 19236 }, { "epoch": 1.303408089978996, "grad_norm": 5.278601169586182, "learning_rate": 7.469231295776576e-05, "loss": 0.6119, "step": 19237 }, { "epoch": 1.303475845246968, "grad_norm": 5.695682048797607, "learning_rate": 7.469094393866794e-05, "loss": 0.8656, "step": 19238 }, { "epoch": 1.3035436005149401, "grad_norm": 6.856337547302246, "learning_rate": 7.468957491957012e-05, "loss": 0.8033, "step": 19239 }, { "epoch": 1.303611355782912, "grad_norm": 5.4627180099487305, "learning_rate": 7.468820590047232e-05, "loss": 0.6622, "step": 19240 }, { "epoch": 1.3036791110508843, "grad_norm": 4.678643703460693, "learning_rate": 7.46868368813745e-05, "loss": 0.6168, "step": 19241 }, { "epoch": 1.3037468663188563, "grad_norm": 5.501672744750977, "learning_rate": 7.468546786227668e-05, "loss": 0.7184, "step": 19242 }, { "epoch": 1.3038146215868283, "grad_norm": 5.930903434753418, "learning_rate": 7.468409884317886e-05, "loss": 0.8131, "step": 19243 }, { "epoch": 1.3038823768548005, "grad_norm": 6.3783440589904785, "learning_rate": 7.468272982408105e-05, "loss": 0.6675, "step": 19244 }, { "epoch": 1.3039501321227727, "grad_norm": 6.246618270874023, "learning_rate": 7.468136080498323e-05, "loss": 0.6933, "step": 19245 }, { "epoch": 1.3040178873907446, "grad_norm": 7.05374002456665, "learning_rate": 7.467999178588541e-05, "loss": 0.7478, "step": 19246 }, { "epoch": 1.3040856426587166, "grad_norm": 11.633477210998535, "learning_rate": 7.46786227667876e-05, "loss": 0.6421, "step": 19247 }, { "epoch": 1.3041533979266888, "grad_norm": 6.43737268447876, "learning_rate": 7.467725374768977e-05, "loss": 0.7109, "step": 19248 }, { "epoch": 1.3042211531946608, "grad_norm": 5.8468098640441895, "learning_rate": 7.467588472859197e-05, "loss": 0.5493, "step": 19249 }, { "epoch": 1.304288908462633, "grad_norm": 5.595934867858887, "learning_rate": 7.467451570949415e-05, "loss": 0.7199, "step": 19250 }, { "epoch": 1.304356663730605, "grad_norm": 4.735411167144775, "learning_rate": 7.467314669039633e-05, "loss": 0.5628, "step": 19251 }, { "epoch": 1.3044244189985772, "grad_norm": 5.467381477355957, "learning_rate": 7.467177767129851e-05, "loss": 0.7369, "step": 19252 }, { "epoch": 1.3044921742665492, "grad_norm": 5.543175220489502, "learning_rate": 7.467040865220069e-05, "loss": 0.7923, "step": 19253 }, { "epoch": 1.3045599295345214, "grad_norm": 7.489737510681152, "learning_rate": 7.466903963310288e-05, "loss": 0.5594, "step": 19254 }, { "epoch": 1.3046276848024934, "grad_norm": 4.915290832519531, "learning_rate": 7.466767061400506e-05, "loss": 0.674, "step": 19255 }, { "epoch": 1.3046954400704656, "grad_norm": 5.178788661956787, "learning_rate": 7.466630159490724e-05, "loss": 0.6778, "step": 19256 }, { "epoch": 1.3047631953384375, "grad_norm": 7.914145469665527, "learning_rate": 7.466493257580944e-05, "loss": 0.9561, "step": 19257 }, { "epoch": 1.3048309506064095, "grad_norm": 5.359807014465332, "learning_rate": 7.466356355671162e-05, "loss": 0.8672, "step": 19258 }, { "epoch": 1.3048987058743817, "grad_norm": 4.358567237854004, "learning_rate": 7.46621945376138e-05, "loss": 0.6061, "step": 19259 }, { "epoch": 1.304966461142354, "grad_norm": 5.369869232177734, "learning_rate": 7.466082551851599e-05, "loss": 0.7014, "step": 19260 }, { "epoch": 1.305034216410326, "grad_norm": 5.531113147735596, "learning_rate": 7.465945649941817e-05, "loss": 0.7263, "step": 19261 }, { "epoch": 1.305101971678298, "grad_norm": 5.243298053741455, "learning_rate": 7.465808748032035e-05, "loss": 0.7758, "step": 19262 }, { "epoch": 1.30516972694627, "grad_norm": 5.8512091636657715, "learning_rate": 7.465671846122255e-05, "loss": 0.6066, "step": 19263 }, { "epoch": 1.3052374822142423, "grad_norm": 5.892056941986084, "learning_rate": 7.465534944212473e-05, "loss": 0.6627, "step": 19264 }, { "epoch": 1.3053052374822143, "grad_norm": 6.516430377960205, "learning_rate": 7.465398042302691e-05, "loss": 0.7854, "step": 19265 }, { "epoch": 1.3053729927501863, "grad_norm": 4.172369956970215, "learning_rate": 7.465261140392909e-05, "loss": 0.5617, "step": 19266 }, { "epoch": 1.3054407480181585, "grad_norm": 6.882170677185059, "learning_rate": 7.465124238483128e-05, "loss": 0.8986, "step": 19267 }, { "epoch": 1.3055085032861304, "grad_norm": 5.521777153015137, "learning_rate": 7.464987336573346e-05, "loss": 0.8518, "step": 19268 }, { "epoch": 1.3055762585541026, "grad_norm": 4.716281890869141, "learning_rate": 7.464850434663564e-05, "loss": 0.6356, "step": 19269 }, { "epoch": 1.3056440138220746, "grad_norm": 9.426493644714355, "learning_rate": 7.464713532753782e-05, "loss": 0.7349, "step": 19270 }, { "epoch": 1.3057117690900468, "grad_norm": 4.040985107421875, "learning_rate": 7.464576630844e-05, "loss": 0.4661, "step": 19271 }, { "epoch": 1.3057795243580188, "grad_norm": 5.1140217781066895, "learning_rate": 7.46443972893422e-05, "loss": 0.7967, "step": 19272 }, { "epoch": 1.3058472796259908, "grad_norm": 5.608806610107422, "learning_rate": 7.464302827024438e-05, "loss": 0.6339, "step": 19273 }, { "epoch": 1.305915034893963, "grad_norm": 7.1240925788879395, "learning_rate": 7.464165925114656e-05, "loss": 0.7145, "step": 19274 }, { "epoch": 1.3059827901619352, "grad_norm": 5.230318546295166, "learning_rate": 7.464029023204874e-05, "loss": 0.6909, "step": 19275 }, { "epoch": 1.3060505454299072, "grad_norm": 4.885568141937256, "learning_rate": 7.463892121295093e-05, "loss": 0.5265, "step": 19276 }, { "epoch": 1.3061183006978792, "grad_norm": 7.368503570556641, "learning_rate": 7.463755219385311e-05, "loss": 0.8811, "step": 19277 }, { "epoch": 1.3061860559658514, "grad_norm": 5.144466876983643, "learning_rate": 7.463618317475529e-05, "loss": 0.6843, "step": 19278 }, { "epoch": 1.3062538112338236, "grad_norm": 5.075724124908447, "learning_rate": 7.463481415565747e-05, "loss": 0.5911, "step": 19279 }, { "epoch": 1.3063215665017955, "grad_norm": 4.716891288757324, "learning_rate": 7.463344513655965e-05, "loss": 0.7512, "step": 19280 }, { "epoch": 1.3063893217697675, "grad_norm": 6.228489398956299, "learning_rate": 7.463207611746185e-05, "loss": 0.4407, "step": 19281 }, { "epoch": 1.3064570770377397, "grad_norm": 4.770838737487793, "learning_rate": 7.463070709836403e-05, "loss": 0.6937, "step": 19282 }, { "epoch": 1.3065248323057117, "grad_norm": 4.86104154586792, "learning_rate": 7.462933807926621e-05, "loss": 0.6015, "step": 19283 }, { "epoch": 1.306592587573684, "grad_norm": 5.036496639251709, "learning_rate": 7.462796906016839e-05, "loss": 0.6975, "step": 19284 }, { "epoch": 1.3066603428416559, "grad_norm": 6.338175296783447, "learning_rate": 7.462660004107057e-05, "loss": 0.6948, "step": 19285 }, { "epoch": 1.306728098109628, "grad_norm": 6.409108638763428, "learning_rate": 7.462523102197276e-05, "loss": 0.6668, "step": 19286 }, { "epoch": 1.3067958533776, "grad_norm": 5.018176555633545, "learning_rate": 7.462386200287494e-05, "loss": 0.6277, "step": 19287 }, { "epoch": 1.3068636086455723, "grad_norm": 8.338909149169922, "learning_rate": 7.462249298377712e-05, "loss": 0.6821, "step": 19288 }, { "epoch": 1.3069313639135443, "grad_norm": 10.845988273620605, "learning_rate": 7.46211239646793e-05, "loss": 0.655, "step": 19289 }, { "epoch": 1.3069991191815165, "grad_norm": 5.800774097442627, "learning_rate": 7.46197549455815e-05, "loss": 0.9006, "step": 19290 }, { "epoch": 1.3070668744494884, "grad_norm": 5.894642353057861, "learning_rate": 7.461838592648368e-05, "loss": 0.7337, "step": 19291 }, { "epoch": 1.3071346297174604, "grad_norm": 7.261331558227539, "learning_rate": 7.461701690738586e-05, "loss": 0.9142, "step": 19292 }, { "epoch": 1.3072023849854326, "grad_norm": 5.738690376281738, "learning_rate": 7.461564788828804e-05, "loss": 0.6547, "step": 19293 }, { "epoch": 1.3072701402534048, "grad_norm": 6.226438045501709, "learning_rate": 7.461427886919022e-05, "loss": 0.8074, "step": 19294 }, { "epoch": 1.3073378955213768, "grad_norm": 7.120782852172852, "learning_rate": 7.461290985009241e-05, "loss": 0.4694, "step": 19295 }, { "epoch": 1.3074056507893488, "grad_norm": 6.239137649536133, "learning_rate": 7.46115408309946e-05, "loss": 0.7706, "step": 19296 }, { "epoch": 1.307473406057321, "grad_norm": 7.421683311462402, "learning_rate": 7.461017181189677e-05, "loss": 0.7482, "step": 19297 }, { "epoch": 1.307541161325293, "grad_norm": 5.84153413772583, "learning_rate": 7.460880279279895e-05, "loss": 0.7207, "step": 19298 }, { "epoch": 1.3076089165932652, "grad_norm": 5.52492094039917, "learning_rate": 7.460743377370115e-05, "loss": 0.576, "step": 19299 }, { "epoch": 1.3076766718612371, "grad_norm": 7.11539888381958, "learning_rate": 7.460606475460333e-05, "loss": 0.7295, "step": 19300 }, { "epoch": 1.3077444271292094, "grad_norm": 5.929843902587891, "learning_rate": 7.460469573550551e-05, "loss": 0.9111, "step": 19301 }, { "epoch": 1.3078121823971813, "grad_norm": 7.055301189422607, "learning_rate": 7.460332671640769e-05, "loss": 0.8519, "step": 19302 }, { "epoch": 1.3078799376651535, "grad_norm": 5.1674723625183105, "learning_rate": 7.460195769730988e-05, "loss": 0.6802, "step": 19303 }, { "epoch": 1.3079476929331255, "grad_norm": 6.270401954650879, "learning_rate": 7.460058867821206e-05, "loss": 0.7357, "step": 19304 }, { "epoch": 1.3080154482010977, "grad_norm": 6.857835292816162, "learning_rate": 7.459921965911424e-05, "loss": 0.7122, "step": 19305 }, { "epoch": 1.3080832034690697, "grad_norm": 4.810972213745117, "learning_rate": 7.459785064001644e-05, "loss": 0.7816, "step": 19306 }, { "epoch": 1.3081509587370417, "grad_norm": 6.710718631744385, "learning_rate": 7.459648162091862e-05, "loss": 0.9358, "step": 19307 }, { "epoch": 1.3082187140050139, "grad_norm": 4.387126445770264, "learning_rate": 7.45951126018208e-05, "loss": 0.6055, "step": 19308 }, { "epoch": 1.308286469272986, "grad_norm": 6.241201400756836, "learning_rate": 7.459374358272299e-05, "loss": 0.7275, "step": 19309 }, { "epoch": 1.308354224540958, "grad_norm": 5.566206932067871, "learning_rate": 7.459237456362517e-05, "loss": 0.7582, "step": 19310 }, { "epoch": 1.30842197980893, "grad_norm": 6.567960262298584, "learning_rate": 7.459100554452735e-05, "loss": 0.6721, "step": 19311 }, { "epoch": 1.3084897350769022, "grad_norm": 6.594301700592041, "learning_rate": 7.458963652542953e-05, "loss": 0.9371, "step": 19312 }, { "epoch": 1.3085574903448745, "grad_norm": 6.433032989501953, "learning_rate": 7.458826750633173e-05, "loss": 0.7312, "step": 19313 }, { "epoch": 1.3086252456128464, "grad_norm": 7.4513068199157715, "learning_rate": 7.458689848723391e-05, "loss": 0.9703, "step": 19314 }, { "epoch": 1.3086930008808184, "grad_norm": 5.248074531555176, "learning_rate": 7.458552946813609e-05, "loss": 0.7406, "step": 19315 }, { "epoch": 1.3087607561487906, "grad_norm": 4.781547546386719, "learning_rate": 7.458416044903827e-05, "loss": 0.532, "step": 19316 }, { "epoch": 1.3088285114167626, "grad_norm": 6.263958930969238, "learning_rate": 7.458279142994045e-05, "loss": 0.7988, "step": 19317 }, { "epoch": 1.3088962666847348, "grad_norm": 5.305252552032471, "learning_rate": 7.458142241084264e-05, "loss": 0.7644, "step": 19318 }, { "epoch": 1.3089640219527068, "grad_norm": 9.670378684997559, "learning_rate": 7.458005339174482e-05, "loss": 0.6676, "step": 19319 }, { "epoch": 1.309031777220679, "grad_norm": 5.033024787902832, "learning_rate": 7.4578684372647e-05, "loss": 0.6788, "step": 19320 }, { "epoch": 1.309099532488651, "grad_norm": 5.487405776977539, "learning_rate": 7.457731535354918e-05, "loss": 0.8192, "step": 19321 }, { "epoch": 1.309167287756623, "grad_norm": 5.061060905456543, "learning_rate": 7.457594633445138e-05, "loss": 0.8232, "step": 19322 }, { "epoch": 1.3092350430245951, "grad_norm": 4.374985694885254, "learning_rate": 7.457457731535356e-05, "loss": 0.6486, "step": 19323 }, { "epoch": 1.3093027982925673, "grad_norm": 5.638055324554443, "learning_rate": 7.457320829625574e-05, "loss": 0.7696, "step": 19324 }, { "epoch": 1.3093705535605393, "grad_norm": 7.374284744262695, "learning_rate": 7.457183927715792e-05, "loss": 0.7099, "step": 19325 }, { "epoch": 1.3094383088285113, "grad_norm": 6.0166754722595215, "learning_rate": 7.45704702580601e-05, "loss": 0.6954, "step": 19326 }, { "epoch": 1.3095060640964835, "grad_norm": 5.404942035675049, "learning_rate": 7.456910123896229e-05, "loss": 0.6847, "step": 19327 }, { "epoch": 1.3095738193644557, "grad_norm": 5.770890712738037, "learning_rate": 7.456773221986447e-05, "loss": 0.7579, "step": 19328 }, { "epoch": 1.3096415746324277, "grad_norm": 6.840242862701416, "learning_rate": 7.456636320076665e-05, "loss": 0.7649, "step": 19329 }, { "epoch": 1.3097093299003997, "grad_norm": 5.207508087158203, "learning_rate": 7.456499418166883e-05, "loss": 0.701, "step": 19330 }, { "epoch": 1.3097770851683719, "grad_norm": 5.867867469787598, "learning_rate": 7.456362516257103e-05, "loss": 0.6686, "step": 19331 }, { "epoch": 1.3098448404363439, "grad_norm": 8.843786239624023, "learning_rate": 7.456225614347321e-05, "loss": 0.8897, "step": 19332 }, { "epoch": 1.309912595704316, "grad_norm": 4.909896373748779, "learning_rate": 7.456088712437539e-05, "loss": 0.5461, "step": 19333 }, { "epoch": 1.309980350972288, "grad_norm": 6.377152442932129, "learning_rate": 7.455951810527757e-05, "loss": 0.7952, "step": 19334 }, { "epoch": 1.3100481062402602, "grad_norm": 5.607712745666504, "learning_rate": 7.455814908617975e-05, "loss": 0.8146, "step": 19335 }, { "epoch": 1.3101158615082322, "grad_norm": 6.361235618591309, "learning_rate": 7.455678006708194e-05, "loss": 0.6006, "step": 19336 }, { "epoch": 1.3101836167762044, "grad_norm": 7.716373920440674, "learning_rate": 7.455541104798412e-05, "loss": 0.5178, "step": 19337 }, { "epoch": 1.3102513720441764, "grad_norm": 7.2192158699035645, "learning_rate": 7.45540420288863e-05, "loss": 0.9224, "step": 19338 }, { "epoch": 1.3103191273121486, "grad_norm": 5.349232196807861, "learning_rate": 7.455267300978848e-05, "loss": 0.6533, "step": 19339 }, { "epoch": 1.3103868825801206, "grad_norm": 6.7535881996154785, "learning_rate": 7.455130399069066e-05, "loss": 0.6903, "step": 19340 }, { "epoch": 1.3104546378480926, "grad_norm": 5.157278060913086, "learning_rate": 7.454993497159286e-05, "loss": 0.6179, "step": 19341 }, { "epoch": 1.3105223931160648, "grad_norm": 6.018963813781738, "learning_rate": 7.454856595249504e-05, "loss": 0.6945, "step": 19342 }, { "epoch": 1.310590148384037, "grad_norm": 6.139010906219482, "learning_rate": 7.454719693339722e-05, "loss": 1.0191, "step": 19343 }, { "epoch": 1.310657903652009, "grad_norm": 7.0752854347229, "learning_rate": 7.45458279142994e-05, "loss": 0.8522, "step": 19344 }, { "epoch": 1.310725658919981, "grad_norm": 7.510865211486816, "learning_rate": 7.454445889520159e-05, "loss": 0.779, "step": 19345 }, { "epoch": 1.3107934141879531, "grad_norm": 8.075210571289062, "learning_rate": 7.454308987610377e-05, "loss": 0.8218, "step": 19346 }, { "epoch": 1.3108611694559251, "grad_norm": 4.835181713104248, "learning_rate": 7.454172085700595e-05, "loss": 0.6102, "step": 19347 }, { "epoch": 1.3109289247238973, "grad_norm": 7.623097896575928, "learning_rate": 7.454035183790813e-05, "loss": 1.0256, "step": 19348 }, { "epoch": 1.3109966799918693, "grad_norm": 6.484959602355957, "learning_rate": 7.453898281881033e-05, "loss": 1.1643, "step": 19349 }, { "epoch": 1.3110644352598415, "grad_norm": 6.833344459533691, "learning_rate": 7.453761379971251e-05, "loss": 0.7066, "step": 19350 }, { "epoch": 1.3111321905278135, "grad_norm": 5.98909330368042, "learning_rate": 7.453624478061469e-05, "loss": 0.5327, "step": 19351 }, { "epoch": 1.3111999457957857, "grad_norm": 4.8876566886901855, "learning_rate": 7.453487576151688e-05, "loss": 0.5756, "step": 19352 }, { "epoch": 1.3112677010637577, "grad_norm": 9.255005836486816, "learning_rate": 7.453350674241906e-05, "loss": 0.6187, "step": 19353 }, { "epoch": 1.3113354563317299, "grad_norm": 6.508296966552734, "learning_rate": 7.453213772332124e-05, "loss": 0.6439, "step": 19354 }, { "epoch": 1.3114032115997019, "grad_norm": 8.067445755004883, "learning_rate": 7.453076870422344e-05, "loss": 0.7454, "step": 19355 }, { "epoch": 1.3114709668676738, "grad_norm": 6.23566198348999, "learning_rate": 7.452939968512562e-05, "loss": 0.6954, "step": 19356 }, { "epoch": 1.311538722135646, "grad_norm": 5.791611194610596, "learning_rate": 7.45280306660278e-05, "loss": 0.6349, "step": 19357 }, { "epoch": 1.3116064774036182, "grad_norm": 5.790517807006836, "learning_rate": 7.452666164692998e-05, "loss": 0.6411, "step": 19358 }, { "epoch": 1.3116742326715902, "grad_norm": 7.0076003074646, "learning_rate": 7.452529262783217e-05, "loss": 0.6847, "step": 19359 }, { "epoch": 1.3117419879395622, "grad_norm": 4.665477275848389, "learning_rate": 7.452392360873435e-05, "loss": 0.5956, "step": 19360 }, { "epoch": 1.3118097432075344, "grad_norm": 4.5870137214660645, "learning_rate": 7.452255458963653e-05, "loss": 0.5477, "step": 19361 }, { "epoch": 1.3118774984755066, "grad_norm": 5.801887035369873, "learning_rate": 7.452118557053871e-05, "loss": 0.638, "step": 19362 }, { "epoch": 1.3119452537434786, "grad_norm": 6.096950531005859, "learning_rate": 7.451981655144089e-05, "loss": 0.7094, "step": 19363 }, { "epoch": 1.3120130090114506, "grad_norm": 6.0921502113342285, "learning_rate": 7.451844753234309e-05, "loss": 0.7481, "step": 19364 }, { "epoch": 1.3120807642794228, "grad_norm": 6.048202037811279, "learning_rate": 7.451707851324527e-05, "loss": 0.8793, "step": 19365 }, { "epoch": 1.3121485195473948, "grad_norm": 7.584158897399902, "learning_rate": 7.451570949414745e-05, "loss": 0.8978, "step": 19366 }, { "epoch": 1.312216274815367, "grad_norm": 4.377420902252197, "learning_rate": 7.451434047504963e-05, "loss": 0.5956, "step": 19367 }, { "epoch": 1.312284030083339, "grad_norm": 5.6085662841796875, "learning_rate": 7.451297145595182e-05, "loss": 0.7076, "step": 19368 }, { "epoch": 1.3123517853513111, "grad_norm": 9.232466697692871, "learning_rate": 7.4511602436854e-05, "loss": 0.5763, "step": 19369 }, { "epoch": 1.3124195406192831, "grad_norm": 5.571643829345703, "learning_rate": 7.451023341775618e-05, "loss": 0.5858, "step": 19370 }, { "epoch": 1.312487295887255, "grad_norm": 4.94617223739624, "learning_rate": 7.450886439865836e-05, "loss": 0.6133, "step": 19371 }, { "epoch": 1.3125550511552273, "grad_norm": 6.077978610992432, "learning_rate": 7.450749537956054e-05, "loss": 0.7838, "step": 19372 }, { "epoch": 1.3126228064231995, "grad_norm": 6.489776134490967, "learning_rate": 7.450612636046274e-05, "loss": 0.6184, "step": 19373 }, { "epoch": 1.3126905616911715, "grad_norm": 4.797688007354736, "learning_rate": 7.450475734136492e-05, "loss": 0.6418, "step": 19374 }, { "epoch": 1.3127583169591435, "grad_norm": 6.741479873657227, "learning_rate": 7.45033883222671e-05, "loss": 0.7005, "step": 19375 }, { "epoch": 1.3128260722271157, "grad_norm": 5.446199893951416, "learning_rate": 7.450201930316928e-05, "loss": 0.7806, "step": 19376 }, { "epoch": 1.3128938274950879, "grad_norm": 5.60636568069458, "learning_rate": 7.450065028407147e-05, "loss": 0.7208, "step": 19377 }, { "epoch": 1.3129615827630599, "grad_norm": 8.90152359008789, "learning_rate": 7.449928126497365e-05, "loss": 0.7644, "step": 19378 }, { "epoch": 1.3130293380310318, "grad_norm": 4.633640289306641, "learning_rate": 7.449791224587583e-05, "loss": 0.8419, "step": 19379 }, { "epoch": 1.313097093299004, "grad_norm": 7.062408924102783, "learning_rate": 7.449654322677801e-05, "loss": 0.6711, "step": 19380 }, { "epoch": 1.313164848566976, "grad_norm": 8.023946762084961, "learning_rate": 7.44951742076802e-05, "loss": 0.6515, "step": 19381 }, { "epoch": 1.3132326038349482, "grad_norm": 7.642714023590088, "learning_rate": 7.449380518858239e-05, "loss": 0.7367, "step": 19382 }, { "epoch": 1.3133003591029202, "grad_norm": 5.831465244293213, "learning_rate": 7.449243616948457e-05, "loss": 0.7688, "step": 19383 }, { "epoch": 1.3133681143708924, "grad_norm": 7.057791233062744, "learning_rate": 7.449106715038675e-05, "loss": 0.7749, "step": 19384 }, { "epoch": 1.3134358696388644, "grad_norm": 6.4020185470581055, "learning_rate": 7.448969813128893e-05, "loss": 0.5115, "step": 19385 }, { "epoch": 1.3135036249068366, "grad_norm": 5.396944522857666, "learning_rate": 7.448832911219111e-05, "loss": 0.6695, "step": 19386 }, { "epoch": 1.3135713801748086, "grad_norm": 4.610302448272705, "learning_rate": 7.44869600930933e-05, "loss": 0.4791, "step": 19387 }, { "epoch": 1.3136391354427808, "grad_norm": 4.943906307220459, "learning_rate": 7.448559107399548e-05, "loss": 0.7018, "step": 19388 }, { "epoch": 1.3137068907107528, "grad_norm": 5.574923038482666, "learning_rate": 7.448422205489766e-05, "loss": 0.5651, "step": 19389 }, { "epoch": 1.3137746459787247, "grad_norm": 4.756671905517578, "learning_rate": 7.448285303579984e-05, "loss": 0.5166, "step": 19390 }, { "epoch": 1.313842401246697, "grad_norm": 5.564601421356201, "learning_rate": 7.448148401670204e-05, "loss": 0.6378, "step": 19391 }, { "epoch": 1.3139101565146691, "grad_norm": 6.258582592010498, "learning_rate": 7.448011499760422e-05, "loss": 0.5832, "step": 19392 }, { "epoch": 1.3139779117826411, "grad_norm": 6.909316062927246, "learning_rate": 7.44787459785064e-05, "loss": 0.5788, "step": 19393 }, { "epoch": 1.314045667050613, "grad_norm": 4.564525604248047, "learning_rate": 7.447737695940858e-05, "loss": 0.5963, "step": 19394 }, { "epoch": 1.3141134223185853, "grad_norm": 5.835295677185059, "learning_rate": 7.447600794031077e-05, "loss": 0.6673, "step": 19395 }, { "epoch": 1.3141811775865573, "grad_norm": 5.046399116516113, "learning_rate": 7.447463892121295e-05, "loss": 0.664, "step": 19396 }, { "epoch": 1.3142489328545295, "grad_norm": 12.507112503051758, "learning_rate": 7.447326990211513e-05, "loss": 0.6598, "step": 19397 }, { "epoch": 1.3143166881225015, "grad_norm": 4.722832679748535, "learning_rate": 7.447190088301733e-05, "loss": 0.7585, "step": 19398 }, { "epoch": 1.3143844433904737, "grad_norm": 6.198647499084473, "learning_rate": 7.447053186391951e-05, "loss": 0.7597, "step": 19399 }, { "epoch": 1.3144521986584456, "grad_norm": 6.029629230499268, "learning_rate": 7.446916284482169e-05, "loss": 0.7884, "step": 19400 }, { "epoch": 1.3145199539264179, "grad_norm": 4.300859451293945, "learning_rate": 7.446779382572388e-05, "loss": 0.633, "step": 19401 }, { "epoch": 1.3145877091943898, "grad_norm": 6.153326034545898, "learning_rate": 7.446642480662606e-05, "loss": 0.843, "step": 19402 }, { "epoch": 1.314655464462362, "grad_norm": 6.223268508911133, "learning_rate": 7.446505578752824e-05, "loss": 0.6378, "step": 19403 }, { "epoch": 1.314723219730334, "grad_norm": 5.457462310791016, "learning_rate": 7.446368676843042e-05, "loss": 0.929, "step": 19404 }, { "epoch": 1.314790974998306, "grad_norm": 6.110988140106201, "learning_rate": 7.446231774933262e-05, "loss": 0.9132, "step": 19405 }, { "epoch": 1.3148587302662782, "grad_norm": 5.993718147277832, "learning_rate": 7.44609487302348e-05, "loss": 0.5805, "step": 19406 }, { "epoch": 1.3149264855342504, "grad_norm": 4.322659015655518, "learning_rate": 7.445957971113698e-05, "loss": 0.5928, "step": 19407 }, { "epoch": 1.3149942408022224, "grad_norm": 8.245033264160156, "learning_rate": 7.445821069203916e-05, "loss": 0.9419, "step": 19408 }, { "epoch": 1.3150619960701944, "grad_norm": 5.226015567779541, "learning_rate": 7.445684167294135e-05, "loss": 0.6486, "step": 19409 }, { "epoch": 1.3151297513381666, "grad_norm": 8.089787483215332, "learning_rate": 7.445547265384353e-05, "loss": 0.7667, "step": 19410 }, { "epoch": 1.3151975066061385, "grad_norm": 5.974411964416504, "learning_rate": 7.445410363474571e-05, "loss": 0.6063, "step": 19411 }, { "epoch": 1.3152652618741107, "grad_norm": 7.185817241668701, "learning_rate": 7.445273461564789e-05, "loss": 0.9654, "step": 19412 }, { "epoch": 1.3153330171420827, "grad_norm": 6.01674222946167, "learning_rate": 7.445136559655007e-05, "loss": 0.7661, "step": 19413 }, { "epoch": 1.315400772410055, "grad_norm": 8.49528694152832, "learning_rate": 7.444999657745227e-05, "loss": 0.8556, "step": 19414 }, { "epoch": 1.315468527678027, "grad_norm": 7.08057165145874, "learning_rate": 7.444862755835445e-05, "loss": 0.5697, "step": 19415 }, { "epoch": 1.3155362829459991, "grad_norm": 5.859785079956055, "learning_rate": 7.444725853925663e-05, "loss": 0.7225, "step": 19416 }, { "epoch": 1.315604038213971, "grad_norm": 6.681051254272461, "learning_rate": 7.444588952015881e-05, "loss": 0.5483, "step": 19417 }, { "epoch": 1.3156717934819433, "grad_norm": 5.176866054534912, "learning_rate": 7.444452050106099e-05, "loss": 0.7977, "step": 19418 }, { "epoch": 1.3157395487499153, "grad_norm": 6.115938186645508, "learning_rate": 7.444315148196318e-05, "loss": 0.8832, "step": 19419 }, { "epoch": 1.3158073040178873, "grad_norm": 5.864163398742676, "learning_rate": 7.444178246286536e-05, "loss": 0.6857, "step": 19420 }, { "epoch": 1.3158750592858595, "grad_norm": 5.4385294914245605, "learning_rate": 7.444041344376754e-05, "loss": 0.6525, "step": 19421 }, { "epoch": 1.3159428145538317, "grad_norm": 6.185214996337891, "learning_rate": 7.443904442466972e-05, "loss": 0.8319, "step": 19422 }, { "epoch": 1.3160105698218036, "grad_norm": 5.944722652435303, "learning_rate": 7.443767540557192e-05, "loss": 0.7466, "step": 19423 }, { "epoch": 1.3160783250897756, "grad_norm": 6.3874831199646, "learning_rate": 7.44363063864741e-05, "loss": 0.5268, "step": 19424 }, { "epoch": 1.3161460803577478, "grad_norm": 5.51204776763916, "learning_rate": 7.443493736737628e-05, "loss": 0.8992, "step": 19425 }, { "epoch": 1.31621383562572, "grad_norm": 8.472578048706055, "learning_rate": 7.443356834827846e-05, "loss": 0.5486, "step": 19426 }, { "epoch": 1.316281590893692, "grad_norm": 6.188774585723877, "learning_rate": 7.443219932918064e-05, "loss": 0.8403, "step": 19427 }, { "epoch": 1.316349346161664, "grad_norm": 5.237027645111084, "learning_rate": 7.443083031008283e-05, "loss": 0.6804, "step": 19428 }, { "epoch": 1.3164171014296362, "grad_norm": 7.453249931335449, "learning_rate": 7.442946129098501e-05, "loss": 0.4868, "step": 19429 }, { "epoch": 1.3164848566976082, "grad_norm": 6.01828145980835, "learning_rate": 7.442809227188719e-05, "loss": 0.6209, "step": 19430 }, { "epoch": 1.3165526119655804, "grad_norm": 6.305059432983398, "learning_rate": 7.442672325278937e-05, "loss": 0.7031, "step": 19431 }, { "epoch": 1.3166203672335524, "grad_norm": 4.643130779266357, "learning_rate": 7.442535423369157e-05, "loss": 0.5172, "step": 19432 }, { "epoch": 1.3166881225015246, "grad_norm": 5.668321132659912, "learning_rate": 7.442398521459375e-05, "loss": 0.7273, "step": 19433 }, { "epoch": 1.3167558777694965, "grad_norm": 6.121042251586914, "learning_rate": 7.442261619549593e-05, "loss": 0.4063, "step": 19434 }, { "epoch": 1.3168236330374687, "grad_norm": 5.198802471160889, "learning_rate": 7.442124717639811e-05, "loss": 0.6315, "step": 19435 }, { "epoch": 1.3168913883054407, "grad_norm": 5.431886196136475, "learning_rate": 7.441987815730029e-05, "loss": 0.9739, "step": 19436 }, { "epoch": 1.316959143573413, "grad_norm": 4.823352813720703, "learning_rate": 7.441850913820248e-05, "loss": 0.4531, "step": 19437 }, { "epoch": 1.317026898841385, "grad_norm": 10.751338958740234, "learning_rate": 7.441714011910466e-05, "loss": 0.7126, "step": 19438 }, { "epoch": 1.3170946541093569, "grad_norm": 6.240265846252441, "learning_rate": 7.441577110000684e-05, "loss": 0.795, "step": 19439 }, { "epoch": 1.317162409377329, "grad_norm": 8.094771385192871, "learning_rate": 7.441440208090902e-05, "loss": 0.4406, "step": 19440 }, { "epoch": 1.3172301646453013, "grad_norm": 5.662014961242676, "learning_rate": 7.441303306181122e-05, "loss": 0.6213, "step": 19441 }, { "epoch": 1.3172979199132733, "grad_norm": 4.889885902404785, "learning_rate": 7.44116640427134e-05, "loss": 0.5403, "step": 19442 }, { "epoch": 1.3173656751812453, "grad_norm": 7.321258544921875, "learning_rate": 7.441029502361558e-05, "loss": 0.7253, "step": 19443 }, { "epoch": 1.3174334304492175, "grad_norm": 6.593913555145264, "learning_rate": 7.440892600451777e-05, "loss": 0.7196, "step": 19444 }, { "epoch": 1.3175011857171894, "grad_norm": 7.0890278816223145, "learning_rate": 7.440755698541995e-05, "loss": 0.5379, "step": 19445 }, { "epoch": 1.3175689409851616, "grad_norm": 8.078094482421875, "learning_rate": 7.440618796632213e-05, "loss": 0.7752, "step": 19446 }, { "epoch": 1.3176366962531336, "grad_norm": 7.165046215057373, "learning_rate": 7.440481894722433e-05, "loss": 0.6255, "step": 19447 }, { "epoch": 1.3177044515211058, "grad_norm": 5.3996968269348145, "learning_rate": 7.44034499281265e-05, "loss": 0.4677, "step": 19448 }, { "epoch": 1.3177722067890778, "grad_norm": 8.729655265808105, "learning_rate": 7.440208090902869e-05, "loss": 0.5873, "step": 19449 }, { "epoch": 1.31783996205705, "grad_norm": 7.191657543182373, "learning_rate": 7.440071188993087e-05, "loss": 0.8549, "step": 19450 }, { "epoch": 1.317907717325022, "grad_norm": 6.264857769012451, "learning_rate": 7.439934287083306e-05, "loss": 0.8095, "step": 19451 }, { "epoch": 1.3179754725929942, "grad_norm": 9.130210876464844, "learning_rate": 7.439797385173524e-05, "loss": 0.6723, "step": 19452 }, { "epoch": 1.3180432278609662, "grad_norm": 5.472723960876465, "learning_rate": 7.439660483263742e-05, "loss": 0.4606, "step": 19453 }, { "epoch": 1.3181109831289382, "grad_norm": 7.973169803619385, "learning_rate": 7.43952358135396e-05, "loss": 0.634, "step": 19454 }, { "epoch": 1.3181787383969104, "grad_norm": 5.368284702301025, "learning_rate": 7.43938667944418e-05, "loss": 0.6448, "step": 19455 }, { "epoch": 1.3182464936648826, "grad_norm": 6.428613185882568, "learning_rate": 7.439249777534398e-05, "loss": 0.7865, "step": 19456 }, { "epoch": 1.3183142489328545, "grad_norm": 6.700669288635254, "learning_rate": 7.439112875624616e-05, "loss": 0.9703, "step": 19457 }, { "epoch": 1.3183820042008265, "grad_norm": 5.049210071563721, "learning_rate": 7.438975973714834e-05, "loss": 0.8393, "step": 19458 }, { "epoch": 1.3184497594687987, "grad_norm": 5.753997802734375, "learning_rate": 7.438839071805052e-05, "loss": 0.8557, "step": 19459 }, { "epoch": 1.3185175147367707, "grad_norm": 5.230213642120361, "learning_rate": 7.438702169895271e-05, "loss": 0.7035, "step": 19460 }, { "epoch": 1.318585270004743, "grad_norm": 5.4090728759765625, "learning_rate": 7.438565267985489e-05, "loss": 0.8293, "step": 19461 }, { "epoch": 1.3186530252727149, "grad_norm": 6.2835564613342285, "learning_rate": 7.438428366075707e-05, "loss": 0.6565, "step": 19462 }, { "epoch": 1.318720780540687, "grad_norm": 5.500874042510986, "learning_rate": 7.438291464165925e-05, "loss": 0.4761, "step": 19463 }, { "epoch": 1.318788535808659, "grad_norm": 5.51194953918457, "learning_rate": 7.438154562256145e-05, "loss": 0.6134, "step": 19464 }, { "epoch": 1.3188562910766313, "grad_norm": 8.707521438598633, "learning_rate": 7.438017660346363e-05, "loss": 0.4999, "step": 19465 }, { "epoch": 1.3189240463446033, "grad_norm": 6.068605899810791, "learning_rate": 7.437880758436581e-05, "loss": 0.7916, "step": 19466 }, { "epoch": 1.3189918016125755, "grad_norm": 5.49448299407959, "learning_rate": 7.437743856526799e-05, "loss": 0.8369, "step": 19467 }, { "epoch": 1.3190595568805474, "grad_norm": 6.42059326171875, "learning_rate": 7.437606954617017e-05, "loss": 0.5271, "step": 19468 }, { "epoch": 1.3191273121485194, "grad_norm": 6.059930801391602, "learning_rate": 7.437470052707236e-05, "loss": 0.858, "step": 19469 }, { "epoch": 1.3191950674164916, "grad_norm": 5.000464916229248, "learning_rate": 7.437333150797454e-05, "loss": 0.5571, "step": 19470 }, { "epoch": 1.3192628226844638, "grad_norm": 6.232437610626221, "learning_rate": 7.437196248887672e-05, "loss": 0.8087, "step": 19471 }, { "epoch": 1.3193305779524358, "grad_norm": 6.386351108551025, "learning_rate": 7.43705934697789e-05, "loss": 0.6996, "step": 19472 }, { "epoch": 1.3193983332204078, "grad_norm": 5.568863868713379, "learning_rate": 7.436922445068108e-05, "loss": 0.6037, "step": 19473 }, { "epoch": 1.31946608848838, "grad_norm": 5.133992671966553, "learning_rate": 7.436785543158328e-05, "loss": 0.7874, "step": 19474 }, { "epoch": 1.3195338437563522, "grad_norm": 9.910419464111328, "learning_rate": 7.436648641248546e-05, "loss": 0.7648, "step": 19475 }, { "epoch": 1.3196015990243242, "grad_norm": 6.775590419769287, "learning_rate": 7.436511739338764e-05, "loss": 0.7225, "step": 19476 }, { "epoch": 1.3196693542922961, "grad_norm": 6.351054668426514, "learning_rate": 7.436374837428982e-05, "loss": 0.573, "step": 19477 }, { "epoch": 1.3197371095602684, "grad_norm": 6.432724475860596, "learning_rate": 7.436237935519201e-05, "loss": 0.7763, "step": 19478 }, { "epoch": 1.3198048648282403, "grad_norm": 5.826111793518066, "learning_rate": 7.436101033609419e-05, "loss": 0.6997, "step": 19479 }, { "epoch": 1.3198726200962125, "grad_norm": 5.106350898742676, "learning_rate": 7.435964131699637e-05, "loss": 0.6509, "step": 19480 }, { "epoch": 1.3199403753641845, "grad_norm": 5.013709545135498, "learning_rate": 7.435827229789855e-05, "loss": 0.7314, "step": 19481 }, { "epoch": 1.3200081306321567, "grad_norm": 6.2881975173950195, "learning_rate": 7.435690327880073e-05, "loss": 0.8209, "step": 19482 }, { "epoch": 1.3200758859001287, "grad_norm": 4.610447406768799, "learning_rate": 7.435553425970293e-05, "loss": 0.6809, "step": 19483 }, { "epoch": 1.320143641168101, "grad_norm": 4.840565204620361, "learning_rate": 7.435416524060511e-05, "loss": 0.6153, "step": 19484 }, { "epoch": 1.3202113964360729, "grad_norm": 5.611886978149414, "learning_rate": 7.435279622150729e-05, "loss": 0.794, "step": 19485 }, { "epoch": 1.320279151704045, "grad_norm": 6.805380821228027, "learning_rate": 7.435142720240947e-05, "loss": 0.5826, "step": 19486 }, { "epoch": 1.320346906972017, "grad_norm": 6.753982067108154, "learning_rate": 7.435005818331166e-05, "loss": 0.6645, "step": 19487 }, { "epoch": 1.320414662239989, "grad_norm": 4.702186584472656, "learning_rate": 7.434868916421384e-05, "loss": 0.8013, "step": 19488 }, { "epoch": 1.3204824175079612, "grad_norm": 6.64607048034668, "learning_rate": 7.434732014511602e-05, "loss": 0.6772, "step": 19489 }, { "epoch": 1.3205501727759335, "grad_norm": 4.55126428604126, "learning_rate": 7.43459511260182e-05, "loss": 0.6863, "step": 19490 }, { "epoch": 1.3206179280439054, "grad_norm": 6.725127220153809, "learning_rate": 7.43445821069204e-05, "loss": 0.8879, "step": 19491 }, { "epoch": 1.3206856833118774, "grad_norm": 5.033085823059082, "learning_rate": 7.434321308782258e-05, "loss": 0.6014, "step": 19492 }, { "epoch": 1.3207534385798496, "grad_norm": 4.325772762298584, "learning_rate": 7.434184406872476e-05, "loss": 0.6434, "step": 19493 }, { "epoch": 1.3208211938478216, "grad_norm": 6.072863578796387, "learning_rate": 7.434047504962695e-05, "loss": 0.6445, "step": 19494 }, { "epoch": 1.3208889491157938, "grad_norm": 6.066547870635986, "learning_rate": 7.433910603052913e-05, "loss": 0.7725, "step": 19495 }, { "epoch": 1.3209567043837658, "grad_norm": 4.887907028198242, "learning_rate": 7.433773701143131e-05, "loss": 0.6567, "step": 19496 }, { "epoch": 1.321024459651738, "grad_norm": 4.615170955657959, "learning_rate": 7.43363679923335e-05, "loss": 0.5582, "step": 19497 }, { "epoch": 1.32109221491971, "grad_norm": 5.4815545082092285, "learning_rate": 7.433499897323569e-05, "loss": 0.6664, "step": 19498 }, { "epoch": 1.3211599701876822, "grad_norm": 5.664987087249756, "learning_rate": 7.433362995413787e-05, "loss": 0.7647, "step": 19499 }, { "epoch": 1.3212277254556541, "grad_norm": 5.764766216278076, "learning_rate": 7.433226093504005e-05, "loss": 0.5244, "step": 19500 }, { "epoch": 1.3212954807236263, "grad_norm": 4.638474464416504, "learning_rate": 7.433089191594224e-05, "loss": 0.7476, "step": 19501 }, { "epoch": 1.3213632359915983, "grad_norm": 5.39206075668335, "learning_rate": 7.432952289684442e-05, "loss": 0.6201, "step": 19502 }, { "epoch": 1.3214309912595703, "grad_norm": 7.5513458251953125, "learning_rate": 7.43281538777466e-05, "loss": 0.822, "step": 19503 }, { "epoch": 1.3214987465275425, "grad_norm": 6.5639872550964355, "learning_rate": 7.432678485864878e-05, "loss": 0.6701, "step": 19504 }, { "epoch": 1.3215665017955147, "grad_norm": 5.726168155670166, "learning_rate": 7.432541583955096e-05, "loss": 0.8891, "step": 19505 }, { "epoch": 1.3216342570634867, "grad_norm": 9.421454429626465, "learning_rate": 7.432404682045316e-05, "loss": 0.6717, "step": 19506 }, { "epoch": 1.3217020123314587, "grad_norm": 5.886024475097656, "learning_rate": 7.432267780135534e-05, "loss": 0.5371, "step": 19507 }, { "epoch": 1.3217697675994309, "grad_norm": 5.60050106048584, "learning_rate": 7.432130878225752e-05, "loss": 0.5408, "step": 19508 }, { "epoch": 1.3218375228674029, "grad_norm": 6.319180488586426, "learning_rate": 7.43199397631597e-05, "loss": 0.6963, "step": 19509 }, { "epoch": 1.321905278135375, "grad_norm": 5.335339069366455, "learning_rate": 7.431857074406189e-05, "loss": 0.7583, "step": 19510 }, { "epoch": 1.321973033403347, "grad_norm": 4.836117267608643, "learning_rate": 7.431720172496407e-05, "loss": 0.6322, "step": 19511 }, { "epoch": 1.3220407886713192, "grad_norm": 6.297771453857422, "learning_rate": 7.431583270586625e-05, "loss": 0.8176, "step": 19512 }, { "epoch": 1.3221085439392912, "grad_norm": 4.546733379364014, "learning_rate": 7.431446368676843e-05, "loss": 0.7023, "step": 19513 }, { "epoch": 1.3221762992072634, "grad_norm": 5.969432830810547, "learning_rate": 7.431309466767061e-05, "loss": 0.6919, "step": 19514 }, { "epoch": 1.3222440544752354, "grad_norm": 4.719072341918945, "learning_rate": 7.43117256485728e-05, "loss": 0.6738, "step": 19515 }, { "epoch": 1.3223118097432076, "grad_norm": 4.476152420043945, "learning_rate": 7.431035662947499e-05, "loss": 0.5922, "step": 19516 }, { "epoch": 1.3223795650111796, "grad_norm": 5.045933723449707, "learning_rate": 7.430898761037717e-05, "loss": 0.6493, "step": 19517 }, { "epoch": 1.3224473202791516, "grad_norm": 6.879467010498047, "learning_rate": 7.430761859127935e-05, "loss": 0.737, "step": 19518 }, { "epoch": 1.3225150755471238, "grad_norm": 5.558518409729004, "learning_rate": 7.430624957218154e-05, "loss": 0.7391, "step": 19519 }, { "epoch": 1.322582830815096, "grad_norm": 5.281270503997803, "learning_rate": 7.430488055308372e-05, "loss": 0.7663, "step": 19520 }, { "epoch": 1.322650586083068, "grad_norm": 4.8754777908325195, "learning_rate": 7.43035115339859e-05, "loss": 0.7364, "step": 19521 }, { "epoch": 1.32271834135104, "grad_norm": 5.949429512023926, "learning_rate": 7.430214251488808e-05, "loss": 0.7663, "step": 19522 }, { "epoch": 1.3227860966190121, "grad_norm": 5.705995082855225, "learning_rate": 7.430077349579026e-05, "loss": 0.7453, "step": 19523 }, { "epoch": 1.3228538518869843, "grad_norm": 7.121650218963623, "learning_rate": 7.429940447669246e-05, "loss": 0.581, "step": 19524 }, { "epoch": 1.3229216071549563, "grad_norm": 7.888652324676514, "learning_rate": 7.429803545759464e-05, "loss": 0.6069, "step": 19525 }, { "epoch": 1.3229893624229283, "grad_norm": 4.939925193786621, "learning_rate": 7.429666643849682e-05, "loss": 0.5381, "step": 19526 }, { "epoch": 1.3230571176909005, "grad_norm": 5.22904109954834, "learning_rate": 7.4295297419399e-05, "loss": 0.6914, "step": 19527 }, { "epoch": 1.3231248729588725, "grad_norm": 5.434836387634277, "learning_rate": 7.429392840030118e-05, "loss": 0.8692, "step": 19528 }, { "epoch": 1.3231926282268447, "grad_norm": 6.608084678649902, "learning_rate": 7.429255938120337e-05, "loss": 0.7307, "step": 19529 }, { "epoch": 1.3232603834948167, "grad_norm": 5.904374599456787, "learning_rate": 7.429119036210555e-05, "loss": 0.6776, "step": 19530 }, { "epoch": 1.3233281387627889, "grad_norm": 5.720280170440674, "learning_rate": 7.428982134300773e-05, "loss": 0.7418, "step": 19531 }, { "epoch": 1.3233958940307609, "grad_norm": 5.843316555023193, "learning_rate": 7.428845232390991e-05, "loss": 0.6596, "step": 19532 }, { "epoch": 1.323463649298733, "grad_norm": 5.113088607788086, "learning_rate": 7.42870833048121e-05, "loss": 0.5953, "step": 19533 }, { "epoch": 1.323531404566705, "grad_norm": 8.359429359436035, "learning_rate": 7.428571428571429e-05, "loss": 0.8248, "step": 19534 }, { "epoch": 1.3235991598346772, "grad_norm": 6.242190361022949, "learning_rate": 7.428434526661647e-05, "loss": 0.6995, "step": 19535 }, { "epoch": 1.3236669151026492, "grad_norm": 4.716933727264404, "learning_rate": 7.428297624751865e-05, "loss": 0.5945, "step": 19536 }, { "epoch": 1.3237346703706212, "grad_norm": 7.377647399902344, "learning_rate": 7.428160722842084e-05, "loss": 0.6669, "step": 19537 }, { "epoch": 1.3238024256385934, "grad_norm": 5.865257740020752, "learning_rate": 7.428023820932302e-05, "loss": 0.6819, "step": 19538 }, { "epoch": 1.3238701809065656, "grad_norm": 5.339293003082275, "learning_rate": 7.42788691902252e-05, "loss": 0.6037, "step": 19539 }, { "epoch": 1.3239379361745376, "grad_norm": 6.475237846374512, "learning_rate": 7.42775001711274e-05, "loss": 0.6728, "step": 19540 }, { "epoch": 1.3240056914425096, "grad_norm": 6.000756740570068, "learning_rate": 7.427613115202958e-05, "loss": 0.7953, "step": 19541 }, { "epoch": 1.3240734467104818, "grad_norm": 6.11937952041626, "learning_rate": 7.427476213293176e-05, "loss": 0.5934, "step": 19542 }, { "epoch": 1.3241412019784538, "grad_norm": 5.853704452514648, "learning_rate": 7.427339311383395e-05, "loss": 0.7542, "step": 19543 }, { "epoch": 1.324208957246426, "grad_norm": 5.53123140335083, "learning_rate": 7.427202409473613e-05, "loss": 0.5521, "step": 19544 }, { "epoch": 1.324276712514398, "grad_norm": 4.9783196449279785, "learning_rate": 7.427065507563831e-05, "loss": 0.5368, "step": 19545 }, { "epoch": 1.3243444677823701, "grad_norm": 5.201394557952881, "learning_rate": 7.426928605654049e-05, "loss": 0.6644, "step": 19546 }, { "epoch": 1.3244122230503421, "grad_norm": 9.542466163635254, "learning_rate": 7.426791703744269e-05, "loss": 0.5159, "step": 19547 }, { "epoch": 1.3244799783183143, "grad_norm": 4.146124839782715, "learning_rate": 7.426654801834487e-05, "loss": 0.539, "step": 19548 }, { "epoch": 1.3245477335862863, "grad_norm": 7.882824420928955, "learning_rate": 7.426517899924705e-05, "loss": 0.6581, "step": 19549 }, { "epoch": 1.3246154888542585, "grad_norm": 7.6763811111450195, "learning_rate": 7.426380998014923e-05, "loss": 0.6129, "step": 19550 }, { "epoch": 1.3246832441222305, "grad_norm": 4.968385696411133, "learning_rate": 7.426244096105141e-05, "loss": 0.6813, "step": 19551 }, { "epoch": 1.3247509993902025, "grad_norm": 5.670926570892334, "learning_rate": 7.42610719419536e-05, "loss": 0.6188, "step": 19552 }, { "epoch": 1.3248187546581747, "grad_norm": 4.717343330383301, "learning_rate": 7.425970292285578e-05, "loss": 0.6913, "step": 19553 }, { "epoch": 1.3248865099261469, "grad_norm": 6.271756172180176, "learning_rate": 7.425833390375796e-05, "loss": 0.7425, "step": 19554 }, { "epoch": 1.3249542651941189, "grad_norm": 6.672755718231201, "learning_rate": 7.425696488466014e-05, "loss": 0.7338, "step": 19555 }, { "epoch": 1.3250220204620908, "grad_norm": 6.166086673736572, "learning_rate": 7.425559586556234e-05, "loss": 0.8215, "step": 19556 }, { "epoch": 1.325089775730063, "grad_norm": 7.117947101593018, "learning_rate": 7.425422684646452e-05, "loss": 0.695, "step": 19557 }, { "epoch": 1.325157530998035, "grad_norm": 4.843782424926758, "learning_rate": 7.42528578273667e-05, "loss": 0.7577, "step": 19558 }, { "epoch": 1.3252252862660072, "grad_norm": 5.3754730224609375, "learning_rate": 7.425148880826888e-05, "loss": 0.585, "step": 19559 }, { "epoch": 1.3252930415339792, "grad_norm": 4.789627552032471, "learning_rate": 7.425011978917106e-05, "loss": 0.7087, "step": 19560 }, { "epoch": 1.3253607968019514, "grad_norm": 7.695534706115723, "learning_rate": 7.424875077007325e-05, "loss": 0.6621, "step": 19561 }, { "epoch": 1.3254285520699234, "grad_norm": 6.93203592300415, "learning_rate": 7.424738175097543e-05, "loss": 0.6082, "step": 19562 }, { "epoch": 1.3254963073378956, "grad_norm": 5.022673606872559, "learning_rate": 7.424601273187761e-05, "loss": 0.7607, "step": 19563 }, { "epoch": 1.3255640626058676, "grad_norm": 4.476314067840576, "learning_rate": 7.424464371277979e-05, "loss": 0.5764, "step": 19564 }, { "epoch": 1.3256318178738398, "grad_norm": 5.612485408782959, "learning_rate": 7.424327469368199e-05, "loss": 0.6653, "step": 19565 }, { "epoch": 1.3256995731418117, "grad_norm": 9.542510986328125, "learning_rate": 7.424190567458417e-05, "loss": 1.0227, "step": 19566 }, { "epoch": 1.3257673284097837, "grad_norm": 8.777383804321289, "learning_rate": 7.424053665548635e-05, "loss": 0.5828, "step": 19567 }, { "epoch": 1.325835083677756, "grad_norm": 9.570206642150879, "learning_rate": 7.423916763638853e-05, "loss": 0.5985, "step": 19568 }, { "epoch": 1.3259028389457281, "grad_norm": 6.144327163696289, "learning_rate": 7.423779861729071e-05, "loss": 0.9355, "step": 19569 }, { "epoch": 1.3259705942137001, "grad_norm": 4.859616279602051, "learning_rate": 7.42364295981929e-05, "loss": 0.7555, "step": 19570 }, { "epoch": 1.326038349481672, "grad_norm": 7.9081645011901855, "learning_rate": 7.423506057909508e-05, "loss": 0.7662, "step": 19571 }, { "epoch": 1.3261061047496443, "grad_norm": 4.869895935058594, "learning_rate": 7.423369155999726e-05, "loss": 0.6938, "step": 19572 }, { "epoch": 1.3261738600176165, "grad_norm": 5.545702934265137, "learning_rate": 7.423232254089944e-05, "loss": 0.5665, "step": 19573 }, { "epoch": 1.3262416152855885, "grad_norm": 5.26690149307251, "learning_rate": 7.423095352180162e-05, "loss": 0.7668, "step": 19574 }, { "epoch": 1.3263093705535605, "grad_norm": 9.415175437927246, "learning_rate": 7.422958450270382e-05, "loss": 0.5775, "step": 19575 }, { "epoch": 1.3263771258215327, "grad_norm": 6.747313022613525, "learning_rate": 7.4228215483606e-05, "loss": 0.6484, "step": 19576 }, { "epoch": 1.3264448810895046, "grad_norm": 5.838394641876221, "learning_rate": 7.422684646450818e-05, "loss": 0.655, "step": 19577 }, { "epoch": 1.3265126363574768, "grad_norm": 4.876594543457031, "learning_rate": 7.422547744541036e-05, "loss": 0.6014, "step": 19578 }, { "epoch": 1.3265803916254488, "grad_norm": 4.580115795135498, "learning_rate": 7.422410842631255e-05, "loss": 0.8023, "step": 19579 }, { "epoch": 1.326648146893421, "grad_norm": 4.614436626434326, "learning_rate": 7.422273940721473e-05, "loss": 0.5718, "step": 19580 }, { "epoch": 1.326715902161393, "grad_norm": 5.435197353363037, "learning_rate": 7.422137038811691e-05, "loss": 0.7066, "step": 19581 }, { "epoch": 1.3267836574293652, "grad_norm": 5.277594566345215, "learning_rate": 7.422000136901909e-05, "loss": 0.7615, "step": 19582 }, { "epoch": 1.3268514126973372, "grad_norm": 3.9337337017059326, "learning_rate": 7.421863234992129e-05, "loss": 0.6078, "step": 19583 }, { "epoch": 1.3269191679653094, "grad_norm": 4.52541446685791, "learning_rate": 7.421726333082347e-05, "loss": 0.6275, "step": 19584 }, { "epoch": 1.3269869232332814, "grad_norm": 5.581685543060303, "learning_rate": 7.421589431172565e-05, "loss": 0.5348, "step": 19585 }, { "epoch": 1.3270546785012534, "grad_norm": 6.0455732345581055, "learning_rate": 7.421452529262784e-05, "loss": 0.7604, "step": 19586 }, { "epoch": 1.3271224337692256, "grad_norm": 4.712172031402588, "learning_rate": 7.421315627353002e-05, "loss": 0.6622, "step": 19587 }, { "epoch": 1.3271901890371978, "grad_norm": 4.916778564453125, "learning_rate": 7.42117872544322e-05, "loss": 0.5618, "step": 19588 }, { "epoch": 1.3272579443051697, "grad_norm": 4.74273157119751, "learning_rate": 7.42104182353344e-05, "loss": 0.6376, "step": 19589 }, { "epoch": 1.3273256995731417, "grad_norm": 6.103904724121094, "learning_rate": 7.420904921623658e-05, "loss": 0.9474, "step": 19590 }, { "epoch": 1.327393454841114, "grad_norm": 5.101203441619873, "learning_rate": 7.420768019713876e-05, "loss": 0.6023, "step": 19591 }, { "epoch": 1.327461210109086, "grad_norm": 6.057389736175537, "learning_rate": 7.420631117804094e-05, "loss": 0.9189, "step": 19592 }, { "epoch": 1.3275289653770581, "grad_norm": 5.859302043914795, "learning_rate": 7.420494215894313e-05, "loss": 0.5733, "step": 19593 }, { "epoch": 1.32759672064503, "grad_norm": 4.227105140686035, "learning_rate": 7.420357313984531e-05, "loss": 0.4545, "step": 19594 }, { "epoch": 1.3276644759130023, "grad_norm": 6.982843399047852, "learning_rate": 7.420220412074749e-05, "loss": 0.6488, "step": 19595 }, { "epoch": 1.3277322311809743, "grad_norm": 5.818058490753174, "learning_rate": 7.420083510164967e-05, "loss": 0.6148, "step": 19596 }, { "epoch": 1.3277999864489465, "grad_norm": 7.4769768714904785, "learning_rate": 7.419946608255187e-05, "loss": 0.6213, "step": 19597 }, { "epoch": 1.3278677417169185, "grad_norm": 5.863611698150635, "learning_rate": 7.419809706345405e-05, "loss": 0.8129, "step": 19598 }, { "epoch": 1.3279354969848907, "grad_norm": 4.631213188171387, "learning_rate": 7.419672804435623e-05, "loss": 0.7811, "step": 19599 }, { "epoch": 1.3280032522528626, "grad_norm": 7.944879055023193, "learning_rate": 7.41953590252584e-05, "loss": 0.7316, "step": 19600 }, { "epoch": 1.3280710075208346, "grad_norm": 4.542110919952393, "learning_rate": 7.419399000616059e-05, "loss": 0.6293, "step": 19601 }, { "epoch": 1.3281387627888068, "grad_norm": 3.8577167987823486, "learning_rate": 7.419262098706278e-05, "loss": 0.4599, "step": 19602 }, { "epoch": 1.328206518056779, "grad_norm": 5.283283233642578, "learning_rate": 7.419125196796496e-05, "loss": 0.6906, "step": 19603 }, { "epoch": 1.328274273324751, "grad_norm": 5.5352935791015625, "learning_rate": 7.418988294886714e-05, "loss": 0.5197, "step": 19604 }, { "epoch": 1.328342028592723, "grad_norm": 4.2517571449279785, "learning_rate": 7.418851392976932e-05, "loss": 0.5782, "step": 19605 }, { "epoch": 1.3284097838606952, "grad_norm": 5.656694412231445, "learning_rate": 7.41871449106715e-05, "loss": 0.7209, "step": 19606 }, { "epoch": 1.3284775391286672, "grad_norm": 4.821116924285889, "learning_rate": 7.41857758915737e-05, "loss": 0.5399, "step": 19607 }, { "epoch": 1.3285452943966394, "grad_norm": 4.683342933654785, "learning_rate": 7.418440687247588e-05, "loss": 0.7972, "step": 19608 }, { "epoch": 1.3286130496646114, "grad_norm": 4.1331562995910645, "learning_rate": 7.418303785337806e-05, "loss": 0.5357, "step": 19609 }, { "epoch": 1.3286808049325836, "grad_norm": 4.9924468994140625, "learning_rate": 7.418166883428024e-05, "loss": 0.6122, "step": 19610 }, { "epoch": 1.3287485602005555, "grad_norm": 6.538344383239746, "learning_rate": 7.418029981518243e-05, "loss": 0.7433, "step": 19611 }, { "epoch": 1.3288163154685277, "grad_norm": 5.256631851196289, "learning_rate": 7.417893079608461e-05, "loss": 0.6629, "step": 19612 }, { "epoch": 1.3288840707364997, "grad_norm": 6.6425461769104, "learning_rate": 7.417756177698679e-05, "loss": 0.7148, "step": 19613 }, { "epoch": 1.328951826004472, "grad_norm": 5.989322185516357, "learning_rate": 7.417619275788897e-05, "loss": 0.7008, "step": 19614 }, { "epoch": 1.329019581272444, "grad_norm": 5.072157382965088, "learning_rate": 7.417482373879115e-05, "loss": 0.5458, "step": 19615 }, { "epoch": 1.3290873365404159, "grad_norm": 5.237290382385254, "learning_rate": 7.417345471969335e-05, "loss": 0.5554, "step": 19616 }, { "epoch": 1.329155091808388, "grad_norm": 4.874074459075928, "learning_rate": 7.417208570059553e-05, "loss": 0.6184, "step": 19617 }, { "epoch": 1.3292228470763603, "grad_norm": 5.974160671234131, "learning_rate": 7.41707166814977e-05, "loss": 0.547, "step": 19618 }, { "epoch": 1.3292906023443323, "grad_norm": 5.266088485717773, "learning_rate": 7.416934766239989e-05, "loss": 0.7072, "step": 19619 }, { "epoch": 1.3293583576123043, "grad_norm": 5.29006290435791, "learning_rate": 7.416797864330208e-05, "loss": 0.4658, "step": 19620 }, { "epoch": 1.3294261128802765, "grad_norm": 6.996419906616211, "learning_rate": 7.416660962420426e-05, "loss": 0.7235, "step": 19621 }, { "epoch": 1.3294938681482487, "grad_norm": 7.4278435707092285, "learning_rate": 7.416524060510644e-05, "loss": 0.6816, "step": 19622 }, { "epoch": 1.3295616234162206, "grad_norm": 5.706221103668213, "learning_rate": 7.416387158600862e-05, "loss": 0.5939, "step": 19623 }, { "epoch": 1.3296293786841926, "grad_norm": 4.282823085784912, "learning_rate": 7.41625025669108e-05, "loss": 0.7656, "step": 19624 }, { "epoch": 1.3296971339521648, "grad_norm": 6.332409381866455, "learning_rate": 7.4161133547813e-05, "loss": 0.7396, "step": 19625 }, { "epoch": 1.3297648892201368, "grad_norm": 7.1764631271362305, "learning_rate": 7.415976452871518e-05, "loss": 0.9027, "step": 19626 }, { "epoch": 1.329832644488109, "grad_norm": 6.798389434814453, "learning_rate": 7.415839550961736e-05, "loss": 0.6686, "step": 19627 }, { "epoch": 1.329900399756081, "grad_norm": 7.829345226287842, "learning_rate": 7.415702649051954e-05, "loss": 0.9302, "step": 19628 }, { "epoch": 1.3299681550240532, "grad_norm": 6.066162109375, "learning_rate": 7.415565747142173e-05, "loss": 0.766, "step": 19629 }, { "epoch": 1.3300359102920252, "grad_norm": 5.32252311706543, "learning_rate": 7.415428845232391e-05, "loss": 0.6671, "step": 19630 }, { "epoch": 1.3301036655599974, "grad_norm": 9.345575332641602, "learning_rate": 7.415291943322609e-05, "loss": 0.7603, "step": 19631 }, { "epoch": 1.3301714208279694, "grad_norm": 6.46652364730835, "learning_rate": 7.415155041412829e-05, "loss": 0.6882, "step": 19632 }, { "epoch": 1.3302391760959416, "grad_norm": 6.323260307312012, "learning_rate": 7.415018139503047e-05, "loss": 0.8026, "step": 19633 }, { "epoch": 1.3303069313639135, "grad_norm": 5.357861518859863, "learning_rate": 7.414881237593265e-05, "loss": 0.6883, "step": 19634 }, { "epoch": 1.3303746866318855, "grad_norm": 6.475422382354736, "learning_rate": 7.414744335683484e-05, "loss": 0.8861, "step": 19635 }, { "epoch": 1.3304424418998577, "grad_norm": 5.2813591957092285, "learning_rate": 7.414607433773702e-05, "loss": 0.7761, "step": 19636 }, { "epoch": 1.33051019716783, "grad_norm": 9.309098243713379, "learning_rate": 7.41447053186392e-05, "loss": 1.1316, "step": 19637 }, { "epoch": 1.330577952435802, "grad_norm": 8.387908935546875, "learning_rate": 7.414333629954138e-05, "loss": 0.6867, "step": 19638 }, { "epoch": 1.3306457077037739, "grad_norm": 7.428659439086914, "learning_rate": 7.414196728044358e-05, "loss": 0.7212, "step": 19639 }, { "epoch": 1.330713462971746, "grad_norm": 5.903443336486816, "learning_rate": 7.414059826134576e-05, "loss": 0.6809, "step": 19640 }, { "epoch": 1.330781218239718, "grad_norm": 6.738523960113525, "learning_rate": 7.413922924224794e-05, "loss": 0.707, "step": 19641 }, { "epoch": 1.3308489735076903, "grad_norm": 4.826591968536377, "learning_rate": 7.413786022315012e-05, "loss": 0.6356, "step": 19642 }, { "epoch": 1.3309167287756622, "grad_norm": 4.637776851654053, "learning_rate": 7.413649120405231e-05, "loss": 0.5584, "step": 19643 }, { "epoch": 1.3309844840436345, "grad_norm": 4.120635509490967, "learning_rate": 7.413512218495449e-05, "loss": 0.4823, "step": 19644 }, { "epoch": 1.3310522393116064, "grad_norm": 4.953101634979248, "learning_rate": 7.413375316585667e-05, "loss": 0.6662, "step": 19645 }, { "epoch": 1.3311199945795786, "grad_norm": 5.180854797363281, "learning_rate": 7.413238414675885e-05, "loss": 0.6037, "step": 19646 }, { "epoch": 1.3311877498475506, "grad_norm": 5.885667324066162, "learning_rate": 7.413101512766103e-05, "loss": 0.6361, "step": 19647 }, { "epoch": 1.3312555051155228, "grad_norm": 4.5806050300598145, "learning_rate": 7.412964610856323e-05, "loss": 0.6095, "step": 19648 }, { "epoch": 1.3313232603834948, "grad_norm": 7.001134395599365, "learning_rate": 7.41282770894654e-05, "loss": 0.7755, "step": 19649 }, { "epoch": 1.3313910156514668, "grad_norm": 5.782771587371826, "learning_rate": 7.412690807036759e-05, "loss": 0.7335, "step": 19650 }, { "epoch": 1.331458770919439, "grad_norm": 9.202961921691895, "learning_rate": 7.412553905126977e-05, "loss": 0.4738, "step": 19651 }, { "epoch": 1.3315265261874112, "grad_norm": 7.2362446784973145, "learning_rate": 7.412417003217196e-05, "loss": 0.7815, "step": 19652 }, { "epoch": 1.3315942814553832, "grad_norm": 6.054732799530029, "learning_rate": 7.412280101307414e-05, "loss": 0.8273, "step": 19653 }, { "epoch": 1.3316620367233551, "grad_norm": 6.161701202392578, "learning_rate": 7.412143199397632e-05, "loss": 0.6898, "step": 19654 }, { "epoch": 1.3317297919913273, "grad_norm": 6.232685565948486, "learning_rate": 7.41200629748785e-05, "loss": 0.7474, "step": 19655 }, { "epoch": 1.3317975472592993, "grad_norm": 5.072072982788086, "learning_rate": 7.411869395578068e-05, "loss": 0.7804, "step": 19656 }, { "epoch": 1.3318653025272715, "grad_norm": 7.281087398529053, "learning_rate": 7.411732493668288e-05, "loss": 0.7387, "step": 19657 }, { "epoch": 1.3319330577952435, "grad_norm": 4.972793102264404, "learning_rate": 7.411595591758506e-05, "loss": 0.6745, "step": 19658 }, { "epoch": 1.3320008130632157, "grad_norm": 4.346713066101074, "learning_rate": 7.411458689848724e-05, "loss": 0.5604, "step": 19659 }, { "epoch": 1.3320685683311877, "grad_norm": 6.858221530914307, "learning_rate": 7.411321787938942e-05, "loss": 0.7522, "step": 19660 }, { "epoch": 1.33213632359916, "grad_norm": 7.996399402618408, "learning_rate": 7.41118488602916e-05, "loss": 0.6967, "step": 19661 }, { "epoch": 1.3322040788671319, "grad_norm": 6.0486369132995605, "learning_rate": 7.411047984119379e-05, "loss": 0.8641, "step": 19662 }, { "epoch": 1.332271834135104, "grad_norm": 5.254128932952881, "learning_rate": 7.410911082209597e-05, "loss": 0.8916, "step": 19663 }, { "epoch": 1.332339589403076, "grad_norm": 4.869318008422852, "learning_rate": 7.410774180299815e-05, "loss": 0.5519, "step": 19664 }, { "epoch": 1.332407344671048, "grad_norm": 5.177772045135498, "learning_rate": 7.410637278390033e-05, "loss": 0.5959, "step": 19665 }, { "epoch": 1.3324750999390202, "grad_norm": 6.5965166091918945, "learning_rate": 7.410500376480253e-05, "loss": 0.626, "step": 19666 }, { "epoch": 1.3325428552069924, "grad_norm": 6.8329057693481445, "learning_rate": 7.41036347457047e-05, "loss": 0.6347, "step": 19667 }, { "epoch": 1.3326106104749644, "grad_norm": 4.243175983428955, "learning_rate": 7.410226572660689e-05, "loss": 0.5962, "step": 19668 }, { "epoch": 1.3326783657429364, "grad_norm": 6.972354412078857, "learning_rate": 7.410089670750907e-05, "loss": 0.7804, "step": 19669 }, { "epoch": 1.3327461210109086, "grad_norm": 5.446687698364258, "learning_rate": 7.409952768841125e-05, "loss": 0.6006, "step": 19670 }, { "epoch": 1.3328138762788808, "grad_norm": 6.940367698669434, "learning_rate": 7.409815866931344e-05, "loss": 0.9298, "step": 19671 }, { "epoch": 1.3328816315468528, "grad_norm": 4.870388031005859, "learning_rate": 7.409678965021562e-05, "loss": 0.6237, "step": 19672 }, { "epoch": 1.3329493868148248, "grad_norm": 5.922102451324463, "learning_rate": 7.40954206311178e-05, "loss": 0.6489, "step": 19673 }, { "epoch": 1.333017142082797, "grad_norm": 5.451462268829346, "learning_rate": 7.409405161201998e-05, "loss": 0.6362, "step": 19674 }, { "epoch": 1.333084897350769, "grad_norm": 6.219964027404785, "learning_rate": 7.409268259292218e-05, "loss": 0.7475, "step": 19675 }, { "epoch": 1.3331526526187412, "grad_norm": 5.39208459854126, "learning_rate": 7.409131357382436e-05, "loss": 0.6291, "step": 19676 }, { "epoch": 1.3332204078867131, "grad_norm": 5.417110919952393, "learning_rate": 7.408994455472654e-05, "loss": 0.5158, "step": 19677 }, { "epoch": 1.3332881631546853, "grad_norm": 6.18552827835083, "learning_rate": 7.408857553562873e-05, "loss": 0.678, "step": 19678 }, { "epoch": 1.3333559184226573, "grad_norm": 5.716446876525879, "learning_rate": 7.408720651653091e-05, "loss": 0.6249, "step": 19679 }, { "epoch": 1.3334236736906295, "grad_norm": 8.19953727722168, "learning_rate": 7.408583749743309e-05, "loss": 0.894, "step": 19680 }, { "epoch": 1.3334914289586015, "grad_norm": 5.739817142486572, "learning_rate": 7.408446847833528e-05, "loss": 0.6807, "step": 19681 }, { "epoch": 1.3335591842265737, "grad_norm": 4.3875203132629395, "learning_rate": 7.408309945923747e-05, "loss": 0.5396, "step": 19682 }, { "epoch": 1.3336269394945457, "grad_norm": 6.968618392944336, "learning_rate": 7.408173044013965e-05, "loss": 0.7229, "step": 19683 }, { "epoch": 1.3336946947625177, "grad_norm": 5.650720596313477, "learning_rate": 7.408036142104183e-05, "loss": 0.6867, "step": 19684 }, { "epoch": 1.3337624500304899, "grad_norm": 7.317506313323975, "learning_rate": 7.407899240194402e-05, "loss": 0.6604, "step": 19685 }, { "epoch": 1.333830205298462, "grad_norm": 5.676753520965576, "learning_rate": 7.40776233828462e-05, "loss": 0.619, "step": 19686 }, { "epoch": 1.333897960566434, "grad_norm": 6.961564064025879, "learning_rate": 7.407625436374838e-05, "loss": 0.6463, "step": 19687 }, { "epoch": 1.333965715834406, "grad_norm": 7.472172260284424, "learning_rate": 7.407488534465056e-05, "loss": 0.8027, "step": 19688 }, { "epoch": 1.3340334711023782, "grad_norm": 6.359547138214111, "learning_rate": 7.407351632555275e-05, "loss": 0.5472, "step": 19689 }, { "epoch": 1.3341012263703502, "grad_norm": 4.493527889251709, "learning_rate": 7.407214730645494e-05, "loss": 0.6642, "step": 19690 }, { "epoch": 1.3341689816383224, "grad_norm": 5.469796657562256, "learning_rate": 7.407077828735712e-05, "loss": 0.8223, "step": 19691 }, { "epoch": 1.3342367369062944, "grad_norm": 10.444416999816895, "learning_rate": 7.40694092682593e-05, "loss": 0.8211, "step": 19692 }, { "epoch": 1.3343044921742666, "grad_norm": 5.852866172790527, "learning_rate": 7.406804024916148e-05, "loss": 0.7076, "step": 19693 }, { "epoch": 1.3343722474422386, "grad_norm": 5.985776901245117, "learning_rate": 7.406667123006367e-05, "loss": 0.5783, "step": 19694 }, { "epoch": 1.3344400027102108, "grad_norm": 4.740921497344971, "learning_rate": 7.406530221096585e-05, "loss": 0.6289, "step": 19695 }, { "epoch": 1.3345077579781828, "grad_norm": 5.922240257263184, "learning_rate": 7.406393319186803e-05, "loss": 0.5349, "step": 19696 }, { "epoch": 1.334575513246155, "grad_norm": 6.107593059539795, "learning_rate": 7.406256417277021e-05, "loss": 0.5415, "step": 19697 }, { "epoch": 1.334643268514127, "grad_norm": 5.477252960205078, "learning_rate": 7.40611951536724e-05, "loss": 0.7243, "step": 19698 }, { "epoch": 1.334711023782099, "grad_norm": 5.740756511688232, "learning_rate": 7.405982613457459e-05, "loss": 0.582, "step": 19699 }, { "epoch": 1.3347787790500711, "grad_norm": 5.75766658782959, "learning_rate": 7.405845711547677e-05, "loss": 0.5948, "step": 19700 }, { "epoch": 1.3348465343180433, "grad_norm": 6.117081642150879, "learning_rate": 7.405708809637895e-05, "loss": 0.8801, "step": 19701 }, { "epoch": 1.3349142895860153, "grad_norm": 6.683793544769287, "learning_rate": 7.405571907728113e-05, "loss": 0.7528, "step": 19702 }, { "epoch": 1.3349820448539873, "grad_norm": 4.820801734924316, "learning_rate": 7.405435005818332e-05, "loss": 0.6003, "step": 19703 }, { "epoch": 1.3350498001219595, "grad_norm": 4.525071144104004, "learning_rate": 7.40529810390855e-05, "loss": 0.5244, "step": 19704 }, { "epoch": 1.3351175553899315, "grad_norm": 14.6823091506958, "learning_rate": 7.405161201998768e-05, "loss": 0.5611, "step": 19705 }, { "epoch": 1.3351853106579037, "grad_norm": 5.386323928833008, "learning_rate": 7.405024300088986e-05, "loss": 0.7235, "step": 19706 }, { "epoch": 1.3352530659258757, "grad_norm": 5.542819023132324, "learning_rate": 7.404887398179204e-05, "loss": 0.8549, "step": 19707 }, { "epoch": 1.3353208211938479, "grad_norm": 5.006810188293457, "learning_rate": 7.404750496269424e-05, "loss": 0.5291, "step": 19708 }, { "epoch": 1.3353885764618199, "grad_norm": 5.515298366546631, "learning_rate": 7.404613594359642e-05, "loss": 0.5716, "step": 19709 }, { "epoch": 1.335456331729792, "grad_norm": 5.32451057434082, "learning_rate": 7.40447669244986e-05, "loss": 0.4452, "step": 19710 }, { "epoch": 1.335524086997764, "grad_norm": 6.715766906738281, "learning_rate": 7.404339790540078e-05, "loss": 0.8271, "step": 19711 }, { "epoch": 1.3355918422657362, "grad_norm": 6.276601314544678, "learning_rate": 7.404202888630297e-05, "loss": 0.4965, "step": 19712 }, { "epoch": 1.3356595975337082, "grad_norm": 7.835783958435059, "learning_rate": 7.404065986720515e-05, "loss": 0.7203, "step": 19713 }, { "epoch": 1.3357273528016802, "grad_norm": 5.7291364669799805, "learning_rate": 7.403929084810733e-05, "loss": 0.5428, "step": 19714 }, { "epoch": 1.3357951080696524, "grad_norm": 5.314157962799072, "learning_rate": 7.403792182900951e-05, "loss": 0.7079, "step": 19715 }, { "epoch": 1.3358628633376246, "grad_norm": 5.838201522827148, "learning_rate": 7.403655280991169e-05, "loss": 0.7707, "step": 19716 }, { "epoch": 1.3359306186055966, "grad_norm": 5.3242902755737305, "learning_rate": 7.403518379081389e-05, "loss": 0.4563, "step": 19717 }, { "epoch": 1.3359983738735686, "grad_norm": 5.365819931030273, "learning_rate": 7.403381477171607e-05, "loss": 0.6084, "step": 19718 }, { "epoch": 1.3360661291415408, "grad_norm": 6.16398811340332, "learning_rate": 7.403244575261825e-05, "loss": 0.7249, "step": 19719 }, { "epoch": 1.336133884409513, "grad_norm": 7.386852264404297, "learning_rate": 7.403107673352043e-05, "loss": 0.5079, "step": 19720 }, { "epoch": 1.336201639677485, "grad_norm": 5.932759761810303, "learning_rate": 7.402970771442262e-05, "loss": 0.87, "step": 19721 }, { "epoch": 1.336269394945457, "grad_norm": 8.247381210327148, "learning_rate": 7.40283386953248e-05, "loss": 0.7828, "step": 19722 }, { "epoch": 1.3363371502134291, "grad_norm": 5.505183696746826, "learning_rate": 7.402696967622698e-05, "loss": 0.7579, "step": 19723 }, { "epoch": 1.3364049054814011, "grad_norm": 6.413590908050537, "learning_rate": 7.402560065712916e-05, "loss": 0.7976, "step": 19724 }, { "epoch": 1.3364726607493733, "grad_norm": 5.974003791809082, "learning_rate": 7.402423163803136e-05, "loss": 0.725, "step": 19725 }, { "epoch": 1.3365404160173453, "grad_norm": 6.018005847930908, "learning_rate": 7.402286261893354e-05, "loss": 0.8571, "step": 19726 }, { "epoch": 1.3366081712853175, "grad_norm": 4.581902503967285, "learning_rate": 7.402149359983572e-05, "loss": 0.5336, "step": 19727 }, { "epoch": 1.3366759265532895, "grad_norm": 5.49620246887207, "learning_rate": 7.402012458073791e-05, "loss": 0.7526, "step": 19728 }, { "epoch": 1.3367436818212615, "grad_norm": 5.842744827270508, "learning_rate": 7.401875556164009e-05, "loss": 0.692, "step": 19729 }, { "epoch": 1.3368114370892337, "grad_norm": 3.9143564701080322, "learning_rate": 7.401738654254228e-05, "loss": 0.5943, "step": 19730 }, { "epoch": 1.3368791923572059, "grad_norm": 7.945070743560791, "learning_rate": 7.401601752344446e-05, "loss": 0.5181, "step": 19731 }, { "epoch": 1.3369469476251779, "grad_norm": 5.2820963859558105, "learning_rate": 7.401464850434664e-05, "loss": 0.635, "step": 19732 }, { "epoch": 1.3370147028931498, "grad_norm": 6.0084733963012695, "learning_rate": 7.401327948524883e-05, "loss": 0.6876, "step": 19733 }, { "epoch": 1.337082458161122, "grad_norm": 6.210163116455078, "learning_rate": 7.4011910466151e-05, "loss": 0.6126, "step": 19734 }, { "epoch": 1.3371502134290942, "grad_norm": 7.7409443855285645, "learning_rate": 7.40105414470532e-05, "loss": 1.0074, "step": 19735 }, { "epoch": 1.3372179686970662, "grad_norm": 4.860037803649902, "learning_rate": 7.400917242795538e-05, "loss": 0.5741, "step": 19736 }, { "epoch": 1.3372857239650382, "grad_norm": 5.615636348724365, "learning_rate": 7.400780340885756e-05, "loss": 0.7633, "step": 19737 }, { "epoch": 1.3373534792330104, "grad_norm": 5.90158224105835, "learning_rate": 7.400643438975974e-05, "loss": 0.5758, "step": 19738 }, { "epoch": 1.3374212345009824, "grad_norm": 5.403393268585205, "learning_rate": 7.400506537066192e-05, "loss": 0.6195, "step": 19739 }, { "epoch": 1.3374889897689546, "grad_norm": 4.7914299964904785, "learning_rate": 7.400369635156411e-05, "loss": 0.6565, "step": 19740 }, { "epoch": 1.3375567450369266, "grad_norm": 4.943591117858887, "learning_rate": 7.40023273324663e-05, "loss": 0.5891, "step": 19741 }, { "epoch": 1.3376245003048988, "grad_norm": 7.600788593292236, "learning_rate": 7.400095831336848e-05, "loss": 0.4996, "step": 19742 }, { "epoch": 1.3376922555728707, "grad_norm": 6.198159694671631, "learning_rate": 7.399958929427066e-05, "loss": 0.7334, "step": 19743 }, { "epoch": 1.337760010840843, "grad_norm": 5.308366298675537, "learning_rate": 7.399822027517285e-05, "loss": 0.6985, "step": 19744 }, { "epoch": 1.337827766108815, "grad_norm": 5.500591278076172, "learning_rate": 7.399685125607503e-05, "loss": 0.7543, "step": 19745 }, { "epoch": 1.3378955213767871, "grad_norm": 4.954442977905273, "learning_rate": 7.399548223697721e-05, "loss": 0.642, "step": 19746 }, { "epoch": 1.3379632766447591, "grad_norm": 7.143369197845459, "learning_rate": 7.399411321787939e-05, "loss": 0.8015, "step": 19747 }, { "epoch": 1.338031031912731, "grad_norm": 5.742795467376709, "learning_rate": 7.399274419878157e-05, "loss": 0.7698, "step": 19748 }, { "epoch": 1.3380987871807033, "grad_norm": 4.979240417480469, "learning_rate": 7.399137517968376e-05, "loss": 0.7582, "step": 19749 }, { "epoch": 1.3381665424486755, "grad_norm": 5.437368869781494, "learning_rate": 7.399000616058595e-05, "loss": 0.5713, "step": 19750 }, { "epoch": 1.3382342977166475, "grad_norm": 6.680069446563721, "learning_rate": 7.398863714148813e-05, "loss": 0.7758, "step": 19751 }, { "epoch": 1.3383020529846195, "grad_norm": 8.733224868774414, "learning_rate": 7.39872681223903e-05, "loss": 0.9019, "step": 19752 }, { "epoch": 1.3383698082525917, "grad_norm": 6.1617045402526855, "learning_rate": 7.39858991032925e-05, "loss": 0.6982, "step": 19753 }, { "epoch": 1.3384375635205636, "grad_norm": 5.574944496154785, "learning_rate": 7.398453008419468e-05, "loss": 0.8184, "step": 19754 }, { "epoch": 1.3385053187885358, "grad_norm": 6.665420055389404, "learning_rate": 7.398316106509686e-05, "loss": 0.7169, "step": 19755 }, { "epoch": 1.3385730740565078, "grad_norm": 5.491334915161133, "learning_rate": 7.398179204599904e-05, "loss": 0.61, "step": 19756 }, { "epoch": 1.33864082932448, "grad_norm": 6.688482761383057, "learning_rate": 7.398042302690122e-05, "loss": 0.7411, "step": 19757 }, { "epoch": 1.338708584592452, "grad_norm": 6.365215301513672, "learning_rate": 7.397905400780342e-05, "loss": 0.7613, "step": 19758 }, { "epoch": 1.3387763398604242, "grad_norm": 6.205850124359131, "learning_rate": 7.39776849887056e-05, "loss": 0.4851, "step": 19759 }, { "epoch": 1.3388440951283962, "grad_norm": 4.762978553771973, "learning_rate": 7.397631596960778e-05, "loss": 0.5479, "step": 19760 }, { "epoch": 1.3389118503963684, "grad_norm": 5.916381359100342, "learning_rate": 7.397494695050996e-05, "loss": 0.8127, "step": 19761 }, { "epoch": 1.3389796056643404, "grad_norm": 11.313655853271484, "learning_rate": 7.397357793141214e-05, "loss": 0.6643, "step": 19762 }, { "epoch": 1.3390473609323124, "grad_norm": 6.884215831756592, "learning_rate": 7.397220891231433e-05, "loss": 0.6442, "step": 19763 }, { "epoch": 1.3391151162002846, "grad_norm": 5.777895450592041, "learning_rate": 7.397083989321651e-05, "loss": 0.8416, "step": 19764 }, { "epoch": 1.3391828714682568, "grad_norm": 7.961150169372559, "learning_rate": 7.396947087411869e-05, "loss": 0.7217, "step": 19765 }, { "epoch": 1.3392506267362287, "grad_norm": 5.622987747192383, "learning_rate": 7.396810185502087e-05, "loss": 0.7471, "step": 19766 }, { "epoch": 1.3393183820042007, "grad_norm": 8.08484935760498, "learning_rate": 7.396673283592307e-05, "loss": 0.7683, "step": 19767 }, { "epoch": 1.339386137272173, "grad_norm": 7.479506492614746, "learning_rate": 7.396536381682525e-05, "loss": 0.9016, "step": 19768 }, { "epoch": 1.3394538925401451, "grad_norm": 4.879533290863037, "learning_rate": 7.396399479772743e-05, "loss": 0.5581, "step": 19769 }, { "epoch": 1.339521647808117, "grad_norm": 6.326993942260742, "learning_rate": 7.39626257786296e-05, "loss": 0.601, "step": 19770 }, { "epoch": 1.339589403076089, "grad_norm": 5.830367088317871, "learning_rate": 7.39612567595318e-05, "loss": 0.6905, "step": 19771 }, { "epoch": 1.3396571583440613, "grad_norm": 8.22908878326416, "learning_rate": 7.395988774043398e-05, "loss": 0.8502, "step": 19772 }, { "epoch": 1.3397249136120333, "grad_norm": 5.564326763153076, "learning_rate": 7.395851872133616e-05, "loss": 0.7243, "step": 19773 }, { "epoch": 1.3397926688800055, "grad_norm": 8.302552223205566, "learning_rate": 7.395714970223835e-05, "loss": 0.6313, "step": 19774 }, { "epoch": 1.3398604241479775, "grad_norm": 7.87471342086792, "learning_rate": 7.395578068314054e-05, "loss": 0.6092, "step": 19775 }, { "epoch": 1.3399281794159497, "grad_norm": 5.355712890625, "learning_rate": 7.395441166404272e-05, "loss": 0.7247, "step": 19776 }, { "epoch": 1.3399959346839216, "grad_norm": 5.848559856414795, "learning_rate": 7.395304264494491e-05, "loss": 0.5787, "step": 19777 }, { "epoch": 1.3400636899518936, "grad_norm": 5.752941131591797, "learning_rate": 7.395167362584709e-05, "loss": 0.7809, "step": 19778 }, { "epoch": 1.3401314452198658, "grad_norm": 8.121430397033691, "learning_rate": 7.395030460674927e-05, "loss": 0.8262, "step": 19779 }, { "epoch": 1.340199200487838, "grad_norm": 5.677140235900879, "learning_rate": 7.394893558765145e-05, "loss": 0.7486, "step": 19780 }, { "epoch": 1.34026695575581, "grad_norm": 5.764679908752441, "learning_rate": 7.394756656855364e-05, "loss": 0.7841, "step": 19781 }, { "epoch": 1.340334711023782, "grad_norm": 5.689769268035889, "learning_rate": 7.394619754945582e-05, "loss": 0.6569, "step": 19782 }, { "epoch": 1.3404024662917542, "grad_norm": 6.480662822723389, "learning_rate": 7.3944828530358e-05, "loss": 0.5927, "step": 19783 }, { "epoch": 1.3404702215597264, "grad_norm": 4.857582092285156, "learning_rate": 7.394345951126019e-05, "loss": 0.5097, "step": 19784 }, { "epoch": 1.3405379768276984, "grad_norm": 6.696939945220947, "learning_rate": 7.394209049216238e-05, "loss": 0.9862, "step": 19785 }, { "epoch": 1.3406057320956704, "grad_norm": 6.558492183685303, "learning_rate": 7.394072147306456e-05, "loss": 0.8778, "step": 19786 }, { "epoch": 1.3406734873636426, "grad_norm": 5.839897155761719, "learning_rate": 7.393935245396674e-05, "loss": 0.5926, "step": 19787 }, { "epoch": 1.3407412426316145, "grad_norm": 6.95966911315918, "learning_rate": 7.393798343486892e-05, "loss": 0.6054, "step": 19788 }, { "epoch": 1.3408089978995867, "grad_norm": 6.446970462799072, "learning_rate": 7.39366144157711e-05, "loss": 0.7114, "step": 19789 }, { "epoch": 1.3408767531675587, "grad_norm": 6.5432891845703125, "learning_rate": 7.39352453966733e-05, "loss": 0.7445, "step": 19790 }, { "epoch": 1.340944508435531, "grad_norm": 6.882696628570557, "learning_rate": 7.393387637757547e-05, "loss": 0.7109, "step": 19791 }, { "epoch": 1.341012263703503, "grad_norm": 5.472219944000244, "learning_rate": 7.393250735847766e-05, "loss": 0.7475, "step": 19792 }, { "epoch": 1.341080018971475, "grad_norm": 6.459081649780273, "learning_rate": 7.393113833937984e-05, "loss": 0.8222, "step": 19793 }, { "epoch": 1.341147774239447, "grad_norm": 5.561173915863037, "learning_rate": 7.392976932028202e-05, "loss": 0.8658, "step": 19794 }, { "epoch": 1.3412155295074193, "grad_norm": 5.897449970245361, "learning_rate": 7.392840030118421e-05, "loss": 0.6467, "step": 19795 }, { "epoch": 1.3412832847753913, "grad_norm": 7.28239631652832, "learning_rate": 7.392703128208639e-05, "loss": 0.6559, "step": 19796 }, { "epoch": 1.3413510400433633, "grad_norm": 4.274104118347168, "learning_rate": 7.392566226298857e-05, "loss": 0.6085, "step": 19797 }, { "epoch": 1.3414187953113355, "grad_norm": 6.912581443786621, "learning_rate": 7.392429324389075e-05, "loss": 0.5061, "step": 19798 }, { "epoch": 1.3414865505793077, "grad_norm": 6.572048187255859, "learning_rate": 7.392292422479294e-05, "loss": 0.7635, "step": 19799 }, { "epoch": 1.3415543058472796, "grad_norm": 8.052891731262207, "learning_rate": 7.392155520569512e-05, "loss": 0.5573, "step": 19800 }, { "epoch": 1.3416220611152516, "grad_norm": 7.067938327789307, "learning_rate": 7.39201861865973e-05, "loss": 0.8178, "step": 19801 }, { "epoch": 1.3416898163832238, "grad_norm": 4.831704616546631, "learning_rate": 7.391881716749949e-05, "loss": 0.501, "step": 19802 }, { "epoch": 1.3417575716511958, "grad_norm": 5.58782958984375, "learning_rate": 7.391744814840167e-05, "loss": 0.8134, "step": 19803 }, { "epoch": 1.341825326919168, "grad_norm": 7.2371907234191895, "learning_rate": 7.391607912930386e-05, "loss": 0.6641, "step": 19804 }, { "epoch": 1.34189308218714, "grad_norm": 7.211178779602051, "learning_rate": 7.391471011020604e-05, "loss": 0.8097, "step": 19805 }, { "epoch": 1.3419608374551122, "grad_norm": 5.650362968444824, "learning_rate": 7.391334109110822e-05, "loss": 0.7149, "step": 19806 }, { "epoch": 1.3420285927230842, "grad_norm": 6.270521640777588, "learning_rate": 7.39119720720104e-05, "loss": 0.6627, "step": 19807 }, { "epoch": 1.3420963479910564, "grad_norm": 7.388031482696533, "learning_rate": 7.39106030529126e-05, "loss": 0.8797, "step": 19808 }, { "epoch": 1.3421641032590284, "grad_norm": 5.9021806716918945, "learning_rate": 7.390923403381478e-05, "loss": 0.6674, "step": 19809 }, { "epoch": 1.3422318585270006, "grad_norm": 5.441925525665283, "learning_rate": 7.390786501471696e-05, "loss": 0.859, "step": 19810 }, { "epoch": 1.3422996137949725, "grad_norm": 5.86013650894165, "learning_rate": 7.390649599561914e-05, "loss": 0.5684, "step": 19811 }, { "epoch": 1.3423673690629445, "grad_norm": 6.234279155731201, "learning_rate": 7.390512697652132e-05, "loss": 0.7527, "step": 19812 }, { "epoch": 1.3424351243309167, "grad_norm": 6.3187761306762695, "learning_rate": 7.390375795742351e-05, "loss": 0.6248, "step": 19813 }, { "epoch": 1.342502879598889, "grad_norm": 6.773894786834717, "learning_rate": 7.390238893832569e-05, "loss": 0.6642, "step": 19814 }, { "epoch": 1.342570634866861, "grad_norm": 5.440525531768799, "learning_rate": 7.390101991922787e-05, "loss": 0.6297, "step": 19815 }, { "epoch": 1.3426383901348329, "grad_norm": 5.599619388580322, "learning_rate": 7.389965090013005e-05, "loss": 0.52, "step": 19816 }, { "epoch": 1.342706145402805, "grad_norm": 4.75631856918335, "learning_rate": 7.389828188103224e-05, "loss": 0.702, "step": 19817 }, { "epoch": 1.3427739006707773, "grad_norm": 4.91621208190918, "learning_rate": 7.389691286193443e-05, "loss": 0.6136, "step": 19818 }, { "epoch": 1.3428416559387493, "grad_norm": 4.944465160369873, "learning_rate": 7.38955438428366e-05, "loss": 0.4995, "step": 19819 }, { "epoch": 1.3429094112067212, "grad_norm": 5.446164131164551, "learning_rate": 7.38941748237388e-05, "loss": 0.5898, "step": 19820 }, { "epoch": 1.3429771664746935, "grad_norm": 7.435744285583496, "learning_rate": 7.389280580464098e-05, "loss": 0.5663, "step": 19821 }, { "epoch": 1.3430449217426654, "grad_norm": 6.098787307739258, "learning_rate": 7.389143678554316e-05, "loss": 0.8143, "step": 19822 }, { "epoch": 1.3431126770106376, "grad_norm": 6.8340535163879395, "learning_rate": 7.389006776644535e-05, "loss": 0.5677, "step": 19823 }, { "epoch": 1.3431804322786096, "grad_norm": 5.451878547668457, "learning_rate": 7.388869874734753e-05, "loss": 0.6436, "step": 19824 }, { "epoch": 1.3432481875465818, "grad_norm": 5.109428882598877, "learning_rate": 7.388732972824971e-05, "loss": 0.557, "step": 19825 }, { "epoch": 1.3433159428145538, "grad_norm": 6.277471542358398, "learning_rate": 7.38859607091519e-05, "loss": 0.6998, "step": 19826 }, { "epoch": 1.3433836980825258, "grad_norm": 5.464527130126953, "learning_rate": 7.388459169005409e-05, "loss": 0.6073, "step": 19827 }, { "epoch": 1.343451453350498, "grad_norm": 6.115703582763672, "learning_rate": 7.388322267095627e-05, "loss": 0.7384, "step": 19828 }, { "epoch": 1.3435192086184702, "grad_norm": 5.55519962310791, "learning_rate": 7.388185365185845e-05, "loss": 0.67, "step": 19829 }, { "epoch": 1.3435869638864422, "grad_norm": 7.020036697387695, "learning_rate": 7.388048463276063e-05, "loss": 0.7271, "step": 19830 }, { "epoch": 1.3436547191544141, "grad_norm": 5.655063152313232, "learning_rate": 7.387911561366282e-05, "loss": 0.5643, "step": 19831 }, { "epoch": 1.3437224744223863, "grad_norm": 6.301867961883545, "learning_rate": 7.3877746594565e-05, "loss": 0.8198, "step": 19832 }, { "epoch": 1.3437902296903586, "grad_norm": 6.931334018707275, "learning_rate": 7.387637757546718e-05, "loss": 0.8376, "step": 19833 }, { "epoch": 1.3438579849583305, "grad_norm": 4.899502754211426, "learning_rate": 7.387500855636936e-05, "loss": 0.6396, "step": 19834 }, { "epoch": 1.3439257402263025, "grad_norm": 4.386233806610107, "learning_rate": 7.387363953727155e-05, "loss": 0.4676, "step": 19835 }, { "epoch": 1.3439934954942747, "grad_norm": 7.516632080078125, "learning_rate": 7.387227051817374e-05, "loss": 0.7192, "step": 19836 }, { "epoch": 1.3440612507622467, "grad_norm": 4.94453239440918, "learning_rate": 7.387090149907592e-05, "loss": 0.5237, "step": 19837 }, { "epoch": 1.344129006030219, "grad_norm": 6.488331317901611, "learning_rate": 7.38695324799781e-05, "loss": 0.742, "step": 19838 }, { "epoch": 1.3441967612981909, "grad_norm": 6.848110675811768, "learning_rate": 7.386816346088028e-05, "loss": 0.7182, "step": 19839 }, { "epoch": 1.344264516566163, "grad_norm": 5.772297382354736, "learning_rate": 7.386679444178246e-05, "loss": 0.725, "step": 19840 }, { "epoch": 1.344332271834135, "grad_norm": 5.969285011291504, "learning_rate": 7.386542542268465e-05, "loss": 0.7543, "step": 19841 }, { "epoch": 1.3444000271021073, "grad_norm": 5.955066680908203, "learning_rate": 7.386405640358683e-05, "loss": 0.7023, "step": 19842 }, { "epoch": 1.3444677823700792, "grad_norm": 5.906832695007324, "learning_rate": 7.386268738448902e-05, "loss": 0.6935, "step": 19843 }, { "epoch": 1.3445355376380514, "grad_norm": 4.453817367553711, "learning_rate": 7.38613183653912e-05, "loss": 0.4929, "step": 19844 }, { "epoch": 1.3446032929060234, "grad_norm": 12.94332504272461, "learning_rate": 7.385994934629339e-05, "loss": 0.9634, "step": 19845 }, { "epoch": 1.3446710481739954, "grad_norm": 4.869606971740723, "learning_rate": 7.385858032719557e-05, "loss": 0.8817, "step": 19846 }, { "epoch": 1.3447388034419676, "grad_norm": 4.875044822692871, "learning_rate": 7.385721130809775e-05, "loss": 0.6543, "step": 19847 }, { "epoch": 1.3448065587099398, "grad_norm": 5.2942214012146, "learning_rate": 7.385584228899993e-05, "loss": 0.5397, "step": 19848 }, { "epoch": 1.3448743139779118, "grad_norm": 4.81077241897583, "learning_rate": 7.385447326990211e-05, "loss": 0.5812, "step": 19849 }, { "epoch": 1.3449420692458838, "grad_norm": 5.391111850738525, "learning_rate": 7.38531042508043e-05, "loss": 0.5214, "step": 19850 }, { "epoch": 1.345009824513856, "grad_norm": 7.752267360687256, "learning_rate": 7.385173523170648e-05, "loss": 0.8283, "step": 19851 }, { "epoch": 1.345077579781828, "grad_norm": 6.373712062835693, "learning_rate": 7.385036621260867e-05, "loss": 0.8675, "step": 19852 }, { "epoch": 1.3451453350498002, "grad_norm": 8.875795364379883, "learning_rate": 7.384899719351085e-05, "loss": 0.8988, "step": 19853 }, { "epoch": 1.3452130903177721, "grad_norm": 5.196077346801758, "learning_rate": 7.384762817441304e-05, "loss": 0.7996, "step": 19854 }, { "epoch": 1.3452808455857443, "grad_norm": 5.03963565826416, "learning_rate": 7.384625915531522e-05, "loss": 0.6648, "step": 19855 }, { "epoch": 1.3453486008537163, "grad_norm": 5.028035640716553, "learning_rate": 7.38448901362174e-05, "loss": 0.6327, "step": 19856 }, { "epoch": 1.3454163561216885, "grad_norm": 6.16055154800415, "learning_rate": 7.384352111711958e-05, "loss": 0.7194, "step": 19857 }, { "epoch": 1.3454841113896605, "grad_norm": 4.410861015319824, "learning_rate": 7.384215209802176e-05, "loss": 0.5526, "step": 19858 }, { "epoch": 1.3455518666576327, "grad_norm": 4.960877895355225, "learning_rate": 7.384078307892395e-05, "loss": 0.6938, "step": 19859 }, { "epoch": 1.3456196219256047, "grad_norm": 6.684182167053223, "learning_rate": 7.383941405982614e-05, "loss": 0.6522, "step": 19860 }, { "epoch": 1.3456873771935767, "grad_norm": 6.45423698425293, "learning_rate": 7.383804504072832e-05, "loss": 0.5843, "step": 19861 }, { "epoch": 1.3457551324615489, "grad_norm": 5.287134170532227, "learning_rate": 7.38366760216305e-05, "loss": 0.5562, "step": 19862 }, { "epoch": 1.345822887729521, "grad_norm": 4.524722576141357, "learning_rate": 7.383530700253269e-05, "loss": 0.6363, "step": 19863 }, { "epoch": 1.345890642997493, "grad_norm": 7.711580276489258, "learning_rate": 7.383393798343487e-05, "loss": 0.9547, "step": 19864 }, { "epoch": 1.345958398265465, "grad_norm": 5.649478435516357, "learning_rate": 7.383256896433705e-05, "loss": 0.7075, "step": 19865 }, { "epoch": 1.3460261535334372, "grad_norm": 5.75239896774292, "learning_rate": 7.383119994523924e-05, "loss": 0.7716, "step": 19866 }, { "epoch": 1.3460939088014094, "grad_norm": 5.829864978790283, "learning_rate": 7.382983092614142e-05, "loss": 0.7739, "step": 19867 }, { "epoch": 1.3461616640693814, "grad_norm": 5.750999927520752, "learning_rate": 7.38284619070436e-05, "loss": 0.734, "step": 19868 }, { "epoch": 1.3462294193373534, "grad_norm": 5.921744346618652, "learning_rate": 7.38270928879458e-05, "loss": 0.7777, "step": 19869 }, { "epoch": 1.3462971746053256, "grad_norm": 5.157602310180664, "learning_rate": 7.382572386884798e-05, "loss": 0.6393, "step": 19870 }, { "epoch": 1.3463649298732976, "grad_norm": 5.843872547149658, "learning_rate": 7.382435484975016e-05, "loss": 0.6508, "step": 19871 }, { "epoch": 1.3464326851412698, "grad_norm": 8.795445442199707, "learning_rate": 7.382298583065234e-05, "loss": 0.8662, "step": 19872 }, { "epoch": 1.3465004404092418, "grad_norm": 9.207554817199707, "learning_rate": 7.382161681155453e-05, "loss": 0.5971, "step": 19873 }, { "epoch": 1.346568195677214, "grad_norm": 6.088033199310303, "learning_rate": 7.382024779245671e-05, "loss": 0.4128, "step": 19874 }, { "epoch": 1.346635950945186, "grad_norm": 5.845105171203613, "learning_rate": 7.38188787733589e-05, "loss": 0.7231, "step": 19875 }, { "epoch": 1.346703706213158, "grad_norm": 6.239406108856201, "learning_rate": 7.381750975426107e-05, "loss": 0.7635, "step": 19876 }, { "epoch": 1.3467714614811301, "grad_norm": 5.746782302856445, "learning_rate": 7.381614073516327e-05, "loss": 0.7652, "step": 19877 }, { "epoch": 1.3468392167491023, "grad_norm": 4.873795032501221, "learning_rate": 7.381477171606545e-05, "loss": 0.7254, "step": 19878 }, { "epoch": 1.3469069720170743, "grad_norm": 6.158491134643555, "learning_rate": 7.381340269696763e-05, "loss": 1.068, "step": 19879 }, { "epoch": 1.3469747272850463, "grad_norm": 5.696873188018799, "learning_rate": 7.381203367786981e-05, "loss": 0.606, "step": 19880 }, { "epoch": 1.3470424825530185, "grad_norm": 12.769306182861328, "learning_rate": 7.381066465877199e-05, "loss": 0.7959, "step": 19881 }, { "epoch": 1.3471102378209907, "grad_norm": 6.773158073425293, "learning_rate": 7.380929563967418e-05, "loss": 0.8518, "step": 19882 }, { "epoch": 1.3471779930889627, "grad_norm": 4.226945877075195, "learning_rate": 7.380792662057636e-05, "loss": 0.5775, "step": 19883 }, { "epoch": 1.3472457483569347, "grad_norm": 5.617305755615234, "learning_rate": 7.380655760147854e-05, "loss": 0.6835, "step": 19884 }, { "epoch": 1.3473135036249069, "grad_norm": 5.302349090576172, "learning_rate": 7.380518858238072e-05, "loss": 0.657, "step": 19885 }, { "epoch": 1.3473812588928789, "grad_norm": 5.91692590713501, "learning_rate": 7.380381956328292e-05, "loss": 0.577, "step": 19886 }, { "epoch": 1.347449014160851, "grad_norm": 7.197889804840088, "learning_rate": 7.38024505441851e-05, "loss": 0.8061, "step": 19887 }, { "epoch": 1.347516769428823, "grad_norm": 5.56497859954834, "learning_rate": 7.380108152508728e-05, "loss": 0.6734, "step": 19888 }, { "epoch": 1.3475845246967952, "grad_norm": 5.7053351402282715, "learning_rate": 7.379971250598946e-05, "loss": 0.7539, "step": 19889 }, { "epoch": 1.3476522799647672, "grad_norm": 4.959164142608643, "learning_rate": 7.379834348689164e-05, "loss": 0.6064, "step": 19890 }, { "epoch": 1.3477200352327394, "grad_norm": 6.624017238616943, "learning_rate": 7.379697446779383e-05, "loss": 0.8453, "step": 19891 }, { "epoch": 1.3477877905007114, "grad_norm": 6.205291748046875, "learning_rate": 7.379560544869601e-05, "loss": 0.6586, "step": 19892 }, { "epoch": 1.3478555457686836, "grad_norm": 4.573380470275879, "learning_rate": 7.37942364295982e-05, "loss": 0.5073, "step": 19893 }, { "epoch": 1.3479233010366556, "grad_norm": 8.442205429077148, "learning_rate": 7.379286741050038e-05, "loss": 0.6374, "step": 19894 }, { "epoch": 1.3479910563046276, "grad_norm": 7.132976055145264, "learning_rate": 7.379149839140256e-05, "loss": 0.683, "step": 19895 }, { "epoch": 1.3480588115725998, "grad_norm": 5.779045581817627, "learning_rate": 7.379012937230475e-05, "loss": 0.5237, "step": 19896 }, { "epoch": 1.348126566840572, "grad_norm": 6.305784702301025, "learning_rate": 7.378876035320693e-05, "loss": 0.545, "step": 19897 }, { "epoch": 1.348194322108544, "grad_norm": 5.750983715057373, "learning_rate": 7.378739133410911e-05, "loss": 0.5079, "step": 19898 }, { "epoch": 1.348262077376516, "grad_norm": 5.37518835067749, "learning_rate": 7.378602231501129e-05, "loss": 0.6818, "step": 19899 }, { "epoch": 1.3483298326444881, "grad_norm": 8.95629596710205, "learning_rate": 7.378465329591348e-05, "loss": 0.7512, "step": 19900 }, { "epoch": 1.3483975879124601, "grad_norm": 5.567829608917236, "learning_rate": 7.378328427681566e-05, "loss": 0.7101, "step": 19901 }, { "epoch": 1.3484653431804323, "grad_norm": 5.218935966491699, "learning_rate": 7.378191525771784e-05, "loss": 0.6791, "step": 19902 }, { "epoch": 1.3485330984484043, "grad_norm": 5.391122817993164, "learning_rate": 7.378054623862003e-05, "loss": 0.7807, "step": 19903 }, { "epoch": 1.3486008537163765, "grad_norm": 7.139793395996094, "learning_rate": 7.37791772195222e-05, "loss": 0.7345, "step": 19904 }, { "epoch": 1.3486686089843485, "grad_norm": 6.509952068328857, "learning_rate": 7.37778082004244e-05, "loss": 0.5603, "step": 19905 }, { "epoch": 1.3487363642523207, "grad_norm": 5.780937194824219, "learning_rate": 7.377643918132658e-05, "loss": 0.8247, "step": 19906 }, { "epoch": 1.3488041195202927, "grad_norm": 5.263069152832031, "learning_rate": 7.377507016222876e-05, "loss": 0.6957, "step": 19907 }, { "epoch": 1.3488718747882649, "grad_norm": 6.392242908477783, "learning_rate": 7.377370114313094e-05, "loss": 0.78, "step": 19908 }, { "epoch": 1.3489396300562368, "grad_norm": 4.981612205505371, "learning_rate": 7.377233212403313e-05, "loss": 0.7537, "step": 19909 }, { "epoch": 1.3490073853242088, "grad_norm": 6.004624366760254, "learning_rate": 7.377096310493531e-05, "loss": 0.7305, "step": 19910 }, { "epoch": 1.349075140592181, "grad_norm": 5.324982643127441, "learning_rate": 7.37695940858375e-05, "loss": 0.7622, "step": 19911 }, { "epoch": 1.3491428958601532, "grad_norm": 5.008772850036621, "learning_rate": 7.376822506673969e-05, "loss": 0.7225, "step": 19912 }, { "epoch": 1.3492106511281252, "grad_norm": 6.342090606689453, "learning_rate": 7.376685604764187e-05, "loss": 0.6007, "step": 19913 }, { "epoch": 1.3492784063960972, "grad_norm": 9.305315971374512, "learning_rate": 7.376548702854405e-05, "loss": 0.6583, "step": 19914 }, { "epoch": 1.3493461616640694, "grad_norm": 6.3889641761779785, "learning_rate": 7.376411800944624e-05, "loss": 0.6681, "step": 19915 }, { "epoch": 1.3494139169320416, "grad_norm": 5.218634128570557, "learning_rate": 7.376274899034842e-05, "loss": 0.5958, "step": 19916 }, { "epoch": 1.3494816722000136, "grad_norm": 9.410050392150879, "learning_rate": 7.37613799712506e-05, "loss": 0.6878, "step": 19917 }, { "epoch": 1.3495494274679856, "grad_norm": 9.368659019470215, "learning_rate": 7.37600109521528e-05, "loss": 0.712, "step": 19918 }, { "epoch": 1.3496171827359578, "grad_norm": 7.470731258392334, "learning_rate": 7.375864193305498e-05, "loss": 0.4887, "step": 19919 }, { "epoch": 1.3496849380039297, "grad_norm": 5.798779010772705, "learning_rate": 7.375727291395716e-05, "loss": 0.7645, "step": 19920 }, { "epoch": 1.349752693271902, "grad_norm": 5.193567752838135, "learning_rate": 7.375590389485934e-05, "loss": 0.6978, "step": 19921 }, { "epoch": 1.349820448539874, "grad_norm": 3.8177502155303955, "learning_rate": 7.375453487576152e-05, "loss": 0.667, "step": 19922 }, { "epoch": 1.3498882038078461, "grad_norm": 4.63831901550293, "learning_rate": 7.375316585666371e-05, "loss": 0.4122, "step": 19923 }, { "epoch": 1.3499559590758181, "grad_norm": 5.202359199523926, "learning_rate": 7.37517968375659e-05, "loss": 0.6762, "step": 19924 }, { "epoch": 1.35002371434379, "grad_norm": 5.238800048828125, "learning_rate": 7.375042781846807e-05, "loss": 0.7574, "step": 19925 }, { "epoch": 1.3500914696117623, "grad_norm": 7.062122344970703, "learning_rate": 7.374905879937025e-05, "loss": 0.5537, "step": 19926 }, { "epoch": 1.3501592248797345, "grad_norm": 5.381449222564697, "learning_rate": 7.374768978027243e-05, "loss": 0.7263, "step": 19927 }, { "epoch": 1.3502269801477065, "grad_norm": 5.771204471588135, "learning_rate": 7.374632076117463e-05, "loss": 0.5127, "step": 19928 }, { "epoch": 1.3502947354156785, "grad_norm": 5.696809768676758, "learning_rate": 7.374495174207681e-05, "loss": 0.6094, "step": 19929 }, { "epoch": 1.3503624906836507, "grad_norm": 5.744051456451416, "learning_rate": 7.374358272297899e-05, "loss": 0.6486, "step": 19930 }, { "epoch": 1.3504302459516229, "grad_norm": 6.894992828369141, "learning_rate": 7.374221370388117e-05, "loss": 0.8283, "step": 19931 }, { "epoch": 1.3504980012195948, "grad_norm": 6.154562473297119, "learning_rate": 7.374084468478336e-05, "loss": 0.76, "step": 19932 }, { "epoch": 1.3505657564875668, "grad_norm": 5.728113651275635, "learning_rate": 7.373947566568554e-05, "loss": 0.6072, "step": 19933 }, { "epoch": 1.350633511755539, "grad_norm": 6.653811454772949, "learning_rate": 7.373810664658772e-05, "loss": 0.6356, "step": 19934 }, { "epoch": 1.350701267023511, "grad_norm": 6.638221263885498, "learning_rate": 7.37367376274899e-05, "loss": 0.7051, "step": 19935 }, { "epoch": 1.3507690222914832, "grad_norm": 5.749013900756836, "learning_rate": 7.373536860839208e-05, "loss": 0.6852, "step": 19936 }, { "epoch": 1.3508367775594552, "grad_norm": 9.273425102233887, "learning_rate": 7.373399958929428e-05, "loss": 0.5764, "step": 19937 }, { "epoch": 1.3509045328274274, "grad_norm": 6.758439064025879, "learning_rate": 7.373263057019646e-05, "loss": 0.6425, "step": 19938 }, { "epoch": 1.3509722880953994, "grad_norm": 5.231766700744629, "learning_rate": 7.373126155109864e-05, "loss": 0.8546, "step": 19939 }, { "epoch": 1.3510400433633716, "grad_norm": 5.232595920562744, "learning_rate": 7.372989253200082e-05, "loss": 0.6119, "step": 19940 }, { "epoch": 1.3511077986313436, "grad_norm": 5.4670796394348145, "learning_rate": 7.372852351290301e-05, "loss": 0.6454, "step": 19941 }, { "epoch": 1.3511755538993158, "grad_norm": 5.958322525024414, "learning_rate": 7.37271544938052e-05, "loss": 0.5938, "step": 19942 }, { "epoch": 1.3512433091672877, "grad_norm": 5.144124507904053, "learning_rate": 7.372578547470737e-05, "loss": 0.8476, "step": 19943 }, { "epoch": 1.3513110644352597, "grad_norm": 6.348459720611572, "learning_rate": 7.372441645560955e-05, "loss": 0.6618, "step": 19944 }, { "epoch": 1.351378819703232, "grad_norm": 4.759542465209961, "learning_rate": 7.372304743651174e-05, "loss": 0.6995, "step": 19945 }, { "epoch": 1.3514465749712041, "grad_norm": 5.490478038787842, "learning_rate": 7.372167841741393e-05, "loss": 0.7879, "step": 19946 }, { "epoch": 1.351514330239176, "grad_norm": 5.07197380065918, "learning_rate": 7.372030939831611e-05, "loss": 0.6756, "step": 19947 }, { "epoch": 1.351582085507148, "grad_norm": 6.051860809326172, "learning_rate": 7.371894037921829e-05, "loss": 0.7536, "step": 19948 }, { "epoch": 1.3516498407751203, "grad_norm": 10.358040809631348, "learning_rate": 7.371757136012047e-05, "loss": 0.6029, "step": 19949 }, { "epoch": 1.3517175960430923, "grad_norm": 5.648488998413086, "learning_rate": 7.371620234102265e-05, "loss": 0.6964, "step": 19950 }, { "epoch": 1.3517853513110645, "grad_norm": 5.14361047744751, "learning_rate": 7.371483332192484e-05, "loss": 0.6324, "step": 19951 }, { "epoch": 1.3518531065790365, "grad_norm": 7.075919151306152, "learning_rate": 7.371346430282702e-05, "loss": 0.7916, "step": 19952 }, { "epoch": 1.3519208618470087, "grad_norm": 4.692775249481201, "learning_rate": 7.37120952837292e-05, "loss": 0.6497, "step": 19953 }, { "epoch": 1.3519886171149806, "grad_norm": 7.179323196411133, "learning_rate": 7.371072626463139e-05, "loss": 0.6755, "step": 19954 }, { "epoch": 1.3520563723829528, "grad_norm": 6.154078483581543, "learning_rate": 7.370935724553358e-05, "loss": 0.704, "step": 19955 }, { "epoch": 1.3521241276509248, "grad_norm": 4.876952171325684, "learning_rate": 7.370798822643576e-05, "loss": 0.7229, "step": 19956 }, { "epoch": 1.352191882918897, "grad_norm": 4.508577346801758, "learning_rate": 7.370661920733794e-05, "loss": 0.6494, "step": 19957 }, { "epoch": 1.352259638186869, "grad_norm": 5.934298992156982, "learning_rate": 7.370525018824013e-05, "loss": 0.6569, "step": 19958 }, { "epoch": 1.352327393454841, "grad_norm": 4.9950714111328125, "learning_rate": 7.370388116914231e-05, "loss": 0.6014, "step": 19959 }, { "epoch": 1.3523951487228132, "grad_norm": 5.902311325073242, "learning_rate": 7.37025121500445e-05, "loss": 0.5294, "step": 19960 }, { "epoch": 1.3524629039907854, "grad_norm": 6.487723350524902, "learning_rate": 7.370114313094669e-05, "loss": 0.8835, "step": 19961 }, { "epoch": 1.3525306592587574, "grad_norm": 5.77423095703125, "learning_rate": 7.369977411184887e-05, "loss": 0.6867, "step": 19962 }, { "epoch": 1.3525984145267294, "grad_norm": 6.632908344268799, "learning_rate": 7.369840509275105e-05, "loss": 0.8624, "step": 19963 }, { "epoch": 1.3526661697947016, "grad_norm": 6.848690509796143, "learning_rate": 7.369703607365324e-05, "loss": 0.7605, "step": 19964 }, { "epoch": 1.3527339250626738, "grad_norm": 4.228781223297119, "learning_rate": 7.369566705455542e-05, "loss": 0.6512, "step": 19965 }, { "epoch": 1.3528016803306457, "grad_norm": 5.002082347869873, "learning_rate": 7.36942980354576e-05, "loss": 0.591, "step": 19966 }, { "epoch": 1.3528694355986177, "grad_norm": 6.647200107574463, "learning_rate": 7.369292901635978e-05, "loss": 0.7447, "step": 19967 }, { "epoch": 1.35293719086659, "grad_norm": 5.426908016204834, "learning_rate": 7.369155999726196e-05, "loss": 0.6537, "step": 19968 }, { "epoch": 1.353004946134562, "grad_norm": 6.237785816192627, "learning_rate": 7.369019097816416e-05, "loss": 0.7078, "step": 19969 }, { "epoch": 1.353072701402534, "grad_norm": 4.335097312927246, "learning_rate": 7.368882195906634e-05, "loss": 0.5103, "step": 19970 }, { "epoch": 1.353140456670506, "grad_norm": 5.579981327056885, "learning_rate": 7.368745293996852e-05, "loss": 0.6114, "step": 19971 }, { "epoch": 1.3532082119384783, "grad_norm": 5.784294128417969, "learning_rate": 7.36860839208707e-05, "loss": 0.6899, "step": 19972 }, { "epoch": 1.3532759672064503, "grad_norm": 4.744174957275391, "learning_rate": 7.368471490177288e-05, "loss": 0.7841, "step": 19973 }, { "epoch": 1.3533437224744223, "grad_norm": 4.778841972351074, "learning_rate": 7.368334588267507e-05, "loss": 0.7849, "step": 19974 }, { "epoch": 1.3534114777423945, "grad_norm": 6.180090427398682, "learning_rate": 7.368197686357725e-05, "loss": 0.7655, "step": 19975 }, { "epoch": 1.3534792330103667, "grad_norm": 6.841678619384766, "learning_rate": 7.368060784447943e-05, "loss": 0.8086, "step": 19976 }, { "epoch": 1.3535469882783386, "grad_norm": 5.358008861541748, "learning_rate": 7.367923882538161e-05, "loss": 0.6261, "step": 19977 }, { "epoch": 1.3536147435463106, "grad_norm": 4.822285175323486, "learning_rate": 7.367786980628381e-05, "loss": 0.6001, "step": 19978 }, { "epoch": 1.3536824988142828, "grad_norm": 5.250954627990723, "learning_rate": 7.367650078718599e-05, "loss": 0.6783, "step": 19979 }, { "epoch": 1.353750254082255, "grad_norm": 9.218700408935547, "learning_rate": 7.367513176808817e-05, "loss": 0.8117, "step": 19980 }, { "epoch": 1.353818009350227, "grad_norm": 6.408379077911377, "learning_rate": 7.367376274899035e-05, "loss": 0.6777, "step": 19981 }, { "epoch": 1.353885764618199, "grad_norm": 6.272383213043213, "learning_rate": 7.367239372989253e-05, "loss": 0.922, "step": 19982 }, { "epoch": 1.3539535198861712, "grad_norm": 5.051000118255615, "learning_rate": 7.367102471079472e-05, "loss": 0.6216, "step": 19983 }, { "epoch": 1.3540212751541432, "grad_norm": 5.2475905418396, "learning_rate": 7.36696556916969e-05, "loss": 0.6342, "step": 19984 }, { "epoch": 1.3540890304221154, "grad_norm": 5.668992519378662, "learning_rate": 7.366828667259908e-05, "loss": 0.6653, "step": 19985 }, { "epoch": 1.3541567856900874, "grad_norm": 5.0019659996032715, "learning_rate": 7.366691765350126e-05, "loss": 0.8475, "step": 19986 }, { "epoch": 1.3542245409580596, "grad_norm": 5.761159420013428, "learning_rate": 7.366554863440346e-05, "loss": 0.7274, "step": 19987 }, { "epoch": 1.3542922962260315, "grad_norm": 7.718562126159668, "learning_rate": 7.366417961530564e-05, "loss": 0.7133, "step": 19988 }, { "epoch": 1.3543600514940037, "grad_norm": 6.297607421875, "learning_rate": 7.366281059620782e-05, "loss": 0.7465, "step": 19989 }, { "epoch": 1.3544278067619757, "grad_norm": 8.293129920959473, "learning_rate": 7.366144157711e-05, "loss": 0.699, "step": 19990 }, { "epoch": 1.354495562029948, "grad_norm": 4.6341705322265625, "learning_rate": 7.366007255801218e-05, "loss": 0.7267, "step": 19991 }, { "epoch": 1.35456331729792, "grad_norm": 6.480041027069092, "learning_rate": 7.365870353891437e-05, "loss": 0.5549, "step": 19992 }, { "epoch": 1.3546310725658919, "grad_norm": 7.306915283203125, "learning_rate": 7.365733451981655e-05, "loss": 0.8121, "step": 19993 }, { "epoch": 1.354698827833864, "grad_norm": 5.290069580078125, "learning_rate": 7.365596550071873e-05, "loss": 0.8269, "step": 19994 }, { "epoch": 1.3547665831018363, "grad_norm": 7.2335405349731445, "learning_rate": 7.365459648162091e-05, "loss": 0.7509, "step": 19995 }, { "epoch": 1.3548343383698083, "grad_norm": 4.835022926330566, "learning_rate": 7.365322746252311e-05, "loss": 0.5865, "step": 19996 }, { "epoch": 1.3549020936377802, "grad_norm": 5.039982795715332, "learning_rate": 7.365185844342529e-05, "loss": 0.6308, "step": 19997 }, { "epoch": 1.3549698489057524, "grad_norm": 5.263078212738037, "learning_rate": 7.365048942432747e-05, "loss": 0.6588, "step": 19998 }, { "epoch": 1.3550376041737244, "grad_norm": 5.617459774017334, "learning_rate": 7.364912040522965e-05, "loss": 0.8105, "step": 19999 }, { "epoch": 1.3551053594416966, "grad_norm": 5.321951389312744, "learning_rate": 7.364775138613183e-05, "loss": 0.5187, "step": 20000 }, { "epoch": 1.3551731147096686, "grad_norm": 5.454623222351074, "learning_rate": 7.364638236703402e-05, "loss": 0.6273, "step": 20001 }, { "epoch": 1.3552408699776408, "grad_norm": 6.286800384521484, "learning_rate": 7.36450133479362e-05, "loss": 0.543, "step": 20002 }, { "epoch": 1.3553086252456128, "grad_norm": 5.315005302429199, "learning_rate": 7.364364432883838e-05, "loss": 0.643, "step": 20003 }, { "epoch": 1.355376380513585, "grad_norm": 5.628118991851807, "learning_rate": 7.364227530974056e-05, "loss": 0.6692, "step": 20004 }, { "epoch": 1.355444135781557, "grad_norm": 7.164305686950684, "learning_rate": 7.364090629064276e-05, "loss": 0.6539, "step": 20005 }, { "epoch": 1.3555118910495292, "grad_norm": 7.3465681076049805, "learning_rate": 7.363953727154494e-05, "loss": 0.9834, "step": 20006 }, { "epoch": 1.3555796463175012, "grad_norm": 4.9146246910095215, "learning_rate": 7.363816825244712e-05, "loss": 0.7351, "step": 20007 }, { "epoch": 1.3556474015854731, "grad_norm": 4.55314302444458, "learning_rate": 7.363679923334931e-05, "loss": 0.642, "step": 20008 }, { "epoch": 1.3557151568534453, "grad_norm": 5.331081390380859, "learning_rate": 7.36354302142515e-05, "loss": 0.6139, "step": 20009 }, { "epoch": 1.3557829121214175, "grad_norm": 5.817625999450684, "learning_rate": 7.363406119515367e-05, "loss": 0.7084, "step": 20010 }, { "epoch": 1.3558506673893895, "grad_norm": 5.053684711456299, "learning_rate": 7.363269217605587e-05, "loss": 0.5583, "step": 20011 }, { "epoch": 1.3559184226573615, "grad_norm": 7.729983329772949, "learning_rate": 7.363132315695805e-05, "loss": 0.8155, "step": 20012 }, { "epoch": 1.3559861779253337, "grad_norm": 7.670418739318848, "learning_rate": 7.362995413786023e-05, "loss": 0.8305, "step": 20013 }, { "epoch": 1.356053933193306, "grad_norm": 7.319329261779785, "learning_rate": 7.362858511876241e-05, "loss": 0.671, "step": 20014 }, { "epoch": 1.356121688461278, "grad_norm": 3.7903876304626465, "learning_rate": 7.36272160996646e-05, "loss": 0.3765, "step": 20015 }, { "epoch": 1.3561894437292499, "grad_norm": 5.028668403625488, "learning_rate": 7.362584708056678e-05, "loss": 0.6547, "step": 20016 }, { "epoch": 1.356257198997222, "grad_norm": 4.86316442489624, "learning_rate": 7.362447806146896e-05, "loss": 0.7517, "step": 20017 }, { "epoch": 1.356324954265194, "grad_norm": 5.922643184661865, "learning_rate": 7.362310904237114e-05, "loss": 0.5753, "step": 20018 }, { "epoch": 1.3563927095331663, "grad_norm": 5.995681285858154, "learning_rate": 7.362174002327334e-05, "loss": 0.72, "step": 20019 }, { "epoch": 1.3564604648011382, "grad_norm": 7.475533485412598, "learning_rate": 7.362037100417552e-05, "loss": 0.7081, "step": 20020 }, { "epoch": 1.3565282200691104, "grad_norm": 5.785661220550537, "learning_rate": 7.36190019850777e-05, "loss": 0.6773, "step": 20021 }, { "epoch": 1.3565959753370824, "grad_norm": 5.810043811798096, "learning_rate": 7.361763296597988e-05, "loss": 0.8038, "step": 20022 }, { "epoch": 1.3566637306050544, "grad_norm": 6.352225303649902, "learning_rate": 7.361626394688206e-05, "loss": 0.6, "step": 20023 }, { "epoch": 1.3567314858730266, "grad_norm": 6.08746337890625, "learning_rate": 7.361489492778425e-05, "loss": 0.7623, "step": 20024 }, { "epoch": 1.3567992411409988, "grad_norm": 7.001803398132324, "learning_rate": 7.361352590868643e-05, "loss": 0.579, "step": 20025 }, { "epoch": 1.3568669964089708, "grad_norm": 7.817383289337158, "learning_rate": 7.361215688958861e-05, "loss": 0.8488, "step": 20026 }, { "epoch": 1.3569347516769428, "grad_norm": 9.60105037689209, "learning_rate": 7.36107878704908e-05, "loss": 0.6014, "step": 20027 }, { "epoch": 1.357002506944915, "grad_norm": 5.276477813720703, "learning_rate": 7.360941885139297e-05, "loss": 0.5482, "step": 20028 }, { "epoch": 1.3570702622128872, "grad_norm": 4.966242790222168, "learning_rate": 7.360804983229517e-05, "loss": 0.6784, "step": 20029 }, { "epoch": 1.3571380174808592, "grad_norm": 6.604118347167969, "learning_rate": 7.360668081319735e-05, "loss": 0.6823, "step": 20030 }, { "epoch": 1.3572057727488311, "grad_norm": 7.855197906494141, "learning_rate": 7.360531179409953e-05, "loss": 0.8609, "step": 20031 }, { "epoch": 1.3572735280168033, "grad_norm": 7.300774574279785, "learning_rate": 7.360394277500171e-05, "loss": 0.642, "step": 20032 }, { "epoch": 1.3573412832847753, "grad_norm": 5.237577438354492, "learning_rate": 7.36025737559039e-05, "loss": 0.7547, "step": 20033 }, { "epoch": 1.3574090385527475, "grad_norm": 6.2791056632995605, "learning_rate": 7.360120473680608e-05, "loss": 0.6317, "step": 20034 }, { "epoch": 1.3574767938207195, "grad_norm": 5.698138236999512, "learning_rate": 7.359983571770826e-05, "loss": 0.4747, "step": 20035 }, { "epoch": 1.3575445490886917, "grad_norm": 7.2368364334106445, "learning_rate": 7.359846669861044e-05, "loss": 0.8925, "step": 20036 }, { "epoch": 1.3576123043566637, "grad_norm": 6.461616039276123, "learning_rate": 7.359709767951262e-05, "loss": 0.8687, "step": 20037 }, { "epoch": 1.357680059624636, "grad_norm": 6.772340297698975, "learning_rate": 7.359572866041482e-05, "loss": 0.6268, "step": 20038 }, { "epoch": 1.3577478148926079, "grad_norm": 7.397302150726318, "learning_rate": 7.3594359641317e-05, "loss": 0.8455, "step": 20039 }, { "epoch": 1.35781557016058, "grad_norm": 4.867091178894043, "learning_rate": 7.359299062221918e-05, "loss": 0.89, "step": 20040 }, { "epoch": 1.357883325428552, "grad_norm": 5.971895694732666, "learning_rate": 7.359162160312136e-05, "loss": 0.7222, "step": 20041 }, { "epoch": 1.357951080696524, "grad_norm": 6.650653839111328, "learning_rate": 7.359025258402355e-05, "loss": 0.7469, "step": 20042 }, { "epoch": 1.3580188359644962, "grad_norm": 4.813751697540283, "learning_rate": 7.358888356492573e-05, "loss": 0.5838, "step": 20043 }, { "epoch": 1.3580865912324684, "grad_norm": 4.849099159240723, "learning_rate": 7.358751454582791e-05, "loss": 0.679, "step": 20044 }, { "epoch": 1.3581543465004404, "grad_norm": 6.174177646636963, "learning_rate": 7.35861455267301e-05, "loss": 1.1783, "step": 20045 }, { "epoch": 1.3582221017684124, "grad_norm": 5.561427593231201, "learning_rate": 7.358477650763227e-05, "loss": 0.6697, "step": 20046 }, { "epoch": 1.3582898570363846, "grad_norm": 6.558093070983887, "learning_rate": 7.358340748853447e-05, "loss": 0.6278, "step": 20047 }, { "epoch": 1.3583576123043566, "grad_norm": 7.646174907684326, "learning_rate": 7.358203846943665e-05, "loss": 0.6253, "step": 20048 }, { "epoch": 1.3584253675723288, "grad_norm": 7.046586990356445, "learning_rate": 7.358066945033883e-05, "loss": 0.881, "step": 20049 }, { "epoch": 1.3584931228403008, "grad_norm": 6.800720691680908, "learning_rate": 7.357930043124101e-05, "loss": 0.5778, "step": 20050 }, { "epoch": 1.358560878108273, "grad_norm": 6.495662212371826, "learning_rate": 7.35779314121432e-05, "loss": 0.8684, "step": 20051 }, { "epoch": 1.358628633376245, "grad_norm": 12.279613494873047, "learning_rate": 7.357656239304538e-05, "loss": 0.6486, "step": 20052 }, { "epoch": 1.3586963886442172, "grad_norm": 4.549555778503418, "learning_rate": 7.357519337394756e-05, "loss": 0.6898, "step": 20053 }, { "epoch": 1.3587641439121891, "grad_norm": 7.736841678619385, "learning_rate": 7.357382435484976e-05, "loss": 0.7159, "step": 20054 }, { "epoch": 1.3588318991801613, "grad_norm": 5.395872116088867, "learning_rate": 7.357245533575194e-05, "loss": 0.6687, "step": 20055 }, { "epoch": 1.3588996544481333, "grad_norm": 5.658153057098389, "learning_rate": 7.357108631665412e-05, "loss": 0.8024, "step": 20056 }, { "epoch": 1.3589674097161053, "grad_norm": 6.057212829589844, "learning_rate": 7.356971729755631e-05, "loss": 0.718, "step": 20057 }, { "epoch": 1.3590351649840775, "grad_norm": 5.64166784286499, "learning_rate": 7.356834827845849e-05, "loss": 0.543, "step": 20058 }, { "epoch": 1.3591029202520497, "grad_norm": 5.753907680511475, "learning_rate": 7.356697925936067e-05, "loss": 0.8792, "step": 20059 }, { "epoch": 1.3591706755200217, "grad_norm": 6.109472274780273, "learning_rate": 7.356561024026285e-05, "loss": 0.7085, "step": 20060 }, { "epoch": 1.3592384307879937, "grad_norm": 5.680140018463135, "learning_rate": 7.356424122116505e-05, "loss": 0.6179, "step": 20061 }, { "epoch": 1.3593061860559659, "grad_norm": 5.016184329986572, "learning_rate": 7.356287220206723e-05, "loss": 0.7204, "step": 20062 }, { "epoch": 1.359373941323938, "grad_norm": 6.262152671813965, "learning_rate": 7.356150318296941e-05, "loss": 0.6992, "step": 20063 }, { "epoch": 1.35944169659191, "grad_norm": 6.355588912963867, "learning_rate": 7.356013416387159e-05, "loss": 0.8267, "step": 20064 }, { "epoch": 1.359509451859882, "grad_norm": 6.8170928955078125, "learning_rate": 7.355876514477378e-05, "loss": 0.7809, "step": 20065 }, { "epoch": 1.3595772071278542, "grad_norm": 4.490106582641602, "learning_rate": 7.355739612567596e-05, "loss": 0.5812, "step": 20066 }, { "epoch": 1.3596449623958262, "grad_norm": 4.712568283081055, "learning_rate": 7.355602710657814e-05, "loss": 0.4843, "step": 20067 }, { "epoch": 1.3597127176637984, "grad_norm": 5.723649978637695, "learning_rate": 7.355465808748032e-05, "loss": 0.6928, "step": 20068 }, { "epoch": 1.3597804729317704, "grad_norm": 8.284466743469238, "learning_rate": 7.35532890683825e-05, "loss": 0.6851, "step": 20069 }, { "epoch": 1.3598482281997426, "grad_norm": 7.465725421905518, "learning_rate": 7.35519200492847e-05, "loss": 1.017, "step": 20070 }, { "epoch": 1.3599159834677146, "grad_norm": 7.355698108673096, "learning_rate": 7.355055103018688e-05, "loss": 0.5683, "step": 20071 }, { "epoch": 1.3599837387356866, "grad_norm": 6.492972373962402, "learning_rate": 7.354918201108906e-05, "loss": 0.5715, "step": 20072 }, { "epoch": 1.3600514940036588, "grad_norm": 9.104310035705566, "learning_rate": 7.354781299199124e-05, "loss": 0.6879, "step": 20073 }, { "epoch": 1.360119249271631, "grad_norm": 6.3227152824401855, "learning_rate": 7.354644397289343e-05, "loss": 0.672, "step": 20074 }, { "epoch": 1.360187004539603, "grad_norm": 5.511898040771484, "learning_rate": 7.354507495379561e-05, "loss": 0.6087, "step": 20075 }, { "epoch": 1.360254759807575, "grad_norm": 8.365015983581543, "learning_rate": 7.35437059346978e-05, "loss": 0.7209, "step": 20076 }, { "epoch": 1.3603225150755471, "grad_norm": 7.271026134490967, "learning_rate": 7.354233691559997e-05, "loss": 0.8676, "step": 20077 }, { "epoch": 1.3603902703435193, "grad_norm": 7.777347087860107, "learning_rate": 7.354096789650215e-05, "loss": 0.6073, "step": 20078 }, { "epoch": 1.3604580256114913, "grad_norm": 5.516927242279053, "learning_rate": 7.353959887740435e-05, "loss": 0.6862, "step": 20079 }, { "epoch": 1.3605257808794633, "grad_norm": 5.664980888366699, "learning_rate": 7.353822985830653e-05, "loss": 0.6629, "step": 20080 }, { "epoch": 1.3605935361474355, "grad_norm": 5.623690128326416, "learning_rate": 7.353686083920871e-05, "loss": 0.7468, "step": 20081 }, { "epoch": 1.3606612914154075, "grad_norm": 5.137689113616943, "learning_rate": 7.353549182011089e-05, "loss": 0.687, "step": 20082 }, { "epoch": 1.3607290466833797, "grad_norm": 6.086162090301514, "learning_rate": 7.353412280101307e-05, "loss": 0.7801, "step": 20083 }, { "epoch": 1.3607968019513517, "grad_norm": 7.750800132751465, "learning_rate": 7.353275378191526e-05, "loss": 0.6714, "step": 20084 }, { "epoch": 1.3608645572193239, "grad_norm": 6.469507217407227, "learning_rate": 7.353138476281744e-05, "loss": 0.6849, "step": 20085 }, { "epoch": 1.3609323124872958, "grad_norm": 6.682690620422363, "learning_rate": 7.353001574371962e-05, "loss": 0.6748, "step": 20086 }, { "epoch": 1.361000067755268, "grad_norm": 5.2257914543151855, "learning_rate": 7.35286467246218e-05, "loss": 0.5539, "step": 20087 }, { "epoch": 1.36106782302324, "grad_norm": 5.187159538269043, "learning_rate": 7.3527277705524e-05, "loss": 0.5237, "step": 20088 }, { "epoch": 1.3611355782912122, "grad_norm": 6.509801864624023, "learning_rate": 7.352590868642618e-05, "loss": 0.6219, "step": 20089 }, { "epoch": 1.3612033335591842, "grad_norm": 4.808472156524658, "learning_rate": 7.352453966732836e-05, "loss": 0.5522, "step": 20090 }, { "epoch": 1.3612710888271562, "grad_norm": 4.554870128631592, "learning_rate": 7.352317064823054e-05, "loss": 0.5707, "step": 20091 }, { "epoch": 1.3613388440951284, "grad_norm": 6.499706745147705, "learning_rate": 7.352180162913272e-05, "loss": 0.6295, "step": 20092 }, { "epoch": 1.3614065993631006, "grad_norm": 5.483860492706299, "learning_rate": 7.352043261003491e-05, "loss": 0.5475, "step": 20093 }, { "epoch": 1.3614743546310726, "grad_norm": 7.3637800216674805, "learning_rate": 7.35190635909371e-05, "loss": 0.6633, "step": 20094 }, { "epoch": 1.3615421098990446, "grad_norm": 5.607994079589844, "learning_rate": 7.351769457183927e-05, "loss": 0.607, "step": 20095 }, { "epoch": 1.3616098651670168, "grad_norm": 6.8592681884765625, "learning_rate": 7.351632555274145e-05, "loss": 0.7922, "step": 20096 }, { "epoch": 1.3616776204349887, "grad_norm": 6.994537353515625, "learning_rate": 7.351495653364365e-05, "loss": 0.8001, "step": 20097 }, { "epoch": 1.361745375702961, "grad_norm": 6.066976547241211, "learning_rate": 7.351358751454583e-05, "loss": 0.6669, "step": 20098 }, { "epoch": 1.361813130970933, "grad_norm": 7.42667293548584, "learning_rate": 7.351221849544801e-05, "loss": 0.8944, "step": 20099 }, { "epoch": 1.3618808862389051, "grad_norm": 7.16603946685791, "learning_rate": 7.35108494763502e-05, "loss": 0.8215, "step": 20100 }, { "epoch": 1.3619486415068771, "grad_norm": 4.327278137207031, "learning_rate": 7.350948045725238e-05, "loss": 0.4048, "step": 20101 }, { "epoch": 1.3620163967748493, "grad_norm": 5.429370403289795, "learning_rate": 7.350811143815456e-05, "loss": 0.8349, "step": 20102 }, { "epoch": 1.3620841520428213, "grad_norm": 6.940039157867432, "learning_rate": 7.350674241905676e-05, "loss": 0.676, "step": 20103 }, { "epoch": 1.3621519073107935, "grad_norm": 6.706622123718262, "learning_rate": 7.350537339995894e-05, "loss": 0.6571, "step": 20104 }, { "epoch": 1.3622196625787655, "grad_norm": 7.027434825897217, "learning_rate": 7.350400438086112e-05, "loss": 0.6632, "step": 20105 }, { "epoch": 1.3622874178467375, "grad_norm": 4.937480449676514, "learning_rate": 7.350263536176331e-05, "loss": 0.567, "step": 20106 }, { "epoch": 1.3623551731147097, "grad_norm": 6.342832088470459, "learning_rate": 7.350126634266549e-05, "loss": 0.7271, "step": 20107 }, { "epoch": 1.3624229283826819, "grad_norm": 5.432223320007324, "learning_rate": 7.349989732356767e-05, "loss": 0.6286, "step": 20108 }, { "epoch": 1.3624906836506538, "grad_norm": 6.907063961029053, "learning_rate": 7.349852830446985e-05, "loss": 0.7298, "step": 20109 }, { "epoch": 1.3625584389186258, "grad_norm": 5.244854927062988, "learning_rate": 7.349715928537203e-05, "loss": 0.6566, "step": 20110 }, { "epoch": 1.362626194186598, "grad_norm": 4.9953765869140625, "learning_rate": 7.349579026627423e-05, "loss": 0.6722, "step": 20111 }, { "epoch": 1.3626939494545702, "grad_norm": 5.006008625030518, "learning_rate": 7.349442124717641e-05, "loss": 0.5723, "step": 20112 }, { "epoch": 1.3627617047225422, "grad_norm": 9.514997482299805, "learning_rate": 7.349305222807859e-05, "loss": 0.7346, "step": 20113 }, { "epoch": 1.3628294599905142, "grad_norm": 6.089010715484619, "learning_rate": 7.349168320898077e-05, "loss": 0.66, "step": 20114 }, { "epoch": 1.3628972152584864, "grad_norm": 4.536818981170654, "learning_rate": 7.349031418988295e-05, "loss": 0.6891, "step": 20115 }, { "epoch": 1.3629649705264584, "grad_norm": 5.106909275054932, "learning_rate": 7.348894517078514e-05, "loss": 0.5718, "step": 20116 }, { "epoch": 1.3630327257944306, "grad_norm": 4.707106113433838, "learning_rate": 7.348757615168732e-05, "loss": 0.5814, "step": 20117 }, { "epoch": 1.3631004810624026, "grad_norm": 5.799274921417236, "learning_rate": 7.34862071325895e-05, "loss": 0.7959, "step": 20118 }, { "epoch": 1.3631682363303748, "grad_norm": 6.064062118530273, "learning_rate": 7.348483811349168e-05, "loss": 0.6274, "step": 20119 }, { "epoch": 1.3632359915983467, "grad_norm": 4.436087131500244, "learning_rate": 7.348346909439388e-05, "loss": 0.7505, "step": 20120 }, { "epoch": 1.3633037468663187, "grad_norm": 6.271925449371338, "learning_rate": 7.348210007529606e-05, "loss": 0.7702, "step": 20121 }, { "epoch": 1.363371502134291, "grad_norm": 4.295551776885986, "learning_rate": 7.348073105619824e-05, "loss": 0.694, "step": 20122 }, { "epoch": 1.3634392574022631, "grad_norm": 5.443527698516846, "learning_rate": 7.347936203710042e-05, "loss": 0.6034, "step": 20123 }, { "epoch": 1.363507012670235, "grad_norm": 5.247413158416748, "learning_rate": 7.34779930180026e-05, "loss": 0.8159, "step": 20124 }, { "epoch": 1.363574767938207, "grad_norm": 4.961094856262207, "learning_rate": 7.347662399890479e-05, "loss": 0.6515, "step": 20125 }, { "epoch": 1.3636425232061793, "grad_norm": 5.317098140716553, "learning_rate": 7.347525497980697e-05, "loss": 0.5528, "step": 20126 }, { "epoch": 1.3637102784741515, "grad_norm": 5.877913951873779, "learning_rate": 7.347388596070915e-05, "loss": 0.636, "step": 20127 }, { "epoch": 1.3637780337421235, "grad_norm": 10.45703411102295, "learning_rate": 7.347251694161133e-05, "loss": 0.5555, "step": 20128 }, { "epoch": 1.3638457890100955, "grad_norm": 9.53973388671875, "learning_rate": 7.347114792251353e-05, "loss": 0.7984, "step": 20129 }, { "epoch": 1.3639135442780677, "grad_norm": 5.888890266418457, "learning_rate": 7.346977890341571e-05, "loss": 0.7336, "step": 20130 }, { "epoch": 1.3639812995460396, "grad_norm": 7.18953275680542, "learning_rate": 7.346840988431789e-05, "loss": 0.6776, "step": 20131 }, { "epoch": 1.3640490548140118, "grad_norm": 6.5027875900268555, "learning_rate": 7.346704086522007e-05, "loss": 0.7024, "step": 20132 }, { "epoch": 1.3641168100819838, "grad_norm": 7.588999271392822, "learning_rate": 7.346567184612225e-05, "loss": 0.7848, "step": 20133 }, { "epoch": 1.364184565349956, "grad_norm": 5.535569190979004, "learning_rate": 7.346430282702444e-05, "loss": 0.7186, "step": 20134 }, { "epoch": 1.364252320617928, "grad_norm": 6.170422554016113, "learning_rate": 7.346293380792662e-05, "loss": 0.6289, "step": 20135 }, { "epoch": 1.3643200758859002, "grad_norm": 4.50344705581665, "learning_rate": 7.34615647888288e-05, "loss": 0.5076, "step": 20136 }, { "epoch": 1.3643878311538722, "grad_norm": 4.778453826904297, "learning_rate": 7.346019576973098e-05, "loss": 0.7256, "step": 20137 }, { "epoch": 1.3644555864218444, "grad_norm": 5.358566761016846, "learning_rate": 7.345882675063316e-05, "loss": 0.6061, "step": 20138 }, { "epoch": 1.3645233416898164, "grad_norm": 4.654705047607422, "learning_rate": 7.345745773153536e-05, "loss": 0.5285, "step": 20139 }, { "epoch": 1.3645910969577884, "grad_norm": 6.088637351989746, "learning_rate": 7.345608871243754e-05, "loss": 0.676, "step": 20140 }, { "epoch": 1.3646588522257606, "grad_norm": 5.737188816070557, "learning_rate": 7.345471969333972e-05, "loss": 0.879, "step": 20141 }, { "epoch": 1.3647266074937328, "grad_norm": 5.3685622215271, "learning_rate": 7.34533506742419e-05, "loss": 0.5696, "step": 20142 }, { "epoch": 1.3647943627617047, "grad_norm": 7.844949722290039, "learning_rate": 7.345198165514409e-05, "loss": 0.6263, "step": 20143 }, { "epoch": 1.3648621180296767, "grad_norm": 5.7474470138549805, "learning_rate": 7.345061263604627e-05, "loss": 0.894, "step": 20144 }, { "epoch": 1.364929873297649, "grad_norm": 7.3926682472229, "learning_rate": 7.344924361694845e-05, "loss": 0.5948, "step": 20145 }, { "epoch": 1.364997628565621, "grad_norm": 5.154560565948486, "learning_rate": 7.344787459785065e-05, "loss": 0.6158, "step": 20146 }, { "epoch": 1.365065383833593, "grad_norm": 7.032984256744385, "learning_rate": 7.344650557875283e-05, "loss": 0.6778, "step": 20147 }, { "epoch": 1.365133139101565, "grad_norm": 5.855047225952148, "learning_rate": 7.344513655965501e-05, "loss": 0.719, "step": 20148 }, { "epoch": 1.3652008943695373, "grad_norm": 6.642358303070068, "learning_rate": 7.34437675405572e-05, "loss": 0.6852, "step": 20149 }, { "epoch": 1.3652686496375093, "grad_norm": 4.949575424194336, "learning_rate": 7.344239852145938e-05, "loss": 0.5822, "step": 20150 }, { "epoch": 1.3653364049054815, "grad_norm": 5.005299091339111, "learning_rate": 7.344102950236156e-05, "loss": 0.5982, "step": 20151 }, { "epoch": 1.3654041601734535, "grad_norm": 5.513084888458252, "learning_rate": 7.343966048326376e-05, "loss": 0.596, "step": 20152 }, { "epoch": 1.3654719154414257, "grad_norm": 6.026250839233398, "learning_rate": 7.343829146416594e-05, "loss": 0.6629, "step": 20153 }, { "epoch": 1.3655396707093976, "grad_norm": 6.370750427246094, "learning_rate": 7.343692244506812e-05, "loss": 0.6251, "step": 20154 }, { "epoch": 1.3656074259773696, "grad_norm": 5.418914794921875, "learning_rate": 7.34355534259703e-05, "loss": 0.7145, "step": 20155 }, { "epoch": 1.3656751812453418, "grad_norm": 7.243440628051758, "learning_rate": 7.343418440687248e-05, "loss": 0.9619, "step": 20156 }, { "epoch": 1.365742936513314, "grad_norm": 5.129270553588867, "learning_rate": 7.343281538777467e-05, "loss": 0.5543, "step": 20157 }, { "epoch": 1.365810691781286, "grad_norm": 4.4051079750061035, "learning_rate": 7.343144636867685e-05, "loss": 0.6505, "step": 20158 }, { "epoch": 1.365878447049258, "grad_norm": 5.100550651550293, "learning_rate": 7.343007734957903e-05, "loss": 0.7191, "step": 20159 }, { "epoch": 1.3659462023172302, "grad_norm": 5.932982444763184, "learning_rate": 7.342870833048121e-05, "loss": 0.6559, "step": 20160 }, { "epoch": 1.3660139575852024, "grad_norm": 7.680897235870361, "learning_rate": 7.34273393113834e-05, "loss": 0.7453, "step": 20161 }, { "epoch": 1.3660817128531744, "grad_norm": 5.447516441345215, "learning_rate": 7.342597029228559e-05, "loss": 0.5703, "step": 20162 }, { "epoch": 1.3661494681211463, "grad_norm": 5.584428310394287, "learning_rate": 7.342460127318777e-05, "loss": 0.582, "step": 20163 }, { "epoch": 1.3662172233891186, "grad_norm": 4.155641078948975, "learning_rate": 7.342323225408995e-05, "loss": 0.685, "step": 20164 }, { "epoch": 1.3662849786570905, "grad_norm": 7.435667514801025, "learning_rate": 7.342186323499213e-05, "loss": 0.5706, "step": 20165 }, { "epoch": 1.3663527339250627, "grad_norm": 5.5184102058410645, "learning_rate": 7.342049421589432e-05, "loss": 0.5828, "step": 20166 }, { "epoch": 1.3664204891930347, "grad_norm": 6.287384033203125, "learning_rate": 7.34191251967965e-05, "loss": 0.9034, "step": 20167 }, { "epoch": 1.366488244461007, "grad_norm": 8.249863624572754, "learning_rate": 7.341775617769868e-05, "loss": 0.7587, "step": 20168 }, { "epoch": 1.366555999728979, "grad_norm": 4.764066219329834, "learning_rate": 7.341638715860086e-05, "loss": 0.4863, "step": 20169 }, { "epoch": 1.3666237549969509, "grad_norm": 7.109884262084961, "learning_rate": 7.341501813950304e-05, "loss": 0.9377, "step": 20170 }, { "epoch": 1.366691510264923, "grad_norm": 6.360245227813721, "learning_rate": 7.341364912040524e-05, "loss": 0.7904, "step": 20171 }, { "epoch": 1.3667592655328953, "grad_norm": 5.585444450378418, "learning_rate": 7.341228010130742e-05, "loss": 0.5923, "step": 20172 }, { "epoch": 1.3668270208008673, "grad_norm": 6.293943881988525, "learning_rate": 7.34109110822096e-05, "loss": 0.601, "step": 20173 }, { "epoch": 1.3668947760688392, "grad_norm": 6.488381862640381, "learning_rate": 7.340954206311178e-05, "loss": 0.798, "step": 20174 }, { "epoch": 1.3669625313368114, "grad_norm": 5.550592422485352, "learning_rate": 7.340817304401397e-05, "loss": 0.8187, "step": 20175 }, { "epoch": 1.3670302866047837, "grad_norm": 6.912769317626953, "learning_rate": 7.340680402491615e-05, "loss": 0.6221, "step": 20176 }, { "epoch": 1.3670980418727556, "grad_norm": 6.651598930358887, "learning_rate": 7.340543500581833e-05, "loss": 0.675, "step": 20177 }, { "epoch": 1.3671657971407276, "grad_norm": 5.200508117675781, "learning_rate": 7.340406598672051e-05, "loss": 0.7206, "step": 20178 }, { "epoch": 1.3672335524086998, "grad_norm": 5.525914669036865, "learning_rate": 7.34026969676227e-05, "loss": 0.6245, "step": 20179 }, { "epoch": 1.3673013076766718, "grad_norm": 7.679629325866699, "learning_rate": 7.340132794852489e-05, "loss": 0.8756, "step": 20180 }, { "epoch": 1.367369062944644, "grad_norm": 4.98943567276001, "learning_rate": 7.339995892942707e-05, "loss": 0.7636, "step": 20181 }, { "epoch": 1.367436818212616, "grad_norm": 5.590726852416992, "learning_rate": 7.339858991032925e-05, "loss": 0.621, "step": 20182 }, { "epoch": 1.3675045734805882, "grad_norm": 6.432532787322998, "learning_rate": 7.339722089123143e-05, "loss": 0.7312, "step": 20183 }, { "epoch": 1.3675723287485602, "grad_norm": 6.933303356170654, "learning_rate": 7.339585187213362e-05, "loss": 0.7112, "step": 20184 }, { "epoch": 1.3676400840165324, "grad_norm": 8.58838176727295, "learning_rate": 7.33944828530358e-05, "loss": 0.8534, "step": 20185 }, { "epoch": 1.3677078392845043, "grad_norm": 5.378226280212402, "learning_rate": 7.339311383393798e-05, "loss": 0.908, "step": 20186 }, { "epoch": 1.3677755945524765, "grad_norm": 6.113254547119141, "learning_rate": 7.339174481484016e-05, "loss": 0.5784, "step": 20187 }, { "epoch": 1.3678433498204485, "grad_norm": 6.084851264953613, "learning_rate": 7.339037579574234e-05, "loss": 0.6692, "step": 20188 }, { "epoch": 1.3679111050884205, "grad_norm": 4.855129718780518, "learning_rate": 7.338900677664454e-05, "loss": 0.6994, "step": 20189 }, { "epoch": 1.3679788603563927, "grad_norm": 5.024105548858643, "learning_rate": 7.338763775754672e-05, "loss": 0.6321, "step": 20190 }, { "epoch": 1.368046615624365, "grad_norm": 3.4362354278564453, "learning_rate": 7.33862687384489e-05, "loss": 0.5783, "step": 20191 }, { "epoch": 1.368114370892337, "grad_norm": 4.923007965087891, "learning_rate": 7.338489971935109e-05, "loss": 0.5771, "step": 20192 }, { "epoch": 1.3681821261603089, "grad_norm": 6.77817964553833, "learning_rate": 7.338353070025327e-05, "loss": 0.7063, "step": 20193 }, { "epoch": 1.368249881428281, "grad_norm": 8.699446678161621, "learning_rate": 7.338216168115545e-05, "loss": 0.9417, "step": 20194 }, { "epoch": 1.368317636696253, "grad_norm": 9.95015811920166, "learning_rate": 7.338079266205765e-05, "loss": 0.5542, "step": 20195 }, { "epoch": 1.3683853919642253, "grad_norm": 6.480830669403076, "learning_rate": 7.337942364295983e-05, "loss": 0.5483, "step": 20196 }, { "epoch": 1.3684531472321972, "grad_norm": 6.260460376739502, "learning_rate": 7.337805462386201e-05, "loss": 0.7781, "step": 20197 }, { "epoch": 1.3685209025001694, "grad_norm": 5.96747350692749, "learning_rate": 7.33766856047642e-05, "loss": 0.6474, "step": 20198 }, { "epoch": 1.3685886577681414, "grad_norm": 5.325284957885742, "learning_rate": 7.337531658566638e-05, "loss": 0.6621, "step": 20199 }, { "epoch": 1.3686564130361136, "grad_norm": 6.398950576782227, "learning_rate": 7.337394756656856e-05, "loss": 0.8005, "step": 20200 }, { "epoch": 1.3687241683040856, "grad_norm": 5.737882137298584, "learning_rate": 7.337257854747074e-05, "loss": 0.6754, "step": 20201 }, { "epoch": 1.3687919235720578, "grad_norm": 8.167442321777344, "learning_rate": 7.337120952837292e-05, "loss": 0.6383, "step": 20202 }, { "epoch": 1.3688596788400298, "grad_norm": 5.735769748687744, "learning_rate": 7.336984050927512e-05, "loss": 0.7091, "step": 20203 }, { "epoch": 1.3689274341080018, "grad_norm": 9.236515998840332, "learning_rate": 7.33684714901773e-05, "loss": 0.9395, "step": 20204 }, { "epoch": 1.368995189375974, "grad_norm": 5.579743385314941, "learning_rate": 7.336710247107948e-05, "loss": 0.5371, "step": 20205 }, { "epoch": 1.3690629446439462, "grad_norm": 7.265985488891602, "learning_rate": 7.336573345198166e-05, "loss": 0.6223, "step": 20206 }, { "epoch": 1.3691306999119182, "grad_norm": 6.644430637359619, "learning_rate": 7.336436443288385e-05, "loss": 0.8193, "step": 20207 }, { "epoch": 1.3691984551798901, "grad_norm": 5.611253261566162, "learning_rate": 7.336299541378603e-05, "loss": 0.547, "step": 20208 }, { "epoch": 1.3692662104478623, "grad_norm": 6.648104190826416, "learning_rate": 7.336162639468821e-05, "loss": 0.5968, "step": 20209 }, { "epoch": 1.3693339657158345, "grad_norm": 7.135929107666016, "learning_rate": 7.336025737559039e-05, "loss": 0.5747, "step": 20210 }, { "epoch": 1.3694017209838065, "grad_norm": 4.654504776000977, "learning_rate": 7.335888835649257e-05, "loss": 0.5537, "step": 20211 }, { "epoch": 1.3694694762517785, "grad_norm": 5.404601573944092, "learning_rate": 7.335751933739477e-05, "loss": 0.8703, "step": 20212 }, { "epoch": 1.3695372315197507, "grad_norm": 5.747010707855225, "learning_rate": 7.335615031829695e-05, "loss": 0.8071, "step": 20213 }, { "epoch": 1.3696049867877227, "grad_norm": 4.667849063873291, "learning_rate": 7.335478129919913e-05, "loss": 0.8663, "step": 20214 }, { "epoch": 1.369672742055695, "grad_norm": 5.7387614250183105, "learning_rate": 7.335341228010131e-05, "loss": 0.6159, "step": 20215 }, { "epoch": 1.3697404973236669, "grad_norm": 6.030163764953613, "learning_rate": 7.335204326100349e-05, "loss": 0.66, "step": 20216 }, { "epoch": 1.369808252591639, "grad_norm": 5.371430397033691, "learning_rate": 7.335067424190568e-05, "loss": 0.6313, "step": 20217 }, { "epoch": 1.369876007859611, "grad_norm": 6.110231876373291, "learning_rate": 7.334930522280786e-05, "loss": 0.7159, "step": 20218 }, { "epoch": 1.369943763127583, "grad_norm": 5.315763473510742, "learning_rate": 7.334793620371004e-05, "loss": 0.8015, "step": 20219 }, { "epoch": 1.3700115183955552, "grad_norm": 4.556207180023193, "learning_rate": 7.334656718461222e-05, "loss": 0.529, "step": 20220 }, { "epoch": 1.3700792736635274, "grad_norm": 5.897430896759033, "learning_rate": 7.334519816551442e-05, "loss": 0.673, "step": 20221 }, { "epoch": 1.3701470289314994, "grad_norm": 6.456575870513916, "learning_rate": 7.33438291464166e-05, "loss": 0.6969, "step": 20222 }, { "epoch": 1.3702147841994714, "grad_norm": 5.691300392150879, "learning_rate": 7.334246012731878e-05, "loss": 0.6749, "step": 20223 }, { "epoch": 1.3702825394674436, "grad_norm": 6.398644924163818, "learning_rate": 7.334109110822096e-05, "loss": 0.7605, "step": 20224 }, { "epoch": 1.3703502947354158, "grad_norm": 5.263449192047119, "learning_rate": 7.333972208912314e-05, "loss": 0.7178, "step": 20225 }, { "epoch": 1.3704180500033878, "grad_norm": 6.170897006988525, "learning_rate": 7.333835307002533e-05, "loss": 0.6149, "step": 20226 }, { "epoch": 1.3704858052713598, "grad_norm": 5.4641313552856445, "learning_rate": 7.333698405092751e-05, "loss": 0.6496, "step": 20227 }, { "epoch": 1.370553560539332, "grad_norm": 4.535112380981445, "learning_rate": 7.333561503182969e-05, "loss": 0.5862, "step": 20228 }, { "epoch": 1.370621315807304, "grad_norm": 6.9944305419921875, "learning_rate": 7.333424601273187e-05, "loss": 0.761, "step": 20229 }, { "epoch": 1.3706890710752762, "grad_norm": 5.982017517089844, "learning_rate": 7.333287699363407e-05, "loss": 0.7067, "step": 20230 }, { "epoch": 1.3707568263432481, "grad_norm": 5.800622940063477, "learning_rate": 7.333150797453625e-05, "loss": 0.8714, "step": 20231 }, { "epoch": 1.3708245816112203, "grad_norm": 6.416711807250977, "learning_rate": 7.333013895543843e-05, "loss": 0.6901, "step": 20232 }, { "epoch": 1.3708923368791923, "grad_norm": 6.021724700927734, "learning_rate": 7.332876993634061e-05, "loss": 0.7686, "step": 20233 }, { "epoch": 1.3709600921471645, "grad_norm": 5.296408176422119, "learning_rate": 7.332740091724279e-05, "loss": 0.7064, "step": 20234 }, { "epoch": 1.3710278474151365, "grad_norm": 5.5727972984313965, "learning_rate": 7.332603189814498e-05, "loss": 0.6777, "step": 20235 }, { "epoch": 1.3710956026831087, "grad_norm": 5.813218593597412, "learning_rate": 7.332466287904716e-05, "loss": 0.8692, "step": 20236 }, { "epoch": 1.3711633579510807, "grad_norm": 10.084124565124512, "learning_rate": 7.332329385994934e-05, "loss": 0.6243, "step": 20237 }, { "epoch": 1.3712311132190527, "grad_norm": 5.395368576049805, "learning_rate": 7.332192484085152e-05, "loss": 0.5462, "step": 20238 }, { "epoch": 1.3712988684870249, "grad_norm": 8.354716300964355, "learning_rate": 7.332055582175372e-05, "loss": 0.6511, "step": 20239 }, { "epoch": 1.371366623754997, "grad_norm": 6.621623992919922, "learning_rate": 7.33191868026559e-05, "loss": 0.8578, "step": 20240 }, { "epoch": 1.371434379022969, "grad_norm": 4.08854866027832, "learning_rate": 7.331781778355808e-05, "loss": 0.4461, "step": 20241 }, { "epoch": 1.371502134290941, "grad_norm": 3.7924752235412598, "learning_rate": 7.331644876446027e-05, "loss": 0.4726, "step": 20242 }, { "epoch": 1.3715698895589132, "grad_norm": 5.49600076675415, "learning_rate": 7.331507974536245e-05, "loss": 0.4941, "step": 20243 }, { "epoch": 1.3716376448268852, "grad_norm": 9.949115753173828, "learning_rate": 7.331371072626463e-05, "loss": 0.6095, "step": 20244 }, { "epoch": 1.3717054000948574, "grad_norm": 5.580012321472168, "learning_rate": 7.331234170716683e-05, "loss": 0.9315, "step": 20245 }, { "epoch": 1.3717731553628294, "grad_norm": 6.709449291229248, "learning_rate": 7.331097268806901e-05, "loss": 0.7689, "step": 20246 }, { "epoch": 1.3718409106308016, "grad_norm": 5.6640448570251465, "learning_rate": 7.330960366897119e-05, "loss": 0.7272, "step": 20247 }, { "epoch": 1.3719086658987736, "grad_norm": 4.907552242279053, "learning_rate": 7.330823464987337e-05, "loss": 0.5979, "step": 20248 }, { "epoch": 1.3719764211667458, "grad_norm": 5.893479824066162, "learning_rate": 7.330686563077556e-05, "loss": 0.5956, "step": 20249 }, { "epoch": 1.3720441764347178, "grad_norm": 6.581900596618652, "learning_rate": 7.330549661167774e-05, "loss": 0.6446, "step": 20250 }, { "epoch": 1.37211193170269, "grad_norm": 7.291118144989014, "learning_rate": 7.330412759257992e-05, "loss": 0.8176, "step": 20251 }, { "epoch": 1.372179686970662, "grad_norm": 5.776988506317139, "learning_rate": 7.33027585734821e-05, "loss": 0.5513, "step": 20252 }, { "epoch": 1.372247442238634, "grad_norm": 6.24314022064209, "learning_rate": 7.33013895543843e-05, "loss": 0.7811, "step": 20253 }, { "epoch": 1.3723151975066061, "grad_norm": 6.57139253616333, "learning_rate": 7.330002053528648e-05, "loss": 0.8712, "step": 20254 }, { "epoch": 1.3723829527745783, "grad_norm": 5.512619972229004, "learning_rate": 7.329865151618866e-05, "loss": 0.7098, "step": 20255 }, { "epoch": 1.3724507080425503, "grad_norm": 4.208182334899902, "learning_rate": 7.329728249709084e-05, "loss": 0.3734, "step": 20256 }, { "epoch": 1.3725184633105223, "grad_norm": 4.379811763763428, "learning_rate": 7.329591347799302e-05, "loss": 0.7614, "step": 20257 }, { "epoch": 1.3725862185784945, "grad_norm": 4.9382100105285645, "learning_rate": 7.329454445889521e-05, "loss": 0.4686, "step": 20258 }, { "epoch": 1.3726539738464665, "grad_norm": 6.3968305587768555, "learning_rate": 7.329317543979739e-05, "loss": 0.6043, "step": 20259 }, { "epoch": 1.3727217291144387, "grad_norm": 7.11349630355835, "learning_rate": 7.329180642069957e-05, "loss": 0.6991, "step": 20260 }, { "epoch": 1.3727894843824107, "grad_norm": 7.547804355621338, "learning_rate": 7.329043740160175e-05, "loss": 0.7745, "step": 20261 }, { "epoch": 1.3728572396503829, "grad_norm": 5.4480767250061035, "learning_rate": 7.328906838250395e-05, "loss": 0.6008, "step": 20262 }, { "epoch": 1.3729249949183548, "grad_norm": 6.172518730163574, "learning_rate": 7.328769936340613e-05, "loss": 0.7722, "step": 20263 }, { "epoch": 1.372992750186327, "grad_norm": 7.10513973236084, "learning_rate": 7.328633034430831e-05, "loss": 0.6564, "step": 20264 }, { "epoch": 1.373060505454299, "grad_norm": 4.175897598266602, "learning_rate": 7.328496132521049e-05, "loss": 0.6027, "step": 20265 }, { "epoch": 1.3731282607222712, "grad_norm": 4.645842552185059, "learning_rate": 7.328359230611267e-05, "loss": 0.7735, "step": 20266 }, { "epoch": 1.3731960159902432, "grad_norm": 9.853218078613281, "learning_rate": 7.328222328701486e-05, "loss": 0.5394, "step": 20267 }, { "epoch": 1.3732637712582152, "grad_norm": 6.076828956604004, "learning_rate": 7.328085426791704e-05, "loss": 0.619, "step": 20268 }, { "epoch": 1.3733315265261874, "grad_norm": 6.183802604675293, "learning_rate": 7.327948524881922e-05, "loss": 0.6398, "step": 20269 }, { "epoch": 1.3733992817941596, "grad_norm": 6.852115154266357, "learning_rate": 7.32781162297214e-05, "loss": 0.9315, "step": 20270 }, { "epoch": 1.3734670370621316, "grad_norm": 5.837322235107422, "learning_rate": 7.327674721062358e-05, "loss": 0.4947, "step": 20271 }, { "epoch": 1.3735347923301036, "grad_norm": 6.0938944816589355, "learning_rate": 7.327537819152578e-05, "loss": 0.5814, "step": 20272 }, { "epoch": 1.3736025475980758, "grad_norm": 6.35918664932251, "learning_rate": 7.327400917242796e-05, "loss": 0.6994, "step": 20273 }, { "epoch": 1.373670302866048, "grad_norm": 5.368040561676025, "learning_rate": 7.327264015333014e-05, "loss": 0.6819, "step": 20274 }, { "epoch": 1.37373805813402, "grad_norm": 8.15754222869873, "learning_rate": 7.327127113423232e-05, "loss": 0.602, "step": 20275 }, { "epoch": 1.373805813401992, "grad_norm": 4.780981540679932, "learning_rate": 7.326990211513451e-05, "loss": 0.6068, "step": 20276 }, { "epoch": 1.3738735686699641, "grad_norm": 4.480286598205566, "learning_rate": 7.326853309603669e-05, "loss": 0.5164, "step": 20277 }, { "epoch": 1.373941323937936, "grad_norm": 6.823038101196289, "learning_rate": 7.326716407693887e-05, "loss": 0.7598, "step": 20278 }, { "epoch": 1.3740090792059083, "grad_norm": 5.026174545288086, "learning_rate": 7.326579505784105e-05, "loss": 0.5234, "step": 20279 }, { "epoch": 1.3740768344738803, "grad_norm": 5.320239067077637, "learning_rate": 7.326442603874323e-05, "loss": 0.7133, "step": 20280 }, { "epoch": 1.3741445897418525, "grad_norm": 6.632630348205566, "learning_rate": 7.326305701964543e-05, "loss": 0.6029, "step": 20281 }, { "epoch": 1.3742123450098245, "grad_norm": 5.793473720550537, "learning_rate": 7.326168800054761e-05, "loss": 0.7203, "step": 20282 }, { "epoch": 1.3742801002777967, "grad_norm": 6.004863262176514, "learning_rate": 7.326031898144979e-05, "loss": 0.6839, "step": 20283 }, { "epoch": 1.3743478555457687, "grad_norm": 5.197539806365967, "learning_rate": 7.325894996235197e-05, "loss": 0.7348, "step": 20284 }, { "epoch": 1.3744156108137409, "grad_norm": 5.693231582641602, "learning_rate": 7.325758094325416e-05, "loss": 0.7733, "step": 20285 }, { "epoch": 1.3744833660817128, "grad_norm": 5.883902549743652, "learning_rate": 7.325621192415634e-05, "loss": 0.7847, "step": 20286 }, { "epoch": 1.3745511213496848, "grad_norm": 6.905340671539307, "learning_rate": 7.325484290505852e-05, "loss": 0.8607, "step": 20287 }, { "epoch": 1.374618876617657, "grad_norm": 5.290592193603516, "learning_rate": 7.325347388596072e-05, "loss": 0.92, "step": 20288 }, { "epoch": 1.3746866318856292, "grad_norm": 5.894668102264404, "learning_rate": 7.32521048668629e-05, "loss": 0.9449, "step": 20289 }, { "epoch": 1.3747543871536012, "grad_norm": 7.014741897583008, "learning_rate": 7.325073584776508e-05, "loss": 0.7467, "step": 20290 }, { "epoch": 1.3748221424215732, "grad_norm": 5.869377136230469, "learning_rate": 7.324936682866727e-05, "loss": 0.7379, "step": 20291 }, { "epoch": 1.3748898976895454, "grad_norm": 6.878076553344727, "learning_rate": 7.324799780956945e-05, "loss": 0.705, "step": 20292 }, { "epoch": 1.3749576529575174, "grad_norm": 5.5401482582092285, "learning_rate": 7.324662879047163e-05, "loss": 0.6514, "step": 20293 }, { "epoch": 1.3750254082254896, "grad_norm": 5.7491536140441895, "learning_rate": 7.324525977137381e-05, "loss": 0.8843, "step": 20294 }, { "epoch": 1.3750931634934616, "grad_norm": 6.3691253662109375, "learning_rate": 7.3243890752276e-05, "loss": 0.826, "step": 20295 }, { "epoch": 1.3751609187614338, "grad_norm": 5.4571213722229, "learning_rate": 7.324252173317819e-05, "loss": 0.6353, "step": 20296 }, { "epoch": 1.3752286740294057, "grad_norm": 4.734516620635986, "learning_rate": 7.324115271408037e-05, "loss": 0.5693, "step": 20297 }, { "epoch": 1.375296429297378, "grad_norm": 6.117015838623047, "learning_rate": 7.323978369498255e-05, "loss": 0.6288, "step": 20298 }, { "epoch": 1.37536418456535, "grad_norm": 6.162884712219238, "learning_rate": 7.323841467588474e-05, "loss": 0.7605, "step": 20299 }, { "epoch": 1.3754319398333221, "grad_norm": 5.02625036239624, "learning_rate": 7.323704565678692e-05, "loss": 0.636, "step": 20300 }, { "epoch": 1.375499695101294, "grad_norm": 6.400944709777832, "learning_rate": 7.32356766376891e-05, "loss": 0.6726, "step": 20301 }, { "epoch": 1.375567450369266, "grad_norm": 6.170064926147461, "learning_rate": 7.323430761859128e-05, "loss": 0.5819, "step": 20302 }, { "epoch": 1.3756352056372383, "grad_norm": 4.994261264801025, "learning_rate": 7.323293859949346e-05, "loss": 0.7581, "step": 20303 }, { "epoch": 1.3757029609052105, "grad_norm": 9.164583206176758, "learning_rate": 7.323156958039566e-05, "loss": 0.797, "step": 20304 }, { "epoch": 1.3757707161731825, "grad_norm": 10.047574043273926, "learning_rate": 7.323020056129784e-05, "loss": 0.6573, "step": 20305 }, { "epoch": 1.3758384714411545, "grad_norm": 5.078217029571533, "learning_rate": 7.322883154220002e-05, "loss": 0.845, "step": 20306 }, { "epoch": 1.3759062267091267, "grad_norm": 6.422628402709961, "learning_rate": 7.32274625231022e-05, "loss": 0.8185, "step": 20307 }, { "epoch": 1.3759739819770986, "grad_norm": 5.372091770172119, "learning_rate": 7.322609350400439e-05, "loss": 0.5677, "step": 20308 }, { "epoch": 1.3760417372450708, "grad_norm": 5.8280181884765625, "learning_rate": 7.322472448490657e-05, "loss": 0.6653, "step": 20309 }, { "epoch": 1.3761094925130428, "grad_norm": 4.480457782745361, "learning_rate": 7.322335546580875e-05, "loss": 0.5778, "step": 20310 }, { "epoch": 1.376177247781015, "grad_norm": 4.853870868682861, "learning_rate": 7.322198644671093e-05, "loss": 0.77, "step": 20311 }, { "epoch": 1.376245003048987, "grad_norm": 4.8951005935668945, "learning_rate": 7.322061742761311e-05, "loss": 0.5748, "step": 20312 }, { "epoch": 1.3763127583169592, "grad_norm": 4.975350856781006, "learning_rate": 7.32192484085153e-05, "loss": 0.591, "step": 20313 }, { "epoch": 1.3763805135849312, "grad_norm": 5.807775974273682, "learning_rate": 7.321787938941749e-05, "loss": 0.6743, "step": 20314 }, { "epoch": 1.3764482688529034, "grad_norm": 4.968635559082031, "learning_rate": 7.321651037031967e-05, "loss": 0.6948, "step": 20315 }, { "epoch": 1.3765160241208754, "grad_norm": 6.76335334777832, "learning_rate": 7.321514135122185e-05, "loss": 0.9211, "step": 20316 }, { "epoch": 1.3765837793888474, "grad_norm": 5.687908172607422, "learning_rate": 7.321377233212404e-05, "loss": 0.6864, "step": 20317 }, { "epoch": 1.3766515346568196, "grad_norm": 4.929174423217773, "learning_rate": 7.321240331302622e-05, "loss": 0.668, "step": 20318 }, { "epoch": 1.3767192899247918, "grad_norm": 5.5727925300598145, "learning_rate": 7.32110342939284e-05, "loss": 0.7836, "step": 20319 }, { "epoch": 1.3767870451927637, "grad_norm": 5.8472137451171875, "learning_rate": 7.320966527483058e-05, "loss": 0.6915, "step": 20320 }, { "epoch": 1.3768548004607357, "grad_norm": 5.638412952423096, "learning_rate": 7.320829625573276e-05, "loss": 0.4108, "step": 20321 }, { "epoch": 1.376922555728708, "grad_norm": 5.857369899749756, "learning_rate": 7.320692723663496e-05, "loss": 0.7202, "step": 20322 }, { "epoch": 1.3769903109966801, "grad_norm": 5.475970268249512, "learning_rate": 7.320555821753714e-05, "loss": 0.7869, "step": 20323 }, { "epoch": 1.377058066264652, "grad_norm": 6.44049596786499, "learning_rate": 7.320418919843932e-05, "loss": 0.7879, "step": 20324 }, { "epoch": 1.377125821532624, "grad_norm": 5.286758899688721, "learning_rate": 7.32028201793415e-05, "loss": 0.5925, "step": 20325 }, { "epoch": 1.3771935768005963, "grad_norm": 6.243403911590576, "learning_rate": 7.320145116024368e-05, "loss": 0.7303, "step": 20326 }, { "epoch": 1.3772613320685683, "grad_norm": 4.560208797454834, "learning_rate": 7.320008214114587e-05, "loss": 0.7207, "step": 20327 }, { "epoch": 1.3773290873365405, "grad_norm": 6.1082892417907715, "learning_rate": 7.319871312204805e-05, "loss": 0.6901, "step": 20328 }, { "epoch": 1.3773968426045125, "grad_norm": 4.779086589813232, "learning_rate": 7.319734410295023e-05, "loss": 0.4677, "step": 20329 }, { "epoch": 1.3774645978724847, "grad_norm": 5.107228755950928, "learning_rate": 7.319597508385241e-05, "loss": 0.7197, "step": 20330 }, { "epoch": 1.3775323531404566, "grad_norm": 4.849102020263672, "learning_rate": 7.319460606475461e-05, "loss": 0.5448, "step": 20331 }, { "epoch": 1.3776001084084288, "grad_norm": 8.529727935791016, "learning_rate": 7.319323704565679e-05, "loss": 0.7663, "step": 20332 }, { "epoch": 1.3776678636764008, "grad_norm": 6.0061564445495605, "learning_rate": 7.319186802655897e-05, "loss": 0.6145, "step": 20333 }, { "epoch": 1.377735618944373, "grad_norm": 5.787989139556885, "learning_rate": 7.319049900746116e-05, "loss": 0.6453, "step": 20334 }, { "epoch": 1.377803374212345, "grad_norm": 5.576568603515625, "learning_rate": 7.318912998836334e-05, "loss": 0.8134, "step": 20335 }, { "epoch": 1.377871129480317, "grad_norm": 5.880032539367676, "learning_rate": 7.318776096926552e-05, "loss": 0.7944, "step": 20336 }, { "epoch": 1.3779388847482892, "grad_norm": 6.104119777679443, "learning_rate": 7.318639195016772e-05, "loss": 0.5183, "step": 20337 }, { "epoch": 1.3780066400162614, "grad_norm": 7.2437286376953125, "learning_rate": 7.31850229310699e-05, "loss": 0.647, "step": 20338 }, { "epoch": 1.3780743952842334, "grad_norm": 6.180482387542725, "learning_rate": 7.318365391197208e-05, "loss": 0.6426, "step": 20339 }, { "epoch": 1.3781421505522053, "grad_norm": 5.325170993804932, "learning_rate": 7.318228489287427e-05, "loss": 0.7455, "step": 20340 }, { "epoch": 1.3782099058201775, "grad_norm": 5.072115898132324, "learning_rate": 7.318091587377645e-05, "loss": 0.6666, "step": 20341 }, { "epoch": 1.3782776610881495, "grad_norm": 5.826999664306641, "learning_rate": 7.317954685467863e-05, "loss": 0.5184, "step": 20342 }, { "epoch": 1.3783454163561217, "grad_norm": 6.262898921966553, "learning_rate": 7.317817783558081e-05, "loss": 0.5792, "step": 20343 }, { "epoch": 1.3784131716240937, "grad_norm": 6.029056072235107, "learning_rate": 7.317680881648299e-05, "loss": 0.7057, "step": 20344 }, { "epoch": 1.378480926892066, "grad_norm": 6.6884074211120605, "learning_rate": 7.317543979738519e-05, "loss": 0.7382, "step": 20345 }, { "epoch": 1.378548682160038, "grad_norm": 6.910470485687256, "learning_rate": 7.317407077828737e-05, "loss": 0.926, "step": 20346 }, { "epoch": 1.37861643742801, "grad_norm": 7.457363128662109, "learning_rate": 7.317270175918955e-05, "loss": 0.5669, "step": 20347 }, { "epoch": 1.378684192695982, "grad_norm": 5.299932479858398, "learning_rate": 7.317133274009173e-05, "loss": 0.8212, "step": 20348 }, { "epoch": 1.3787519479639543, "grad_norm": 6.065412998199463, "learning_rate": 7.316996372099391e-05, "loss": 0.5041, "step": 20349 }, { "epoch": 1.3788197032319263, "grad_norm": 6.145285606384277, "learning_rate": 7.31685947018961e-05, "loss": 0.5454, "step": 20350 }, { "epoch": 1.3788874584998982, "grad_norm": 5.374916076660156, "learning_rate": 7.316722568279828e-05, "loss": 0.628, "step": 20351 }, { "epoch": 1.3789552137678704, "grad_norm": 5.50359582901001, "learning_rate": 7.316585666370046e-05, "loss": 0.5439, "step": 20352 }, { "epoch": 1.3790229690358426, "grad_norm": 6.5091071128845215, "learning_rate": 7.316448764460264e-05, "loss": 0.7833, "step": 20353 }, { "epoch": 1.3790907243038146, "grad_norm": 4.5075907707214355, "learning_rate": 7.316311862550484e-05, "loss": 0.5637, "step": 20354 }, { "epoch": 1.3791584795717866, "grad_norm": 7.816382884979248, "learning_rate": 7.316174960640702e-05, "loss": 0.7871, "step": 20355 }, { "epoch": 1.3792262348397588, "grad_norm": 6.3664021492004395, "learning_rate": 7.31603805873092e-05, "loss": 0.7258, "step": 20356 }, { "epoch": 1.3792939901077308, "grad_norm": 7.237109661102295, "learning_rate": 7.315901156821138e-05, "loss": 0.6609, "step": 20357 }, { "epoch": 1.379361745375703, "grad_norm": 4.956258773803711, "learning_rate": 7.315764254911356e-05, "loss": 0.6582, "step": 20358 }, { "epoch": 1.379429500643675, "grad_norm": 6.4355549812316895, "learning_rate": 7.315627353001575e-05, "loss": 0.7045, "step": 20359 }, { "epoch": 1.3794972559116472, "grad_norm": 6.701473712921143, "learning_rate": 7.315490451091793e-05, "loss": 0.7439, "step": 20360 }, { "epoch": 1.3795650111796192, "grad_norm": 5.054768085479736, "learning_rate": 7.315353549182011e-05, "loss": 0.7266, "step": 20361 }, { "epoch": 1.3796327664475914, "grad_norm": 6.684784412384033, "learning_rate": 7.315216647272229e-05, "loss": 0.6487, "step": 20362 }, { "epoch": 1.3797005217155633, "grad_norm": 7.196541786193848, "learning_rate": 7.315079745362449e-05, "loss": 0.7609, "step": 20363 }, { "epoch": 1.3797682769835355, "grad_norm": 5.671809196472168, "learning_rate": 7.314942843452667e-05, "loss": 0.5983, "step": 20364 }, { "epoch": 1.3798360322515075, "grad_norm": 5.178733825683594, "learning_rate": 7.314805941542885e-05, "loss": 0.7075, "step": 20365 }, { "epoch": 1.3799037875194795, "grad_norm": 5.666396141052246, "learning_rate": 7.314669039633103e-05, "loss": 0.8577, "step": 20366 }, { "epoch": 1.3799715427874517, "grad_norm": 5.6069841384887695, "learning_rate": 7.314532137723321e-05, "loss": 0.7043, "step": 20367 }, { "epoch": 1.380039298055424, "grad_norm": 5.956654071807861, "learning_rate": 7.31439523581354e-05, "loss": 0.6296, "step": 20368 }, { "epoch": 1.380107053323396, "grad_norm": 5.8251495361328125, "learning_rate": 7.314258333903758e-05, "loss": 0.786, "step": 20369 }, { "epoch": 1.3801748085913679, "grad_norm": 6.2874755859375, "learning_rate": 7.314121431993976e-05, "loss": 0.6488, "step": 20370 }, { "epoch": 1.38024256385934, "grad_norm": 4.935168743133545, "learning_rate": 7.313984530084194e-05, "loss": 0.5401, "step": 20371 }, { "epoch": 1.3803103191273123, "grad_norm": 5.350480556488037, "learning_rate": 7.313847628174414e-05, "loss": 0.5885, "step": 20372 }, { "epoch": 1.3803780743952843, "grad_norm": 6.442075252532959, "learning_rate": 7.313710726264632e-05, "loss": 0.6562, "step": 20373 }, { "epoch": 1.3804458296632562, "grad_norm": 4.90691614151001, "learning_rate": 7.31357382435485e-05, "loss": 0.7668, "step": 20374 }, { "epoch": 1.3805135849312284, "grad_norm": 6.224586486816406, "learning_rate": 7.313436922445068e-05, "loss": 0.5663, "step": 20375 }, { "epoch": 1.3805813401992004, "grad_norm": 7.080493450164795, "learning_rate": 7.313300020535286e-05, "loss": 0.607, "step": 20376 }, { "epoch": 1.3806490954671726, "grad_norm": 5.266905784606934, "learning_rate": 7.313163118625505e-05, "loss": 0.556, "step": 20377 }, { "epoch": 1.3807168507351446, "grad_norm": 5.194752216339111, "learning_rate": 7.313026216715723e-05, "loss": 0.6436, "step": 20378 }, { "epoch": 1.3807846060031168, "grad_norm": 6.65355110168457, "learning_rate": 7.312889314805941e-05, "loss": 0.7077, "step": 20379 }, { "epoch": 1.3808523612710888, "grad_norm": 6.874339580535889, "learning_rate": 7.31275241289616e-05, "loss": 0.6872, "step": 20380 }, { "epoch": 1.380920116539061, "grad_norm": 5.854693412780762, "learning_rate": 7.312615510986379e-05, "loss": 0.8174, "step": 20381 }, { "epoch": 1.380987871807033, "grad_norm": 8.31881046295166, "learning_rate": 7.312478609076597e-05, "loss": 0.8854, "step": 20382 }, { "epoch": 1.3810556270750052, "grad_norm": 9.814079284667969, "learning_rate": 7.312341707166816e-05, "loss": 0.7806, "step": 20383 }, { "epoch": 1.3811233823429772, "grad_norm": 5.6080098152160645, "learning_rate": 7.312204805257034e-05, "loss": 0.5368, "step": 20384 }, { "epoch": 1.3811911376109491, "grad_norm": 4.842423439025879, "learning_rate": 7.312067903347252e-05, "loss": 0.7288, "step": 20385 }, { "epoch": 1.3812588928789213, "grad_norm": 7.074604511260986, "learning_rate": 7.311931001437472e-05, "loss": 0.7693, "step": 20386 }, { "epoch": 1.3813266481468935, "grad_norm": 6.795322895050049, "learning_rate": 7.31179409952769e-05, "loss": 0.6153, "step": 20387 }, { "epoch": 1.3813944034148655, "grad_norm": 6.033797740936279, "learning_rate": 7.311657197617908e-05, "loss": 0.6024, "step": 20388 }, { "epoch": 1.3814621586828375, "grad_norm": 4.602676868438721, "learning_rate": 7.311520295708126e-05, "loss": 0.6704, "step": 20389 }, { "epoch": 1.3815299139508097, "grad_norm": 8.749063491821289, "learning_rate": 7.311383393798344e-05, "loss": 0.9615, "step": 20390 }, { "epoch": 1.3815976692187817, "grad_norm": 5.912140846252441, "learning_rate": 7.311246491888563e-05, "loss": 0.6941, "step": 20391 }, { "epoch": 1.381665424486754, "grad_norm": 6.187105178833008, "learning_rate": 7.311109589978781e-05, "loss": 0.6806, "step": 20392 }, { "epoch": 1.3817331797547259, "grad_norm": 5.959619045257568, "learning_rate": 7.310972688068999e-05, "loss": 0.7957, "step": 20393 }, { "epoch": 1.381800935022698, "grad_norm": 6.276679515838623, "learning_rate": 7.310835786159217e-05, "loss": 0.6713, "step": 20394 }, { "epoch": 1.38186869029067, "grad_norm": 7.183295249938965, "learning_rate": 7.310698884249437e-05, "loss": 0.641, "step": 20395 }, { "epoch": 1.3819364455586423, "grad_norm": 6.557753086090088, "learning_rate": 7.310561982339655e-05, "loss": 0.6827, "step": 20396 }, { "epoch": 1.3820042008266142, "grad_norm": 5.6322150230407715, "learning_rate": 7.310425080429873e-05, "loss": 0.6974, "step": 20397 }, { "epoch": 1.3820719560945864, "grad_norm": 4.737672805786133, "learning_rate": 7.31028817852009e-05, "loss": 0.663, "step": 20398 }, { "epoch": 1.3821397113625584, "grad_norm": 3.975170373916626, "learning_rate": 7.310151276610309e-05, "loss": 0.5643, "step": 20399 }, { "epoch": 1.3822074666305304, "grad_norm": 5.786171913146973, "learning_rate": 7.310014374700528e-05, "loss": 0.8125, "step": 20400 }, { "epoch": 1.3822752218985026, "grad_norm": 7.655409336090088, "learning_rate": 7.309877472790746e-05, "loss": 0.7926, "step": 20401 }, { "epoch": 1.3823429771664748, "grad_norm": 5.002418518066406, "learning_rate": 7.309740570880964e-05, "loss": 0.5995, "step": 20402 }, { "epoch": 1.3824107324344468, "grad_norm": 4.654088020324707, "learning_rate": 7.309603668971182e-05, "loss": 0.6314, "step": 20403 }, { "epoch": 1.3824784877024188, "grad_norm": 6.613973617553711, "learning_rate": 7.3094667670614e-05, "loss": 0.7401, "step": 20404 }, { "epoch": 1.382546242970391, "grad_norm": 5.213038444519043, "learning_rate": 7.30932986515162e-05, "loss": 0.4449, "step": 20405 }, { "epoch": 1.382613998238363, "grad_norm": 5.031029224395752, "learning_rate": 7.309192963241838e-05, "loss": 0.5607, "step": 20406 }, { "epoch": 1.3826817535063352, "grad_norm": 6.039463520050049, "learning_rate": 7.309056061332056e-05, "loss": 0.7238, "step": 20407 }, { "epoch": 1.3827495087743071, "grad_norm": 7.074490547180176, "learning_rate": 7.308919159422274e-05, "loss": 0.7184, "step": 20408 }, { "epoch": 1.3828172640422793, "grad_norm": 4.131440162658691, "learning_rate": 7.308782257512493e-05, "loss": 0.6093, "step": 20409 }, { "epoch": 1.3828850193102513, "grad_norm": 6.460183143615723, "learning_rate": 7.308645355602711e-05, "loss": 0.6836, "step": 20410 }, { "epoch": 1.3829527745782235, "grad_norm": 6.51292085647583, "learning_rate": 7.308508453692929e-05, "loss": 0.8358, "step": 20411 }, { "epoch": 1.3830205298461955, "grad_norm": 7.465755939483643, "learning_rate": 7.308371551783147e-05, "loss": 0.5239, "step": 20412 }, { "epoch": 1.3830882851141677, "grad_norm": 5.228099346160889, "learning_rate": 7.308234649873365e-05, "loss": 0.7213, "step": 20413 }, { "epoch": 1.3831560403821397, "grad_norm": 5.660417556762695, "learning_rate": 7.308097747963585e-05, "loss": 0.5706, "step": 20414 }, { "epoch": 1.3832237956501117, "grad_norm": 7.021927356719971, "learning_rate": 7.307960846053803e-05, "loss": 0.9632, "step": 20415 }, { "epoch": 1.3832915509180839, "grad_norm": 5.52778434753418, "learning_rate": 7.307823944144021e-05, "loss": 0.8217, "step": 20416 }, { "epoch": 1.383359306186056, "grad_norm": 7.488955020904541, "learning_rate": 7.307687042234239e-05, "loss": 0.8338, "step": 20417 }, { "epoch": 1.383427061454028, "grad_norm": 8.890849113464355, "learning_rate": 7.307550140324458e-05, "loss": 0.6151, "step": 20418 }, { "epoch": 1.383494816722, "grad_norm": 4.6548051834106445, "learning_rate": 7.307413238414676e-05, "loss": 0.5166, "step": 20419 }, { "epoch": 1.3835625719899722, "grad_norm": 7.700250625610352, "learning_rate": 7.307276336504894e-05, "loss": 0.6665, "step": 20420 }, { "epoch": 1.3836303272579444, "grad_norm": 4.801332473754883, "learning_rate": 7.307139434595112e-05, "loss": 0.5672, "step": 20421 }, { "epoch": 1.3836980825259164, "grad_norm": 6.802975654602051, "learning_rate": 7.30700253268533e-05, "loss": 0.627, "step": 20422 }, { "epoch": 1.3837658377938884, "grad_norm": 6.158637046813965, "learning_rate": 7.30686563077555e-05, "loss": 0.7941, "step": 20423 }, { "epoch": 1.3838335930618606, "grad_norm": 5.717502593994141, "learning_rate": 7.306728728865768e-05, "loss": 0.6815, "step": 20424 }, { "epoch": 1.3839013483298326, "grad_norm": 7.994260311126709, "learning_rate": 7.306591826955986e-05, "loss": 0.6307, "step": 20425 }, { "epoch": 1.3839691035978048, "grad_norm": 7.579348564147949, "learning_rate": 7.306454925046205e-05, "loss": 0.9128, "step": 20426 }, { "epoch": 1.3840368588657768, "grad_norm": 7.754908561706543, "learning_rate": 7.306318023136423e-05, "loss": 0.6607, "step": 20427 }, { "epoch": 1.384104614133749, "grad_norm": 4.582676410675049, "learning_rate": 7.306181121226641e-05, "loss": 0.6207, "step": 20428 }, { "epoch": 1.384172369401721, "grad_norm": 5.905163764953613, "learning_rate": 7.30604421931686e-05, "loss": 0.7516, "step": 20429 }, { "epoch": 1.3842401246696932, "grad_norm": 5.745233535766602, "learning_rate": 7.305907317407079e-05, "loss": 0.6284, "step": 20430 }, { "epoch": 1.3843078799376651, "grad_norm": 6.089517593383789, "learning_rate": 7.305770415497297e-05, "loss": 0.9449, "step": 20431 }, { "epoch": 1.3843756352056373, "grad_norm": 7.747912406921387, "learning_rate": 7.305633513587516e-05, "loss": 0.6675, "step": 20432 }, { "epoch": 1.3844433904736093, "grad_norm": 6.469924449920654, "learning_rate": 7.305496611677734e-05, "loss": 0.7093, "step": 20433 }, { "epoch": 1.3845111457415813, "grad_norm": 5.462471008300781, "learning_rate": 7.305359709767952e-05, "loss": 1.0206, "step": 20434 }, { "epoch": 1.3845789010095535, "grad_norm": 8.521968841552734, "learning_rate": 7.30522280785817e-05, "loss": 0.8246, "step": 20435 }, { "epoch": 1.3846466562775257, "grad_norm": 5.250690460205078, "learning_rate": 7.305085905948388e-05, "loss": 0.6936, "step": 20436 }, { "epoch": 1.3847144115454977, "grad_norm": 4.87784481048584, "learning_rate": 7.304949004038608e-05, "loss": 0.572, "step": 20437 }, { "epoch": 1.3847821668134697, "grad_norm": 5.196613788604736, "learning_rate": 7.304812102128826e-05, "loss": 0.6428, "step": 20438 }, { "epoch": 1.3848499220814419, "grad_norm": 5.704060077667236, "learning_rate": 7.304675200219044e-05, "loss": 0.4973, "step": 20439 }, { "epoch": 1.3849176773494138, "grad_norm": 5.8050994873046875, "learning_rate": 7.304538298309262e-05, "loss": 0.5353, "step": 20440 }, { "epoch": 1.384985432617386, "grad_norm": 4.382735252380371, "learning_rate": 7.304401396399481e-05, "loss": 0.5882, "step": 20441 }, { "epoch": 1.385053187885358, "grad_norm": 5.1663689613342285, "learning_rate": 7.304264494489699e-05, "loss": 0.6085, "step": 20442 }, { "epoch": 1.3851209431533302, "grad_norm": 4.810373783111572, "learning_rate": 7.304127592579917e-05, "loss": 0.6443, "step": 20443 }, { "epoch": 1.3851886984213022, "grad_norm": 4.354696750640869, "learning_rate": 7.303990690670135e-05, "loss": 0.8518, "step": 20444 }, { "epoch": 1.3852564536892744, "grad_norm": 7.247776985168457, "learning_rate": 7.303853788760353e-05, "loss": 0.5828, "step": 20445 }, { "epoch": 1.3853242089572464, "grad_norm": 6.5299153327941895, "learning_rate": 7.303716886850573e-05, "loss": 0.599, "step": 20446 }, { "epoch": 1.3853919642252186, "grad_norm": 6.256359577178955, "learning_rate": 7.30357998494079e-05, "loss": 0.6412, "step": 20447 }, { "epoch": 1.3854597194931906, "grad_norm": 8.702409744262695, "learning_rate": 7.303443083031009e-05, "loss": 0.7323, "step": 20448 }, { "epoch": 1.3855274747611626, "grad_norm": 5.545639514923096, "learning_rate": 7.303306181121227e-05, "loss": 0.6737, "step": 20449 }, { "epoch": 1.3855952300291348, "grad_norm": 6.769201755523682, "learning_rate": 7.303169279211446e-05, "loss": 0.7446, "step": 20450 }, { "epoch": 1.385662985297107, "grad_norm": 5.7761101722717285, "learning_rate": 7.303032377301664e-05, "loss": 0.6347, "step": 20451 }, { "epoch": 1.385730740565079, "grad_norm": 5.15712308883667, "learning_rate": 7.302895475391882e-05, "loss": 0.676, "step": 20452 }, { "epoch": 1.385798495833051, "grad_norm": 4.643533706665039, "learning_rate": 7.3027585734821e-05, "loss": 0.4227, "step": 20453 }, { "epoch": 1.3858662511010231, "grad_norm": 4.721457481384277, "learning_rate": 7.302621671572318e-05, "loss": 0.4891, "step": 20454 }, { "epoch": 1.385934006368995, "grad_norm": 6.479300022125244, "learning_rate": 7.302484769662538e-05, "loss": 0.7481, "step": 20455 }, { "epoch": 1.3860017616369673, "grad_norm": 5.578685283660889, "learning_rate": 7.302347867752756e-05, "loss": 0.7035, "step": 20456 }, { "epoch": 1.3860695169049393, "grad_norm": 5.744590759277344, "learning_rate": 7.302210965842974e-05, "loss": 0.7356, "step": 20457 }, { "epoch": 1.3861372721729115, "grad_norm": 4.564887046813965, "learning_rate": 7.302074063933192e-05, "loss": 0.6506, "step": 20458 }, { "epoch": 1.3862050274408835, "grad_norm": 4.902970314025879, "learning_rate": 7.30193716202341e-05, "loss": 0.5027, "step": 20459 }, { "epoch": 1.3862727827088557, "grad_norm": 5.609049320220947, "learning_rate": 7.301800260113629e-05, "loss": 0.6258, "step": 20460 }, { "epoch": 1.3863405379768277, "grad_norm": 4.349385738372803, "learning_rate": 7.301663358203847e-05, "loss": 0.5825, "step": 20461 }, { "epoch": 1.3864082932447999, "grad_norm": 8.04593563079834, "learning_rate": 7.301526456294065e-05, "loss": 0.7219, "step": 20462 }, { "epoch": 1.3864760485127718, "grad_norm": 9.800840377807617, "learning_rate": 7.301389554384283e-05, "loss": 0.7666, "step": 20463 }, { "epoch": 1.3865438037807438, "grad_norm": 7.539933204650879, "learning_rate": 7.301252652474503e-05, "loss": 0.8464, "step": 20464 }, { "epoch": 1.386611559048716, "grad_norm": 5.487764835357666, "learning_rate": 7.30111575056472e-05, "loss": 0.7567, "step": 20465 }, { "epoch": 1.3866793143166882, "grad_norm": 5.666287899017334, "learning_rate": 7.300978848654939e-05, "loss": 0.7712, "step": 20466 }, { "epoch": 1.3867470695846602, "grad_norm": 6.722214698791504, "learning_rate": 7.300841946745157e-05, "loss": 0.692, "step": 20467 }, { "epoch": 1.3868148248526322, "grad_norm": 4.737244606018066, "learning_rate": 7.300705044835375e-05, "loss": 0.5181, "step": 20468 }, { "epoch": 1.3868825801206044, "grad_norm": 6.382871150970459, "learning_rate": 7.300568142925594e-05, "loss": 0.7603, "step": 20469 }, { "epoch": 1.3869503353885766, "grad_norm": 5.340418815612793, "learning_rate": 7.300431241015812e-05, "loss": 0.7692, "step": 20470 }, { "epoch": 1.3870180906565486, "grad_norm": 4.241884708404541, "learning_rate": 7.30029433910603e-05, "loss": 0.6452, "step": 20471 }, { "epoch": 1.3870858459245206, "grad_norm": 4.678450107574463, "learning_rate": 7.30015743719625e-05, "loss": 0.6135, "step": 20472 }, { "epoch": 1.3871536011924928, "grad_norm": 5.767404079437256, "learning_rate": 7.300020535286468e-05, "loss": 0.7096, "step": 20473 }, { "epoch": 1.3872213564604647, "grad_norm": 5.82505464553833, "learning_rate": 7.299883633376686e-05, "loss": 0.6379, "step": 20474 }, { "epoch": 1.387289111728437, "grad_norm": 4.2887468338012695, "learning_rate": 7.299746731466905e-05, "loss": 0.598, "step": 20475 }, { "epoch": 1.387356866996409, "grad_norm": 4.690787315368652, "learning_rate": 7.299609829557123e-05, "loss": 0.7355, "step": 20476 }, { "epoch": 1.3874246222643811, "grad_norm": 5.67448616027832, "learning_rate": 7.299472927647341e-05, "loss": 0.7876, "step": 20477 }, { "epoch": 1.387492377532353, "grad_norm": 5.224425792694092, "learning_rate": 7.29933602573756e-05, "loss": 0.6225, "step": 20478 }, { "epoch": 1.3875601328003253, "grad_norm": 6.083443641662598, "learning_rate": 7.299199123827779e-05, "loss": 0.7589, "step": 20479 }, { "epoch": 1.3876278880682973, "grad_norm": 8.024711608886719, "learning_rate": 7.299062221917997e-05, "loss": 0.8642, "step": 20480 }, { "epoch": 1.3876956433362695, "grad_norm": 6.195340633392334, "learning_rate": 7.298925320008215e-05, "loss": 0.7475, "step": 20481 }, { "epoch": 1.3877633986042415, "grad_norm": 7.739239692687988, "learning_rate": 7.298788418098433e-05, "loss": 0.6524, "step": 20482 }, { "epoch": 1.3878311538722135, "grad_norm": 5.241196632385254, "learning_rate": 7.298651516188652e-05, "loss": 0.7318, "step": 20483 }, { "epoch": 1.3878989091401857, "grad_norm": 8.535255432128906, "learning_rate": 7.29851461427887e-05, "loss": 0.7554, "step": 20484 }, { "epoch": 1.3879666644081579, "grad_norm": 8.63418960571289, "learning_rate": 7.298377712369088e-05, "loss": 0.6363, "step": 20485 }, { "epoch": 1.3880344196761298, "grad_norm": 6.98511266708374, "learning_rate": 7.298240810459306e-05, "loss": 0.7992, "step": 20486 }, { "epoch": 1.3881021749441018, "grad_norm": 6.031458377838135, "learning_rate": 7.298103908549526e-05, "loss": 0.644, "step": 20487 }, { "epoch": 1.388169930212074, "grad_norm": 5.275487899780273, "learning_rate": 7.297967006639744e-05, "loss": 0.6453, "step": 20488 }, { "epoch": 1.388237685480046, "grad_norm": 5.963741779327393, "learning_rate": 7.297830104729962e-05, "loss": 0.8473, "step": 20489 }, { "epoch": 1.3883054407480182, "grad_norm": 6.580963611602783, "learning_rate": 7.29769320282018e-05, "loss": 0.727, "step": 20490 }, { "epoch": 1.3883731960159902, "grad_norm": 6.52919340133667, "learning_rate": 7.297556300910398e-05, "loss": 0.683, "step": 20491 }, { "epoch": 1.3884409512839624, "grad_norm": 6.411701679229736, "learning_rate": 7.297419399000617e-05, "loss": 0.7488, "step": 20492 }, { "epoch": 1.3885087065519344, "grad_norm": 6.478389739990234, "learning_rate": 7.297282497090835e-05, "loss": 0.7399, "step": 20493 }, { "epoch": 1.3885764618199066, "grad_norm": 4.216143608093262, "learning_rate": 7.297145595181053e-05, "loss": 0.583, "step": 20494 }, { "epoch": 1.3886442170878786, "grad_norm": 8.471415519714355, "learning_rate": 7.297008693271271e-05, "loss": 0.8536, "step": 20495 }, { "epoch": 1.3887119723558508, "grad_norm": 5.743775844573975, "learning_rate": 7.29687179136149e-05, "loss": 0.784, "step": 20496 }, { "epoch": 1.3887797276238227, "grad_norm": 6.4678730964660645, "learning_rate": 7.296734889451709e-05, "loss": 0.6675, "step": 20497 }, { "epoch": 1.3888474828917947, "grad_norm": 10.042323112487793, "learning_rate": 7.296597987541927e-05, "loss": 0.6322, "step": 20498 }, { "epoch": 1.388915238159767, "grad_norm": 5.827760219573975, "learning_rate": 7.296461085632145e-05, "loss": 0.8622, "step": 20499 }, { "epoch": 1.3889829934277391, "grad_norm": 4.488856792449951, "learning_rate": 7.296324183722363e-05, "loss": 0.5798, "step": 20500 }, { "epoch": 1.389050748695711, "grad_norm": 7.7415080070495605, "learning_rate": 7.296187281812582e-05, "loss": 0.6862, "step": 20501 }, { "epoch": 1.389118503963683, "grad_norm": 5.894077777862549, "learning_rate": 7.2960503799028e-05, "loss": 0.6005, "step": 20502 }, { "epoch": 1.3891862592316553, "grad_norm": 4.4778218269348145, "learning_rate": 7.295913477993018e-05, "loss": 0.6643, "step": 20503 }, { "epoch": 1.3892540144996273, "grad_norm": 5.879588603973389, "learning_rate": 7.295776576083236e-05, "loss": 0.7263, "step": 20504 }, { "epoch": 1.3893217697675995, "grad_norm": 5.82443904876709, "learning_rate": 7.295639674173456e-05, "loss": 0.8286, "step": 20505 }, { "epoch": 1.3893895250355714, "grad_norm": 4.2752556800842285, "learning_rate": 7.295502772263674e-05, "loss": 0.6427, "step": 20506 }, { "epoch": 1.3894572803035437, "grad_norm": 6.227874279022217, "learning_rate": 7.295365870353892e-05, "loss": 0.7474, "step": 20507 }, { "epoch": 1.3895250355715156, "grad_norm": 7.3680596351623535, "learning_rate": 7.29522896844411e-05, "loss": 0.7991, "step": 20508 }, { "epoch": 1.3895927908394878, "grad_norm": 6.60573673248291, "learning_rate": 7.295092066534328e-05, "loss": 0.8055, "step": 20509 }, { "epoch": 1.3896605461074598, "grad_norm": 5.480033874511719, "learning_rate": 7.294955164624547e-05, "loss": 0.674, "step": 20510 }, { "epoch": 1.389728301375432, "grad_norm": 6.01137113571167, "learning_rate": 7.294818262714765e-05, "loss": 0.5369, "step": 20511 }, { "epoch": 1.389796056643404, "grad_norm": 5.227288722991943, "learning_rate": 7.294681360804983e-05, "loss": 0.6005, "step": 20512 }, { "epoch": 1.389863811911376, "grad_norm": 5.338500499725342, "learning_rate": 7.294544458895201e-05, "loss": 0.605, "step": 20513 }, { "epoch": 1.3899315671793482, "grad_norm": 6.283463954925537, "learning_rate": 7.294407556985419e-05, "loss": 0.7765, "step": 20514 }, { "epoch": 1.3899993224473204, "grad_norm": 7.272385120391846, "learning_rate": 7.294270655075639e-05, "loss": 0.8038, "step": 20515 }, { "epoch": 1.3900670777152924, "grad_norm": 5.496292591094971, "learning_rate": 7.294133753165857e-05, "loss": 0.9752, "step": 20516 }, { "epoch": 1.3901348329832643, "grad_norm": 4.721925735473633, "learning_rate": 7.293996851256075e-05, "loss": 0.6508, "step": 20517 }, { "epoch": 1.3902025882512365, "grad_norm": 7.222270965576172, "learning_rate": 7.293859949346293e-05, "loss": 0.7113, "step": 20518 }, { "epoch": 1.3902703435192088, "grad_norm": 5.155622959136963, "learning_rate": 7.293723047436512e-05, "loss": 0.7241, "step": 20519 }, { "epoch": 1.3903380987871807, "grad_norm": 6.53490686416626, "learning_rate": 7.29358614552673e-05, "loss": 0.7656, "step": 20520 }, { "epoch": 1.3904058540551527, "grad_norm": 5.248679161071777, "learning_rate": 7.293449243616948e-05, "loss": 0.4829, "step": 20521 }, { "epoch": 1.390473609323125, "grad_norm": 4.006702899932861, "learning_rate": 7.293312341707168e-05, "loss": 0.6541, "step": 20522 }, { "epoch": 1.390541364591097, "grad_norm": 5.258564472198486, "learning_rate": 7.293175439797386e-05, "loss": 0.6486, "step": 20523 }, { "epoch": 1.390609119859069, "grad_norm": 7.182815074920654, "learning_rate": 7.293038537887604e-05, "loss": 0.6793, "step": 20524 }, { "epoch": 1.390676875127041, "grad_norm": 5.871503829956055, "learning_rate": 7.292901635977823e-05, "loss": 0.5429, "step": 20525 }, { "epoch": 1.3907446303950133, "grad_norm": 5.519516468048096, "learning_rate": 7.292764734068041e-05, "loss": 0.5606, "step": 20526 }, { "epoch": 1.3908123856629853, "grad_norm": 5.83746862411499, "learning_rate": 7.292627832158259e-05, "loss": 0.5923, "step": 20527 }, { "epoch": 1.3908801409309575, "grad_norm": 5.935216426849365, "learning_rate": 7.292490930248478e-05, "loss": 0.7961, "step": 20528 }, { "epoch": 1.3909478961989294, "grad_norm": 5.7030744552612305, "learning_rate": 7.292354028338696e-05, "loss": 0.5355, "step": 20529 }, { "epoch": 1.3910156514669016, "grad_norm": 7.065756797790527, "learning_rate": 7.292217126428915e-05, "loss": 0.5951, "step": 20530 }, { "epoch": 1.3910834067348736, "grad_norm": 6.235055446624756, "learning_rate": 7.292080224519133e-05, "loss": 0.5841, "step": 20531 }, { "epoch": 1.3911511620028456, "grad_norm": 4.4500555992126465, "learning_rate": 7.29194332260935e-05, "loss": 0.5591, "step": 20532 }, { "epoch": 1.3912189172708178, "grad_norm": 5.394603729248047, "learning_rate": 7.29180642069957e-05, "loss": 0.6246, "step": 20533 }, { "epoch": 1.39128667253879, "grad_norm": 5.33821964263916, "learning_rate": 7.291669518789788e-05, "loss": 0.6047, "step": 20534 }, { "epoch": 1.391354427806762, "grad_norm": 7.6039605140686035, "learning_rate": 7.291532616880006e-05, "loss": 0.614, "step": 20535 }, { "epoch": 1.391422183074734, "grad_norm": 5.971287727355957, "learning_rate": 7.291395714970224e-05, "loss": 0.7888, "step": 20536 }, { "epoch": 1.3914899383427062, "grad_norm": 6.0131731033325195, "learning_rate": 7.291258813060442e-05, "loss": 0.6824, "step": 20537 }, { "epoch": 1.3915576936106782, "grad_norm": 4.6639723777771, "learning_rate": 7.291121911150662e-05, "loss": 0.6474, "step": 20538 }, { "epoch": 1.3916254488786504, "grad_norm": 5.323382377624512, "learning_rate": 7.29098500924088e-05, "loss": 0.7566, "step": 20539 }, { "epoch": 1.3916932041466223, "grad_norm": 6.484259128570557, "learning_rate": 7.290848107331098e-05, "loss": 0.8543, "step": 20540 }, { "epoch": 1.3917609594145945, "grad_norm": 6.541477680206299, "learning_rate": 7.290711205421316e-05, "loss": 0.7319, "step": 20541 }, { "epoch": 1.3918287146825665, "grad_norm": 4.985487461090088, "learning_rate": 7.290574303511535e-05, "loss": 0.513, "step": 20542 }, { "epoch": 1.3918964699505387, "grad_norm": 4.819558143615723, "learning_rate": 7.290437401601753e-05, "loss": 0.5251, "step": 20543 }, { "epoch": 1.3919642252185107, "grad_norm": 5.807501316070557, "learning_rate": 7.290300499691971e-05, "loss": 0.5196, "step": 20544 }, { "epoch": 1.392031980486483, "grad_norm": 6.458996772766113, "learning_rate": 7.290163597782189e-05, "loss": 0.6449, "step": 20545 }, { "epoch": 1.392099735754455, "grad_norm": 5.673578262329102, "learning_rate": 7.290026695872407e-05, "loss": 0.726, "step": 20546 }, { "epoch": 1.3921674910224269, "grad_norm": 6.787565231323242, "learning_rate": 7.289889793962627e-05, "loss": 0.7374, "step": 20547 }, { "epoch": 1.392235246290399, "grad_norm": 5.997424125671387, "learning_rate": 7.289752892052845e-05, "loss": 0.4837, "step": 20548 }, { "epoch": 1.3923030015583713, "grad_norm": 7.189516544342041, "learning_rate": 7.289615990143063e-05, "loss": 0.6836, "step": 20549 }, { "epoch": 1.3923707568263433, "grad_norm": 6.958525657653809, "learning_rate": 7.28947908823328e-05, "loss": 0.4074, "step": 20550 }, { "epoch": 1.3924385120943152, "grad_norm": 5.8117828369140625, "learning_rate": 7.2893421863235e-05, "loss": 0.8071, "step": 20551 }, { "epoch": 1.3925062673622874, "grad_norm": 5.955989360809326, "learning_rate": 7.289205284413718e-05, "loss": 0.7881, "step": 20552 }, { "epoch": 1.3925740226302594, "grad_norm": 6.029367446899414, "learning_rate": 7.289068382503936e-05, "loss": 0.6528, "step": 20553 }, { "epoch": 1.3926417778982316, "grad_norm": 5.142862796783447, "learning_rate": 7.288931480594154e-05, "loss": 0.563, "step": 20554 }, { "epoch": 1.3927095331662036, "grad_norm": 5.742162227630615, "learning_rate": 7.288794578684372e-05, "loss": 0.5879, "step": 20555 }, { "epoch": 1.3927772884341758, "grad_norm": 4.98937463760376, "learning_rate": 7.288657676774592e-05, "loss": 0.6409, "step": 20556 }, { "epoch": 1.3928450437021478, "grad_norm": 6.341249465942383, "learning_rate": 7.28852077486481e-05, "loss": 0.6196, "step": 20557 }, { "epoch": 1.39291279897012, "grad_norm": 4.5655364990234375, "learning_rate": 7.288383872955028e-05, "loss": 0.4841, "step": 20558 }, { "epoch": 1.392980554238092, "grad_norm": 5.272241115570068, "learning_rate": 7.288246971045246e-05, "loss": 0.6041, "step": 20559 }, { "epoch": 1.3930483095060642, "grad_norm": 4.8256402015686035, "learning_rate": 7.288110069135464e-05, "loss": 0.6373, "step": 20560 }, { "epoch": 1.3931160647740362, "grad_norm": 5.518412113189697, "learning_rate": 7.287973167225683e-05, "loss": 0.7049, "step": 20561 }, { "epoch": 1.3931838200420081, "grad_norm": 5.353679180145264, "learning_rate": 7.287836265315901e-05, "loss": 0.9037, "step": 20562 }, { "epoch": 1.3932515753099803, "grad_norm": 9.145445823669434, "learning_rate": 7.287699363406119e-05, "loss": 0.713, "step": 20563 }, { "epoch": 1.3933193305779525, "grad_norm": 4.824127674102783, "learning_rate": 7.287562461496337e-05, "loss": 0.6073, "step": 20564 }, { "epoch": 1.3933870858459245, "grad_norm": 9.752026557922363, "learning_rate": 7.287425559586557e-05, "loss": 0.6449, "step": 20565 }, { "epoch": 1.3934548411138965, "grad_norm": 8.008760452270508, "learning_rate": 7.287288657676775e-05, "loss": 0.7964, "step": 20566 }, { "epoch": 1.3935225963818687, "grad_norm": 4.878408908843994, "learning_rate": 7.287151755766993e-05, "loss": 0.7633, "step": 20567 }, { "epoch": 1.393590351649841, "grad_norm": 6.4499006271362305, "learning_rate": 7.287014853857212e-05, "loss": 0.7495, "step": 20568 }, { "epoch": 1.3936581069178129, "grad_norm": 5.055918216705322, "learning_rate": 7.28687795194743e-05, "loss": 0.8584, "step": 20569 }, { "epoch": 1.3937258621857849, "grad_norm": 5.775984764099121, "learning_rate": 7.286741050037648e-05, "loss": 0.6903, "step": 20570 }, { "epoch": 1.393793617453757, "grad_norm": 4.7151923179626465, "learning_rate": 7.286604148127867e-05, "loss": 0.5483, "step": 20571 }, { "epoch": 1.393861372721729, "grad_norm": 8.757600784301758, "learning_rate": 7.286467246218086e-05, "loss": 0.7061, "step": 20572 }, { "epoch": 1.3939291279897013, "grad_norm": 5.78377628326416, "learning_rate": 7.286330344308304e-05, "loss": 0.6568, "step": 20573 }, { "epoch": 1.3939968832576732, "grad_norm": 5.809396743774414, "learning_rate": 7.286193442398523e-05, "loss": 0.5852, "step": 20574 }, { "epoch": 1.3940646385256454, "grad_norm": 7.560459613800049, "learning_rate": 7.286056540488741e-05, "loss": 0.7438, "step": 20575 }, { "epoch": 1.3941323937936174, "grad_norm": 6.712879180908203, "learning_rate": 7.285919638578959e-05, "loss": 0.6835, "step": 20576 }, { "epoch": 1.3942001490615894, "grad_norm": 6.142430782318115, "learning_rate": 7.285782736669177e-05, "loss": 0.9084, "step": 20577 }, { "epoch": 1.3942679043295616, "grad_norm": 5.716210842132568, "learning_rate": 7.285645834759395e-05, "loss": 0.6184, "step": 20578 }, { "epoch": 1.3943356595975338, "grad_norm": 4.671881198883057, "learning_rate": 7.285508932849614e-05, "loss": 0.6401, "step": 20579 }, { "epoch": 1.3944034148655058, "grad_norm": 6.182433128356934, "learning_rate": 7.285372030939832e-05, "loss": 0.6885, "step": 20580 }, { "epoch": 1.3944711701334778, "grad_norm": 5.057774543762207, "learning_rate": 7.28523512903005e-05, "loss": 0.7348, "step": 20581 }, { "epoch": 1.39453892540145, "grad_norm": 5.334207057952881, "learning_rate": 7.285098227120269e-05, "loss": 0.6031, "step": 20582 }, { "epoch": 1.3946066806694222, "grad_norm": 4.211550235748291, "learning_rate": 7.284961325210488e-05, "loss": 0.5129, "step": 20583 }, { "epoch": 1.3946744359373942, "grad_norm": 6.037423610687256, "learning_rate": 7.284824423300706e-05, "loss": 0.6945, "step": 20584 }, { "epoch": 1.3947421912053661, "grad_norm": 7.189939975738525, "learning_rate": 7.284687521390924e-05, "loss": 0.6484, "step": 20585 }, { "epoch": 1.3948099464733383, "grad_norm": 5.868744373321533, "learning_rate": 7.284550619481142e-05, "loss": 0.853, "step": 20586 }, { "epoch": 1.3948777017413103, "grad_norm": 3.7997655868530273, "learning_rate": 7.28441371757136e-05, "loss": 0.6024, "step": 20587 }, { "epoch": 1.3949454570092825, "grad_norm": 5.858749866485596, "learning_rate": 7.28427681566158e-05, "loss": 0.6834, "step": 20588 }, { "epoch": 1.3950132122772545, "grad_norm": 6.8775858879089355, "learning_rate": 7.284139913751798e-05, "loss": 0.7724, "step": 20589 }, { "epoch": 1.3950809675452267, "grad_norm": 7.414551258087158, "learning_rate": 7.284003011842016e-05, "loss": 0.7603, "step": 20590 }, { "epoch": 1.3951487228131987, "grad_norm": 6.198145389556885, "learning_rate": 7.283866109932234e-05, "loss": 0.7322, "step": 20591 }, { "epoch": 1.3952164780811709, "grad_norm": 5.116663455963135, "learning_rate": 7.283729208022452e-05, "loss": 0.6196, "step": 20592 }, { "epoch": 1.3952842333491429, "grad_norm": 6.59523344039917, "learning_rate": 7.283592306112671e-05, "loss": 0.8394, "step": 20593 }, { "epoch": 1.395351988617115, "grad_norm": 5.884121417999268, "learning_rate": 7.283455404202889e-05, "loss": 0.597, "step": 20594 }, { "epoch": 1.395419743885087, "grad_norm": 4.400575160980225, "learning_rate": 7.283318502293107e-05, "loss": 0.6725, "step": 20595 }, { "epoch": 1.395487499153059, "grad_norm": 7.258331298828125, "learning_rate": 7.283181600383325e-05, "loss": 0.7445, "step": 20596 }, { "epoch": 1.3955552544210312, "grad_norm": 5.085100173950195, "learning_rate": 7.283044698473544e-05, "loss": 0.6133, "step": 20597 }, { "epoch": 1.3956230096890034, "grad_norm": 5.1210761070251465, "learning_rate": 7.282907796563763e-05, "loss": 0.5331, "step": 20598 }, { "epoch": 1.3956907649569754, "grad_norm": 12.255176544189453, "learning_rate": 7.28277089465398e-05, "loss": 0.7894, "step": 20599 }, { "epoch": 1.3957585202249474, "grad_norm": 4.071352481842041, "learning_rate": 7.282633992744199e-05, "loss": 0.6638, "step": 20600 }, { "epoch": 1.3958262754929196, "grad_norm": 5.84407377243042, "learning_rate": 7.282497090834417e-05, "loss": 0.6766, "step": 20601 }, { "epoch": 1.3958940307608916, "grad_norm": 6.71152400970459, "learning_rate": 7.282360188924636e-05, "loss": 0.6006, "step": 20602 }, { "epoch": 1.3959617860288638, "grad_norm": 4.672503471374512, "learning_rate": 7.282223287014854e-05, "loss": 0.5435, "step": 20603 }, { "epoch": 1.3960295412968358, "grad_norm": 5.69974946975708, "learning_rate": 7.282086385105072e-05, "loss": 0.7814, "step": 20604 }, { "epoch": 1.396097296564808, "grad_norm": 6.436532974243164, "learning_rate": 7.28194948319529e-05, "loss": 0.6791, "step": 20605 }, { "epoch": 1.39616505183278, "grad_norm": 5.494413375854492, "learning_rate": 7.28181258128551e-05, "loss": 0.676, "step": 20606 }, { "epoch": 1.3962328071007521, "grad_norm": 5.317508697509766, "learning_rate": 7.281675679375728e-05, "loss": 0.7103, "step": 20607 }, { "epoch": 1.3963005623687241, "grad_norm": 5.30086088180542, "learning_rate": 7.281538777465946e-05, "loss": 0.6595, "step": 20608 }, { "epoch": 1.3963683176366963, "grad_norm": 6.089252948760986, "learning_rate": 7.281401875556164e-05, "loss": 0.7081, "step": 20609 }, { "epoch": 1.3964360729046683, "grad_norm": 6.77025032043457, "learning_rate": 7.281264973646382e-05, "loss": 0.5844, "step": 20610 }, { "epoch": 1.3965038281726403, "grad_norm": 4.529181003570557, "learning_rate": 7.281128071736601e-05, "loss": 0.566, "step": 20611 }, { "epoch": 1.3965715834406125, "grad_norm": 4.887881278991699, "learning_rate": 7.280991169826819e-05, "loss": 0.6927, "step": 20612 }, { "epoch": 1.3966393387085847, "grad_norm": 6.025885581970215, "learning_rate": 7.280854267917037e-05, "loss": 0.6701, "step": 20613 }, { "epoch": 1.3967070939765567, "grad_norm": 4.4143452644348145, "learning_rate": 7.280717366007256e-05, "loss": 0.6298, "step": 20614 }, { "epoch": 1.3967748492445287, "grad_norm": 7.4753241539001465, "learning_rate": 7.280580464097475e-05, "loss": 0.9062, "step": 20615 }, { "epoch": 1.3968426045125009, "grad_norm": 6.792645454406738, "learning_rate": 7.280443562187693e-05, "loss": 0.5929, "step": 20616 }, { "epoch": 1.396910359780473, "grad_norm": 7.330316543579102, "learning_rate": 7.280306660277912e-05, "loss": 0.6641, "step": 20617 }, { "epoch": 1.396978115048445, "grad_norm": 9.892037391662598, "learning_rate": 7.28016975836813e-05, "loss": 0.6347, "step": 20618 }, { "epoch": 1.397045870316417, "grad_norm": 5.788403034210205, "learning_rate": 7.280032856458348e-05, "loss": 0.7898, "step": 20619 }, { "epoch": 1.3971136255843892, "grad_norm": 5.299747467041016, "learning_rate": 7.279895954548567e-05, "loss": 0.8026, "step": 20620 }, { "epoch": 1.3971813808523612, "grad_norm": 10.044715881347656, "learning_rate": 7.279759052638785e-05, "loss": 0.707, "step": 20621 }, { "epoch": 1.3972491361203334, "grad_norm": 5.334182262420654, "learning_rate": 7.279622150729003e-05, "loss": 0.9063, "step": 20622 }, { "epoch": 1.3973168913883054, "grad_norm": 5.888604164123535, "learning_rate": 7.279485248819222e-05, "loss": 0.5369, "step": 20623 }, { "epoch": 1.3973846466562776, "grad_norm": 7.2684102058410645, "learning_rate": 7.27934834690944e-05, "loss": 0.7278, "step": 20624 }, { "epoch": 1.3974524019242496, "grad_norm": 4.9839887619018555, "learning_rate": 7.279211444999659e-05, "loss": 0.7578, "step": 20625 }, { "epoch": 1.3975201571922216, "grad_norm": 4.467967510223389, "learning_rate": 7.279074543089877e-05, "loss": 0.6547, "step": 20626 }, { "epoch": 1.3975879124601938, "grad_norm": 5.54287576675415, "learning_rate": 7.278937641180095e-05, "loss": 0.7799, "step": 20627 }, { "epoch": 1.397655667728166, "grad_norm": 8.063325881958008, "learning_rate": 7.278800739270313e-05, "loss": 0.814, "step": 20628 }, { "epoch": 1.397723422996138, "grad_norm": 4.2917070388793945, "learning_rate": 7.278663837360532e-05, "loss": 0.5726, "step": 20629 }, { "epoch": 1.39779117826411, "grad_norm": 5.576552867889404, "learning_rate": 7.27852693545075e-05, "loss": 0.7672, "step": 20630 }, { "epoch": 1.3978589335320821, "grad_norm": 5.606688499450684, "learning_rate": 7.278390033540968e-05, "loss": 0.6414, "step": 20631 }, { "epoch": 1.3979266888000543, "grad_norm": 7.5582427978515625, "learning_rate": 7.278253131631187e-05, "loss": 0.6393, "step": 20632 }, { "epoch": 1.3979944440680263, "grad_norm": 5.722403049468994, "learning_rate": 7.278116229721405e-05, "loss": 0.5252, "step": 20633 }, { "epoch": 1.3980621993359983, "grad_norm": 5.292598247528076, "learning_rate": 7.277979327811624e-05, "loss": 0.6473, "step": 20634 }, { "epoch": 1.3981299546039705, "grad_norm": 4.958985328674316, "learning_rate": 7.277842425901842e-05, "loss": 0.5718, "step": 20635 }, { "epoch": 1.3981977098719425, "grad_norm": 7.662604808807373, "learning_rate": 7.27770552399206e-05, "loss": 0.8776, "step": 20636 }, { "epoch": 1.3982654651399147, "grad_norm": 11.361144065856934, "learning_rate": 7.277568622082278e-05, "loss": 0.6405, "step": 20637 }, { "epoch": 1.3983332204078867, "grad_norm": 5.231286525726318, "learning_rate": 7.277431720172497e-05, "loss": 0.7526, "step": 20638 }, { "epoch": 1.3984009756758589, "grad_norm": 4.4123992919921875, "learning_rate": 7.277294818262715e-05, "loss": 0.6023, "step": 20639 }, { "epoch": 1.3984687309438308, "grad_norm": 6.353936195373535, "learning_rate": 7.277157916352934e-05, "loss": 0.6509, "step": 20640 }, { "epoch": 1.398536486211803, "grad_norm": 4.5772480964660645, "learning_rate": 7.277021014443152e-05, "loss": 0.7283, "step": 20641 }, { "epoch": 1.398604241479775, "grad_norm": 7.577533721923828, "learning_rate": 7.27688411253337e-05, "loss": 0.823, "step": 20642 }, { "epoch": 1.3986719967477472, "grad_norm": 5.701266765594482, "learning_rate": 7.276747210623589e-05, "loss": 0.7144, "step": 20643 }, { "epoch": 1.3987397520157192, "grad_norm": 6.972879886627197, "learning_rate": 7.276610308713807e-05, "loss": 0.6125, "step": 20644 }, { "epoch": 1.3988075072836912, "grad_norm": 4.810811519622803, "learning_rate": 7.276473406804025e-05, "loss": 0.5974, "step": 20645 }, { "epoch": 1.3988752625516634, "grad_norm": 5.5094804763793945, "learning_rate": 7.276336504894243e-05, "loss": 0.7645, "step": 20646 }, { "epoch": 1.3989430178196356, "grad_norm": 7.642696857452393, "learning_rate": 7.276199602984461e-05, "loss": 0.7194, "step": 20647 }, { "epoch": 1.3990107730876076, "grad_norm": 4.80438756942749, "learning_rate": 7.27606270107468e-05, "loss": 0.5491, "step": 20648 }, { "epoch": 1.3990785283555796, "grad_norm": 6.601073265075684, "learning_rate": 7.275925799164899e-05, "loss": 0.7409, "step": 20649 }, { "epoch": 1.3991462836235518, "grad_norm": 4.450863361358643, "learning_rate": 7.275788897255117e-05, "loss": 0.541, "step": 20650 }, { "epoch": 1.3992140388915237, "grad_norm": 5.762291431427002, "learning_rate": 7.275651995345335e-05, "loss": 0.6555, "step": 20651 }, { "epoch": 1.399281794159496, "grad_norm": 5.032901287078857, "learning_rate": 7.275515093435554e-05, "loss": 0.5356, "step": 20652 }, { "epoch": 1.399349549427468, "grad_norm": 8.5460844039917, "learning_rate": 7.275378191525772e-05, "loss": 0.6695, "step": 20653 }, { "epoch": 1.3994173046954401, "grad_norm": 7.89060115814209, "learning_rate": 7.27524128961599e-05, "loss": 0.5886, "step": 20654 }, { "epoch": 1.399485059963412, "grad_norm": 7.343736171722412, "learning_rate": 7.275104387706208e-05, "loss": 0.7772, "step": 20655 }, { "epoch": 1.3995528152313843, "grad_norm": 5.3778533935546875, "learning_rate": 7.274967485796426e-05, "loss": 0.6824, "step": 20656 }, { "epoch": 1.3996205704993563, "grad_norm": 5.561041831970215, "learning_rate": 7.274830583886646e-05, "loss": 0.8061, "step": 20657 }, { "epoch": 1.3996883257673285, "grad_norm": 7.154848575592041, "learning_rate": 7.274693681976864e-05, "loss": 0.7753, "step": 20658 }, { "epoch": 1.3997560810353005, "grad_norm": 5.542081356048584, "learning_rate": 7.274556780067082e-05, "loss": 0.5012, "step": 20659 }, { "epoch": 1.3998238363032725, "grad_norm": 5.336951732635498, "learning_rate": 7.274419878157301e-05, "loss": 0.7646, "step": 20660 }, { "epoch": 1.3998915915712447, "grad_norm": 4.772644996643066, "learning_rate": 7.274282976247519e-05, "loss": 0.6514, "step": 20661 }, { "epoch": 1.3999593468392169, "grad_norm": 8.897067070007324, "learning_rate": 7.274146074337737e-05, "loss": 0.6898, "step": 20662 }, { "epoch": 1.4000271021071888, "grad_norm": 4.805134296417236, "learning_rate": 7.274009172427956e-05, "loss": 0.5634, "step": 20663 }, { "epoch": 1.4000948573751608, "grad_norm": 6.334447860717773, "learning_rate": 7.273872270518174e-05, "loss": 0.7942, "step": 20664 }, { "epoch": 1.400162612643133, "grad_norm": 4.842981338500977, "learning_rate": 7.273735368608392e-05, "loss": 0.4608, "step": 20665 }, { "epoch": 1.4002303679111052, "grad_norm": 7.941520690917969, "learning_rate": 7.273598466698612e-05, "loss": 0.5447, "step": 20666 }, { "epoch": 1.4002981231790772, "grad_norm": 4.907886981964111, "learning_rate": 7.27346156478883e-05, "loss": 0.773, "step": 20667 }, { "epoch": 1.4003658784470492, "grad_norm": 5.279584884643555, "learning_rate": 7.273324662879048e-05, "loss": 0.592, "step": 20668 }, { "epoch": 1.4004336337150214, "grad_norm": 5.733725547790527, "learning_rate": 7.273187760969266e-05, "loss": 0.7766, "step": 20669 }, { "epoch": 1.4005013889829934, "grad_norm": 4.443740367889404, "learning_rate": 7.273050859059484e-05, "loss": 0.5764, "step": 20670 }, { "epoch": 1.4005691442509656, "grad_norm": 8.201933860778809, "learning_rate": 7.272913957149703e-05, "loss": 0.5513, "step": 20671 }, { "epoch": 1.4006368995189376, "grad_norm": 6.61494255065918, "learning_rate": 7.272777055239921e-05, "loss": 0.687, "step": 20672 }, { "epoch": 1.4007046547869098, "grad_norm": 8.42844009399414, "learning_rate": 7.27264015333014e-05, "loss": 0.7227, "step": 20673 }, { "epoch": 1.4007724100548817, "grad_norm": 5.402379989624023, "learning_rate": 7.272503251420358e-05, "loss": 0.7774, "step": 20674 }, { "epoch": 1.4008401653228537, "grad_norm": 4.207496643066406, "learning_rate": 7.272366349510577e-05, "loss": 0.5786, "step": 20675 }, { "epoch": 1.400907920590826, "grad_norm": 7.469076156616211, "learning_rate": 7.272229447600795e-05, "loss": 0.5001, "step": 20676 }, { "epoch": 1.4009756758587981, "grad_norm": 6.873068809509277, "learning_rate": 7.272092545691013e-05, "loss": 0.9978, "step": 20677 }, { "epoch": 1.40104343112677, "grad_norm": 4.88472318649292, "learning_rate": 7.271955643781231e-05, "loss": 0.6927, "step": 20678 }, { "epoch": 1.401111186394742, "grad_norm": 6.7449822425842285, "learning_rate": 7.271818741871449e-05, "loss": 0.6321, "step": 20679 }, { "epoch": 1.4011789416627143, "grad_norm": 5.478325366973877, "learning_rate": 7.271681839961668e-05, "loss": 0.6589, "step": 20680 }, { "epoch": 1.4012466969306865, "grad_norm": 4.490951061248779, "learning_rate": 7.271544938051886e-05, "loss": 0.6583, "step": 20681 }, { "epoch": 1.4013144521986585, "grad_norm": 7.423271179199219, "learning_rate": 7.271408036142104e-05, "loss": 0.7931, "step": 20682 }, { "epoch": 1.4013822074666304, "grad_norm": 6.176170349121094, "learning_rate": 7.271271134232323e-05, "loss": 0.7371, "step": 20683 }, { "epoch": 1.4014499627346027, "grad_norm": 4.758020877838135, "learning_rate": 7.271134232322542e-05, "loss": 0.7275, "step": 20684 }, { "epoch": 1.4015177180025746, "grad_norm": 5.996313571929932, "learning_rate": 7.27099733041276e-05, "loss": 0.5249, "step": 20685 }, { "epoch": 1.4015854732705468, "grad_norm": 7.03472375869751, "learning_rate": 7.270860428502978e-05, "loss": 0.7436, "step": 20686 }, { "epoch": 1.4016532285385188, "grad_norm": 8.36611270904541, "learning_rate": 7.270723526593196e-05, "loss": 0.6449, "step": 20687 }, { "epoch": 1.401720983806491, "grad_norm": 6.184720993041992, "learning_rate": 7.270586624683414e-05, "loss": 0.843, "step": 20688 }, { "epoch": 1.401788739074463, "grad_norm": 6.332955837249756, "learning_rate": 7.270449722773633e-05, "loss": 0.7246, "step": 20689 }, { "epoch": 1.4018564943424352, "grad_norm": 5.495899200439453, "learning_rate": 7.270312820863851e-05, "loss": 0.6088, "step": 20690 }, { "epoch": 1.4019242496104072, "grad_norm": 5.085668087005615, "learning_rate": 7.27017591895407e-05, "loss": 0.5534, "step": 20691 }, { "epoch": 1.4019920048783794, "grad_norm": 8.320247650146484, "learning_rate": 7.270039017044288e-05, "loss": 1.0155, "step": 20692 }, { "epoch": 1.4020597601463514, "grad_norm": 8.958895683288574, "learning_rate": 7.269902115134506e-05, "loss": 0.7746, "step": 20693 }, { "epoch": 1.4021275154143233, "grad_norm": 5.862815856933594, "learning_rate": 7.269765213224725e-05, "loss": 0.7058, "step": 20694 }, { "epoch": 1.4021952706822955, "grad_norm": 5.599630832672119, "learning_rate": 7.269628311314943e-05, "loss": 0.6391, "step": 20695 }, { "epoch": 1.4022630259502677, "grad_norm": 4.676006317138672, "learning_rate": 7.269491409405161e-05, "loss": 0.5458, "step": 20696 }, { "epoch": 1.4023307812182397, "grad_norm": 5.0131425857543945, "learning_rate": 7.269354507495379e-05, "loss": 0.6555, "step": 20697 }, { "epoch": 1.4023985364862117, "grad_norm": 4.756067752838135, "learning_rate": 7.269217605585598e-05, "loss": 0.6473, "step": 20698 }, { "epoch": 1.402466291754184, "grad_norm": 11.45476245880127, "learning_rate": 7.269080703675816e-05, "loss": 0.6377, "step": 20699 }, { "epoch": 1.402534047022156, "grad_norm": 5.899047374725342, "learning_rate": 7.268943801766035e-05, "loss": 0.5238, "step": 20700 }, { "epoch": 1.402601802290128, "grad_norm": 6.327764987945557, "learning_rate": 7.268806899856253e-05, "loss": 1.007, "step": 20701 }, { "epoch": 1.4026695575581, "grad_norm": 6.561734199523926, "learning_rate": 7.26866999794647e-05, "loss": 0.7032, "step": 20702 }, { "epoch": 1.4027373128260723, "grad_norm": 7.427700519561768, "learning_rate": 7.26853309603669e-05, "loss": 0.7453, "step": 20703 }, { "epoch": 1.4028050680940443, "grad_norm": 6.288816928863525, "learning_rate": 7.268396194126908e-05, "loss": 0.7208, "step": 20704 }, { "epoch": 1.4028728233620165, "grad_norm": 6.2427592277526855, "learning_rate": 7.268259292217126e-05, "loss": 0.6415, "step": 20705 }, { "epoch": 1.4029405786299884, "grad_norm": 7.817094802856445, "learning_rate": 7.268122390307345e-05, "loss": 0.5566, "step": 20706 }, { "epoch": 1.4030083338979606, "grad_norm": 7.789093017578125, "learning_rate": 7.267985488397563e-05, "loss": 0.6341, "step": 20707 }, { "epoch": 1.4030760891659326, "grad_norm": 6.131396293640137, "learning_rate": 7.267848586487782e-05, "loss": 0.6692, "step": 20708 }, { "epoch": 1.4031438444339046, "grad_norm": 7.360398292541504, "learning_rate": 7.267711684578001e-05, "loss": 0.6011, "step": 20709 }, { "epoch": 1.4032115997018768, "grad_norm": 7.449026107788086, "learning_rate": 7.267574782668219e-05, "loss": 0.9783, "step": 20710 }, { "epoch": 1.403279354969849, "grad_norm": 5.789677143096924, "learning_rate": 7.267437880758437e-05, "loss": 0.7376, "step": 20711 }, { "epoch": 1.403347110237821, "grad_norm": 6.20985746383667, "learning_rate": 7.267300978848656e-05, "loss": 0.7243, "step": 20712 }, { "epoch": 1.403414865505793, "grad_norm": 6.510644435882568, "learning_rate": 7.267164076938874e-05, "loss": 0.6755, "step": 20713 }, { "epoch": 1.4034826207737652, "grad_norm": 6.063263893127441, "learning_rate": 7.267027175029092e-05, "loss": 0.8957, "step": 20714 }, { "epoch": 1.4035503760417374, "grad_norm": 4.613265037536621, "learning_rate": 7.26689027311931e-05, "loss": 0.8698, "step": 20715 }, { "epoch": 1.4036181313097094, "grad_norm": 8.515042304992676, "learning_rate": 7.26675337120953e-05, "loss": 0.7156, "step": 20716 }, { "epoch": 1.4036858865776813, "grad_norm": 5.077844142913818, "learning_rate": 7.266616469299748e-05, "loss": 0.6874, "step": 20717 }, { "epoch": 1.4037536418456535, "grad_norm": 8.580635070800781, "learning_rate": 7.266479567389966e-05, "loss": 1.1178, "step": 20718 }, { "epoch": 1.4038213971136255, "grad_norm": 6.891363143920898, "learning_rate": 7.266342665480184e-05, "loss": 0.7484, "step": 20719 }, { "epoch": 1.4038891523815977, "grad_norm": 4.064321994781494, "learning_rate": 7.266205763570402e-05, "loss": 0.6188, "step": 20720 }, { "epoch": 1.4039569076495697, "grad_norm": 4.292640209197998, "learning_rate": 7.266068861660621e-05, "loss": 0.5479, "step": 20721 }, { "epoch": 1.404024662917542, "grad_norm": 6.199626445770264, "learning_rate": 7.26593195975084e-05, "loss": 0.6834, "step": 20722 }, { "epoch": 1.404092418185514, "grad_norm": 6.3640828132629395, "learning_rate": 7.265795057841057e-05, "loss": 0.7109, "step": 20723 }, { "epoch": 1.4041601734534859, "grad_norm": 5.690673351287842, "learning_rate": 7.265658155931275e-05, "loss": 0.6661, "step": 20724 }, { "epoch": 1.404227928721458, "grad_norm": 6.910696029663086, "learning_rate": 7.265521254021494e-05, "loss": 0.8694, "step": 20725 }, { "epoch": 1.4042956839894303, "grad_norm": 6.176210403442383, "learning_rate": 7.265384352111713e-05, "loss": 0.5864, "step": 20726 }, { "epoch": 1.4043634392574023, "grad_norm": 7.107362270355225, "learning_rate": 7.265247450201931e-05, "loss": 0.65, "step": 20727 }, { "epoch": 1.4044311945253742, "grad_norm": 6.563892841339111, "learning_rate": 7.265110548292149e-05, "loss": 0.757, "step": 20728 }, { "epoch": 1.4044989497933464, "grad_norm": 6.276259422302246, "learning_rate": 7.264973646382367e-05, "loss": 0.6604, "step": 20729 }, { "epoch": 1.4045667050613186, "grad_norm": 6.565476417541504, "learning_rate": 7.264836744472586e-05, "loss": 0.6098, "step": 20730 }, { "epoch": 1.4046344603292906, "grad_norm": 5.932304382324219, "learning_rate": 7.264699842562804e-05, "loss": 0.7436, "step": 20731 }, { "epoch": 1.4047022155972626, "grad_norm": 5.6540398597717285, "learning_rate": 7.264562940653022e-05, "loss": 0.7828, "step": 20732 }, { "epoch": 1.4047699708652348, "grad_norm": 6.34929084777832, "learning_rate": 7.26442603874324e-05, "loss": 0.6085, "step": 20733 }, { "epoch": 1.4048377261332068, "grad_norm": 6.200440406799316, "learning_rate": 7.264289136833459e-05, "loss": 0.6536, "step": 20734 }, { "epoch": 1.404905481401179, "grad_norm": 4.179171085357666, "learning_rate": 7.264152234923678e-05, "loss": 0.4195, "step": 20735 }, { "epoch": 1.404973236669151, "grad_norm": 5.915186882019043, "learning_rate": 7.264015333013896e-05, "loss": 0.7556, "step": 20736 }, { "epoch": 1.4050409919371232, "grad_norm": 5.322765350341797, "learning_rate": 7.263878431104114e-05, "loss": 0.5555, "step": 20737 }, { "epoch": 1.4051087472050952, "grad_norm": 5.9640350341796875, "learning_rate": 7.263741529194332e-05, "loss": 0.7468, "step": 20738 }, { "epoch": 1.4051765024730674, "grad_norm": 3.8958938121795654, "learning_rate": 7.263604627284551e-05, "loss": 0.5828, "step": 20739 }, { "epoch": 1.4052442577410393, "grad_norm": 5.590107440948486, "learning_rate": 7.26346772537477e-05, "loss": 0.778, "step": 20740 }, { "epoch": 1.4053120130090115, "grad_norm": 8.030940055847168, "learning_rate": 7.263330823464987e-05, "loss": 0.7436, "step": 20741 }, { "epoch": 1.4053797682769835, "grad_norm": 5.300573825836182, "learning_rate": 7.263193921555206e-05, "loss": 0.707, "step": 20742 }, { "epoch": 1.4054475235449555, "grad_norm": 5.5048041343688965, "learning_rate": 7.263057019645424e-05, "loss": 0.9087, "step": 20743 }, { "epoch": 1.4055152788129277, "grad_norm": 5.326664447784424, "learning_rate": 7.262920117735643e-05, "loss": 0.5474, "step": 20744 }, { "epoch": 1.4055830340809, "grad_norm": 5.541895866394043, "learning_rate": 7.262783215825861e-05, "loss": 0.6199, "step": 20745 }, { "epoch": 1.4056507893488719, "grad_norm": 5.6141037940979, "learning_rate": 7.262646313916079e-05, "loss": 0.6411, "step": 20746 }, { "epoch": 1.4057185446168439, "grad_norm": 7.123202800750732, "learning_rate": 7.262509412006297e-05, "loss": 0.7467, "step": 20747 }, { "epoch": 1.405786299884816, "grad_norm": 5.246259689331055, "learning_rate": 7.262372510096515e-05, "loss": 0.6569, "step": 20748 }, { "epoch": 1.405854055152788, "grad_norm": 6.392591953277588, "learning_rate": 7.262235608186734e-05, "loss": 0.5989, "step": 20749 }, { "epoch": 1.4059218104207603, "grad_norm": 3.970823049545288, "learning_rate": 7.262098706276952e-05, "loss": 0.573, "step": 20750 }, { "epoch": 1.4059895656887322, "grad_norm": 5.833157539367676, "learning_rate": 7.26196180436717e-05, "loss": 0.6142, "step": 20751 }, { "epoch": 1.4060573209567044, "grad_norm": 5.976133346557617, "learning_rate": 7.261824902457389e-05, "loss": 0.7199, "step": 20752 }, { "epoch": 1.4061250762246764, "grad_norm": 5.143008708953857, "learning_rate": 7.261688000547608e-05, "loss": 0.6395, "step": 20753 }, { "epoch": 1.4061928314926486, "grad_norm": 5.932631969451904, "learning_rate": 7.261551098637826e-05, "loss": 0.7041, "step": 20754 }, { "epoch": 1.4062605867606206, "grad_norm": 5.494353771209717, "learning_rate": 7.261414196728044e-05, "loss": 0.6503, "step": 20755 }, { "epoch": 1.4063283420285928, "grad_norm": 4.903101444244385, "learning_rate": 7.261277294818263e-05, "loss": 0.5115, "step": 20756 }, { "epoch": 1.4063960972965648, "grad_norm": 7.74245548248291, "learning_rate": 7.261140392908481e-05, "loss": 0.7022, "step": 20757 }, { "epoch": 1.4064638525645368, "grad_norm": 5.397243022918701, "learning_rate": 7.2610034909987e-05, "loss": 0.5015, "step": 20758 }, { "epoch": 1.406531607832509, "grad_norm": 4.7754364013671875, "learning_rate": 7.260866589088919e-05, "loss": 0.5644, "step": 20759 }, { "epoch": 1.4065993631004812, "grad_norm": 6.012414932250977, "learning_rate": 7.260729687179137e-05, "loss": 0.5937, "step": 20760 }, { "epoch": 1.4066671183684532, "grad_norm": 8.805522918701172, "learning_rate": 7.260592785269355e-05, "loss": 0.5494, "step": 20761 }, { "epoch": 1.4067348736364251, "grad_norm": 6.658268451690674, "learning_rate": 7.260455883359574e-05, "loss": 0.8236, "step": 20762 }, { "epoch": 1.4068026289043973, "grad_norm": 7.112606525421143, "learning_rate": 7.260318981449792e-05, "loss": 0.6601, "step": 20763 }, { "epoch": 1.4068703841723695, "grad_norm": 4.9433746337890625, "learning_rate": 7.26018207954001e-05, "loss": 0.4066, "step": 20764 }, { "epoch": 1.4069381394403415, "grad_norm": 5.913544654846191, "learning_rate": 7.260045177630228e-05, "loss": 0.5719, "step": 20765 }, { "epoch": 1.4070058947083135, "grad_norm": 9.182119369506836, "learning_rate": 7.259908275720446e-05, "loss": 0.8452, "step": 20766 }, { "epoch": 1.4070736499762857, "grad_norm": 6.72125244140625, "learning_rate": 7.259771373810666e-05, "loss": 0.7277, "step": 20767 }, { "epoch": 1.4071414052442577, "grad_norm": 5.728819847106934, "learning_rate": 7.259634471900884e-05, "loss": 0.604, "step": 20768 }, { "epoch": 1.4072091605122299, "grad_norm": 6.1261491775512695, "learning_rate": 7.259497569991102e-05, "loss": 0.5084, "step": 20769 }, { "epoch": 1.4072769157802019, "grad_norm": 5.870351314544678, "learning_rate": 7.25936066808132e-05, "loss": 0.6993, "step": 20770 }, { "epoch": 1.407344671048174, "grad_norm": 6.095668792724609, "learning_rate": 7.25922376617154e-05, "loss": 0.7236, "step": 20771 }, { "epoch": 1.407412426316146, "grad_norm": 5.3088202476501465, "learning_rate": 7.259086864261757e-05, "loss": 0.6115, "step": 20772 }, { "epoch": 1.407480181584118, "grad_norm": 5.254550457000732, "learning_rate": 7.258949962351975e-05, "loss": 0.7638, "step": 20773 }, { "epoch": 1.4075479368520902, "grad_norm": 4.712619781494141, "learning_rate": 7.258813060442193e-05, "loss": 0.5469, "step": 20774 }, { "epoch": 1.4076156921200624, "grad_norm": 6.004426002502441, "learning_rate": 7.258676158532411e-05, "loss": 0.8588, "step": 20775 }, { "epoch": 1.4076834473880344, "grad_norm": 5.934914588928223, "learning_rate": 7.258539256622631e-05, "loss": 0.8653, "step": 20776 }, { "epoch": 1.4077512026560064, "grad_norm": 6.452977657318115, "learning_rate": 7.258402354712849e-05, "loss": 0.6988, "step": 20777 }, { "epoch": 1.4078189579239786, "grad_norm": 4.802769660949707, "learning_rate": 7.258265452803067e-05, "loss": 0.6189, "step": 20778 }, { "epoch": 1.4078867131919508, "grad_norm": 5.104455471038818, "learning_rate": 7.258128550893285e-05, "loss": 0.5584, "step": 20779 }, { "epoch": 1.4079544684599228, "grad_norm": 4.873157024383545, "learning_rate": 7.257991648983503e-05, "loss": 0.5574, "step": 20780 }, { "epoch": 1.4080222237278948, "grad_norm": 4.609872341156006, "learning_rate": 7.257854747073722e-05, "loss": 0.5862, "step": 20781 }, { "epoch": 1.408089978995867, "grad_norm": 4.561013221740723, "learning_rate": 7.25771784516394e-05, "loss": 0.5956, "step": 20782 }, { "epoch": 1.408157734263839, "grad_norm": 6.679652214050293, "learning_rate": 7.257580943254158e-05, "loss": 0.8221, "step": 20783 }, { "epoch": 1.4082254895318111, "grad_norm": 4.643524646759033, "learning_rate": 7.257444041344376e-05, "loss": 0.5367, "step": 20784 }, { "epoch": 1.4082932447997831, "grad_norm": 5.216392517089844, "learning_rate": 7.257307139434596e-05, "loss": 0.643, "step": 20785 }, { "epoch": 1.4083610000677553, "grad_norm": 4.823494911193848, "learning_rate": 7.257170237524814e-05, "loss": 0.7847, "step": 20786 }, { "epoch": 1.4084287553357273, "grad_norm": 6.363922119140625, "learning_rate": 7.257033335615032e-05, "loss": 0.7955, "step": 20787 }, { "epoch": 1.4084965106036995, "grad_norm": 5.3204874992370605, "learning_rate": 7.25689643370525e-05, "loss": 0.6556, "step": 20788 }, { "epoch": 1.4085642658716715, "grad_norm": 4.501924514770508, "learning_rate": 7.256759531795468e-05, "loss": 0.6759, "step": 20789 }, { "epoch": 1.4086320211396437, "grad_norm": 5.401569366455078, "learning_rate": 7.256622629885687e-05, "loss": 0.6831, "step": 20790 }, { "epoch": 1.4086997764076157, "grad_norm": 6.477475166320801, "learning_rate": 7.256485727975905e-05, "loss": 0.7324, "step": 20791 }, { "epoch": 1.4087675316755877, "grad_norm": 5.686655044555664, "learning_rate": 7.256348826066123e-05, "loss": 0.6984, "step": 20792 }, { "epoch": 1.4088352869435599, "grad_norm": 5.1437835693359375, "learning_rate": 7.256211924156342e-05, "loss": 0.7075, "step": 20793 }, { "epoch": 1.408903042211532, "grad_norm": 6.197000980377197, "learning_rate": 7.256075022246561e-05, "loss": 0.7779, "step": 20794 }, { "epoch": 1.408970797479504, "grad_norm": 6.391395092010498, "learning_rate": 7.255938120336779e-05, "loss": 0.7406, "step": 20795 }, { "epoch": 1.409038552747476, "grad_norm": 5.79020357131958, "learning_rate": 7.255801218426997e-05, "loss": 0.6592, "step": 20796 }, { "epoch": 1.4091063080154482, "grad_norm": 5.870655059814453, "learning_rate": 7.255664316517215e-05, "loss": 0.7232, "step": 20797 }, { "epoch": 1.4091740632834202, "grad_norm": 5.169259071350098, "learning_rate": 7.255527414607433e-05, "loss": 0.5573, "step": 20798 }, { "epoch": 1.4092418185513924, "grad_norm": 5.885303020477295, "learning_rate": 7.255390512697652e-05, "loss": 0.6904, "step": 20799 }, { "epoch": 1.4093095738193644, "grad_norm": 4.378911972045898, "learning_rate": 7.25525361078787e-05, "loss": 0.6616, "step": 20800 }, { "epoch": 1.4093773290873366, "grad_norm": 5.254913330078125, "learning_rate": 7.255116708878088e-05, "loss": 0.6831, "step": 20801 }, { "epoch": 1.4094450843553086, "grad_norm": 5.600338935852051, "learning_rate": 7.254979806968308e-05, "loss": 0.7355, "step": 20802 }, { "epoch": 1.4095128396232808, "grad_norm": 5.163522243499756, "learning_rate": 7.254842905058526e-05, "loss": 0.6533, "step": 20803 }, { "epoch": 1.4095805948912528, "grad_norm": 5.301498889923096, "learning_rate": 7.254706003148744e-05, "loss": 0.6099, "step": 20804 }, { "epoch": 1.409648350159225, "grad_norm": 5.199283123016357, "learning_rate": 7.254569101238963e-05, "loss": 0.5894, "step": 20805 }, { "epoch": 1.409716105427197, "grad_norm": 6.122913360595703, "learning_rate": 7.254432199329181e-05, "loss": 0.6844, "step": 20806 }, { "epoch": 1.409783860695169, "grad_norm": 6.527642726898193, "learning_rate": 7.2542952974194e-05, "loss": 0.5177, "step": 20807 }, { "epoch": 1.4098516159631411, "grad_norm": 5.771257400512695, "learning_rate": 7.254158395509619e-05, "loss": 0.6565, "step": 20808 }, { "epoch": 1.4099193712311133, "grad_norm": 8.089409828186035, "learning_rate": 7.254021493599837e-05, "loss": 0.6774, "step": 20809 }, { "epoch": 1.4099871264990853, "grad_norm": 4.891401290893555, "learning_rate": 7.253884591690055e-05, "loss": 0.7565, "step": 20810 }, { "epoch": 1.4100548817670573, "grad_norm": 5.760569095611572, "learning_rate": 7.253747689780273e-05, "loss": 0.647, "step": 20811 }, { "epoch": 1.4101226370350295, "grad_norm": 9.452779769897461, "learning_rate": 7.253610787870491e-05, "loss": 0.5913, "step": 20812 }, { "epoch": 1.4101903923030017, "grad_norm": 4.646795272827148, "learning_rate": 7.25347388596071e-05, "loss": 0.6618, "step": 20813 }, { "epoch": 1.4102581475709737, "grad_norm": 8.734795570373535, "learning_rate": 7.253336984050928e-05, "loss": 0.5806, "step": 20814 }, { "epoch": 1.4103259028389457, "grad_norm": 8.16749095916748, "learning_rate": 7.253200082141146e-05, "loss": 0.5857, "step": 20815 }, { "epoch": 1.4103936581069179, "grad_norm": 5.168980121612549, "learning_rate": 7.253063180231364e-05, "loss": 0.5973, "step": 20816 }, { "epoch": 1.4104614133748898, "grad_norm": 8.036087036132812, "learning_rate": 7.252926278321584e-05, "loss": 0.9387, "step": 20817 }, { "epoch": 1.410529168642862, "grad_norm": 5.045963764190674, "learning_rate": 7.252789376411802e-05, "loss": 0.5249, "step": 20818 }, { "epoch": 1.410596923910834, "grad_norm": 4.302554607391357, "learning_rate": 7.25265247450202e-05, "loss": 0.5011, "step": 20819 }, { "epoch": 1.4106646791788062, "grad_norm": 5.956341743469238, "learning_rate": 7.252515572592238e-05, "loss": 0.6835, "step": 20820 }, { "epoch": 1.4107324344467782, "grad_norm": 5.18353796005249, "learning_rate": 7.252378670682456e-05, "loss": 0.602, "step": 20821 }, { "epoch": 1.4108001897147502, "grad_norm": 6.009150981903076, "learning_rate": 7.252241768772675e-05, "loss": 0.6386, "step": 20822 }, { "epoch": 1.4108679449827224, "grad_norm": 5.988224029541016, "learning_rate": 7.252104866862893e-05, "loss": 0.5975, "step": 20823 }, { "epoch": 1.4109357002506946, "grad_norm": 5.298572063446045, "learning_rate": 7.251967964953111e-05, "loss": 0.9092, "step": 20824 }, { "epoch": 1.4110034555186666, "grad_norm": 5.683076858520508, "learning_rate": 7.25183106304333e-05, "loss": 0.7121, "step": 20825 }, { "epoch": 1.4110712107866386, "grad_norm": 4.768420696258545, "learning_rate": 7.251694161133549e-05, "loss": 0.6752, "step": 20826 }, { "epoch": 1.4111389660546108, "grad_norm": 7.93467378616333, "learning_rate": 7.251557259223767e-05, "loss": 0.7874, "step": 20827 }, { "epoch": 1.411206721322583, "grad_norm": 6.007985591888428, "learning_rate": 7.251420357313985e-05, "loss": 0.7799, "step": 20828 }, { "epoch": 1.411274476590555, "grad_norm": 6.1685614585876465, "learning_rate": 7.251283455404203e-05, "loss": 0.685, "step": 20829 }, { "epoch": 1.411342231858527, "grad_norm": 5.77237606048584, "learning_rate": 7.251146553494421e-05, "loss": 0.7326, "step": 20830 }, { "epoch": 1.4114099871264991, "grad_norm": 5.439155578613281, "learning_rate": 7.25100965158464e-05, "loss": 0.4697, "step": 20831 }, { "epoch": 1.411477742394471, "grad_norm": 5.091054439544678, "learning_rate": 7.250872749674858e-05, "loss": 0.8518, "step": 20832 }, { "epoch": 1.4115454976624433, "grad_norm": 5.050314426422119, "learning_rate": 7.250735847765076e-05, "loss": 0.665, "step": 20833 }, { "epoch": 1.4116132529304153, "grad_norm": 7.240658283233643, "learning_rate": 7.250598945855294e-05, "loss": 0.714, "step": 20834 }, { "epoch": 1.4116810081983875, "grad_norm": 6.225192546844482, "learning_rate": 7.250462043945512e-05, "loss": 0.647, "step": 20835 }, { "epoch": 1.4117487634663595, "grad_norm": 6.363729476928711, "learning_rate": 7.250325142035732e-05, "loss": 0.5492, "step": 20836 }, { "epoch": 1.4118165187343317, "grad_norm": 5.927059650421143, "learning_rate": 7.25018824012595e-05, "loss": 0.6797, "step": 20837 }, { "epoch": 1.4118842740023037, "grad_norm": 4.231610298156738, "learning_rate": 7.250051338216168e-05, "loss": 0.7223, "step": 20838 }, { "epoch": 1.4119520292702759, "grad_norm": 4.941591262817383, "learning_rate": 7.249914436306386e-05, "loss": 0.5118, "step": 20839 }, { "epoch": 1.4120197845382478, "grad_norm": 4.315646648406982, "learning_rate": 7.249777534396605e-05, "loss": 0.503, "step": 20840 }, { "epoch": 1.4120875398062198, "grad_norm": 6.029323577880859, "learning_rate": 7.249640632486823e-05, "loss": 0.7571, "step": 20841 }, { "epoch": 1.412155295074192, "grad_norm": 5.207945823669434, "learning_rate": 7.249503730577041e-05, "loss": 0.696, "step": 20842 }, { "epoch": 1.4122230503421642, "grad_norm": 6.77159309387207, "learning_rate": 7.24936682866726e-05, "loss": 0.7615, "step": 20843 }, { "epoch": 1.4122908056101362, "grad_norm": 6.76339864730835, "learning_rate": 7.249229926757478e-05, "loss": 0.826, "step": 20844 }, { "epoch": 1.4123585608781082, "grad_norm": 6.108619213104248, "learning_rate": 7.249093024847697e-05, "loss": 0.821, "step": 20845 }, { "epoch": 1.4124263161460804, "grad_norm": 6.391281604766846, "learning_rate": 7.248956122937915e-05, "loss": 0.8239, "step": 20846 }, { "epoch": 1.4124940714140524, "grad_norm": 6.7142252922058105, "learning_rate": 7.248819221028133e-05, "loss": 0.8211, "step": 20847 }, { "epoch": 1.4125618266820246, "grad_norm": 5.926610946655273, "learning_rate": 7.248682319118352e-05, "loss": 0.6972, "step": 20848 }, { "epoch": 1.4126295819499965, "grad_norm": 5.359100818634033, "learning_rate": 7.24854541720857e-05, "loss": 0.6355, "step": 20849 }, { "epoch": 1.4126973372179688, "grad_norm": 4.997183799743652, "learning_rate": 7.248408515298788e-05, "loss": 0.5183, "step": 20850 }, { "epoch": 1.4127650924859407, "grad_norm": 7.127164840698242, "learning_rate": 7.248271613389008e-05, "loss": 0.6705, "step": 20851 }, { "epoch": 1.412832847753913, "grad_norm": 3.62056565284729, "learning_rate": 7.248134711479226e-05, "loss": 0.5071, "step": 20852 }, { "epoch": 1.412900603021885, "grad_norm": 5.8378119468688965, "learning_rate": 7.247997809569444e-05, "loss": 0.6882, "step": 20853 }, { "epoch": 1.4129683582898571, "grad_norm": 5.831943035125732, "learning_rate": 7.247860907659663e-05, "loss": 0.6821, "step": 20854 }, { "epoch": 1.413036113557829, "grad_norm": 5.298803329467773, "learning_rate": 7.247724005749881e-05, "loss": 0.6329, "step": 20855 }, { "epoch": 1.413103868825801, "grad_norm": 5.796782970428467, "learning_rate": 7.2475871038401e-05, "loss": 0.7763, "step": 20856 }, { "epoch": 1.4131716240937733, "grad_norm": 12.9525728225708, "learning_rate": 7.247450201930317e-05, "loss": 0.6922, "step": 20857 }, { "epoch": 1.4132393793617455, "grad_norm": 5.095679759979248, "learning_rate": 7.247313300020535e-05, "loss": 0.6766, "step": 20858 }, { "epoch": 1.4133071346297175, "grad_norm": 5.037111282348633, "learning_rate": 7.247176398110755e-05, "loss": 0.6388, "step": 20859 }, { "epoch": 1.4133748898976894, "grad_norm": 7.296316146850586, "learning_rate": 7.247039496200973e-05, "loss": 0.6298, "step": 20860 }, { "epoch": 1.4134426451656616, "grad_norm": 4.563043594360352, "learning_rate": 7.246902594291191e-05, "loss": 0.5014, "step": 20861 }, { "epoch": 1.4135104004336339, "grad_norm": 5.58361291885376, "learning_rate": 7.246765692381409e-05, "loss": 0.5595, "step": 20862 }, { "epoch": 1.4135781557016058, "grad_norm": 4.88395881652832, "learning_rate": 7.246628790471628e-05, "loss": 0.5788, "step": 20863 }, { "epoch": 1.4136459109695778, "grad_norm": 7.3552093505859375, "learning_rate": 7.246491888561846e-05, "loss": 0.7143, "step": 20864 }, { "epoch": 1.41371366623755, "grad_norm": 4.924375534057617, "learning_rate": 7.246354986652064e-05, "loss": 0.6788, "step": 20865 }, { "epoch": 1.413781421505522, "grad_norm": 9.274568557739258, "learning_rate": 7.246218084742282e-05, "loss": 0.7497, "step": 20866 }, { "epoch": 1.4138491767734942, "grad_norm": 5.331521511077881, "learning_rate": 7.2460811828325e-05, "loss": 0.6658, "step": 20867 }, { "epoch": 1.4139169320414662, "grad_norm": 6.177103519439697, "learning_rate": 7.24594428092272e-05, "loss": 0.8789, "step": 20868 }, { "epoch": 1.4139846873094384, "grad_norm": 4.5603108406066895, "learning_rate": 7.245807379012938e-05, "loss": 0.6267, "step": 20869 }, { "epoch": 1.4140524425774104, "grad_norm": 7.858142852783203, "learning_rate": 7.245670477103156e-05, "loss": 0.8237, "step": 20870 }, { "epoch": 1.4141201978453823, "grad_norm": 5.5325212478637695, "learning_rate": 7.245533575193374e-05, "loss": 0.6175, "step": 20871 }, { "epoch": 1.4141879531133545, "grad_norm": 5.406671524047852, "learning_rate": 7.245396673283593e-05, "loss": 0.572, "step": 20872 }, { "epoch": 1.4142557083813267, "grad_norm": 5.976593017578125, "learning_rate": 7.245259771373811e-05, "loss": 0.7107, "step": 20873 }, { "epoch": 1.4143234636492987, "grad_norm": 6.660350322723389, "learning_rate": 7.24512286946403e-05, "loss": 0.6716, "step": 20874 }, { "epoch": 1.4143912189172707, "grad_norm": 6.9584760665893555, "learning_rate": 7.244985967554247e-05, "loss": 0.7602, "step": 20875 }, { "epoch": 1.414458974185243, "grad_norm": 6.336503028869629, "learning_rate": 7.244849065644465e-05, "loss": 0.6872, "step": 20876 }, { "epoch": 1.4145267294532151, "grad_norm": 4.478384017944336, "learning_rate": 7.244712163734685e-05, "loss": 0.5547, "step": 20877 }, { "epoch": 1.414594484721187, "grad_norm": 6.127598285675049, "learning_rate": 7.244575261824903e-05, "loss": 0.7415, "step": 20878 }, { "epoch": 1.414662239989159, "grad_norm": 5.388698577880859, "learning_rate": 7.244438359915121e-05, "loss": 0.6837, "step": 20879 }, { "epoch": 1.4147299952571313, "grad_norm": 4.75826358795166, "learning_rate": 7.244301458005339e-05, "loss": 0.6447, "step": 20880 }, { "epoch": 1.4147977505251033, "grad_norm": 6.0732340812683105, "learning_rate": 7.244164556095557e-05, "loss": 0.7329, "step": 20881 }, { "epoch": 1.4148655057930755, "grad_norm": 5.375673294067383, "learning_rate": 7.244027654185776e-05, "loss": 0.5508, "step": 20882 }, { "epoch": 1.4149332610610474, "grad_norm": 5.003943920135498, "learning_rate": 7.243890752275994e-05, "loss": 0.6831, "step": 20883 }, { "epoch": 1.4150010163290196, "grad_norm": 6.038925647735596, "learning_rate": 7.243753850366212e-05, "loss": 0.7493, "step": 20884 }, { "epoch": 1.4150687715969916, "grad_norm": 3.9189395904541016, "learning_rate": 7.24361694845643e-05, "loss": 0.6535, "step": 20885 }, { "epoch": 1.4151365268649638, "grad_norm": 5.142685890197754, "learning_rate": 7.24348004654665e-05, "loss": 0.6595, "step": 20886 }, { "epoch": 1.4152042821329358, "grad_norm": 5.381821155548096, "learning_rate": 7.243343144636868e-05, "loss": 0.6646, "step": 20887 }, { "epoch": 1.415272037400908, "grad_norm": 5.222214221954346, "learning_rate": 7.243206242727086e-05, "loss": 0.8215, "step": 20888 }, { "epoch": 1.41533979266888, "grad_norm": 7.360916614532471, "learning_rate": 7.243069340817304e-05, "loss": 0.7908, "step": 20889 }, { "epoch": 1.415407547936852, "grad_norm": 10.079451560974121, "learning_rate": 7.242932438907522e-05, "loss": 0.6133, "step": 20890 }, { "epoch": 1.4154753032048242, "grad_norm": 5.151872634887695, "learning_rate": 7.242795536997741e-05, "loss": 0.6867, "step": 20891 }, { "epoch": 1.4155430584727964, "grad_norm": 6.201021671295166, "learning_rate": 7.24265863508796e-05, "loss": 0.7121, "step": 20892 }, { "epoch": 1.4156108137407684, "grad_norm": 5.914426326751709, "learning_rate": 7.242521733178177e-05, "loss": 0.4847, "step": 20893 }, { "epoch": 1.4156785690087403, "grad_norm": 6.342916488647461, "learning_rate": 7.242384831268397e-05, "loss": 0.5393, "step": 20894 }, { "epoch": 1.4157463242767125, "grad_norm": 5.274435997009277, "learning_rate": 7.242247929358615e-05, "loss": 0.5676, "step": 20895 }, { "epoch": 1.4158140795446845, "grad_norm": 6.247053623199463, "learning_rate": 7.242111027448833e-05, "loss": 0.8046, "step": 20896 }, { "epoch": 1.4158818348126567, "grad_norm": 3.548537015914917, "learning_rate": 7.241974125539052e-05, "loss": 0.5694, "step": 20897 }, { "epoch": 1.4159495900806287, "grad_norm": 8.980352401733398, "learning_rate": 7.24183722362927e-05, "loss": 0.8121, "step": 20898 }, { "epoch": 1.416017345348601, "grad_norm": 6.250555992126465, "learning_rate": 7.241700321719488e-05, "loss": 0.7777, "step": 20899 }, { "epoch": 1.416085100616573, "grad_norm": 7.438925266265869, "learning_rate": 7.241563419809708e-05, "loss": 0.6283, "step": 20900 }, { "epoch": 1.416152855884545, "grad_norm": 5.689207553863525, "learning_rate": 7.241426517899926e-05, "loss": 0.788, "step": 20901 }, { "epoch": 1.416220611152517, "grad_norm": 7.68367862701416, "learning_rate": 7.241289615990144e-05, "loss": 0.6057, "step": 20902 }, { "epoch": 1.4162883664204893, "grad_norm": 9.688300132751465, "learning_rate": 7.241152714080362e-05, "loss": 0.5961, "step": 20903 }, { "epoch": 1.4163561216884613, "grad_norm": 6.607765197753906, "learning_rate": 7.241015812170581e-05, "loss": 0.537, "step": 20904 }, { "epoch": 1.4164238769564332, "grad_norm": 5.82720947265625, "learning_rate": 7.240878910260799e-05, "loss": 0.7504, "step": 20905 }, { "epoch": 1.4164916322244054, "grad_norm": 5.576922416687012, "learning_rate": 7.240742008351017e-05, "loss": 0.6432, "step": 20906 }, { "epoch": 1.4165593874923776, "grad_norm": 5.114114761352539, "learning_rate": 7.240605106441235e-05, "loss": 0.6911, "step": 20907 }, { "epoch": 1.4166271427603496, "grad_norm": 5.657660007476807, "learning_rate": 7.240468204531453e-05, "loss": 0.6301, "step": 20908 }, { "epoch": 1.4166948980283216, "grad_norm": 6.846127510070801, "learning_rate": 7.240331302621673e-05, "loss": 0.9121, "step": 20909 }, { "epoch": 1.4167626532962938, "grad_norm": 4.892091274261475, "learning_rate": 7.240194400711891e-05, "loss": 0.5036, "step": 20910 }, { "epoch": 1.416830408564266, "grad_norm": 4.4560322761535645, "learning_rate": 7.240057498802109e-05, "loss": 0.6574, "step": 20911 }, { "epoch": 1.416898163832238, "grad_norm": 4.802989482879639, "learning_rate": 7.239920596892327e-05, "loss": 0.8795, "step": 20912 }, { "epoch": 1.41696591910021, "grad_norm": 5.331162452697754, "learning_rate": 7.239783694982545e-05, "loss": 0.7929, "step": 20913 }, { "epoch": 1.4170336743681822, "grad_norm": 4.476842403411865, "learning_rate": 7.239646793072764e-05, "loss": 0.647, "step": 20914 }, { "epoch": 1.4171014296361542, "grad_norm": 5.220469951629639, "learning_rate": 7.239509891162982e-05, "loss": 0.7062, "step": 20915 }, { "epoch": 1.4171691849041264, "grad_norm": 5.811756610870361, "learning_rate": 7.2393729892532e-05, "loss": 0.7163, "step": 20916 }, { "epoch": 1.4172369401720983, "grad_norm": 5.887594223022461, "learning_rate": 7.239236087343418e-05, "loss": 0.7164, "step": 20917 }, { "epoch": 1.4173046954400705, "grad_norm": 6.3105292320251465, "learning_rate": 7.239099185433638e-05, "loss": 0.8586, "step": 20918 }, { "epoch": 1.4173724507080425, "grad_norm": 4.978327751159668, "learning_rate": 7.238962283523856e-05, "loss": 0.6926, "step": 20919 }, { "epoch": 1.4174402059760145, "grad_norm": 5.756497383117676, "learning_rate": 7.238825381614074e-05, "loss": 0.7036, "step": 20920 }, { "epoch": 1.4175079612439867, "grad_norm": 7.775460243225098, "learning_rate": 7.238688479704292e-05, "loss": 0.9556, "step": 20921 }, { "epoch": 1.417575716511959, "grad_norm": 5.351320266723633, "learning_rate": 7.23855157779451e-05, "loss": 0.9877, "step": 20922 }, { "epoch": 1.4176434717799309, "grad_norm": 6.30314302444458, "learning_rate": 7.238414675884729e-05, "loss": 0.9997, "step": 20923 }, { "epoch": 1.4177112270479029, "grad_norm": 5.89908504486084, "learning_rate": 7.238277773974947e-05, "loss": 0.7771, "step": 20924 }, { "epoch": 1.417778982315875, "grad_norm": 4.501805305480957, "learning_rate": 7.238140872065165e-05, "loss": 0.5221, "step": 20925 }, { "epoch": 1.4178467375838473, "grad_norm": 4.581091403961182, "learning_rate": 7.238003970155383e-05, "loss": 0.5396, "step": 20926 }, { "epoch": 1.4179144928518193, "grad_norm": 5.5962395668029785, "learning_rate": 7.237867068245603e-05, "loss": 0.5512, "step": 20927 }, { "epoch": 1.4179822481197912, "grad_norm": 10.725104331970215, "learning_rate": 7.237730166335821e-05, "loss": 0.7758, "step": 20928 }, { "epoch": 1.4180500033877634, "grad_norm": 5.526170253753662, "learning_rate": 7.237593264426039e-05, "loss": 0.6252, "step": 20929 }, { "epoch": 1.4181177586557354, "grad_norm": 7.818890571594238, "learning_rate": 7.237456362516257e-05, "loss": 0.7878, "step": 20930 }, { "epoch": 1.4181855139237076, "grad_norm": 6.668206691741943, "learning_rate": 7.237319460606475e-05, "loss": 0.6022, "step": 20931 }, { "epoch": 1.4182532691916796, "grad_norm": 7.0494585037231445, "learning_rate": 7.237182558696694e-05, "loss": 0.71, "step": 20932 }, { "epoch": 1.4183210244596518, "grad_norm": 5.176377296447754, "learning_rate": 7.237045656786912e-05, "loss": 0.8053, "step": 20933 }, { "epoch": 1.4183887797276238, "grad_norm": 5.114962100982666, "learning_rate": 7.23690875487713e-05, "loss": 0.7034, "step": 20934 }, { "epoch": 1.418456534995596, "grad_norm": 5.896724700927734, "learning_rate": 7.236771852967348e-05, "loss": 0.5984, "step": 20935 }, { "epoch": 1.418524290263568, "grad_norm": 9.672106742858887, "learning_rate": 7.236634951057566e-05, "loss": 0.6431, "step": 20936 }, { "epoch": 1.4185920455315402, "grad_norm": 5.8165202140808105, "learning_rate": 7.236498049147786e-05, "loss": 0.6542, "step": 20937 }, { "epoch": 1.4186598007995121, "grad_norm": 5.500722885131836, "learning_rate": 7.236361147238004e-05, "loss": 0.9268, "step": 20938 }, { "epoch": 1.4187275560674841, "grad_norm": 8.498766899108887, "learning_rate": 7.236224245328222e-05, "loss": 0.5548, "step": 20939 }, { "epoch": 1.4187953113354563, "grad_norm": 4.768573760986328, "learning_rate": 7.236087343418441e-05, "loss": 0.5902, "step": 20940 }, { "epoch": 1.4188630666034285, "grad_norm": 5.560000896453857, "learning_rate": 7.23595044150866e-05, "loss": 0.7756, "step": 20941 }, { "epoch": 1.4189308218714005, "grad_norm": 7.442240238189697, "learning_rate": 7.235813539598877e-05, "loss": 0.5696, "step": 20942 }, { "epoch": 1.4189985771393725, "grad_norm": 6.50754976272583, "learning_rate": 7.235676637689097e-05, "loss": 0.6972, "step": 20943 }, { "epoch": 1.4190663324073447, "grad_norm": 6.421780109405518, "learning_rate": 7.235539735779315e-05, "loss": 0.5666, "step": 20944 }, { "epoch": 1.4191340876753167, "grad_norm": 5.475307464599609, "learning_rate": 7.235402833869533e-05, "loss": 0.4886, "step": 20945 }, { "epoch": 1.4192018429432889, "grad_norm": 4.133982181549072, "learning_rate": 7.235265931959752e-05, "loss": 0.5636, "step": 20946 }, { "epoch": 1.4192695982112609, "grad_norm": 6.589623928070068, "learning_rate": 7.23512903004997e-05, "loss": 0.6235, "step": 20947 }, { "epoch": 1.419337353479233, "grad_norm": 5.406386375427246, "learning_rate": 7.234992128140188e-05, "loss": 0.809, "step": 20948 }, { "epoch": 1.419405108747205, "grad_norm": 4.528289318084717, "learning_rate": 7.234855226230406e-05, "loss": 0.4982, "step": 20949 }, { "epoch": 1.4194728640151772, "grad_norm": 7.461536884307861, "learning_rate": 7.234718324320626e-05, "loss": 0.4975, "step": 20950 }, { "epoch": 1.4195406192831492, "grad_norm": 5.696765422821045, "learning_rate": 7.234581422410844e-05, "loss": 0.6863, "step": 20951 }, { "epoch": 1.4196083745511214, "grad_norm": 6.033190727233887, "learning_rate": 7.234444520501062e-05, "loss": 0.6713, "step": 20952 }, { "epoch": 1.4196761298190934, "grad_norm": 6.616346836090088, "learning_rate": 7.23430761859128e-05, "loss": 0.7614, "step": 20953 }, { "epoch": 1.4197438850870654, "grad_norm": 7.8426713943481445, "learning_rate": 7.234170716681498e-05, "loss": 0.6209, "step": 20954 }, { "epoch": 1.4198116403550376, "grad_norm": 6.025173187255859, "learning_rate": 7.234033814771717e-05, "loss": 0.6107, "step": 20955 }, { "epoch": 1.4198793956230098, "grad_norm": 5.724207401275635, "learning_rate": 7.233896912861935e-05, "loss": 0.9666, "step": 20956 }, { "epoch": 1.4199471508909818, "grad_norm": 4.676914691925049, "learning_rate": 7.233760010952153e-05, "loss": 0.7476, "step": 20957 }, { "epoch": 1.4200149061589538, "grad_norm": 5.440614223480225, "learning_rate": 7.233623109042371e-05, "loss": 0.8038, "step": 20958 }, { "epoch": 1.420082661426926, "grad_norm": 4.4495530128479, "learning_rate": 7.233486207132591e-05, "loss": 0.6202, "step": 20959 }, { "epoch": 1.4201504166948982, "grad_norm": 7.3673906326293945, "learning_rate": 7.233349305222809e-05, "loss": 0.7455, "step": 20960 }, { "epoch": 1.4202181719628701, "grad_norm": 5.913322925567627, "learning_rate": 7.233212403313027e-05, "loss": 0.699, "step": 20961 }, { "epoch": 1.4202859272308421, "grad_norm": 4.621917247772217, "learning_rate": 7.233075501403245e-05, "loss": 0.7287, "step": 20962 }, { "epoch": 1.4203536824988143, "grad_norm": 8.761709213256836, "learning_rate": 7.232938599493463e-05, "loss": 0.6121, "step": 20963 }, { "epoch": 1.4204214377667863, "grad_norm": 5.0227789878845215, "learning_rate": 7.232801697583682e-05, "loss": 0.5638, "step": 20964 }, { "epoch": 1.4204891930347585, "grad_norm": 6.7011847496032715, "learning_rate": 7.2326647956739e-05, "loss": 0.6078, "step": 20965 }, { "epoch": 1.4205569483027305, "grad_norm": 4.909398555755615, "learning_rate": 7.232527893764118e-05, "loss": 0.7278, "step": 20966 }, { "epoch": 1.4206247035707027, "grad_norm": 5.499317646026611, "learning_rate": 7.232390991854336e-05, "loss": 0.6927, "step": 20967 }, { "epoch": 1.4206924588386747, "grad_norm": 6.079032897949219, "learning_rate": 7.232254089944554e-05, "loss": 0.7818, "step": 20968 }, { "epoch": 1.4207602141066467, "grad_norm": 4.128228664398193, "learning_rate": 7.232117188034774e-05, "loss": 0.641, "step": 20969 }, { "epoch": 1.4208279693746189, "grad_norm": 6.207160472869873, "learning_rate": 7.231980286124992e-05, "loss": 0.6937, "step": 20970 }, { "epoch": 1.420895724642591, "grad_norm": 6.276054382324219, "learning_rate": 7.23184338421521e-05, "loss": 0.4658, "step": 20971 }, { "epoch": 1.420963479910563, "grad_norm": 5.945830821990967, "learning_rate": 7.231706482305428e-05, "loss": 0.769, "step": 20972 }, { "epoch": 1.421031235178535, "grad_norm": 6.29269552230835, "learning_rate": 7.231569580395647e-05, "loss": 0.7444, "step": 20973 }, { "epoch": 1.4210989904465072, "grad_norm": 4.551427364349365, "learning_rate": 7.231432678485865e-05, "loss": 0.4237, "step": 20974 }, { "epoch": 1.4211667457144794, "grad_norm": 3.839461088180542, "learning_rate": 7.231295776576083e-05, "loss": 0.3636, "step": 20975 }, { "epoch": 1.4212345009824514, "grad_norm": 7.112268447875977, "learning_rate": 7.231158874666301e-05, "loss": 0.6984, "step": 20976 }, { "epoch": 1.4213022562504234, "grad_norm": 5.118794918060303, "learning_rate": 7.23102197275652e-05, "loss": 0.5611, "step": 20977 }, { "epoch": 1.4213700115183956, "grad_norm": 5.829601764678955, "learning_rate": 7.230885070846739e-05, "loss": 0.66, "step": 20978 }, { "epoch": 1.4214377667863676, "grad_norm": 6.266690254211426, "learning_rate": 7.230748168936957e-05, "loss": 0.7661, "step": 20979 }, { "epoch": 1.4215055220543398, "grad_norm": 5.505138397216797, "learning_rate": 7.230611267027175e-05, "loss": 0.5642, "step": 20980 }, { "epoch": 1.4215732773223118, "grad_norm": 7.960724830627441, "learning_rate": 7.230474365117393e-05, "loss": 0.6222, "step": 20981 }, { "epoch": 1.421641032590284, "grad_norm": 4.603204250335693, "learning_rate": 7.230337463207612e-05, "loss": 0.5381, "step": 20982 }, { "epoch": 1.421708787858256, "grad_norm": 6.004194259643555, "learning_rate": 7.23020056129783e-05, "loss": 0.6578, "step": 20983 }, { "epoch": 1.4217765431262281, "grad_norm": 5.715832233428955, "learning_rate": 7.230063659388048e-05, "loss": 0.6259, "step": 20984 }, { "epoch": 1.4218442983942001, "grad_norm": 5.678028106689453, "learning_rate": 7.229926757478266e-05, "loss": 0.6952, "step": 20985 }, { "epoch": 1.4219120536621723, "grad_norm": 4.078176975250244, "learning_rate": 7.229789855568486e-05, "loss": 0.5064, "step": 20986 }, { "epoch": 1.4219798089301443, "grad_norm": 5.980291843414307, "learning_rate": 7.229652953658704e-05, "loss": 0.7331, "step": 20987 }, { "epoch": 1.4220475641981163, "grad_norm": 6.986227512359619, "learning_rate": 7.229516051748922e-05, "loss": 0.7505, "step": 20988 }, { "epoch": 1.4221153194660885, "grad_norm": 5.816247463226318, "learning_rate": 7.229379149839141e-05, "loss": 0.6736, "step": 20989 }, { "epoch": 1.4221830747340607, "grad_norm": 5.4272966384887695, "learning_rate": 7.229242247929359e-05, "loss": 0.5951, "step": 20990 }, { "epoch": 1.4222508300020327, "grad_norm": 7.265817165374756, "learning_rate": 7.229105346019577e-05, "loss": 0.757, "step": 20991 }, { "epoch": 1.4223185852700047, "grad_norm": 7.224908351898193, "learning_rate": 7.228968444109797e-05, "loss": 0.8365, "step": 20992 }, { "epoch": 1.4223863405379769, "grad_norm": 4.726144313812256, "learning_rate": 7.228831542200015e-05, "loss": 0.5902, "step": 20993 }, { "epoch": 1.4224540958059488, "grad_norm": 5.839087009429932, "learning_rate": 7.228694640290233e-05, "loss": 0.6913, "step": 20994 }, { "epoch": 1.422521851073921, "grad_norm": 4.628944396972656, "learning_rate": 7.228557738380451e-05, "loss": 0.7012, "step": 20995 }, { "epoch": 1.422589606341893, "grad_norm": 4.868803024291992, "learning_rate": 7.22842083647067e-05, "loss": 0.5313, "step": 20996 }, { "epoch": 1.4226573616098652, "grad_norm": 4.902781009674072, "learning_rate": 7.228283934560888e-05, "loss": 0.6325, "step": 20997 }, { "epoch": 1.4227251168778372, "grad_norm": 5.897185802459717, "learning_rate": 7.228147032651106e-05, "loss": 0.7293, "step": 20998 }, { "epoch": 1.4227928721458094, "grad_norm": 5.8811445236206055, "learning_rate": 7.228010130741324e-05, "loss": 0.7834, "step": 20999 }, { "epoch": 1.4228606274137814, "grad_norm": 5.362592697143555, "learning_rate": 7.227873228831542e-05, "loss": 0.6063, "step": 21000 }, { "epoch": 1.4229283826817536, "grad_norm": 6.514289855957031, "learning_rate": 7.227736326921762e-05, "loss": 0.4966, "step": 21001 }, { "epoch": 1.4229961379497256, "grad_norm": 8.021393775939941, "learning_rate": 7.22759942501198e-05, "loss": 1.0451, "step": 21002 }, { "epoch": 1.4230638932176976, "grad_norm": 5.2324299812316895, "learning_rate": 7.227462523102198e-05, "loss": 0.6748, "step": 21003 }, { "epoch": 1.4231316484856698, "grad_norm": 7.934859752655029, "learning_rate": 7.227325621192416e-05, "loss": 0.7221, "step": 21004 }, { "epoch": 1.423199403753642, "grad_norm": 4.858540058135986, "learning_rate": 7.227188719282635e-05, "loss": 0.7031, "step": 21005 }, { "epoch": 1.423267159021614, "grad_norm": 5.477261066436768, "learning_rate": 7.227051817372853e-05, "loss": 0.6137, "step": 21006 }, { "epoch": 1.423334914289586, "grad_norm": 5.862905025482178, "learning_rate": 7.226914915463071e-05, "loss": 0.6466, "step": 21007 }, { "epoch": 1.4234026695575581, "grad_norm": 6.632056713104248, "learning_rate": 7.226778013553289e-05, "loss": 0.7573, "step": 21008 }, { "epoch": 1.4234704248255303, "grad_norm": 5.46019172668457, "learning_rate": 7.226641111643507e-05, "loss": 0.7077, "step": 21009 }, { "epoch": 1.4235381800935023, "grad_norm": 9.703536033630371, "learning_rate": 7.226504209733727e-05, "loss": 0.4771, "step": 21010 }, { "epoch": 1.4236059353614743, "grad_norm": 6.206655025482178, "learning_rate": 7.226367307823945e-05, "loss": 0.6981, "step": 21011 }, { "epoch": 1.4236736906294465, "grad_norm": 6.443789005279541, "learning_rate": 7.226230405914163e-05, "loss": 0.7653, "step": 21012 }, { "epoch": 1.4237414458974185, "grad_norm": 6.943606853485107, "learning_rate": 7.226093504004381e-05, "loss": 1.0652, "step": 21013 }, { "epoch": 1.4238092011653907, "grad_norm": 4.594053268432617, "learning_rate": 7.225956602094599e-05, "loss": 0.7108, "step": 21014 }, { "epoch": 1.4238769564333627, "grad_norm": 5.477336883544922, "learning_rate": 7.225819700184818e-05, "loss": 0.683, "step": 21015 }, { "epoch": 1.4239447117013349, "grad_norm": 5.465267658233643, "learning_rate": 7.225682798275036e-05, "loss": 0.7213, "step": 21016 }, { "epoch": 1.4240124669693068, "grad_norm": 4.748956680297852, "learning_rate": 7.225545896365254e-05, "loss": 0.5445, "step": 21017 }, { "epoch": 1.4240802222372788, "grad_norm": 4.956427097320557, "learning_rate": 7.225408994455472e-05, "loss": 0.7084, "step": 21018 }, { "epoch": 1.424147977505251, "grad_norm": 5.593998432159424, "learning_rate": 7.225272092545692e-05, "loss": 0.7788, "step": 21019 }, { "epoch": 1.4242157327732232, "grad_norm": 5.831843852996826, "learning_rate": 7.22513519063591e-05, "loss": 0.5916, "step": 21020 }, { "epoch": 1.4242834880411952, "grad_norm": 5.675478458404541, "learning_rate": 7.224998288726128e-05, "loss": 0.6004, "step": 21021 }, { "epoch": 1.4243512433091672, "grad_norm": 6.635678291320801, "learning_rate": 7.224861386816346e-05, "loss": 0.7376, "step": 21022 }, { "epoch": 1.4244189985771394, "grad_norm": 6.526688575744629, "learning_rate": 7.224724484906564e-05, "loss": 0.8301, "step": 21023 }, { "epoch": 1.4244867538451116, "grad_norm": 6.965750217437744, "learning_rate": 7.224587582996783e-05, "loss": 0.5821, "step": 21024 }, { "epoch": 1.4245545091130836, "grad_norm": 7.32047700881958, "learning_rate": 7.224450681087001e-05, "loss": 0.6841, "step": 21025 }, { "epoch": 1.4246222643810555, "grad_norm": 5.111550807952881, "learning_rate": 7.22431377917722e-05, "loss": 0.6159, "step": 21026 }, { "epoch": 1.4246900196490278, "grad_norm": 5.426882743835449, "learning_rate": 7.224176877267437e-05, "loss": 0.4986, "step": 21027 }, { "epoch": 1.4247577749169997, "grad_norm": 5.105292320251465, "learning_rate": 7.224039975357657e-05, "loss": 0.5956, "step": 21028 }, { "epoch": 1.424825530184972, "grad_norm": 4.476670742034912, "learning_rate": 7.223903073447875e-05, "loss": 0.6806, "step": 21029 }, { "epoch": 1.424893285452944, "grad_norm": 5.6811628341674805, "learning_rate": 7.223766171538093e-05, "loss": 0.5695, "step": 21030 }, { "epoch": 1.4249610407209161, "grad_norm": 5.499662399291992, "learning_rate": 7.223629269628311e-05, "loss": 0.7193, "step": 21031 }, { "epoch": 1.425028795988888, "grad_norm": 8.392537117004395, "learning_rate": 7.223492367718529e-05, "loss": 0.4809, "step": 21032 }, { "epoch": 1.4250965512568603, "grad_norm": 5.815036773681641, "learning_rate": 7.223355465808748e-05, "loss": 0.606, "step": 21033 }, { "epoch": 1.4251643065248323, "grad_norm": 5.8543853759765625, "learning_rate": 7.223218563898966e-05, "loss": 0.5258, "step": 21034 }, { "epoch": 1.4252320617928045, "grad_norm": 6.263812065124512, "learning_rate": 7.223081661989184e-05, "loss": 0.6888, "step": 21035 }, { "epoch": 1.4252998170607765, "grad_norm": 6.833883285522461, "learning_rate": 7.222944760079404e-05, "loss": 0.6591, "step": 21036 }, { "epoch": 1.4253675723287484, "grad_norm": 5.0943193435668945, "learning_rate": 7.222807858169622e-05, "loss": 0.5385, "step": 21037 }, { "epoch": 1.4254353275967206, "grad_norm": 6.247521877288818, "learning_rate": 7.22267095625984e-05, "loss": 0.6966, "step": 21038 }, { "epoch": 1.4255030828646928, "grad_norm": 10.67426586151123, "learning_rate": 7.222534054350059e-05, "loss": 0.5475, "step": 21039 }, { "epoch": 1.4255708381326648, "grad_norm": 5.673305511474609, "learning_rate": 7.222397152440277e-05, "loss": 0.8561, "step": 21040 }, { "epoch": 1.4256385934006368, "grad_norm": 7.592621326446533, "learning_rate": 7.222260250530495e-05, "loss": 0.599, "step": 21041 }, { "epoch": 1.425706348668609, "grad_norm": 6.139354228973389, "learning_rate": 7.222123348620715e-05, "loss": 0.4623, "step": 21042 }, { "epoch": 1.425774103936581, "grad_norm": 6.604978084564209, "learning_rate": 7.221986446710933e-05, "loss": 0.7766, "step": 21043 }, { "epoch": 1.4258418592045532, "grad_norm": 7.686905384063721, "learning_rate": 7.221849544801151e-05, "loss": 0.8427, "step": 21044 }, { "epoch": 1.4259096144725252, "grad_norm": 6.551141738891602, "learning_rate": 7.221712642891369e-05, "loss": 0.6225, "step": 21045 }, { "epoch": 1.4259773697404974, "grad_norm": 4.813697338104248, "learning_rate": 7.221575740981587e-05, "loss": 0.6653, "step": 21046 }, { "epoch": 1.4260451250084694, "grad_norm": 5.97028923034668, "learning_rate": 7.221438839071806e-05, "loss": 0.5904, "step": 21047 }, { "epoch": 1.4261128802764416, "grad_norm": 6.188946723937988, "learning_rate": 7.221301937162024e-05, "loss": 0.6493, "step": 21048 }, { "epoch": 1.4261806355444135, "grad_norm": 7.23167085647583, "learning_rate": 7.221165035252242e-05, "loss": 0.686, "step": 21049 }, { "epoch": 1.4262483908123857, "grad_norm": 5.983429431915283, "learning_rate": 7.22102813334246e-05, "loss": 0.6265, "step": 21050 }, { "epoch": 1.4263161460803577, "grad_norm": 5.838659763336182, "learning_rate": 7.22089123143268e-05, "loss": 0.6099, "step": 21051 }, { "epoch": 1.4263839013483297, "grad_norm": 9.38227653503418, "learning_rate": 7.220754329522898e-05, "loss": 0.8006, "step": 21052 }, { "epoch": 1.426451656616302, "grad_norm": 5.577592372894287, "learning_rate": 7.220617427613116e-05, "loss": 0.6249, "step": 21053 }, { "epoch": 1.4265194118842741, "grad_norm": 5.723888874053955, "learning_rate": 7.220480525703334e-05, "loss": 0.785, "step": 21054 }, { "epoch": 1.426587167152246, "grad_norm": 12.271686553955078, "learning_rate": 7.220343623793552e-05, "loss": 0.8096, "step": 21055 }, { "epoch": 1.426654922420218, "grad_norm": 5.269440174102783, "learning_rate": 7.220206721883771e-05, "loss": 0.6499, "step": 21056 }, { "epoch": 1.4267226776881903, "grad_norm": 5.996860980987549, "learning_rate": 7.220069819973989e-05, "loss": 0.6744, "step": 21057 }, { "epoch": 1.4267904329561625, "grad_norm": 4.103590488433838, "learning_rate": 7.219932918064207e-05, "loss": 0.5192, "step": 21058 }, { "epoch": 1.4268581882241345, "grad_norm": 4.360240459442139, "learning_rate": 7.219796016154425e-05, "loss": 0.5821, "step": 21059 }, { "epoch": 1.4269259434921064, "grad_norm": 5.895482540130615, "learning_rate": 7.219659114244645e-05, "loss": 0.6838, "step": 21060 }, { "epoch": 1.4269936987600786, "grad_norm": 6.600510120391846, "learning_rate": 7.219522212334863e-05, "loss": 0.9067, "step": 21061 }, { "epoch": 1.4270614540280506, "grad_norm": 5.24995231628418, "learning_rate": 7.219385310425081e-05, "loss": 0.631, "step": 21062 }, { "epoch": 1.4271292092960228, "grad_norm": 5.691985130310059, "learning_rate": 7.219248408515299e-05, "loss": 0.6992, "step": 21063 }, { "epoch": 1.4271969645639948, "grad_norm": 6.790221691131592, "learning_rate": 7.219111506605517e-05, "loss": 0.4775, "step": 21064 }, { "epoch": 1.427264719831967, "grad_norm": 5.794742584228516, "learning_rate": 7.218974604695736e-05, "loss": 0.6822, "step": 21065 }, { "epoch": 1.427332475099939, "grad_norm": 4.285547733306885, "learning_rate": 7.218837702785954e-05, "loss": 0.5612, "step": 21066 }, { "epoch": 1.427400230367911, "grad_norm": 6.383194923400879, "learning_rate": 7.218700800876172e-05, "loss": 0.5671, "step": 21067 }, { "epoch": 1.4274679856358832, "grad_norm": 6.901458740234375, "learning_rate": 7.21856389896639e-05, "loss": 0.6394, "step": 21068 }, { "epoch": 1.4275357409038554, "grad_norm": 4.552248954772949, "learning_rate": 7.218426997056608e-05, "loss": 0.6382, "step": 21069 }, { "epoch": 1.4276034961718274, "grad_norm": 9.202725410461426, "learning_rate": 7.218290095146828e-05, "loss": 0.6549, "step": 21070 }, { "epoch": 1.4276712514397993, "grad_norm": 5.938570499420166, "learning_rate": 7.218153193237046e-05, "loss": 0.6555, "step": 21071 }, { "epoch": 1.4277390067077715, "grad_norm": 4.486370086669922, "learning_rate": 7.218016291327264e-05, "loss": 0.665, "step": 21072 }, { "epoch": 1.4278067619757437, "grad_norm": 6.489226341247559, "learning_rate": 7.217879389417482e-05, "loss": 0.4231, "step": 21073 }, { "epoch": 1.4278745172437157, "grad_norm": 5.363373756408691, "learning_rate": 7.217742487507701e-05, "loss": 0.506, "step": 21074 }, { "epoch": 1.4279422725116877, "grad_norm": 7.409879684448242, "learning_rate": 7.217605585597919e-05, "loss": 0.7561, "step": 21075 }, { "epoch": 1.42801002777966, "grad_norm": 5.223995208740234, "learning_rate": 7.217468683688137e-05, "loss": 0.7095, "step": 21076 }, { "epoch": 1.4280777830476319, "grad_norm": 5.4456024169921875, "learning_rate": 7.217331781778355e-05, "loss": 0.7571, "step": 21077 }, { "epoch": 1.428145538315604, "grad_norm": 6.117336750030518, "learning_rate": 7.217194879868573e-05, "loss": 0.9282, "step": 21078 }, { "epoch": 1.428213293583576, "grad_norm": 6.027008533477783, "learning_rate": 7.217057977958793e-05, "loss": 0.9104, "step": 21079 }, { "epoch": 1.4282810488515483, "grad_norm": 6.733952522277832, "learning_rate": 7.216921076049011e-05, "loss": 0.6074, "step": 21080 }, { "epoch": 1.4283488041195203, "grad_norm": 9.826337814331055, "learning_rate": 7.216784174139229e-05, "loss": 0.6386, "step": 21081 }, { "epoch": 1.4284165593874925, "grad_norm": 6.7357354164123535, "learning_rate": 7.216647272229448e-05, "loss": 0.8716, "step": 21082 }, { "epoch": 1.4284843146554644, "grad_norm": 4.6543049812316895, "learning_rate": 7.216510370319666e-05, "loss": 0.4437, "step": 21083 }, { "epoch": 1.4285520699234366, "grad_norm": 4.747936725616455, "learning_rate": 7.216373468409884e-05, "loss": 0.6969, "step": 21084 }, { "epoch": 1.4286198251914086, "grad_norm": 7.747589588165283, "learning_rate": 7.216236566500104e-05, "loss": 0.6968, "step": 21085 }, { "epoch": 1.4286875804593806, "grad_norm": 6.2851033210754395, "learning_rate": 7.216099664590322e-05, "loss": 0.63, "step": 21086 }, { "epoch": 1.4287553357273528, "grad_norm": 6.276386737823486, "learning_rate": 7.21596276268054e-05, "loss": 0.5077, "step": 21087 }, { "epoch": 1.428823090995325, "grad_norm": 7.910027027130127, "learning_rate": 7.215825860770759e-05, "loss": 0.7728, "step": 21088 }, { "epoch": 1.428890846263297, "grad_norm": 5.592106819152832, "learning_rate": 7.215688958860977e-05, "loss": 0.7896, "step": 21089 }, { "epoch": 1.428958601531269, "grad_norm": 4.674643516540527, "learning_rate": 7.215552056951195e-05, "loss": 0.5938, "step": 21090 }, { "epoch": 1.4290263567992412, "grad_norm": 6.988621711730957, "learning_rate": 7.215415155041413e-05, "loss": 0.5456, "step": 21091 }, { "epoch": 1.4290941120672132, "grad_norm": 6.991052150726318, "learning_rate": 7.215278253131633e-05, "loss": 0.5604, "step": 21092 }, { "epoch": 1.4291618673351854, "grad_norm": 6.325405597686768, "learning_rate": 7.21514135122185e-05, "loss": 0.6131, "step": 21093 }, { "epoch": 1.4292296226031573, "grad_norm": 5.61056661605835, "learning_rate": 7.215004449312069e-05, "loss": 0.7938, "step": 21094 }, { "epoch": 1.4292973778711295, "grad_norm": 4.854368686676025, "learning_rate": 7.214867547402287e-05, "loss": 0.6432, "step": 21095 }, { "epoch": 1.4293651331391015, "grad_norm": 8.328941345214844, "learning_rate": 7.214730645492505e-05, "loss": 0.5885, "step": 21096 }, { "epoch": 1.4294328884070737, "grad_norm": 8.183196067810059, "learning_rate": 7.214593743582724e-05, "loss": 0.588, "step": 21097 }, { "epoch": 1.4295006436750457, "grad_norm": 4.519683837890625, "learning_rate": 7.214456841672942e-05, "loss": 0.762, "step": 21098 }, { "epoch": 1.429568398943018, "grad_norm": 4.574007034301758, "learning_rate": 7.21431993976316e-05, "loss": 0.7192, "step": 21099 }, { "epoch": 1.4296361542109899, "grad_norm": 5.079801082611084, "learning_rate": 7.214183037853378e-05, "loss": 0.6321, "step": 21100 }, { "epoch": 1.4297039094789619, "grad_norm": 5.091111660003662, "learning_rate": 7.214046135943596e-05, "loss": 0.6138, "step": 21101 }, { "epoch": 1.429771664746934, "grad_norm": 6.896608829498291, "learning_rate": 7.213909234033816e-05, "loss": 0.6835, "step": 21102 }, { "epoch": 1.4298394200149063, "grad_norm": 8.103140830993652, "learning_rate": 7.213772332124034e-05, "loss": 0.6485, "step": 21103 }, { "epoch": 1.4299071752828783, "grad_norm": 6.542810916900635, "learning_rate": 7.213635430214252e-05, "loss": 0.6538, "step": 21104 }, { "epoch": 1.4299749305508502, "grad_norm": 6.190620422363281, "learning_rate": 7.21349852830447e-05, "loss": 0.968, "step": 21105 }, { "epoch": 1.4300426858188224, "grad_norm": 6.705104351043701, "learning_rate": 7.213361626394689e-05, "loss": 0.5961, "step": 21106 }, { "epoch": 1.4301104410867946, "grad_norm": 10.238106727600098, "learning_rate": 7.213224724484907e-05, "loss": 0.5905, "step": 21107 }, { "epoch": 1.4301781963547666, "grad_norm": 4.657444000244141, "learning_rate": 7.213087822575125e-05, "loss": 0.5481, "step": 21108 }, { "epoch": 1.4302459516227386, "grad_norm": 5.184266090393066, "learning_rate": 7.212950920665343e-05, "loss": 0.7042, "step": 21109 }, { "epoch": 1.4303137068907108, "grad_norm": 6.505661964416504, "learning_rate": 7.212814018755561e-05, "loss": 0.7288, "step": 21110 }, { "epoch": 1.4303814621586828, "grad_norm": 4.835206031799316, "learning_rate": 7.212677116845781e-05, "loss": 0.5517, "step": 21111 }, { "epoch": 1.430449217426655, "grad_norm": 8.904987335205078, "learning_rate": 7.212540214935999e-05, "loss": 0.7303, "step": 21112 }, { "epoch": 1.430516972694627, "grad_norm": 5.237167835235596, "learning_rate": 7.212403313026217e-05, "loss": 0.6446, "step": 21113 }, { "epoch": 1.4305847279625992, "grad_norm": 3.8788509368896484, "learning_rate": 7.212266411116435e-05, "loss": 0.5377, "step": 21114 }, { "epoch": 1.4306524832305711, "grad_norm": 6.170337200164795, "learning_rate": 7.212129509206654e-05, "loss": 0.6308, "step": 21115 }, { "epoch": 1.4307202384985431, "grad_norm": 5.524146556854248, "learning_rate": 7.211992607296872e-05, "loss": 0.8881, "step": 21116 }, { "epoch": 1.4307879937665153, "grad_norm": 4.382133960723877, "learning_rate": 7.21185570538709e-05, "loss": 0.5643, "step": 21117 }, { "epoch": 1.4308557490344875, "grad_norm": 6.607341766357422, "learning_rate": 7.211718803477308e-05, "loss": 0.5069, "step": 21118 }, { "epoch": 1.4309235043024595, "grad_norm": 6.404487133026123, "learning_rate": 7.211581901567526e-05, "loss": 0.6864, "step": 21119 }, { "epoch": 1.4309912595704315, "grad_norm": 5.2509965896606445, "learning_rate": 7.211444999657746e-05, "loss": 0.6382, "step": 21120 }, { "epoch": 1.4310590148384037, "grad_norm": 5.5533270835876465, "learning_rate": 7.211308097747964e-05, "loss": 0.605, "step": 21121 }, { "epoch": 1.431126770106376, "grad_norm": 7.135275363922119, "learning_rate": 7.211171195838182e-05, "loss": 0.8771, "step": 21122 }, { "epoch": 1.4311945253743479, "grad_norm": 4.881271839141846, "learning_rate": 7.2110342939284e-05, "loss": 0.6335, "step": 21123 }, { "epoch": 1.4312622806423199, "grad_norm": 5.305984020233154, "learning_rate": 7.210897392018618e-05, "loss": 0.7048, "step": 21124 }, { "epoch": 1.431330035910292, "grad_norm": 5.371762752532959, "learning_rate": 7.210760490108837e-05, "loss": 0.7604, "step": 21125 }, { "epoch": 1.431397791178264, "grad_norm": 8.191851615905762, "learning_rate": 7.210623588199055e-05, "loss": 0.7249, "step": 21126 }, { "epoch": 1.4314655464462362, "grad_norm": 5.696255683898926, "learning_rate": 7.210486686289273e-05, "loss": 0.8243, "step": 21127 }, { "epoch": 1.4315333017142082, "grad_norm": 5.093338489532471, "learning_rate": 7.210349784379493e-05, "loss": 0.578, "step": 21128 }, { "epoch": 1.4316010569821804, "grad_norm": 8.961396217346191, "learning_rate": 7.210212882469711e-05, "loss": 0.6078, "step": 21129 }, { "epoch": 1.4316688122501524, "grad_norm": 7.741024017333984, "learning_rate": 7.210075980559929e-05, "loss": 0.6768, "step": 21130 }, { "epoch": 1.4317365675181246, "grad_norm": 7.679696083068848, "learning_rate": 7.209939078650148e-05, "loss": 0.8208, "step": 21131 }, { "epoch": 1.4318043227860966, "grad_norm": 5.185970306396484, "learning_rate": 7.209802176740366e-05, "loss": 0.6193, "step": 21132 }, { "epoch": 1.4318720780540688, "grad_norm": 4.613734245300293, "learning_rate": 7.209665274830584e-05, "loss": 0.6289, "step": 21133 }, { "epoch": 1.4319398333220408, "grad_norm": 6.617764949798584, "learning_rate": 7.209528372920804e-05, "loss": 0.6279, "step": 21134 }, { "epoch": 1.4320075885900128, "grad_norm": 4.782965660095215, "learning_rate": 7.209391471011022e-05, "loss": 0.5293, "step": 21135 }, { "epoch": 1.432075343857985, "grad_norm": 5.865476131439209, "learning_rate": 7.20925456910124e-05, "loss": 0.7416, "step": 21136 }, { "epoch": 1.4321430991259572, "grad_norm": 6.476940631866455, "learning_rate": 7.209117667191458e-05, "loss": 0.8098, "step": 21137 }, { "epoch": 1.4322108543939291, "grad_norm": 7.449859142303467, "learning_rate": 7.208980765281677e-05, "loss": 0.913, "step": 21138 }, { "epoch": 1.4322786096619011, "grad_norm": 5.35452938079834, "learning_rate": 7.208843863371895e-05, "loss": 0.573, "step": 21139 }, { "epoch": 1.4323463649298733, "grad_norm": 6.679113388061523, "learning_rate": 7.208706961462113e-05, "loss": 0.7831, "step": 21140 }, { "epoch": 1.4324141201978453, "grad_norm": 5.533960819244385, "learning_rate": 7.208570059552331e-05, "loss": 0.7397, "step": 21141 }, { "epoch": 1.4324818754658175, "grad_norm": 5.350231170654297, "learning_rate": 7.208433157642549e-05, "loss": 0.5432, "step": 21142 }, { "epoch": 1.4325496307337895, "grad_norm": 5.167989730834961, "learning_rate": 7.208296255732769e-05, "loss": 0.605, "step": 21143 }, { "epoch": 1.4326173860017617, "grad_norm": 6.623338222503662, "learning_rate": 7.208159353822987e-05, "loss": 0.6385, "step": 21144 }, { "epoch": 1.4326851412697337, "grad_norm": 4.660898208618164, "learning_rate": 7.208022451913205e-05, "loss": 0.6419, "step": 21145 }, { "epoch": 1.4327528965377059, "grad_norm": 6.757255554199219, "learning_rate": 7.207885550003423e-05, "loss": 0.5952, "step": 21146 }, { "epoch": 1.4328206518056779, "grad_norm": 6.952462673187256, "learning_rate": 7.207748648093641e-05, "loss": 0.6762, "step": 21147 }, { "epoch": 1.43288840707365, "grad_norm": 4.940800666809082, "learning_rate": 7.20761174618386e-05, "loss": 0.6075, "step": 21148 }, { "epoch": 1.432956162341622, "grad_norm": 4.9300737380981445, "learning_rate": 7.207474844274078e-05, "loss": 0.7968, "step": 21149 }, { "epoch": 1.433023917609594, "grad_norm": 5.669771671295166, "learning_rate": 7.207337942364296e-05, "loss": 0.7014, "step": 21150 }, { "epoch": 1.4330916728775662, "grad_norm": 5.027920722961426, "learning_rate": 7.207201040454514e-05, "loss": 0.5731, "step": 21151 }, { "epoch": 1.4331594281455384, "grad_norm": 6.903927326202393, "learning_rate": 7.207064138544734e-05, "loss": 0.5867, "step": 21152 }, { "epoch": 1.4332271834135104, "grad_norm": 4.141942024230957, "learning_rate": 7.206927236634952e-05, "loss": 0.6488, "step": 21153 }, { "epoch": 1.4332949386814824, "grad_norm": 7.647695541381836, "learning_rate": 7.20679033472517e-05, "loss": 0.6702, "step": 21154 }, { "epoch": 1.4333626939494546, "grad_norm": 7.3778157234191895, "learning_rate": 7.206653432815388e-05, "loss": 0.7178, "step": 21155 }, { "epoch": 1.4334304492174266, "grad_norm": 5.953479766845703, "learning_rate": 7.206516530905606e-05, "loss": 0.5641, "step": 21156 }, { "epoch": 1.4334982044853988, "grad_norm": 6.025650501251221, "learning_rate": 7.206379628995825e-05, "loss": 0.4404, "step": 21157 }, { "epoch": 1.4335659597533708, "grad_norm": 7.285032272338867, "learning_rate": 7.206242727086043e-05, "loss": 0.7826, "step": 21158 }, { "epoch": 1.433633715021343, "grad_norm": 5.807623863220215, "learning_rate": 7.206105825176261e-05, "loss": 0.7233, "step": 21159 }, { "epoch": 1.433701470289315, "grad_norm": 6.043973445892334, "learning_rate": 7.205968923266479e-05, "loss": 0.7125, "step": 21160 }, { "epoch": 1.4337692255572871, "grad_norm": 8.424530029296875, "learning_rate": 7.205832021356699e-05, "loss": 0.7928, "step": 21161 }, { "epoch": 1.4338369808252591, "grad_norm": 5.638418674468994, "learning_rate": 7.205695119446917e-05, "loss": 0.5748, "step": 21162 }, { "epoch": 1.4339047360932313, "grad_norm": 4.747271537780762, "learning_rate": 7.205558217537135e-05, "loss": 0.7156, "step": 21163 }, { "epoch": 1.4339724913612033, "grad_norm": 3.693513870239258, "learning_rate": 7.205421315627353e-05, "loss": 0.6042, "step": 21164 }, { "epoch": 1.4340402466291753, "grad_norm": 4.9900078773498535, "learning_rate": 7.205284413717571e-05, "loss": 0.7026, "step": 21165 }, { "epoch": 1.4341080018971475, "grad_norm": 8.090832710266113, "learning_rate": 7.20514751180779e-05, "loss": 0.8607, "step": 21166 }, { "epoch": 1.4341757571651197, "grad_norm": 16.044631958007812, "learning_rate": 7.205010609898008e-05, "loss": 0.8713, "step": 21167 }, { "epoch": 1.4342435124330917, "grad_norm": 9.392417907714844, "learning_rate": 7.204873707988226e-05, "loss": 0.4984, "step": 21168 }, { "epoch": 1.4343112677010637, "grad_norm": 6.314112663269043, "learning_rate": 7.204736806078444e-05, "loss": 0.7425, "step": 21169 }, { "epoch": 1.4343790229690359, "grad_norm": 4.784892559051514, "learning_rate": 7.204599904168664e-05, "loss": 0.4962, "step": 21170 }, { "epoch": 1.434446778237008, "grad_norm": 4.448654651641846, "learning_rate": 7.204463002258882e-05, "loss": 0.568, "step": 21171 }, { "epoch": 1.43451453350498, "grad_norm": 7.133582592010498, "learning_rate": 7.2043261003491e-05, "loss": 0.6423, "step": 21172 }, { "epoch": 1.434582288772952, "grad_norm": 5.735981464385986, "learning_rate": 7.204189198439318e-05, "loss": 0.6092, "step": 21173 }, { "epoch": 1.4346500440409242, "grad_norm": 8.889864921569824, "learning_rate": 7.204052296529537e-05, "loss": 0.8187, "step": 21174 }, { "epoch": 1.4347177993088962, "grad_norm": 6.37787389755249, "learning_rate": 7.203915394619755e-05, "loss": 0.4332, "step": 21175 }, { "epoch": 1.4347855545768684, "grad_norm": 6.0002827644348145, "learning_rate": 7.203778492709973e-05, "loss": 0.6072, "step": 21176 }, { "epoch": 1.4348533098448404, "grad_norm": 6.082003116607666, "learning_rate": 7.203641590800193e-05, "loss": 0.7488, "step": 21177 }, { "epoch": 1.4349210651128126, "grad_norm": 5.181985855102539, "learning_rate": 7.20350468889041e-05, "loss": 0.5385, "step": 21178 }, { "epoch": 1.4349888203807846, "grad_norm": 11.087087631225586, "learning_rate": 7.203367786980629e-05, "loss": 0.589, "step": 21179 }, { "epoch": 1.4350565756487568, "grad_norm": 4.165190696716309, "learning_rate": 7.203230885070848e-05, "loss": 0.4613, "step": 21180 }, { "epoch": 1.4351243309167288, "grad_norm": 6.350247383117676, "learning_rate": 7.203093983161066e-05, "loss": 0.6879, "step": 21181 }, { "epoch": 1.435192086184701, "grad_norm": 5.48975133895874, "learning_rate": 7.202957081251284e-05, "loss": 0.6021, "step": 21182 }, { "epoch": 1.435259841452673, "grad_norm": 6.886169910430908, "learning_rate": 7.202820179341502e-05, "loss": 0.7094, "step": 21183 }, { "epoch": 1.435327596720645, "grad_norm": 5.148533344268799, "learning_rate": 7.202683277431722e-05, "loss": 0.8385, "step": 21184 }, { "epoch": 1.4353953519886171, "grad_norm": 5.972015380859375, "learning_rate": 7.20254637552194e-05, "loss": 0.8212, "step": 21185 }, { "epoch": 1.4354631072565893, "grad_norm": 7.750905513763428, "learning_rate": 7.202409473612158e-05, "loss": 0.9002, "step": 21186 }, { "epoch": 1.4355308625245613, "grad_norm": 4.735476016998291, "learning_rate": 7.202272571702376e-05, "loss": 0.4959, "step": 21187 }, { "epoch": 1.4355986177925333, "grad_norm": 5.8669257164001465, "learning_rate": 7.202135669792594e-05, "loss": 0.7505, "step": 21188 }, { "epoch": 1.4356663730605055, "grad_norm": 5.902633190155029, "learning_rate": 7.201998767882813e-05, "loss": 0.6764, "step": 21189 }, { "epoch": 1.4357341283284775, "grad_norm": 5.887857437133789, "learning_rate": 7.201861865973031e-05, "loss": 0.6059, "step": 21190 }, { "epoch": 1.4358018835964497, "grad_norm": 4.67966890335083, "learning_rate": 7.201724964063249e-05, "loss": 0.6141, "step": 21191 }, { "epoch": 1.4358696388644216, "grad_norm": 5.472180366516113, "learning_rate": 7.201588062153467e-05, "loss": 0.7045, "step": 21192 }, { "epoch": 1.4359373941323939, "grad_norm": 5.777398586273193, "learning_rate": 7.201451160243687e-05, "loss": 0.6492, "step": 21193 }, { "epoch": 1.4360051494003658, "grad_norm": 7.0353522300720215, "learning_rate": 7.201314258333905e-05, "loss": 1.0177, "step": 21194 }, { "epoch": 1.436072904668338, "grad_norm": 5.9157633781433105, "learning_rate": 7.201177356424123e-05, "loss": 0.8047, "step": 21195 }, { "epoch": 1.43614065993631, "grad_norm": 7.532500267028809, "learning_rate": 7.201040454514341e-05, "loss": 0.556, "step": 21196 }, { "epoch": 1.4362084152042822, "grad_norm": 5.959936618804932, "learning_rate": 7.200903552604559e-05, "loss": 0.5368, "step": 21197 }, { "epoch": 1.4362761704722542, "grad_norm": 7.159371852874756, "learning_rate": 7.200766650694778e-05, "loss": 0.596, "step": 21198 }, { "epoch": 1.4363439257402262, "grad_norm": 5.048837184906006, "learning_rate": 7.200629748784996e-05, "loss": 0.5654, "step": 21199 }, { "epoch": 1.4364116810081984, "grad_norm": 13.126113891601562, "learning_rate": 7.200492846875214e-05, "loss": 0.6373, "step": 21200 }, { "epoch": 1.4364794362761706, "grad_norm": 5.150922775268555, "learning_rate": 7.200355944965432e-05, "loss": 0.6435, "step": 21201 }, { "epoch": 1.4365471915441426, "grad_norm": 8.846650123596191, "learning_rate": 7.20021904305565e-05, "loss": 0.4893, "step": 21202 }, { "epoch": 1.4366149468121145, "grad_norm": 5.044472694396973, "learning_rate": 7.20008214114587e-05, "loss": 0.8581, "step": 21203 }, { "epoch": 1.4366827020800867, "grad_norm": 5.029001235961914, "learning_rate": 7.199945239236088e-05, "loss": 0.8179, "step": 21204 }, { "epoch": 1.4367504573480587, "grad_norm": 5.449691295623779, "learning_rate": 7.199808337326306e-05, "loss": 0.5579, "step": 21205 }, { "epoch": 1.436818212616031, "grad_norm": 5.1544928550720215, "learning_rate": 7.199671435416524e-05, "loss": 0.6589, "step": 21206 }, { "epoch": 1.436885967884003, "grad_norm": 6.028853416442871, "learning_rate": 7.199534533506743e-05, "loss": 0.6526, "step": 21207 }, { "epoch": 1.4369537231519751, "grad_norm": 4.992793083190918, "learning_rate": 7.199397631596961e-05, "loss": 0.6272, "step": 21208 }, { "epoch": 1.437021478419947, "grad_norm": 6.825835227966309, "learning_rate": 7.199260729687179e-05, "loss": 0.5573, "step": 21209 }, { "epoch": 1.4370892336879193, "grad_norm": 5.997669219970703, "learning_rate": 7.199123827777397e-05, "loss": 0.5548, "step": 21210 }, { "epoch": 1.4371569889558913, "grad_norm": 6.456986904144287, "learning_rate": 7.198986925867615e-05, "loss": 0.6997, "step": 21211 }, { "epoch": 1.4372247442238635, "grad_norm": 6.8386993408203125, "learning_rate": 7.198850023957835e-05, "loss": 0.5905, "step": 21212 }, { "epoch": 1.4372924994918355, "grad_norm": 7.1268205642700195, "learning_rate": 7.198713122048053e-05, "loss": 0.6743, "step": 21213 }, { "epoch": 1.4373602547598074, "grad_norm": 6.348610877990723, "learning_rate": 7.198576220138271e-05, "loss": 0.5171, "step": 21214 }, { "epoch": 1.4374280100277796, "grad_norm": 5.237998962402344, "learning_rate": 7.198439318228489e-05, "loss": 0.4502, "step": 21215 }, { "epoch": 1.4374957652957518, "grad_norm": 4.914703369140625, "learning_rate": 7.198302416318708e-05, "loss": 0.8431, "step": 21216 }, { "epoch": 1.4375635205637238, "grad_norm": 5.007193565368652, "learning_rate": 7.198165514408926e-05, "loss": 0.6131, "step": 21217 }, { "epoch": 1.4376312758316958, "grad_norm": 6.912498474121094, "learning_rate": 7.198028612499144e-05, "loss": 0.5511, "step": 21218 }, { "epoch": 1.437699031099668, "grad_norm": 6.885551929473877, "learning_rate": 7.197891710589362e-05, "loss": 0.5698, "step": 21219 }, { "epoch": 1.4377667863676402, "grad_norm": 4.053386211395264, "learning_rate": 7.197754808679582e-05, "loss": 0.6267, "step": 21220 }, { "epoch": 1.4378345416356122, "grad_norm": 5.894219875335693, "learning_rate": 7.1976179067698e-05, "loss": 0.7779, "step": 21221 }, { "epoch": 1.4379022969035842, "grad_norm": 7.574132442474365, "learning_rate": 7.197481004860018e-05, "loss": 0.4751, "step": 21222 }, { "epoch": 1.4379700521715564, "grad_norm": 9.557796478271484, "learning_rate": 7.197344102950237e-05, "loss": 0.7214, "step": 21223 }, { "epoch": 1.4380378074395284, "grad_norm": 6.987293243408203, "learning_rate": 7.197207201040455e-05, "loss": 0.9853, "step": 21224 }, { "epoch": 1.4381055627075006, "grad_norm": 6.196108818054199, "learning_rate": 7.197070299130673e-05, "loss": 0.7368, "step": 21225 }, { "epoch": 1.4381733179754725, "grad_norm": 6.757913112640381, "learning_rate": 7.196933397220893e-05, "loss": 1.0114, "step": 21226 }, { "epoch": 1.4382410732434447, "grad_norm": 7.3919148445129395, "learning_rate": 7.19679649531111e-05, "loss": 0.5428, "step": 21227 }, { "epoch": 1.4383088285114167, "grad_norm": 5.638350009918213, "learning_rate": 7.196659593401329e-05, "loss": 0.6207, "step": 21228 }, { "epoch": 1.438376583779389, "grad_norm": 4.884724140167236, "learning_rate": 7.196522691491547e-05, "loss": 0.7389, "step": 21229 }, { "epoch": 1.438444339047361, "grad_norm": 6.233888626098633, "learning_rate": 7.196385789581766e-05, "loss": 0.6551, "step": 21230 }, { "epoch": 1.4385120943153331, "grad_norm": 7.0116658210754395, "learning_rate": 7.196248887671984e-05, "loss": 0.9649, "step": 21231 }, { "epoch": 1.438579849583305, "grad_norm": 8.309822082519531, "learning_rate": 7.196111985762202e-05, "loss": 0.6096, "step": 21232 }, { "epoch": 1.438647604851277, "grad_norm": 5.920278549194336, "learning_rate": 7.19597508385242e-05, "loss": 0.8809, "step": 21233 }, { "epoch": 1.4387153601192493, "grad_norm": 6.738556861877441, "learning_rate": 7.195838181942638e-05, "loss": 0.5823, "step": 21234 }, { "epoch": 1.4387831153872215, "grad_norm": 5.840603828430176, "learning_rate": 7.195701280032858e-05, "loss": 0.7693, "step": 21235 }, { "epoch": 1.4388508706551935, "grad_norm": 9.781538963317871, "learning_rate": 7.195564378123076e-05, "loss": 0.7189, "step": 21236 }, { "epoch": 1.4389186259231654, "grad_norm": 5.143826007843018, "learning_rate": 7.195427476213294e-05, "loss": 0.5816, "step": 21237 }, { "epoch": 1.4389863811911376, "grad_norm": 6.868589878082275, "learning_rate": 7.195290574303512e-05, "loss": 0.5857, "step": 21238 }, { "epoch": 1.4390541364591096, "grad_norm": 5.530706882476807, "learning_rate": 7.195153672393731e-05, "loss": 0.5244, "step": 21239 }, { "epoch": 1.4391218917270818, "grad_norm": 6.920599460601807, "learning_rate": 7.195016770483949e-05, "loss": 0.8332, "step": 21240 }, { "epoch": 1.4391896469950538, "grad_norm": 7.344499111175537, "learning_rate": 7.194879868574167e-05, "loss": 0.7575, "step": 21241 }, { "epoch": 1.439257402263026, "grad_norm": 6.560253143310547, "learning_rate": 7.194742966664385e-05, "loss": 0.6889, "step": 21242 }, { "epoch": 1.439325157530998, "grad_norm": 5.398524284362793, "learning_rate": 7.194606064754603e-05, "loss": 0.6509, "step": 21243 }, { "epoch": 1.4393929127989702, "grad_norm": 5.487909317016602, "learning_rate": 7.194469162844823e-05, "loss": 0.6188, "step": 21244 }, { "epoch": 1.4394606680669422, "grad_norm": 5.543613433837891, "learning_rate": 7.19433226093504e-05, "loss": 0.659, "step": 21245 }, { "epoch": 1.4395284233349144, "grad_norm": 3.9932565689086914, "learning_rate": 7.194195359025259e-05, "loss": 0.7617, "step": 21246 }, { "epoch": 1.4395961786028864, "grad_norm": 4.393940448760986, "learning_rate": 7.194058457115477e-05, "loss": 0.6201, "step": 21247 }, { "epoch": 1.4396639338708583, "grad_norm": 6.6489577293396, "learning_rate": 7.193921555205696e-05, "loss": 0.6944, "step": 21248 }, { "epoch": 1.4397316891388305, "grad_norm": 4.839058876037598, "learning_rate": 7.193784653295914e-05, "loss": 0.7311, "step": 21249 }, { "epoch": 1.4397994444068027, "grad_norm": 6.997048377990723, "learning_rate": 7.193647751386132e-05, "loss": 0.7038, "step": 21250 }, { "epoch": 1.4398671996747747, "grad_norm": 4.0679473876953125, "learning_rate": 7.19351084947635e-05, "loss": 0.5096, "step": 21251 }, { "epoch": 1.4399349549427467, "grad_norm": 6.029549598693848, "learning_rate": 7.193373947566568e-05, "loss": 0.594, "step": 21252 }, { "epoch": 1.440002710210719, "grad_norm": 6.124058723449707, "learning_rate": 7.193237045656788e-05, "loss": 0.719, "step": 21253 }, { "epoch": 1.4400704654786909, "grad_norm": 5.385005474090576, "learning_rate": 7.193100143747006e-05, "loss": 0.7818, "step": 21254 }, { "epoch": 1.440138220746663, "grad_norm": 4.61655855178833, "learning_rate": 7.192963241837224e-05, "loss": 0.6185, "step": 21255 }, { "epoch": 1.440205976014635, "grad_norm": 3.92849063873291, "learning_rate": 7.192826339927442e-05, "loss": 0.5806, "step": 21256 }, { "epoch": 1.4402737312826073, "grad_norm": 5.654826641082764, "learning_rate": 7.19268943801766e-05, "loss": 0.5131, "step": 21257 }, { "epoch": 1.4403414865505793, "grad_norm": 5.279784202575684, "learning_rate": 7.192552536107879e-05, "loss": 0.5916, "step": 21258 }, { "epoch": 1.4404092418185515, "grad_norm": 7.348697662353516, "learning_rate": 7.192415634198097e-05, "loss": 0.757, "step": 21259 }, { "epoch": 1.4404769970865234, "grad_norm": 4.188069820404053, "learning_rate": 7.192278732288315e-05, "loss": 0.4298, "step": 21260 }, { "epoch": 1.4405447523544956, "grad_norm": 5.865291595458984, "learning_rate": 7.192141830378533e-05, "loss": 0.4883, "step": 21261 }, { "epoch": 1.4406125076224676, "grad_norm": 6.24110746383667, "learning_rate": 7.192004928468753e-05, "loss": 0.6933, "step": 21262 }, { "epoch": 1.4406802628904396, "grad_norm": 6.939300537109375, "learning_rate": 7.19186802655897e-05, "loss": 0.6908, "step": 21263 }, { "epoch": 1.4407480181584118, "grad_norm": 6.803098678588867, "learning_rate": 7.191731124649189e-05, "loss": 0.5736, "step": 21264 }, { "epoch": 1.440815773426384, "grad_norm": 7.103739261627197, "learning_rate": 7.191594222739407e-05, "loss": 0.7911, "step": 21265 }, { "epoch": 1.440883528694356, "grad_norm": 4.161977291107178, "learning_rate": 7.191457320829625e-05, "loss": 0.5159, "step": 21266 }, { "epoch": 1.440951283962328, "grad_norm": 6.345305919647217, "learning_rate": 7.191320418919844e-05, "loss": 0.6748, "step": 21267 }, { "epoch": 1.4410190392303002, "grad_norm": 6.25006103515625, "learning_rate": 7.191183517010062e-05, "loss": 0.7441, "step": 21268 }, { "epoch": 1.4410867944982724, "grad_norm": 5.145815372467041, "learning_rate": 7.19104661510028e-05, "loss": 0.9288, "step": 21269 }, { "epoch": 1.4411545497662444, "grad_norm": 6.978854656219482, "learning_rate": 7.1909097131905e-05, "loss": 0.5707, "step": 21270 }, { "epoch": 1.4412223050342163, "grad_norm": 6.014636516571045, "learning_rate": 7.190772811280718e-05, "loss": 0.7374, "step": 21271 }, { "epoch": 1.4412900603021885, "grad_norm": 5.1946702003479, "learning_rate": 7.190635909370936e-05, "loss": 0.7277, "step": 21272 }, { "epoch": 1.4413578155701605, "grad_norm": 5.300602436065674, "learning_rate": 7.190499007461155e-05, "loss": 0.6284, "step": 21273 }, { "epoch": 1.4414255708381327, "grad_norm": 5.362601280212402, "learning_rate": 7.190362105551373e-05, "loss": 0.7664, "step": 21274 }, { "epoch": 1.4414933261061047, "grad_norm": 5.474440574645996, "learning_rate": 7.190225203641591e-05, "loss": 0.6289, "step": 21275 }, { "epoch": 1.441561081374077, "grad_norm": 7.920563697814941, "learning_rate": 7.19008830173181e-05, "loss": 0.8955, "step": 21276 }, { "epoch": 1.4416288366420489, "grad_norm": 5.887422561645508, "learning_rate": 7.189951399822029e-05, "loss": 0.8632, "step": 21277 }, { "epoch": 1.441696591910021, "grad_norm": 5.438183784484863, "learning_rate": 7.189814497912247e-05, "loss": 0.5375, "step": 21278 }, { "epoch": 1.441764347177993, "grad_norm": 5.824024200439453, "learning_rate": 7.189677596002465e-05, "loss": 0.7674, "step": 21279 }, { "epoch": 1.4418321024459653, "grad_norm": 7.866394519805908, "learning_rate": 7.189540694092683e-05, "loss": 0.82, "step": 21280 }, { "epoch": 1.4418998577139372, "grad_norm": 7.24639892578125, "learning_rate": 7.189403792182902e-05, "loss": 0.5205, "step": 21281 }, { "epoch": 1.4419676129819092, "grad_norm": 4.791794300079346, "learning_rate": 7.18926689027312e-05, "loss": 0.6267, "step": 21282 }, { "epoch": 1.4420353682498814, "grad_norm": 6.42345666885376, "learning_rate": 7.189129988363338e-05, "loss": 0.5976, "step": 21283 }, { "epoch": 1.4421031235178536, "grad_norm": 6.295878887176514, "learning_rate": 7.188993086453556e-05, "loss": 0.848, "step": 21284 }, { "epoch": 1.4421708787858256, "grad_norm": 5.447689056396484, "learning_rate": 7.188856184543776e-05, "loss": 0.7565, "step": 21285 }, { "epoch": 1.4422386340537976, "grad_norm": 7.1255784034729, "learning_rate": 7.188719282633994e-05, "loss": 0.8195, "step": 21286 }, { "epoch": 1.4423063893217698, "grad_norm": 4.20014762878418, "learning_rate": 7.188582380724212e-05, "loss": 0.4992, "step": 21287 }, { "epoch": 1.4423741445897418, "grad_norm": 4.768876552581787, "learning_rate": 7.18844547881443e-05, "loss": 0.5829, "step": 21288 }, { "epoch": 1.442441899857714, "grad_norm": 6.021373748779297, "learning_rate": 7.188308576904648e-05, "loss": 0.7129, "step": 21289 }, { "epoch": 1.442509655125686, "grad_norm": 5.665954113006592, "learning_rate": 7.188171674994867e-05, "loss": 0.9546, "step": 21290 }, { "epoch": 1.4425774103936582, "grad_norm": 5.1718902587890625, "learning_rate": 7.188034773085085e-05, "loss": 0.6683, "step": 21291 }, { "epoch": 1.4426451656616301, "grad_norm": 5.6117095947265625, "learning_rate": 7.187897871175303e-05, "loss": 0.6311, "step": 21292 }, { "epoch": 1.4427129209296023, "grad_norm": 6.450825214385986, "learning_rate": 7.187760969265521e-05, "loss": 0.921, "step": 21293 }, { "epoch": 1.4427806761975743, "grad_norm": 7.070262908935547, "learning_rate": 7.18762406735574e-05, "loss": 0.7268, "step": 21294 }, { "epoch": 1.4428484314655465, "grad_norm": 5.82457160949707, "learning_rate": 7.187487165445959e-05, "loss": 0.9942, "step": 21295 }, { "epoch": 1.4429161867335185, "grad_norm": 6.232304096221924, "learning_rate": 7.187350263536177e-05, "loss": 0.6966, "step": 21296 }, { "epoch": 1.4429839420014905, "grad_norm": 6.437962055206299, "learning_rate": 7.187213361626395e-05, "loss": 0.5982, "step": 21297 }, { "epoch": 1.4430516972694627, "grad_norm": 5.511158466339111, "learning_rate": 7.187076459716613e-05, "loss": 0.706, "step": 21298 }, { "epoch": 1.443119452537435, "grad_norm": 5.531700611114502, "learning_rate": 7.186939557806832e-05, "loss": 0.55, "step": 21299 }, { "epoch": 1.4431872078054069, "grad_norm": 6.174670696258545, "learning_rate": 7.18680265589705e-05, "loss": 0.6449, "step": 21300 }, { "epoch": 1.4432549630733789, "grad_norm": 7.663538455963135, "learning_rate": 7.186665753987268e-05, "loss": 0.7234, "step": 21301 }, { "epoch": 1.443322718341351, "grad_norm": 8.459357261657715, "learning_rate": 7.186528852077486e-05, "loss": 0.6585, "step": 21302 }, { "epoch": 1.443390473609323, "grad_norm": 6.293954849243164, "learning_rate": 7.186391950167706e-05, "loss": 0.6156, "step": 21303 }, { "epoch": 1.4434582288772952, "grad_norm": 5.5088653564453125, "learning_rate": 7.186255048257924e-05, "loss": 0.646, "step": 21304 }, { "epoch": 1.4435259841452672, "grad_norm": 5.386280536651611, "learning_rate": 7.186118146348142e-05, "loss": 0.7327, "step": 21305 }, { "epoch": 1.4435937394132394, "grad_norm": 5.943539619445801, "learning_rate": 7.18598124443836e-05, "loss": 0.7462, "step": 21306 }, { "epoch": 1.4436614946812114, "grad_norm": 5.474667072296143, "learning_rate": 7.185844342528578e-05, "loss": 0.6744, "step": 21307 }, { "epoch": 1.4437292499491836, "grad_norm": 3.9423787593841553, "learning_rate": 7.185707440618797e-05, "loss": 0.6647, "step": 21308 }, { "epoch": 1.4437970052171556, "grad_norm": 6.682604789733887, "learning_rate": 7.185570538709015e-05, "loss": 0.7233, "step": 21309 }, { "epoch": 1.4438647604851278, "grad_norm": 4.800355911254883, "learning_rate": 7.185433636799233e-05, "loss": 0.7767, "step": 21310 }, { "epoch": 1.4439325157530998, "grad_norm": 5.030117988586426, "learning_rate": 7.185296734889451e-05, "loss": 0.5424, "step": 21311 }, { "epoch": 1.4440002710210718, "grad_norm": 5.080108165740967, "learning_rate": 7.185159832979669e-05, "loss": 0.6903, "step": 21312 }, { "epoch": 1.444068026289044, "grad_norm": 4.75273323059082, "learning_rate": 7.185022931069889e-05, "loss": 0.5392, "step": 21313 }, { "epoch": 1.4441357815570162, "grad_norm": 5.595869064331055, "learning_rate": 7.184886029160107e-05, "loss": 0.6488, "step": 21314 }, { "epoch": 1.4442035368249881, "grad_norm": 4.182771682739258, "learning_rate": 7.184749127250325e-05, "loss": 0.5889, "step": 21315 }, { "epoch": 1.4442712920929601, "grad_norm": 5.478609085083008, "learning_rate": 7.184612225340544e-05, "loss": 0.8268, "step": 21316 }, { "epoch": 1.4443390473609323, "grad_norm": 5.896068572998047, "learning_rate": 7.184475323430762e-05, "loss": 0.65, "step": 21317 }, { "epoch": 1.4444068026289045, "grad_norm": 5.2081708908081055, "learning_rate": 7.18433842152098e-05, "loss": 0.6469, "step": 21318 }, { "epoch": 1.4444745578968765, "grad_norm": 5.955501079559326, "learning_rate": 7.1842015196112e-05, "loss": 0.7039, "step": 21319 }, { "epoch": 1.4445423131648485, "grad_norm": 5.313881874084473, "learning_rate": 7.184064617701418e-05, "loss": 0.8276, "step": 21320 }, { "epoch": 1.4446100684328207, "grad_norm": 4.967574119567871, "learning_rate": 7.183927715791636e-05, "loss": 0.5918, "step": 21321 }, { "epoch": 1.4446778237007927, "grad_norm": 5.371888160705566, "learning_rate": 7.183790813881855e-05, "loss": 0.7379, "step": 21322 }, { "epoch": 1.4447455789687649, "grad_norm": 5.456552028656006, "learning_rate": 7.183653911972073e-05, "loss": 0.6821, "step": 21323 }, { "epoch": 1.4448133342367369, "grad_norm": 6.307971954345703, "learning_rate": 7.183517010062291e-05, "loss": 0.6893, "step": 21324 }, { "epoch": 1.444881089504709, "grad_norm": 5.662734031677246, "learning_rate": 7.183380108152509e-05, "loss": 0.5946, "step": 21325 }, { "epoch": 1.444948844772681, "grad_norm": 4.807537078857422, "learning_rate": 7.183243206242728e-05, "loss": 0.6352, "step": 21326 }, { "epoch": 1.4450166000406532, "grad_norm": 4.514939308166504, "learning_rate": 7.183106304332947e-05, "loss": 0.6515, "step": 21327 }, { "epoch": 1.4450843553086252, "grad_norm": 6.383943557739258, "learning_rate": 7.182969402423165e-05, "loss": 0.741, "step": 21328 }, { "epoch": 1.4451521105765974, "grad_norm": 5.06636905670166, "learning_rate": 7.182832500513383e-05, "loss": 0.7215, "step": 21329 }, { "epoch": 1.4452198658445694, "grad_norm": 5.589737892150879, "learning_rate": 7.1826955986036e-05, "loss": 0.7132, "step": 21330 }, { "epoch": 1.4452876211125414, "grad_norm": 6.011363983154297, "learning_rate": 7.18255869669382e-05, "loss": 0.6308, "step": 21331 }, { "epoch": 1.4453553763805136, "grad_norm": 7.630017280578613, "learning_rate": 7.182421794784038e-05, "loss": 0.705, "step": 21332 }, { "epoch": 1.4454231316484858, "grad_norm": 7.289696216583252, "learning_rate": 7.182284892874256e-05, "loss": 0.647, "step": 21333 }, { "epoch": 1.4454908869164578, "grad_norm": 6.248398780822754, "learning_rate": 7.182147990964474e-05, "loss": 0.7157, "step": 21334 }, { "epoch": 1.4455586421844298, "grad_norm": 6.78162145614624, "learning_rate": 7.182011089054692e-05, "loss": 0.8407, "step": 21335 }, { "epoch": 1.445626397452402, "grad_norm": 6.391787052154541, "learning_rate": 7.181874187144912e-05, "loss": 0.6076, "step": 21336 }, { "epoch": 1.445694152720374, "grad_norm": 5.523345947265625, "learning_rate": 7.18173728523513e-05, "loss": 0.5753, "step": 21337 }, { "epoch": 1.4457619079883461, "grad_norm": 5.661657810211182, "learning_rate": 7.181600383325348e-05, "loss": 0.6877, "step": 21338 }, { "epoch": 1.4458296632563181, "grad_norm": 5.605064868927002, "learning_rate": 7.181463481415566e-05, "loss": 0.6788, "step": 21339 }, { "epoch": 1.4458974185242903, "grad_norm": 5.155650615692139, "learning_rate": 7.181326579505785e-05, "loss": 0.7576, "step": 21340 }, { "epoch": 1.4459651737922623, "grad_norm": 9.181048393249512, "learning_rate": 7.181189677596003e-05, "loss": 0.7043, "step": 21341 }, { "epoch": 1.4460329290602345, "grad_norm": 5.101927280426025, "learning_rate": 7.181052775686221e-05, "loss": 0.741, "step": 21342 }, { "epoch": 1.4461006843282065, "grad_norm": 4.9101386070251465, "learning_rate": 7.180915873776439e-05, "loss": 0.6155, "step": 21343 }, { "epoch": 1.4461684395961787, "grad_norm": 5.187961101531982, "learning_rate": 7.180778971866657e-05, "loss": 0.7267, "step": 21344 }, { "epoch": 1.4462361948641507, "grad_norm": 4.419912338256836, "learning_rate": 7.180642069956877e-05, "loss": 0.7263, "step": 21345 }, { "epoch": 1.4463039501321227, "grad_norm": 6.172843933105469, "learning_rate": 7.180505168047095e-05, "loss": 0.7879, "step": 21346 }, { "epoch": 1.4463717054000949, "grad_norm": 4.515700340270996, "learning_rate": 7.180368266137313e-05, "loss": 0.6172, "step": 21347 }, { "epoch": 1.446439460668067, "grad_norm": 6.466606616973877, "learning_rate": 7.18023136422753e-05, "loss": 0.6937, "step": 21348 }, { "epoch": 1.446507215936039, "grad_norm": 4.111232757568359, "learning_rate": 7.18009446231775e-05, "loss": 0.4889, "step": 21349 }, { "epoch": 1.446574971204011, "grad_norm": 9.649117469787598, "learning_rate": 7.179957560407968e-05, "loss": 0.6074, "step": 21350 }, { "epoch": 1.4466427264719832, "grad_norm": 4.857731342315674, "learning_rate": 7.179820658498186e-05, "loss": 0.6253, "step": 21351 }, { "epoch": 1.4467104817399552, "grad_norm": 5.340709686279297, "learning_rate": 7.179683756588404e-05, "loss": 0.9152, "step": 21352 }, { "epoch": 1.4467782370079274, "grad_norm": 4.548199653625488, "learning_rate": 7.179546854678622e-05, "loss": 0.6269, "step": 21353 }, { "epoch": 1.4468459922758994, "grad_norm": 4.821907997131348, "learning_rate": 7.179409952768842e-05, "loss": 0.5049, "step": 21354 }, { "epoch": 1.4469137475438716, "grad_norm": 5.015774250030518, "learning_rate": 7.17927305085906e-05, "loss": 0.6883, "step": 21355 }, { "epoch": 1.4469815028118436, "grad_norm": 5.505579471588135, "learning_rate": 7.179136148949278e-05, "loss": 0.6535, "step": 21356 }, { "epoch": 1.4470492580798158, "grad_norm": 7.182992458343506, "learning_rate": 7.178999247039496e-05, "loss": 0.7676, "step": 21357 }, { "epoch": 1.4471170133477878, "grad_norm": 5.087315559387207, "learning_rate": 7.178862345129715e-05, "loss": 0.5173, "step": 21358 }, { "epoch": 1.44718476861576, "grad_norm": 4.2544264793396, "learning_rate": 7.178725443219933e-05, "loss": 0.666, "step": 21359 }, { "epoch": 1.447252523883732, "grad_norm": 4.1802659034729, "learning_rate": 7.178588541310151e-05, "loss": 0.6231, "step": 21360 }, { "epoch": 1.447320279151704, "grad_norm": 4.579672336578369, "learning_rate": 7.178451639400369e-05, "loss": 0.4737, "step": 21361 }, { "epoch": 1.4473880344196761, "grad_norm": 5.860485553741455, "learning_rate": 7.178314737490589e-05, "loss": 0.551, "step": 21362 }, { "epoch": 1.4474557896876483, "grad_norm": 6.151031970977783, "learning_rate": 7.178177835580807e-05, "loss": 0.7422, "step": 21363 }, { "epoch": 1.4475235449556203, "grad_norm": 4.0939226150512695, "learning_rate": 7.178040933671025e-05, "loss": 0.4576, "step": 21364 }, { "epoch": 1.4475913002235923, "grad_norm": 6.790271759033203, "learning_rate": 7.177904031761244e-05, "loss": 0.7161, "step": 21365 }, { "epoch": 1.4476590554915645, "grad_norm": 6.568310737609863, "learning_rate": 7.177767129851462e-05, "loss": 0.6687, "step": 21366 }, { "epoch": 1.4477268107595367, "grad_norm": 5.083438873291016, "learning_rate": 7.17763022794168e-05, "loss": 0.7691, "step": 21367 }, { "epoch": 1.4477945660275087, "grad_norm": 5.282893657684326, "learning_rate": 7.1774933260319e-05, "loss": 0.6151, "step": 21368 }, { "epoch": 1.4478623212954806, "grad_norm": 4.5835371017456055, "learning_rate": 7.177356424122117e-05, "loss": 0.7995, "step": 21369 }, { "epoch": 1.4479300765634529, "grad_norm": 4.017055034637451, "learning_rate": 7.177219522212336e-05, "loss": 0.6387, "step": 21370 }, { "epoch": 1.4479978318314248, "grad_norm": 7.127228736877441, "learning_rate": 7.177082620302554e-05, "loss": 0.6829, "step": 21371 }, { "epoch": 1.448065587099397, "grad_norm": 6.169859886169434, "learning_rate": 7.176945718392773e-05, "loss": 0.6337, "step": 21372 }, { "epoch": 1.448133342367369, "grad_norm": 7.833038330078125, "learning_rate": 7.176808816482991e-05, "loss": 0.6311, "step": 21373 }, { "epoch": 1.4482010976353412, "grad_norm": 4.922698497772217, "learning_rate": 7.176671914573209e-05, "loss": 0.6777, "step": 21374 }, { "epoch": 1.4482688529033132, "grad_norm": 4.683440208435059, "learning_rate": 7.176535012663427e-05, "loss": 0.6397, "step": 21375 }, { "epoch": 1.4483366081712854, "grad_norm": 6.328221321105957, "learning_rate": 7.176398110753645e-05, "loss": 0.6817, "step": 21376 }, { "epoch": 1.4484043634392574, "grad_norm": 6.748929977416992, "learning_rate": 7.176261208843864e-05, "loss": 0.8091, "step": 21377 }, { "epoch": 1.4484721187072296, "grad_norm": 6.724064350128174, "learning_rate": 7.176124306934083e-05, "loss": 0.7298, "step": 21378 }, { "epoch": 1.4485398739752016, "grad_norm": 5.054249286651611, "learning_rate": 7.1759874050243e-05, "loss": 0.5153, "step": 21379 }, { "epoch": 1.4486076292431735, "grad_norm": 5.5139336585998535, "learning_rate": 7.175850503114519e-05, "loss": 0.7499, "step": 21380 }, { "epoch": 1.4486753845111457, "grad_norm": 9.016888618469238, "learning_rate": 7.175713601204738e-05, "loss": 0.8061, "step": 21381 }, { "epoch": 1.448743139779118, "grad_norm": 4.572885990142822, "learning_rate": 7.175576699294956e-05, "loss": 0.4714, "step": 21382 }, { "epoch": 1.44881089504709, "grad_norm": 5.508910655975342, "learning_rate": 7.175439797385174e-05, "loss": 0.6877, "step": 21383 }, { "epoch": 1.448878650315062, "grad_norm": 4.6175642013549805, "learning_rate": 7.175302895475392e-05, "loss": 0.5095, "step": 21384 }, { "epoch": 1.4489464055830341, "grad_norm": 4.087683200836182, "learning_rate": 7.17516599356561e-05, "loss": 0.7198, "step": 21385 }, { "epoch": 1.449014160851006, "grad_norm": 6.638469219207764, "learning_rate": 7.17502909165583e-05, "loss": 0.4483, "step": 21386 }, { "epoch": 1.4490819161189783, "grad_norm": 6.441884517669678, "learning_rate": 7.174892189746048e-05, "loss": 0.6562, "step": 21387 }, { "epoch": 1.4491496713869503, "grad_norm": 5.646290302276611, "learning_rate": 7.174755287836266e-05, "loss": 0.6136, "step": 21388 }, { "epoch": 1.4492174266549225, "grad_norm": 8.193419456481934, "learning_rate": 7.174618385926484e-05, "loss": 0.6995, "step": 21389 }, { "epoch": 1.4492851819228945, "grad_norm": 9.119156837463379, "learning_rate": 7.174481484016702e-05, "loss": 0.6508, "step": 21390 }, { "epoch": 1.4493529371908667, "grad_norm": 4.711736679077148, "learning_rate": 7.174344582106921e-05, "loss": 0.5769, "step": 21391 }, { "epoch": 1.4494206924588386, "grad_norm": 6.288131237030029, "learning_rate": 7.174207680197139e-05, "loss": 0.4719, "step": 21392 }, { "epoch": 1.4494884477268108, "grad_norm": 7.246135711669922, "learning_rate": 7.174070778287357e-05, "loss": 0.6679, "step": 21393 }, { "epoch": 1.4495562029947828, "grad_norm": 5.808905124664307, "learning_rate": 7.173933876377575e-05, "loss": 0.6527, "step": 21394 }, { "epoch": 1.4496239582627548, "grad_norm": 6.971479892730713, "learning_rate": 7.173796974467795e-05, "loss": 0.7588, "step": 21395 }, { "epoch": 1.449691713530727, "grad_norm": 6.79066801071167, "learning_rate": 7.173660072558013e-05, "loss": 0.6336, "step": 21396 }, { "epoch": 1.4497594687986992, "grad_norm": 7.917309284210205, "learning_rate": 7.17352317064823e-05, "loss": 0.6725, "step": 21397 }, { "epoch": 1.4498272240666712, "grad_norm": 5.968051910400391, "learning_rate": 7.173386268738449e-05, "loss": 0.7047, "step": 21398 }, { "epoch": 1.4498949793346432, "grad_norm": 5.850289344787598, "learning_rate": 7.173249366828667e-05, "loss": 0.771, "step": 21399 }, { "epoch": 1.4499627346026154, "grad_norm": 5.712911605834961, "learning_rate": 7.173112464918886e-05, "loss": 0.6858, "step": 21400 }, { "epoch": 1.4500304898705874, "grad_norm": 4.881656169891357, "learning_rate": 7.172975563009104e-05, "loss": 0.5907, "step": 21401 }, { "epoch": 1.4500982451385596, "grad_norm": 6.230344295501709, "learning_rate": 7.172838661099322e-05, "loss": 0.8082, "step": 21402 }, { "epoch": 1.4501660004065315, "grad_norm": 5.844206809997559, "learning_rate": 7.17270175918954e-05, "loss": 0.9139, "step": 21403 }, { "epoch": 1.4502337556745037, "grad_norm": 6.433323860168457, "learning_rate": 7.17256485727976e-05, "loss": 0.5421, "step": 21404 }, { "epoch": 1.4503015109424757, "grad_norm": 7.774054050445557, "learning_rate": 7.172427955369978e-05, "loss": 0.7498, "step": 21405 }, { "epoch": 1.450369266210448, "grad_norm": 7.151160717010498, "learning_rate": 7.172291053460196e-05, "loss": 0.6059, "step": 21406 }, { "epoch": 1.45043702147842, "grad_norm": 6.669567108154297, "learning_rate": 7.172154151550414e-05, "loss": 0.6636, "step": 21407 }, { "epoch": 1.450504776746392, "grad_norm": 7.084716796875, "learning_rate": 7.172017249640633e-05, "loss": 0.7249, "step": 21408 }, { "epoch": 1.450572532014364, "grad_norm": 7.06024169921875, "learning_rate": 7.171880347730851e-05, "loss": 0.4686, "step": 21409 }, { "epoch": 1.450640287282336, "grad_norm": 6.3339033126831055, "learning_rate": 7.171743445821069e-05, "loss": 0.6212, "step": 21410 }, { "epoch": 1.4507080425503083, "grad_norm": 6.345374584197998, "learning_rate": 7.171606543911288e-05, "loss": 0.9731, "step": 21411 }, { "epoch": 1.4507757978182805, "grad_norm": 6.091363430023193, "learning_rate": 7.171469642001507e-05, "loss": 0.596, "step": 21412 }, { "epoch": 1.4508435530862525, "grad_norm": 7.188753604888916, "learning_rate": 7.171332740091725e-05, "loss": 0.7119, "step": 21413 }, { "epoch": 1.4509113083542244, "grad_norm": 5.623603820800781, "learning_rate": 7.171195838181944e-05, "loss": 0.616, "step": 21414 }, { "epoch": 1.4509790636221966, "grad_norm": 8.253162384033203, "learning_rate": 7.171058936272162e-05, "loss": 0.608, "step": 21415 }, { "epoch": 1.4510468188901688, "grad_norm": 5.506199836730957, "learning_rate": 7.17092203436238e-05, "loss": 0.5371, "step": 21416 }, { "epoch": 1.4511145741581408, "grad_norm": 5.468584060668945, "learning_rate": 7.170785132452598e-05, "loss": 0.5786, "step": 21417 }, { "epoch": 1.4511823294261128, "grad_norm": 5.055920124053955, "learning_rate": 7.170648230542817e-05, "loss": 0.6863, "step": 21418 }, { "epoch": 1.451250084694085, "grad_norm": 6.0445380210876465, "learning_rate": 7.170511328633035e-05, "loss": 0.7739, "step": 21419 }, { "epoch": 1.451317839962057, "grad_norm": 7.641643524169922, "learning_rate": 7.170374426723253e-05, "loss": 0.8722, "step": 21420 }, { "epoch": 1.4513855952300292, "grad_norm": 4.814655780792236, "learning_rate": 7.170237524813472e-05, "loss": 0.5163, "step": 21421 }, { "epoch": 1.4514533504980012, "grad_norm": 9.328154563903809, "learning_rate": 7.17010062290369e-05, "loss": 0.8441, "step": 21422 }, { "epoch": 1.4515211057659734, "grad_norm": 6.1203718185424805, "learning_rate": 7.169963720993909e-05, "loss": 0.7562, "step": 21423 }, { "epoch": 1.4515888610339454, "grad_norm": 4.314103603363037, "learning_rate": 7.169826819084127e-05, "loss": 0.7223, "step": 21424 }, { "epoch": 1.4516566163019176, "grad_norm": 5.436856269836426, "learning_rate": 7.169689917174345e-05, "loss": 0.611, "step": 21425 }, { "epoch": 1.4517243715698895, "grad_norm": 4.399587154388428, "learning_rate": 7.169553015264563e-05, "loss": 0.4358, "step": 21426 }, { "epoch": 1.4517921268378617, "grad_norm": 5.007474422454834, "learning_rate": 7.169416113354782e-05, "loss": 0.6277, "step": 21427 }, { "epoch": 1.4518598821058337, "grad_norm": 5.089382648468018, "learning_rate": 7.169279211445e-05, "loss": 0.7519, "step": 21428 }, { "epoch": 1.4519276373738057, "grad_norm": 5.85869836807251, "learning_rate": 7.169142309535219e-05, "loss": 0.7061, "step": 21429 }, { "epoch": 1.451995392641778, "grad_norm": 5.302163124084473, "learning_rate": 7.169005407625437e-05, "loss": 0.6285, "step": 21430 }, { "epoch": 1.45206314790975, "grad_norm": 7.315493583679199, "learning_rate": 7.168868505715655e-05, "loss": 0.7883, "step": 21431 }, { "epoch": 1.452130903177722, "grad_norm": 6.123226642608643, "learning_rate": 7.168731603805874e-05, "loss": 0.5811, "step": 21432 }, { "epoch": 1.452198658445694, "grad_norm": 4.989895343780518, "learning_rate": 7.168594701896092e-05, "loss": 0.6367, "step": 21433 }, { "epoch": 1.4522664137136663, "grad_norm": 5.485174655914307, "learning_rate": 7.16845779998631e-05, "loss": 0.733, "step": 21434 }, { "epoch": 1.4523341689816383, "grad_norm": 4.9026360511779785, "learning_rate": 7.168320898076528e-05, "loss": 0.603, "step": 21435 }, { "epoch": 1.4524019242496105, "grad_norm": 4.930315017700195, "learning_rate": 7.168183996166747e-05, "loss": 0.7544, "step": 21436 }, { "epoch": 1.4524696795175824, "grad_norm": 6.406541347503662, "learning_rate": 7.168047094256965e-05, "loss": 0.8466, "step": 21437 }, { "epoch": 1.4525374347855546, "grad_norm": 3.8222954273223877, "learning_rate": 7.167910192347184e-05, "loss": 0.5928, "step": 21438 }, { "epoch": 1.4526051900535266, "grad_norm": 7.777657985687256, "learning_rate": 7.167773290437402e-05, "loss": 0.6296, "step": 21439 }, { "epoch": 1.4526729453214988, "grad_norm": 5.466617107391357, "learning_rate": 7.16763638852762e-05, "loss": 0.6914, "step": 21440 }, { "epoch": 1.4527407005894708, "grad_norm": 5.135159015655518, "learning_rate": 7.167499486617839e-05, "loss": 0.6691, "step": 21441 }, { "epoch": 1.452808455857443, "grad_norm": 6.084162712097168, "learning_rate": 7.167362584708057e-05, "loss": 0.8604, "step": 21442 }, { "epoch": 1.452876211125415, "grad_norm": 5.295618057250977, "learning_rate": 7.167225682798275e-05, "loss": 0.5782, "step": 21443 }, { "epoch": 1.452943966393387, "grad_norm": 7.043880462646484, "learning_rate": 7.167088780888493e-05, "loss": 0.9115, "step": 21444 }, { "epoch": 1.4530117216613592, "grad_norm": 6.048927307128906, "learning_rate": 7.166951878978711e-05, "loss": 0.7272, "step": 21445 }, { "epoch": 1.4530794769293314, "grad_norm": 6.329336643218994, "learning_rate": 7.16681497706893e-05, "loss": 0.7797, "step": 21446 }, { "epoch": 1.4531472321973034, "grad_norm": 5.039395332336426, "learning_rate": 7.166678075159149e-05, "loss": 0.7127, "step": 21447 }, { "epoch": 1.4532149874652753, "grad_norm": 5.415740013122559, "learning_rate": 7.166541173249367e-05, "loss": 0.6144, "step": 21448 }, { "epoch": 1.4532827427332475, "grad_norm": 8.622990608215332, "learning_rate": 7.166404271339585e-05, "loss": 0.7232, "step": 21449 }, { "epoch": 1.4533504980012195, "grad_norm": 5.368243217468262, "learning_rate": 7.166267369429804e-05, "loss": 0.5155, "step": 21450 }, { "epoch": 1.4534182532691917, "grad_norm": 5.420581340789795, "learning_rate": 7.166130467520022e-05, "loss": 0.6946, "step": 21451 }, { "epoch": 1.4534860085371637, "grad_norm": 7.433884143829346, "learning_rate": 7.16599356561024e-05, "loss": 0.7289, "step": 21452 }, { "epoch": 1.453553763805136, "grad_norm": 6.1962151527404785, "learning_rate": 7.165856663700458e-05, "loss": 0.9278, "step": 21453 }, { "epoch": 1.4536215190731079, "grad_norm": 5.34564733505249, "learning_rate": 7.165719761790677e-05, "loss": 0.5948, "step": 21454 }, { "epoch": 1.45368927434108, "grad_norm": 8.732294082641602, "learning_rate": 7.165582859880896e-05, "loss": 0.6693, "step": 21455 }, { "epoch": 1.453757029609052, "grad_norm": 6.988463878631592, "learning_rate": 7.165445957971114e-05, "loss": 0.7155, "step": 21456 }, { "epoch": 1.4538247848770243, "grad_norm": 6.9480414390563965, "learning_rate": 7.165309056061333e-05, "loss": 0.7806, "step": 21457 }, { "epoch": 1.4538925401449962, "grad_norm": 5.388878345489502, "learning_rate": 7.165172154151551e-05, "loss": 0.6181, "step": 21458 }, { "epoch": 1.4539602954129682, "grad_norm": 5.586951732635498, "learning_rate": 7.165035252241769e-05, "loss": 0.7885, "step": 21459 }, { "epoch": 1.4540280506809404, "grad_norm": 5.4556756019592285, "learning_rate": 7.164898350331988e-05, "loss": 0.8619, "step": 21460 }, { "epoch": 1.4540958059489126, "grad_norm": 4.403500556945801, "learning_rate": 7.164761448422206e-05, "loss": 0.6006, "step": 21461 }, { "epoch": 1.4541635612168846, "grad_norm": 12.345792770385742, "learning_rate": 7.164624546512424e-05, "loss": 0.5938, "step": 21462 }, { "epoch": 1.4542313164848566, "grad_norm": 5.576444625854492, "learning_rate": 7.164487644602643e-05, "loss": 0.5765, "step": 21463 }, { "epoch": 1.4542990717528288, "grad_norm": 4.276403427124023, "learning_rate": 7.164350742692862e-05, "loss": 0.552, "step": 21464 }, { "epoch": 1.454366827020801, "grad_norm": 5.930202484130859, "learning_rate": 7.16421384078308e-05, "loss": 0.7995, "step": 21465 }, { "epoch": 1.454434582288773, "grad_norm": 6.809557914733887, "learning_rate": 7.164076938873298e-05, "loss": 0.6335, "step": 21466 }, { "epoch": 1.454502337556745, "grad_norm": 5.7469282150268555, "learning_rate": 7.163940036963516e-05, "loss": 0.6608, "step": 21467 }, { "epoch": 1.4545700928247172, "grad_norm": 4.632800579071045, "learning_rate": 7.163803135053734e-05, "loss": 0.4952, "step": 21468 }, { "epoch": 1.4546378480926891, "grad_norm": 7.574417591094971, "learning_rate": 7.163666233143953e-05, "loss": 0.7382, "step": 21469 }, { "epoch": 1.4547056033606613, "grad_norm": 7.35561466217041, "learning_rate": 7.163529331234171e-05, "loss": 0.6219, "step": 21470 }, { "epoch": 1.4547733586286333, "grad_norm": 4.72305965423584, "learning_rate": 7.16339242932439e-05, "loss": 0.4139, "step": 21471 }, { "epoch": 1.4548411138966055, "grad_norm": 5.109449863433838, "learning_rate": 7.163255527414608e-05, "loss": 0.7007, "step": 21472 }, { "epoch": 1.4549088691645775, "grad_norm": 8.151754379272461, "learning_rate": 7.163118625504827e-05, "loss": 0.8191, "step": 21473 }, { "epoch": 1.4549766244325495, "grad_norm": 5.9724602699279785, "learning_rate": 7.162981723595045e-05, "loss": 0.6287, "step": 21474 }, { "epoch": 1.4550443797005217, "grad_norm": 8.380650520324707, "learning_rate": 7.162844821685263e-05, "loss": 0.711, "step": 21475 }, { "epoch": 1.455112134968494, "grad_norm": 5.566275119781494, "learning_rate": 7.162707919775481e-05, "loss": 0.6576, "step": 21476 }, { "epoch": 1.4551798902364659, "grad_norm": 6.038303852081299, "learning_rate": 7.162571017865699e-05, "loss": 0.6656, "step": 21477 }, { "epoch": 1.4552476455044379, "grad_norm": 8.571480751037598, "learning_rate": 7.162434115955918e-05, "loss": 0.6518, "step": 21478 }, { "epoch": 1.45531540077241, "grad_norm": 5.990694999694824, "learning_rate": 7.162297214046136e-05, "loss": 0.6334, "step": 21479 }, { "epoch": 1.4553831560403823, "grad_norm": 5.606784343719482, "learning_rate": 7.162160312136355e-05, "loss": 0.5969, "step": 21480 }, { "epoch": 1.4554509113083542, "grad_norm": 5.372265338897705, "learning_rate": 7.162023410226573e-05, "loss": 0.7337, "step": 21481 }, { "epoch": 1.4555186665763262, "grad_norm": 5.95213508605957, "learning_rate": 7.161886508316792e-05, "loss": 0.5486, "step": 21482 }, { "epoch": 1.4555864218442984, "grad_norm": 5.886258602142334, "learning_rate": 7.16174960640701e-05, "loss": 0.6295, "step": 21483 }, { "epoch": 1.4556541771122704, "grad_norm": 6.704195022583008, "learning_rate": 7.161612704497228e-05, "loss": 0.5524, "step": 21484 }, { "epoch": 1.4557219323802426, "grad_norm": 4.109350204467773, "learning_rate": 7.161475802587446e-05, "loss": 0.4113, "step": 21485 }, { "epoch": 1.4557896876482146, "grad_norm": 6.853222846984863, "learning_rate": 7.161338900677664e-05, "loss": 0.7875, "step": 21486 }, { "epoch": 1.4558574429161868, "grad_norm": 5.388269901275635, "learning_rate": 7.161201998767883e-05, "loss": 0.7238, "step": 21487 }, { "epoch": 1.4559251981841588, "grad_norm": 6.189793109893799, "learning_rate": 7.161065096858101e-05, "loss": 0.8097, "step": 21488 }, { "epoch": 1.455992953452131, "grad_norm": 6.3549981117248535, "learning_rate": 7.16092819494832e-05, "loss": 0.6012, "step": 21489 }, { "epoch": 1.456060708720103, "grad_norm": 4.686523914337158, "learning_rate": 7.160791293038538e-05, "loss": 0.6477, "step": 21490 }, { "epoch": 1.4561284639880752, "grad_norm": 6.714654445648193, "learning_rate": 7.160654391128757e-05, "loss": 0.7332, "step": 21491 }, { "epoch": 1.4561962192560471, "grad_norm": 6.970283508300781, "learning_rate": 7.160517489218975e-05, "loss": 0.8333, "step": 21492 }, { "epoch": 1.4562639745240191, "grad_norm": 5.7268595695495605, "learning_rate": 7.160380587309193e-05, "loss": 0.8277, "step": 21493 }, { "epoch": 1.4563317297919913, "grad_norm": 6.709218978881836, "learning_rate": 7.160243685399411e-05, "loss": 0.7129, "step": 21494 }, { "epoch": 1.4563994850599635, "grad_norm": 5.430080413818359, "learning_rate": 7.160106783489629e-05, "loss": 0.7144, "step": 21495 }, { "epoch": 1.4564672403279355, "grad_norm": 7.1316914558410645, "learning_rate": 7.159969881579848e-05, "loss": 0.7432, "step": 21496 }, { "epoch": 1.4565349955959075, "grad_norm": 5.902157783508301, "learning_rate": 7.159832979670067e-05, "loss": 0.5329, "step": 21497 }, { "epoch": 1.4566027508638797, "grad_norm": 8.923229217529297, "learning_rate": 7.159696077760285e-05, "loss": 0.6447, "step": 21498 }, { "epoch": 1.4566705061318517, "grad_norm": 5.534242153167725, "learning_rate": 7.159559175850503e-05, "loss": 0.7655, "step": 21499 }, { "epoch": 1.4567382613998239, "grad_norm": 5.201180934906006, "learning_rate": 7.15942227394072e-05, "loss": 0.5984, "step": 21500 }, { "epoch": 1.4568060166677959, "grad_norm": 5.510782718658447, "learning_rate": 7.15928537203094e-05, "loss": 0.6667, "step": 21501 }, { "epoch": 1.456873771935768, "grad_norm": 5.060631275177002, "learning_rate": 7.159148470121158e-05, "loss": 0.6604, "step": 21502 }, { "epoch": 1.45694152720374, "grad_norm": 6.018972396850586, "learning_rate": 7.159011568211377e-05, "loss": 0.9444, "step": 21503 }, { "epoch": 1.4570092824717122, "grad_norm": 4.7158026695251465, "learning_rate": 7.158874666301595e-05, "loss": 0.4615, "step": 21504 }, { "epoch": 1.4570770377396842, "grad_norm": 6.177850723266602, "learning_rate": 7.158737764391813e-05, "loss": 0.8838, "step": 21505 }, { "epoch": 1.4571447930076564, "grad_norm": 5.5321245193481445, "learning_rate": 7.158600862482033e-05, "loss": 0.7089, "step": 21506 }, { "epoch": 1.4572125482756284, "grad_norm": 9.728373527526855, "learning_rate": 7.158463960572251e-05, "loss": 0.5902, "step": 21507 }, { "epoch": 1.4572803035436004, "grad_norm": 5.0874528884887695, "learning_rate": 7.158327058662469e-05, "loss": 0.5462, "step": 21508 }, { "epoch": 1.4573480588115726, "grad_norm": 5.285484313964844, "learning_rate": 7.158190156752687e-05, "loss": 0.604, "step": 21509 }, { "epoch": 1.4574158140795448, "grad_norm": 5.163124084472656, "learning_rate": 7.158053254842906e-05, "loss": 0.6387, "step": 21510 }, { "epoch": 1.4574835693475168, "grad_norm": 7.384795188903809, "learning_rate": 7.157916352933124e-05, "loss": 0.8853, "step": 21511 }, { "epoch": 1.4575513246154888, "grad_norm": 5.79692268371582, "learning_rate": 7.157779451023342e-05, "loss": 0.7015, "step": 21512 }, { "epoch": 1.457619079883461, "grad_norm": 6.081192493438721, "learning_rate": 7.15764254911356e-05, "loss": 0.6498, "step": 21513 }, { "epoch": 1.4576868351514332, "grad_norm": 5.86506462097168, "learning_rate": 7.15750564720378e-05, "loss": 0.6142, "step": 21514 }, { "epoch": 1.4577545904194051, "grad_norm": 8.759547233581543, "learning_rate": 7.157368745293998e-05, "loss": 0.5418, "step": 21515 }, { "epoch": 1.4578223456873771, "grad_norm": 5.053228378295898, "learning_rate": 7.157231843384216e-05, "loss": 0.5441, "step": 21516 }, { "epoch": 1.4578901009553493, "grad_norm": 4.427437782287598, "learning_rate": 7.157094941474434e-05, "loss": 0.5636, "step": 21517 }, { "epoch": 1.4579578562233213, "grad_norm": 7.295726776123047, "learning_rate": 7.156958039564652e-05, "loss": 0.6262, "step": 21518 }, { "epoch": 1.4580256114912935, "grad_norm": 5.993203639984131, "learning_rate": 7.156821137654871e-05, "loss": 0.7165, "step": 21519 }, { "epoch": 1.4580933667592655, "grad_norm": 5.129610061645508, "learning_rate": 7.15668423574509e-05, "loss": 0.6057, "step": 21520 }, { "epoch": 1.4581611220272377, "grad_norm": 6.006467819213867, "learning_rate": 7.156547333835307e-05, "loss": 0.7713, "step": 21521 }, { "epoch": 1.4582288772952097, "grad_norm": 5.196933746337891, "learning_rate": 7.156410431925525e-05, "loss": 0.7966, "step": 21522 }, { "epoch": 1.4582966325631816, "grad_norm": 4.797604084014893, "learning_rate": 7.156273530015744e-05, "loss": 0.7534, "step": 21523 }, { "epoch": 1.4583643878311539, "grad_norm": 4.931893825531006, "learning_rate": 7.156136628105963e-05, "loss": 0.6723, "step": 21524 }, { "epoch": 1.458432143099126, "grad_norm": 8.237302780151367, "learning_rate": 7.155999726196181e-05, "loss": 0.6672, "step": 21525 }, { "epoch": 1.458499898367098, "grad_norm": 6.420501232147217, "learning_rate": 7.155862824286399e-05, "loss": 0.5923, "step": 21526 }, { "epoch": 1.45856765363507, "grad_norm": 4.682703018188477, "learning_rate": 7.155725922376617e-05, "loss": 0.7074, "step": 21527 }, { "epoch": 1.4586354089030422, "grad_norm": 9.373278617858887, "learning_rate": 7.155589020466836e-05, "loss": 0.7548, "step": 21528 }, { "epoch": 1.4587031641710144, "grad_norm": 7.340688705444336, "learning_rate": 7.155452118557054e-05, "loss": 0.8934, "step": 21529 }, { "epoch": 1.4587709194389864, "grad_norm": 6.547774791717529, "learning_rate": 7.155315216647272e-05, "loss": 0.7902, "step": 21530 }, { "epoch": 1.4588386747069584, "grad_norm": 4.277866840362549, "learning_rate": 7.15517831473749e-05, "loss": 0.7257, "step": 21531 }, { "epoch": 1.4589064299749306, "grad_norm": 5.622948169708252, "learning_rate": 7.155041412827709e-05, "loss": 0.6042, "step": 21532 }, { "epoch": 1.4589741852429026, "grad_norm": 5.730656623840332, "learning_rate": 7.154904510917928e-05, "loss": 0.7662, "step": 21533 }, { "epoch": 1.4590419405108748, "grad_norm": 6.411100387573242, "learning_rate": 7.154767609008146e-05, "loss": 0.501, "step": 21534 }, { "epoch": 1.4591096957788467, "grad_norm": 5.173393249511719, "learning_rate": 7.154630707098364e-05, "loss": 0.6282, "step": 21535 }, { "epoch": 1.459177451046819, "grad_norm": 6.169179439544678, "learning_rate": 7.154493805188582e-05, "loss": 0.7994, "step": 21536 }, { "epoch": 1.459245206314791, "grad_norm": 5.058782577514648, "learning_rate": 7.154356903278801e-05, "loss": 0.4633, "step": 21537 }, { "epoch": 1.4593129615827631, "grad_norm": 8.362473487854004, "learning_rate": 7.15422000136902e-05, "loss": 0.7055, "step": 21538 }, { "epoch": 1.4593807168507351, "grad_norm": 6.14398717880249, "learning_rate": 7.154083099459237e-05, "loss": 0.7417, "step": 21539 }, { "epoch": 1.4594484721187073, "grad_norm": 8.814742088317871, "learning_rate": 7.153946197549456e-05, "loss": 0.7417, "step": 21540 }, { "epoch": 1.4595162273866793, "grad_norm": 4.979259014129639, "learning_rate": 7.153809295639674e-05, "loss": 0.6233, "step": 21541 }, { "epoch": 1.4595839826546513, "grad_norm": 5.7426042556762695, "learning_rate": 7.153672393729893e-05, "loss": 0.7054, "step": 21542 }, { "epoch": 1.4596517379226235, "grad_norm": 8.8035888671875, "learning_rate": 7.153535491820111e-05, "loss": 0.5054, "step": 21543 }, { "epoch": 1.4597194931905957, "grad_norm": 7.210206031799316, "learning_rate": 7.153398589910329e-05, "loss": 0.6748, "step": 21544 }, { "epoch": 1.4597872484585677, "grad_norm": 5.326686859130859, "learning_rate": 7.153261688000547e-05, "loss": 0.5181, "step": 21545 }, { "epoch": 1.4598550037265396, "grad_norm": 5.398369312286377, "learning_rate": 7.153124786090766e-05, "loss": 0.7473, "step": 21546 }, { "epoch": 1.4599227589945118, "grad_norm": 5.409416675567627, "learning_rate": 7.152987884180984e-05, "loss": 0.6462, "step": 21547 }, { "epoch": 1.4599905142624838, "grad_norm": 7.475605010986328, "learning_rate": 7.152850982271203e-05, "loss": 1.1831, "step": 21548 }, { "epoch": 1.460058269530456, "grad_norm": 4.736668586730957, "learning_rate": 7.15271408036142e-05, "loss": 0.5457, "step": 21549 }, { "epoch": 1.460126024798428, "grad_norm": 7.568926811218262, "learning_rate": 7.15257717845164e-05, "loss": 0.9032, "step": 21550 }, { "epoch": 1.4601937800664002, "grad_norm": 6.689256191253662, "learning_rate": 7.152440276541858e-05, "loss": 0.7459, "step": 21551 }, { "epoch": 1.4602615353343722, "grad_norm": 5.442495346069336, "learning_rate": 7.152303374632076e-05, "loss": 0.7123, "step": 21552 }, { "epoch": 1.4603292906023444, "grad_norm": 5.209478378295898, "learning_rate": 7.152166472722295e-05, "loss": 0.5601, "step": 21553 }, { "epoch": 1.4603970458703164, "grad_norm": 6.193780422210693, "learning_rate": 7.152029570812513e-05, "loss": 0.698, "step": 21554 }, { "epoch": 1.4604648011382886, "grad_norm": 8.368922233581543, "learning_rate": 7.151892668902731e-05, "loss": 0.6268, "step": 21555 }, { "epoch": 1.4605325564062606, "grad_norm": 4.950161933898926, "learning_rate": 7.151755766992951e-05, "loss": 0.5655, "step": 21556 }, { "epoch": 1.4606003116742325, "grad_norm": 7.713269233703613, "learning_rate": 7.151618865083169e-05, "loss": 0.7471, "step": 21557 }, { "epoch": 1.4606680669422047, "grad_norm": 4.321671009063721, "learning_rate": 7.151481963173387e-05, "loss": 0.5963, "step": 21558 }, { "epoch": 1.460735822210177, "grad_norm": 3.841702461242676, "learning_rate": 7.151345061263605e-05, "loss": 0.5156, "step": 21559 }, { "epoch": 1.460803577478149, "grad_norm": 6.041382789611816, "learning_rate": 7.151208159353824e-05, "loss": 0.7038, "step": 21560 }, { "epoch": 1.460871332746121, "grad_norm": 4.524684906005859, "learning_rate": 7.151071257444042e-05, "loss": 0.59, "step": 21561 }, { "epoch": 1.4609390880140931, "grad_norm": 5.8914337158203125, "learning_rate": 7.15093435553426e-05, "loss": 0.5401, "step": 21562 }, { "epoch": 1.4610068432820653, "grad_norm": 5.717686653137207, "learning_rate": 7.150797453624478e-05, "loss": 0.7725, "step": 21563 }, { "epoch": 1.4610745985500373, "grad_norm": 8.735217094421387, "learning_rate": 7.150660551714696e-05, "loss": 0.7594, "step": 21564 }, { "epoch": 1.4611423538180093, "grad_norm": 6.906649112701416, "learning_rate": 7.150523649804916e-05, "loss": 0.674, "step": 21565 }, { "epoch": 1.4612101090859815, "grad_norm": 6.025679588317871, "learning_rate": 7.150386747895134e-05, "loss": 0.6139, "step": 21566 }, { "epoch": 1.4612778643539535, "grad_norm": 4.76900577545166, "learning_rate": 7.150249845985352e-05, "loss": 0.5514, "step": 21567 }, { "epoch": 1.4613456196219257, "grad_norm": 4.804394721984863, "learning_rate": 7.15011294407557e-05, "loss": 0.6298, "step": 21568 }, { "epoch": 1.4614133748898976, "grad_norm": 8.061301231384277, "learning_rate": 7.14997604216579e-05, "loss": 0.8245, "step": 21569 }, { "epoch": 1.4614811301578698, "grad_norm": 6.91434907913208, "learning_rate": 7.149839140256007e-05, "loss": 0.6476, "step": 21570 }, { "epoch": 1.4615488854258418, "grad_norm": 6.215130805969238, "learning_rate": 7.149702238346225e-05, "loss": 0.6867, "step": 21571 }, { "epoch": 1.4616166406938138, "grad_norm": 6.301210403442383, "learning_rate": 7.149565336436443e-05, "loss": 0.8125, "step": 21572 }, { "epoch": 1.461684395961786, "grad_norm": 5.213703632354736, "learning_rate": 7.149428434526661e-05, "loss": 0.6149, "step": 21573 }, { "epoch": 1.4617521512297582, "grad_norm": 5.436930179595947, "learning_rate": 7.149291532616881e-05, "loss": 0.7238, "step": 21574 }, { "epoch": 1.4618199064977302, "grad_norm": 8.166367530822754, "learning_rate": 7.149154630707099e-05, "loss": 0.7858, "step": 21575 }, { "epoch": 1.4618876617657022, "grad_norm": 6.781332015991211, "learning_rate": 7.149017728797317e-05, "loss": 0.674, "step": 21576 }, { "epoch": 1.4619554170336744, "grad_norm": 5.096229553222656, "learning_rate": 7.148880826887535e-05, "loss": 0.6692, "step": 21577 }, { "epoch": 1.4620231723016466, "grad_norm": 5.620342254638672, "learning_rate": 7.148743924977753e-05, "loss": 0.685, "step": 21578 }, { "epoch": 1.4620909275696186, "grad_norm": 4.595557689666748, "learning_rate": 7.148607023067972e-05, "loss": 0.5649, "step": 21579 }, { "epoch": 1.4621586828375905, "grad_norm": 4.658647537231445, "learning_rate": 7.14847012115819e-05, "loss": 0.5355, "step": 21580 }, { "epoch": 1.4622264381055627, "grad_norm": 5.409909248352051, "learning_rate": 7.148333219248408e-05, "loss": 0.6072, "step": 21581 }, { "epoch": 1.4622941933735347, "grad_norm": 6.58162784576416, "learning_rate": 7.148196317338627e-05, "loss": 0.5651, "step": 21582 }, { "epoch": 1.462361948641507, "grad_norm": 5.753164768218994, "learning_rate": 7.148059415428846e-05, "loss": 0.7874, "step": 21583 }, { "epoch": 1.462429703909479, "grad_norm": 5.204945087432861, "learning_rate": 7.147922513519064e-05, "loss": 0.6517, "step": 21584 }, { "epoch": 1.462497459177451, "grad_norm": 5.762413024902344, "learning_rate": 7.147785611609282e-05, "loss": 0.7336, "step": 21585 }, { "epoch": 1.462565214445423, "grad_norm": 6.297985553741455, "learning_rate": 7.1476487096995e-05, "loss": 0.5987, "step": 21586 }, { "epoch": 1.4626329697133953, "grad_norm": 8.400461196899414, "learning_rate": 7.147511807789718e-05, "loss": 0.648, "step": 21587 }, { "epoch": 1.4627007249813673, "grad_norm": 6.860273838043213, "learning_rate": 7.147374905879937e-05, "loss": 0.6608, "step": 21588 }, { "epoch": 1.4627684802493395, "grad_norm": 5.391541957855225, "learning_rate": 7.147238003970155e-05, "loss": 0.7528, "step": 21589 }, { "epoch": 1.4628362355173115, "grad_norm": 4.586338996887207, "learning_rate": 7.147101102060373e-05, "loss": 0.644, "step": 21590 }, { "epoch": 1.4629039907852834, "grad_norm": 5.055821418762207, "learning_rate": 7.146964200150592e-05, "loss": 0.7436, "step": 21591 }, { "epoch": 1.4629717460532556, "grad_norm": 4.725174903869629, "learning_rate": 7.146827298240811e-05, "loss": 0.5523, "step": 21592 }, { "epoch": 1.4630395013212278, "grad_norm": 6.975754261016846, "learning_rate": 7.146690396331029e-05, "loss": 0.9365, "step": 21593 }, { "epoch": 1.4631072565891998, "grad_norm": 5.228897571563721, "learning_rate": 7.146553494421247e-05, "loss": 0.808, "step": 21594 }, { "epoch": 1.4631750118571718, "grad_norm": 7.85492467880249, "learning_rate": 7.146416592511465e-05, "loss": 0.7397, "step": 21595 }, { "epoch": 1.463242767125144, "grad_norm": 6.946386814117432, "learning_rate": 7.146279690601684e-05, "loss": 0.7052, "step": 21596 }, { "epoch": 1.463310522393116, "grad_norm": 5.81506872177124, "learning_rate": 7.146142788691902e-05, "loss": 0.6694, "step": 21597 }, { "epoch": 1.4633782776610882, "grad_norm": 10.114153861999512, "learning_rate": 7.14600588678212e-05, "loss": 0.6275, "step": 21598 }, { "epoch": 1.4634460329290602, "grad_norm": 7.872120380401611, "learning_rate": 7.14586898487234e-05, "loss": 0.577, "step": 21599 }, { "epoch": 1.4635137881970324, "grad_norm": 11.657118797302246, "learning_rate": 7.145732082962558e-05, "loss": 0.853, "step": 21600 }, { "epoch": 1.4635815434650044, "grad_norm": 6.137749195098877, "learning_rate": 7.145595181052776e-05, "loss": 0.5486, "step": 21601 }, { "epoch": 1.4636492987329766, "grad_norm": 7.475892066955566, "learning_rate": 7.145458279142995e-05, "loss": 0.7886, "step": 21602 }, { "epoch": 1.4637170540009485, "grad_norm": 6.879278659820557, "learning_rate": 7.145321377233213e-05, "loss": 0.6823, "step": 21603 }, { "epoch": 1.4637848092689207, "grad_norm": 5.642472267150879, "learning_rate": 7.145184475323431e-05, "loss": 0.6967, "step": 21604 }, { "epoch": 1.4638525645368927, "grad_norm": 6.712484359741211, "learning_rate": 7.14504757341365e-05, "loss": 0.6004, "step": 21605 }, { "epoch": 1.4639203198048647, "grad_norm": 3.698824405670166, "learning_rate": 7.144910671503869e-05, "loss": 0.4891, "step": 21606 }, { "epoch": 1.463988075072837, "grad_norm": 5.627575874328613, "learning_rate": 7.144773769594087e-05, "loss": 0.4928, "step": 21607 }, { "epoch": 1.464055830340809, "grad_norm": 6.021406173706055, "learning_rate": 7.144636867684305e-05, "loss": 0.5522, "step": 21608 }, { "epoch": 1.464123585608781, "grad_norm": 5.19459867477417, "learning_rate": 7.144499965774523e-05, "loss": 0.76, "step": 21609 }, { "epoch": 1.464191340876753, "grad_norm": 6.041921615600586, "learning_rate": 7.144363063864741e-05, "loss": 0.696, "step": 21610 }, { "epoch": 1.4642590961447253, "grad_norm": 5.8656721115112305, "learning_rate": 7.14422616195496e-05, "loss": 0.7676, "step": 21611 }, { "epoch": 1.4643268514126975, "grad_norm": 5.363099575042725, "learning_rate": 7.144089260045178e-05, "loss": 0.5986, "step": 21612 }, { "epoch": 1.4643946066806695, "grad_norm": 8.096660614013672, "learning_rate": 7.143952358135396e-05, "loss": 1.0564, "step": 21613 }, { "epoch": 1.4644623619486414, "grad_norm": 7.16405725479126, "learning_rate": 7.143815456225614e-05, "loss": 0.7359, "step": 21614 }, { "epoch": 1.4645301172166136, "grad_norm": 5.085155487060547, "learning_rate": 7.143678554315834e-05, "loss": 0.6322, "step": 21615 }, { "epoch": 1.4645978724845856, "grad_norm": 9.60994815826416, "learning_rate": 7.143541652406052e-05, "loss": 0.77, "step": 21616 }, { "epoch": 1.4646656277525578, "grad_norm": 6.231583595275879, "learning_rate": 7.14340475049627e-05, "loss": 0.761, "step": 21617 }, { "epoch": 1.4647333830205298, "grad_norm": 5.71886682510376, "learning_rate": 7.143267848586488e-05, "loss": 0.7052, "step": 21618 }, { "epoch": 1.464801138288502, "grad_norm": 5.908305644989014, "learning_rate": 7.143130946676706e-05, "loss": 0.6606, "step": 21619 }, { "epoch": 1.464868893556474, "grad_norm": 6.285672187805176, "learning_rate": 7.142994044766925e-05, "loss": 0.8951, "step": 21620 }, { "epoch": 1.464936648824446, "grad_norm": 5.535191059112549, "learning_rate": 7.142857142857143e-05, "loss": 0.5328, "step": 21621 }, { "epoch": 1.4650044040924182, "grad_norm": 5.468017578125, "learning_rate": 7.142720240947361e-05, "loss": 0.6795, "step": 21622 }, { "epoch": 1.4650721593603904, "grad_norm": 6.6771955490112305, "learning_rate": 7.14258333903758e-05, "loss": 0.6964, "step": 21623 }, { "epoch": 1.4651399146283623, "grad_norm": 6.178287506103516, "learning_rate": 7.142446437127799e-05, "loss": 0.675, "step": 21624 }, { "epoch": 1.4652076698963343, "grad_norm": 5.343182563781738, "learning_rate": 7.142309535218017e-05, "loss": 0.6886, "step": 21625 }, { "epoch": 1.4652754251643065, "grad_norm": 6.538660049438477, "learning_rate": 7.142172633308235e-05, "loss": 0.7041, "step": 21626 }, { "epoch": 1.4653431804322787, "grad_norm": 5.068603515625, "learning_rate": 7.142035731398453e-05, "loss": 0.5804, "step": 21627 }, { "epoch": 1.4654109357002507, "grad_norm": 5.706067085266113, "learning_rate": 7.141898829488671e-05, "loss": 0.7748, "step": 21628 }, { "epoch": 1.4654786909682227, "grad_norm": 6.873477935791016, "learning_rate": 7.14176192757889e-05, "loss": 0.8857, "step": 21629 }, { "epoch": 1.465546446236195, "grad_norm": 5.758304119110107, "learning_rate": 7.141625025669108e-05, "loss": 0.8262, "step": 21630 }, { "epoch": 1.4656142015041669, "grad_norm": 5.575211048126221, "learning_rate": 7.141488123759326e-05, "loss": 0.6755, "step": 21631 }, { "epoch": 1.465681956772139, "grad_norm": 4.19789457321167, "learning_rate": 7.141351221849544e-05, "loss": 0.5595, "step": 21632 }, { "epoch": 1.465749712040111, "grad_norm": 4.849717140197754, "learning_rate": 7.141214319939763e-05, "loss": 0.5894, "step": 21633 }, { "epoch": 1.4658174673080833, "grad_norm": 5.887254238128662, "learning_rate": 7.141077418029982e-05, "loss": 0.6709, "step": 21634 }, { "epoch": 1.4658852225760552, "grad_norm": 5.190128803253174, "learning_rate": 7.1409405161202e-05, "loss": 0.6844, "step": 21635 }, { "epoch": 1.4659529778440274, "grad_norm": 6.514214992523193, "learning_rate": 7.140803614210418e-05, "loss": 0.6913, "step": 21636 }, { "epoch": 1.4660207331119994, "grad_norm": 7.83294677734375, "learning_rate": 7.140666712300636e-05, "loss": 0.6288, "step": 21637 }, { "epoch": 1.4660884883799716, "grad_norm": 6.397664546966553, "learning_rate": 7.140529810390855e-05, "loss": 0.6254, "step": 21638 }, { "epoch": 1.4661562436479436, "grad_norm": 5.3548994064331055, "learning_rate": 7.140392908481073e-05, "loss": 0.7029, "step": 21639 }, { "epoch": 1.4662239989159156, "grad_norm": 4.888891220092773, "learning_rate": 7.140256006571291e-05, "loss": 0.5521, "step": 21640 }, { "epoch": 1.4662917541838878, "grad_norm": 7.359992027282715, "learning_rate": 7.14011910466151e-05, "loss": 0.6367, "step": 21641 }, { "epoch": 1.46635950945186, "grad_norm": 5.644524574279785, "learning_rate": 7.139982202751729e-05, "loss": 0.8054, "step": 21642 }, { "epoch": 1.466427264719832, "grad_norm": 8.93367862701416, "learning_rate": 7.139845300841947e-05, "loss": 0.7247, "step": 21643 }, { "epoch": 1.466495019987804, "grad_norm": 5.52249002456665, "learning_rate": 7.139708398932165e-05, "loss": 0.6572, "step": 21644 }, { "epoch": 1.4665627752557762, "grad_norm": 5.03657865524292, "learning_rate": 7.139571497022384e-05, "loss": 0.5033, "step": 21645 }, { "epoch": 1.4666305305237481, "grad_norm": 3.8990414142608643, "learning_rate": 7.139434595112602e-05, "loss": 0.5968, "step": 21646 }, { "epoch": 1.4666982857917203, "grad_norm": 6.9647536277771, "learning_rate": 7.13929769320282e-05, "loss": 0.6236, "step": 21647 }, { "epoch": 1.4667660410596923, "grad_norm": 4.818854331970215, "learning_rate": 7.13916079129304e-05, "loss": 0.702, "step": 21648 }, { "epoch": 1.4668337963276645, "grad_norm": 4.488203048706055, "learning_rate": 7.139023889383258e-05, "loss": 0.8428, "step": 21649 }, { "epoch": 1.4669015515956365, "grad_norm": 7.0130839347839355, "learning_rate": 7.138886987473476e-05, "loss": 0.5373, "step": 21650 }, { "epoch": 1.4669693068636087, "grad_norm": 5.956991672515869, "learning_rate": 7.138750085563694e-05, "loss": 0.7291, "step": 21651 }, { "epoch": 1.4670370621315807, "grad_norm": 6.0034260749816895, "learning_rate": 7.138613183653913e-05, "loss": 0.6947, "step": 21652 }, { "epoch": 1.467104817399553, "grad_norm": 6.572457313537598, "learning_rate": 7.138476281744131e-05, "loss": 0.6596, "step": 21653 }, { "epoch": 1.4671725726675249, "grad_norm": 6.670360088348389, "learning_rate": 7.13833937983435e-05, "loss": 0.6459, "step": 21654 }, { "epoch": 1.4672403279354969, "grad_norm": 4.973862171173096, "learning_rate": 7.138202477924567e-05, "loss": 0.5438, "step": 21655 }, { "epoch": 1.467308083203469, "grad_norm": 5.691517353057861, "learning_rate": 7.138065576014785e-05, "loss": 0.6004, "step": 21656 }, { "epoch": 1.4673758384714413, "grad_norm": 8.540769577026367, "learning_rate": 7.137928674105005e-05, "loss": 0.5799, "step": 21657 }, { "epoch": 1.4674435937394132, "grad_norm": 5.864646911621094, "learning_rate": 7.137791772195223e-05, "loss": 0.5926, "step": 21658 }, { "epoch": 1.4675113490073852, "grad_norm": 5.246252536773682, "learning_rate": 7.137654870285441e-05, "loss": 0.6802, "step": 21659 }, { "epoch": 1.4675791042753574, "grad_norm": 7.614772319793701, "learning_rate": 7.137517968375659e-05, "loss": 0.7327, "step": 21660 }, { "epoch": 1.4676468595433296, "grad_norm": 5.311723709106445, "learning_rate": 7.137381066465878e-05, "loss": 0.5922, "step": 21661 }, { "epoch": 1.4677146148113016, "grad_norm": 10.052451133728027, "learning_rate": 7.137244164556096e-05, "loss": 0.7115, "step": 21662 }, { "epoch": 1.4677823700792736, "grad_norm": 4.470024108886719, "learning_rate": 7.137107262646314e-05, "loss": 0.5655, "step": 21663 }, { "epoch": 1.4678501253472458, "grad_norm": 7.268585681915283, "learning_rate": 7.136970360736532e-05, "loss": 0.6487, "step": 21664 }, { "epoch": 1.4679178806152178, "grad_norm": 6.372290134429932, "learning_rate": 7.13683345882675e-05, "loss": 0.7824, "step": 21665 }, { "epoch": 1.46798563588319, "grad_norm": 7.272246360778809, "learning_rate": 7.13669655691697e-05, "loss": 0.6297, "step": 21666 }, { "epoch": 1.468053391151162, "grad_norm": 9.862757682800293, "learning_rate": 7.136559655007188e-05, "loss": 0.5416, "step": 21667 }, { "epoch": 1.4681211464191342, "grad_norm": 6.651132583618164, "learning_rate": 7.136422753097406e-05, "loss": 0.5541, "step": 21668 }, { "epoch": 1.4681889016871061, "grad_norm": 6.303805351257324, "learning_rate": 7.136285851187624e-05, "loss": 0.7187, "step": 21669 }, { "epoch": 1.4682566569550781, "grad_norm": 8.383051872253418, "learning_rate": 7.136148949277843e-05, "loss": 0.6104, "step": 21670 }, { "epoch": 1.4683244122230503, "grad_norm": 4.826161861419678, "learning_rate": 7.136012047368061e-05, "loss": 0.7104, "step": 21671 }, { "epoch": 1.4683921674910225, "grad_norm": 6.198061943054199, "learning_rate": 7.13587514545828e-05, "loss": 0.7492, "step": 21672 }, { "epoch": 1.4684599227589945, "grad_norm": 6.217910289764404, "learning_rate": 7.135738243548497e-05, "loss": 0.7274, "step": 21673 }, { "epoch": 1.4685276780269665, "grad_norm": 6.324878215789795, "learning_rate": 7.135601341638715e-05, "loss": 0.6035, "step": 21674 }, { "epoch": 1.4685954332949387, "grad_norm": 8.987545013427734, "learning_rate": 7.135464439728935e-05, "loss": 0.6331, "step": 21675 }, { "epoch": 1.468663188562911, "grad_norm": 6.274022102355957, "learning_rate": 7.135327537819153e-05, "loss": 0.6629, "step": 21676 }, { "epoch": 1.4687309438308829, "grad_norm": 6.144443511962891, "learning_rate": 7.135190635909371e-05, "loss": 0.8939, "step": 21677 }, { "epoch": 1.4687986990988549, "grad_norm": 4.918601989746094, "learning_rate": 7.135053733999589e-05, "loss": 0.6595, "step": 21678 }, { "epoch": 1.468866454366827, "grad_norm": 6.639081954956055, "learning_rate": 7.134916832089808e-05, "loss": 0.7244, "step": 21679 }, { "epoch": 1.468934209634799, "grad_norm": 7.3865742683410645, "learning_rate": 7.134779930180026e-05, "loss": 0.8766, "step": 21680 }, { "epoch": 1.4690019649027712, "grad_norm": 6.059460639953613, "learning_rate": 7.134643028270244e-05, "loss": 0.8663, "step": 21681 }, { "epoch": 1.4690697201707432, "grad_norm": 6.284973621368408, "learning_rate": 7.134506126360462e-05, "loss": 0.5793, "step": 21682 }, { "epoch": 1.4691374754387154, "grad_norm": 4.468286514282227, "learning_rate": 7.13436922445068e-05, "loss": 0.6044, "step": 21683 }, { "epoch": 1.4692052307066874, "grad_norm": 5.602799415588379, "learning_rate": 7.1342323225409e-05, "loss": 0.7729, "step": 21684 }, { "epoch": 1.4692729859746596, "grad_norm": 5.902412414550781, "learning_rate": 7.134095420631118e-05, "loss": 0.7508, "step": 21685 }, { "epoch": 1.4693407412426316, "grad_norm": 6.150251388549805, "learning_rate": 7.133958518721336e-05, "loss": 0.661, "step": 21686 }, { "epoch": 1.4694084965106038, "grad_norm": 10.347826957702637, "learning_rate": 7.133821616811554e-05, "loss": 0.6547, "step": 21687 }, { "epoch": 1.4694762517785758, "grad_norm": 5.150879383087158, "learning_rate": 7.133684714901773e-05, "loss": 0.5712, "step": 21688 }, { "epoch": 1.4695440070465478, "grad_norm": 4.790515899658203, "learning_rate": 7.133547812991991e-05, "loss": 0.6183, "step": 21689 }, { "epoch": 1.46961176231452, "grad_norm": 6.475719451904297, "learning_rate": 7.13341091108221e-05, "loss": 0.8101, "step": 21690 }, { "epoch": 1.4696795175824922, "grad_norm": 4.8480916023254395, "learning_rate": 7.133274009172429e-05, "loss": 0.6219, "step": 21691 }, { "epoch": 1.4697472728504641, "grad_norm": 6.3804168701171875, "learning_rate": 7.133137107262647e-05, "loss": 0.5148, "step": 21692 }, { "epoch": 1.4698150281184361, "grad_norm": 10.527505874633789, "learning_rate": 7.133000205352865e-05, "loss": 0.3753, "step": 21693 }, { "epoch": 1.4698827833864083, "grad_norm": 6.569975852966309, "learning_rate": 7.132863303443084e-05, "loss": 0.652, "step": 21694 }, { "epoch": 1.4699505386543803, "grad_norm": 5.112076759338379, "learning_rate": 7.132726401533302e-05, "loss": 0.6588, "step": 21695 }, { "epoch": 1.4700182939223525, "grad_norm": 5.137506484985352, "learning_rate": 7.13258949962352e-05, "loss": 0.7662, "step": 21696 }, { "epoch": 1.4700860491903245, "grad_norm": 5.513987064361572, "learning_rate": 7.132452597713738e-05, "loss": 0.7078, "step": 21697 }, { "epoch": 1.4701538044582967, "grad_norm": 6.240415096282959, "learning_rate": 7.132315695803958e-05, "loss": 0.5385, "step": 21698 }, { "epoch": 1.4702215597262687, "grad_norm": 4.496334075927734, "learning_rate": 7.132178793894176e-05, "loss": 0.6527, "step": 21699 }, { "epoch": 1.4702893149942409, "grad_norm": 5.841010093688965, "learning_rate": 7.132041891984394e-05, "loss": 0.8461, "step": 21700 }, { "epoch": 1.4703570702622129, "grad_norm": 5.358018398284912, "learning_rate": 7.131904990074612e-05, "loss": 0.7471, "step": 21701 }, { "epoch": 1.470424825530185, "grad_norm": 5.414307117462158, "learning_rate": 7.131768088164831e-05, "loss": 0.6307, "step": 21702 }, { "epoch": 1.470492580798157, "grad_norm": 5.318230628967285, "learning_rate": 7.131631186255049e-05, "loss": 0.6441, "step": 21703 }, { "epoch": 1.470560336066129, "grad_norm": 5.122453689575195, "learning_rate": 7.131494284345267e-05, "loss": 0.5265, "step": 21704 }, { "epoch": 1.4706280913341012, "grad_norm": 6.227200031280518, "learning_rate": 7.131357382435485e-05, "loss": 0.567, "step": 21705 }, { "epoch": 1.4706958466020734, "grad_norm": 6.379117488861084, "learning_rate": 7.131220480525703e-05, "loss": 0.6654, "step": 21706 }, { "epoch": 1.4707636018700454, "grad_norm": 5.619192123413086, "learning_rate": 7.131083578615923e-05, "loss": 0.5321, "step": 21707 }, { "epoch": 1.4708313571380174, "grad_norm": 5.286746025085449, "learning_rate": 7.130946676706141e-05, "loss": 0.6481, "step": 21708 }, { "epoch": 1.4708991124059896, "grad_norm": 3.7455196380615234, "learning_rate": 7.130809774796359e-05, "loss": 0.5271, "step": 21709 }, { "epoch": 1.4709668676739618, "grad_norm": 5.008398056030273, "learning_rate": 7.130672872886577e-05, "loss": 0.7011, "step": 21710 }, { "epoch": 1.4710346229419338, "grad_norm": 5.085233211517334, "learning_rate": 7.130535970976795e-05, "loss": 0.778, "step": 21711 }, { "epoch": 1.4711023782099057, "grad_norm": 6.1486897468566895, "learning_rate": 7.130399069067014e-05, "loss": 0.8439, "step": 21712 }, { "epoch": 1.471170133477878, "grad_norm": 6.175623893737793, "learning_rate": 7.130262167157232e-05, "loss": 0.6777, "step": 21713 }, { "epoch": 1.47123788874585, "grad_norm": 4.841026306152344, "learning_rate": 7.13012526524745e-05, "loss": 0.617, "step": 21714 }, { "epoch": 1.4713056440138221, "grad_norm": 5.529433250427246, "learning_rate": 7.129988363337668e-05, "loss": 0.7381, "step": 21715 }, { "epoch": 1.4713733992817941, "grad_norm": 5.113159656524658, "learning_rate": 7.129851461427888e-05, "loss": 0.6534, "step": 21716 }, { "epoch": 1.4714411545497663, "grad_norm": 5.857573509216309, "learning_rate": 7.129714559518106e-05, "loss": 0.4977, "step": 21717 }, { "epoch": 1.4715089098177383, "grad_norm": 3.7531652450561523, "learning_rate": 7.129577657608324e-05, "loss": 0.4698, "step": 21718 }, { "epoch": 1.4715766650857103, "grad_norm": 5.303880214691162, "learning_rate": 7.129440755698542e-05, "loss": 0.7623, "step": 21719 }, { "epoch": 1.4716444203536825, "grad_norm": 8.549286842346191, "learning_rate": 7.12930385378876e-05, "loss": 0.9631, "step": 21720 }, { "epoch": 1.4717121756216547, "grad_norm": 4.764150142669678, "learning_rate": 7.12916695187898e-05, "loss": 0.663, "step": 21721 }, { "epoch": 1.4717799308896267, "grad_norm": 8.707677841186523, "learning_rate": 7.129030049969197e-05, "loss": 0.6849, "step": 21722 }, { "epoch": 1.4718476861575986, "grad_norm": 4.881777286529541, "learning_rate": 7.128893148059415e-05, "loss": 0.6694, "step": 21723 }, { "epoch": 1.4719154414255708, "grad_norm": 6.10659646987915, "learning_rate": 7.128756246149633e-05, "loss": 0.8651, "step": 21724 }, { "epoch": 1.471983196693543, "grad_norm": 9.05642318725586, "learning_rate": 7.128619344239853e-05, "loss": 0.7221, "step": 21725 }, { "epoch": 1.472050951961515, "grad_norm": 9.181722640991211, "learning_rate": 7.128482442330071e-05, "loss": 1.1329, "step": 21726 }, { "epoch": 1.472118707229487, "grad_norm": 5.359407901763916, "learning_rate": 7.128345540420289e-05, "loss": 0.7451, "step": 21727 }, { "epoch": 1.4721864624974592, "grad_norm": 6.165289402008057, "learning_rate": 7.128208638510507e-05, "loss": 0.5822, "step": 21728 }, { "epoch": 1.4722542177654312, "grad_norm": 4.3777079582214355, "learning_rate": 7.128071736600725e-05, "loss": 0.7065, "step": 21729 }, { "epoch": 1.4723219730334034, "grad_norm": 5.20616340637207, "learning_rate": 7.127934834690944e-05, "loss": 0.7143, "step": 21730 }, { "epoch": 1.4723897283013754, "grad_norm": 7.012877941131592, "learning_rate": 7.127797932781162e-05, "loss": 1.071, "step": 21731 }, { "epoch": 1.4724574835693476, "grad_norm": 6.38242244720459, "learning_rate": 7.12766103087138e-05, "loss": 0.7701, "step": 21732 }, { "epoch": 1.4725252388373196, "grad_norm": 5.328784942626953, "learning_rate": 7.127524128961598e-05, "loss": 0.7945, "step": 21733 }, { "epoch": 1.4725929941052918, "grad_norm": 5.9666643142700195, "learning_rate": 7.127387227051818e-05, "loss": 0.6395, "step": 21734 }, { "epoch": 1.4726607493732637, "grad_norm": 8.982746124267578, "learning_rate": 7.127250325142036e-05, "loss": 0.7238, "step": 21735 }, { "epoch": 1.472728504641236, "grad_norm": 5.306856155395508, "learning_rate": 7.127113423232254e-05, "loss": 0.5856, "step": 21736 }, { "epoch": 1.472796259909208, "grad_norm": 7.937626838684082, "learning_rate": 7.126976521322473e-05, "loss": 0.6052, "step": 21737 }, { "epoch": 1.47286401517718, "grad_norm": 5.628710746765137, "learning_rate": 7.126839619412691e-05, "loss": 0.5983, "step": 21738 }, { "epoch": 1.472931770445152, "grad_norm": 8.543285369873047, "learning_rate": 7.12670271750291e-05, "loss": 0.748, "step": 21739 }, { "epoch": 1.4729995257131243, "grad_norm": 6.022785186767578, "learning_rate": 7.126565815593129e-05, "loss": 0.8587, "step": 21740 }, { "epoch": 1.4730672809810963, "grad_norm": 5.865413188934326, "learning_rate": 7.126428913683347e-05, "loss": 0.7923, "step": 21741 }, { "epoch": 1.4731350362490683, "grad_norm": 7.5003132820129395, "learning_rate": 7.126292011773565e-05, "loss": 0.7663, "step": 21742 }, { "epoch": 1.4732027915170405, "grad_norm": 8.309082984924316, "learning_rate": 7.126155109863783e-05, "loss": 0.6667, "step": 21743 }, { "epoch": 1.4732705467850125, "grad_norm": 4.912498474121094, "learning_rate": 7.126018207954002e-05, "loss": 0.6236, "step": 21744 }, { "epoch": 1.4733383020529847, "grad_norm": 5.59297513961792, "learning_rate": 7.12588130604422e-05, "loss": 0.6028, "step": 21745 }, { "epoch": 1.4734060573209566, "grad_norm": 4.466393947601318, "learning_rate": 7.125744404134438e-05, "loss": 0.637, "step": 21746 }, { "epoch": 1.4734738125889288, "grad_norm": 6.879052639007568, "learning_rate": 7.125607502224656e-05, "loss": 0.6159, "step": 21747 }, { "epoch": 1.4735415678569008, "grad_norm": 6.0279388427734375, "learning_rate": 7.125470600314876e-05, "loss": 0.5653, "step": 21748 }, { "epoch": 1.473609323124873, "grad_norm": 6.425625324249268, "learning_rate": 7.125333698405094e-05, "loss": 0.8203, "step": 21749 }, { "epoch": 1.473677078392845, "grad_norm": 6.157146453857422, "learning_rate": 7.125196796495312e-05, "loss": 0.7837, "step": 21750 }, { "epoch": 1.4737448336608172, "grad_norm": 5.071956634521484, "learning_rate": 7.12505989458553e-05, "loss": 0.5852, "step": 21751 }, { "epoch": 1.4738125889287892, "grad_norm": 8.880087852478027, "learning_rate": 7.124922992675748e-05, "loss": 0.5281, "step": 21752 }, { "epoch": 1.4738803441967612, "grad_norm": 7.715792179107666, "learning_rate": 7.124786090765967e-05, "loss": 0.6583, "step": 21753 }, { "epoch": 1.4739480994647334, "grad_norm": 6.793040752410889, "learning_rate": 7.124649188856185e-05, "loss": 0.8852, "step": 21754 }, { "epoch": 1.4740158547327056, "grad_norm": 8.578542709350586, "learning_rate": 7.124512286946403e-05, "loss": 0.7089, "step": 21755 }, { "epoch": 1.4740836100006776, "grad_norm": 5.610144138336182, "learning_rate": 7.124375385036621e-05, "loss": 0.8239, "step": 21756 }, { "epoch": 1.4741513652686495, "grad_norm": 5.437003135681152, "learning_rate": 7.124238483126841e-05, "loss": 0.7513, "step": 21757 }, { "epoch": 1.4742191205366217, "grad_norm": 5.499956130981445, "learning_rate": 7.124101581217059e-05, "loss": 0.8237, "step": 21758 }, { "epoch": 1.474286875804594, "grad_norm": 7.407476902008057, "learning_rate": 7.123964679307277e-05, "loss": 0.5528, "step": 21759 }, { "epoch": 1.474354631072566, "grad_norm": 8.50042724609375, "learning_rate": 7.123827777397495e-05, "loss": 0.6021, "step": 21760 }, { "epoch": 1.474422386340538, "grad_norm": 5.6611409187316895, "learning_rate": 7.123690875487713e-05, "loss": 0.9343, "step": 21761 }, { "epoch": 1.47449014160851, "grad_norm": 5.682448387145996, "learning_rate": 7.123553973577932e-05, "loss": 0.5455, "step": 21762 }, { "epoch": 1.474557896876482, "grad_norm": 13.792447090148926, "learning_rate": 7.12341707166815e-05, "loss": 0.5195, "step": 21763 }, { "epoch": 1.4746256521444543, "grad_norm": 5.128006935119629, "learning_rate": 7.123280169758368e-05, "loss": 0.6971, "step": 21764 }, { "epoch": 1.4746934074124263, "grad_norm": 6.455353736877441, "learning_rate": 7.123143267848586e-05, "loss": 0.8059, "step": 21765 }, { "epoch": 1.4747611626803985, "grad_norm": 4.480746269226074, "learning_rate": 7.123006365938804e-05, "loss": 0.5986, "step": 21766 }, { "epoch": 1.4748289179483705, "grad_norm": 4.393582820892334, "learning_rate": 7.122869464029024e-05, "loss": 0.4918, "step": 21767 }, { "epoch": 1.4748966732163424, "grad_norm": 8.471491813659668, "learning_rate": 7.122732562119242e-05, "loss": 0.7548, "step": 21768 }, { "epoch": 1.4749644284843146, "grad_norm": 5.444960594177246, "learning_rate": 7.12259566020946e-05, "loss": 0.6544, "step": 21769 }, { "epoch": 1.4750321837522868, "grad_norm": 5.098048210144043, "learning_rate": 7.122458758299678e-05, "loss": 0.9427, "step": 21770 }, { "epoch": 1.4750999390202588, "grad_norm": 6.979254722595215, "learning_rate": 7.122321856389897e-05, "loss": 0.5557, "step": 21771 }, { "epoch": 1.4751676942882308, "grad_norm": 4.195895195007324, "learning_rate": 7.122184954480115e-05, "loss": 0.7583, "step": 21772 }, { "epoch": 1.475235449556203, "grad_norm": 5.391801357269287, "learning_rate": 7.122048052570333e-05, "loss": 0.636, "step": 21773 }, { "epoch": 1.4753032048241752, "grad_norm": 5.594984531402588, "learning_rate": 7.121911150660551e-05, "loss": 0.6622, "step": 21774 }, { "epoch": 1.4753709600921472, "grad_norm": 5.1343488693237305, "learning_rate": 7.12177424875077e-05, "loss": 0.6314, "step": 21775 }, { "epoch": 1.4754387153601192, "grad_norm": 7.435176372528076, "learning_rate": 7.121637346840989e-05, "loss": 0.6458, "step": 21776 }, { "epoch": 1.4755064706280914, "grad_norm": 7.583083629608154, "learning_rate": 7.121500444931207e-05, "loss": 0.6758, "step": 21777 }, { "epoch": 1.4755742258960634, "grad_norm": 6.895628452301025, "learning_rate": 7.121363543021425e-05, "loss": 0.7157, "step": 21778 }, { "epoch": 1.4756419811640356, "grad_norm": 6.189311981201172, "learning_rate": 7.121226641111643e-05, "loss": 0.5943, "step": 21779 }, { "epoch": 1.4757097364320075, "grad_norm": 13.25635051727295, "learning_rate": 7.121089739201862e-05, "loss": 0.6868, "step": 21780 }, { "epoch": 1.4757774916999797, "grad_norm": 5.629393100738525, "learning_rate": 7.12095283729208e-05, "loss": 0.7125, "step": 21781 }, { "epoch": 1.4758452469679517, "grad_norm": 4.756425857543945, "learning_rate": 7.120815935382298e-05, "loss": 0.6708, "step": 21782 }, { "epoch": 1.475913002235924, "grad_norm": 5.445009231567383, "learning_rate": 7.120679033472516e-05, "loss": 0.6285, "step": 21783 }, { "epoch": 1.475980757503896, "grad_norm": 7.55637264251709, "learning_rate": 7.120542131562736e-05, "loss": 0.7819, "step": 21784 }, { "epoch": 1.476048512771868, "grad_norm": 6.0098795890808105, "learning_rate": 7.120405229652954e-05, "loss": 0.6151, "step": 21785 }, { "epoch": 1.47611626803984, "grad_norm": 7.189872741699219, "learning_rate": 7.120268327743172e-05, "loss": 0.6085, "step": 21786 }, { "epoch": 1.476184023307812, "grad_norm": 5.741978168487549, "learning_rate": 7.120131425833391e-05, "loss": 0.6474, "step": 21787 }, { "epoch": 1.4762517785757843, "grad_norm": 7.0941290855407715, "learning_rate": 7.119994523923609e-05, "loss": 0.6977, "step": 21788 }, { "epoch": 1.4763195338437565, "grad_norm": 6.161652088165283, "learning_rate": 7.119857622013827e-05, "loss": 0.6852, "step": 21789 }, { "epoch": 1.4763872891117285, "grad_norm": 5.6524977684021, "learning_rate": 7.119720720104047e-05, "loss": 0.8773, "step": 21790 }, { "epoch": 1.4764550443797004, "grad_norm": 6.571997165679932, "learning_rate": 7.119583818194265e-05, "loss": 0.9905, "step": 21791 }, { "epoch": 1.4765227996476726, "grad_norm": 7.786444187164307, "learning_rate": 7.119446916284483e-05, "loss": 0.5574, "step": 21792 }, { "epoch": 1.4765905549156446, "grad_norm": 5.431092262268066, "learning_rate": 7.119310014374701e-05, "loss": 0.6193, "step": 21793 }, { "epoch": 1.4766583101836168, "grad_norm": 8.320332527160645, "learning_rate": 7.11917311246492e-05, "loss": 0.6975, "step": 21794 }, { "epoch": 1.4767260654515888, "grad_norm": 7.629312992095947, "learning_rate": 7.119036210555138e-05, "loss": 0.7276, "step": 21795 }, { "epoch": 1.476793820719561, "grad_norm": 6.053386211395264, "learning_rate": 7.118899308645356e-05, "loss": 0.6627, "step": 21796 }, { "epoch": 1.476861575987533, "grad_norm": 6.790355205535889, "learning_rate": 7.118762406735574e-05, "loss": 0.7257, "step": 21797 }, { "epoch": 1.4769293312555052, "grad_norm": 6.854659557342529, "learning_rate": 7.118625504825792e-05, "loss": 0.5662, "step": 21798 }, { "epoch": 1.4769970865234772, "grad_norm": 5.845192909240723, "learning_rate": 7.118488602916012e-05, "loss": 0.5074, "step": 21799 }, { "epoch": 1.4770648417914494, "grad_norm": 4.697106838226318, "learning_rate": 7.11835170100623e-05, "loss": 0.6778, "step": 21800 }, { "epoch": 1.4771325970594213, "grad_norm": 7.815839767456055, "learning_rate": 7.118214799096448e-05, "loss": 0.397, "step": 21801 }, { "epoch": 1.4772003523273933, "grad_norm": 8.386428833007812, "learning_rate": 7.118077897186666e-05, "loss": 1.029, "step": 21802 }, { "epoch": 1.4772681075953655, "grad_norm": 4.455373764038086, "learning_rate": 7.117940995276885e-05, "loss": 0.5201, "step": 21803 }, { "epoch": 1.4773358628633377, "grad_norm": 5.681417942047119, "learning_rate": 7.117804093367103e-05, "loss": 0.5816, "step": 21804 }, { "epoch": 1.4774036181313097, "grad_norm": 7.505406856536865, "learning_rate": 7.117667191457321e-05, "loss": 0.7718, "step": 21805 }, { "epoch": 1.4774713733992817, "grad_norm": 6.251115798950195, "learning_rate": 7.11753028954754e-05, "loss": 0.6822, "step": 21806 }, { "epoch": 1.477539128667254, "grad_norm": 7.770137310028076, "learning_rate": 7.117393387637757e-05, "loss": 0.8242, "step": 21807 }, { "epoch": 1.477606883935226, "grad_norm": 4.895290851593018, "learning_rate": 7.117256485727977e-05, "loss": 0.586, "step": 21808 }, { "epoch": 1.477674639203198, "grad_norm": 8.008575439453125, "learning_rate": 7.117119583818195e-05, "loss": 0.7035, "step": 21809 }, { "epoch": 1.47774239447117, "grad_norm": 8.28019905090332, "learning_rate": 7.116982681908413e-05, "loss": 0.7001, "step": 21810 }, { "epoch": 1.4778101497391423, "grad_norm": 6.273717880249023, "learning_rate": 7.116845779998631e-05, "loss": 0.6004, "step": 21811 }, { "epoch": 1.4778779050071142, "grad_norm": 6.122562408447266, "learning_rate": 7.11670887808885e-05, "loss": 0.6663, "step": 21812 }, { "epoch": 1.4779456602750864, "grad_norm": 5.961752891540527, "learning_rate": 7.116571976179068e-05, "loss": 0.7056, "step": 21813 }, { "epoch": 1.4780134155430584, "grad_norm": 7.564919948577881, "learning_rate": 7.116435074269286e-05, "loss": 0.549, "step": 21814 }, { "epoch": 1.4780811708110306, "grad_norm": 6.161078453063965, "learning_rate": 7.116298172359504e-05, "loss": 0.693, "step": 21815 }, { "epoch": 1.4781489260790026, "grad_norm": 5.643840312957764, "learning_rate": 7.116161270449722e-05, "loss": 0.8399, "step": 21816 }, { "epoch": 1.4782166813469746, "grad_norm": 6.963353633880615, "learning_rate": 7.116024368539942e-05, "loss": 0.7439, "step": 21817 }, { "epoch": 1.4782844366149468, "grad_norm": 7.098206043243408, "learning_rate": 7.11588746663016e-05, "loss": 0.6837, "step": 21818 }, { "epoch": 1.478352191882919, "grad_norm": 6.019665241241455, "learning_rate": 7.115750564720378e-05, "loss": 0.6365, "step": 21819 }, { "epoch": 1.478419947150891, "grad_norm": 6.085427761077881, "learning_rate": 7.115613662810596e-05, "loss": 0.6821, "step": 21820 }, { "epoch": 1.478487702418863, "grad_norm": 5.712550640106201, "learning_rate": 7.115476760900814e-05, "loss": 0.7834, "step": 21821 }, { "epoch": 1.4785554576868352, "grad_norm": 6.379909515380859, "learning_rate": 7.115339858991033e-05, "loss": 0.5005, "step": 21822 }, { "epoch": 1.4786232129548074, "grad_norm": 5.254218578338623, "learning_rate": 7.115202957081251e-05, "loss": 0.6773, "step": 21823 }, { "epoch": 1.4786909682227793, "grad_norm": 6.985934257507324, "learning_rate": 7.11506605517147e-05, "loss": 0.642, "step": 21824 }, { "epoch": 1.4787587234907513, "grad_norm": 5.438014030456543, "learning_rate": 7.114929153261687e-05, "loss": 0.5887, "step": 21825 }, { "epoch": 1.4788264787587235, "grad_norm": 6.572701930999756, "learning_rate": 7.114792251351907e-05, "loss": 0.9152, "step": 21826 }, { "epoch": 1.4788942340266955, "grad_norm": 5.790900707244873, "learning_rate": 7.114655349442125e-05, "loss": 0.6473, "step": 21827 }, { "epoch": 1.4789619892946677, "grad_norm": 7.940485000610352, "learning_rate": 7.114518447532343e-05, "loss": 0.4637, "step": 21828 }, { "epoch": 1.4790297445626397, "grad_norm": 6.325809955596924, "learning_rate": 7.114381545622561e-05, "loss": 0.6598, "step": 21829 }, { "epoch": 1.479097499830612, "grad_norm": 7.416320323944092, "learning_rate": 7.11424464371278e-05, "loss": 0.7141, "step": 21830 }, { "epoch": 1.4791652550985839, "grad_norm": 4.925529479980469, "learning_rate": 7.114107741802998e-05, "loss": 0.6494, "step": 21831 }, { "epoch": 1.479233010366556, "grad_norm": 5.552628517150879, "learning_rate": 7.113970839893216e-05, "loss": 0.652, "step": 21832 }, { "epoch": 1.479300765634528, "grad_norm": 5.2854390144348145, "learning_rate": 7.113833937983436e-05, "loss": 0.8125, "step": 21833 }, { "epoch": 1.4793685209025003, "grad_norm": 5.900308132171631, "learning_rate": 7.113697036073654e-05, "loss": 0.6991, "step": 21834 }, { "epoch": 1.4794362761704722, "grad_norm": 5.446276664733887, "learning_rate": 7.113560134163872e-05, "loss": 0.8138, "step": 21835 }, { "epoch": 1.4795040314384442, "grad_norm": 10.77336597442627, "learning_rate": 7.113423232254091e-05, "loss": 0.7449, "step": 21836 }, { "epoch": 1.4795717867064164, "grad_norm": 6.156933784484863, "learning_rate": 7.113286330344309e-05, "loss": 0.62, "step": 21837 }, { "epoch": 1.4796395419743886, "grad_norm": 4.368791580200195, "learning_rate": 7.113149428434527e-05, "loss": 0.4858, "step": 21838 }, { "epoch": 1.4797072972423606, "grad_norm": 4.857654571533203, "learning_rate": 7.113012526524745e-05, "loss": 0.6503, "step": 21839 }, { "epoch": 1.4797750525103326, "grad_norm": 6.367594242095947, "learning_rate": 7.112875624614965e-05, "loss": 0.6908, "step": 21840 }, { "epoch": 1.4798428077783048, "grad_norm": 5.479550838470459, "learning_rate": 7.112738722705183e-05, "loss": 0.7368, "step": 21841 }, { "epoch": 1.4799105630462768, "grad_norm": 10.277019500732422, "learning_rate": 7.112601820795401e-05, "loss": 0.9172, "step": 21842 }, { "epoch": 1.479978318314249, "grad_norm": 5.843985557556152, "learning_rate": 7.112464918885619e-05, "loss": 0.7648, "step": 21843 }, { "epoch": 1.480046073582221, "grad_norm": 5.3389973640441895, "learning_rate": 7.112328016975837e-05, "loss": 0.6293, "step": 21844 }, { "epoch": 1.4801138288501932, "grad_norm": 5.396963119506836, "learning_rate": 7.112191115066056e-05, "loss": 0.639, "step": 21845 }, { "epoch": 1.4801815841181651, "grad_norm": 6.382904529571533, "learning_rate": 7.112054213156274e-05, "loss": 0.8789, "step": 21846 }, { "epoch": 1.4802493393861373, "grad_norm": 6.183492660522461, "learning_rate": 7.111917311246492e-05, "loss": 0.6405, "step": 21847 }, { "epoch": 1.4803170946541093, "grad_norm": 5.39617919921875, "learning_rate": 7.11178040933671e-05, "loss": 0.6756, "step": 21848 }, { "epoch": 1.4803848499220815, "grad_norm": 6.426948547363281, "learning_rate": 7.11164350742693e-05, "loss": 0.5616, "step": 21849 }, { "epoch": 1.4804526051900535, "grad_norm": 5.347081661224365, "learning_rate": 7.111506605517148e-05, "loss": 0.6404, "step": 21850 }, { "epoch": 1.4805203604580255, "grad_norm": 7.056222915649414, "learning_rate": 7.111369703607366e-05, "loss": 0.6699, "step": 21851 }, { "epoch": 1.4805881157259977, "grad_norm": 5.874282360076904, "learning_rate": 7.111232801697584e-05, "loss": 0.695, "step": 21852 }, { "epoch": 1.48065587099397, "grad_norm": 7.254631042480469, "learning_rate": 7.111095899787802e-05, "loss": 0.7625, "step": 21853 }, { "epoch": 1.4807236262619419, "grad_norm": 5.772106170654297, "learning_rate": 7.110958997878021e-05, "loss": 0.7097, "step": 21854 }, { "epoch": 1.4807913815299139, "grad_norm": 4.59554386138916, "learning_rate": 7.110822095968239e-05, "loss": 0.6455, "step": 21855 }, { "epoch": 1.480859136797886, "grad_norm": 5.444210052490234, "learning_rate": 7.110685194058457e-05, "loss": 0.6106, "step": 21856 }, { "epoch": 1.4809268920658583, "grad_norm": 6.203567981719971, "learning_rate": 7.110548292148675e-05, "loss": 0.7615, "step": 21857 }, { "epoch": 1.4809946473338302, "grad_norm": 5.0294365882873535, "learning_rate": 7.110411390238895e-05, "loss": 0.6376, "step": 21858 }, { "epoch": 1.4810624026018022, "grad_norm": 6.734910011291504, "learning_rate": 7.110274488329113e-05, "loss": 0.5604, "step": 21859 }, { "epoch": 1.4811301578697744, "grad_norm": 5.542819976806641, "learning_rate": 7.110137586419331e-05, "loss": 0.5524, "step": 21860 }, { "epoch": 1.4811979131377464, "grad_norm": 6.652702331542969, "learning_rate": 7.110000684509549e-05, "loss": 0.567, "step": 21861 }, { "epoch": 1.4812656684057186, "grad_norm": 6.122315883636475, "learning_rate": 7.109863782599767e-05, "loss": 0.8038, "step": 21862 }, { "epoch": 1.4813334236736906, "grad_norm": 6.622242450714111, "learning_rate": 7.109726880689986e-05, "loss": 0.9491, "step": 21863 }, { "epoch": 1.4814011789416628, "grad_norm": 6.916037559509277, "learning_rate": 7.109589978780204e-05, "loss": 0.7878, "step": 21864 }, { "epoch": 1.4814689342096348, "grad_norm": 7.334831714630127, "learning_rate": 7.109453076870422e-05, "loss": 0.7229, "step": 21865 }, { "epoch": 1.4815366894776067, "grad_norm": 8.572863578796387, "learning_rate": 7.10931617496064e-05, "loss": 0.667, "step": 21866 }, { "epoch": 1.481604444745579, "grad_norm": 7.697519779205322, "learning_rate": 7.109179273050858e-05, "loss": 0.9579, "step": 21867 }, { "epoch": 1.4816722000135512, "grad_norm": 5.4145402908325195, "learning_rate": 7.109042371141078e-05, "loss": 0.6371, "step": 21868 }, { "epoch": 1.4817399552815231, "grad_norm": 6.284193992614746, "learning_rate": 7.108905469231296e-05, "loss": 0.7629, "step": 21869 }, { "epoch": 1.4818077105494951, "grad_norm": 5.603421688079834, "learning_rate": 7.108768567321514e-05, "loss": 0.6933, "step": 21870 }, { "epoch": 1.4818754658174673, "grad_norm": 4.957241535186768, "learning_rate": 7.108631665411732e-05, "loss": 0.4785, "step": 21871 }, { "epoch": 1.4819432210854395, "grad_norm": 5.025025844573975, "learning_rate": 7.108494763501951e-05, "loss": 0.552, "step": 21872 }, { "epoch": 1.4820109763534115, "grad_norm": 5.8130364418029785, "learning_rate": 7.108357861592169e-05, "loss": 0.6369, "step": 21873 }, { "epoch": 1.4820787316213835, "grad_norm": 6.808913707733154, "learning_rate": 7.108220959682387e-05, "loss": 0.734, "step": 21874 }, { "epoch": 1.4821464868893557, "grad_norm": 7.8863115310668945, "learning_rate": 7.108084057772605e-05, "loss": 0.7553, "step": 21875 }, { "epoch": 1.4822142421573277, "grad_norm": 7.173527717590332, "learning_rate": 7.107947155862825e-05, "loss": 0.6025, "step": 21876 }, { "epoch": 1.4822819974252999, "grad_norm": 5.231805324554443, "learning_rate": 7.107810253953043e-05, "loss": 0.8404, "step": 21877 }, { "epoch": 1.4823497526932718, "grad_norm": 4.611957550048828, "learning_rate": 7.107673352043261e-05, "loss": 0.6189, "step": 21878 }, { "epoch": 1.482417507961244, "grad_norm": 5.2666802406311035, "learning_rate": 7.10753645013348e-05, "loss": 0.7172, "step": 21879 }, { "epoch": 1.482485263229216, "grad_norm": 5.183822154998779, "learning_rate": 7.107399548223698e-05, "loss": 0.6554, "step": 21880 }, { "epoch": 1.4825530184971882, "grad_norm": 4.936515808105469, "learning_rate": 7.107262646313916e-05, "loss": 0.7094, "step": 21881 }, { "epoch": 1.4826207737651602, "grad_norm": 5.546382427215576, "learning_rate": 7.107125744404136e-05, "loss": 0.7249, "step": 21882 }, { "epoch": 1.4826885290331324, "grad_norm": 4.961779594421387, "learning_rate": 7.106988842494354e-05, "loss": 0.5825, "step": 21883 }, { "epoch": 1.4827562843011044, "grad_norm": 5.227276802062988, "learning_rate": 7.106851940584572e-05, "loss": 0.7007, "step": 21884 }, { "epoch": 1.4828240395690764, "grad_norm": 6.752979755401611, "learning_rate": 7.10671503867479e-05, "loss": 0.6915, "step": 21885 }, { "epoch": 1.4828917948370486, "grad_norm": 5.958538055419922, "learning_rate": 7.106578136765009e-05, "loss": 0.5828, "step": 21886 }, { "epoch": 1.4829595501050208, "grad_norm": 7.262551784515381, "learning_rate": 7.106441234855227e-05, "loss": 0.5571, "step": 21887 }, { "epoch": 1.4830273053729928, "grad_norm": 7.4582672119140625, "learning_rate": 7.106304332945445e-05, "loss": 0.6194, "step": 21888 }, { "epoch": 1.4830950606409647, "grad_norm": 9.2800874710083, "learning_rate": 7.106167431035663e-05, "loss": 0.7326, "step": 21889 }, { "epoch": 1.483162815908937, "grad_norm": 5.802163124084473, "learning_rate": 7.106030529125883e-05, "loss": 0.5854, "step": 21890 }, { "epoch": 1.483230571176909, "grad_norm": 6.852213382720947, "learning_rate": 7.105893627216101e-05, "loss": 0.6215, "step": 21891 }, { "epoch": 1.4832983264448811, "grad_norm": 6.9432525634765625, "learning_rate": 7.105756725306319e-05, "loss": 0.5364, "step": 21892 }, { "epoch": 1.4833660817128531, "grad_norm": 7.0583086013793945, "learning_rate": 7.105619823396537e-05, "loss": 0.8171, "step": 21893 }, { "epoch": 1.4834338369808253, "grad_norm": 5.896312713623047, "learning_rate": 7.105482921486755e-05, "loss": 0.803, "step": 21894 }, { "epoch": 1.4835015922487973, "grad_norm": 7.363595485687256, "learning_rate": 7.105346019576974e-05, "loss": 0.4564, "step": 21895 }, { "epoch": 1.4835693475167695, "grad_norm": 5.9118123054504395, "learning_rate": 7.105209117667192e-05, "loss": 0.707, "step": 21896 }, { "epoch": 1.4836371027847415, "grad_norm": 4.911188125610352, "learning_rate": 7.10507221575741e-05, "loss": 0.7598, "step": 21897 }, { "epoch": 1.4837048580527137, "grad_norm": 8.472593307495117, "learning_rate": 7.104935313847628e-05, "loss": 0.7722, "step": 21898 }, { "epoch": 1.4837726133206857, "grad_norm": 4.989665508270264, "learning_rate": 7.104798411937846e-05, "loss": 0.7844, "step": 21899 }, { "epoch": 1.4838403685886576, "grad_norm": 5.630456447601318, "learning_rate": 7.104661510028066e-05, "loss": 0.4925, "step": 21900 }, { "epoch": 1.4839081238566298, "grad_norm": 7.414863109588623, "learning_rate": 7.104524608118284e-05, "loss": 0.854, "step": 21901 }, { "epoch": 1.483975879124602, "grad_norm": 4.7564167976379395, "learning_rate": 7.104387706208502e-05, "loss": 0.5392, "step": 21902 }, { "epoch": 1.484043634392574, "grad_norm": 6.919773101806641, "learning_rate": 7.10425080429872e-05, "loss": 0.6933, "step": 21903 }, { "epoch": 1.484111389660546, "grad_norm": 6.856180191040039, "learning_rate": 7.104113902388939e-05, "loss": 0.5479, "step": 21904 }, { "epoch": 1.4841791449285182, "grad_norm": 8.402336120605469, "learning_rate": 7.103977000479157e-05, "loss": 0.5471, "step": 21905 }, { "epoch": 1.4842469001964904, "grad_norm": 4.991842269897461, "learning_rate": 7.103840098569375e-05, "loss": 0.6406, "step": 21906 }, { "epoch": 1.4843146554644624, "grad_norm": 4.924518585205078, "learning_rate": 7.103703196659593e-05, "loss": 0.4924, "step": 21907 }, { "epoch": 1.4843824107324344, "grad_norm": 5.327103614807129, "learning_rate": 7.103566294749811e-05, "loss": 0.6487, "step": 21908 }, { "epoch": 1.4844501660004066, "grad_norm": 6.599485874176025, "learning_rate": 7.103429392840031e-05, "loss": 0.6536, "step": 21909 }, { "epoch": 1.4845179212683786, "grad_norm": 5.781223773956299, "learning_rate": 7.103292490930249e-05, "loss": 0.7098, "step": 21910 }, { "epoch": 1.4845856765363508, "grad_norm": 7.925943374633789, "learning_rate": 7.103155589020467e-05, "loss": 0.8541, "step": 21911 }, { "epoch": 1.4846534318043227, "grad_norm": 4.937210559844971, "learning_rate": 7.103018687110685e-05, "loss": 0.5719, "step": 21912 }, { "epoch": 1.484721187072295, "grad_norm": 5.461892127990723, "learning_rate": 7.102881785200904e-05, "loss": 0.5927, "step": 21913 }, { "epoch": 1.484788942340267, "grad_norm": 6.0040388107299805, "learning_rate": 7.102744883291122e-05, "loss": 0.5177, "step": 21914 }, { "epoch": 1.484856697608239, "grad_norm": 5.552609443664551, "learning_rate": 7.10260798138134e-05, "loss": 0.5897, "step": 21915 }, { "epoch": 1.484924452876211, "grad_norm": 4.911177158355713, "learning_rate": 7.102471079471558e-05, "loss": 0.4418, "step": 21916 }, { "epoch": 1.4849922081441833, "grad_norm": 6.276457786560059, "learning_rate": 7.102334177561776e-05, "loss": 0.9994, "step": 21917 }, { "epoch": 1.4850599634121553, "grad_norm": 6.623502254486084, "learning_rate": 7.102197275651996e-05, "loss": 0.811, "step": 21918 }, { "epoch": 1.4851277186801273, "grad_norm": 6.506345748901367, "learning_rate": 7.102060373742214e-05, "loss": 0.6862, "step": 21919 }, { "epoch": 1.4851954739480995, "grad_norm": 6.310751914978027, "learning_rate": 7.101923471832432e-05, "loss": 0.6398, "step": 21920 }, { "epoch": 1.4852632292160717, "grad_norm": 5.5568132400512695, "learning_rate": 7.10178656992265e-05, "loss": 0.4975, "step": 21921 }, { "epoch": 1.4853309844840437, "grad_norm": 4.986909866333008, "learning_rate": 7.101649668012869e-05, "loss": 0.6711, "step": 21922 }, { "epoch": 1.4853987397520156, "grad_norm": 6.393757343292236, "learning_rate": 7.101512766103087e-05, "loss": 0.6636, "step": 21923 }, { "epoch": 1.4854664950199878, "grad_norm": 7.940303325653076, "learning_rate": 7.101375864193305e-05, "loss": 0.7768, "step": 21924 }, { "epoch": 1.4855342502879598, "grad_norm": 5.260739326477051, "learning_rate": 7.101238962283525e-05, "loss": 0.7546, "step": 21925 }, { "epoch": 1.485602005555932, "grad_norm": 7.1842498779296875, "learning_rate": 7.101102060373743e-05, "loss": 0.6661, "step": 21926 }, { "epoch": 1.485669760823904, "grad_norm": 6.162209987640381, "learning_rate": 7.100965158463961e-05, "loss": 0.4773, "step": 21927 }, { "epoch": 1.4857375160918762, "grad_norm": 5.460573196411133, "learning_rate": 7.10082825655418e-05, "loss": 0.6729, "step": 21928 }, { "epoch": 1.4858052713598482, "grad_norm": 10.905681610107422, "learning_rate": 7.100691354644398e-05, "loss": 0.7492, "step": 21929 }, { "epoch": 1.4858730266278204, "grad_norm": 5.39735221862793, "learning_rate": 7.100554452734616e-05, "loss": 0.7156, "step": 21930 }, { "epoch": 1.4859407818957924, "grad_norm": 7.6677374839782715, "learning_rate": 7.100417550824834e-05, "loss": 0.7733, "step": 21931 }, { "epoch": 1.4860085371637646, "grad_norm": 5.970035076141357, "learning_rate": 7.100280648915054e-05, "loss": 0.7114, "step": 21932 }, { "epoch": 1.4860762924317366, "grad_norm": 6.140134334564209, "learning_rate": 7.100143747005272e-05, "loss": 0.6586, "step": 21933 }, { "epoch": 1.4861440476997085, "grad_norm": 4.751783847808838, "learning_rate": 7.10000684509549e-05, "loss": 0.5973, "step": 21934 }, { "epoch": 1.4862118029676807, "grad_norm": 6.2129316329956055, "learning_rate": 7.099869943185708e-05, "loss": 0.5744, "step": 21935 }, { "epoch": 1.486279558235653, "grad_norm": 6.153050899505615, "learning_rate": 7.099733041275927e-05, "loss": 0.6922, "step": 21936 }, { "epoch": 1.486347313503625, "grad_norm": 7.0990309715271, "learning_rate": 7.099596139366145e-05, "loss": 0.7651, "step": 21937 }, { "epoch": 1.486415068771597, "grad_norm": 5.434704303741455, "learning_rate": 7.099459237456363e-05, "loss": 0.7493, "step": 21938 }, { "epoch": 1.486482824039569, "grad_norm": 7.168928623199463, "learning_rate": 7.099322335546581e-05, "loss": 0.5329, "step": 21939 }, { "epoch": 1.486550579307541, "grad_norm": 5.33208703994751, "learning_rate": 7.099185433636799e-05, "loss": 0.7534, "step": 21940 }, { "epoch": 1.4866183345755133, "grad_norm": 5.791597366333008, "learning_rate": 7.099048531727019e-05, "loss": 0.6606, "step": 21941 }, { "epoch": 1.4866860898434853, "grad_norm": 7.644525527954102, "learning_rate": 7.098911629817237e-05, "loss": 0.6845, "step": 21942 }, { "epoch": 1.4867538451114575, "grad_norm": 5.097345352172852, "learning_rate": 7.098774727907455e-05, "loss": 0.6557, "step": 21943 }, { "epoch": 1.4868216003794295, "grad_norm": 7.360633373260498, "learning_rate": 7.098637825997673e-05, "loss": 0.8, "step": 21944 }, { "epoch": 1.4868893556474017, "grad_norm": 9.552695274353027, "learning_rate": 7.098500924087892e-05, "loss": 0.8165, "step": 21945 }, { "epoch": 1.4869571109153736, "grad_norm": 5.191266059875488, "learning_rate": 7.09836402217811e-05, "loss": 0.6005, "step": 21946 }, { "epoch": 1.4870248661833458, "grad_norm": 5.180266380310059, "learning_rate": 7.098227120268328e-05, "loss": 0.6668, "step": 21947 }, { "epoch": 1.4870926214513178, "grad_norm": 5.663448333740234, "learning_rate": 7.098090218358546e-05, "loss": 0.7212, "step": 21948 }, { "epoch": 1.4871603767192898, "grad_norm": 6.41900634765625, "learning_rate": 7.097953316448764e-05, "loss": 0.654, "step": 21949 }, { "epoch": 1.487228131987262, "grad_norm": 4.536510944366455, "learning_rate": 7.097816414538984e-05, "loss": 0.578, "step": 21950 }, { "epoch": 1.4872958872552342, "grad_norm": 8.940361022949219, "learning_rate": 7.097679512629202e-05, "loss": 0.6253, "step": 21951 }, { "epoch": 1.4873636425232062, "grad_norm": 5.85870361328125, "learning_rate": 7.09754261071942e-05, "loss": 0.6461, "step": 21952 }, { "epoch": 1.4874313977911782, "grad_norm": 6.328300952911377, "learning_rate": 7.097405708809638e-05, "loss": 0.6411, "step": 21953 }, { "epoch": 1.4874991530591504, "grad_norm": 4.9644999504089355, "learning_rate": 7.097268806899856e-05, "loss": 0.582, "step": 21954 }, { "epoch": 1.4875669083271226, "grad_norm": 4.997863292694092, "learning_rate": 7.097131904990075e-05, "loss": 0.7165, "step": 21955 }, { "epoch": 1.4876346635950946, "grad_norm": 11.087621688842773, "learning_rate": 7.096995003080293e-05, "loss": 0.5299, "step": 21956 }, { "epoch": 1.4877024188630665, "grad_norm": 7.927825450897217, "learning_rate": 7.096858101170511e-05, "loss": 0.5081, "step": 21957 }, { "epoch": 1.4877701741310387, "grad_norm": 5.931427955627441, "learning_rate": 7.096721199260729e-05, "loss": 0.9271, "step": 21958 }, { "epoch": 1.4878379293990107, "grad_norm": 4.905730724334717, "learning_rate": 7.096584297350949e-05, "loss": 0.6458, "step": 21959 }, { "epoch": 1.487905684666983, "grad_norm": 6.783291816711426, "learning_rate": 7.096447395441167e-05, "loss": 0.7249, "step": 21960 }, { "epoch": 1.487973439934955, "grad_norm": 5.102269649505615, "learning_rate": 7.096310493531385e-05, "loss": 0.5472, "step": 21961 }, { "epoch": 1.488041195202927, "grad_norm": 9.025117874145508, "learning_rate": 7.096173591621603e-05, "loss": 0.7506, "step": 21962 }, { "epoch": 1.488108950470899, "grad_norm": 8.450045585632324, "learning_rate": 7.096036689711821e-05, "loss": 0.6556, "step": 21963 }, { "epoch": 1.488176705738871, "grad_norm": 6.15355110168457, "learning_rate": 7.09589978780204e-05, "loss": 0.7123, "step": 21964 }, { "epoch": 1.4882444610068433, "grad_norm": 6.221551895141602, "learning_rate": 7.095762885892258e-05, "loss": 0.6185, "step": 21965 }, { "epoch": 1.4883122162748155, "grad_norm": 4.670135498046875, "learning_rate": 7.095625983982476e-05, "loss": 0.6158, "step": 21966 }, { "epoch": 1.4883799715427874, "grad_norm": 5.210943222045898, "learning_rate": 7.095489082072694e-05, "loss": 0.582, "step": 21967 }, { "epoch": 1.4884477268107594, "grad_norm": 10.116141319274902, "learning_rate": 7.095352180162914e-05, "loss": 0.873, "step": 21968 }, { "epoch": 1.4885154820787316, "grad_norm": 6.481102466583252, "learning_rate": 7.095215278253132e-05, "loss": 0.7207, "step": 21969 }, { "epoch": 1.4885832373467038, "grad_norm": 4.752083778381348, "learning_rate": 7.09507837634335e-05, "loss": 0.4503, "step": 21970 }, { "epoch": 1.4886509926146758, "grad_norm": 5.561951160430908, "learning_rate": 7.094941474433569e-05, "loss": 0.6637, "step": 21971 }, { "epoch": 1.4887187478826478, "grad_norm": 5.2104034423828125, "learning_rate": 7.094804572523787e-05, "loss": 0.7035, "step": 21972 }, { "epoch": 1.48878650315062, "grad_norm": 6.6871185302734375, "learning_rate": 7.094667670614005e-05, "loss": 0.6785, "step": 21973 }, { "epoch": 1.488854258418592, "grad_norm": 5.1811957359313965, "learning_rate": 7.094530768704225e-05, "loss": 0.6341, "step": 21974 }, { "epoch": 1.4889220136865642, "grad_norm": 5.180508136749268, "learning_rate": 7.094393866794443e-05, "loss": 0.6467, "step": 21975 }, { "epoch": 1.4889897689545362, "grad_norm": 5.214288234710693, "learning_rate": 7.094256964884661e-05, "loss": 0.6831, "step": 21976 }, { "epoch": 1.4890575242225084, "grad_norm": 4.660975456237793, "learning_rate": 7.094120062974879e-05, "loss": 0.5011, "step": 21977 }, { "epoch": 1.4891252794904803, "grad_norm": 6.3312249183654785, "learning_rate": 7.093983161065098e-05, "loss": 0.7993, "step": 21978 }, { "epoch": 1.4891930347584525, "grad_norm": 6.83062219619751, "learning_rate": 7.093846259155316e-05, "loss": 0.7559, "step": 21979 }, { "epoch": 1.4892607900264245, "grad_norm": 7.013361930847168, "learning_rate": 7.093709357245534e-05, "loss": 0.989, "step": 21980 }, { "epoch": 1.4893285452943967, "grad_norm": 6.921490669250488, "learning_rate": 7.093572455335752e-05, "loss": 0.8715, "step": 21981 }, { "epoch": 1.4893963005623687, "grad_norm": 5.438993453979492, "learning_rate": 7.093435553425972e-05, "loss": 0.792, "step": 21982 }, { "epoch": 1.4894640558303407, "grad_norm": 8.418713569641113, "learning_rate": 7.09329865151619e-05, "loss": 0.9744, "step": 21983 }, { "epoch": 1.489531811098313, "grad_norm": 5.657731533050537, "learning_rate": 7.093161749606408e-05, "loss": 0.5858, "step": 21984 }, { "epoch": 1.489599566366285, "grad_norm": 5.061615467071533, "learning_rate": 7.093024847696626e-05, "loss": 0.7928, "step": 21985 }, { "epoch": 1.489667321634257, "grad_norm": 5.199770927429199, "learning_rate": 7.092887945786844e-05, "loss": 0.619, "step": 21986 }, { "epoch": 1.489735076902229, "grad_norm": 5.337277412414551, "learning_rate": 7.092751043877063e-05, "loss": 0.8507, "step": 21987 }, { "epoch": 1.4898028321702013, "grad_norm": 7.148277759552002, "learning_rate": 7.092614141967281e-05, "loss": 0.6024, "step": 21988 }, { "epoch": 1.4898705874381732, "grad_norm": 4.171398162841797, "learning_rate": 7.092477240057499e-05, "loss": 0.5769, "step": 21989 }, { "epoch": 1.4899383427061454, "grad_norm": 4.939512729644775, "learning_rate": 7.092340338147717e-05, "loss": 0.6596, "step": 21990 }, { "epoch": 1.4900060979741174, "grad_norm": 14.337895393371582, "learning_rate": 7.092203436237937e-05, "loss": 0.6575, "step": 21991 }, { "epoch": 1.4900738532420896, "grad_norm": 5.058944225311279, "learning_rate": 7.092066534328155e-05, "loss": 0.6601, "step": 21992 }, { "epoch": 1.4901416085100616, "grad_norm": 7.2605061531066895, "learning_rate": 7.091929632418373e-05, "loss": 0.6919, "step": 21993 }, { "epoch": 1.4902093637780338, "grad_norm": 10.385626792907715, "learning_rate": 7.091792730508591e-05, "loss": 0.5626, "step": 21994 }, { "epoch": 1.4902771190460058, "grad_norm": 6.838784694671631, "learning_rate": 7.091655828598809e-05, "loss": 0.6392, "step": 21995 }, { "epoch": 1.490344874313978, "grad_norm": 5.707305431365967, "learning_rate": 7.091518926689028e-05, "loss": 0.7264, "step": 21996 }, { "epoch": 1.49041262958195, "grad_norm": 5.880498886108398, "learning_rate": 7.091382024779246e-05, "loss": 0.6718, "step": 21997 }, { "epoch": 1.490480384849922, "grad_norm": 5.579658031463623, "learning_rate": 7.091245122869464e-05, "loss": 0.6234, "step": 21998 }, { "epoch": 1.4905481401178942, "grad_norm": 4.632273197174072, "learning_rate": 7.091108220959682e-05, "loss": 0.6508, "step": 21999 }, { "epoch": 1.4906158953858664, "grad_norm": 5.620992183685303, "learning_rate": 7.090971319049902e-05, "loss": 0.949, "step": 22000 }, { "epoch": 1.4906836506538383, "grad_norm": 5.511868000030518, "learning_rate": 7.09083441714012e-05, "loss": 0.6109, "step": 22001 }, { "epoch": 1.4907514059218103, "grad_norm": 5.469490051269531, "learning_rate": 7.090697515230338e-05, "loss": 0.7354, "step": 22002 }, { "epoch": 1.4908191611897825, "grad_norm": 4.224766254425049, "learning_rate": 7.090560613320556e-05, "loss": 0.6512, "step": 22003 }, { "epoch": 1.4908869164577545, "grad_norm": 6.322409152984619, "learning_rate": 7.090423711410774e-05, "loss": 0.7224, "step": 22004 }, { "epoch": 1.4909546717257267, "grad_norm": 6.014798641204834, "learning_rate": 7.090286809500993e-05, "loss": 0.7055, "step": 22005 }, { "epoch": 1.4910224269936987, "grad_norm": 5.3260722160339355, "learning_rate": 7.090149907591211e-05, "loss": 0.5676, "step": 22006 }, { "epoch": 1.491090182261671, "grad_norm": 5.233611583709717, "learning_rate": 7.090013005681429e-05, "loss": 0.8554, "step": 22007 }, { "epoch": 1.4911579375296429, "grad_norm": 5.133505344390869, "learning_rate": 7.089876103771647e-05, "loss": 0.7246, "step": 22008 }, { "epoch": 1.491225692797615, "grad_norm": 8.229393005371094, "learning_rate": 7.089739201861865e-05, "loss": 0.9214, "step": 22009 }, { "epoch": 1.491293448065587, "grad_norm": 5.58939790725708, "learning_rate": 7.089602299952085e-05, "loss": 0.8351, "step": 22010 }, { "epoch": 1.4913612033335593, "grad_norm": 8.200170516967773, "learning_rate": 7.089465398042303e-05, "loss": 0.5238, "step": 22011 }, { "epoch": 1.4914289586015312, "grad_norm": 9.57175350189209, "learning_rate": 7.089328496132521e-05, "loss": 0.7055, "step": 22012 }, { "epoch": 1.4914967138695032, "grad_norm": 5.898972511291504, "learning_rate": 7.089191594222739e-05, "loss": 0.6201, "step": 22013 }, { "epoch": 1.4915644691374754, "grad_norm": 5.102303504943848, "learning_rate": 7.089054692312958e-05, "loss": 0.7608, "step": 22014 }, { "epoch": 1.4916322244054476, "grad_norm": 4.2893877029418945, "learning_rate": 7.088917790403176e-05, "loss": 0.6644, "step": 22015 }, { "epoch": 1.4916999796734196, "grad_norm": 6.000941753387451, "learning_rate": 7.088780888493394e-05, "loss": 0.7805, "step": 22016 }, { "epoch": 1.4917677349413916, "grad_norm": 8.498360633850098, "learning_rate": 7.088643986583614e-05, "loss": 0.8967, "step": 22017 }, { "epoch": 1.4918354902093638, "grad_norm": 6.613840103149414, "learning_rate": 7.088507084673832e-05, "loss": 0.6251, "step": 22018 }, { "epoch": 1.491903245477336, "grad_norm": 5.141783714294434, "learning_rate": 7.08837018276405e-05, "loss": 0.6567, "step": 22019 }, { "epoch": 1.491971000745308, "grad_norm": 5.212697982788086, "learning_rate": 7.088233280854269e-05, "loss": 0.5754, "step": 22020 }, { "epoch": 1.49203875601328, "grad_norm": 6.249224662780762, "learning_rate": 7.088096378944487e-05, "loss": 0.6935, "step": 22021 }, { "epoch": 1.4921065112812522, "grad_norm": 6.438807010650635, "learning_rate": 7.087959477034705e-05, "loss": 0.6231, "step": 22022 }, { "epoch": 1.4921742665492241, "grad_norm": 5.43194055557251, "learning_rate": 7.087822575124925e-05, "loss": 0.7531, "step": 22023 }, { "epoch": 1.4922420218171963, "grad_norm": 4.327939033508301, "learning_rate": 7.087685673215143e-05, "loss": 0.5997, "step": 22024 }, { "epoch": 1.4923097770851683, "grad_norm": 5.943202972412109, "learning_rate": 7.08754877130536e-05, "loss": 0.6489, "step": 22025 }, { "epoch": 1.4923775323531405, "grad_norm": 5.430817604064941, "learning_rate": 7.087411869395579e-05, "loss": 0.5891, "step": 22026 }, { "epoch": 1.4924452876211125, "grad_norm": 5.9742279052734375, "learning_rate": 7.087274967485797e-05, "loss": 0.7898, "step": 22027 }, { "epoch": 1.4925130428890847, "grad_norm": 4.850603103637695, "learning_rate": 7.087138065576016e-05, "loss": 0.6225, "step": 22028 }, { "epoch": 1.4925807981570567, "grad_norm": 5.074647903442383, "learning_rate": 7.087001163666234e-05, "loss": 0.8312, "step": 22029 }, { "epoch": 1.492648553425029, "grad_norm": 4.597614288330078, "learning_rate": 7.086864261756452e-05, "loss": 0.6607, "step": 22030 }, { "epoch": 1.4927163086930009, "grad_norm": 6.080015182495117, "learning_rate": 7.08672735984667e-05, "loss": 0.8176, "step": 22031 }, { "epoch": 1.4927840639609729, "grad_norm": 8.085951805114746, "learning_rate": 7.086590457936888e-05, "loss": 0.5499, "step": 22032 }, { "epoch": 1.492851819228945, "grad_norm": 6.048823356628418, "learning_rate": 7.086453556027108e-05, "loss": 0.6576, "step": 22033 }, { "epoch": 1.4929195744969173, "grad_norm": 4.443872928619385, "learning_rate": 7.086316654117326e-05, "loss": 0.6951, "step": 22034 }, { "epoch": 1.4929873297648892, "grad_norm": 5.61836576461792, "learning_rate": 7.086179752207544e-05, "loss": 0.5488, "step": 22035 }, { "epoch": 1.4930550850328612, "grad_norm": 6.085595607757568, "learning_rate": 7.086042850297762e-05, "loss": 0.7913, "step": 22036 }, { "epoch": 1.4931228403008334, "grad_norm": 4.363150119781494, "learning_rate": 7.085905948387981e-05, "loss": 0.7457, "step": 22037 }, { "epoch": 1.4931905955688054, "grad_norm": 5.833744525909424, "learning_rate": 7.085769046478199e-05, "loss": 0.7235, "step": 22038 }, { "epoch": 1.4932583508367776, "grad_norm": 5.427975177764893, "learning_rate": 7.085632144568417e-05, "loss": 0.7056, "step": 22039 }, { "epoch": 1.4933261061047496, "grad_norm": 8.134842872619629, "learning_rate": 7.085495242658635e-05, "loss": 0.8359, "step": 22040 }, { "epoch": 1.4933938613727218, "grad_norm": 6.897619724273682, "learning_rate": 7.085358340748853e-05, "loss": 0.6058, "step": 22041 }, { "epoch": 1.4934616166406938, "grad_norm": 4.74287748336792, "learning_rate": 7.085221438839073e-05, "loss": 0.7007, "step": 22042 }, { "epoch": 1.493529371908666, "grad_norm": 5.57700252532959, "learning_rate": 7.08508453692929e-05, "loss": 0.6289, "step": 22043 }, { "epoch": 1.493597127176638, "grad_norm": 7.433844089508057, "learning_rate": 7.084947635019509e-05, "loss": 0.4801, "step": 22044 }, { "epoch": 1.4936648824446102, "grad_norm": 8.1654691696167, "learning_rate": 7.084810733109727e-05, "loss": 0.5133, "step": 22045 }, { "epoch": 1.4937326377125821, "grad_norm": 4.01910924911499, "learning_rate": 7.084673831199946e-05, "loss": 0.6797, "step": 22046 }, { "epoch": 1.4938003929805541, "grad_norm": 6.498193264007568, "learning_rate": 7.084536929290164e-05, "loss": 0.6902, "step": 22047 }, { "epoch": 1.4938681482485263, "grad_norm": 6.121109485626221, "learning_rate": 7.084400027380382e-05, "loss": 0.8806, "step": 22048 }, { "epoch": 1.4939359035164985, "grad_norm": 5.747509956359863, "learning_rate": 7.0842631254706e-05, "loss": 0.7227, "step": 22049 }, { "epoch": 1.4940036587844705, "grad_norm": 5.2741570472717285, "learning_rate": 7.084126223560818e-05, "loss": 0.5321, "step": 22050 }, { "epoch": 1.4940714140524425, "grad_norm": 4.97558069229126, "learning_rate": 7.083989321651038e-05, "loss": 0.7249, "step": 22051 }, { "epoch": 1.4941391693204147, "grad_norm": 4.977479934692383, "learning_rate": 7.083852419741256e-05, "loss": 0.7343, "step": 22052 }, { "epoch": 1.4942069245883867, "grad_norm": 4.404728412628174, "learning_rate": 7.083715517831474e-05, "loss": 0.5846, "step": 22053 }, { "epoch": 1.4942746798563589, "grad_norm": 5.9822001457214355, "learning_rate": 7.083578615921692e-05, "loss": 0.7807, "step": 22054 }, { "epoch": 1.4943424351243308, "grad_norm": 5.575675010681152, "learning_rate": 7.08344171401191e-05, "loss": 0.4854, "step": 22055 }, { "epoch": 1.494410190392303, "grad_norm": 6.861571788787842, "learning_rate": 7.083304812102129e-05, "loss": 0.7243, "step": 22056 }, { "epoch": 1.494477945660275, "grad_norm": 4.884634017944336, "learning_rate": 7.083167910192347e-05, "loss": 0.836, "step": 22057 }, { "epoch": 1.4945457009282472, "grad_norm": 5.666802406311035, "learning_rate": 7.083031008282565e-05, "loss": 0.7778, "step": 22058 }, { "epoch": 1.4946134561962192, "grad_norm": 6.525592803955078, "learning_rate": 7.082894106372783e-05, "loss": 0.6147, "step": 22059 }, { "epoch": 1.4946812114641914, "grad_norm": 6.362815856933594, "learning_rate": 7.082757204463003e-05, "loss": 0.5349, "step": 22060 }, { "epoch": 1.4947489667321634, "grad_norm": 4.6374921798706055, "learning_rate": 7.082620302553221e-05, "loss": 0.6943, "step": 22061 }, { "epoch": 1.4948167220001354, "grad_norm": 6.965389728546143, "learning_rate": 7.082483400643439e-05, "loss": 0.7153, "step": 22062 }, { "epoch": 1.4948844772681076, "grad_norm": 6.712467193603516, "learning_rate": 7.082346498733657e-05, "loss": 0.6394, "step": 22063 }, { "epoch": 1.4949522325360798, "grad_norm": 5.7538251876831055, "learning_rate": 7.082209596823876e-05, "loss": 0.6197, "step": 22064 }, { "epoch": 1.4950199878040518, "grad_norm": 4.7185869216918945, "learning_rate": 7.082072694914094e-05, "loss": 0.52, "step": 22065 }, { "epoch": 1.4950877430720237, "grad_norm": 6.957514762878418, "learning_rate": 7.081935793004312e-05, "loss": 1.0268, "step": 22066 }, { "epoch": 1.495155498339996, "grad_norm": 5.18220329284668, "learning_rate": 7.081798891094532e-05, "loss": 0.6685, "step": 22067 }, { "epoch": 1.4952232536079682, "grad_norm": 6.5980730056762695, "learning_rate": 7.08166198918475e-05, "loss": 0.8332, "step": 22068 }, { "epoch": 1.4952910088759401, "grad_norm": 6.957843780517578, "learning_rate": 7.081525087274968e-05, "loss": 0.918, "step": 22069 }, { "epoch": 1.4953587641439121, "grad_norm": 5.952478885650635, "learning_rate": 7.081388185365187e-05, "loss": 0.5306, "step": 22070 }, { "epoch": 1.4954265194118843, "grad_norm": 5.741704940795898, "learning_rate": 7.081251283455405e-05, "loss": 0.6348, "step": 22071 }, { "epoch": 1.4954942746798563, "grad_norm": 4.910528182983398, "learning_rate": 7.081114381545623e-05, "loss": 0.5478, "step": 22072 }, { "epoch": 1.4955620299478285, "grad_norm": 5.595044136047363, "learning_rate": 7.080977479635841e-05, "loss": 0.816, "step": 22073 }, { "epoch": 1.4956297852158005, "grad_norm": 4.9874796867370605, "learning_rate": 7.08084057772606e-05, "loss": 0.6106, "step": 22074 }, { "epoch": 1.4956975404837727, "grad_norm": 5.500063419342041, "learning_rate": 7.080703675816279e-05, "loss": 0.6847, "step": 22075 }, { "epoch": 1.4957652957517447, "grad_norm": 8.218989372253418, "learning_rate": 7.080566773906497e-05, "loss": 0.8654, "step": 22076 }, { "epoch": 1.4958330510197169, "grad_norm": 4.6933770179748535, "learning_rate": 7.080429871996715e-05, "loss": 0.5527, "step": 22077 }, { "epoch": 1.4959008062876888, "grad_norm": 7.854067802429199, "learning_rate": 7.080292970086934e-05, "loss": 0.7978, "step": 22078 }, { "epoch": 1.495968561555661, "grad_norm": 7.972220420837402, "learning_rate": 7.080156068177152e-05, "loss": 0.6693, "step": 22079 }, { "epoch": 1.496036316823633, "grad_norm": 6.065375328063965, "learning_rate": 7.08001916626737e-05, "loss": 0.6346, "step": 22080 }, { "epoch": 1.496104072091605, "grad_norm": 6.695498466491699, "learning_rate": 7.079882264357588e-05, "loss": 0.7461, "step": 22081 }, { "epoch": 1.4961718273595772, "grad_norm": 4.7845025062561035, "learning_rate": 7.079745362447806e-05, "loss": 0.5359, "step": 22082 }, { "epoch": 1.4962395826275494, "grad_norm": 6.331785202026367, "learning_rate": 7.079608460538026e-05, "loss": 0.9256, "step": 22083 }, { "epoch": 1.4963073378955214, "grad_norm": 5.960836887359619, "learning_rate": 7.079471558628244e-05, "loss": 0.7302, "step": 22084 }, { "epoch": 1.4963750931634934, "grad_norm": 6.054520130157471, "learning_rate": 7.079334656718462e-05, "loss": 0.7843, "step": 22085 }, { "epoch": 1.4964428484314656, "grad_norm": 6.227099418640137, "learning_rate": 7.07919775480868e-05, "loss": 0.5721, "step": 22086 }, { "epoch": 1.4965106036994376, "grad_norm": 5.825899600982666, "learning_rate": 7.079060852898898e-05, "loss": 0.5587, "step": 22087 }, { "epoch": 1.4965783589674098, "grad_norm": 5.253388404846191, "learning_rate": 7.078923950989117e-05, "loss": 0.6088, "step": 22088 }, { "epoch": 1.4966461142353817, "grad_norm": 5.717947483062744, "learning_rate": 7.078787049079335e-05, "loss": 0.8546, "step": 22089 }, { "epoch": 1.496713869503354, "grad_norm": 8.158141136169434, "learning_rate": 7.078650147169553e-05, "loss": 0.6334, "step": 22090 }, { "epoch": 1.496781624771326, "grad_norm": 6.275701999664307, "learning_rate": 7.078513245259771e-05, "loss": 0.6534, "step": 22091 }, { "epoch": 1.4968493800392981, "grad_norm": 11.656972885131836, "learning_rate": 7.07837634334999e-05, "loss": 0.6788, "step": 22092 }, { "epoch": 1.49691713530727, "grad_norm": 5.128320217132568, "learning_rate": 7.078239441440209e-05, "loss": 0.5951, "step": 22093 }, { "epoch": 1.4969848905752423, "grad_norm": 5.513734817504883, "learning_rate": 7.078102539530427e-05, "loss": 0.6366, "step": 22094 }, { "epoch": 1.4970526458432143, "grad_norm": 6.584788799285889, "learning_rate": 7.077965637620645e-05, "loss": 0.7007, "step": 22095 }, { "epoch": 1.4971204011111863, "grad_norm": 6.0423583984375, "learning_rate": 7.077828735710863e-05, "loss": 0.6221, "step": 22096 }, { "epoch": 1.4971881563791585, "grad_norm": 8.032084465026855, "learning_rate": 7.077691833801082e-05, "loss": 0.5191, "step": 22097 }, { "epoch": 1.4972559116471307, "grad_norm": 4.563541889190674, "learning_rate": 7.0775549318913e-05, "loss": 0.5638, "step": 22098 }, { "epoch": 1.4973236669151027, "grad_norm": 6.114015579223633, "learning_rate": 7.077418029981518e-05, "loss": 0.7095, "step": 22099 }, { "epoch": 1.4973914221830746, "grad_norm": 6.4325642585754395, "learning_rate": 7.077281128071736e-05, "loss": 0.585, "step": 22100 }, { "epoch": 1.4974591774510468, "grad_norm": 5.801186561584473, "learning_rate": 7.077144226161956e-05, "loss": 0.8223, "step": 22101 }, { "epoch": 1.4975269327190188, "grad_norm": 7.425800323486328, "learning_rate": 7.077007324252174e-05, "loss": 0.8282, "step": 22102 }, { "epoch": 1.497594687986991, "grad_norm": 5.9739909172058105, "learning_rate": 7.076870422342392e-05, "loss": 0.5279, "step": 22103 }, { "epoch": 1.497662443254963, "grad_norm": 5.428225040435791, "learning_rate": 7.07673352043261e-05, "loss": 0.6302, "step": 22104 }, { "epoch": 1.4977301985229352, "grad_norm": 6.988409042358398, "learning_rate": 7.076596618522828e-05, "loss": 0.6331, "step": 22105 }, { "epoch": 1.4977979537909072, "grad_norm": 4.8965325355529785, "learning_rate": 7.076459716613047e-05, "loss": 0.631, "step": 22106 }, { "epoch": 1.4978657090588794, "grad_norm": 7.882279396057129, "learning_rate": 7.076322814703265e-05, "loss": 0.8488, "step": 22107 }, { "epoch": 1.4979334643268514, "grad_norm": 6.032040596008301, "learning_rate": 7.076185912793483e-05, "loss": 0.8727, "step": 22108 }, { "epoch": 1.4980012195948236, "grad_norm": 4.491772174835205, "learning_rate": 7.076049010883701e-05, "loss": 0.6253, "step": 22109 }, { "epoch": 1.4980689748627956, "grad_norm": 4.536585330963135, "learning_rate": 7.07591210897392e-05, "loss": 0.8034, "step": 22110 }, { "epoch": 1.4981367301307675, "grad_norm": 4.501182556152344, "learning_rate": 7.075775207064139e-05, "loss": 0.7292, "step": 22111 }, { "epoch": 1.4982044853987397, "grad_norm": 4.431582450866699, "learning_rate": 7.075638305154357e-05, "loss": 0.6318, "step": 22112 }, { "epoch": 1.498272240666712, "grad_norm": 7.827727317810059, "learning_rate": 7.075501403244576e-05, "loss": 0.5015, "step": 22113 }, { "epoch": 1.498339995934684, "grad_norm": 5.825408458709717, "learning_rate": 7.075364501334794e-05, "loss": 0.6435, "step": 22114 }, { "epoch": 1.498407751202656, "grad_norm": 4.42203950881958, "learning_rate": 7.075227599425012e-05, "loss": 0.5549, "step": 22115 }, { "epoch": 1.498475506470628, "grad_norm": 4.553739547729492, "learning_rate": 7.075090697515232e-05, "loss": 0.738, "step": 22116 }, { "epoch": 1.4985432617386003, "grad_norm": 4.36562967300415, "learning_rate": 7.07495379560545e-05, "loss": 0.7095, "step": 22117 }, { "epoch": 1.4986110170065723, "grad_norm": 5.109296798706055, "learning_rate": 7.074816893695668e-05, "loss": 0.7394, "step": 22118 }, { "epoch": 1.4986787722745443, "grad_norm": 4.153988838195801, "learning_rate": 7.074679991785886e-05, "loss": 0.5823, "step": 22119 }, { "epoch": 1.4987465275425165, "grad_norm": 5.033723831176758, "learning_rate": 7.074543089876105e-05, "loss": 0.7033, "step": 22120 }, { "epoch": 1.4988142828104885, "grad_norm": 6.226871967315674, "learning_rate": 7.074406187966323e-05, "loss": 0.6971, "step": 22121 }, { "epoch": 1.4988820380784607, "grad_norm": 5.1524200439453125, "learning_rate": 7.074269286056541e-05, "loss": 0.6503, "step": 22122 }, { "epoch": 1.4989497933464326, "grad_norm": 4.258758068084717, "learning_rate": 7.074132384146759e-05, "loss": 0.6974, "step": 22123 }, { "epoch": 1.4990175486144048, "grad_norm": 6.550229549407959, "learning_rate": 7.073995482236979e-05, "loss": 0.782, "step": 22124 }, { "epoch": 1.4990853038823768, "grad_norm": 6.167341232299805, "learning_rate": 7.073858580327197e-05, "loss": 0.6029, "step": 22125 }, { "epoch": 1.499153059150349, "grad_norm": 6.208250522613525, "learning_rate": 7.073721678417415e-05, "loss": 0.6505, "step": 22126 }, { "epoch": 1.499220814418321, "grad_norm": 7.001287460327148, "learning_rate": 7.073584776507633e-05, "loss": 1.0634, "step": 22127 }, { "epoch": 1.4992885696862932, "grad_norm": 4.142703056335449, "learning_rate": 7.07344787459785e-05, "loss": 0.5524, "step": 22128 }, { "epoch": 1.4993563249542652, "grad_norm": 6.056220054626465, "learning_rate": 7.07331097268807e-05, "loss": 0.7421, "step": 22129 }, { "epoch": 1.4994240802222372, "grad_norm": 6.19115686416626, "learning_rate": 7.073174070778288e-05, "loss": 0.7036, "step": 22130 }, { "epoch": 1.4994918354902094, "grad_norm": 5.699278831481934, "learning_rate": 7.073037168868506e-05, "loss": 0.7779, "step": 22131 }, { "epoch": 1.4995595907581816, "grad_norm": 5.574917793273926, "learning_rate": 7.072900266958724e-05, "loss": 0.6823, "step": 22132 }, { "epoch": 1.4996273460261536, "grad_norm": 6.929681301116943, "learning_rate": 7.072763365048944e-05, "loss": 0.8248, "step": 22133 }, { "epoch": 1.4996951012941255, "grad_norm": 6.755744934082031, "learning_rate": 7.072626463139162e-05, "loss": 0.7413, "step": 22134 }, { "epoch": 1.4996951012941255, "eval_loss": 0.6909573078155518, "eval_noise_accuracy": 0.0, "eval_runtime": 1457.9579, "eval_samples_per_second": 3.525, "eval_steps_per_second": 0.221, "eval_wer": 60.10979334635811, "step": 22134 }, { "epoch": 1.4997628565620977, "grad_norm": 5.470348358154297, "learning_rate": 7.07248956122938e-05, "loss": 0.6105, "step": 22135 }, { "epoch": 1.4998306118300697, "grad_norm": 5.315043926239014, "learning_rate": 7.072352659319598e-05, "loss": 0.7327, "step": 22136 }, { "epoch": 1.499898367098042, "grad_norm": 4.8134765625, "learning_rate": 7.072215757409816e-05, "loss": 0.4919, "step": 22137 }, { "epoch": 1.499966122366014, "grad_norm": 4.735479354858398, "learning_rate": 7.072078855500035e-05, "loss": 0.5045, "step": 22138 }, { "epoch": 1.500033877633986, "grad_norm": 4.6653666496276855, "learning_rate": 7.071941953590253e-05, "loss": 0.5543, "step": 22139 }, { "epoch": 1.500101632901958, "grad_norm": 7.3937578201293945, "learning_rate": 7.071805051680471e-05, "loss": 0.7844, "step": 22140 }, { "epoch": 1.50016938816993, "grad_norm": 6.37563943862915, "learning_rate": 7.071668149770689e-05, "loss": 0.5891, "step": 22141 }, { "epoch": 1.5002371434379023, "grad_norm": 7.23908805847168, "learning_rate": 7.071531247860907e-05, "loss": 0.6121, "step": 22142 }, { "epoch": 1.5003048987058745, "grad_norm": 6.817636013031006, "learning_rate": 7.071394345951127e-05, "loss": 0.6649, "step": 22143 }, { "epoch": 1.5003726539738464, "grad_norm": 7.064699172973633, "learning_rate": 7.071257444041345e-05, "loss": 0.5636, "step": 22144 }, { "epoch": 1.5004404092418184, "grad_norm": 6.335339069366455, "learning_rate": 7.071120542131563e-05, "loss": 0.8745, "step": 22145 }, { "epoch": 1.5005081645097906, "grad_norm": 5.996376991271973, "learning_rate": 7.070983640221781e-05, "loss": 0.676, "step": 22146 }, { "epoch": 1.5005759197777628, "grad_norm": 5.682497024536133, "learning_rate": 7.070846738312e-05, "loss": 0.7216, "step": 22147 }, { "epoch": 1.5006436750457348, "grad_norm": 5.991323947906494, "learning_rate": 7.070709836402218e-05, "loss": 0.6779, "step": 22148 }, { "epoch": 1.5007114303137068, "grad_norm": 5.495023250579834, "learning_rate": 7.070572934492436e-05, "loss": 0.5199, "step": 22149 }, { "epoch": 1.500779185581679, "grad_norm": 5.68798828125, "learning_rate": 7.070436032582654e-05, "loss": 0.6394, "step": 22150 }, { "epoch": 1.5008469408496512, "grad_norm": 7.054636478424072, "learning_rate": 7.070299130672872e-05, "loss": 0.599, "step": 22151 }, { "epoch": 1.5009146961176232, "grad_norm": 5.194554328918457, "learning_rate": 7.070162228763092e-05, "loss": 0.647, "step": 22152 }, { "epoch": 1.5009824513855952, "grad_norm": 5.042093753814697, "learning_rate": 7.07002532685331e-05, "loss": 0.8606, "step": 22153 }, { "epoch": 1.5010502066535674, "grad_norm": 4.215078830718994, "learning_rate": 7.069888424943528e-05, "loss": 0.6676, "step": 22154 }, { "epoch": 1.5011179619215393, "grad_norm": 5.007110118865967, "learning_rate": 7.069751523033746e-05, "loss": 0.6965, "step": 22155 }, { "epoch": 1.5011857171895113, "grad_norm": 4.144171714782715, "learning_rate": 7.069614621123965e-05, "loss": 0.5897, "step": 22156 }, { "epoch": 1.5012534724574835, "grad_norm": 4.688210487365723, "learning_rate": 7.069477719214183e-05, "loss": 0.5875, "step": 22157 }, { "epoch": 1.5013212277254557, "grad_norm": 4.319125652313232, "learning_rate": 7.069340817304401e-05, "loss": 0.5517, "step": 22158 }, { "epoch": 1.5013889829934277, "grad_norm": 4.990036487579346, "learning_rate": 7.06920391539462e-05, "loss": 0.5655, "step": 22159 }, { "epoch": 1.5014567382613997, "grad_norm": 5.914369583129883, "learning_rate": 7.069067013484839e-05, "loss": 0.8887, "step": 22160 }, { "epoch": 1.501524493529372, "grad_norm": 5.2788214683532715, "learning_rate": 7.068930111575057e-05, "loss": 0.6428, "step": 22161 }, { "epoch": 1.501592248797344, "grad_norm": 10.430497169494629, "learning_rate": 7.068793209665276e-05, "loss": 0.7738, "step": 22162 }, { "epoch": 1.501660004065316, "grad_norm": 5.06930685043335, "learning_rate": 7.068656307755494e-05, "loss": 0.4761, "step": 22163 }, { "epoch": 1.501727759333288, "grad_norm": 5.921952724456787, "learning_rate": 7.068519405845712e-05, "loss": 0.8236, "step": 22164 }, { "epoch": 1.5017955146012603, "grad_norm": 6.596759796142578, "learning_rate": 7.06838250393593e-05, "loss": 0.6754, "step": 22165 }, { "epoch": 1.5018632698692325, "grad_norm": 7.066503524780273, "learning_rate": 7.06824560202615e-05, "loss": 0.4936, "step": 22166 }, { "epoch": 1.5019310251372044, "grad_norm": 6.866799831390381, "learning_rate": 7.068108700116368e-05, "loss": 0.6835, "step": 22167 }, { "epoch": 1.5019987804051764, "grad_norm": 6.146024227142334, "learning_rate": 7.067971798206586e-05, "loss": 0.5745, "step": 22168 }, { "epoch": 1.5020665356731486, "grad_norm": 6.32077169418335, "learning_rate": 7.067834896296804e-05, "loss": 0.666, "step": 22169 }, { "epoch": 1.5021342909411208, "grad_norm": 4.872300624847412, "learning_rate": 7.067697994387023e-05, "loss": 0.8006, "step": 22170 }, { "epoch": 1.5022020462090928, "grad_norm": 9.164327621459961, "learning_rate": 7.067561092477241e-05, "loss": 0.782, "step": 22171 }, { "epoch": 1.5022698014770648, "grad_norm": 6.054755687713623, "learning_rate": 7.067424190567459e-05, "loss": 0.7208, "step": 22172 }, { "epoch": 1.502337556745037, "grad_norm": 6.5664215087890625, "learning_rate": 7.067287288657677e-05, "loss": 0.6148, "step": 22173 }, { "epoch": 1.502405312013009, "grad_norm": 5.45991325378418, "learning_rate": 7.067150386747895e-05, "loss": 0.5726, "step": 22174 }, { "epoch": 1.502473067280981, "grad_norm": 5.362268447875977, "learning_rate": 7.067013484838115e-05, "loss": 0.5258, "step": 22175 }, { "epoch": 1.5025408225489532, "grad_norm": 5.967691898345947, "learning_rate": 7.066876582928333e-05, "loss": 0.6706, "step": 22176 }, { "epoch": 1.5026085778169254, "grad_norm": 5.97809362411499, "learning_rate": 7.06673968101855e-05, "loss": 0.6776, "step": 22177 }, { "epoch": 1.5026763330848973, "grad_norm": 7.201791763305664, "learning_rate": 7.066602779108769e-05, "loss": 0.6677, "step": 22178 }, { "epoch": 1.5027440883528693, "grad_norm": 5.959704399108887, "learning_rate": 7.066465877198988e-05, "loss": 0.501, "step": 22179 }, { "epoch": 1.5028118436208415, "grad_norm": 5.209079265594482, "learning_rate": 7.066328975289206e-05, "loss": 0.7005, "step": 22180 }, { "epoch": 1.5028795988888137, "grad_norm": 4.95795202255249, "learning_rate": 7.066192073379424e-05, "loss": 0.9431, "step": 22181 }, { "epoch": 1.5029473541567857, "grad_norm": 4.934592247009277, "learning_rate": 7.066055171469642e-05, "loss": 0.5895, "step": 22182 }, { "epoch": 1.5030151094247577, "grad_norm": 6.107872009277344, "learning_rate": 7.06591826955986e-05, "loss": 0.855, "step": 22183 }, { "epoch": 1.50308286469273, "grad_norm": 5.504770278930664, "learning_rate": 7.06578136765008e-05, "loss": 0.5653, "step": 22184 }, { "epoch": 1.503150619960702, "grad_norm": 7.380399227142334, "learning_rate": 7.065644465740298e-05, "loss": 0.6958, "step": 22185 }, { "epoch": 1.503218375228674, "grad_norm": 5.88925313949585, "learning_rate": 7.065507563830516e-05, "loss": 0.6346, "step": 22186 }, { "epoch": 1.503286130496646, "grad_norm": 5.662038803100586, "learning_rate": 7.065370661920734e-05, "loss": 0.6149, "step": 22187 }, { "epoch": 1.5033538857646183, "grad_norm": 6.538374900817871, "learning_rate": 7.065233760010952e-05, "loss": 0.7529, "step": 22188 }, { "epoch": 1.5034216410325902, "grad_norm": 6.005514621734619, "learning_rate": 7.065096858101171e-05, "loss": 0.595, "step": 22189 }, { "epoch": 1.5034893963005622, "grad_norm": 5.606465816497803, "learning_rate": 7.064959956191389e-05, "loss": 0.7288, "step": 22190 }, { "epoch": 1.5035571515685344, "grad_norm": 6.46964693069458, "learning_rate": 7.064823054281607e-05, "loss": 0.898, "step": 22191 }, { "epoch": 1.5036249068365066, "grad_norm": 4.417416572570801, "learning_rate": 7.064686152371825e-05, "loss": 0.7164, "step": 22192 }, { "epoch": 1.5036926621044786, "grad_norm": 6.45339822769165, "learning_rate": 7.064549250462045e-05, "loss": 0.8433, "step": 22193 }, { "epoch": 1.5037604173724506, "grad_norm": 4.921087741851807, "learning_rate": 7.064412348552263e-05, "loss": 0.6289, "step": 22194 }, { "epoch": 1.5038281726404228, "grad_norm": 4.555755138397217, "learning_rate": 7.06427544664248e-05, "loss": 0.4889, "step": 22195 }, { "epoch": 1.503895927908395, "grad_norm": 6.459226131439209, "learning_rate": 7.064138544732699e-05, "loss": 0.7962, "step": 22196 }, { "epoch": 1.503963683176367, "grad_norm": 6.906379222869873, "learning_rate": 7.064001642822917e-05, "loss": 0.9198, "step": 22197 }, { "epoch": 1.504031438444339, "grad_norm": 5.618672847747803, "learning_rate": 7.063864740913136e-05, "loss": 0.7847, "step": 22198 }, { "epoch": 1.5040991937123112, "grad_norm": 5.024678707122803, "learning_rate": 7.063727839003354e-05, "loss": 0.6363, "step": 22199 }, { "epoch": 1.5041669489802834, "grad_norm": 4.62335729598999, "learning_rate": 7.063590937093572e-05, "loss": 0.4682, "step": 22200 }, { "epoch": 1.5042347042482553, "grad_norm": 6.205492973327637, "learning_rate": 7.06345403518379e-05, "loss": 0.8261, "step": 22201 }, { "epoch": 1.5043024595162273, "grad_norm": 6.2861127853393555, "learning_rate": 7.06331713327401e-05, "loss": 0.6435, "step": 22202 }, { "epoch": 1.5043702147841995, "grad_norm": 5.228672981262207, "learning_rate": 7.063180231364228e-05, "loss": 0.6261, "step": 22203 }, { "epoch": 1.5044379700521715, "grad_norm": 7.495336055755615, "learning_rate": 7.063043329454446e-05, "loss": 0.8861, "step": 22204 }, { "epoch": 1.5045057253201435, "grad_norm": 5.331389427185059, "learning_rate": 7.062906427544665e-05, "loss": 0.5736, "step": 22205 }, { "epoch": 1.5045734805881157, "grad_norm": 5.61313009262085, "learning_rate": 7.062769525634883e-05, "loss": 0.6112, "step": 22206 }, { "epoch": 1.5046412358560879, "grad_norm": 5.374246120452881, "learning_rate": 7.062632623725101e-05, "loss": 0.4356, "step": 22207 }, { "epoch": 1.5047089911240599, "grad_norm": 5.933205604553223, "learning_rate": 7.06249572181532e-05, "loss": 0.7676, "step": 22208 }, { "epoch": 1.5047767463920318, "grad_norm": 4.853750705718994, "learning_rate": 7.062358819905539e-05, "loss": 0.4773, "step": 22209 }, { "epoch": 1.504844501660004, "grad_norm": 6.45151948928833, "learning_rate": 7.062221917995757e-05, "loss": 0.8512, "step": 22210 }, { "epoch": 1.5049122569279763, "grad_norm": 4.8151044845581055, "learning_rate": 7.062085016085976e-05, "loss": 0.5964, "step": 22211 }, { "epoch": 1.5049800121959482, "grad_norm": 7.664250373840332, "learning_rate": 7.061948114176194e-05, "loss": 0.7549, "step": 22212 }, { "epoch": 1.5050477674639202, "grad_norm": 5.535643577575684, "learning_rate": 7.061811212266412e-05, "loss": 0.6514, "step": 22213 }, { "epoch": 1.5051155227318924, "grad_norm": 5.310704231262207, "learning_rate": 7.06167431035663e-05, "loss": 0.6334, "step": 22214 }, { "epoch": 1.5051832779998646, "grad_norm": 6.788402080535889, "learning_rate": 7.061537408446848e-05, "loss": 0.9118, "step": 22215 }, { "epoch": 1.5052510332678366, "grad_norm": 4.413593769073486, "learning_rate": 7.061400506537067e-05, "loss": 0.6359, "step": 22216 }, { "epoch": 1.5053187885358086, "grad_norm": 5.105248928070068, "learning_rate": 7.061263604627285e-05, "loss": 0.5327, "step": 22217 }, { "epoch": 1.5053865438037808, "grad_norm": 5.584339141845703, "learning_rate": 7.061126702717504e-05, "loss": 0.651, "step": 22218 }, { "epoch": 1.505454299071753, "grad_norm": 5.5387349128723145, "learning_rate": 7.060989800807722e-05, "loss": 0.6219, "step": 22219 }, { "epoch": 1.505522054339725, "grad_norm": 4.123884201049805, "learning_rate": 7.06085289889794e-05, "loss": 0.7002, "step": 22220 }, { "epoch": 1.505589809607697, "grad_norm": 4.572237491607666, "learning_rate": 7.060715996988159e-05, "loss": 0.5506, "step": 22221 }, { "epoch": 1.5056575648756692, "grad_norm": 5.152343273162842, "learning_rate": 7.060579095078377e-05, "loss": 0.4775, "step": 22222 }, { "epoch": 1.5057253201436411, "grad_norm": 4.12152099609375, "learning_rate": 7.060442193168595e-05, "loss": 0.8111, "step": 22223 }, { "epoch": 1.5057930754116131, "grad_norm": 4.691867828369141, "learning_rate": 7.060305291258813e-05, "loss": 0.6849, "step": 22224 }, { "epoch": 1.5058608306795853, "grad_norm": 6.672001361846924, "learning_rate": 7.060168389349032e-05, "loss": 0.7342, "step": 22225 }, { "epoch": 1.5059285859475575, "grad_norm": 5.941802024841309, "learning_rate": 7.06003148743925e-05, "loss": 0.8838, "step": 22226 }, { "epoch": 1.5059963412155295, "grad_norm": 9.313305854797363, "learning_rate": 7.059894585529469e-05, "loss": 0.6405, "step": 22227 }, { "epoch": 1.5060640964835015, "grad_norm": 5.606958389282227, "learning_rate": 7.059757683619687e-05, "loss": 0.6253, "step": 22228 }, { "epoch": 1.5061318517514737, "grad_norm": 5.680193901062012, "learning_rate": 7.059620781709905e-05, "loss": 0.8544, "step": 22229 }, { "epoch": 1.5061996070194459, "grad_norm": 5.846311092376709, "learning_rate": 7.059483879800124e-05, "loss": 0.6775, "step": 22230 }, { "epoch": 1.5062673622874179, "grad_norm": 4.5619730949401855, "learning_rate": 7.059346977890342e-05, "loss": 0.609, "step": 22231 }, { "epoch": 1.5063351175553898, "grad_norm": 6.482041835784912, "learning_rate": 7.05921007598056e-05, "loss": 0.5768, "step": 22232 }, { "epoch": 1.506402872823362, "grad_norm": 4.798067092895508, "learning_rate": 7.059073174070778e-05, "loss": 0.6025, "step": 22233 }, { "epoch": 1.5064706280913343, "grad_norm": 5.587893962860107, "learning_rate": 7.058936272160997e-05, "loss": 0.8273, "step": 22234 }, { "epoch": 1.5065383833593062, "grad_norm": 5.879166603088379, "learning_rate": 7.058799370251216e-05, "loss": 0.7624, "step": 22235 }, { "epoch": 1.5066061386272782, "grad_norm": 5.056779861450195, "learning_rate": 7.058662468341434e-05, "loss": 0.6761, "step": 22236 }, { "epoch": 1.5066738938952504, "grad_norm": 7.048844814300537, "learning_rate": 7.058525566431652e-05, "loss": 0.7082, "step": 22237 }, { "epoch": 1.5067416491632224, "grad_norm": 4.962614059448242, "learning_rate": 7.05838866452187e-05, "loss": 0.5715, "step": 22238 }, { "epoch": 1.5068094044311944, "grad_norm": 9.45591926574707, "learning_rate": 7.058251762612089e-05, "loss": 0.5109, "step": 22239 }, { "epoch": 1.5068771596991666, "grad_norm": 5.57602071762085, "learning_rate": 7.058114860702307e-05, "loss": 0.7123, "step": 22240 }, { "epoch": 1.5069449149671388, "grad_norm": 6.191617965698242, "learning_rate": 7.057977958792525e-05, "loss": 0.813, "step": 22241 }, { "epoch": 1.5070126702351108, "grad_norm": 5.721245765686035, "learning_rate": 7.057841056882743e-05, "loss": 0.6447, "step": 22242 }, { "epoch": 1.5070804255030827, "grad_norm": 5.897307395935059, "learning_rate": 7.057704154972961e-05, "loss": 0.6374, "step": 22243 }, { "epoch": 1.507148180771055, "grad_norm": 4.921908378601074, "learning_rate": 7.05756725306318e-05, "loss": 0.5839, "step": 22244 }, { "epoch": 1.5072159360390271, "grad_norm": 6.693628311157227, "learning_rate": 7.057430351153399e-05, "loss": 0.8302, "step": 22245 }, { "epoch": 1.5072836913069991, "grad_norm": 6.015374660491943, "learning_rate": 7.057293449243617e-05, "loss": 0.6703, "step": 22246 }, { "epoch": 1.507351446574971, "grad_norm": 7.743432998657227, "learning_rate": 7.057156547333835e-05, "loss": 0.7017, "step": 22247 }, { "epoch": 1.5074192018429433, "grad_norm": 5.661342144012451, "learning_rate": 7.057019645424054e-05, "loss": 0.6787, "step": 22248 }, { "epoch": 1.5074869571109155, "grad_norm": 5.7096967697143555, "learning_rate": 7.056882743514272e-05, "loss": 0.7591, "step": 22249 }, { "epoch": 1.5075547123788875, "grad_norm": 6.533322334289551, "learning_rate": 7.05674584160449e-05, "loss": 0.6596, "step": 22250 }, { "epoch": 1.5076224676468595, "grad_norm": 5.634537220001221, "learning_rate": 7.05660893969471e-05, "loss": 0.9337, "step": 22251 }, { "epoch": 1.5076902229148317, "grad_norm": 5.945096969604492, "learning_rate": 7.056472037784928e-05, "loss": 0.816, "step": 22252 }, { "epoch": 1.5077579781828037, "grad_norm": 4.189168930053711, "learning_rate": 7.056335135875146e-05, "loss": 0.6417, "step": 22253 }, { "epoch": 1.5078257334507756, "grad_norm": 5.911306381225586, "learning_rate": 7.056198233965365e-05, "loss": 0.6075, "step": 22254 }, { "epoch": 1.5078934887187478, "grad_norm": 3.5864310264587402, "learning_rate": 7.056061332055583e-05, "loss": 0.4995, "step": 22255 }, { "epoch": 1.50796124398672, "grad_norm": 5.799752235412598, "learning_rate": 7.055924430145801e-05, "loss": 0.765, "step": 22256 }, { "epoch": 1.508028999254692, "grad_norm": 4.708792209625244, "learning_rate": 7.05578752823602e-05, "loss": 0.7408, "step": 22257 }, { "epoch": 1.508096754522664, "grad_norm": 7.255552768707275, "learning_rate": 7.055650626326238e-05, "loss": 0.7446, "step": 22258 }, { "epoch": 1.5081645097906362, "grad_norm": 5.5177836418151855, "learning_rate": 7.055513724416456e-05, "loss": 0.8678, "step": 22259 }, { "epoch": 1.5082322650586084, "grad_norm": 9.735072135925293, "learning_rate": 7.055376822506675e-05, "loss": 0.6964, "step": 22260 }, { "epoch": 1.5083000203265804, "grad_norm": 6.3877854347229, "learning_rate": 7.055239920596893e-05, "loss": 0.7315, "step": 22261 }, { "epoch": 1.5083677755945524, "grad_norm": 6.044626712799072, "learning_rate": 7.055103018687112e-05, "loss": 0.7221, "step": 22262 }, { "epoch": 1.5084355308625246, "grad_norm": 8.029038429260254, "learning_rate": 7.05496611677733e-05, "loss": 0.5674, "step": 22263 }, { "epoch": 1.5085032861304968, "grad_norm": 6.695366382598877, "learning_rate": 7.054829214867548e-05, "loss": 0.4348, "step": 22264 }, { "epoch": 1.5085710413984688, "grad_norm": 4.653717994689941, "learning_rate": 7.054692312957766e-05, "loss": 0.5804, "step": 22265 }, { "epoch": 1.5086387966664407, "grad_norm": 6.013125419616699, "learning_rate": 7.054555411047985e-05, "loss": 0.6294, "step": 22266 }, { "epoch": 1.508706551934413, "grad_norm": 5.728625297546387, "learning_rate": 7.054418509138203e-05, "loss": 0.6806, "step": 22267 }, { "epoch": 1.5087743072023851, "grad_norm": 12.73327922821045, "learning_rate": 7.054281607228421e-05, "loss": 0.692, "step": 22268 }, { "epoch": 1.508842062470357, "grad_norm": 7.58489990234375, "learning_rate": 7.05414470531864e-05, "loss": 0.5598, "step": 22269 }, { "epoch": 1.508909817738329, "grad_norm": 11.422286033630371, "learning_rate": 7.054007803408858e-05, "loss": 0.7521, "step": 22270 }, { "epoch": 1.5089775730063013, "grad_norm": 9.40255069732666, "learning_rate": 7.053870901499077e-05, "loss": 0.6138, "step": 22271 }, { "epoch": 1.5090453282742733, "grad_norm": 7.956184387207031, "learning_rate": 7.053733999589295e-05, "loss": 0.7497, "step": 22272 }, { "epoch": 1.5091130835422453, "grad_norm": 7.175859451293945, "learning_rate": 7.053597097679513e-05, "loss": 0.8331, "step": 22273 }, { "epoch": 1.5091808388102175, "grad_norm": 5.444225311279297, "learning_rate": 7.053460195769731e-05, "loss": 0.6266, "step": 22274 }, { "epoch": 1.5092485940781897, "grad_norm": 4.91699743270874, "learning_rate": 7.053323293859949e-05, "loss": 0.5853, "step": 22275 }, { "epoch": 1.5093163493461617, "grad_norm": 5.104366779327393, "learning_rate": 7.053186391950168e-05, "loss": 0.592, "step": 22276 }, { "epoch": 1.5093841046141336, "grad_norm": 5.728134632110596, "learning_rate": 7.053049490040387e-05, "loss": 0.5374, "step": 22277 }, { "epoch": 1.5094518598821058, "grad_norm": 6.349438667297363, "learning_rate": 7.052912588130605e-05, "loss": 0.8378, "step": 22278 }, { "epoch": 1.509519615150078, "grad_norm": 5.697259426116943, "learning_rate": 7.052775686220823e-05, "loss": 0.6823, "step": 22279 }, { "epoch": 1.50958737041805, "grad_norm": 7.426070213317871, "learning_rate": 7.052638784311042e-05, "loss": 0.5653, "step": 22280 }, { "epoch": 1.509655125686022, "grad_norm": 4.2285661697387695, "learning_rate": 7.05250188240126e-05, "loss": 0.4616, "step": 22281 }, { "epoch": 1.5097228809539942, "grad_norm": 4.592179775238037, "learning_rate": 7.052364980491478e-05, "loss": 0.5226, "step": 22282 }, { "epoch": 1.5097906362219664, "grad_norm": 4.854991436004639, "learning_rate": 7.052228078581696e-05, "loss": 0.5344, "step": 22283 }, { "epoch": 1.5098583914899384, "grad_norm": 4.455118179321289, "learning_rate": 7.052091176671914e-05, "loss": 0.4128, "step": 22284 }, { "epoch": 1.5099261467579104, "grad_norm": 4.998953342437744, "learning_rate": 7.051954274762133e-05, "loss": 0.726, "step": 22285 }, { "epoch": 1.5099939020258826, "grad_norm": 7.910358905792236, "learning_rate": 7.051817372852352e-05, "loss": 0.7176, "step": 22286 }, { "epoch": 1.5100616572938546, "grad_norm": 5.212973117828369, "learning_rate": 7.05168047094257e-05, "loss": 0.5387, "step": 22287 }, { "epoch": 1.5101294125618265, "grad_norm": 4.160896301269531, "learning_rate": 7.051543569032788e-05, "loss": 0.629, "step": 22288 }, { "epoch": 1.5101971678297987, "grad_norm": 4.466064453125, "learning_rate": 7.051406667123007e-05, "loss": 0.5847, "step": 22289 }, { "epoch": 1.510264923097771, "grad_norm": 5.891330242156982, "learning_rate": 7.051269765213225e-05, "loss": 0.6847, "step": 22290 }, { "epoch": 1.510332678365743, "grad_norm": 6.103761672973633, "learning_rate": 7.051132863303443e-05, "loss": 0.7644, "step": 22291 }, { "epoch": 1.510400433633715, "grad_norm": 5.938507080078125, "learning_rate": 7.050995961393661e-05, "loss": 0.7496, "step": 22292 }, { "epoch": 1.510468188901687, "grad_norm": 4.91427755355835, "learning_rate": 7.050859059483879e-05, "loss": 0.5302, "step": 22293 }, { "epoch": 1.5105359441696593, "grad_norm": 4.669795513153076, "learning_rate": 7.050722157574099e-05, "loss": 0.4937, "step": 22294 }, { "epoch": 1.5106036994376313, "grad_norm": 5.670753002166748, "learning_rate": 7.050585255664317e-05, "loss": 0.7258, "step": 22295 }, { "epoch": 1.5106714547056033, "grad_norm": 5.892319202423096, "learning_rate": 7.050448353754535e-05, "loss": 0.5601, "step": 22296 }, { "epoch": 1.5107392099735755, "grad_norm": 7.190999507904053, "learning_rate": 7.050311451844753e-05, "loss": 0.5324, "step": 22297 }, { "epoch": 1.5108069652415477, "grad_norm": 6.036324977874756, "learning_rate": 7.050174549934972e-05, "loss": 0.5621, "step": 22298 }, { "epoch": 1.5108747205095197, "grad_norm": 5.489318370819092, "learning_rate": 7.05003764802519e-05, "loss": 0.532, "step": 22299 }, { "epoch": 1.5109424757774916, "grad_norm": 6.703178882598877, "learning_rate": 7.049900746115408e-05, "loss": 0.88, "step": 22300 }, { "epoch": 1.5110102310454638, "grad_norm": 5.5264058113098145, "learning_rate": 7.049763844205627e-05, "loss": 0.6322, "step": 22301 }, { "epoch": 1.5110779863134358, "grad_norm": 8.514747619628906, "learning_rate": 7.049626942295845e-05, "loss": 0.5135, "step": 22302 }, { "epoch": 1.5111457415814078, "grad_norm": 5.439519882202148, "learning_rate": 7.049490040386064e-05, "loss": 0.7053, "step": 22303 }, { "epoch": 1.51121349684938, "grad_norm": 9.745363235473633, "learning_rate": 7.049353138476283e-05, "loss": 0.6385, "step": 22304 }, { "epoch": 1.5112812521173522, "grad_norm": 5.665804862976074, "learning_rate": 7.049216236566501e-05, "loss": 0.4884, "step": 22305 }, { "epoch": 1.5113490073853242, "grad_norm": 6.14369010925293, "learning_rate": 7.049079334656719e-05, "loss": 0.9473, "step": 22306 }, { "epoch": 1.5114167626532962, "grad_norm": 5.319423675537109, "learning_rate": 7.048942432746937e-05, "loss": 0.7235, "step": 22307 }, { "epoch": 1.5114845179212684, "grad_norm": 6.848842620849609, "learning_rate": 7.048805530837156e-05, "loss": 0.5781, "step": 22308 }, { "epoch": 1.5115522731892406, "grad_norm": 4.191540241241455, "learning_rate": 7.048668628927374e-05, "loss": 0.5802, "step": 22309 }, { "epoch": 1.5116200284572126, "grad_norm": 5.211784839630127, "learning_rate": 7.048531727017592e-05, "loss": 0.5804, "step": 22310 }, { "epoch": 1.5116877837251845, "grad_norm": 5.957603931427002, "learning_rate": 7.04839482510781e-05, "loss": 0.7856, "step": 22311 }, { "epoch": 1.5117555389931567, "grad_norm": 6.998965263366699, "learning_rate": 7.04825792319803e-05, "loss": 0.7977, "step": 22312 }, { "epoch": 1.511823294261129, "grad_norm": 5.273736476898193, "learning_rate": 7.048121021288248e-05, "loss": 0.8015, "step": 22313 }, { "epoch": 1.511891049529101, "grad_norm": 7.150277614593506, "learning_rate": 7.047984119378466e-05, "loss": 0.6246, "step": 22314 }, { "epoch": 1.511958804797073, "grad_norm": 6.9883809089660645, "learning_rate": 7.047847217468684e-05, "loss": 0.7347, "step": 22315 }, { "epoch": 1.512026560065045, "grad_norm": 5.494887351989746, "learning_rate": 7.047710315558902e-05, "loss": 0.7036, "step": 22316 }, { "epoch": 1.5120943153330173, "grad_norm": 5.120458126068115, "learning_rate": 7.047573413649121e-05, "loss": 0.7689, "step": 22317 }, { "epoch": 1.512162070600989, "grad_norm": 5.649974346160889, "learning_rate": 7.04743651173934e-05, "loss": 0.6532, "step": 22318 }, { "epoch": 1.5122298258689613, "grad_norm": 9.796534538269043, "learning_rate": 7.047299609829557e-05, "loss": 0.892, "step": 22319 }, { "epoch": 1.5122975811369335, "grad_norm": 7.772524833679199, "learning_rate": 7.047162707919776e-05, "loss": 0.6921, "step": 22320 }, { "epoch": 1.5123653364049054, "grad_norm": 5.502130508422852, "learning_rate": 7.047025806009994e-05, "loss": 0.6129, "step": 22321 }, { "epoch": 1.5124330916728774, "grad_norm": 5.142253398895264, "learning_rate": 7.046888904100213e-05, "loss": 0.8022, "step": 22322 }, { "epoch": 1.5125008469408496, "grad_norm": 5.344793319702148, "learning_rate": 7.046752002190431e-05, "loss": 0.7125, "step": 22323 }, { "epoch": 1.5125686022088218, "grad_norm": 4.0786638259887695, "learning_rate": 7.046615100280649e-05, "loss": 0.579, "step": 22324 }, { "epoch": 1.5126363574767938, "grad_norm": 8.060111999511719, "learning_rate": 7.046478198370867e-05, "loss": 0.6021, "step": 22325 }, { "epoch": 1.5127041127447658, "grad_norm": 6.3836140632629395, "learning_rate": 7.046341296461086e-05, "loss": 0.5897, "step": 22326 }, { "epoch": 1.512771868012738, "grad_norm": 5.069344997406006, "learning_rate": 7.046204394551304e-05, "loss": 0.6192, "step": 22327 }, { "epoch": 1.5128396232807102, "grad_norm": 7.597228050231934, "learning_rate": 7.046067492641523e-05, "loss": 0.6112, "step": 22328 }, { "epoch": 1.5129073785486822, "grad_norm": 5.850467681884766, "learning_rate": 7.04593059073174e-05, "loss": 0.5706, "step": 22329 }, { "epoch": 1.5129751338166542, "grad_norm": 4.591449737548828, "learning_rate": 7.045793688821959e-05, "loss": 0.5998, "step": 22330 }, { "epoch": 1.5130428890846264, "grad_norm": 9.603981018066406, "learning_rate": 7.045656786912178e-05, "loss": 0.5136, "step": 22331 }, { "epoch": 1.5131106443525986, "grad_norm": 5.033041477203369, "learning_rate": 7.045519885002396e-05, "loss": 0.8018, "step": 22332 }, { "epoch": 1.5131783996205705, "grad_norm": 7.199285984039307, "learning_rate": 7.045382983092614e-05, "loss": 0.6365, "step": 22333 }, { "epoch": 1.5132461548885425, "grad_norm": 5.051862716674805, "learning_rate": 7.045246081182832e-05, "loss": 0.6472, "step": 22334 }, { "epoch": 1.5133139101565147, "grad_norm": 9.934009552001953, "learning_rate": 7.045109179273051e-05, "loss": 0.6963, "step": 22335 }, { "epoch": 1.5133816654244867, "grad_norm": 7.765810489654541, "learning_rate": 7.04497227736327e-05, "loss": 0.4803, "step": 22336 }, { "epoch": 1.5134494206924587, "grad_norm": 6.106934547424316, "learning_rate": 7.044835375453488e-05, "loss": 0.6407, "step": 22337 }, { "epoch": 1.513517175960431, "grad_norm": 6.490913391113281, "learning_rate": 7.044698473543706e-05, "loss": 0.7421, "step": 22338 }, { "epoch": 1.513584931228403, "grad_norm": 5.642805576324463, "learning_rate": 7.044561571633924e-05, "loss": 0.7007, "step": 22339 }, { "epoch": 1.513652686496375, "grad_norm": 5.4891557693481445, "learning_rate": 7.044424669724143e-05, "loss": 0.6374, "step": 22340 }, { "epoch": 1.513720441764347, "grad_norm": 5.614975929260254, "learning_rate": 7.044287767814361e-05, "loss": 0.6047, "step": 22341 }, { "epoch": 1.5137881970323193, "grad_norm": 7.834063529968262, "learning_rate": 7.044150865904579e-05, "loss": 0.7152, "step": 22342 }, { "epoch": 1.5138559523002915, "grad_norm": 6.585364818572998, "learning_rate": 7.044013963994797e-05, "loss": 0.9639, "step": 22343 }, { "epoch": 1.5139237075682634, "grad_norm": 5.369997978210449, "learning_rate": 7.043877062085016e-05, "loss": 0.5924, "step": 22344 }, { "epoch": 1.5139914628362354, "grad_norm": 6.3812971115112305, "learning_rate": 7.043740160175235e-05, "loss": 0.7547, "step": 22345 }, { "epoch": 1.5140592181042076, "grad_norm": 7.231853008270264, "learning_rate": 7.043603258265453e-05, "loss": 0.7751, "step": 22346 }, { "epoch": 1.5141269733721798, "grad_norm": 5.480526924133301, "learning_rate": 7.043466356355672e-05, "loss": 0.7582, "step": 22347 }, { "epoch": 1.5141947286401518, "grad_norm": 5.2574052810668945, "learning_rate": 7.04332945444589e-05, "loss": 0.5124, "step": 22348 }, { "epoch": 1.5142624839081238, "grad_norm": 6.836224555969238, "learning_rate": 7.043192552536108e-05, "loss": 0.6781, "step": 22349 }, { "epoch": 1.514330239176096, "grad_norm": 4.84830904006958, "learning_rate": 7.043055650626327e-05, "loss": 0.6512, "step": 22350 }, { "epoch": 1.514397994444068, "grad_norm": 6.948510646820068, "learning_rate": 7.042918748716545e-05, "loss": 0.6043, "step": 22351 }, { "epoch": 1.51446574971204, "grad_norm": 4.599993705749512, "learning_rate": 7.042781846806763e-05, "loss": 0.5107, "step": 22352 }, { "epoch": 1.5145335049800122, "grad_norm": 9.981439590454102, "learning_rate": 7.042644944896981e-05, "loss": 0.7296, "step": 22353 }, { "epoch": 1.5146012602479844, "grad_norm": 5.8340606689453125, "learning_rate": 7.042508042987201e-05, "loss": 0.6462, "step": 22354 }, { "epoch": 1.5146690155159563, "grad_norm": 5.768056392669678, "learning_rate": 7.042371141077419e-05, "loss": 0.6284, "step": 22355 }, { "epoch": 1.5147367707839283, "grad_norm": 5.7194976806640625, "learning_rate": 7.042234239167637e-05, "loss": 0.558, "step": 22356 }, { "epoch": 1.5148045260519005, "grad_norm": 5.636447429656982, "learning_rate": 7.042097337257855e-05, "loss": 0.7207, "step": 22357 }, { "epoch": 1.5148722813198727, "grad_norm": 7.578178405761719, "learning_rate": 7.041960435348074e-05, "loss": 0.6111, "step": 22358 }, { "epoch": 1.5149400365878447, "grad_norm": 4.841312885284424, "learning_rate": 7.041823533438292e-05, "loss": 0.6335, "step": 22359 }, { "epoch": 1.5150077918558167, "grad_norm": 5.917613983154297, "learning_rate": 7.04168663152851e-05, "loss": 0.7179, "step": 22360 }, { "epoch": 1.515075547123789, "grad_norm": 5.030770778656006, "learning_rate": 7.041549729618728e-05, "loss": 0.5464, "step": 22361 }, { "epoch": 1.515143302391761, "grad_norm": 5.381250858306885, "learning_rate": 7.041412827708947e-05, "loss": 0.6938, "step": 22362 }, { "epoch": 1.515211057659733, "grad_norm": 7.178584575653076, "learning_rate": 7.041275925799166e-05, "loss": 0.8229, "step": 22363 }, { "epoch": 1.515278812927705, "grad_norm": 5.554690361022949, "learning_rate": 7.041139023889384e-05, "loss": 0.5896, "step": 22364 }, { "epoch": 1.5153465681956773, "grad_norm": 6.872684955596924, "learning_rate": 7.041002121979602e-05, "loss": 0.5527, "step": 22365 }, { "epoch": 1.5154143234636495, "grad_norm": 7.210061073303223, "learning_rate": 7.04086522006982e-05, "loss": 0.8501, "step": 22366 }, { "epoch": 1.5154820787316212, "grad_norm": 7.928126335144043, "learning_rate": 7.04072831816004e-05, "loss": 0.6665, "step": 22367 }, { "epoch": 1.5155498339995934, "grad_norm": 5.621495723724365, "learning_rate": 7.040591416250257e-05, "loss": 0.7451, "step": 22368 }, { "epoch": 1.5156175892675656, "grad_norm": 5.531711101531982, "learning_rate": 7.040454514340475e-05, "loss": 0.5925, "step": 22369 }, { "epoch": 1.5156853445355376, "grad_norm": 11.534028053283691, "learning_rate": 7.040317612430693e-05, "loss": 0.5258, "step": 22370 }, { "epoch": 1.5157530998035096, "grad_norm": 5.217789649963379, "learning_rate": 7.040180710520912e-05, "loss": 0.7068, "step": 22371 }, { "epoch": 1.5158208550714818, "grad_norm": 9.081314086914062, "learning_rate": 7.040043808611131e-05, "loss": 0.6314, "step": 22372 }, { "epoch": 1.515888610339454, "grad_norm": 8.901869773864746, "learning_rate": 7.039906906701349e-05, "loss": 0.6004, "step": 22373 }, { "epoch": 1.515956365607426, "grad_norm": 5.067282676696777, "learning_rate": 7.039770004791567e-05, "loss": 0.8366, "step": 22374 }, { "epoch": 1.516024120875398, "grad_norm": 5.735860347747803, "learning_rate": 7.039633102881785e-05, "loss": 0.5529, "step": 22375 }, { "epoch": 1.5160918761433702, "grad_norm": 5.530388832092285, "learning_rate": 7.039496200972003e-05, "loss": 0.7682, "step": 22376 }, { "epoch": 1.5161596314113424, "grad_norm": 6.000363349914551, "learning_rate": 7.039359299062222e-05, "loss": 0.8906, "step": 22377 }, { "epoch": 1.5162273866793143, "grad_norm": 5.767869472503662, "learning_rate": 7.03922239715244e-05, "loss": 0.5661, "step": 22378 }, { "epoch": 1.5162951419472863, "grad_norm": 5.506709098815918, "learning_rate": 7.039085495242659e-05, "loss": 0.6215, "step": 22379 }, { "epoch": 1.5163628972152585, "grad_norm": 5.871591567993164, "learning_rate": 7.038948593332877e-05, "loss": 0.8314, "step": 22380 }, { "epoch": 1.5164306524832307, "grad_norm": 7.795440196990967, "learning_rate": 7.038811691423096e-05, "loss": 0.5627, "step": 22381 }, { "epoch": 1.5164984077512027, "grad_norm": 5.383607387542725, "learning_rate": 7.038674789513314e-05, "loss": 0.6024, "step": 22382 }, { "epoch": 1.5165661630191747, "grad_norm": 5.657977104187012, "learning_rate": 7.038537887603532e-05, "loss": 0.5922, "step": 22383 }, { "epoch": 1.5166339182871469, "grad_norm": 4.469954967498779, "learning_rate": 7.03840098569375e-05, "loss": 0.6374, "step": 22384 }, { "epoch": 1.5167016735551189, "grad_norm": 5.126262187957764, "learning_rate": 7.038264083783968e-05, "loss": 0.8193, "step": 22385 }, { "epoch": 1.5167694288230908, "grad_norm": 4.9243574142456055, "learning_rate": 7.038127181874187e-05, "loss": 0.5742, "step": 22386 }, { "epoch": 1.516837184091063, "grad_norm": 6.247372150421143, "learning_rate": 7.037990279964405e-05, "loss": 0.7461, "step": 22387 }, { "epoch": 1.5169049393590353, "grad_norm": 5.127383708953857, "learning_rate": 7.037853378054624e-05, "loss": 0.7277, "step": 22388 }, { "epoch": 1.5169726946270072, "grad_norm": 5.404329299926758, "learning_rate": 7.037716476144842e-05, "loss": 0.8147, "step": 22389 }, { "epoch": 1.5170404498949792, "grad_norm": 5.164083003997803, "learning_rate": 7.037579574235061e-05, "loss": 0.661, "step": 22390 }, { "epoch": 1.5171082051629514, "grad_norm": 6.5904011726379395, "learning_rate": 7.037442672325279e-05, "loss": 0.6823, "step": 22391 }, { "epoch": 1.5171759604309236, "grad_norm": 6.067331314086914, "learning_rate": 7.037305770415497e-05, "loss": 0.777, "step": 22392 }, { "epoch": 1.5172437156988956, "grad_norm": 4.082434177398682, "learning_rate": 7.037168868505716e-05, "loss": 0.5358, "step": 22393 }, { "epoch": 1.5173114709668676, "grad_norm": 5.288156986236572, "learning_rate": 7.037031966595934e-05, "loss": 0.6287, "step": 22394 }, { "epoch": 1.5173792262348398, "grad_norm": 8.06344223022461, "learning_rate": 7.036895064686152e-05, "loss": 0.7557, "step": 22395 }, { "epoch": 1.517446981502812, "grad_norm": 5.464791774749756, "learning_rate": 7.036758162776372e-05, "loss": 0.4207, "step": 22396 }, { "epoch": 1.517514736770784, "grad_norm": 5.889843463897705, "learning_rate": 7.03662126086659e-05, "loss": 0.503, "step": 22397 }, { "epoch": 1.517582492038756, "grad_norm": 6.167796611785889, "learning_rate": 7.036484358956808e-05, "loss": 0.6654, "step": 22398 }, { "epoch": 1.5176502473067282, "grad_norm": 4.803990840911865, "learning_rate": 7.036347457047027e-05, "loss": 0.768, "step": 22399 }, { "epoch": 1.5177180025747001, "grad_norm": 5.469797134399414, "learning_rate": 7.036210555137245e-05, "loss": 0.6229, "step": 22400 }, { "epoch": 1.5177857578426721, "grad_norm": 8.538554191589355, "learning_rate": 7.036073653227463e-05, "loss": 0.6499, "step": 22401 }, { "epoch": 1.5178535131106443, "grad_norm": 4.686540603637695, "learning_rate": 7.035936751317681e-05, "loss": 0.5959, "step": 22402 }, { "epoch": 1.5179212683786165, "grad_norm": 6.199711322784424, "learning_rate": 7.0357998494079e-05, "loss": 0.7092, "step": 22403 }, { "epoch": 1.5179890236465885, "grad_norm": 5.322921276092529, "learning_rate": 7.035662947498119e-05, "loss": 0.5549, "step": 22404 }, { "epoch": 1.5180567789145605, "grad_norm": 7.0216217041015625, "learning_rate": 7.035526045588337e-05, "loss": 0.8613, "step": 22405 }, { "epoch": 1.5181245341825327, "grad_norm": 6.468639850616455, "learning_rate": 7.035389143678555e-05, "loss": 0.5845, "step": 22406 }, { "epoch": 1.5181922894505049, "grad_norm": 5.599163055419922, "learning_rate": 7.035252241768773e-05, "loss": 0.7101, "step": 22407 }, { "epoch": 1.5182600447184769, "grad_norm": 4.921897888183594, "learning_rate": 7.035115339858991e-05, "loss": 0.6311, "step": 22408 }, { "epoch": 1.5183277999864488, "grad_norm": 5.494976043701172, "learning_rate": 7.03497843794921e-05, "loss": 0.5063, "step": 22409 }, { "epoch": 1.518395555254421, "grad_norm": 5.6555657386779785, "learning_rate": 7.034841536039428e-05, "loss": 0.6237, "step": 22410 }, { "epoch": 1.5184633105223933, "grad_norm": 4.794495582580566, "learning_rate": 7.034704634129646e-05, "loss": 0.5352, "step": 22411 }, { "epoch": 1.5185310657903652, "grad_norm": 5.097184181213379, "learning_rate": 7.034567732219864e-05, "loss": 0.6945, "step": 22412 }, { "epoch": 1.5185988210583372, "grad_norm": 5.010088920593262, "learning_rate": 7.034430830310084e-05, "loss": 0.6608, "step": 22413 }, { "epoch": 1.5186665763263094, "grad_norm": 6.494564056396484, "learning_rate": 7.034293928400302e-05, "loss": 0.792, "step": 22414 }, { "epoch": 1.5187343315942816, "grad_norm": 6.040915489196777, "learning_rate": 7.03415702649052e-05, "loss": 0.7446, "step": 22415 }, { "epoch": 1.5188020868622534, "grad_norm": 7.190260410308838, "learning_rate": 7.034020124580738e-05, "loss": 0.5075, "step": 22416 }, { "epoch": 1.5188698421302256, "grad_norm": 5.16436767578125, "learning_rate": 7.033883222670956e-05, "loss": 0.7262, "step": 22417 }, { "epoch": 1.5189375973981978, "grad_norm": 5.387063026428223, "learning_rate": 7.033746320761175e-05, "loss": 0.5693, "step": 22418 }, { "epoch": 1.5190053526661698, "grad_norm": 7.082568168640137, "learning_rate": 7.033609418851393e-05, "loss": 0.7764, "step": 22419 }, { "epoch": 1.5190731079341417, "grad_norm": 8.208841323852539, "learning_rate": 7.033472516941611e-05, "loss": 0.6265, "step": 22420 }, { "epoch": 1.519140863202114, "grad_norm": 6.008127212524414, "learning_rate": 7.03333561503183e-05, "loss": 0.6501, "step": 22421 }, { "epoch": 1.5192086184700861, "grad_norm": 6.240877151489258, "learning_rate": 7.033198713122049e-05, "loss": 0.835, "step": 22422 }, { "epoch": 1.5192763737380581, "grad_norm": 4.103389739990234, "learning_rate": 7.033061811212267e-05, "loss": 0.7431, "step": 22423 }, { "epoch": 1.51934412900603, "grad_norm": 6.0646748542785645, "learning_rate": 7.032924909302485e-05, "loss": 0.4727, "step": 22424 }, { "epoch": 1.5194118842740023, "grad_norm": 5.002461910247803, "learning_rate": 7.032788007392703e-05, "loss": 0.6363, "step": 22425 }, { "epoch": 1.5194796395419745, "grad_norm": 5.638789176940918, "learning_rate": 7.032651105482921e-05, "loss": 0.6835, "step": 22426 }, { "epoch": 1.5195473948099465, "grad_norm": 6.260293006896973, "learning_rate": 7.03251420357314e-05, "loss": 0.6816, "step": 22427 }, { "epoch": 1.5196151500779185, "grad_norm": 6.349669933319092, "learning_rate": 7.032377301663358e-05, "loss": 0.6546, "step": 22428 }, { "epoch": 1.5196829053458907, "grad_norm": 5.251279830932617, "learning_rate": 7.032240399753576e-05, "loss": 0.6069, "step": 22429 }, { "epoch": 1.5197506606138629, "grad_norm": 8.201900482177734, "learning_rate": 7.032103497843795e-05, "loss": 0.6554, "step": 22430 }, { "epoch": 1.5198184158818349, "grad_norm": 8.616822242736816, "learning_rate": 7.031966595934013e-05, "loss": 0.9714, "step": 22431 }, { "epoch": 1.5198861711498068, "grad_norm": 5.238753795623779, "learning_rate": 7.031829694024232e-05, "loss": 0.6695, "step": 22432 }, { "epoch": 1.519953926417779, "grad_norm": 5.719534397125244, "learning_rate": 7.03169279211445e-05, "loss": 0.6966, "step": 22433 }, { "epoch": 1.520021681685751, "grad_norm": 6.013643741607666, "learning_rate": 7.031555890204668e-05, "loss": 0.7747, "step": 22434 }, { "epoch": 1.520089436953723, "grad_norm": 7.125194549560547, "learning_rate": 7.031418988294886e-05, "loss": 0.8943, "step": 22435 }, { "epoch": 1.5201571922216952, "grad_norm": 6.042530536651611, "learning_rate": 7.031282086385105e-05, "loss": 0.771, "step": 22436 }, { "epoch": 1.5202249474896674, "grad_norm": 8.490594863891602, "learning_rate": 7.031145184475323e-05, "loss": 0.6932, "step": 22437 }, { "epoch": 1.5202927027576394, "grad_norm": 6.026341915130615, "learning_rate": 7.031008282565541e-05, "loss": 0.7633, "step": 22438 }, { "epoch": 1.5203604580256114, "grad_norm": 5.5878496170043945, "learning_rate": 7.030871380655761e-05, "loss": 0.7924, "step": 22439 }, { "epoch": 1.5204282132935836, "grad_norm": 5.581553936004639, "learning_rate": 7.030734478745979e-05, "loss": 0.7162, "step": 22440 }, { "epoch": 1.5204959685615558, "grad_norm": 7.152987003326416, "learning_rate": 7.030597576836197e-05, "loss": 0.7442, "step": 22441 }, { "epoch": 1.5205637238295278, "grad_norm": 4.562288761138916, "learning_rate": 7.030460674926416e-05, "loss": 0.6724, "step": 22442 }, { "epoch": 1.5206314790974997, "grad_norm": 7.852826118469238, "learning_rate": 7.030323773016634e-05, "loss": 0.815, "step": 22443 }, { "epoch": 1.520699234365472, "grad_norm": 5.828840255737305, "learning_rate": 7.030186871106852e-05, "loss": 0.5428, "step": 22444 }, { "epoch": 1.5207669896334441, "grad_norm": 4.176302909851074, "learning_rate": 7.030049969197072e-05, "loss": 0.4754, "step": 22445 }, { "epoch": 1.5208347449014161, "grad_norm": 5.789463043212891, "learning_rate": 7.02991306728729e-05, "loss": 0.9156, "step": 22446 }, { "epoch": 1.520902500169388, "grad_norm": 8.38708782196045, "learning_rate": 7.029776165377508e-05, "loss": 0.6318, "step": 22447 }, { "epoch": 1.5209702554373603, "grad_norm": 5.213127136230469, "learning_rate": 7.029639263467726e-05, "loss": 0.7619, "step": 22448 }, { "epoch": 1.5210380107053323, "grad_norm": 5.454869270324707, "learning_rate": 7.029502361557944e-05, "loss": 0.5869, "step": 22449 }, { "epoch": 1.5211057659733043, "grad_norm": 5.584767818450928, "learning_rate": 7.029365459648163e-05, "loss": 0.6769, "step": 22450 }, { "epoch": 1.5211735212412765, "grad_norm": 7.310015678405762, "learning_rate": 7.029228557738381e-05, "loss": 0.448, "step": 22451 }, { "epoch": 1.5212412765092487, "grad_norm": 6.12633752822876, "learning_rate": 7.0290916558286e-05, "loss": 0.6605, "step": 22452 }, { "epoch": 1.5213090317772207, "grad_norm": 7.955782413482666, "learning_rate": 7.028954753918817e-05, "loss": 0.7401, "step": 22453 }, { "epoch": 1.5213767870451926, "grad_norm": 4.579739093780518, "learning_rate": 7.028817852009035e-05, "loss": 0.5958, "step": 22454 }, { "epoch": 1.5214445423131648, "grad_norm": 6.126485347747803, "learning_rate": 7.028680950099255e-05, "loss": 0.7554, "step": 22455 }, { "epoch": 1.521512297581137, "grad_norm": 5.5529327392578125, "learning_rate": 7.028544048189473e-05, "loss": 0.6725, "step": 22456 }, { "epoch": 1.521580052849109, "grad_norm": 6.668890953063965, "learning_rate": 7.028407146279691e-05, "loss": 0.5976, "step": 22457 }, { "epoch": 1.521647808117081, "grad_norm": 6.742668628692627, "learning_rate": 7.028270244369909e-05, "loss": 0.6139, "step": 22458 }, { "epoch": 1.5217155633850532, "grad_norm": 4.40582799911499, "learning_rate": 7.028133342460128e-05, "loss": 0.442, "step": 22459 }, { "epoch": 1.5217833186530254, "grad_norm": 7.967436790466309, "learning_rate": 7.027996440550346e-05, "loss": 0.8895, "step": 22460 }, { "epoch": 1.5218510739209974, "grad_norm": 3.9633684158325195, "learning_rate": 7.027859538640564e-05, "loss": 0.4178, "step": 22461 }, { "epoch": 1.5219188291889694, "grad_norm": 5.418778896331787, "learning_rate": 7.027722636730782e-05, "loss": 0.6912, "step": 22462 }, { "epoch": 1.5219865844569416, "grad_norm": 8.723609924316406, "learning_rate": 7.027585734821e-05, "loss": 0.7046, "step": 22463 }, { "epoch": 1.5220543397249138, "grad_norm": 6.933527946472168, "learning_rate": 7.02744883291122e-05, "loss": 0.716, "step": 22464 }, { "epoch": 1.5221220949928855, "grad_norm": 6.939694404602051, "learning_rate": 7.027311931001438e-05, "loss": 0.7466, "step": 22465 }, { "epoch": 1.5221898502608577, "grad_norm": 5.284454822540283, "learning_rate": 7.027175029091656e-05, "loss": 0.7553, "step": 22466 }, { "epoch": 1.52225760552883, "grad_norm": 6.221249580383301, "learning_rate": 7.027038127181874e-05, "loss": 0.5285, "step": 22467 }, { "epoch": 1.522325360796802, "grad_norm": 5.290321350097656, "learning_rate": 7.026901225272093e-05, "loss": 0.7089, "step": 22468 }, { "epoch": 1.522393116064774, "grad_norm": 4.514113903045654, "learning_rate": 7.026764323362311e-05, "loss": 0.6501, "step": 22469 }, { "epoch": 1.522460871332746, "grad_norm": 5.20502233505249, "learning_rate": 7.02662742145253e-05, "loss": 0.7138, "step": 22470 }, { "epoch": 1.5225286266007183, "grad_norm": 5.53525972366333, "learning_rate": 7.026490519542747e-05, "loss": 0.7912, "step": 22471 }, { "epoch": 1.5225963818686903, "grad_norm": 6.453444957733154, "learning_rate": 7.026353617632965e-05, "loss": 0.6602, "step": 22472 }, { "epoch": 1.5226641371366623, "grad_norm": 6.805888652801514, "learning_rate": 7.026216715723185e-05, "loss": 0.7159, "step": 22473 }, { "epoch": 1.5227318924046345, "grad_norm": 6.966465473175049, "learning_rate": 7.026079813813403e-05, "loss": 0.703, "step": 22474 }, { "epoch": 1.5227996476726067, "grad_norm": 5.998817443847656, "learning_rate": 7.025942911903621e-05, "loss": 0.6096, "step": 22475 }, { "epoch": 1.5228674029405787, "grad_norm": 6.711500644683838, "learning_rate": 7.025806009993839e-05, "loss": 0.7525, "step": 22476 }, { "epoch": 1.5229351582085506, "grad_norm": 8.656238555908203, "learning_rate": 7.025669108084058e-05, "loss": 0.5749, "step": 22477 }, { "epoch": 1.5230029134765228, "grad_norm": 7.698650360107422, "learning_rate": 7.025532206174276e-05, "loss": 0.7068, "step": 22478 }, { "epoch": 1.523070668744495, "grad_norm": 5.568065166473389, "learning_rate": 7.025395304264494e-05, "loss": 0.5406, "step": 22479 }, { "epoch": 1.523138424012467, "grad_norm": 10.435654640197754, "learning_rate": 7.025258402354712e-05, "loss": 0.6972, "step": 22480 }, { "epoch": 1.523206179280439, "grad_norm": 6.424154281616211, "learning_rate": 7.02512150044493e-05, "loss": 0.7319, "step": 22481 }, { "epoch": 1.5232739345484112, "grad_norm": 6.674673080444336, "learning_rate": 7.02498459853515e-05, "loss": 0.6363, "step": 22482 }, { "epoch": 1.5233416898163832, "grad_norm": 4.952864646911621, "learning_rate": 7.024847696625368e-05, "loss": 0.6214, "step": 22483 }, { "epoch": 1.5234094450843552, "grad_norm": 4.8186798095703125, "learning_rate": 7.024710794715586e-05, "loss": 0.65, "step": 22484 }, { "epoch": 1.5234772003523274, "grad_norm": 7.224124908447266, "learning_rate": 7.024573892805805e-05, "loss": 0.6602, "step": 22485 }, { "epoch": 1.5235449556202996, "grad_norm": 6.324204921722412, "learning_rate": 7.024436990896023e-05, "loss": 0.6578, "step": 22486 }, { "epoch": 1.5236127108882715, "grad_norm": 5.314189434051514, "learning_rate": 7.024300088986241e-05, "loss": 0.6139, "step": 22487 }, { "epoch": 1.5236804661562435, "grad_norm": 5.117730617523193, "learning_rate": 7.024163187076461e-05, "loss": 0.7755, "step": 22488 }, { "epoch": 1.5237482214242157, "grad_norm": 7.3742828369140625, "learning_rate": 7.024026285166679e-05, "loss": 0.6746, "step": 22489 }, { "epoch": 1.523815976692188, "grad_norm": 6.3516058921813965, "learning_rate": 7.023889383256897e-05, "loss": 0.9182, "step": 22490 }, { "epoch": 1.52388373196016, "grad_norm": 6.394031524658203, "learning_rate": 7.023752481347116e-05, "loss": 0.617, "step": 22491 }, { "epoch": 1.523951487228132, "grad_norm": 5.576115608215332, "learning_rate": 7.023615579437334e-05, "loss": 0.7069, "step": 22492 }, { "epoch": 1.524019242496104, "grad_norm": 7.051174640655518, "learning_rate": 7.023478677527552e-05, "loss": 0.5732, "step": 22493 }, { "epoch": 1.5240869977640763, "grad_norm": 5.142271518707275, "learning_rate": 7.02334177561777e-05, "loss": 0.6664, "step": 22494 }, { "epoch": 1.5241547530320483, "grad_norm": 6.030731201171875, "learning_rate": 7.023204873707988e-05, "loss": 0.7991, "step": 22495 }, { "epoch": 1.5242225083000203, "grad_norm": 5.18792724609375, "learning_rate": 7.023067971798208e-05, "loss": 0.6281, "step": 22496 }, { "epoch": 1.5242902635679925, "grad_norm": 4.382856369018555, "learning_rate": 7.022931069888426e-05, "loss": 0.5804, "step": 22497 }, { "epoch": 1.5243580188359644, "grad_norm": 6.502490043640137, "learning_rate": 7.022794167978644e-05, "loss": 0.5618, "step": 22498 }, { "epoch": 1.5244257741039364, "grad_norm": 8.30154037475586, "learning_rate": 7.022657266068862e-05, "loss": 0.7179, "step": 22499 }, { "epoch": 1.5244935293719086, "grad_norm": 6.316712379455566, "learning_rate": 7.022520364159081e-05, "loss": 0.7012, "step": 22500 }, { "epoch": 1.5245612846398808, "grad_norm": 6.489625930786133, "learning_rate": 7.022383462249299e-05, "loss": 0.5677, "step": 22501 }, { "epoch": 1.5246290399078528, "grad_norm": 5.384828090667725, "learning_rate": 7.022246560339517e-05, "loss": 0.6287, "step": 22502 }, { "epoch": 1.5246967951758248, "grad_norm": 6.26328182220459, "learning_rate": 7.022109658429735e-05, "loss": 0.7702, "step": 22503 }, { "epoch": 1.524764550443797, "grad_norm": 8.258795738220215, "learning_rate": 7.021972756519953e-05, "loss": 0.8172, "step": 22504 }, { "epoch": 1.5248323057117692, "grad_norm": 4.789498805999756, "learning_rate": 7.021835854610173e-05, "loss": 0.6161, "step": 22505 }, { "epoch": 1.5249000609797412, "grad_norm": 4.029824256896973, "learning_rate": 7.021698952700391e-05, "loss": 0.4855, "step": 22506 }, { "epoch": 1.5249678162477132, "grad_norm": 4.8742780685424805, "learning_rate": 7.021562050790609e-05, "loss": 0.5027, "step": 22507 }, { "epoch": 1.5250355715156854, "grad_norm": 5.4516096115112305, "learning_rate": 7.021425148880827e-05, "loss": 0.6046, "step": 22508 }, { "epoch": 1.5251033267836576, "grad_norm": 7.072483062744141, "learning_rate": 7.021288246971045e-05, "loss": 0.6255, "step": 22509 }, { "epoch": 1.5251710820516295, "grad_norm": 5.213129997253418, "learning_rate": 7.021151345061264e-05, "loss": 0.6459, "step": 22510 }, { "epoch": 1.5252388373196015, "grad_norm": 5.9263014793396, "learning_rate": 7.021014443151482e-05, "loss": 0.8836, "step": 22511 }, { "epoch": 1.5253065925875737, "grad_norm": 5.860553741455078, "learning_rate": 7.0208775412417e-05, "loss": 0.6463, "step": 22512 }, { "epoch": 1.525374347855546, "grad_norm": 6.088913917541504, "learning_rate": 7.020740639331918e-05, "loss": 0.5565, "step": 22513 }, { "epoch": 1.5254421031235177, "grad_norm": 5.805849552154541, "learning_rate": 7.020603737422138e-05, "loss": 0.6649, "step": 22514 }, { "epoch": 1.52550985839149, "grad_norm": 6.5700554847717285, "learning_rate": 7.020466835512356e-05, "loss": 0.6337, "step": 22515 }, { "epoch": 1.525577613659462, "grad_norm": 4.032923221588135, "learning_rate": 7.020329933602574e-05, "loss": 0.6537, "step": 22516 }, { "epoch": 1.525645368927434, "grad_norm": 4.65299129486084, "learning_rate": 7.020193031692792e-05, "loss": 0.4669, "step": 22517 }, { "epoch": 1.525713124195406, "grad_norm": 5.296009063720703, "learning_rate": 7.02005612978301e-05, "loss": 0.6252, "step": 22518 }, { "epoch": 1.5257808794633783, "grad_norm": 4.73383092880249, "learning_rate": 7.01991922787323e-05, "loss": 0.6758, "step": 22519 }, { "epoch": 1.5258486347313505, "grad_norm": 4.164519786834717, "learning_rate": 7.019782325963447e-05, "loss": 0.5789, "step": 22520 }, { "epoch": 1.5259163899993224, "grad_norm": 6.923268795013428, "learning_rate": 7.019645424053665e-05, "loss": 0.7139, "step": 22521 }, { "epoch": 1.5259841452672944, "grad_norm": 5.95957088470459, "learning_rate": 7.019508522143883e-05, "loss": 0.6017, "step": 22522 }, { "epoch": 1.5260519005352666, "grad_norm": 7.275595188140869, "learning_rate": 7.019371620234103e-05, "loss": 0.7723, "step": 22523 }, { "epoch": 1.5261196558032388, "grad_norm": 5.6039886474609375, "learning_rate": 7.019234718324321e-05, "loss": 0.7237, "step": 22524 }, { "epoch": 1.5261874110712108, "grad_norm": 5.71054220199585, "learning_rate": 7.019097816414539e-05, "loss": 0.7378, "step": 22525 }, { "epoch": 1.5262551663391828, "grad_norm": 8.270402908325195, "learning_rate": 7.018960914504757e-05, "loss": 0.61, "step": 22526 }, { "epoch": 1.526322921607155, "grad_norm": 8.196904182434082, "learning_rate": 7.018824012594975e-05, "loss": 0.4905, "step": 22527 }, { "epoch": 1.5263906768751272, "grad_norm": 9.512206077575684, "learning_rate": 7.018687110685194e-05, "loss": 0.7347, "step": 22528 }, { "epoch": 1.5264584321430992, "grad_norm": 4.628918170928955, "learning_rate": 7.018550208775412e-05, "loss": 0.4884, "step": 22529 }, { "epoch": 1.5265261874110712, "grad_norm": 7.08717679977417, "learning_rate": 7.01841330686563e-05, "loss": 0.6767, "step": 22530 }, { "epoch": 1.5265939426790434, "grad_norm": 6.081893444061279, "learning_rate": 7.018276404955848e-05, "loss": 0.6623, "step": 22531 }, { "epoch": 1.5266616979470153, "grad_norm": 4.83754301071167, "learning_rate": 7.018139503046068e-05, "loss": 0.6236, "step": 22532 }, { "epoch": 1.5267294532149873, "grad_norm": 6.059781551361084, "learning_rate": 7.018002601136286e-05, "loss": 0.7642, "step": 22533 }, { "epoch": 1.5267972084829595, "grad_norm": 5.767043113708496, "learning_rate": 7.017865699226505e-05, "loss": 0.5519, "step": 22534 }, { "epoch": 1.5268649637509317, "grad_norm": 6.4423441886901855, "learning_rate": 7.017728797316723e-05, "loss": 0.8883, "step": 22535 }, { "epoch": 1.5269327190189037, "grad_norm": 5.8549933433532715, "learning_rate": 7.017591895406941e-05, "loss": 0.7156, "step": 22536 }, { "epoch": 1.5270004742868757, "grad_norm": 4.513707637786865, "learning_rate": 7.017454993497161e-05, "loss": 0.6596, "step": 22537 }, { "epoch": 1.527068229554848, "grad_norm": 6.184037685394287, "learning_rate": 7.017318091587379e-05, "loss": 0.5664, "step": 22538 }, { "epoch": 1.52713598482282, "grad_norm": 5.0629425048828125, "learning_rate": 7.017181189677597e-05, "loss": 0.6331, "step": 22539 }, { "epoch": 1.527203740090792, "grad_norm": 8.220196723937988, "learning_rate": 7.017044287767815e-05, "loss": 0.6779, "step": 22540 }, { "epoch": 1.527271495358764, "grad_norm": 5.681403636932373, "learning_rate": 7.016907385858033e-05, "loss": 0.9299, "step": 22541 }, { "epoch": 1.5273392506267363, "grad_norm": 6.6130571365356445, "learning_rate": 7.016770483948252e-05, "loss": 0.685, "step": 22542 }, { "epoch": 1.5274070058947085, "grad_norm": 5.672062873840332, "learning_rate": 7.01663358203847e-05, "loss": 0.7433, "step": 22543 }, { "epoch": 1.5274747611626804, "grad_norm": 7.770251274108887, "learning_rate": 7.016496680128688e-05, "loss": 0.5427, "step": 22544 }, { "epoch": 1.5275425164306524, "grad_norm": 5.521313667297363, "learning_rate": 7.016359778218906e-05, "loss": 0.6257, "step": 22545 }, { "epoch": 1.5276102716986246, "grad_norm": 6.573612213134766, "learning_rate": 7.016222876309126e-05, "loss": 0.8183, "step": 22546 }, { "epoch": 1.5276780269665966, "grad_norm": 5.668809413909912, "learning_rate": 7.016085974399344e-05, "loss": 0.5916, "step": 22547 }, { "epoch": 1.5277457822345686, "grad_norm": 5.366463661193848, "learning_rate": 7.015949072489562e-05, "loss": 0.7123, "step": 22548 }, { "epoch": 1.5278135375025408, "grad_norm": 6.205472469329834, "learning_rate": 7.01581217057978e-05, "loss": 0.6212, "step": 22549 }, { "epoch": 1.527881292770513, "grad_norm": 5.318550109863281, "learning_rate": 7.015675268669998e-05, "loss": 0.6462, "step": 22550 }, { "epoch": 1.527949048038485, "grad_norm": 6.859971523284912, "learning_rate": 7.015538366760217e-05, "loss": 0.5195, "step": 22551 }, { "epoch": 1.528016803306457, "grad_norm": 5.79581880569458, "learning_rate": 7.015401464850435e-05, "loss": 0.5339, "step": 22552 }, { "epoch": 1.5280845585744292, "grad_norm": 6.979801654815674, "learning_rate": 7.015264562940653e-05, "loss": 0.8986, "step": 22553 }, { "epoch": 1.5281523138424014, "grad_norm": 7.577960968017578, "learning_rate": 7.015127661030871e-05, "loss": 0.6907, "step": 22554 }, { "epoch": 1.5282200691103733, "grad_norm": 7.352293968200684, "learning_rate": 7.014990759121091e-05, "loss": 0.6039, "step": 22555 }, { "epoch": 1.5282878243783453, "grad_norm": 6.182598114013672, "learning_rate": 7.014853857211309e-05, "loss": 0.5459, "step": 22556 }, { "epoch": 1.5283555796463175, "grad_norm": 5.810654163360596, "learning_rate": 7.014716955301527e-05, "loss": 0.6698, "step": 22557 }, { "epoch": 1.5284233349142897, "grad_norm": 6.62141752243042, "learning_rate": 7.014580053391745e-05, "loss": 0.8372, "step": 22558 }, { "epoch": 1.5284910901822617, "grad_norm": 5.087663650512695, "learning_rate": 7.014443151481963e-05, "loss": 0.7286, "step": 22559 }, { "epoch": 1.5285588454502337, "grad_norm": 4.817636489868164, "learning_rate": 7.014306249572182e-05, "loss": 0.677, "step": 22560 }, { "epoch": 1.5286266007182059, "grad_norm": 5.013729095458984, "learning_rate": 7.0141693476624e-05, "loss": 0.6982, "step": 22561 }, { "epoch": 1.528694355986178, "grad_norm": 4.636411666870117, "learning_rate": 7.014032445752618e-05, "loss": 0.6031, "step": 22562 }, { "epoch": 1.5287621112541498, "grad_norm": 5.825439929962158, "learning_rate": 7.013895543842836e-05, "loss": 0.5314, "step": 22563 }, { "epoch": 1.528829866522122, "grad_norm": 4.623283386230469, "learning_rate": 7.013758641933054e-05, "loss": 0.6993, "step": 22564 }, { "epoch": 1.5288976217900943, "grad_norm": 6.470966339111328, "learning_rate": 7.013621740023274e-05, "loss": 0.858, "step": 22565 }, { "epoch": 1.5289653770580662, "grad_norm": 4.560364723205566, "learning_rate": 7.013484838113492e-05, "loss": 0.7173, "step": 22566 }, { "epoch": 1.5290331323260382, "grad_norm": 4.876102924346924, "learning_rate": 7.01334793620371e-05, "loss": 0.6131, "step": 22567 }, { "epoch": 1.5291008875940104, "grad_norm": 5.255398273468018, "learning_rate": 7.013211034293928e-05, "loss": 0.6105, "step": 22568 }, { "epoch": 1.5291686428619826, "grad_norm": 4.597468376159668, "learning_rate": 7.013074132384147e-05, "loss": 0.5651, "step": 22569 }, { "epoch": 1.5292363981299546, "grad_norm": 6.28586483001709, "learning_rate": 7.012937230474365e-05, "loss": 0.5679, "step": 22570 }, { "epoch": 1.5293041533979266, "grad_norm": 5.696252346038818, "learning_rate": 7.012800328564583e-05, "loss": 0.6001, "step": 22571 }, { "epoch": 1.5293719086658988, "grad_norm": 5.10597038269043, "learning_rate": 7.012663426654801e-05, "loss": 0.73, "step": 22572 }, { "epoch": 1.529439663933871, "grad_norm": 5.357786178588867, "learning_rate": 7.01252652474502e-05, "loss": 0.6327, "step": 22573 }, { "epoch": 1.529507419201843, "grad_norm": 5.285953521728516, "learning_rate": 7.012389622835239e-05, "loss": 0.6233, "step": 22574 }, { "epoch": 1.529575174469815, "grad_norm": 5.757745742797852, "learning_rate": 7.012252720925457e-05, "loss": 0.9602, "step": 22575 }, { "epoch": 1.5296429297377871, "grad_norm": 4.748471736907959, "learning_rate": 7.012115819015675e-05, "loss": 0.7317, "step": 22576 }, { "epoch": 1.5297106850057594, "grad_norm": 7.065159320831299, "learning_rate": 7.011978917105893e-05, "loss": 0.6263, "step": 22577 }, { "epoch": 1.5297784402737313, "grad_norm": 6.061386585235596, "learning_rate": 7.011842015196112e-05, "loss": 0.6016, "step": 22578 }, { "epoch": 1.5298461955417033, "grad_norm": 5.089146137237549, "learning_rate": 7.01170511328633e-05, "loss": 0.4764, "step": 22579 }, { "epoch": 1.5299139508096755, "grad_norm": 6.4331793785095215, "learning_rate": 7.011568211376548e-05, "loss": 0.9533, "step": 22580 }, { "epoch": 1.5299817060776475, "grad_norm": 5.454836845397949, "learning_rate": 7.011431309466768e-05, "loss": 0.6416, "step": 22581 }, { "epoch": 1.5300494613456195, "grad_norm": 7.712970733642578, "learning_rate": 7.011294407556986e-05, "loss": 0.7328, "step": 22582 }, { "epoch": 1.5301172166135917, "grad_norm": 5.853811740875244, "learning_rate": 7.011157505647204e-05, "loss": 0.6525, "step": 22583 }, { "epoch": 1.5301849718815639, "grad_norm": 7.068652629852295, "learning_rate": 7.011020603737423e-05, "loss": 0.6423, "step": 22584 }, { "epoch": 1.5302527271495359, "grad_norm": 7.900768280029297, "learning_rate": 7.010883701827641e-05, "loss": 0.7328, "step": 22585 }, { "epoch": 1.5303204824175078, "grad_norm": 6.233859539031982, "learning_rate": 7.010746799917859e-05, "loss": 0.687, "step": 22586 }, { "epoch": 1.53038823768548, "grad_norm": 5.554741382598877, "learning_rate": 7.010609898008079e-05, "loss": 0.6289, "step": 22587 }, { "epoch": 1.5304559929534522, "grad_norm": 6.925263404846191, "learning_rate": 7.010472996098297e-05, "loss": 0.6277, "step": 22588 }, { "epoch": 1.5305237482214242, "grad_norm": 6.040051460266113, "learning_rate": 7.010336094188515e-05, "loss": 0.5651, "step": 22589 }, { "epoch": 1.5305915034893962, "grad_norm": 8.645063400268555, "learning_rate": 7.010199192278733e-05, "loss": 0.759, "step": 22590 }, { "epoch": 1.5306592587573684, "grad_norm": 5.295748233795166, "learning_rate": 7.010062290368951e-05, "loss": 0.7293, "step": 22591 }, { "epoch": 1.5307270140253406, "grad_norm": 5.143377304077148, "learning_rate": 7.00992538845917e-05, "loss": 0.6297, "step": 22592 }, { "epoch": 1.5307947692933126, "grad_norm": 6.0404253005981445, "learning_rate": 7.009788486549388e-05, "loss": 0.7712, "step": 22593 }, { "epoch": 1.5308625245612846, "grad_norm": 6.719381332397461, "learning_rate": 7.009651584639606e-05, "loss": 0.7471, "step": 22594 }, { "epoch": 1.5309302798292568, "grad_norm": 7.8405561447143555, "learning_rate": 7.009514682729824e-05, "loss": 0.5781, "step": 22595 }, { "epoch": 1.5309980350972288, "grad_norm": 6.350671291351318, "learning_rate": 7.009377780820042e-05, "loss": 0.7759, "step": 22596 }, { "epoch": 1.5310657903652007, "grad_norm": 4.768768787384033, "learning_rate": 7.009240878910262e-05, "loss": 0.6241, "step": 22597 }, { "epoch": 1.531133545633173, "grad_norm": 7.058276653289795, "learning_rate": 7.00910397700048e-05, "loss": 0.7035, "step": 22598 }, { "epoch": 1.5312013009011451, "grad_norm": 3.7895913124084473, "learning_rate": 7.008967075090698e-05, "loss": 0.5606, "step": 22599 }, { "epoch": 1.5312690561691171, "grad_norm": 7.114832878112793, "learning_rate": 7.008830173180916e-05, "loss": 0.5596, "step": 22600 }, { "epoch": 1.531336811437089, "grad_norm": 4.337161540985107, "learning_rate": 7.008693271271135e-05, "loss": 0.4975, "step": 22601 }, { "epoch": 1.5314045667050613, "grad_norm": 11.510652542114258, "learning_rate": 7.008556369361353e-05, "loss": 0.7265, "step": 22602 }, { "epoch": 1.5314723219730335, "grad_norm": 8.857338905334473, "learning_rate": 7.008419467451571e-05, "loss": 0.6413, "step": 22603 }, { "epoch": 1.5315400772410055, "grad_norm": 7.3711442947387695, "learning_rate": 7.00828256554179e-05, "loss": 0.8293, "step": 22604 }, { "epoch": 1.5316078325089775, "grad_norm": 6.918889999389648, "learning_rate": 7.008145663632007e-05, "loss": 0.5024, "step": 22605 }, { "epoch": 1.5316755877769497, "grad_norm": 6.242055416107178, "learning_rate": 7.008008761722227e-05, "loss": 0.6755, "step": 22606 }, { "epoch": 1.5317433430449219, "grad_norm": 7.50224494934082, "learning_rate": 7.007871859812445e-05, "loss": 0.6813, "step": 22607 }, { "epoch": 1.5318110983128939, "grad_norm": 5.476876735687256, "learning_rate": 7.007734957902663e-05, "loss": 0.5339, "step": 22608 }, { "epoch": 1.5318788535808658, "grad_norm": 5.553281307220459, "learning_rate": 7.007598055992881e-05, "loss": 0.4773, "step": 22609 }, { "epoch": 1.531946608848838, "grad_norm": 6.337301254272461, "learning_rate": 7.0074611540831e-05, "loss": 0.6382, "step": 22610 }, { "epoch": 1.5320143641168102, "grad_norm": 7.283638954162598, "learning_rate": 7.007324252173318e-05, "loss": 0.6599, "step": 22611 }, { "epoch": 1.532082119384782, "grad_norm": 7.8495073318481445, "learning_rate": 7.007187350263536e-05, "loss": 0.5905, "step": 22612 }, { "epoch": 1.5321498746527542, "grad_norm": 5.374534606933594, "learning_rate": 7.007050448353754e-05, "loss": 0.6623, "step": 22613 }, { "epoch": 1.5322176299207264, "grad_norm": 3.8343098163604736, "learning_rate": 7.006913546443972e-05, "loss": 0.458, "step": 22614 }, { "epoch": 1.5322853851886984, "grad_norm": 5.785122394561768, "learning_rate": 7.006776644534192e-05, "loss": 0.8094, "step": 22615 }, { "epoch": 1.5323531404566704, "grad_norm": 7.64768648147583, "learning_rate": 7.00663974262441e-05, "loss": 0.6864, "step": 22616 }, { "epoch": 1.5324208957246426, "grad_norm": 3.8764166831970215, "learning_rate": 7.006502840714628e-05, "loss": 0.6529, "step": 22617 }, { "epoch": 1.5324886509926148, "grad_norm": 6.844041347503662, "learning_rate": 7.006365938804846e-05, "loss": 0.6369, "step": 22618 }, { "epoch": 1.5325564062605868, "grad_norm": 5.837104797363281, "learning_rate": 7.006229036895064e-05, "loss": 0.6844, "step": 22619 }, { "epoch": 1.5326241615285587, "grad_norm": 4.651566028594971, "learning_rate": 7.006092134985283e-05, "loss": 0.5835, "step": 22620 }, { "epoch": 1.532691916796531, "grad_norm": 14.277108192443848, "learning_rate": 7.005955233075501e-05, "loss": 0.6347, "step": 22621 }, { "epoch": 1.5327596720645031, "grad_norm": 4.900221824645996, "learning_rate": 7.00581833116572e-05, "loss": 0.6105, "step": 22622 }, { "epoch": 1.5328274273324751, "grad_norm": 6.724163055419922, "learning_rate": 7.005681429255937e-05, "loss": 0.8585, "step": 22623 }, { "epoch": 1.532895182600447, "grad_norm": 6.553070068359375, "learning_rate": 7.005544527346157e-05, "loss": 0.8789, "step": 22624 }, { "epoch": 1.5329629378684193, "grad_norm": 5.542783737182617, "learning_rate": 7.005407625436375e-05, "loss": 0.4303, "step": 22625 }, { "epoch": 1.5330306931363915, "grad_norm": 5.4968719482421875, "learning_rate": 7.005270723526593e-05, "loss": 0.7799, "step": 22626 }, { "epoch": 1.5330984484043635, "grad_norm": 6.269726753234863, "learning_rate": 7.005133821616812e-05, "loss": 0.575, "step": 22627 }, { "epoch": 1.5331662036723355, "grad_norm": 6.039109706878662, "learning_rate": 7.00499691970703e-05, "loss": 0.5947, "step": 22628 }, { "epoch": 1.5332339589403077, "grad_norm": 5.811922550201416, "learning_rate": 7.004860017797248e-05, "loss": 0.7683, "step": 22629 }, { "epoch": 1.5333017142082797, "grad_norm": 6.253519535064697, "learning_rate": 7.004723115887468e-05, "loss": 0.7882, "step": 22630 }, { "epoch": 1.5333694694762516, "grad_norm": 7.294342041015625, "learning_rate": 7.004586213977686e-05, "loss": 0.6345, "step": 22631 }, { "epoch": 1.5334372247442238, "grad_norm": 7.146905899047852, "learning_rate": 7.004449312067904e-05, "loss": 0.6707, "step": 22632 }, { "epoch": 1.533504980012196, "grad_norm": 6.84444522857666, "learning_rate": 7.004312410158123e-05, "loss": 0.8903, "step": 22633 }, { "epoch": 1.533572735280168, "grad_norm": 4.643156051635742, "learning_rate": 7.004175508248341e-05, "loss": 0.706, "step": 22634 }, { "epoch": 1.53364049054814, "grad_norm": 5.11895227432251, "learning_rate": 7.004038606338559e-05, "loss": 0.6222, "step": 22635 }, { "epoch": 1.5337082458161122, "grad_norm": 4.635238170623779, "learning_rate": 7.003901704428777e-05, "loss": 0.687, "step": 22636 }, { "epoch": 1.5337760010840844, "grad_norm": 6.294194221496582, "learning_rate": 7.003764802518995e-05, "loss": 0.7478, "step": 22637 }, { "epoch": 1.5338437563520564, "grad_norm": 5.841409683227539, "learning_rate": 7.003627900609215e-05, "loss": 0.6947, "step": 22638 }, { "epoch": 1.5339115116200284, "grad_norm": 6.817178726196289, "learning_rate": 7.003490998699433e-05, "loss": 0.6275, "step": 22639 }, { "epoch": 1.5339792668880006, "grad_norm": 5.358313083648682, "learning_rate": 7.003354096789651e-05, "loss": 0.5967, "step": 22640 }, { "epoch": 1.5340470221559728, "grad_norm": 5.5462727546691895, "learning_rate": 7.003217194879869e-05, "loss": 0.7578, "step": 22641 }, { "epoch": 1.5341147774239448, "grad_norm": 5.4626569747924805, "learning_rate": 7.003080292970087e-05, "loss": 0.7652, "step": 22642 }, { "epoch": 1.5341825326919167, "grad_norm": 9.104828834533691, "learning_rate": 7.002943391060306e-05, "loss": 0.6344, "step": 22643 }, { "epoch": 1.534250287959889, "grad_norm": 7.0004472732543945, "learning_rate": 7.002806489150524e-05, "loss": 0.7669, "step": 22644 }, { "epoch": 1.534318043227861, "grad_norm": 7.0266313552856445, "learning_rate": 7.002669587240742e-05, "loss": 0.6354, "step": 22645 }, { "epoch": 1.534385798495833, "grad_norm": 6.088472366333008, "learning_rate": 7.00253268533096e-05, "loss": 0.8415, "step": 22646 }, { "epoch": 1.534453553763805, "grad_norm": 7.766452789306641, "learning_rate": 7.00239578342118e-05, "loss": 0.8326, "step": 22647 }, { "epoch": 1.5345213090317773, "grad_norm": 5.286564350128174, "learning_rate": 7.002258881511398e-05, "loss": 0.5799, "step": 22648 }, { "epoch": 1.5345890642997493, "grad_norm": 5.185844898223877, "learning_rate": 7.002121979601616e-05, "loss": 0.7654, "step": 22649 }, { "epoch": 1.5346568195677213, "grad_norm": 5.954355716705322, "learning_rate": 7.001985077691834e-05, "loss": 0.4765, "step": 22650 }, { "epoch": 1.5347245748356935, "grad_norm": 4.4063401222229, "learning_rate": 7.001848175782052e-05, "loss": 0.6151, "step": 22651 }, { "epoch": 1.5347923301036657, "grad_norm": 9.531402587890625, "learning_rate": 7.001711273872271e-05, "loss": 0.7017, "step": 22652 }, { "epoch": 1.5348600853716377, "grad_norm": 7.194576740264893, "learning_rate": 7.001574371962489e-05, "loss": 0.7798, "step": 22653 }, { "epoch": 1.5349278406396096, "grad_norm": 8.557249069213867, "learning_rate": 7.001437470052707e-05, "loss": 0.6804, "step": 22654 }, { "epoch": 1.5349955959075818, "grad_norm": 6.502073287963867, "learning_rate": 7.001300568142925e-05, "loss": 0.7123, "step": 22655 }, { "epoch": 1.535063351175554, "grad_norm": 5.97341775894165, "learning_rate": 7.001163666233145e-05, "loss": 0.571, "step": 22656 }, { "epoch": 1.535131106443526, "grad_norm": 6.931853294372559, "learning_rate": 7.001026764323363e-05, "loss": 0.7953, "step": 22657 }, { "epoch": 1.535198861711498, "grad_norm": 11.52963638305664, "learning_rate": 7.000889862413581e-05, "loss": 0.7059, "step": 22658 }, { "epoch": 1.5352666169794702, "grad_norm": 5.702601432800293, "learning_rate": 7.000752960503799e-05, "loss": 0.6475, "step": 22659 }, { "epoch": 1.5353343722474424, "grad_norm": 5.558526039123535, "learning_rate": 7.000616058594017e-05, "loss": 0.6756, "step": 22660 }, { "epoch": 1.5354021275154142, "grad_norm": 5.864680767059326, "learning_rate": 7.000479156684236e-05, "loss": 0.7819, "step": 22661 }, { "epoch": 1.5354698827833864, "grad_norm": 4.263807773590088, "learning_rate": 7.000342254774454e-05, "loss": 0.4681, "step": 22662 }, { "epoch": 1.5355376380513586, "grad_norm": 6.914703369140625, "learning_rate": 7.000205352864672e-05, "loss": 0.7734, "step": 22663 }, { "epoch": 1.5356053933193305, "grad_norm": 6.568969249725342, "learning_rate": 7.00006845095489e-05, "loss": 0.4895, "step": 22664 }, { "epoch": 1.5356731485873025, "grad_norm": 8.108369827270508, "learning_rate": 6.99993154904511e-05, "loss": 0.5982, "step": 22665 }, { "epoch": 1.5357409038552747, "grad_norm": 5.625303268432617, "learning_rate": 6.999794647135328e-05, "loss": 0.969, "step": 22666 }, { "epoch": 1.535808659123247, "grad_norm": 5.224907398223877, "learning_rate": 6.999657745225546e-05, "loss": 0.7389, "step": 22667 }, { "epoch": 1.535876414391219, "grad_norm": 4.705897808074951, "learning_rate": 6.999520843315764e-05, "loss": 0.6139, "step": 22668 }, { "epoch": 1.535944169659191, "grad_norm": 5.216924667358398, "learning_rate": 6.999383941405982e-05, "loss": 0.6108, "step": 22669 }, { "epoch": 1.536011924927163, "grad_norm": 4.485081195831299, "learning_rate": 6.999247039496201e-05, "loss": 0.5863, "step": 22670 }, { "epoch": 1.5360796801951353, "grad_norm": 5.056951522827148, "learning_rate": 6.999110137586419e-05, "loss": 0.4979, "step": 22671 }, { "epoch": 1.5361474354631073, "grad_norm": 6.9430999755859375, "learning_rate": 6.998973235676637e-05, "loss": 0.6882, "step": 22672 }, { "epoch": 1.5362151907310793, "grad_norm": 7.880124092102051, "learning_rate": 6.998836333766857e-05, "loss": 0.6339, "step": 22673 }, { "epoch": 1.5362829459990515, "grad_norm": 4.570502758026123, "learning_rate": 6.998699431857075e-05, "loss": 0.6736, "step": 22674 }, { "epoch": 1.5363507012670237, "grad_norm": 6.164181709289551, "learning_rate": 6.998562529947293e-05, "loss": 0.6008, "step": 22675 }, { "epoch": 1.5364184565349956, "grad_norm": 5.61137580871582, "learning_rate": 6.998425628037512e-05, "loss": 0.5796, "step": 22676 }, { "epoch": 1.5364862118029676, "grad_norm": 6.259246826171875, "learning_rate": 6.99828872612773e-05, "loss": 0.8412, "step": 22677 }, { "epoch": 1.5365539670709398, "grad_norm": 4.805912971496582, "learning_rate": 6.998151824217948e-05, "loss": 0.643, "step": 22678 }, { "epoch": 1.5366217223389118, "grad_norm": 7.096707344055176, "learning_rate": 6.998014922308168e-05, "loss": 0.5835, "step": 22679 }, { "epoch": 1.5366894776068838, "grad_norm": 8.959208488464355, "learning_rate": 6.997878020398386e-05, "loss": 0.7109, "step": 22680 }, { "epoch": 1.536757232874856, "grad_norm": 5.191287994384766, "learning_rate": 6.997741118488604e-05, "loss": 0.6964, "step": 22681 }, { "epoch": 1.5368249881428282, "grad_norm": 6.61174201965332, "learning_rate": 6.997604216578822e-05, "loss": 0.602, "step": 22682 }, { "epoch": 1.5368927434108002, "grad_norm": 6.5462236404418945, "learning_rate": 6.99746731466904e-05, "loss": 0.4813, "step": 22683 }, { "epoch": 1.5369604986787722, "grad_norm": 5.85744571685791, "learning_rate": 6.997330412759259e-05, "loss": 0.6608, "step": 22684 }, { "epoch": 1.5370282539467444, "grad_norm": 9.320167541503906, "learning_rate": 6.997193510849477e-05, "loss": 0.6429, "step": 22685 }, { "epoch": 1.5370960092147166, "grad_norm": 9.373885154724121, "learning_rate": 6.997056608939695e-05, "loss": 0.5617, "step": 22686 }, { "epoch": 1.5371637644826885, "grad_norm": 10.568325996398926, "learning_rate": 6.996919707029913e-05, "loss": 0.7483, "step": 22687 }, { "epoch": 1.5372315197506605, "grad_norm": 9.881688117980957, "learning_rate": 6.996782805120133e-05, "loss": 0.5271, "step": 22688 }, { "epoch": 1.5372992750186327, "grad_norm": 7.7520432472229, "learning_rate": 6.996645903210351e-05, "loss": 0.6991, "step": 22689 }, { "epoch": 1.537367030286605, "grad_norm": 4.655795097351074, "learning_rate": 6.996509001300569e-05, "loss": 0.5317, "step": 22690 }, { "epoch": 1.537434785554577, "grad_norm": 4.578963279724121, "learning_rate": 6.996372099390787e-05, "loss": 0.4739, "step": 22691 }, { "epoch": 1.537502540822549, "grad_norm": 5.091914653778076, "learning_rate": 6.996235197481005e-05, "loss": 0.6883, "step": 22692 }, { "epoch": 1.537570296090521, "grad_norm": 8.787245750427246, "learning_rate": 6.996098295571224e-05, "loss": 0.6229, "step": 22693 }, { "epoch": 1.537638051358493, "grad_norm": 5.773130416870117, "learning_rate": 6.995961393661442e-05, "loss": 0.496, "step": 22694 }, { "epoch": 1.537705806626465, "grad_norm": 4.750080585479736, "learning_rate": 6.99582449175166e-05, "loss": 0.6956, "step": 22695 }, { "epoch": 1.5377735618944373, "grad_norm": 6.965307712554932, "learning_rate": 6.995687589841878e-05, "loss": 0.6227, "step": 22696 }, { "epoch": 1.5378413171624095, "grad_norm": 5.990235805511475, "learning_rate": 6.995550687932096e-05, "loss": 0.6265, "step": 22697 }, { "epoch": 1.5379090724303814, "grad_norm": 6.310254096984863, "learning_rate": 6.995413786022316e-05, "loss": 0.5141, "step": 22698 }, { "epoch": 1.5379768276983534, "grad_norm": 6.840160369873047, "learning_rate": 6.995276884112534e-05, "loss": 0.6224, "step": 22699 }, { "epoch": 1.5380445829663256, "grad_norm": 5.637760639190674, "learning_rate": 6.995139982202752e-05, "loss": 0.7249, "step": 22700 }, { "epoch": 1.5381123382342978, "grad_norm": 10.911355972290039, "learning_rate": 6.99500308029297e-05, "loss": 0.6791, "step": 22701 }, { "epoch": 1.5381800935022698, "grad_norm": 11.90958309173584, "learning_rate": 6.994866178383189e-05, "loss": 0.8548, "step": 22702 }, { "epoch": 1.5382478487702418, "grad_norm": 5.60015344619751, "learning_rate": 6.994729276473407e-05, "loss": 0.4624, "step": 22703 }, { "epoch": 1.538315604038214, "grad_norm": 6.912858963012695, "learning_rate": 6.994592374563625e-05, "loss": 0.5176, "step": 22704 }, { "epoch": 1.5383833593061862, "grad_norm": 7.172194004058838, "learning_rate": 6.994455472653843e-05, "loss": 0.8263, "step": 22705 }, { "epoch": 1.5384511145741582, "grad_norm": 7.946822166442871, "learning_rate": 6.994318570744061e-05, "loss": 0.7087, "step": 22706 }, { "epoch": 1.5385188698421302, "grad_norm": 4.36579704284668, "learning_rate": 6.994181668834281e-05, "loss": 0.6396, "step": 22707 }, { "epoch": 1.5385866251101024, "grad_norm": 8.407509803771973, "learning_rate": 6.994044766924499e-05, "loss": 0.8489, "step": 22708 }, { "epoch": 1.5386543803780746, "grad_norm": 5.790008068084717, "learning_rate": 6.993907865014717e-05, "loss": 0.4878, "step": 22709 }, { "epoch": 1.5387221356460463, "grad_norm": 5.8444695472717285, "learning_rate": 6.993770963104935e-05, "loss": 0.5765, "step": 22710 }, { "epoch": 1.5387898909140185, "grad_norm": 6.084954738616943, "learning_rate": 6.993634061195154e-05, "loss": 0.7386, "step": 22711 }, { "epoch": 1.5388576461819907, "grad_norm": 6.099859237670898, "learning_rate": 6.993497159285372e-05, "loss": 0.6491, "step": 22712 }, { "epoch": 1.5389254014499627, "grad_norm": 5.709012508392334, "learning_rate": 6.99336025737559e-05, "loss": 0.622, "step": 22713 }, { "epoch": 1.5389931567179347, "grad_norm": 5.598257064819336, "learning_rate": 6.993223355465808e-05, "loss": 0.6064, "step": 22714 }, { "epoch": 1.5390609119859069, "grad_norm": 5.579864501953125, "learning_rate": 6.993086453556026e-05, "loss": 0.6235, "step": 22715 }, { "epoch": 1.539128667253879, "grad_norm": 7.002440929412842, "learning_rate": 6.992949551646246e-05, "loss": 0.6238, "step": 22716 }, { "epoch": 1.539196422521851, "grad_norm": 5.829963207244873, "learning_rate": 6.992812649736464e-05, "loss": 0.7477, "step": 22717 }, { "epoch": 1.539264177789823, "grad_norm": 6.896578788757324, "learning_rate": 6.992675747826682e-05, "loss": 0.6945, "step": 22718 }, { "epoch": 1.5393319330577953, "grad_norm": 6.887509346008301, "learning_rate": 6.992538845916901e-05, "loss": 0.6291, "step": 22719 }, { "epoch": 1.5393996883257675, "grad_norm": 6.429437160491943, "learning_rate": 6.992401944007119e-05, "loss": 0.8283, "step": 22720 }, { "epoch": 1.5394674435937394, "grad_norm": 6.821274280548096, "learning_rate": 6.992265042097337e-05, "loss": 0.8415, "step": 22721 }, { "epoch": 1.5395351988617114, "grad_norm": 7.762423515319824, "learning_rate": 6.992128140187557e-05, "loss": 0.5989, "step": 22722 }, { "epoch": 1.5396029541296836, "grad_norm": 6.16066837310791, "learning_rate": 6.991991238277775e-05, "loss": 0.5916, "step": 22723 }, { "epoch": 1.5396707093976558, "grad_norm": 7.60310697555542, "learning_rate": 6.991854336367993e-05, "loss": 0.8483, "step": 22724 }, { "epoch": 1.5397384646656278, "grad_norm": 4.7958807945251465, "learning_rate": 6.991717434458212e-05, "loss": 0.8075, "step": 22725 }, { "epoch": 1.5398062199335998, "grad_norm": 7.294519424438477, "learning_rate": 6.99158053254843e-05, "loss": 0.6439, "step": 22726 }, { "epoch": 1.539873975201572, "grad_norm": 5.149703502655029, "learning_rate": 6.991443630638648e-05, "loss": 0.7318, "step": 22727 }, { "epoch": 1.539941730469544, "grad_norm": 5.0648417472839355, "learning_rate": 6.991306728728866e-05, "loss": 0.5986, "step": 22728 }, { "epoch": 1.540009485737516, "grad_norm": 5.6238555908203125, "learning_rate": 6.991169826819084e-05, "loss": 0.782, "step": 22729 }, { "epoch": 1.5400772410054882, "grad_norm": 5.318024635314941, "learning_rate": 6.991032924909304e-05, "loss": 0.644, "step": 22730 }, { "epoch": 1.5401449962734604, "grad_norm": 6.65131139755249, "learning_rate": 6.990896022999522e-05, "loss": 0.7112, "step": 22731 }, { "epoch": 1.5402127515414323, "grad_norm": 5.360286235809326, "learning_rate": 6.99075912108974e-05, "loss": 0.5774, "step": 22732 }, { "epoch": 1.5402805068094043, "grad_norm": 4.38901424407959, "learning_rate": 6.990622219179958e-05, "loss": 0.5318, "step": 22733 }, { "epoch": 1.5403482620773765, "grad_norm": 5.883554458618164, "learning_rate": 6.990485317270177e-05, "loss": 0.5474, "step": 22734 }, { "epoch": 1.5404160173453487, "grad_norm": 6.453225612640381, "learning_rate": 6.990348415360395e-05, "loss": 0.8017, "step": 22735 }, { "epoch": 1.5404837726133207, "grad_norm": 6.55412483215332, "learning_rate": 6.990211513450613e-05, "loss": 0.6397, "step": 22736 }, { "epoch": 1.5405515278812927, "grad_norm": 5.222686290740967, "learning_rate": 6.990074611540831e-05, "loss": 0.5255, "step": 22737 }, { "epoch": 1.5406192831492649, "grad_norm": 4.613766193389893, "learning_rate": 6.989937709631049e-05, "loss": 0.6851, "step": 22738 }, { "epoch": 1.540687038417237, "grad_norm": 6.946084976196289, "learning_rate": 6.989800807721269e-05, "loss": 0.5744, "step": 22739 }, { "epoch": 1.540754793685209, "grad_norm": 4.892561435699463, "learning_rate": 6.989663905811487e-05, "loss": 0.6431, "step": 22740 }, { "epoch": 1.540822548953181, "grad_norm": 4.871883392333984, "learning_rate": 6.989527003901705e-05, "loss": 0.6858, "step": 22741 }, { "epoch": 1.5408903042211533, "grad_norm": 5.142613887786865, "learning_rate": 6.989390101991923e-05, "loss": 0.5677, "step": 22742 }, { "epoch": 1.5409580594891252, "grad_norm": 4.833871364593506, "learning_rate": 6.989253200082142e-05, "loss": 0.5768, "step": 22743 }, { "epoch": 1.5410258147570972, "grad_norm": 4.8887224197387695, "learning_rate": 6.98911629817236e-05, "loss": 0.5796, "step": 22744 }, { "epoch": 1.5410935700250694, "grad_norm": 6.412557125091553, "learning_rate": 6.988979396262578e-05, "loss": 0.6153, "step": 22745 }, { "epoch": 1.5411613252930416, "grad_norm": 6.001385688781738, "learning_rate": 6.988842494352796e-05, "loss": 0.6734, "step": 22746 }, { "epoch": 1.5412290805610136, "grad_norm": 6.141444206237793, "learning_rate": 6.988705592443014e-05, "loss": 0.6757, "step": 22747 }, { "epoch": 1.5412968358289856, "grad_norm": 4.77182674407959, "learning_rate": 6.988568690533234e-05, "loss": 0.7458, "step": 22748 }, { "epoch": 1.5413645910969578, "grad_norm": 7.0759124755859375, "learning_rate": 6.988431788623452e-05, "loss": 0.5741, "step": 22749 }, { "epoch": 1.54143234636493, "grad_norm": 6.102531433105469, "learning_rate": 6.98829488671367e-05, "loss": 0.56, "step": 22750 }, { "epoch": 1.541500101632902, "grad_norm": 5.876333713531494, "learning_rate": 6.988157984803888e-05, "loss": 0.7497, "step": 22751 }, { "epoch": 1.541567856900874, "grad_norm": 4.801648139953613, "learning_rate": 6.988021082894106e-05, "loss": 0.5532, "step": 22752 }, { "epoch": 1.5416356121688461, "grad_norm": 5.201422691345215, "learning_rate": 6.987884180984325e-05, "loss": 0.7541, "step": 22753 }, { "epoch": 1.5417033674368184, "grad_norm": 3.725203275680542, "learning_rate": 6.987747279074543e-05, "loss": 0.4109, "step": 22754 }, { "epoch": 1.5417711227047903, "grad_norm": 4.350356101989746, "learning_rate": 6.987610377164761e-05, "loss": 0.5473, "step": 22755 }, { "epoch": 1.5418388779727623, "grad_norm": 5.606915473937988, "learning_rate": 6.987473475254979e-05, "loss": 0.4918, "step": 22756 }, { "epoch": 1.5419066332407345, "grad_norm": 4.54536247253418, "learning_rate": 6.987336573345199e-05, "loss": 0.8026, "step": 22757 }, { "epoch": 1.5419743885087067, "grad_norm": 6.194653034210205, "learning_rate": 6.987199671435417e-05, "loss": 0.4947, "step": 22758 }, { "epoch": 1.5420421437766785, "grad_norm": 5.391516208648682, "learning_rate": 6.987062769525635e-05, "loss": 0.6015, "step": 22759 }, { "epoch": 1.5421098990446507, "grad_norm": 10.117959976196289, "learning_rate": 6.986925867615853e-05, "loss": 0.7004, "step": 22760 }, { "epoch": 1.5421776543126229, "grad_norm": 5.1439056396484375, "learning_rate": 6.986788965706071e-05, "loss": 0.7469, "step": 22761 }, { "epoch": 1.5422454095805949, "grad_norm": 6.461843967437744, "learning_rate": 6.98665206379629e-05, "loss": 0.8076, "step": 22762 }, { "epoch": 1.5423131648485668, "grad_norm": 6.288424968719482, "learning_rate": 6.986515161886508e-05, "loss": 0.8396, "step": 22763 }, { "epoch": 1.542380920116539, "grad_norm": 8.15124225616455, "learning_rate": 6.986378259976726e-05, "loss": 0.6216, "step": 22764 }, { "epoch": 1.5424486753845112, "grad_norm": 5.232387542724609, "learning_rate": 6.986241358066946e-05, "loss": 0.5214, "step": 22765 }, { "epoch": 1.5425164306524832, "grad_norm": 12.429887771606445, "learning_rate": 6.986104456157164e-05, "loss": 0.6247, "step": 22766 }, { "epoch": 1.5425841859204552, "grad_norm": 7.120373249053955, "learning_rate": 6.985967554247382e-05, "loss": 0.8061, "step": 22767 }, { "epoch": 1.5426519411884274, "grad_norm": 6.675090789794922, "learning_rate": 6.985830652337601e-05, "loss": 0.5621, "step": 22768 }, { "epoch": 1.5427196964563996, "grad_norm": 5.940648078918457, "learning_rate": 6.985693750427819e-05, "loss": 0.7294, "step": 22769 }, { "epoch": 1.5427874517243716, "grad_norm": 6.0000481605529785, "learning_rate": 6.985556848518037e-05, "loss": 0.5299, "step": 22770 }, { "epoch": 1.5428552069923436, "grad_norm": 6.489519119262695, "learning_rate": 6.985419946608257e-05, "loss": 0.668, "step": 22771 }, { "epoch": 1.5429229622603158, "grad_norm": 7.264878749847412, "learning_rate": 6.985283044698475e-05, "loss": 0.5793, "step": 22772 }, { "epoch": 1.542990717528288, "grad_norm": 7.482590675354004, "learning_rate": 6.985146142788693e-05, "loss": 0.825, "step": 22773 }, { "epoch": 1.54305847279626, "grad_norm": 5.853081703186035, "learning_rate": 6.985009240878911e-05, "loss": 0.8053, "step": 22774 }, { "epoch": 1.543126228064232, "grad_norm": 6.112782001495361, "learning_rate": 6.984872338969129e-05, "loss": 0.6349, "step": 22775 }, { "epoch": 1.5431939833322041, "grad_norm": 6.623645305633545, "learning_rate": 6.984735437059348e-05, "loss": 0.8647, "step": 22776 }, { "epoch": 1.5432617386001761, "grad_norm": 4.543199062347412, "learning_rate": 6.984598535149566e-05, "loss": 0.5979, "step": 22777 }, { "epoch": 1.543329493868148, "grad_norm": 5.48776912689209, "learning_rate": 6.984461633239784e-05, "loss": 0.8818, "step": 22778 }, { "epoch": 1.5433972491361203, "grad_norm": 4.595972061157227, "learning_rate": 6.984324731330002e-05, "loss": 0.6294, "step": 22779 }, { "epoch": 1.5434650044040925, "grad_norm": 4.480964660644531, "learning_rate": 6.984187829420222e-05, "loss": 0.5662, "step": 22780 }, { "epoch": 1.5435327596720645, "grad_norm": 6.2654008865356445, "learning_rate": 6.98405092751044e-05, "loss": 0.7883, "step": 22781 }, { "epoch": 1.5436005149400365, "grad_norm": 6.452686786651611, "learning_rate": 6.983914025600658e-05, "loss": 0.9229, "step": 22782 }, { "epoch": 1.5436682702080087, "grad_norm": 4.913832187652588, "learning_rate": 6.983777123690876e-05, "loss": 0.4728, "step": 22783 }, { "epoch": 1.5437360254759809, "grad_norm": 7.525357723236084, "learning_rate": 6.983640221781094e-05, "loss": 0.5802, "step": 22784 }, { "epoch": 1.5438037807439529, "grad_norm": 5.039961814880371, "learning_rate": 6.983503319871313e-05, "loss": 0.687, "step": 22785 }, { "epoch": 1.5438715360119248, "grad_norm": 10.10362434387207, "learning_rate": 6.983366417961531e-05, "loss": 0.6359, "step": 22786 }, { "epoch": 1.543939291279897, "grad_norm": 6.112766265869141, "learning_rate": 6.983229516051749e-05, "loss": 0.5819, "step": 22787 }, { "epoch": 1.5440070465478692, "grad_norm": 7.330380439758301, "learning_rate": 6.983092614141967e-05, "loss": 0.7501, "step": 22788 }, { "epoch": 1.5440748018158412, "grad_norm": 4.44428825378418, "learning_rate": 6.982955712232187e-05, "loss": 0.7117, "step": 22789 }, { "epoch": 1.5441425570838132, "grad_norm": 9.280022621154785, "learning_rate": 6.982818810322405e-05, "loss": 0.501, "step": 22790 }, { "epoch": 1.5442103123517854, "grad_norm": 5.681324481964111, "learning_rate": 6.982681908412623e-05, "loss": 0.5964, "step": 22791 }, { "epoch": 1.5442780676197574, "grad_norm": 6.7462286949157715, "learning_rate": 6.982545006502841e-05, "loss": 0.7201, "step": 22792 }, { "epoch": 1.5443458228877294, "grad_norm": 6.436206340789795, "learning_rate": 6.982408104593059e-05, "loss": 0.8395, "step": 22793 }, { "epoch": 1.5444135781557016, "grad_norm": 6.667518138885498, "learning_rate": 6.982271202683278e-05, "loss": 0.5415, "step": 22794 }, { "epoch": 1.5444813334236738, "grad_norm": 5.134167194366455, "learning_rate": 6.982134300773496e-05, "loss": 0.7092, "step": 22795 }, { "epoch": 1.5445490886916458, "grad_norm": 4.98570442199707, "learning_rate": 6.981997398863714e-05, "loss": 0.8354, "step": 22796 }, { "epoch": 1.5446168439596177, "grad_norm": 7.355459213256836, "learning_rate": 6.981860496953932e-05, "loss": 0.7638, "step": 22797 }, { "epoch": 1.54468459922759, "grad_norm": 6.554946422576904, "learning_rate": 6.981723595044152e-05, "loss": 0.637, "step": 22798 }, { "epoch": 1.5447523544955621, "grad_norm": 6.986687183380127, "learning_rate": 6.98158669313437e-05, "loss": 0.772, "step": 22799 }, { "epoch": 1.5448201097635341, "grad_norm": 6.526783466339111, "learning_rate": 6.981449791224588e-05, "loss": 0.8433, "step": 22800 }, { "epoch": 1.544887865031506, "grad_norm": 5.674108982086182, "learning_rate": 6.981312889314806e-05, "loss": 0.7299, "step": 22801 }, { "epoch": 1.5449556202994783, "grad_norm": 8.508087158203125, "learning_rate": 6.981175987405024e-05, "loss": 0.8396, "step": 22802 }, { "epoch": 1.5450233755674505, "grad_norm": 4.54975700378418, "learning_rate": 6.981039085495243e-05, "loss": 0.7175, "step": 22803 }, { "epoch": 1.5450911308354225, "grad_norm": 6.193877696990967, "learning_rate": 6.980902183585461e-05, "loss": 0.744, "step": 22804 }, { "epoch": 1.5451588861033945, "grad_norm": 6.690474510192871, "learning_rate": 6.980765281675679e-05, "loss": 0.8198, "step": 22805 }, { "epoch": 1.5452266413713667, "grad_norm": 6.009565830230713, "learning_rate": 6.980628379765897e-05, "loss": 0.5153, "step": 22806 }, { "epoch": 1.5452943966393389, "grad_norm": 7.6128339767456055, "learning_rate": 6.980491477856115e-05, "loss": 0.8796, "step": 22807 }, { "epoch": 1.5453621519073106, "grad_norm": 6.098743915557861, "learning_rate": 6.980354575946335e-05, "loss": 0.7346, "step": 22808 }, { "epoch": 1.5454299071752828, "grad_norm": 7.381857395172119, "learning_rate": 6.980217674036553e-05, "loss": 0.6276, "step": 22809 }, { "epoch": 1.545497662443255, "grad_norm": 9.814278602600098, "learning_rate": 6.980080772126771e-05, "loss": 0.5179, "step": 22810 }, { "epoch": 1.545565417711227, "grad_norm": 4.930272102355957, "learning_rate": 6.979943870216989e-05, "loss": 0.8034, "step": 22811 }, { "epoch": 1.545633172979199, "grad_norm": 6.488096714019775, "learning_rate": 6.979806968307208e-05, "loss": 0.7903, "step": 22812 }, { "epoch": 1.5457009282471712, "grad_norm": 6.713217258453369, "learning_rate": 6.979670066397426e-05, "loss": 0.8936, "step": 22813 }, { "epoch": 1.5457686835151434, "grad_norm": 8.276215553283691, "learning_rate": 6.979533164487644e-05, "loss": 0.6958, "step": 22814 }, { "epoch": 1.5458364387831154, "grad_norm": 6.5031609535217285, "learning_rate": 6.979396262577864e-05, "loss": 0.6989, "step": 22815 }, { "epoch": 1.5459041940510874, "grad_norm": 5.285594463348389, "learning_rate": 6.979259360668082e-05, "loss": 0.7474, "step": 22816 }, { "epoch": 1.5459719493190596, "grad_norm": 5.352348804473877, "learning_rate": 6.9791224587583e-05, "loss": 0.715, "step": 22817 }, { "epoch": 1.5460397045870318, "grad_norm": 5.76752233505249, "learning_rate": 6.978985556848519e-05, "loss": 0.6887, "step": 22818 }, { "epoch": 1.5461074598550038, "grad_norm": 5.4910688400268555, "learning_rate": 6.978848654938737e-05, "loss": 0.65, "step": 22819 }, { "epoch": 1.5461752151229757, "grad_norm": 5.432984828948975, "learning_rate": 6.978711753028955e-05, "loss": 0.6064, "step": 22820 }, { "epoch": 1.546242970390948, "grad_norm": 5.012134552001953, "learning_rate": 6.978574851119175e-05, "loss": 0.604, "step": 22821 }, { "epoch": 1.5463107256589201, "grad_norm": 5.926977634429932, "learning_rate": 6.978437949209393e-05, "loss": 0.7098, "step": 22822 }, { "epoch": 1.5463784809268921, "grad_norm": 4.900906085968018, "learning_rate": 6.97830104729961e-05, "loss": 0.6023, "step": 22823 }, { "epoch": 1.546446236194864, "grad_norm": 5.576010227203369, "learning_rate": 6.978164145389829e-05, "loss": 0.7236, "step": 22824 }, { "epoch": 1.5465139914628363, "grad_norm": 4.740788459777832, "learning_rate": 6.978027243480047e-05, "loss": 0.7141, "step": 22825 }, { "epoch": 1.5465817467308083, "grad_norm": 5.102362632751465, "learning_rate": 6.977890341570266e-05, "loss": 0.6769, "step": 22826 }, { "epoch": 1.5466495019987803, "grad_norm": 7.287747383117676, "learning_rate": 6.977753439660484e-05, "loss": 0.9269, "step": 22827 }, { "epoch": 1.5467172572667525, "grad_norm": 5.8248186111450195, "learning_rate": 6.977616537750702e-05, "loss": 0.5567, "step": 22828 }, { "epoch": 1.5467850125347247, "grad_norm": 4.112695217132568, "learning_rate": 6.97747963584092e-05, "loss": 0.5198, "step": 22829 }, { "epoch": 1.5468527678026966, "grad_norm": 4.029034614562988, "learning_rate": 6.977342733931138e-05, "loss": 0.5441, "step": 22830 }, { "epoch": 1.5469205230706686, "grad_norm": 6.784334182739258, "learning_rate": 6.977205832021358e-05, "loss": 0.6321, "step": 22831 }, { "epoch": 1.5469882783386408, "grad_norm": 3.5104193687438965, "learning_rate": 6.977068930111576e-05, "loss": 0.5428, "step": 22832 }, { "epoch": 1.547056033606613, "grad_norm": 5.869329452514648, "learning_rate": 6.976932028201794e-05, "loss": 0.5986, "step": 22833 }, { "epoch": 1.547123788874585, "grad_norm": 6.353670120239258, "learning_rate": 6.976795126292012e-05, "loss": 0.7585, "step": 22834 }, { "epoch": 1.547191544142557, "grad_norm": 6.307089805603027, "learning_rate": 6.976658224382231e-05, "loss": 0.7967, "step": 22835 }, { "epoch": 1.5472592994105292, "grad_norm": 6.920375347137451, "learning_rate": 6.976521322472449e-05, "loss": 0.8328, "step": 22836 }, { "epoch": 1.5473270546785014, "grad_norm": 6.755915641784668, "learning_rate": 6.976384420562667e-05, "loss": 0.6911, "step": 22837 }, { "epoch": 1.5473948099464734, "grad_norm": 6.169987201690674, "learning_rate": 6.976247518652885e-05, "loss": 0.5811, "step": 22838 }, { "epoch": 1.5474625652144454, "grad_norm": 4.323638439178467, "learning_rate": 6.976110616743103e-05, "loss": 0.4505, "step": 22839 }, { "epoch": 1.5475303204824176, "grad_norm": 5.654609680175781, "learning_rate": 6.975973714833323e-05, "loss": 0.5826, "step": 22840 }, { "epoch": 1.5475980757503895, "grad_norm": 7.010591983795166, "learning_rate": 6.97583681292354e-05, "loss": 0.9222, "step": 22841 }, { "epoch": 1.5476658310183615, "grad_norm": 5.839097499847412, "learning_rate": 6.975699911013759e-05, "loss": 0.5586, "step": 22842 }, { "epoch": 1.5477335862863337, "grad_norm": 4.715919494628906, "learning_rate": 6.975563009103977e-05, "loss": 0.7246, "step": 22843 }, { "epoch": 1.547801341554306, "grad_norm": 6.662782192230225, "learning_rate": 6.975426107194196e-05, "loss": 0.836, "step": 22844 }, { "epoch": 1.547869096822278, "grad_norm": 9.398289680480957, "learning_rate": 6.975289205284414e-05, "loss": 0.7512, "step": 22845 }, { "epoch": 1.54793685209025, "grad_norm": 5.910295486450195, "learning_rate": 6.975152303374632e-05, "loss": 0.798, "step": 22846 }, { "epoch": 1.548004607358222, "grad_norm": 5.635481834411621, "learning_rate": 6.97501540146485e-05, "loss": 0.6917, "step": 22847 }, { "epoch": 1.5480723626261943, "grad_norm": 6.0941267013549805, "learning_rate": 6.974878499555068e-05, "loss": 0.6308, "step": 22848 }, { "epoch": 1.5481401178941663, "grad_norm": 5.014622688293457, "learning_rate": 6.974741597645288e-05, "loss": 0.5895, "step": 22849 }, { "epoch": 1.5482078731621383, "grad_norm": 5.371978282928467, "learning_rate": 6.974604695735506e-05, "loss": 0.6731, "step": 22850 }, { "epoch": 1.5482756284301105, "grad_norm": 7.458617210388184, "learning_rate": 6.974467793825724e-05, "loss": 0.5237, "step": 22851 }, { "epoch": 1.5483433836980827, "grad_norm": 4.966724872589111, "learning_rate": 6.974330891915942e-05, "loss": 0.6155, "step": 22852 }, { "epoch": 1.5484111389660546, "grad_norm": 6.727430820465088, "learning_rate": 6.974193990006161e-05, "loss": 0.5508, "step": 22853 }, { "epoch": 1.5484788942340266, "grad_norm": 5.456569671630859, "learning_rate": 6.974057088096379e-05, "loss": 0.6921, "step": 22854 }, { "epoch": 1.5485466495019988, "grad_norm": 5.0242533683776855, "learning_rate": 6.973920186186597e-05, "loss": 0.6657, "step": 22855 }, { "epoch": 1.548614404769971, "grad_norm": 7.238978385925293, "learning_rate": 6.973783284276815e-05, "loss": 0.5414, "step": 22856 }, { "epoch": 1.5486821600379428, "grad_norm": 5.997554779052734, "learning_rate": 6.973646382367033e-05, "loss": 0.669, "step": 22857 }, { "epoch": 1.548749915305915, "grad_norm": 5.231083393096924, "learning_rate": 6.973509480457253e-05, "loss": 0.7319, "step": 22858 }, { "epoch": 1.5488176705738872, "grad_norm": 4.9231648445129395, "learning_rate": 6.973372578547471e-05, "loss": 0.5749, "step": 22859 }, { "epoch": 1.5488854258418592, "grad_norm": 5.628284454345703, "learning_rate": 6.973235676637689e-05, "loss": 0.5875, "step": 22860 }, { "epoch": 1.5489531811098312, "grad_norm": 5.221574783325195, "learning_rate": 6.973098774727908e-05, "loss": 0.5596, "step": 22861 }, { "epoch": 1.5490209363778034, "grad_norm": 6.102112770080566, "learning_rate": 6.972961872818126e-05, "loss": 0.5672, "step": 22862 }, { "epoch": 1.5490886916457756, "grad_norm": 5.628496170043945, "learning_rate": 6.972824970908344e-05, "loss": 0.8102, "step": 22863 }, { "epoch": 1.5491564469137475, "grad_norm": 6.916885852813721, "learning_rate": 6.972688068998564e-05, "loss": 0.6991, "step": 22864 }, { "epoch": 1.5492242021817195, "grad_norm": 5.651325702667236, "learning_rate": 6.972551167088782e-05, "loss": 0.9127, "step": 22865 }, { "epoch": 1.5492919574496917, "grad_norm": 6.419774055480957, "learning_rate": 6.972414265179e-05, "loss": 0.6729, "step": 22866 }, { "epoch": 1.549359712717664, "grad_norm": 5.124453544616699, "learning_rate": 6.972277363269219e-05, "loss": 0.7664, "step": 22867 }, { "epoch": 1.549427467985636, "grad_norm": 4.981832981109619, "learning_rate": 6.972140461359437e-05, "loss": 0.5302, "step": 22868 }, { "epoch": 1.549495223253608, "grad_norm": 8.013341903686523, "learning_rate": 6.972003559449655e-05, "loss": 0.7466, "step": 22869 }, { "epoch": 1.54956297852158, "grad_norm": 5.821895122528076, "learning_rate": 6.971866657539873e-05, "loss": 0.6614, "step": 22870 }, { "epoch": 1.5496307337895523, "grad_norm": 8.008550643920898, "learning_rate": 6.971729755630091e-05, "loss": 0.7895, "step": 22871 }, { "epoch": 1.5496984890575243, "grad_norm": 5.563565731048584, "learning_rate": 6.97159285372031e-05, "loss": 0.5858, "step": 22872 }, { "epoch": 1.5497662443254963, "grad_norm": 5.383426666259766, "learning_rate": 6.971455951810529e-05, "loss": 0.8376, "step": 22873 }, { "epoch": 1.5498339995934685, "grad_norm": 5.750175476074219, "learning_rate": 6.971319049900747e-05, "loss": 0.7767, "step": 22874 }, { "epoch": 1.5499017548614404, "grad_norm": 6.610818386077881, "learning_rate": 6.971182147990965e-05, "loss": 0.5775, "step": 22875 }, { "epoch": 1.5499695101294124, "grad_norm": 5.527643203735352, "learning_rate": 6.971045246081184e-05, "loss": 0.5992, "step": 22876 }, { "epoch": 1.5500372653973846, "grad_norm": 4.899708271026611, "learning_rate": 6.970908344171402e-05, "loss": 0.5875, "step": 22877 }, { "epoch": 1.5501050206653568, "grad_norm": 7.256153583526611, "learning_rate": 6.97077144226162e-05, "loss": 0.5216, "step": 22878 }, { "epoch": 1.5501727759333288, "grad_norm": 5.674491882324219, "learning_rate": 6.970634540351838e-05, "loss": 0.5817, "step": 22879 }, { "epoch": 1.5502405312013008, "grad_norm": 8.75024127960205, "learning_rate": 6.970497638442056e-05, "loss": 0.5779, "step": 22880 }, { "epoch": 1.550308286469273, "grad_norm": 6.317795276641846, "learning_rate": 6.970360736532276e-05, "loss": 0.7865, "step": 22881 }, { "epoch": 1.5503760417372452, "grad_norm": 4.824620723724365, "learning_rate": 6.970223834622494e-05, "loss": 0.7753, "step": 22882 }, { "epoch": 1.5504437970052172, "grad_norm": 7.179446697235107, "learning_rate": 6.970086932712712e-05, "loss": 0.7514, "step": 22883 }, { "epoch": 1.5505115522731892, "grad_norm": 4.740686893463135, "learning_rate": 6.96995003080293e-05, "loss": 0.5625, "step": 22884 }, { "epoch": 1.5505793075411614, "grad_norm": 6.539283752441406, "learning_rate": 6.969813128893148e-05, "loss": 0.4674, "step": 22885 }, { "epoch": 1.5506470628091336, "grad_norm": 5.1155009269714355, "learning_rate": 6.969676226983367e-05, "loss": 0.541, "step": 22886 }, { "epoch": 1.5507148180771055, "grad_norm": 7.341643810272217, "learning_rate": 6.969539325073585e-05, "loss": 0.7193, "step": 22887 }, { "epoch": 1.5507825733450775, "grad_norm": 6.033017635345459, "learning_rate": 6.969402423163803e-05, "loss": 0.6433, "step": 22888 }, { "epoch": 1.5508503286130497, "grad_norm": 7.396689414978027, "learning_rate": 6.969265521254021e-05, "loss": 0.7365, "step": 22889 }, { "epoch": 1.5509180838810217, "grad_norm": 9.500696182250977, "learning_rate": 6.96912861934424e-05, "loss": 0.7393, "step": 22890 }, { "epoch": 1.5509858391489937, "grad_norm": 9.363235473632812, "learning_rate": 6.968991717434459e-05, "loss": 0.4899, "step": 22891 }, { "epoch": 1.5510535944169659, "grad_norm": 8.230768203735352, "learning_rate": 6.968854815524677e-05, "loss": 0.6171, "step": 22892 }, { "epoch": 1.551121349684938, "grad_norm": 6.019417762756348, "learning_rate": 6.968717913614895e-05, "loss": 0.811, "step": 22893 }, { "epoch": 1.55118910495291, "grad_norm": 7.502788543701172, "learning_rate": 6.968581011705113e-05, "loss": 0.7988, "step": 22894 }, { "epoch": 1.551256860220882, "grad_norm": 5.405455589294434, "learning_rate": 6.968444109795332e-05, "loss": 0.6973, "step": 22895 }, { "epoch": 1.5513246154888543, "grad_norm": 8.637812614440918, "learning_rate": 6.96830720788555e-05, "loss": 0.6486, "step": 22896 }, { "epoch": 1.5513923707568265, "grad_norm": 5.3299784660339355, "learning_rate": 6.968170305975768e-05, "loss": 0.78, "step": 22897 }, { "epoch": 1.5514601260247984, "grad_norm": 4.934391498565674, "learning_rate": 6.968033404065986e-05, "loss": 0.6073, "step": 22898 }, { "epoch": 1.5515278812927704, "grad_norm": 5.697473049163818, "learning_rate": 6.967896502156206e-05, "loss": 0.4224, "step": 22899 }, { "epoch": 1.5515956365607426, "grad_norm": 4.9552693367004395, "learning_rate": 6.967759600246424e-05, "loss": 0.5327, "step": 22900 }, { "epoch": 1.5516633918287148, "grad_norm": 14.300459861755371, "learning_rate": 6.967622698336642e-05, "loss": 0.6281, "step": 22901 }, { "epoch": 1.5517311470966868, "grad_norm": 6.0964813232421875, "learning_rate": 6.96748579642686e-05, "loss": 0.6034, "step": 22902 }, { "epoch": 1.5517989023646588, "grad_norm": 10.064780235290527, "learning_rate": 6.967348894517078e-05, "loss": 0.6734, "step": 22903 }, { "epoch": 1.551866657632631, "grad_norm": 5.763314723968506, "learning_rate": 6.967211992607297e-05, "loss": 0.7331, "step": 22904 }, { "epoch": 1.551934412900603, "grad_norm": 5.868747234344482, "learning_rate": 6.967075090697515e-05, "loss": 0.7055, "step": 22905 }, { "epoch": 1.552002168168575, "grad_norm": 5.09495735168457, "learning_rate": 6.966938188787733e-05, "loss": 0.8904, "step": 22906 }, { "epoch": 1.5520699234365471, "grad_norm": 5.22659158706665, "learning_rate": 6.966801286877953e-05, "loss": 0.5788, "step": 22907 }, { "epoch": 1.5521376787045194, "grad_norm": 5.142848014831543, "learning_rate": 6.96666438496817e-05, "loss": 0.7075, "step": 22908 }, { "epoch": 1.5522054339724913, "grad_norm": 4.5463480949401855, "learning_rate": 6.966527483058389e-05, "loss": 0.6302, "step": 22909 }, { "epoch": 1.5522731892404633, "grad_norm": 4.888208389282227, "learning_rate": 6.966390581148608e-05, "loss": 0.5814, "step": 22910 }, { "epoch": 1.5523409445084355, "grad_norm": 6.547183513641357, "learning_rate": 6.966253679238826e-05, "loss": 0.7755, "step": 22911 }, { "epoch": 1.5524086997764077, "grad_norm": 5.220139503479004, "learning_rate": 6.966116777329044e-05, "loss": 0.5403, "step": 22912 }, { "epoch": 1.5524764550443797, "grad_norm": 5.74117374420166, "learning_rate": 6.965979875419264e-05, "loss": 0.7256, "step": 22913 }, { "epoch": 1.5525442103123517, "grad_norm": 4.913910865783691, "learning_rate": 6.965842973509482e-05, "loss": 0.6385, "step": 22914 }, { "epoch": 1.5526119655803239, "grad_norm": 6.618523597717285, "learning_rate": 6.9657060715997e-05, "loss": 0.462, "step": 22915 }, { "epoch": 1.552679720848296, "grad_norm": 4.9656758308410645, "learning_rate": 6.965569169689918e-05, "loss": 0.6329, "step": 22916 }, { "epoch": 1.552747476116268, "grad_norm": 8.323156356811523, "learning_rate": 6.965432267780136e-05, "loss": 0.5001, "step": 22917 }, { "epoch": 1.55281523138424, "grad_norm": 4.925621509552002, "learning_rate": 6.965295365870355e-05, "loss": 0.6666, "step": 22918 }, { "epoch": 1.5528829866522122, "grad_norm": 6.43455171585083, "learning_rate": 6.965158463960573e-05, "loss": 0.7537, "step": 22919 }, { "epoch": 1.5529507419201845, "grad_norm": 5.8788933753967285, "learning_rate": 6.965021562050791e-05, "loss": 0.5765, "step": 22920 }, { "epoch": 1.5530184971881564, "grad_norm": 4.524697780609131, "learning_rate": 6.964884660141009e-05, "loss": 0.7419, "step": 22921 }, { "epoch": 1.5530862524561284, "grad_norm": 3.7928884029388428, "learning_rate": 6.964747758231229e-05, "loss": 0.6797, "step": 22922 }, { "epoch": 1.5531540077241006, "grad_norm": 7.946840286254883, "learning_rate": 6.964610856321447e-05, "loss": 0.8545, "step": 22923 }, { "epoch": 1.5532217629920726, "grad_norm": 5.44534158706665, "learning_rate": 6.964473954411665e-05, "loss": 0.7446, "step": 22924 }, { "epoch": 1.5532895182600446, "grad_norm": 4.381308555603027, "learning_rate": 6.964337052501883e-05, "loss": 0.6725, "step": 22925 }, { "epoch": 1.5533572735280168, "grad_norm": 6.133522987365723, "learning_rate": 6.9642001505921e-05, "loss": 0.5149, "step": 22926 }, { "epoch": 1.553425028795989, "grad_norm": 6.905616760253906, "learning_rate": 6.96406324868232e-05, "loss": 0.6946, "step": 22927 }, { "epoch": 1.553492784063961, "grad_norm": 5.2961530685424805, "learning_rate": 6.963926346772538e-05, "loss": 0.5556, "step": 22928 }, { "epoch": 1.553560539331933, "grad_norm": 8.618420600891113, "learning_rate": 6.963789444862756e-05, "loss": 0.8473, "step": 22929 }, { "epoch": 1.5536282945999051, "grad_norm": 4.090160846710205, "learning_rate": 6.963652542952974e-05, "loss": 0.6992, "step": 22930 }, { "epoch": 1.5536960498678773, "grad_norm": 6.597294807434082, "learning_rate": 6.963515641043194e-05, "loss": 0.9378, "step": 22931 }, { "epoch": 1.5537638051358493, "grad_norm": 8.417004585266113, "learning_rate": 6.963378739133412e-05, "loss": 0.655, "step": 22932 }, { "epoch": 1.5538315604038213, "grad_norm": 6.700901031494141, "learning_rate": 6.96324183722363e-05, "loss": 0.634, "step": 22933 }, { "epoch": 1.5538993156717935, "grad_norm": 7.631453037261963, "learning_rate": 6.963104935313848e-05, "loss": 0.6162, "step": 22934 }, { "epoch": 1.5539670709397657, "grad_norm": 8.01236629486084, "learning_rate": 6.962968033404066e-05, "loss": 0.7041, "step": 22935 }, { "epoch": 1.5540348262077377, "grad_norm": 5.591830730438232, "learning_rate": 6.962831131494285e-05, "loss": 0.4354, "step": 22936 }, { "epoch": 1.5541025814757097, "grad_norm": 6.470213890075684, "learning_rate": 6.962694229584503e-05, "loss": 0.7326, "step": 22937 }, { "epoch": 1.5541703367436819, "grad_norm": 5.343122482299805, "learning_rate": 6.962557327674721e-05, "loss": 0.6453, "step": 22938 }, { "epoch": 1.5542380920116539, "grad_norm": 4.678362846374512, "learning_rate": 6.962420425764939e-05, "loss": 0.6363, "step": 22939 }, { "epoch": 1.5543058472796258, "grad_norm": 6.415695667266846, "learning_rate": 6.962283523855157e-05, "loss": 0.7744, "step": 22940 }, { "epoch": 1.554373602547598, "grad_norm": 4.751974582672119, "learning_rate": 6.962146621945377e-05, "loss": 0.6912, "step": 22941 }, { "epoch": 1.5544413578155702, "grad_norm": 6.279700756072998, "learning_rate": 6.962009720035595e-05, "loss": 0.6235, "step": 22942 }, { "epoch": 1.5545091130835422, "grad_norm": 6.903271198272705, "learning_rate": 6.961872818125813e-05, "loss": 0.8095, "step": 22943 }, { "epoch": 1.5545768683515142, "grad_norm": 5.657155513763428, "learning_rate": 6.961735916216031e-05, "loss": 0.6912, "step": 22944 }, { "epoch": 1.5546446236194864, "grad_norm": 6.83554220199585, "learning_rate": 6.96159901430625e-05, "loss": 0.5521, "step": 22945 }, { "epoch": 1.5547123788874586, "grad_norm": 8.965258598327637, "learning_rate": 6.961462112396468e-05, "loss": 0.7283, "step": 22946 }, { "epoch": 1.5547801341554306, "grad_norm": 5.204257011413574, "learning_rate": 6.961325210486686e-05, "loss": 0.7182, "step": 22947 }, { "epoch": 1.5548478894234026, "grad_norm": 5.924692153930664, "learning_rate": 6.961188308576904e-05, "loss": 0.6772, "step": 22948 }, { "epoch": 1.5549156446913748, "grad_norm": 10.214069366455078, "learning_rate": 6.961051406667122e-05, "loss": 0.6887, "step": 22949 }, { "epoch": 1.554983399959347, "grad_norm": 6.267570495605469, "learning_rate": 6.960914504757342e-05, "loss": 0.6182, "step": 22950 }, { "epoch": 1.555051155227319, "grad_norm": 4.7631306648254395, "learning_rate": 6.96077760284756e-05, "loss": 0.7565, "step": 22951 }, { "epoch": 1.555118910495291, "grad_norm": 6.165985584259033, "learning_rate": 6.960640700937778e-05, "loss": 0.801, "step": 22952 }, { "epoch": 1.5551866657632631, "grad_norm": 6.165381908416748, "learning_rate": 6.960503799027997e-05, "loss": 0.6632, "step": 22953 }, { "epoch": 1.5552544210312351, "grad_norm": 6.269731521606445, "learning_rate": 6.960366897118215e-05, "loss": 0.5848, "step": 22954 }, { "epoch": 1.555322176299207, "grad_norm": 8.338239669799805, "learning_rate": 6.960229995208433e-05, "loss": 0.5119, "step": 22955 }, { "epoch": 1.5553899315671793, "grad_norm": 8.706860542297363, "learning_rate": 6.960093093298653e-05, "loss": 0.4472, "step": 22956 }, { "epoch": 1.5554576868351515, "grad_norm": 5.896061420440674, "learning_rate": 6.95995619138887e-05, "loss": 0.5175, "step": 22957 }, { "epoch": 1.5555254421031235, "grad_norm": 5.299034595489502, "learning_rate": 6.959819289479089e-05, "loss": 0.4446, "step": 22958 }, { "epoch": 1.5555931973710955, "grad_norm": 4.742358207702637, "learning_rate": 6.959682387569308e-05, "loss": 0.6318, "step": 22959 }, { "epoch": 1.5556609526390677, "grad_norm": 5.286736488342285, "learning_rate": 6.959545485659526e-05, "loss": 0.6602, "step": 22960 }, { "epoch": 1.5557287079070399, "grad_norm": 5.2145233154296875, "learning_rate": 6.959408583749744e-05, "loss": 0.666, "step": 22961 }, { "epoch": 1.5557964631750119, "grad_norm": 6.872986316680908, "learning_rate": 6.959271681839962e-05, "loss": 0.6452, "step": 22962 }, { "epoch": 1.5558642184429838, "grad_norm": 4.895934104919434, "learning_rate": 6.95913477993018e-05, "loss": 0.9016, "step": 22963 }, { "epoch": 1.555931973710956, "grad_norm": 5.867410659790039, "learning_rate": 6.9589978780204e-05, "loss": 0.8369, "step": 22964 }, { "epoch": 1.5559997289789282, "grad_norm": 7.243858337402344, "learning_rate": 6.958860976110618e-05, "loss": 0.5562, "step": 22965 }, { "epoch": 1.5560674842469002, "grad_norm": 7.75425386428833, "learning_rate": 6.958724074200836e-05, "loss": 0.6126, "step": 22966 }, { "epoch": 1.5561352395148722, "grad_norm": 5.747697353363037, "learning_rate": 6.958587172291054e-05, "loss": 0.7775, "step": 22967 }, { "epoch": 1.5562029947828444, "grad_norm": 6.530494689941406, "learning_rate": 6.958450270381273e-05, "loss": 0.654, "step": 22968 }, { "epoch": 1.5562707500508166, "grad_norm": 6.505775451660156, "learning_rate": 6.958313368471491e-05, "loss": 0.8015, "step": 22969 }, { "epoch": 1.5563385053187886, "grad_norm": 5.626256942749023, "learning_rate": 6.958176466561709e-05, "loss": 0.5463, "step": 22970 }, { "epoch": 1.5564062605867606, "grad_norm": 4.677628993988037, "learning_rate": 6.958039564651927e-05, "loss": 0.5353, "step": 22971 }, { "epoch": 1.5564740158547328, "grad_norm": 5.6429243087768555, "learning_rate": 6.957902662742145e-05, "loss": 0.7972, "step": 22972 }, { "epoch": 1.5565417711227048, "grad_norm": 5.64269495010376, "learning_rate": 6.957765760832365e-05, "loss": 0.622, "step": 22973 }, { "epoch": 1.5566095263906767, "grad_norm": 6.696455955505371, "learning_rate": 6.957628858922583e-05, "loss": 0.669, "step": 22974 }, { "epoch": 1.556677281658649, "grad_norm": 6.871999740600586, "learning_rate": 6.9574919570128e-05, "loss": 0.6162, "step": 22975 }, { "epoch": 1.5567450369266211, "grad_norm": 6.070414066314697, "learning_rate": 6.957355055103019e-05, "loss": 0.6546, "step": 22976 }, { "epoch": 1.5568127921945931, "grad_norm": 7.870287895202637, "learning_rate": 6.957218153193238e-05, "loss": 0.6818, "step": 22977 }, { "epoch": 1.556880547462565, "grad_norm": 5.515468120574951, "learning_rate": 6.957081251283456e-05, "loss": 0.6596, "step": 22978 }, { "epoch": 1.5569483027305373, "grad_norm": 6.090325355529785, "learning_rate": 6.956944349373674e-05, "loss": 0.7969, "step": 22979 }, { "epoch": 1.5570160579985095, "grad_norm": 5.682682037353516, "learning_rate": 6.956807447463892e-05, "loss": 0.8196, "step": 22980 }, { "epoch": 1.5570838132664815, "grad_norm": 5.912393569946289, "learning_rate": 6.95667054555411e-05, "loss": 0.7736, "step": 22981 }, { "epoch": 1.5571515685344535, "grad_norm": 6.311899662017822, "learning_rate": 6.95653364364433e-05, "loss": 0.872, "step": 22982 }, { "epoch": 1.5572193238024257, "grad_norm": 4.768764019012451, "learning_rate": 6.956396741734548e-05, "loss": 0.678, "step": 22983 }, { "epoch": 1.5572870790703979, "grad_norm": 5.141350746154785, "learning_rate": 6.956259839824766e-05, "loss": 0.6446, "step": 22984 }, { "epoch": 1.5573548343383699, "grad_norm": 5.541138648986816, "learning_rate": 6.956122937914984e-05, "loss": 0.7419, "step": 22985 }, { "epoch": 1.5574225896063418, "grad_norm": 4.94514799118042, "learning_rate": 6.955986036005203e-05, "loss": 0.7667, "step": 22986 }, { "epoch": 1.557490344874314, "grad_norm": 5.17431640625, "learning_rate": 6.955849134095421e-05, "loss": 0.7393, "step": 22987 }, { "epoch": 1.557558100142286, "grad_norm": 4.772801876068115, "learning_rate": 6.955712232185639e-05, "loss": 0.4294, "step": 22988 }, { "epoch": 1.557625855410258, "grad_norm": 4.8459601402282715, "learning_rate": 6.955575330275857e-05, "loss": 0.6517, "step": 22989 }, { "epoch": 1.5576936106782302, "grad_norm": 5.010031223297119, "learning_rate": 6.955438428366075e-05, "loss": 0.9243, "step": 22990 }, { "epoch": 1.5577613659462024, "grad_norm": 4.776615142822266, "learning_rate": 6.955301526456295e-05, "loss": 0.5768, "step": 22991 }, { "epoch": 1.5578291212141744, "grad_norm": 5.087480545043945, "learning_rate": 6.955164624546513e-05, "loss": 0.5644, "step": 22992 }, { "epoch": 1.5578968764821464, "grad_norm": 5.679810523986816, "learning_rate": 6.95502772263673e-05, "loss": 0.6583, "step": 22993 }, { "epoch": 1.5579646317501186, "grad_norm": 6.166821479797363, "learning_rate": 6.954890820726949e-05, "loss": 0.5384, "step": 22994 }, { "epoch": 1.5580323870180908, "grad_norm": 5.199182987213135, "learning_rate": 6.954753918817167e-05, "loss": 0.5047, "step": 22995 }, { "epoch": 1.5581001422860628, "grad_norm": 5.630538463592529, "learning_rate": 6.954617016907386e-05, "loss": 0.5135, "step": 22996 }, { "epoch": 1.5581678975540347, "grad_norm": 4.790014266967773, "learning_rate": 6.954480114997604e-05, "loss": 0.4848, "step": 22997 }, { "epoch": 1.558235652822007, "grad_norm": 4.634469985961914, "learning_rate": 6.954343213087822e-05, "loss": 0.5942, "step": 22998 }, { "epoch": 1.5583034080899791, "grad_norm": 5.5768609046936035, "learning_rate": 6.954206311178042e-05, "loss": 0.6965, "step": 22999 }, { "epoch": 1.5583711633579511, "grad_norm": 5.097869873046875, "learning_rate": 6.95406940926826e-05, "loss": 0.7572, "step": 23000 }, { "epoch": 1.558438918625923, "grad_norm": 5.287055492401123, "learning_rate": 6.953932507358478e-05, "loss": 0.6225, "step": 23001 }, { "epoch": 1.5585066738938953, "grad_norm": 6.704644203186035, "learning_rate": 6.953795605448697e-05, "loss": 0.5958, "step": 23002 }, { "epoch": 1.5585744291618673, "grad_norm": 5.449206829071045, "learning_rate": 6.953658703538915e-05, "loss": 0.6641, "step": 23003 }, { "epoch": 1.5586421844298393, "grad_norm": 5.7397332191467285, "learning_rate": 6.953521801629133e-05, "loss": 0.8075, "step": 23004 }, { "epoch": 1.5587099396978115, "grad_norm": 5.882147312164307, "learning_rate": 6.953384899719352e-05, "loss": 0.5603, "step": 23005 }, { "epoch": 1.5587776949657837, "grad_norm": 11.437142372131348, "learning_rate": 6.95324799780957e-05, "loss": 0.7027, "step": 23006 }, { "epoch": 1.5588454502337556, "grad_norm": 4.872852802276611, "learning_rate": 6.953111095899789e-05, "loss": 0.5409, "step": 23007 }, { "epoch": 1.5589132055017276, "grad_norm": 6.0569748878479, "learning_rate": 6.952974193990007e-05, "loss": 0.5433, "step": 23008 }, { "epoch": 1.5589809607696998, "grad_norm": 6.669095516204834, "learning_rate": 6.952837292080226e-05, "loss": 0.4753, "step": 23009 }, { "epoch": 1.559048716037672, "grad_norm": 6.855456352233887, "learning_rate": 6.952700390170444e-05, "loss": 0.6907, "step": 23010 }, { "epoch": 1.559116471305644, "grad_norm": 6.425726413726807, "learning_rate": 6.952563488260662e-05, "loss": 0.9886, "step": 23011 }, { "epoch": 1.559184226573616, "grad_norm": 6.336601734161377, "learning_rate": 6.95242658635088e-05, "loss": 0.6053, "step": 23012 }, { "epoch": 1.5592519818415882, "grad_norm": 5.87202262878418, "learning_rate": 6.952289684441098e-05, "loss": 0.54, "step": 23013 }, { "epoch": 1.5593197371095604, "grad_norm": 5.117269039154053, "learning_rate": 6.952152782531317e-05, "loss": 0.5863, "step": 23014 }, { "epoch": 1.5593874923775324, "grad_norm": 8.808147430419922, "learning_rate": 6.952015880621536e-05, "loss": 0.7313, "step": 23015 }, { "epoch": 1.5594552476455044, "grad_norm": 11.07321834564209, "learning_rate": 6.951878978711754e-05, "loss": 0.656, "step": 23016 }, { "epoch": 1.5595230029134766, "grad_norm": 5.772731304168701, "learning_rate": 6.951742076801972e-05, "loss": 0.64, "step": 23017 }, { "epoch": 1.5595907581814488, "grad_norm": 4.789035320281982, "learning_rate": 6.95160517489219e-05, "loss": 0.9042, "step": 23018 }, { "epoch": 1.5596585134494207, "grad_norm": 5.848088264465332, "learning_rate": 6.951468272982409e-05, "loss": 0.7198, "step": 23019 }, { "epoch": 1.5597262687173927, "grad_norm": 6.563215732574463, "learning_rate": 6.951331371072627e-05, "loss": 0.7933, "step": 23020 }, { "epoch": 1.559794023985365, "grad_norm": 5.688724040985107, "learning_rate": 6.951194469162845e-05, "loss": 0.7032, "step": 23021 }, { "epoch": 1.559861779253337, "grad_norm": 5.65102481842041, "learning_rate": 6.951057567253063e-05, "loss": 0.5722, "step": 23022 }, { "epoch": 1.559929534521309, "grad_norm": 6.1572089195251465, "learning_rate": 6.950920665343283e-05, "loss": 0.7317, "step": 23023 }, { "epoch": 1.559997289789281, "grad_norm": 6.804615020751953, "learning_rate": 6.9507837634335e-05, "loss": 0.9452, "step": 23024 }, { "epoch": 1.5600650450572533, "grad_norm": 5.338657855987549, "learning_rate": 6.950646861523719e-05, "loss": 0.6702, "step": 23025 }, { "epoch": 1.5601328003252253, "grad_norm": 5.478714942932129, "learning_rate": 6.950509959613937e-05, "loss": 0.6809, "step": 23026 }, { "epoch": 1.5602005555931973, "grad_norm": 4.142380237579346, "learning_rate": 6.950373057704155e-05, "loss": 0.6472, "step": 23027 }, { "epoch": 1.5602683108611695, "grad_norm": 5.571270942687988, "learning_rate": 6.950236155794374e-05, "loss": 0.5548, "step": 23028 }, { "epoch": 1.5603360661291417, "grad_norm": 7.114266395568848, "learning_rate": 6.950099253884592e-05, "loss": 0.5876, "step": 23029 }, { "epoch": 1.5604038213971136, "grad_norm": 6.619661808013916, "learning_rate": 6.94996235197481e-05, "loss": 0.8395, "step": 23030 }, { "epoch": 1.5604715766650856, "grad_norm": 8.856958389282227, "learning_rate": 6.949825450065028e-05, "loss": 0.7196, "step": 23031 }, { "epoch": 1.5605393319330578, "grad_norm": 5.4331207275390625, "learning_rate": 6.949688548155248e-05, "loss": 0.6053, "step": 23032 }, { "epoch": 1.56060708720103, "grad_norm": 7.402438163757324, "learning_rate": 6.949551646245466e-05, "loss": 0.7566, "step": 23033 }, { "epoch": 1.560674842469002, "grad_norm": 6.066718578338623, "learning_rate": 6.949414744335684e-05, "loss": 0.7111, "step": 23034 }, { "epoch": 1.560742597736974, "grad_norm": 4.5193891525268555, "learning_rate": 6.949277842425902e-05, "loss": 0.5378, "step": 23035 }, { "epoch": 1.5608103530049462, "grad_norm": 6.2976179122924805, "learning_rate": 6.94914094051612e-05, "loss": 0.6654, "step": 23036 }, { "epoch": 1.5608781082729182, "grad_norm": 8.404053688049316, "learning_rate": 6.949004038606339e-05, "loss": 0.8766, "step": 23037 }, { "epoch": 1.5609458635408902, "grad_norm": 5.123511791229248, "learning_rate": 6.948867136696557e-05, "loss": 0.4243, "step": 23038 }, { "epoch": 1.5610136188088624, "grad_norm": 4.545421600341797, "learning_rate": 6.948730234786775e-05, "loss": 0.6162, "step": 23039 }, { "epoch": 1.5610813740768346, "grad_norm": 5.2261505126953125, "learning_rate": 6.948593332876993e-05, "loss": 0.5476, "step": 23040 }, { "epoch": 1.5611491293448065, "grad_norm": 5.637330055236816, "learning_rate": 6.948456430967211e-05, "loss": 0.7822, "step": 23041 }, { "epoch": 1.5612168846127785, "grad_norm": 5.146833419799805, "learning_rate": 6.94831952905743e-05, "loss": 0.793, "step": 23042 }, { "epoch": 1.5612846398807507, "grad_norm": 6.0013275146484375, "learning_rate": 6.948182627147649e-05, "loss": 0.7117, "step": 23043 }, { "epoch": 1.561352395148723, "grad_norm": 4.48512601852417, "learning_rate": 6.948045725237867e-05, "loss": 0.7411, "step": 23044 }, { "epoch": 1.561420150416695, "grad_norm": 5.404778480529785, "learning_rate": 6.947908823328085e-05, "loss": 0.6076, "step": 23045 }, { "epoch": 1.5614879056846669, "grad_norm": 6.4304399490356445, "learning_rate": 6.947771921418304e-05, "loss": 0.5514, "step": 23046 }, { "epoch": 1.561555660952639, "grad_norm": 4.997159957885742, "learning_rate": 6.947635019508522e-05, "loss": 0.5436, "step": 23047 }, { "epoch": 1.5616234162206113, "grad_norm": 6.29677677154541, "learning_rate": 6.947498117598741e-05, "loss": 0.7604, "step": 23048 }, { "epoch": 1.5616911714885833, "grad_norm": 6.7044548988342285, "learning_rate": 6.94736121568896e-05, "loss": 0.7946, "step": 23049 }, { "epoch": 1.5617589267565553, "grad_norm": 4.592782020568848, "learning_rate": 6.947224313779178e-05, "loss": 0.7137, "step": 23050 }, { "epoch": 1.5618266820245275, "grad_norm": 4.340238094329834, "learning_rate": 6.947087411869397e-05, "loss": 0.557, "step": 23051 }, { "epoch": 1.5618944372924994, "grad_norm": 4.515213489532471, "learning_rate": 6.946950509959615e-05, "loss": 0.537, "step": 23052 }, { "epoch": 1.5619621925604714, "grad_norm": 6.154815196990967, "learning_rate": 6.946813608049833e-05, "loss": 0.5667, "step": 23053 }, { "epoch": 1.5620299478284436, "grad_norm": 6.747567653656006, "learning_rate": 6.946676706140051e-05, "loss": 0.9413, "step": 23054 }, { "epoch": 1.5620977030964158, "grad_norm": 6.009265899658203, "learning_rate": 6.94653980423027e-05, "loss": 0.5767, "step": 23055 }, { "epoch": 1.5621654583643878, "grad_norm": 6.456292629241943, "learning_rate": 6.946402902320488e-05, "loss": 0.6855, "step": 23056 }, { "epoch": 1.5622332136323598, "grad_norm": 7.14618444442749, "learning_rate": 6.946266000410707e-05, "loss": 0.6726, "step": 23057 }, { "epoch": 1.562300968900332, "grad_norm": 5.69216775894165, "learning_rate": 6.946129098500925e-05, "loss": 0.7585, "step": 23058 }, { "epoch": 1.5623687241683042, "grad_norm": 6.227018356323242, "learning_rate": 6.945992196591143e-05, "loss": 0.7307, "step": 23059 }, { "epoch": 1.5624364794362762, "grad_norm": 4.853418350219727, "learning_rate": 6.945855294681362e-05, "loss": 0.6817, "step": 23060 }, { "epoch": 1.5625042347042482, "grad_norm": 4.889648914337158, "learning_rate": 6.94571839277158e-05, "loss": 0.468, "step": 23061 }, { "epoch": 1.5625719899722204, "grad_norm": 5.58096981048584, "learning_rate": 6.945581490861798e-05, "loss": 0.5904, "step": 23062 }, { "epoch": 1.5626397452401926, "grad_norm": 5.775217056274414, "learning_rate": 6.945444588952016e-05, "loss": 0.7427, "step": 23063 }, { "epoch": 1.5627075005081645, "grad_norm": 8.675199508666992, "learning_rate": 6.945307687042235e-05, "loss": 0.7465, "step": 23064 }, { "epoch": 1.5627752557761365, "grad_norm": 6.030036926269531, "learning_rate": 6.945170785132453e-05, "loss": 0.6048, "step": 23065 }, { "epoch": 1.5628430110441087, "grad_norm": 7.775744915008545, "learning_rate": 6.945033883222672e-05, "loss": 0.6476, "step": 23066 }, { "epoch": 1.562910766312081, "grad_norm": 5.312354564666748, "learning_rate": 6.94489698131289e-05, "loss": 0.6165, "step": 23067 }, { "epoch": 1.562978521580053, "grad_norm": 8.013888359069824, "learning_rate": 6.944760079403108e-05, "loss": 0.8628, "step": 23068 }, { "epoch": 1.5630462768480249, "grad_norm": 5.559372425079346, "learning_rate": 6.944623177493327e-05, "loss": 0.5954, "step": 23069 }, { "epoch": 1.563114032115997, "grad_norm": 6.006679058074951, "learning_rate": 6.944486275583545e-05, "loss": 0.6468, "step": 23070 }, { "epoch": 1.563181787383969, "grad_norm": 5.3813157081604, "learning_rate": 6.944349373673763e-05, "loss": 0.5163, "step": 23071 }, { "epoch": 1.563249542651941, "grad_norm": 5.673634052276611, "learning_rate": 6.944212471763981e-05, "loss": 0.7469, "step": 23072 }, { "epoch": 1.5633172979199133, "grad_norm": 6.378027439117432, "learning_rate": 6.944075569854199e-05, "loss": 0.8334, "step": 23073 }, { "epoch": 1.5633850531878855, "grad_norm": 4.59268045425415, "learning_rate": 6.943938667944419e-05, "loss": 0.6729, "step": 23074 }, { "epoch": 1.5634528084558574, "grad_norm": 5.801781177520752, "learning_rate": 6.943801766034637e-05, "loss": 0.5111, "step": 23075 }, { "epoch": 1.5635205637238294, "grad_norm": 5.474250316619873, "learning_rate": 6.943664864124855e-05, "loss": 0.7842, "step": 23076 }, { "epoch": 1.5635883189918016, "grad_norm": 7.139461994171143, "learning_rate": 6.943527962215073e-05, "loss": 0.9001, "step": 23077 }, { "epoch": 1.5636560742597738, "grad_norm": 4.895784378051758, "learning_rate": 6.943391060305292e-05, "loss": 0.6541, "step": 23078 }, { "epoch": 1.5637238295277458, "grad_norm": 3.821998119354248, "learning_rate": 6.94325415839551e-05, "loss": 0.5329, "step": 23079 }, { "epoch": 1.5637915847957178, "grad_norm": 5.580734729766846, "learning_rate": 6.943117256485728e-05, "loss": 0.5946, "step": 23080 }, { "epoch": 1.56385934006369, "grad_norm": 6.179320335388184, "learning_rate": 6.942980354575946e-05, "loss": 0.8187, "step": 23081 }, { "epoch": 1.5639270953316622, "grad_norm": 9.023565292358398, "learning_rate": 6.942843452666164e-05, "loss": 0.6519, "step": 23082 }, { "epoch": 1.5639948505996342, "grad_norm": 6.453300476074219, "learning_rate": 6.942706550756384e-05, "loss": 0.6083, "step": 23083 }, { "epoch": 1.5640626058676061, "grad_norm": 6.478032112121582, "learning_rate": 6.942569648846602e-05, "loss": 0.945, "step": 23084 }, { "epoch": 1.5641303611355784, "grad_norm": 6.357826232910156, "learning_rate": 6.94243274693682e-05, "loss": 0.7252, "step": 23085 }, { "epoch": 1.5641981164035503, "grad_norm": 5.602384567260742, "learning_rate": 6.942295845027038e-05, "loss": 0.7994, "step": 23086 }, { "epoch": 1.5642658716715223, "grad_norm": 4.214155673980713, "learning_rate": 6.942158943117257e-05, "loss": 0.755, "step": 23087 }, { "epoch": 1.5643336269394945, "grad_norm": 6.149993896484375, "learning_rate": 6.942022041207475e-05, "loss": 0.5499, "step": 23088 }, { "epoch": 1.5644013822074667, "grad_norm": 7.872978687286377, "learning_rate": 6.941885139297693e-05, "loss": 0.4973, "step": 23089 }, { "epoch": 1.5644691374754387, "grad_norm": 5.1022629737854, "learning_rate": 6.941748237387911e-05, "loss": 0.6975, "step": 23090 }, { "epoch": 1.5645368927434107, "grad_norm": 5.628147125244141, "learning_rate": 6.941611335478129e-05, "loss": 0.4676, "step": 23091 }, { "epoch": 1.5646046480113829, "grad_norm": 6.358670234680176, "learning_rate": 6.941474433568349e-05, "loss": 0.5402, "step": 23092 }, { "epoch": 1.564672403279355, "grad_norm": 5.886524200439453, "learning_rate": 6.941337531658567e-05, "loss": 0.4879, "step": 23093 }, { "epoch": 1.564740158547327, "grad_norm": 6.901047706604004, "learning_rate": 6.941200629748785e-05, "loss": 0.7326, "step": 23094 }, { "epoch": 1.564807913815299, "grad_norm": 5.984903335571289, "learning_rate": 6.941063727839004e-05, "loss": 0.7967, "step": 23095 }, { "epoch": 1.5648756690832712, "grad_norm": 4.539672374725342, "learning_rate": 6.940926825929222e-05, "loss": 0.7107, "step": 23096 }, { "epoch": 1.5649434243512435, "grad_norm": 4.892421245574951, "learning_rate": 6.94078992401944e-05, "loss": 0.6083, "step": 23097 }, { "epoch": 1.5650111796192154, "grad_norm": 7.366227149963379, "learning_rate": 6.94065302210966e-05, "loss": 0.8855, "step": 23098 }, { "epoch": 1.5650789348871874, "grad_norm": 7.440433025360107, "learning_rate": 6.940516120199877e-05, "loss": 0.6595, "step": 23099 }, { "epoch": 1.5651466901551596, "grad_norm": 5.768868923187256, "learning_rate": 6.940379218290096e-05, "loss": 0.6512, "step": 23100 }, { "epoch": 1.5652144454231316, "grad_norm": 6.3930463790893555, "learning_rate": 6.940242316380315e-05, "loss": 0.7204, "step": 23101 }, { "epoch": 1.5652822006911036, "grad_norm": 6.714649677276611, "learning_rate": 6.940105414470533e-05, "loss": 0.6305, "step": 23102 }, { "epoch": 1.5653499559590758, "grad_norm": 8.150721549987793, "learning_rate": 6.939968512560751e-05, "loss": 0.6533, "step": 23103 }, { "epoch": 1.565417711227048, "grad_norm": 12.461868286132812, "learning_rate": 6.939831610650969e-05, "loss": 0.4865, "step": 23104 }, { "epoch": 1.56548546649502, "grad_norm": 7.669212818145752, "learning_rate": 6.939694708741187e-05, "loss": 0.638, "step": 23105 }, { "epoch": 1.565553221762992, "grad_norm": 7.218318939208984, "learning_rate": 6.939557806831406e-05, "loss": 0.7107, "step": 23106 }, { "epoch": 1.5656209770309641, "grad_norm": 5.111324310302734, "learning_rate": 6.939420904921624e-05, "loss": 0.5796, "step": 23107 }, { "epoch": 1.5656887322989363, "grad_norm": 7.453074932098389, "learning_rate": 6.939284003011843e-05, "loss": 0.6117, "step": 23108 }, { "epoch": 1.5657564875669083, "grad_norm": 5.923208236694336, "learning_rate": 6.93914710110206e-05, "loss": 0.5385, "step": 23109 }, { "epoch": 1.5658242428348803, "grad_norm": 4.465889930725098, "learning_rate": 6.93901019919228e-05, "loss": 0.5103, "step": 23110 }, { "epoch": 1.5658919981028525, "grad_norm": 5.005853176116943, "learning_rate": 6.938873297282498e-05, "loss": 0.8238, "step": 23111 }, { "epoch": 1.5659597533708247, "grad_norm": 4.6972174644470215, "learning_rate": 6.938736395372716e-05, "loss": 0.6499, "step": 23112 }, { "epoch": 1.5660275086387967, "grad_norm": 6.771262168884277, "learning_rate": 6.938599493462934e-05, "loss": 0.8397, "step": 23113 }, { "epoch": 1.5660952639067687, "grad_norm": 5.915985107421875, "learning_rate": 6.938462591553152e-05, "loss": 0.7923, "step": 23114 }, { "epoch": 1.5661630191747409, "grad_norm": 7.00087308883667, "learning_rate": 6.938325689643371e-05, "loss": 0.7242, "step": 23115 }, { "epoch": 1.566230774442713, "grad_norm": 7.781373977661133, "learning_rate": 6.93818878773359e-05, "loss": 0.7199, "step": 23116 }, { "epoch": 1.5662985297106848, "grad_norm": 9.416449546813965, "learning_rate": 6.938051885823808e-05, "loss": 0.5935, "step": 23117 }, { "epoch": 1.566366284978657, "grad_norm": 4.990318298339844, "learning_rate": 6.937914983914026e-05, "loss": 0.7034, "step": 23118 }, { "epoch": 1.5664340402466292, "grad_norm": 5.702027320861816, "learning_rate": 6.937778082004245e-05, "loss": 0.6364, "step": 23119 }, { "epoch": 1.5665017955146012, "grad_norm": 6.446103572845459, "learning_rate": 6.937641180094463e-05, "loss": 0.546, "step": 23120 }, { "epoch": 1.5665695507825732, "grad_norm": 5.711670398712158, "learning_rate": 6.937504278184681e-05, "loss": 0.5513, "step": 23121 }, { "epoch": 1.5666373060505454, "grad_norm": 5.603619575500488, "learning_rate": 6.937367376274899e-05, "loss": 0.6309, "step": 23122 }, { "epoch": 1.5667050613185176, "grad_norm": 5.941346645355225, "learning_rate": 6.937230474365117e-05, "loss": 0.8767, "step": 23123 }, { "epoch": 1.5667728165864896, "grad_norm": 4.299803733825684, "learning_rate": 6.937093572455336e-05, "loss": 0.5132, "step": 23124 }, { "epoch": 1.5668405718544616, "grad_norm": 6.985152721405029, "learning_rate": 6.936956670545555e-05, "loss": 0.6786, "step": 23125 }, { "epoch": 1.5669083271224338, "grad_norm": 6.02107048034668, "learning_rate": 6.936819768635773e-05, "loss": 0.8098, "step": 23126 }, { "epoch": 1.566976082390406, "grad_norm": 6.017368316650391, "learning_rate": 6.93668286672599e-05, "loss": 0.6089, "step": 23127 }, { "epoch": 1.567043837658378, "grad_norm": 6.797067165374756, "learning_rate": 6.936545964816209e-05, "loss": 0.5223, "step": 23128 }, { "epoch": 1.56711159292635, "grad_norm": 6.761626243591309, "learning_rate": 6.936409062906428e-05, "loss": 0.5525, "step": 23129 }, { "epoch": 1.5671793481943221, "grad_norm": 4.871332168579102, "learning_rate": 6.936272160996646e-05, "loss": 0.4759, "step": 23130 }, { "epoch": 1.5672471034622943, "grad_norm": 5.279536247253418, "learning_rate": 6.936135259086864e-05, "loss": 0.4481, "step": 23131 }, { "epoch": 1.5673148587302663, "grad_norm": 7.2915849685668945, "learning_rate": 6.935998357177082e-05, "loss": 0.7152, "step": 23132 }, { "epoch": 1.5673826139982383, "grad_norm": 4.805109024047852, "learning_rate": 6.935861455267301e-05, "loss": 0.6362, "step": 23133 }, { "epoch": 1.5674503692662105, "grad_norm": 7.751153469085693, "learning_rate": 6.93572455335752e-05, "loss": 0.7144, "step": 23134 }, { "epoch": 1.5675181245341825, "grad_norm": 5.672407150268555, "learning_rate": 6.935587651447738e-05, "loss": 0.736, "step": 23135 }, { "epoch": 1.5675858798021545, "grad_norm": 6.232601642608643, "learning_rate": 6.935450749537956e-05, "loss": 0.5765, "step": 23136 }, { "epoch": 1.5676536350701267, "grad_norm": 6.287648677825928, "learning_rate": 6.935313847628174e-05, "loss": 0.7739, "step": 23137 }, { "epoch": 1.5677213903380989, "grad_norm": 6.2068023681640625, "learning_rate": 6.935176945718393e-05, "loss": 0.6293, "step": 23138 }, { "epoch": 1.5677891456060709, "grad_norm": 5.111085414886475, "learning_rate": 6.935040043808611e-05, "loss": 0.8103, "step": 23139 }, { "epoch": 1.5678569008740428, "grad_norm": 7.135025501251221, "learning_rate": 6.934903141898829e-05, "loss": 0.5146, "step": 23140 }, { "epoch": 1.567924656142015, "grad_norm": 10.522978782653809, "learning_rate": 6.934766239989048e-05, "loss": 0.6218, "step": 23141 }, { "epoch": 1.5679924114099872, "grad_norm": 7.032009124755859, "learning_rate": 6.934629338079267e-05, "loss": 0.7314, "step": 23142 }, { "epoch": 1.5680601666779592, "grad_norm": 5.428807258605957, "learning_rate": 6.934492436169485e-05, "loss": 0.6328, "step": 23143 }, { "epoch": 1.5681279219459312, "grad_norm": 9.494443893432617, "learning_rate": 6.934355534259704e-05, "loss": 0.846, "step": 23144 }, { "epoch": 1.5681956772139034, "grad_norm": 7.907226085662842, "learning_rate": 6.934218632349922e-05, "loss": 0.7131, "step": 23145 }, { "epoch": 1.5682634324818756, "grad_norm": 4.526653289794922, "learning_rate": 6.93408173044014e-05, "loss": 0.6683, "step": 23146 }, { "epoch": 1.5683311877498476, "grad_norm": 7.610047817230225, "learning_rate": 6.93394482853036e-05, "loss": 0.6798, "step": 23147 }, { "epoch": 1.5683989430178196, "grad_norm": 5.8663554191589355, "learning_rate": 6.933807926620577e-05, "loss": 0.6746, "step": 23148 }, { "epoch": 1.5684666982857918, "grad_norm": 4.51732063293457, "learning_rate": 6.933671024710795e-05, "loss": 0.7293, "step": 23149 }, { "epoch": 1.5685344535537638, "grad_norm": 5.421596527099609, "learning_rate": 6.933534122801013e-05, "loss": 0.6835, "step": 23150 }, { "epoch": 1.5686022088217357, "grad_norm": 6.837584972381592, "learning_rate": 6.933397220891232e-05, "loss": 0.9197, "step": 23151 }, { "epoch": 1.568669964089708, "grad_norm": 6.714905738830566, "learning_rate": 6.933260318981451e-05, "loss": 0.6742, "step": 23152 }, { "epoch": 1.5687377193576801, "grad_norm": 7.120656490325928, "learning_rate": 6.933123417071669e-05, "loss": 0.5739, "step": 23153 }, { "epoch": 1.5688054746256521, "grad_norm": 4.074704647064209, "learning_rate": 6.932986515161887e-05, "loss": 0.5892, "step": 23154 }, { "epoch": 1.568873229893624, "grad_norm": 5.988368988037109, "learning_rate": 6.932849613252105e-05, "loss": 0.8329, "step": 23155 }, { "epoch": 1.5689409851615963, "grad_norm": 5.790524482727051, "learning_rate": 6.932712711342324e-05, "loss": 0.6562, "step": 23156 }, { "epoch": 1.5690087404295685, "grad_norm": 6.093327522277832, "learning_rate": 6.932575809432542e-05, "loss": 0.5906, "step": 23157 }, { "epoch": 1.5690764956975405, "grad_norm": 3.763629674911499, "learning_rate": 6.93243890752276e-05, "loss": 0.5785, "step": 23158 }, { "epoch": 1.5691442509655125, "grad_norm": 4.797458648681641, "learning_rate": 6.932302005612979e-05, "loss": 0.6472, "step": 23159 }, { "epoch": 1.5692120062334847, "grad_norm": 5.2299089431762695, "learning_rate": 6.932165103703197e-05, "loss": 0.694, "step": 23160 }, { "epoch": 1.5692797615014569, "grad_norm": 5.521928310394287, "learning_rate": 6.932028201793416e-05, "loss": 0.5506, "step": 23161 }, { "epoch": 1.5693475167694289, "grad_norm": 8.856697082519531, "learning_rate": 6.931891299883634e-05, "loss": 0.7705, "step": 23162 }, { "epoch": 1.5694152720374008, "grad_norm": 8.348708152770996, "learning_rate": 6.931754397973852e-05, "loss": 0.7876, "step": 23163 }, { "epoch": 1.569483027305373, "grad_norm": 5.208896636962891, "learning_rate": 6.93161749606407e-05, "loss": 0.6022, "step": 23164 }, { "epoch": 1.5695507825733452, "grad_norm": 6.634048938751221, "learning_rate": 6.93148059415429e-05, "loss": 0.6287, "step": 23165 }, { "epoch": 1.569618537841317, "grad_norm": 7.603479862213135, "learning_rate": 6.931343692244507e-05, "loss": 0.551, "step": 23166 }, { "epoch": 1.5696862931092892, "grad_norm": 6.431528091430664, "learning_rate": 6.931206790334725e-05, "loss": 0.7886, "step": 23167 }, { "epoch": 1.5697540483772614, "grad_norm": 3.9917898178100586, "learning_rate": 6.931069888424944e-05, "loss": 0.4482, "step": 23168 }, { "epoch": 1.5698218036452334, "grad_norm": 9.238652229309082, "learning_rate": 6.930932986515162e-05, "loss": 0.5665, "step": 23169 }, { "epoch": 1.5698895589132054, "grad_norm": 7.300850868225098, "learning_rate": 6.930796084605381e-05, "loss": 0.6474, "step": 23170 }, { "epoch": 1.5699573141811776, "grad_norm": 5.6943359375, "learning_rate": 6.930659182695599e-05, "loss": 0.5858, "step": 23171 }, { "epoch": 1.5700250694491498, "grad_norm": 6.0494065284729, "learning_rate": 6.930522280785817e-05, "loss": 0.6992, "step": 23172 }, { "epoch": 1.5700928247171217, "grad_norm": 7.282288074493408, "learning_rate": 6.930385378876035e-05, "loss": 0.7192, "step": 23173 }, { "epoch": 1.5701605799850937, "grad_norm": 4.9891862869262695, "learning_rate": 6.930248476966254e-05, "loss": 0.6255, "step": 23174 }, { "epoch": 1.570228335253066, "grad_norm": 5.224235534667969, "learning_rate": 6.930111575056472e-05, "loss": 0.703, "step": 23175 }, { "epoch": 1.5702960905210381, "grad_norm": 6.754304885864258, "learning_rate": 6.92997467314669e-05, "loss": 0.5132, "step": 23176 }, { "epoch": 1.5703638457890101, "grad_norm": 5.702521800994873, "learning_rate": 6.929837771236909e-05, "loss": 0.7405, "step": 23177 }, { "epoch": 1.570431601056982, "grad_norm": 4.886368274688721, "learning_rate": 6.929700869327127e-05, "loss": 0.6163, "step": 23178 }, { "epoch": 1.5704993563249543, "grad_norm": 4.917201042175293, "learning_rate": 6.929563967417346e-05, "loss": 0.5945, "step": 23179 }, { "epoch": 1.5705671115929265, "grad_norm": 4.685647010803223, "learning_rate": 6.929427065507564e-05, "loss": 0.6709, "step": 23180 }, { "epoch": 1.5706348668608985, "grad_norm": 6.1422624588012695, "learning_rate": 6.929290163597782e-05, "loss": 0.7766, "step": 23181 }, { "epoch": 1.5707026221288705, "grad_norm": 5.330309867858887, "learning_rate": 6.929153261688e-05, "loss": 0.61, "step": 23182 }, { "epoch": 1.5707703773968427, "grad_norm": 6.441524028778076, "learning_rate": 6.929016359778218e-05, "loss": 0.4898, "step": 23183 }, { "epoch": 1.5708381326648146, "grad_norm": 4.974136829376221, "learning_rate": 6.928879457868437e-05, "loss": 0.845, "step": 23184 }, { "epoch": 1.5709058879327866, "grad_norm": 5.690096855163574, "learning_rate": 6.928742555958656e-05, "loss": 0.6797, "step": 23185 }, { "epoch": 1.5709736432007588, "grad_norm": 5.305364608764648, "learning_rate": 6.928605654048874e-05, "loss": 0.6532, "step": 23186 }, { "epoch": 1.571041398468731, "grad_norm": 5.012294769287109, "learning_rate": 6.928468752139093e-05, "loss": 0.6002, "step": 23187 }, { "epoch": 1.571109153736703, "grad_norm": 7.108641147613525, "learning_rate": 6.928331850229311e-05, "loss": 0.6506, "step": 23188 }, { "epoch": 1.571176909004675, "grad_norm": 9.937156677246094, "learning_rate": 6.928194948319529e-05, "loss": 0.7955, "step": 23189 }, { "epoch": 1.5712446642726472, "grad_norm": 5.235907554626465, "learning_rate": 6.928058046409748e-05, "loss": 0.9231, "step": 23190 }, { "epoch": 1.5713124195406194, "grad_norm": 7.641676425933838, "learning_rate": 6.927921144499966e-05, "loss": 0.8823, "step": 23191 }, { "epoch": 1.5713801748085914, "grad_norm": 6.004765033721924, "learning_rate": 6.927784242590184e-05, "loss": 0.8405, "step": 23192 }, { "epoch": 1.5714479300765634, "grad_norm": 5.531987190246582, "learning_rate": 6.927647340680404e-05, "loss": 0.599, "step": 23193 }, { "epoch": 1.5715156853445356, "grad_norm": 5.925148010253906, "learning_rate": 6.927510438770622e-05, "loss": 0.6682, "step": 23194 }, { "epoch": 1.5715834406125078, "grad_norm": 8.720983505249023, "learning_rate": 6.92737353686084e-05, "loss": 0.6229, "step": 23195 }, { "epoch": 1.5716511958804797, "grad_norm": 4.8993635177612305, "learning_rate": 6.927236634951058e-05, "loss": 0.6932, "step": 23196 }, { "epoch": 1.5717189511484517, "grad_norm": 5.65689754486084, "learning_rate": 6.927099733041277e-05, "loss": 0.9113, "step": 23197 }, { "epoch": 1.571786706416424, "grad_norm": 5.175230503082275, "learning_rate": 6.926962831131495e-05, "loss": 0.4871, "step": 23198 }, { "epoch": 1.571854461684396, "grad_norm": 6.856473445892334, "learning_rate": 6.926825929221713e-05, "loss": 0.7186, "step": 23199 }, { "epoch": 1.571922216952368, "grad_norm": 5.721275329589844, "learning_rate": 6.926689027311931e-05, "loss": 0.5564, "step": 23200 }, { "epoch": 1.57198997222034, "grad_norm": 8.006075859069824, "learning_rate": 6.92655212540215e-05, "loss": 0.6433, "step": 23201 }, { "epoch": 1.5720577274883123, "grad_norm": 6.086495399475098, "learning_rate": 6.926415223492369e-05, "loss": 0.5951, "step": 23202 }, { "epoch": 1.5721254827562843, "grad_norm": 13.858671188354492, "learning_rate": 6.926278321582587e-05, "loss": 0.7776, "step": 23203 }, { "epoch": 1.5721932380242563, "grad_norm": 6.830959796905518, "learning_rate": 6.926141419672805e-05, "loss": 0.6964, "step": 23204 }, { "epoch": 1.5722609932922285, "grad_norm": 5.105858325958252, "learning_rate": 6.926004517763023e-05, "loss": 0.6268, "step": 23205 }, { "epoch": 1.5723287485602007, "grad_norm": 4.084735870361328, "learning_rate": 6.925867615853241e-05, "loss": 0.4932, "step": 23206 }, { "epoch": 1.5723965038281726, "grad_norm": 4.461615562438965, "learning_rate": 6.92573071394346e-05, "loss": 0.5467, "step": 23207 }, { "epoch": 1.5724642590961446, "grad_norm": 4.359823226928711, "learning_rate": 6.925593812033678e-05, "loss": 0.5939, "step": 23208 }, { "epoch": 1.5725320143641168, "grad_norm": 6.732251167297363, "learning_rate": 6.925456910123896e-05, "loss": 0.7494, "step": 23209 }, { "epoch": 1.572599769632089, "grad_norm": 8.215747833251953, "learning_rate": 6.925320008214115e-05, "loss": 0.6763, "step": 23210 }, { "epoch": 1.572667524900061, "grad_norm": 7.856454849243164, "learning_rate": 6.925183106304334e-05, "loss": 0.8126, "step": 23211 }, { "epoch": 1.572735280168033, "grad_norm": 6.709907054901123, "learning_rate": 6.925046204394552e-05, "loss": 0.6335, "step": 23212 }, { "epoch": 1.5728030354360052, "grad_norm": 5.452547073364258, "learning_rate": 6.92490930248477e-05, "loss": 0.5941, "step": 23213 }, { "epoch": 1.5728707907039774, "grad_norm": 8.824601173400879, "learning_rate": 6.924772400574988e-05, "loss": 0.6718, "step": 23214 }, { "epoch": 1.5729385459719492, "grad_norm": 4.169334888458252, "learning_rate": 6.924635498665206e-05, "loss": 0.5546, "step": 23215 }, { "epoch": 1.5730063012399214, "grad_norm": 8.700821876525879, "learning_rate": 6.924498596755425e-05, "loss": 0.6549, "step": 23216 }, { "epoch": 1.5730740565078936, "grad_norm": 6.509072780609131, "learning_rate": 6.924361694845643e-05, "loss": 0.804, "step": 23217 }, { "epoch": 1.5731418117758655, "grad_norm": 4.622922897338867, "learning_rate": 6.924224792935861e-05, "loss": 0.6916, "step": 23218 }, { "epoch": 1.5732095670438375, "grad_norm": 6.461478233337402, "learning_rate": 6.92408789102608e-05, "loss": 0.5452, "step": 23219 }, { "epoch": 1.5732773223118097, "grad_norm": 4.729046821594238, "learning_rate": 6.923950989116299e-05, "loss": 0.7479, "step": 23220 }, { "epoch": 1.573345077579782, "grad_norm": 6.337672233581543, "learning_rate": 6.923814087206517e-05, "loss": 0.5552, "step": 23221 }, { "epoch": 1.573412832847754, "grad_norm": 6.877690315246582, "learning_rate": 6.923677185296735e-05, "loss": 0.8417, "step": 23222 }, { "epoch": 1.5734805881157259, "grad_norm": 5.476076602935791, "learning_rate": 6.923540283386953e-05, "loss": 0.7809, "step": 23223 }, { "epoch": 1.573548343383698, "grad_norm": 13.548277854919434, "learning_rate": 6.923403381477171e-05, "loss": 0.5463, "step": 23224 }, { "epoch": 1.5736160986516703, "grad_norm": 6.379286766052246, "learning_rate": 6.92326647956739e-05, "loss": 0.5604, "step": 23225 }, { "epoch": 1.5736838539196423, "grad_norm": 5.4981536865234375, "learning_rate": 6.923129577657608e-05, "loss": 0.6499, "step": 23226 }, { "epoch": 1.5737516091876143, "grad_norm": 5.65683126449585, "learning_rate": 6.922992675747827e-05, "loss": 0.7845, "step": 23227 }, { "epoch": 1.5738193644555865, "grad_norm": 5.628829479217529, "learning_rate": 6.922855773838045e-05, "loss": 0.6401, "step": 23228 }, { "epoch": 1.5738871197235587, "grad_norm": 6.388766288757324, "learning_rate": 6.922718871928263e-05, "loss": 0.6118, "step": 23229 }, { "epoch": 1.5739548749915306, "grad_norm": 5.1198410987854, "learning_rate": 6.922581970018482e-05, "loss": 0.6437, "step": 23230 }, { "epoch": 1.5740226302595026, "grad_norm": 6.411342620849609, "learning_rate": 6.9224450681087e-05, "loss": 0.7713, "step": 23231 }, { "epoch": 1.5740903855274748, "grad_norm": 5.132130146026611, "learning_rate": 6.922308166198918e-05, "loss": 0.5656, "step": 23232 }, { "epoch": 1.5741581407954468, "grad_norm": 7.441186428070068, "learning_rate": 6.922171264289137e-05, "loss": 0.7258, "step": 23233 }, { "epoch": 1.5742258960634188, "grad_norm": 3.8561346530914307, "learning_rate": 6.922034362379355e-05, "loss": 0.422, "step": 23234 }, { "epoch": 1.574293651331391, "grad_norm": 4.845437049865723, "learning_rate": 6.921897460469573e-05, "loss": 0.6419, "step": 23235 }, { "epoch": 1.5743614065993632, "grad_norm": 4.777760982513428, "learning_rate": 6.921760558559793e-05, "loss": 0.594, "step": 23236 }, { "epoch": 1.5744291618673352, "grad_norm": 4.55365514755249, "learning_rate": 6.921623656650011e-05, "loss": 0.559, "step": 23237 }, { "epoch": 1.5744969171353072, "grad_norm": 4.015712738037109, "learning_rate": 6.921486754740229e-05, "loss": 0.5448, "step": 23238 }, { "epoch": 1.5745646724032794, "grad_norm": 4.4763288497924805, "learning_rate": 6.921349852830448e-05, "loss": 0.572, "step": 23239 }, { "epoch": 1.5746324276712516, "grad_norm": 9.382132530212402, "learning_rate": 6.921212950920666e-05, "loss": 0.5311, "step": 23240 }, { "epoch": 1.5747001829392235, "grad_norm": 4.658585071563721, "learning_rate": 6.921076049010884e-05, "loss": 0.6014, "step": 23241 }, { "epoch": 1.5747679382071955, "grad_norm": 6.856054306030273, "learning_rate": 6.920939147101102e-05, "loss": 0.8126, "step": 23242 }, { "epoch": 1.5748356934751677, "grad_norm": 5.45219087600708, "learning_rate": 6.920802245191322e-05, "loss": 0.5025, "step": 23243 }, { "epoch": 1.57490344874314, "grad_norm": 5.059000015258789, "learning_rate": 6.92066534328154e-05, "loss": 0.6718, "step": 23244 }, { "epoch": 1.574971204011112, "grad_norm": 5.98452091217041, "learning_rate": 6.920528441371758e-05, "loss": 0.5682, "step": 23245 }, { "epoch": 1.5750389592790839, "grad_norm": 7.1489787101745605, "learning_rate": 6.920391539461976e-05, "loss": 0.6062, "step": 23246 }, { "epoch": 1.575106714547056, "grad_norm": 6.120086669921875, "learning_rate": 6.920254637552194e-05, "loss": 0.6219, "step": 23247 }, { "epoch": 1.575174469815028, "grad_norm": 5.682742118835449, "learning_rate": 6.920117735642413e-05, "loss": 0.569, "step": 23248 }, { "epoch": 1.575242225083, "grad_norm": 8.015316009521484, "learning_rate": 6.919980833732631e-05, "loss": 0.7483, "step": 23249 }, { "epoch": 1.5753099803509722, "grad_norm": 9.841140747070312, "learning_rate": 6.91984393182285e-05, "loss": 0.4255, "step": 23250 }, { "epoch": 1.5753777356189445, "grad_norm": 9.122374534606934, "learning_rate": 6.919707029913067e-05, "loss": 0.6875, "step": 23251 }, { "epoch": 1.5754454908869164, "grad_norm": 5.794983863830566, "learning_rate": 6.919570128003287e-05, "loss": 0.7575, "step": 23252 }, { "epoch": 1.5755132461548884, "grad_norm": 5.405019760131836, "learning_rate": 6.919433226093505e-05, "loss": 0.6521, "step": 23253 }, { "epoch": 1.5755810014228606, "grad_norm": 4.142784118652344, "learning_rate": 6.919296324183723e-05, "loss": 0.5256, "step": 23254 }, { "epoch": 1.5756487566908328, "grad_norm": 5.495188236236572, "learning_rate": 6.919159422273941e-05, "loss": 0.8299, "step": 23255 }, { "epoch": 1.5757165119588048, "grad_norm": 7.326843738555908, "learning_rate": 6.919022520364159e-05, "loss": 0.7674, "step": 23256 }, { "epoch": 1.5757842672267768, "grad_norm": 5.46429967880249, "learning_rate": 6.918885618454378e-05, "loss": 0.6444, "step": 23257 }, { "epoch": 1.575852022494749, "grad_norm": 7.962948799133301, "learning_rate": 6.918748716544596e-05, "loss": 0.7743, "step": 23258 }, { "epoch": 1.5759197777627212, "grad_norm": 4.992894172668457, "learning_rate": 6.918611814634814e-05, "loss": 0.5802, "step": 23259 }, { "epoch": 1.5759875330306932, "grad_norm": 10.944401741027832, "learning_rate": 6.918474912725032e-05, "loss": 0.8214, "step": 23260 }, { "epoch": 1.5760552882986651, "grad_norm": 6.821809768676758, "learning_rate": 6.91833801081525e-05, "loss": 0.6289, "step": 23261 }, { "epoch": 1.5761230435666373, "grad_norm": 8.202953338623047, "learning_rate": 6.91820110890547e-05, "loss": 0.503, "step": 23262 }, { "epoch": 1.5761907988346096, "grad_norm": 5.977260589599609, "learning_rate": 6.918064206995688e-05, "loss": 0.6679, "step": 23263 }, { "epoch": 1.5762585541025813, "grad_norm": 5.137282371520996, "learning_rate": 6.917927305085906e-05, "loss": 0.5567, "step": 23264 }, { "epoch": 1.5763263093705535, "grad_norm": 6.3863749504089355, "learning_rate": 6.917790403176124e-05, "loss": 0.7328, "step": 23265 }, { "epoch": 1.5763940646385257, "grad_norm": 6.95006799697876, "learning_rate": 6.917653501266343e-05, "loss": 0.7765, "step": 23266 }, { "epoch": 1.5764618199064977, "grad_norm": 6.163890361785889, "learning_rate": 6.917516599356561e-05, "loss": 0.5966, "step": 23267 }, { "epoch": 1.5765295751744697, "grad_norm": 5.839390277862549, "learning_rate": 6.91737969744678e-05, "loss": 0.6387, "step": 23268 }, { "epoch": 1.5765973304424419, "grad_norm": 5.590254783630371, "learning_rate": 6.917242795536997e-05, "loss": 0.5814, "step": 23269 }, { "epoch": 1.576665085710414, "grad_norm": 7.2848381996154785, "learning_rate": 6.917105893627216e-05, "loss": 0.6798, "step": 23270 }, { "epoch": 1.576732840978386, "grad_norm": 7.238433361053467, "learning_rate": 6.916968991717435e-05, "loss": 0.9747, "step": 23271 }, { "epoch": 1.576800596246358, "grad_norm": 5.96785306930542, "learning_rate": 6.916832089807653e-05, "loss": 0.5445, "step": 23272 }, { "epoch": 1.5768683515143302, "grad_norm": 5.243039608001709, "learning_rate": 6.916695187897871e-05, "loss": 0.7171, "step": 23273 }, { "epoch": 1.5769361067823024, "grad_norm": 6.679584503173828, "learning_rate": 6.916558285988089e-05, "loss": 0.6312, "step": 23274 }, { "epoch": 1.5770038620502744, "grad_norm": 6.728513717651367, "learning_rate": 6.916421384078308e-05, "loss": 0.7812, "step": 23275 }, { "epoch": 1.5770716173182464, "grad_norm": 4.296855449676514, "learning_rate": 6.916284482168526e-05, "loss": 0.6562, "step": 23276 }, { "epoch": 1.5771393725862186, "grad_norm": 6.089743614196777, "learning_rate": 6.916147580258744e-05, "loss": 0.674, "step": 23277 }, { "epoch": 1.5772071278541908, "grad_norm": 4.580448627471924, "learning_rate": 6.916010678348963e-05, "loss": 0.5247, "step": 23278 }, { "epoch": 1.5772748831221628, "grad_norm": 4.503934860229492, "learning_rate": 6.915873776439182e-05, "loss": 0.5695, "step": 23279 }, { "epoch": 1.5773426383901348, "grad_norm": 7.523309707641602, "learning_rate": 6.9157368745294e-05, "loss": 0.5545, "step": 23280 }, { "epoch": 1.577410393658107, "grad_norm": 5.488033771514893, "learning_rate": 6.915599972619618e-05, "loss": 0.6837, "step": 23281 }, { "epoch": 1.577478148926079, "grad_norm": 7.666971206665039, "learning_rate": 6.915463070709837e-05, "loss": 0.6499, "step": 23282 }, { "epoch": 1.577545904194051, "grad_norm": 6.494741439819336, "learning_rate": 6.915326168800055e-05, "loss": 0.7415, "step": 23283 }, { "epoch": 1.5776136594620231, "grad_norm": 4.590104103088379, "learning_rate": 6.915189266890273e-05, "loss": 0.4667, "step": 23284 }, { "epoch": 1.5776814147299953, "grad_norm": 6.761297225952148, "learning_rate": 6.915052364980493e-05, "loss": 0.7078, "step": 23285 }, { "epoch": 1.5777491699979673, "grad_norm": 5.805452346801758, "learning_rate": 6.914915463070711e-05, "loss": 0.6476, "step": 23286 }, { "epoch": 1.5778169252659393, "grad_norm": 4.827930450439453, "learning_rate": 6.914778561160929e-05, "loss": 0.6986, "step": 23287 }, { "epoch": 1.5778846805339115, "grad_norm": 4.5777459144592285, "learning_rate": 6.914641659251147e-05, "loss": 0.553, "step": 23288 }, { "epoch": 1.5779524358018837, "grad_norm": 4.929087162017822, "learning_rate": 6.914504757341366e-05, "loss": 0.6684, "step": 23289 }, { "epoch": 1.5780201910698557, "grad_norm": 4.904839992523193, "learning_rate": 6.914367855431584e-05, "loss": 0.5953, "step": 23290 }, { "epoch": 1.5780879463378277, "grad_norm": 4.968245029449463, "learning_rate": 6.914230953521802e-05, "loss": 0.63, "step": 23291 }, { "epoch": 1.5781557016057999, "grad_norm": 6.784894943237305, "learning_rate": 6.91409405161202e-05, "loss": 0.7199, "step": 23292 }, { "epoch": 1.578223456873772, "grad_norm": 5.12901496887207, "learning_rate": 6.913957149702238e-05, "loss": 0.5945, "step": 23293 }, { "epoch": 1.578291212141744, "grad_norm": 5.836991786956787, "learning_rate": 6.913820247792458e-05, "loss": 0.7544, "step": 23294 }, { "epoch": 1.578358967409716, "grad_norm": 7.102161884307861, "learning_rate": 6.913683345882676e-05, "loss": 0.7442, "step": 23295 }, { "epoch": 1.5784267226776882, "grad_norm": 7.239696979522705, "learning_rate": 6.913546443972894e-05, "loss": 0.6383, "step": 23296 }, { "epoch": 1.5784944779456602, "grad_norm": 6.600534915924072, "learning_rate": 6.913409542063112e-05, "loss": 0.9087, "step": 23297 }, { "epoch": 1.5785622332136322, "grad_norm": 4.568850994110107, "learning_rate": 6.913272640153331e-05, "loss": 0.6455, "step": 23298 }, { "epoch": 1.5786299884816044, "grad_norm": 4.978238105773926, "learning_rate": 6.91313573824355e-05, "loss": 0.4408, "step": 23299 }, { "epoch": 1.5786977437495766, "grad_norm": 5.223607540130615, "learning_rate": 6.912998836333767e-05, "loss": 0.4947, "step": 23300 }, { "epoch": 1.5787654990175486, "grad_norm": 5.918456077575684, "learning_rate": 6.912861934423985e-05, "loss": 0.7457, "step": 23301 }, { "epoch": 1.5788332542855206, "grad_norm": 7.62845516204834, "learning_rate": 6.912725032514203e-05, "loss": 0.5495, "step": 23302 }, { "epoch": 1.5789010095534928, "grad_norm": 7.781985759735107, "learning_rate": 6.912588130604423e-05, "loss": 0.7615, "step": 23303 }, { "epoch": 1.578968764821465, "grad_norm": 6.614993095397949, "learning_rate": 6.912451228694641e-05, "loss": 0.6875, "step": 23304 }, { "epoch": 1.579036520089437, "grad_norm": 4.86826229095459, "learning_rate": 6.912314326784859e-05, "loss": 0.5989, "step": 23305 }, { "epoch": 1.579104275357409, "grad_norm": 4.573125839233398, "learning_rate": 6.912177424875077e-05, "loss": 0.5072, "step": 23306 }, { "epoch": 1.5791720306253811, "grad_norm": 4.725343704223633, "learning_rate": 6.912040522965296e-05, "loss": 0.6465, "step": 23307 }, { "epoch": 1.5792397858933533, "grad_norm": 6.2587056159973145, "learning_rate": 6.911903621055514e-05, "loss": 0.7311, "step": 23308 }, { "epoch": 1.5793075411613253, "grad_norm": 5.181295871734619, "learning_rate": 6.911766719145732e-05, "loss": 0.5641, "step": 23309 }, { "epoch": 1.5793752964292973, "grad_norm": 6.289618492126465, "learning_rate": 6.91162981723595e-05, "loss": 0.6402, "step": 23310 }, { "epoch": 1.5794430516972695, "grad_norm": 5.523825645446777, "learning_rate": 6.911492915326168e-05, "loss": 0.7492, "step": 23311 }, { "epoch": 1.5795108069652417, "grad_norm": 5.4323835372924805, "learning_rate": 6.911356013416388e-05, "loss": 0.5672, "step": 23312 }, { "epoch": 1.5795785622332135, "grad_norm": 6.1110663414001465, "learning_rate": 6.911219111506606e-05, "loss": 0.625, "step": 23313 }, { "epoch": 1.5796463175011857, "grad_norm": 5.684808254241943, "learning_rate": 6.911082209596824e-05, "loss": 0.7181, "step": 23314 }, { "epoch": 1.5797140727691579, "grad_norm": 6.808009147644043, "learning_rate": 6.910945307687042e-05, "loss": 0.5803, "step": 23315 }, { "epoch": 1.5797818280371299, "grad_norm": 6.0759687423706055, "learning_rate": 6.91080840577726e-05, "loss": 0.643, "step": 23316 }, { "epoch": 1.5798495833051018, "grad_norm": 5.894423007965088, "learning_rate": 6.91067150386748e-05, "loss": 0.5959, "step": 23317 }, { "epoch": 1.579917338573074, "grad_norm": 6.571285724639893, "learning_rate": 6.910534601957697e-05, "loss": 0.555, "step": 23318 }, { "epoch": 1.5799850938410462, "grad_norm": 5.203495979309082, "learning_rate": 6.910397700047915e-05, "loss": 0.5638, "step": 23319 }, { "epoch": 1.5800528491090182, "grad_norm": 6.683701992034912, "learning_rate": 6.910260798138133e-05, "loss": 0.548, "step": 23320 }, { "epoch": 1.5801206043769902, "grad_norm": 4.794409275054932, "learning_rate": 6.910123896228353e-05, "loss": 0.7745, "step": 23321 }, { "epoch": 1.5801883596449624, "grad_norm": 5.144410133361816, "learning_rate": 6.909986994318571e-05, "loss": 0.582, "step": 23322 }, { "epoch": 1.5802561149129346, "grad_norm": 7.3457841873168945, "learning_rate": 6.909850092408789e-05, "loss": 0.7579, "step": 23323 }, { "epoch": 1.5803238701809066, "grad_norm": 5.658916473388672, "learning_rate": 6.909713190499007e-05, "loss": 0.6376, "step": 23324 }, { "epoch": 1.5803916254488786, "grad_norm": 5.199863433837891, "learning_rate": 6.909576288589225e-05, "loss": 0.7128, "step": 23325 }, { "epoch": 1.5804593807168508, "grad_norm": 5.4065842628479, "learning_rate": 6.909439386679444e-05, "loss": 0.8072, "step": 23326 }, { "epoch": 1.580527135984823, "grad_norm": 5.633886814117432, "learning_rate": 6.909302484769662e-05, "loss": 0.7047, "step": 23327 }, { "epoch": 1.580594891252795, "grad_norm": 6.338996887207031, "learning_rate": 6.90916558285988e-05, "loss": 0.5762, "step": 23328 }, { "epoch": 1.580662646520767, "grad_norm": 6.226584434509277, "learning_rate": 6.9090286809501e-05, "loss": 0.5143, "step": 23329 }, { "epoch": 1.5807304017887391, "grad_norm": 5.734573841094971, "learning_rate": 6.908891779040318e-05, "loss": 0.5678, "step": 23330 }, { "epoch": 1.5807981570567111, "grad_norm": 5.25604772567749, "learning_rate": 6.908754877130536e-05, "loss": 0.7205, "step": 23331 }, { "epoch": 1.580865912324683, "grad_norm": 6.029503345489502, "learning_rate": 6.908617975220755e-05, "loss": 0.5262, "step": 23332 }, { "epoch": 1.5809336675926553, "grad_norm": 5.294908046722412, "learning_rate": 6.908481073310973e-05, "loss": 0.7036, "step": 23333 }, { "epoch": 1.5810014228606275, "grad_norm": 5.919643402099609, "learning_rate": 6.908344171401191e-05, "loss": 0.6703, "step": 23334 }, { "epoch": 1.5810691781285995, "grad_norm": 4.461254596710205, "learning_rate": 6.908207269491411e-05, "loss": 0.6399, "step": 23335 }, { "epoch": 1.5811369333965715, "grad_norm": 4.874650001525879, "learning_rate": 6.908070367581629e-05, "loss": 0.6191, "step": 23336 }, { "epoch": 1.5812046886645437, "grad_norm": 5.508666515350342, "learning_rate": 6.907933465671847e-05, "loss": 0.4668, "step": 23337 }, { "epoch": 1.5812724439325159, "grad_norm": 4.925353050231934, "learning_rate": 6.907796563762065e-05, "loss": 0.631, "step": 23338 }, { "epoch": 1.5813401992004879, "grad_norm": 6.366619110107422, "learning_rate": 6.907659661852283e-05, "loss": 0.6484, "step": 23339 }, { "epoch": 1.5814079544684598, "grad_norm": 5.705472946166992, "learning_rate": 6.907522759942502e-05, "loss": 0.7232, "step": 23340 }, { "epoch": 1.581475709736432, "grad_norm": 7.179691314697266, "learning_rate": 6.90738585803272e-05, "loss": 0.6697, "step": 23341 }, { "epoch": 1.5815434650044042, "grad_norm": 5.416062831878662, "learning_rate": 6.907248956122938e-05, "loss": 0.6506, "step": 23342 }, { "epoch": 1.5816112202723762, "grad_norm": 4.757863998413086, "learning_rate": 6.907112054213156e-05, "loss": 0.5077, "step": 23343 }, { "epoch": 1.5816789755403482, "grad_norm": 6.851430416107178, "learning_rate": 6.906975152303376e-05, "loss": 0.8378, "step": 23344 }, { "epoch": 1.5817467308083204, "grad_norm": 8.147314071655273, "learning_rate": 6.906838250393594e-05, "loss": 0.5223, "step": 23345 }, { "epoch": 1.5818144860762924, "grad_norm": 5.042129993438721, "learning_rate": 6.906701348483812e-05, "loss": 0.6748, "step": 23346 }, { "epoch": 1.5818822413442644, "grad_norm": 5.770470142364502, "learning_rate": 6.90656444657403e-05, "loss": 0.6848, "step": 23347 }, { "epoch": 1.5819499966122366, "grad_norm": 4.54495906829834, "learning_rate": 6.906427544664248e-05, "loss": 0.4178, "step": 23348 }, { "epoch": 1.5820177518802088, "grad_norm": 4.804688453674316, "learning_rate": 6.906290642754467e-05, "loss": 0.502, "step": 23349 }, { "epoch": 1.5820855071481807, "grad_norm": 5.963826656341553, "learning_rate": 6.906153740844685e-05, "loss": 0.6885, "step": 23350 }, { "epoch": 1.5821532624161527, "grad_norm": 6.242585182189941, "learning_rate": 6.906016838934903e-05, "loss": 0.5656, "step": 23351 }, { "epoch": 1.582221017684125, "grad_norm": 6.053038597106934, "learning_rate": 6.905879937025121e-05, "loss": 0.6802, "step": 23352 }, { "epoch": 1.5822887729520971, "grad_norm": 5.275432586669922, "learning_rate": 6.905743035115341e-05, "loss": 0.6206, "step": 23353 }, { "epoch": 1.5823565282200691, "grad_norm": 10.448474884033203, "learning_rate": 6.905606133205559e-05, "loss": 0.5255, "step": 23354 }, { "epoch": 1.582424283488041, "grad_norm": 5.606541633605957, "learning_rate": 6.905469231295777e-05, "loss": 0.6966, "step": 23355 }, { "epoch": 1.5824920387560133, "grad_norm": 6.2353668212890625, "learning_rate": 6.905332329385995e-05, "loss": 0.6423, "step": 23356 }, { "epoch": 1.5825597940239855, "grad_norm": 4.600300312042236, "learning_rate": 6.905195427476213e-05, "loss": 0.6232, "step": 23357 }, { "epoch": 1.5826275492919575, "grad_norm": 6.6172590255737305, "learning_rate": 6.905058525566432e-05, "loss": 0.673, "step": 23358 }, { "epoch": 1.5826953045599295, "grad_norm": 6.515198230743408, "learning_rate": 6.90492162365665e-05, "loss": 0.7546, "step": 23359 }, { "epoch": 1.5827630598279017, "grad_norm": 6.655471324920654, "learning_rate": 6.904784721746868e-05, "loss": 0.5214, "step": 23360 }, { "epoch": 1.5828308150958739, "grad_norm": 6.144202709197998, "learning_rate": 6.904647819837086e-05, "loss": 0.6055, "step": 23361 }, { "epoch": 1.5828985703638456, "grad_norm": 9.291864395141602, "learning_rate": 6.904510917927304e-05, "loss": 0.7512, "step": 23362 }, { "epoch": 1.5829663256318178, "grad_norm": 9.134875297546387, "learning_rate": 6.904374016017524e-05, "loss": 0.5137, "step": 23363 }, { "epoch": 1.58303408089979, "grad_norm": 5.023855686187744, "learning_rate": 6.904237114107742e-05, "loss": 0.542, "step": 23364 }, { "epoch": 1.583101836167762, "grad_norm": 7.070440292358398, "learning_rate": 6.90410021219796e-05, "loss": 0.6464, "step": 23365 }, { "epoch": 1.583169591435734, "grad_norm": 6.181284427642822, "learning_rate": 6.903963310288178e-05, "loss": 0.6528, "step": 23366 }, { "epoch": 1.5832373467037062, "grad_norm": 6.6295905113220215, "learning_rate": 6.903826408378397e-05, "loss": 0.6796, "step": 23367 }, { "epoch": 1.5833051019716784, "grad_norm": 5.270658016204834, "learning_rate": 6.903689506468615e-05, "loss": 0.7839, "step": 23368 }, { "epoch": 1.5833728572396504, "grad_norm": 6.414453983306885, "learning_rate": 6.903552604558833e-05, "loss": 0.6294, "step": 23369 }, { "epoch": 1.5834406125076224, "grad_norm": 5.888903617858887, "learning_rate": 6.903415702649051e-05, "loss": 0.5853, "step": 23370 }, { "epoch": 1.5835083677755946, "grad_norm": 5.866122722625732, "learning_rate": 6.90327880073927e-05, "loss": 0.5878, "step": 23371 }, { "epoch": 1.5835761230435668, "grad_norm": 7.102929592132568, "learning_rate": 6.903141898829489e-05, "loss": 0.7546, "step": 23372 }, { "epoch": 1.5836438783115387, "grad_norm": 5.216640949249268, "learning_rate": 6.903004996919707e-05, "loss": 0.4769, "step": 23373 }, { "epoch": 1.5837116335795107, "grad_norm": 5.637636184692383, "learning_rate": 6.902868095009925e-05, "loss": 0.4716, "step": 23374 }, { "epoch": 1.583779388847483, "grad_norm": 4.639126300811768, "learning_rate": 6.902731193100144e-05, "loss": 0.4862, "step": 23375 }, { "epoch": 1.5838471441154551, "grad_norm": 6.085652828216553, "learning_rate": 6.902594291190362e-05, "loss": 0.6991, "step": 23376 }, { "epoch": 1.583914899383427, "grad_norm": 6.10144567489624, "learning_rate": 6.90245738928058e-05, "loss": 0.7452, "step": 23377 }, { "epoch": 1.583982654651399, "grad_norm": 5.141543865203857, "learning_rate": 6.9023204873708e-05, "loss": 0.6097, "step": 23378 }, { "epoch": 1.5840504099193713, "grad_norm": 4.925025463104248, "learning_rate": 6.902183585461018e-05, "loss": 0.6499, "step": 23379 }, { "epoch": 1.5841181651873433, "grad_norm": 5.286544322967529, "learning_rate": 6.902046683551236e-05, "loss": 0.7885, "step": 23380 }, { "epoch": 1.5841859204553153, "grad_norm": 5.2764573097229, "learning_rate": 6.901909781641455e-05, "loss": 0.6009, "step": 23381 }, { "epoch": 1.5842536757232875, "grad_norm": 8.036896705627441, "learning_rate": 6.901772879731673e-05, "loss": 0.5485, "step": 23382 }, { "epoch": 1.5843214309912597, "grad_norm": 6.799760341644287, "learning_rate": 6.901635977821891e-05, "loss": 0.6197, "step": 23383 }, { "epoch": 1.5843891862592316, "grad_norm": 6.378658294677734, "learning_rate": 6.90149907591211e-05, "loss": 0.7736, "step": 23384 }, { "epoch": 1.5844569415272036, "grad_norm": 5.336338043212891, "learning_rate": 6.901362174002329e-05, "loss": 0.5794, "step": 23385 }, { "epoch": 1.5845246967951758, "grad_norm": 11.828568458557129, "learning_rate": 6.901225272092547e-05, "loss": 0.6557, "step": 23386 }, { "epoch": 1.584592452063148, "grad_norm": 6.570085048675537, "learning_rate": 6.901088370182765e-05, "loss": 0.5827, "step": 23387 }, { "epoch": 1.58466020733112, "grad_norm": 5.922152519226074, "learning_rate": 6.900951468272983e-05, "loss": 0.6457, "step": 23388 }, { "epoch": 1.584727962599092, "grad_norm": 11.265925407409668, "learning_rate": 6.900814566363201e-05, "loss": 0.5835, "step": 23389 }, { "epoch": 1.5847957178670642, "grad_norm": 5.265225887298584, "learning_rate": 6.90067766445342e-05, "loss": 0.701, "step": 23390 }, { "epoch": 1.5848634731350364, "grad_norm": 4.686645984649658, "learning_rate": 6.900540762543638e-05, "loss": 0.6229, "step": 23391 }, { "epoch": 1.5849312284030084, "grad_norm": 6.196696758270264, "learning_rate": 6.900403860633856e-05, "loss": 0.7786, "step": 23392 }, { "epoch": 1.5849989836709804, "grad_norm": 5.620382785797119, "learning_rate": 6.900266958724074e-05, "loss": 0.6733, "step": 23393 }, { "epoch": 1.5850667389389526, "grad_norm": 10.992968559265137, "learning_rate": 6.900130056814292e-05, "loss": 0.6252, "step": 23394 }, { "epoch": 1.5851344942069245, "grad_norm": 5.3475518226623535, "learning_rate": 6.899993154904512e-05, "loss": 0.8138, "step": 23395 }, { "epoch": 1.5852022494748965, "grad_norm": 4.859070777893066, "learning_rate": 6.89985625299473e-05, "loss": 0.6758, "step": 23396 }, { "epoch": 1.5852700047428687, "grad_norm": 5.715543270111084, "learning_rate": 6.899719351084948e-05, "loss": 0.6641, "step": 23397 }, { "epoch": 1.585337760010841, "grad_norm": 5.5324907302856445, "learning_rate": 6.899582449175166e-05, "loss": 0.621, "step": 23398 }, { "epoch": 1.585405515278813, "grad_norm": 8.513348579406738, "learning_rate": 6.899445547265385e-05, "loss": 0.7473, "step": 23399 }, { "epoch": 1.5854732705467849, "grad_norm": 7.829124927520752, "learning_rate": 6.899308645355603e-05, "loss": 0.8878, "step": 23400 }, { "epoch": 1.585541025814757, "grad_norm": 4.43104887008667, "learning_rate": 6.899171743445821e-05, "loss": 0.5998, "step": 23401 }, { "epoch": 1.5856087810827293, "grad_norm": 4.892396926879883, "learning_rate": 6.89903484153604e-05, "loss": 0.3588, "step": 23402 }, { "epoch": 1.5856765363507013, "grad_norm": 6.281479358673096, "learning_rate": 6.898897939626257e-05, "loss": 0.6513, "step": 23403 }, { "epoch": 1.5857442916186733, "grad_norm": 5.403383731842041, "learning_rate": 6.898761037716477e-05, "loss": 0.8016, "step": 23404 }, { "epoch": 1.5858120468866455, "grad_norm": 6.15323543548584, "learning_rate": 6.898624135806695e-05, "loss": 0.6812, "step": 23405 }, { "epoch": 1.5858798021546177, "grad_norm": 6.9651007652282715, "learning_rate": 6.898487233896913e-05, "loss": 0.5684, "step": 23406 }, { "epoch": 1.5859475574225896, "grad_norm": 5.4005632400512695, "learning_rate": 6.898350331987131e-05, "loss": 0.6053, "step": 23407 }, { "epoch": 1.5860153126905616, "grad_norm": 7.369541168212891, "learning_rate": 6.89821343007735e-05, "loss": 0.5939, "step": 23408 }, { "epoch": 1.5860830679585338, "grad_norm": 7.090412139892578, "learning_rate": 6.898076528167568e-05, "loss": 0.9637, "step": 23409 }, { "epoch": 1.586150823226506, "grad_norm": 4.312934398651123, "learning_rate": 6.897939626257786e-05, "loss": 0.491, "step": 23410 }, { "epoch": 1.5862185784944778, "grad_norm": 4.897157669067383, "learning_rate": 6.897802724348004e-05, "loss": 0.5793, "step": 23411 }, { "epoch": 1.58628633376245, "grad_norm": 8.544978141784668, "learning_rate": 6.897665822438222e-05, "loss": 0.7529, "step": 23412 }, { "epoch": 1.5863540890304222, "grad_norm": 6.584326267242432, "learning_rate": 6.897528920528442e-05, "loss": 0.7805, "step": 23413 }, { "epoch": 1.5864218442983942, "grad_norm": 6.334148406982422, "learning_rate": 6.89739201861866e-05, "loss": 0.5653, "step": 23414 }, { "epoch": 1.5864895995663661, "grad_norm": 6.571914196014404, "learning_rate": 6.897255116708878e-05, "loss": 0.5108, "step": 23415 }, { "epoch": 1.5865573548343384, "grad_norm": 7.435220718383789, "learning_rate": 6.897118214799096e-05, "loss": 0.6997, "step": 23416 }, { "epoch": 1.5866251101023106, "grad_norm": 4.67202615737915, "learning_rate": 6.896981312889314e-05, "loss": 0.712, "step": 23417 }, { "epoch": 1.5866928653702825, "grad_norm": 4.568981170654297, "learning_rate": 6.896844410979533e-05, "loss": 0.5381, "step": 23418 }, { "epoch": 1.5867606206382545, "grad_norm": 5.7247796058654785, "learning_rate": 6.896707509069751e-05, "loss": 0.6046, "step": 23419 }, { "epoch": 1.5868283759062267, "grad_norm": 6.614546775817871, "learning_rate": 6.89657060715997e-05, "loss": 0.6393, "step": 23420 }, { "epoch": 1.586896131174199, "grad_norm": 4.604551792144775, "learning_rate": 6.896433705250189e-05, "loss": 0.7777, "step": 23421 }, { "epoch": 1.586963886442171, "grad_norm": 8.991721153259277, "learning_rate": 6.896296803340407e-05, "loss": 0.7524, "step": 23422 }, { "epoch": 1.5870316417101429, "grad_norm": 4.514760971069336, "learning_rate": 6.896159901430625e-05, "loss": 0.675, "step": 23423 }, { "epoch": 1.587099396978115, "grad_norm": 6.599597454071045, "learning_rate": 6.896022999520844e-05, "loss": 0.8258, "step": 23424 }, { "epoch": 1.5871671522460873, "grad_norm": 4.718210220336914, "learning_rate": 6.895886097611062e-05, "loss": 0.5615, "step": 23425 }, { "epoch": 1.5872349075140593, "grad_norm": 4.556129455566406, "learning_rate": 6.89574919570128e-05, "loss": 0.5074, "step": 23426 }, { "epoch": 1.5873026627820312, "grad_norm": 6.100066661834717, "learning_rate": 6.8956122937915e-05, "loss": 0.6856, "step": 23427 }, { "epoch": 1.5873704180500035, "grad_norm": 5.798023223876953, "learning_rate": 6.895475391881718e-05, "loss": 0.4809, "step": 23428 }, { "epoch": 1.5874381733179754, "grad_norm": 5.771924018859863, "learning_rate": 6.895338489971936e-05, "loss": 0.6973, "step": 23429 }, { "epoch": 1.5875059285859474, "grad_norm": 8.740870475769043, "learning_rate": 6.895201588062154e-05, "loss": 0.5669, "step": 23430 }, { "epoch": 1.5875736838539196, "grad_norm": 5.683804512023926, "learning_rate": 6.895064686152373e-05, "loss": 0.6412, "step": 23431 }, { "epoch": 1.5876414391218918, "grad_norm": 4.858431339263916, "learning_rate": 6.894927784242591e-05, "loss": 0.643, "step": 23432 }, { "epoch": 1.5877091943898638, "grad_norm": 11.011713027954102, "learning_rate": 6.894790882332809e-05, "loss": 0.504, "step": 23433 }, { "epoch": 1.5877769496578358, "grad_norm": 5.710129737854004, "learning_rate": 6.894653980423027e-05, "loss": 0.6088, "step": 23434 }, { "epoch": 1.587844704925808, "grad_norm": 5.722718715667725, "learning_rate": 6.894517078513245e-05, "loss": 0.9502, "step": 23435 }, { "epoch": 1.5879124601937802, "grad_norm": 6.013335227966309, "learning_rate": 6.894380176603465e-05, "loss": 0.6064, "step": 23436 }, { "epoch": 1.5879802154617522, "grad_norm": 4.0707621574401855, "learning_rate": 6.894243274693683e-05, "loss": 0.6329, "step": 23437 }, { "epoch": 1.5880479707297241, "grad_norm": 4.657120227813721, "learning_rate": 6.894106372783901e-05, "loss": 0.4818, "step": 23438 }, { "epoch": 1.5881157259976963, "grad_norm": 5.227647304534912, "learning_rate": 6.893969470874119e-05, "loss": 0.9141, "step": 23439 }, { "epoch": 1.5881834812656686, "grad_norm": 4.552366256713867, "learning_rate": 6.893832568964338e-05, "loss": 0.6208, "step": 23440 }, { "epoch": 1.5882512365336405, "grad_norm": 7.596582412719727, "learning_rate": 6.893695667054556e-05, "loss": 0.8835, "step": 23441 }, { "epoch": 1.5883189918016125, "grad_norm": 5.288389682769775, "learning_rate": 6.893558765144774e-05, "loss": 0.8476, "step": 23442 }, { "epoch": 1.5883867470695847, "grad_norm": 5.958263397216797, "learning_rate": 6.893421863234992e-05, "loss": 0.8419, "step": 23443 }, { "epoch": 1.5884545023375567, "grad_norm": 5.6720662117004395, "learning_rate": 6.89328496132521e-05, "loss": 0.7254, "step": 23444 }, { "epoch": 1.5885222576055287, "grad_norm": 7.078034400939941, "learning_rate": 6.89314805941543e-05, "loss": 0.6677, "step": 23445 }, { "epoch": 1.5885900128735009, "grad_norm": 6.8332085609436035, "learning_rate": 6.893011157505648e-05, "loss": 0.5616, "step": 23446 }, { "epoch": 1.588657768141473, "grad_norm": 9.01617431640625, "learning_rate": 6.892874255595866e-05, "loss": 0.5765, "step": 23447 }, { "epoch": 1.588725523409445, "grad_norm": 4.688689708709717, "learning_rate": 6.892737353686084e-05, "loss": 0.7845, "step": 23448 }, { "epoch": 1.588793278677417, "grad_norm": 4.860182285308838, "learning_rate": 6.892600451776302e-05, "loss": 0.7092, "step": 23449 }, { "epoch": 1.5888610339453892, "grad_norm": 4.691659927368164, "learning_rate": 6.892463549866521e-05, "loss": 0.6728, "step": 23450 }, { "epoch": 1.5889287892133614, "grad_norm": 9.19191837310791, "learning_rate": 6.892326647956739e-05, "loss": 0.7573, "step": 23451 }, { "epoch": 1.5889965444813334, "grad_norm": 5.690774440765381, "learning_rate": 6.892189746046957e-05, "loss": 0.6199, "step": 23452 }, { "epoch": 1.5890642997493054, "grad_norm": 5.512000560760498, "learning_rate": 6.892052844137175e-05, "loss": 0.708, "step": 23453 }, { "epoch": 1.5891320550172776, "grad_norm": 7.373483657836914, "learning_rate": 6.891915942227395e-05, "loss": 0.7774, "step": 23454 }, { "epoch": 1.5891998102852498, "grad_norm": 5.70918607711792, "learning_rate": 6.891779040317613e-05, "loss": 0.6776, "step": 23455 }, { "epoch": 1.5892675655532218, "grad_norm": 5.978252410888672, "learning_rate": 6.891642138407831e-05, "loss": 0.5254, "step": 23456 }, { "epoch": 1.5893353208211938, "grad_norm": 4.791055679321289, "learning_rate": 6.891505236498049e-05, "loss": 0.6013, "step": 23457 }, { "epoch": 1.589403076089166, "grad_norm": 4.121306419372559, "learning_rate": 6.891368334588267e-05, "loss": 0.6395, "step": 23458 }, { "epoch": 1.5894708313571382, "grad_norm": 7.174111843109131, "learning_rate": 6.891231432678486e-05, "loss": 0.9192, "step": 23459 }, { "epoch": 1.58953858662511, "grad_norm": 4.841155529022217, "learning_rate": 6.891094530768704e-05, "loss": 0.6888, "step": 23460 }, { "epoch": 1.5896063418930821, "grad_norm": 6.333797931671143, "learning_rate": 6.890957628858922e-05, "loss": 0.7755, "step": 23461 }, { "epoch": 1.5896740971610543, "grad_norm": 4.637367248535156, "learning_rate": 6.89082072694914e-05, "loss": 0.5099, "step": 23462 }, { "epoch": 1.5897418524290263, "grad_norm": 5.110837936401367, "learning_rate": 6.89068382503936e-05, "loss": 0.5226, "step": 23463 }, { "epoch": 1.5898096076969983, "grad_norm": 5.938870429992676, "learning_rate": 6.890546923129578e-05, "loss": 0.7152, "step": 23464 }, { "epoch": 1.5898773629649705, "grad_norm": 7.909888744354248, "learning_rate": 6.890410021219796e-05, "loss": 0.7471, "step": 23465 }, { "epoch": 1.5899451182329427, "grad_norm": 5.5084686279296875, "learning_rate": 6.890273119310014e-05, "loss": 0.6986, "step": 23466 }, { "epoch": 1.5900128735009147, "grad_norm": 5.688173770904541, "learning_rate": 6.890136217400233e-05, "loss": 0.7165, "step": 23467 }, { "epoch": 1.5900806287688867, "grad_norm": 5.098193168640137, "learning_rate": 6.889999315490451e-05, "loss": 0.8618, "step": 23468 }, { "epoch": 1.5901483840368589, "grad_norm": 5.84057092666626, "learning_rate": 6.88986241358067e-05, "loss": 0.5096, "step": 23469 }, { "epoch": 1.590216139304831, "grad_norm": 6.850484848022461, "learning_rate": 6.889725511670889e-05, "loss": 0.5327, "step": 23470 }, { "epoch": 1.590283894572803, "grad_norm": 6.494194984436035, "learning_rate": 6.889588609761107e-05, "loss": 0.8463, "step": 23471 }, { "epoch": 1.590351649840775, "grad_norm": 6.391314506530762, "learning_rate": 6.889451707851325e-05, "loss": 0.5503, "step": 23472 }, { "epoch": 1.5904194051087472, "grad_norm": 8.508984565734863, "learning_rate": 6.889314805941544e-05, "loss": 0.6804, "step": 23473 }, { "epoch": 1.5904871603767194, "grad_norm": 6.6334943771362305, "learning_rate": 6.889177904031762e-05, "loss": 0.6293, "step": 23474 }, { "epoch": 1.5905549156446914, "grad_norm": 5.036404132843018, "learning_rate": 6.88904100212198e-05, "loss": 0.5106, "step": 23475 }, { "epoch": 1.5906226709126634, "grad_norm": 4.636396884918213, "learning_rate": 6.888904100212198e-05, "loss": 0.6987, "step": 23476 }, { "epoch": 1.5906904261806356, "grad_norm": 5.40059232711792, "learning_rate": 6.888767198302418e-05, "loss": 0.8006, "step": 23477 }, { "epoch": 1.5907581814486076, "grad_norm": 5.811666011810303, "learning_rate": 6.888630296392636e-05, "loss": 0.7043, "step": 23478 }, { "epoch": 1.5908259367165796, "grad_norm": 5.849229335784912, "learning_rate": 6.888493394482854e-05, "loss": 0.6827, "step": 23479 }, { "epoch": 1.5908936919845518, "grad_norm": 5.427197456359863, "learning_rate": 6.888356492573072e-05, "loss": 0.8912, "step": 23480 }, { "epoch": 1.590961447252524, "grad_norm": 5.845024585723877, "learning_rate": 6.88821959066329e-05, "loss": 0.5872, "step": 23481 }, { "epoch": 1.591029202520496, "grad_norm": 5.177428245544434, "learning_rate": 6.888082688753509e-05, "loss": 0.6733, "step": 23482 }, { "epoch": 1.591096957788468, "grad_norm": 5.4635467529296875, "learning_rate": 6.887945786843727e-05, "loss": 0.9936, "step": 23483 }, { "epoch": 1.5911647130564401, "grad_norm": 4.362663269042969, "learning_rate": 6.887808884933945e-05, "loss": 0.5174, "step": 23484 }, { "epoch": 1.5912324683244123, "grad_norm": 5.987303733825684, "learning_rate": 6.887671983024163e-05, "loss": 0.8461, "step": 23485 }, { "epoch": 1.5913002235923843, "grad_norm": 4.279614448547363, "learning_rate": 6.887535081114383e-05, "loss": 0.6293, "step": 23486 }, { "epoch": 1.5913679788603563, "grad_norm": 4.6164679527282715, "learning_rate": 6.887398179204601e-05, "loss": 0.5991, "step": 23487 }, { "epoch": 1.5914357341283285, "grad_norm": 6.314345836639404, "learning_rate": 6.887261277294819e-05, "loss": 0.7818, "step": 23488 }, { "epoch": 1.5915034893963007, "grad_norm": 5.3361382484436035, "learning_rate": 6.887124375385037e-05, "loss": 0.5823, "step": 23489 }, { "epoch": 1.5915712446642727, "grad_norm": 5.658084392547607, "learning_rate": 6.886987473475255e-05, "loss": 0.5461, "step": 23490 }, { "epoch": 1.5916389999322447, "grad_norm": 6.219390392303467, "learning_rate": 6.886850571565474e-05, "loss": 0.7478, "step": 23491 }, { "epoch": 1.5917067552002169, "grad_norm": 7.032169818878174, "learning_rate": 6.886713669655692e-05, "loss": 0.8957, "step": 23492 }, { "epoch": 1.5917745104681889, "grad_norm": 6.538294792175293, "learning_rate": 6.88657676774591e-05, "loss": 0.5796, "step": 23493 }, { "epoch": 1.5918422657361608, "grad_norm": 6.693767070770264, "learning_rate": 6.886439865836128e-05, "loss": 0.5979, "step": 23494 }, { "epoch": 1.591910021004133, "grad_norm": 5.926671504974365, "learning_rate": 6.886302963926346e-05, "loss": 0.853, "step": 23495 }, { "epoch": 1.5919777762721052, "grad_norm": 5.9633331298828125, "learning_rate": 6.886166062016566e-05, "loss": 0.6703, "step": 23496 }, { "epoch": 1.5920455315400772, "grad_norm": 11.392780303955078, "learning_rate": 6.886029160106784e-05, "loss": 0.7272, "step": 23497 }, { "epoch": 1.5921132868080492, "grad_norm": 4.867154121398926, "learning_rate": 6.885892258197002e-05, "loss": 0.5921, "step": 23498 }, { "epoch": 1.5921810420760214, "grad_norm": 5.3215508460998535, "learning_rate": 6.88575535628722e-05, "loss": 0.6752, "step": 23499 }, { "epoch": 1.5922487973439936, "grad_norm": 5.97247314453125, "learning_rate": 6.885618454377439e-05, "loss": 0.6957, "step": 23500 }, { "epoch": 1.5923165526119656, "grad_norm": 4.764074325561523, "learning_rate": 6.885481552467657e-05, "loss": 0.6514, "step": 23501 }, { "epoch": 1.5923843078799376, "grad_norm": 6.791749477386475, "learning_rate": 6.885344650557875e-05, "loss": 0.7472, "step": 23502 }, { "epoch": 1.5924520631479098, "grad_norm": 12.287877082824707, "learning_rate": 6.885207748648093e-05, "loss": 0.8133, "step": 23503 }, { "epoch": 1.592519818415882, "grad_norm": 6.136162281036377, "learning_rate": 6.885070846738311e-05, "loss": 0.7896, "step": 23504 }, { "epoch": 1.592587573683854, "grad_norm": 7.469366550445557, "learning_rate": 6.884933944828531e-05, "loss": 0.6069, "step": 23505 }, { "epoch": 1.592655328951826, "grad_norm": 5.980533599853516, "learning_rate": 6.884797042918749e-05, "loss": 0.672, "step": 23506 }, { "epoch": 1.5927230842197981, "grad_norm": 5.568281650543213, "learning_rate": 6.884660141008967e-05, "loss": 0.6207, "step": 23507 }, { "epoch": 1.5927908394877703, "grad_norm": 8.015199661254883, "learning_rate": 6.884523239099185e-05, "loss": 0.8219, "step": 23508 }, { "epoch": 1.592858594755742, "grad_norm": 5.988598346710205, "learning_rate": 6.884386337189404e-05, "loss": 0.6741, "step": 23509 }, { "epoch": 1.5929263500237143, "grad_norm": 3.9450607299804688, "learning_rate": 6.884249435279622e-05, "loss": 0.5544, "step": 23510 }, { "epoch": 1.5929941052916865, "grad_norm": 5.989316940307617, "learning_rate": 6.88411253336984e-05, "loss": 0.7235, "step": 23511 }, { "epoch": 1.5930618605596585, "grad_norm": 7.319903373718262, "learning_rate": 6.883975631460058e-05, "loss": 0.8976, "step": 23512 }, { "epoch": 1.5931296158276305, "grad_norm": 5.284731388092041, "learning_rate": 6.883838729550278e-05, "loss": 0.5702, "step": 23513 }, { "epoch": 1.5931973710956027, "grad_norm": 6.563782691955566, "learning_rate": 6.883701827640496e-05, "loss": 0.4783, "step": 23514 }, { "epoch": 1.5932651263635749, "grad_norm": 6.209489345550537, "learning_rate": 6.883564925730714e-05, "loss": 0.915, "step": 23515 }, { "epoch": 1.5933328816315468, "grad_norm": 5.161864757537842, "learning_rate": 6.883428023820933e-05, "loss": 0.7382, "step": 23516 }, { "epoch": 1.5934006368995188, "grad_norm": 5.709946155548096, "learning_rate": 6.883291121911151e-05, "loss": 0.4953, "step": 23517 }, { "epoch": 1.593468392167491, "grad_norm": 4.9402289390563965, "learning_rate": 6.883154220001369e-05, "loss": 0.525, "step": 23518 }, { "epoch": 1.5935361474354632, "grad_norm": 6.627299785614014, "learning_rate": 6.883017318091589e-05, "loss": 0.7688, "step": 23519 }, { "epoch": 1.5936039027034352, "grad_norm": 5.242444038391113, "learning_rate": 6.882880416181807e-05, "loss": 0.6402, "step": 23520 }, { "epoch": 1.5936716579714072, "grad_norm": 5.277553081512451, "learning_rate": 6.882743514272025e-05, "loss": 0.6519, "step": 23521 }, { "epoch": 1.5937394132393794, "grad_norm": 7.744827747344971, "learning_rate": 6.882606612362243e-05, "loss": 0.8865, "step": 23522 }, { "epoch": 1.5938071685073516, "grad_norm": 4.8401265144348145, "learning_rate": 6.882469710452462e-05, "loss": 0.734, "step": 23523 }, { "epoch": 1.5938749237753236, "grad_norm": 4.972725868225098, "learning_rate": 6.88233280854268e-05, "loss": 0.5463, "step": 23524 }, { "epoch": 1.5939426790432956, "grad_norm": 5.550691604614258, "learning_rate": 6.882195906632898e-05, "loss": 0.749, "step": 23525 }, { "epoch": 1.5940104343112678, "grad_norm": 4.47913932800293, "learning_rate": 6.882059004723116e-05, "loss": 0.6191, "step": 23526 }, { "epoch": 1.5940781895792397, "grad_norm": 5.574995994567871, "learning_rate": 6.881922102813334e-05, "loss": 0.7508, "step": 23527 }, { "epoch": 1.5941459448472117, "grad_norm": 6.255677223205566, "learning_rate": 6.881785200903554e-05, "loss": 0.8392, "step": 23528 }, { "epoch": 1.594213700115184, "grad_norm": 5.5084381103515625, "learning_rate": 6.881648298993772e-05, "loss": 0.7994, "step": 23529 }, { "epoch": 1.5942814553831561, "grad_norm": 7.929598331451416, "learning_rate": 6.88151139708399e-05, "loss": 0.7165, "step": 23530 }, { "epoch": 1.5943492106511281, "grad_norm": 5.527851104736328, "learning_rate": 6.881374495174208e-05, "loss": 0.6431, "step": 23531 }, { "epoch": 1.5944169659191, "grad_norm": 4.742830753326416, "learning_rate": 6.881237593264427e-05, "loss": 0.5534, "step": 23532 }, { "epoch": 1.5944847211870723, "grad_norm": 4.476757526397705, "learning_rate": 6.881100691354645e-05, "loss": 0.5667, "step": 23533 }, { "epoch": 1.5945524764550445, "grad_norm": 5.438236713409424, "learning_rate": 6.880963789444863e-05, "loss": 0.7614, "step": 23534 }, { "epoch": 1.5946202317230165, "grad_norm": 6.9567718505859375, "learning_rate": 6.880826887535081e-05, "loss": 0.5902, "step": 23535 }, { "epoch": 1.5946879869909885, "grad_norm": 6.294891357421875, "learning_rate": 6.880689985625299e-05, "loss": 0.5614, "step": 23536 }, { "epoch": 1.5947557422589607, "grad_norm": 6.649505138397217, "learning_rate": 6.880553083715519e-05, "loss": 0.7383, "step": 23537 }, { "epoch": 1.5948234975269329, "grad_norm": 7.1089372634887695, "learning_rate": 6.880416181805737e-05, "loss": 0.5122, "step": 23538 }, { "epoch": 1.5948912527949048, "grad_norm": 4.3875885009765625, "learning_rate": 6.880279279895955e-05, "loss": 0.7097, "step": 23539 }, { "epoch": 1.5949590080628768, "grad_norm": 6.1685638427734375, "learning_rate": 6.880142377986173e-05, "loss": 0.729, "step": 23540 }, { "epoch": 1.595026763330849, "grad_norm": 4.820277214050293, "learning_rate": 6.880005476076392e-05, "loss": 0.593, "step": 23541 }, { "epoch": 1.595094518598821, "grad_norm": 5.3484344482421875, "learning_rate": 6.87986857416661e-05, "loss": 0.6651, "step": 23542 }, { "epoch": 1.595162273866793, "grad_norm": 5.227877616882324, "learning_rate": 6.879731672256828e-05, "loss": 0.7981, "step": 23543 }, { "epoch": 1.5952300291347652, "grad_norm": 6.0562944412231445, "learning_rate": 6.879594770347046e-05, "loss": 0.5491, "step": 23544 }, { "epoch": 1.5952977844027374, "grad_norm": 11.903589248657227, "learning_rate": 6.879457868437264e-05, "loss": 0.7226, "step": 23545 }, { "epoch": 1.5953655396707094, "grad_norm": 5.241898536682129, "learning_rate": 6.879320966527484e-05, "loss": 0.8699, "step": 23546 }, { "epoch": 1.5954332949386814, "grad_norm": 6.689347743988037, "learning_rate": 6.879184064617702e-05, "loss": 0.783, "step": 23547 }, { "epoch": 1.5955010502066536, "grad_norm": 4.832264423370361, "learning_rate": 6.87904716270792e-05, "loss": 0.6115, "step": 23548 }, { "epoch": 1.5955688054746258, "grad_norm": 6.15548038482666, "learning_rate": 6.878910260798138e-05, "loss": 0.752, "step": 23549 }, { "epoch": 1.5956365607425977, "grad_norm": 4.545421123504639, "learning_rate": 6.878773358888356e-05, "loss": 0.6495, "step": 23550 }, { "epoch": 1.5957043160105697, "grad_norm": 4.504945278167725, "learning_rate": 6.878636456978575e-05, "loss": 0.8742, "step": 23551 }, { "epoch": 1.595772071278542, "grad_norm": 5.831279754638672, "learning_rate": 6.878499555068793e-05, "loss": 0.6921, "step": 23552 }, { "epoch": 1.5958398265465141, "grad_norm": 6.414180755615234, "learning_rate": 6.878362653159011e-05, "loss": 0.7118, "step": 23553 }, { "epoch": 1.595907581814486, "grad_norm": 8.658065795898438, "learning_rate": 6.87822575124923e-05, "loss": 0.7323, "step": 23554 }, { "epoch": 1.595975337082458, "grad_norm": 4.844724655151367, "learning_rate": 6.878088849339449e-05, "loss": 0.5231, "step": 23555 }, { "epoch": 1.5960430923504303, "grad_norm": 6.587430953979492, "learning_rate": 6.877951947429667e-05, "loss": 0.7154, "step": 23556 }, { "epoch": 1.5961108476184025, "grad_norm": 5.666919231414795, "learning_rate": 6.877815045519885e-05, "loss": 0.6094, "step": 23557 }, { "epoch": 1.5961786028863743, "grad_norm": 5.763611793518066, "learning_rate": 6.877678143610103e-05, "loss": 0.8098, "step": 23558 }, { "epoch": 1.5962463581543465, "grad_norm": 5.5320281982421875, "learning_rate": 6.877541241700321e-05, "loss": 0.6196, "step": 23559 }, { "epoch": 1.5963141134223187, "grad_norm": 7.564871311187744, "learning_rate": 6.87740433979054e-05, "loss": 0.8093, "step": 23560 }, { "epoch": 1.5963818686902906, "grad_norm": 5.542654037475586, "learning_rate": 6.877267437880758e-05, "loss": 0.4484, "step": 23561 }, { "epoch": 1.5964496239582626, "grad_norm": 6.420172691345215, "learning_rate": 6.877130535970976e-05, "loss": 0.7271, "step": 23562 }, { "epoch": 1.5965173792262348, "grad_norm": 6.637720584869385, "learning_rate": 6.876993634061196e-05, "loss": 0.5009, "step": 23563 }, { "epoch": 1.596585134494207, "grad_norm": 5.97924280166626, "learning_rate": 6.876856732151414e-05, "loss": 0.8266, "step": 23564 }, { "epoch": 1.596652889762179, "grad_norm": 4.4966020584106445, "learning_rate": 6.876719830241633e-05, "loss": 0.6213, "step": 23565 }, { "epoch": 1.596720645030151, "grad_norm": 8.317852973937988, "learning_rate": 6.876582928331851e-05, "loss": 0.6796, "step": 23566 }, { "epoch": 1.5967884002981232, "grad_norm": 5.0673136711120605, "learning_rate": 6.876446026422069e-05, "loss": 0.7545, "step": 23567 }, { "epoch": 1.5968561555660954, "grad_norm": 4.199815273284912, "learning_rate": 6.876309124512287e-05, "loss": 0.6563, "step": 23568 }, { "epoch": 1.5969239108340674, "grad_norm": 6.539623737335205, "learning_rate": 6.876172222602507e-05, "loss": 0.7036, "step": 23569 }, { "epoch": 1.5969916661020394, "grad_norm": 5.33034086227417, "learning_rate": 6.876035320692725e-05, "loss": 0.8558, "step": 23570 }, { "epoch": 1.5970594213700116, "grad_norm": 5.40518045425415, "learning_rate": 6.875898418782943e-05, "loss": 0.6012, "step": 23571 }, { "epoch": 1.5971271766379838, "grad_norm": 5.97910737991333, "learning_rate": 6.875761516873161e-05, "loss": 0.6993, "step": 23572 }, { "epoch": 1.5971949319059557, "grad_norm": 5.537639141082764, "learning_rate": 6.87562461496338e-05, "loss": 0.4963, "step": 23573 }, { "epoch": 1.5972626871739277, "grad_norm": 5.922773361206055, "learning_rate": 6.875487713053598e-05, "loss": 0.4673, "step": 23574 }, { "epoch": 1.5973304424419, "grad_norm": 5.147026538848877, "learning_rate": 6.875350811143816e-05, "loss": 0.7034, "step": 23575 }, { "epoch": 1.597398197709872, "grad_norm": 6.982605934143066, "learning_rate": 6.875213909234034e-05, "loss": 0.7406, "step": 23576 }, { "epoch": 1.5974659529778439, "grad_norm": 10.902595520019531, "learning_rate": 6.875077007324252e-05, "loss": 0.6719, "step": 23577 }, { "epoch": 1.597533708245816, "grad_norm": 5.000636577606201, "learning_rate": 6.874940105414472e-05, "loss": 0.496, "step": 23578 }, { "epoch": 1.5976014635137883, "grad_norm": 6.918285846710205, "learning_rate": 6.87480320350469e-05, "loss": 0.5621, "step": 23579 }, { "epoch": 1.5976692187817603, "grad_norm": 5.30927038192749, "learning_rate": 6.874666301594908e-05, "loss": 0.4976, "step": 23580 }, { "epoch": 1.5977369740497323, "grad_norm": 5.548561096191406, "learning_rate": 6.874529399685126e-05, "loss": 0.6341, "step": 23581 }, { "epoch": 1.5978047293177045, "grad_norm": 6.8178486824035645, "learning_rate": 6.874392497775344e-05, "loss": 0.4743, "step": 23582 }, { "epoch": 1.5978724845856767, "grad_norm": 4.636435031890869, "learning_rate": 6.874255595865563e-05, "loss": 0.7429, "step": 23583 }, { "epoch": 1.5979402398536486, "grad_norm": 4.817155838012695, "learning_rate": 6.874118693955781e-05, "loss": 0.5869, "step": 23584 }, { "epoch": 1.5980079951216206, "grad_norm": 5.4816508293151855, "learning_rate": 6.873981792045999e-05, "loss": 0.8642, "step": 23585 }, { "epoch": 1.5980757503895928, "grad_norm": 5.448094367980957, "learning_rate": 6.873844890136217e-05, "loss": 0.6397, "step": 23586 }, { "epoch": 1.598143505657565, "grad_norm": 6.379266738891602, "learning_rate": 6.873707988226437e-05, "loss": 0.6478, "step": 23587 }, { "epoch": 1.598211260925537, "grad_norm": 5.08195161819458, "learning_rate": 6.873571086316655e-05, "loss": 0.6314, "step": 23588 }, { "epoch": 1.598279016193509, "grad_norm": 6.127610683441162, "learning_rate": 6.873434184406873e-05, "loss": 0.5741, "step": 23589 }, { "epoch": 1.5983467714614812, "grad_norm": 8.041386604309082, "learning_rate": 6.873297282497091e-05, "loss": 0.4959, "step": 23590 }, { "epoch": 1.5984145267294532, "grad_norm": 8.381261825561523, "learning_rate": 6.873160380587309e-05, "loss": 0.7985, "step": 23591 }, { "epoch": 1.5984822819974251, "grad_norm": 9.62452507019043, "learning_rate": 6.873023478677528e-05, "loss": 0.8509, "step": 23592 }, { "epoch": 1.5985500372653973, "grad_norm": 8.500664710998535, "learning_rate": 6.872886576767746e-05, "loss": 0.5585, "step": 23593 }, { "epoch": 1.5986177925333696, "grad_norm": 14.061116218566895, "learning_rate": 6.872749674857964e-05, "loss": 0.6349, "step": 23594 }, { "epoch": 1.5986855478013415, "grad_norm": 7.493358612060547, "learning_rate": 6.872612772948182e-05, "loss": 0.6834, "step": 23595 }, { "epoch": 1.5987533030693135, "grad_norm": 4.394151210784912, "learning_rate": 6.872475871038402e-05, "loss": 0.529, "step": 23596 }, { "epoch": 1.5988210583372857, "grad_norm": 5.350720405578613, "learning_rate": 6.87233896912862e-05, "loss": 0.702, "step": 23597 }, { "epoch": 1.598888813605258, "grad_norm": 11.298641204833984, "learning_rate": 6.872202067218838e-05, "loss": 0.6316, "step": 23598 }, { "epoch": 1.59895656887323, "grad_norm": 4.811050891876221, "learning_rate": 6.872065165309056e-05, "loss": 0.6844, "step": 23599 }, { "epoch": 1.5990243241412019, "grad_norm": 5.466344833374023, "learning_rate": 6.871928263399274e-05, "loss": 0.8445, "step": 23600 }, { "epoch": 1.599092079409174, "grad_norm": 8.02235221862793, "learning_rate": 6.871791361489493e-05, "loss": 0.6829, "step": 23601 }, { "epoch": 1.5991598346771463, "grad_norm": 5.3720903396606445, "learning_rate": 6.871654459579711e-05, "loss": 0.6193, "step": 23602 }, { "epoch": 1.5992275899451183, "grad_norm": 4.707223892211914, "learning_rate": 6.871517557669929e-05, "loss": 0.511, "step": 23603 }, { "epoch": 1.5992953452130902, "grad_norm": 8.962212562561035, "learning_rate": 6.871380655760147e-05, "loss": 0.5147, "step": 23604 }, { "epoch": 1.5993631004810624, "grad_norm": 7.751879692077637, "learning_rate": 6.871243753850365e-05, "loss": 0.6919, "step": 23605 }, { "epoch": 1.5994308557490347, "grad_norm": 5.212927341461182, "learning_rate": 6.871106851940585e-05, "loss": 0.5543, "step": 23606 }, { "epoch": 1.5994986110170064, "grad_norm": 6.603880405426025, "learning_rate": 6.870969950030803e-05, "loss": 0.5503, "step": 23607 }, { "epoch": 1.5995663662849786, "grad_norm": 4.891867637634277, "learning_rate": 6.870833048121021e-05, "loss": 0.5207, "step": 23608 }, { "epoch": 1.5996341215529508, "grad_norm": 4.312719345092773, "learning_rate": 6.87069614621124e-05, "loss": 0.5153, "step": 23609 }, { "epoch": 1.5997018768209228, "grad_norm": 6.800570011138916, "learning_rate": 6.870559244301458e-05, "loss": 0.5692, "step": 23610 }, { "epoch": 1.5997696320888948, "grad_norm": 5.150250434875488, "learning_rate": 6.870422342391676e-05, "loss": 0.7644, "step": 23611 }, { "epoch": 1.599837387356867, "grad_norm": 6.192625045776367, "learning_rate": 6.870285440481896e-05, "loss": 0.8308, "step": 23612 }, { "epoch": 1.5999051426248392, "grad_norm": 5.850280284881592, "learning_rate": 6.870148538572114e-05, "loss": 0.4402, "step": 23613 }, { "epoch": 1.5999728978928112, "grad_norm": 6.911935329437256, "learning_rate": 6.870011636662332e-05, "loss": 0.5985, "step": 23614 }, { "epoch": 1.6000406531607831, "grad_norm": 7.709860801696777, "learning_rate": 6.869874734752551e-05, "loss": 0.6482, "step": 23615 }, { "epoch": 1.6001084084287553, "grad_norm": 6.119106292724609, "learning_rate": 6.869737832842769e-05, "loss": 0.7168, "step": 23616 }, { "epoch": 1.6001761636967275, "grad_norm": 5.560430526733398, "learning_rate": 6.869600930932987e-05, "loss": 0.7318, "step": 23617 }, { "epoch": 1.6002439189646995, "grad_norm": 5.443650245666504, "learning_rate": 6.869464029023205e-05, "loss": 0.6674, "step": 23618 }, { "epoch": 1.6003116742326715, "grad_norm": 7.733170032501221, "learning_rate": 6.869327127113425e-05, "loss": 0.6992, "step": 23619 }, { "epoch": 1.6003794295006437, "grad_norm": 4.4801411628723145, "learning_rate": 6.869190225203643e-05, "loss": 0.4957, "step": 23620 }, { "epoch": 1.600447184768616, "grad_norm": 4.708012580871582, "learning_rate": 6.86905332329386e-05, "loss": 0.5588, "step": 23621 }, { "epoch": 1.600514940036588, "grad_norm": 4.834713459014893, "learning_rate": 6.868916421384079e-05, "loss": 0.6567, "step": 23622 }, { "epoch": 1.6005826953045599, "grad_norm": 6.948449611663818, "learning_rate": 6.868779519474297e-05, "loss": 0.5723, "step": 23623 }, { "epoch": 1.600650450572532, "grad_norm": 4.606142044067383, "learning_rate": 6.868642617564516e-05, "loss": 0.721, "step": 23624 }, { "epoch": 1.600718205840504, "grad_norm": 5.054271221160889, "learning_rate": 6.868505715654734e-05, "loss": 0.5516, "step": 23625 }, { "epoch": 1.600785961108476, "grad_norm": 5.853102684020996, "learning_rate": 6.868368813744952e-05, "loss": 0.5, "step": 23626 }, { "epoch": 1.6008537163764482, "grad_norm": 4.875627517700195, "learning_rate": 6.86823191183517e-05, "loss": 0.6743, "step": 23627 }, { "epoch": 1.6009214716444204, "grad_norm": 5.738729476928711, "learning_rate": 6.868095009925388e-05, "loss": 0.6181, "step": 23628 }, { "epoch": 1.6009892269123924, "grad_norm": 6.314669132232666, "learning_rate": 6.867958108015608e-05, "loss": 0.5273, "step": 23629 }, { "epoch": 1.6010569821803644, "grad_norm": 5.659343242645264, "learning_rate": 6.867821206105826e-05, "loss": 0.6559, "step": 23630 }, { "epoch": 1.6011247374483366, "grad_norm": 4.8176069259643555, "learning_rate": 6.867684304196044e-05, "loss": 0.5499, "step": 23631 }, { "epoch": 1.6011924927163088, "grad_norm": 7.3833818435668945, "learning_rate": 6.867547402286262e-05, "loss": 0.7609, "step": 23632 }, { "epoch": 1.6012602479842808, "grad_norm": 7.2533464431762695, "learning_rate": 6.867410500376481e-05, "loss": 0.618, "step": 23633 }, { "epoch": 1.6013280032522528, "grad_norm": 6.015063285827637, "learning_rate": 6.867273598466699e-05, "loss": 0.6918, "step": 23634 }, { "epoch": 1.601395758520225, "grad_norm": 4.777444839477539, "learning_rate": 6.867136696556917e-05, "loss": 0.7174, "step": 23635 }, { "epoch": 1.6014635137881972, "grad_norm": 4.434206962585449, "learning_rate": 6.866999794647135e-05, "loss": 0.6132, "step": 23636 }, { "epoch": 1.6015312690561692, "grad_norm": 7.136768341064453, "learning_rate": 6.866862892737353e-05, "loss": 0.6615, "step": 23637 }, { "epoch": 1.6015990243241411, "grad_norm": 5.7878875732421875, "learning_rate": 6.866725990827573e-05, "loss": 0.6173, "step": 23638 }, { "epoch": 1.6016667795921133, "grad_norm": 11.770334243774414, "learning_rate": 6.866589088917791e-05, "loss": 0.6842, "step": 23639 }, { "epoch": 1.6017345348600853, "grad_norm": 5.548005104064941, "learning_rate": 6.866452187008009e-05, "loss": 0.7525, "step": 23640 }, { "epoch": 1.6018022901280573, "grad_norm": 4.8259782791137695, "learning_rate": 6.866315285098227e-05, "loss": 0.62, "step": 23641 }, { "epoch": 1.6018700453960295, "grad_norm": 5.91743803024292, "learning_rate": 6.866178383188446e-05, "loss": 0.7628, "step": 23642 }, { "epoch": 1.6019378006640017, "grad_norm": 5.7117743492126465, "learning_rate": 6.866041481278664e-05, "loss": 0.5882, "step": 23643 }, { "epoch": 1.6020055559319737, "grad_norm": 7.660804271697998, "learning_rate": 6.865904579368882e-05, "loss": 0.812, "step": 23644 }, { "epoch": 1.6020733111999457, "grad_norm": 4.667804718017578, "learning_rate": 6.8657676774591e-05, "loss": 0.5126, "step": 23645 }, { "epoch": 1.6021410664679179, "grad_norm": 5.306486129760742, "learning_rate": 6.865630775549318e-05, "loss": 0.5176, "step": 23646 }, { "epoch": 1.60220882173589, "grad_norm": 8.011886596679688, "learning_rate": 6.865493873639538e-05, "loss": 0.7278, "step": 23647 }, { "epoch": 1.602276577003862, "grad_norm": 5.954351425170898, "learning_rate": 6.865356971729756e-05, "loss": 0.5543, "step": 23648 }, { "epoch": 1.602344332271834, "grad_norm": 8.193537712097168, "learning_rate": 6.865220069819974e-05, "loss": 0.7412, "step": 23649 }, { "epoch": 1.6024120875398062, "grad_norm": 6.262233734130859, "learning_rate": 6.865083167910192e-05, "loss": 0.8961, "step": 23650 }, { "epoch": 1.6024798428077784, "grad_norm": 6.693604469299316, "learning_rate": 6.864946266000411e-05, "loss": 0.9583, "step": 23651 }, { "epoch": 1.6025475980757504, "grad_norm": 4.690883159637451, "learning_rate": 6.864809364090629e-05, "loss": 0.5888, "step": 23652 }, { "epoch": 1.6026153533437224, "grad_norm": 7.858147621154785, "learning_rate": 6.864672462180847e-05, "loss": 0.7034, "step": 23653 }, { "epoch": 1.6026831086116946, "grad_norm": 5.200597763061523, "learning_rate": 6.864535560271065e-05, "loss": 0.6933, "step": 23654 }, { "epoch": 1.6027508638796668, "grad_norm": 9.766071319580078, "learning_rate": 6.864398658361285e-05, "loss": 0.7112, "step": 23655 }, { "epoch": 1.6028186191476386, "grad_norm": 5.718202114105225, "learning_rate": 6.864261756451503e-05, "loss": 0.699, "step": 23656 }, { "epoch": 1.6028863744156108, "grad_norm": 8.824848175048828, "learning_rate": 6.864124854541721e-05, "loss": 0.6861, "step": 23657 }, { "epoch": 1.602954129683583, "grad_norm": 6.857663631439209, "learning_rate": 6.86398795263194e-05, "loss": 0.6314, "step": 23658 }, { "epoch": 1.603021884951555, "grad_norm": 5.600087642669678, "learning_rate": 6.863851050722158e-05, "loss": 0.7233, "step": 23659 }, { "epoch": 1.603089640219527, "grad_norm": 5.973272800445557, "learning_rate": 6.863714148812376e-05, "loss": 0.572, "step": 23660 }, { "epoch": 1.6031573954874991, "grad_norm": 5.367004871368408, "learning_rate": 6.863577246902596e-05, "loss": 0.7043, "step": 23661 }, { "epoch": 1.6032251507554713, "grad_norm": 4.476526260375977, "learning_rate": 6.863440344992814e-05, "loss": 0.602, "step": 23662 }, { "epoch": 1.6032929060234433, "grad_norm": 5.839150428771973, "learning_rate": 6.863303443083032e-05, "loss": 0.8704, "step": 23663 }, { "epoch": 1.6033606612914153, "grad_norm": 5.713308811187744, "learning_rate": 6.86316654117325e-05, "loss": 0.6992, "step": 23664 }, { "epoch": 1.6034284165593875, "grad_norm": 4.361336708068848, "learning_rate": 6.863029639263469e-05, "loss": 0.6652, "step": 23665 }, { "epoch": 1.6034961718273597, "grad_norm": 4.507424354553223, "learning_rate": 6.862892737353687e-05, "loss": 0.6059, "step": 23666 }, { "epoch": 1.6035639270953317, "grad_norm": 4.242824077606201, "learning_rate": 6.862755835443905e-05, "loss": 0.5755, "step": 23667 }, { "epoch": 1.6036316823633037, "grad_norm": 7.0851969718933105, "learning_rate": 6.862618933534123e-05, "loss": 0.9229, "step": 23668 }, { "epoch": 1.6036994376312759, "grad_norm": 9.523407936096191, "learning_rate": 6.862482031624341e-05, "loss": 0.6756, "step": 23669 }, { "epoch": 1.603767192899248, "grad_norm": 5.1588134765625, "learning_rate": 6.86234512971456e-05, "loss": 0.7744, "step": 23670 }, { "epoch": 1.60383494816722, "grad_norm": 9.532681465148926, "learning_rate": 6.862208227804779e-05, "loss": 0.7716, "step": 23671 }, { "epoch": 1.603902703435192, "grad_norm": 6.649491310119629, "learning_rate": 6.862071325894997e-05, "loss": 0.8383, "step": 23672 }, { "epoch": 1.6039704587031642, "grad_norm": 4.729587554931641, "learning_rate": 6.861934423985215e-05, "loss": 0.499, "step": 23673 }, { "epoch": 1.6040382139711362, "grad_norm": 6.330681800842285, "learning_rate": 6.861797522075434e-05, "loss": 0.656, "step": 23674 }, { "epoch": 1.6041059692391082, "grad_norm": 5.656576156616211, "learning_rate": 6.861660620165652e-05, "loss": 0.5979, "step": 23675 }, { "epoch": 1.6041737245070804, "grad_norm": 7.292180061340332, "learning_rate": 6.86152371825587e-05, "loss": 0.7611, "step": 23676 }, { "epoch": 1.6042414797750526, "grad_norm": 3.9458019733428955, "learning_rate": 6.861386816346088e-05, "loss": 0.6932, "step": 23677 }, { "epoch": 1.6043092350430246, "grad_norm": 8.425257682800293, "learning_rate": 6.861249914436306e-05, "loss": 0.6999, "step": 23678 }, { "epoch": 1.6043769903109966, "grad_norm": 8.77173900604248, "learning_rate": 6.861113012526526e-05, "loss": 0.5688, "step": 23679 }, { "epoch": 1.6044447455789688, "grad_norm": 4.175816535949707, "learning_rate": 6.860976110616744e-05, "loss": 0.5603, "step": 23680 }, { "epoch": 1.604512500846941, "grad_norm": 6.283186912536621, "learning_rate": 6.860839208706962e-05, "loss": 0.6403, "step": 23681 }, { "epoch": 1.604580256114913, "grad_norm": 6.008872985839844, "learning_rate": 6.86070230679718e-05, "loss": 0.7432, "step": 23682 }, { "epoch": 1.604648011382885, "grad_norm": 6.595198631286621, "learning_rate": 6.860565404887398e-05, "loss": 0.7306, "step": 23683 }, { "epoch": 1.6047157666508571, "grad_norm": 12.226011276245117, "learning_rate": 6.860428502977617e-05, "loss": 0.8329, "step": 23684 }, { "epoch": 1.6047835219188293, "grad_norm": 5.944432735443115, "learning_rate": 6.860291601067835e-05, "loss": 0.6966, "step": 23685 }, { "epoch": 1.6048512771868013, "grad_norm": 5.390777111053467, "learning_rate": 6.860154699158053e-05, "loss": 0.6616, "step": 23686 }, { "epoch": 1.6049190324547733, "grad_norm": 6.667038917541504, "learning_rate": 6.860017797248271e-05, "loss": 0.9145, "step": 23687 }, { "epoch": 1.6049867877227455, "grad_norm": 8.100889205932617, "learning_rate": 6.85988089533849e-05, "loss": 0.6877, "step": 23688 }, { "epoch": 1.6050545429907175, "grad_norm": 5.793933391571045, "learning_rate": 6.859743993428709e-05, "loss": 0.6275, "step": 23689 }, { "epoch": 1.6051222982586895, "grad_norm": 5.54113245010376, "learning_rate": 6.859607091518927e-05, "loss": 0.5253, "step": 23690 }, { "epoch": 1.6051900535266617, "grad_norm": 6.579443454742432, "learning_rate": 6.859470189609145e-05, "loss": 0.6541, "step": 23691 }, { "epoch": 1.6052578087946339, "grad_norm": 5.781435966491699, "learning_rate": 6.859333287699363e-05, "loss": 0.8593, "step": 23692 }, { "epoch": 1.6053255640626058, "grad_norm": 7.901644229888916, "learning_rate": 6.859196385789582e-05, "loss": 0.4963, "step": 23693 }, { "epoch": 1.6053933193305778, "grad_norm": 8.32093334197998, "learning_rate": 6.8590594838798e-05, "loss": 0.6187, "step": 23694 }, { "epoch": 1.60546107459855, "grad_norm": 5.393187522888184, "learning_rate": 6.858922581970018e-05, "loss": 0.5526, "step": 23695 }, { "epoch": 1.6055288298665222, "grad_norm": 6.667273044586182, "learning_rate": 6.858785680060236e-05, "loss": 0.9406, "step": 23696 }, { "epoch": 1.6055965851344942, "grad_norm": 5.659417629241943, "learning_rate": 6.858648778150456e-05, "loss": 0.6299, "step": 23697 }, { "epoch": 1.6056643404024662, "grad_norm": 6.216113090515137, "learning_rate": 6.858511876240674e-05, "loss": 0.7774, "step": 23698 }, { "epoch": 1.6057320956704384, "grad_norm": 6.085124492645264, "learning_rate": 6.858374974330892e-05, "loss": 0.6793, "step": 23699 }, { "epoch": 1.6057998509384106, "grad_norm": 5.002739906311035, "learning_rate": 6.85823807242111e-05, "loss": 0.5293, "step": 23700 }, { "epoch": 1.6058676062063826, "grad_norm": 4.774441719055176, "learning_rate": 6.858101170511329e-05, "loss": 0.5666, "step": 23701 }, { "epoch": 1.6059353614743546, "grad_norm": 8.02164077758789, "learning_rate": 6.857964268601547e-05, "loss": 0.7101, "step": 23702 }, { "epoch": 1.6060031167423268, "grad_norm": 5.827270984649658, "learning_rate": 6.857827366691765e-05, "loss": 0.7426, "step": 23703 }, { "epoch": 1.606070872010299, "grad_norm": 6.77017879486084, "learning_rate": 6.857690464781985e-05, "loss": 0.6245, "step": 23704 }, { "epoch": 1.6061386272782707, "grad_norm": 5.812680244445801, "learning_rate": 6.857553562872203e-05, "loss": 0.5993, "step": 23705 }, { "epoch": 1.606206382546243, "grad_norm": 5.9360198974609375, "learning_rate": 6.85741666096242e-05, "loss": 0.7796, "step": 23706 }, { "epoch": 1.6062741378142151, "grad_norm": 7.660834789276123, "learning_rate": 6.85727975905264e-05, "loss": 0.886, "step": 23707 }, { "epoch": 1.6063418930821871, "grad_norm": 4.930080413818359, "learning_rate": 6.857142857142858e-05, "loss": 0.8032, "step": 23708 }, { "epoch": 1.606409648350159, "grad_norm": 5.507931709289551, "learning_rate": 6.857005955233076e-05, "loss": 0.6135, "step": 23709 }, { "epoch": 1.6064774036181313, "grad_norm": 4.729072570800781, "learning_rate": 6.856869053323294e-05, "loss": 0.7354, "step": 23710 }, { "epoch": 1.6065451588861035, "grad_norm": 4.061792373657227, "learning_rate": 6.856732151413514e-05, "loss": 0.6031, "step": 23711 }, { "epoch": 1.6066129141540755, "grad_norm": 5.203408718109131, "learning_rate": 6.856595249503732e-05, "loss": 0.6043, "step": 23712 }, { "epoch": 1.6066806694220475, "grad_norm": 6.248551845550537, "learning_rate": 6.85645834759395e-05, "loss": 0.8417, "step": 23713 }, { "epoch": 1.6067484246900197, "grad_norm": 5.389490127563477, "learning_rate": 6.856321445684168e-05, "loss": 0.8264, "step": 23714 }, { "epoch": 1.6068161799579919, "grad_norm": 3.9970993995666504, "learning_rate": 6.856184543774386e-05, "loss": 0.5273, "step": 23715 }, { "epoch": 1.6068839352259638, "grad_norm": 4.345502853393555, "learning_rate": 6.856047641864605e-05, "loss": 0.5536, "step": 23716 }, { "epoch": 1.6069516904939358, "grad_norm": 6.879473686218262, "learning_rate": 6.855910739954823e-05, "loss": 0.7192, "step": 23717 }, { "epoch": 1.607019445761908, "grad_norm": 4.064130783081055, "learning_rate": 6.855773838045041e-05, "loss": 0.5555, "step": 23718 }, { "epoch": 1.6070872010298802, "grad_norm": 7.108002185821533, "learning_rate": 6.855636936135259e-05, "loss": 0.8024, "step": 23719 }, { "epoch": 1.6071549562978522, "grad_norm": 5.335479736328125, "learning_rate": 6.855500034225479e-05, "loss": 0.6152, "step": 23720 }, { "epoch": 1.6072227115658242, "grad_norm": 4.633594989776611, "learning_rate": 6.855363132315697e-05, "loss": 0.8485, "step": 23721 }, { "epoch": 1.6072904668337964, "grad_norm": 5.191250801086426, "learning_rate": 6.855226230405915e-05, "loss": 0.5871, "step": 23722 }, { "epoch": 1.6073582221017684, "grad_norm": 4.858529567718506, "learning_rate": 6.855089328496133e-05, "loss": 0.6656, "step": 23723 }, { "epoch": 1.6074259773697404, "grad_norm": 6.8153557777404785, "learning_rate": 6.854952426586351e-05, "loss": 0.7717, "step": 23724 }, { "epoch": 1.6074937326377126, "grad_norm": 6.765839099884033, "learning_rate": 6.85481552467657e-05, "loss": 0.6513, "step": 23725 }, { "epoch": 1.6075614879056848, "grad_norm": 4.34801721572876, "learning_rate": 6.854678622766788e-05, "loss": 0.7142, "step": 23726 }, { "epoch": 1.6076292431736567, "grad_norm": 6.529106616973877, "learning_rate": 6.854541720857006e-05, "loss": 0.6635, "step": 23727 }, { "epoch": 1.6076969984416287, "grad_norm": 5.0805559158325195, "learning_rate": 6.854404818947224e-05, "loss": 0.5429, "step": 23728 }, { "epoch": 1.607764753709601, "grad_norm": 5.519004821777344, "learning_rate": 6.854267917037444e-05, "loss": 0.5768, "step": 23729 }, { "epoch": 1.6078325089775731, "grad_norm": 7.651691436767578, "learning_rate": 6.854131015127662e-05, "loss": 0.7189, "step": 23730 }, { "epoch": 1.607900264245545, "grad_norm": 6.789473056793213, "learning_rate": 6.85399411321788e-05, "loss": 0.5854, "step": 23731 }, { "epoch": 1.607968019513517, "grad_norm": 6.537936687469482, "learning_rate": 6.853857211308098e-05, "loss": 0.6608, "step": 23732 }, { "epoch": 1.6080357747814893, "grad_norm": 6.478116035461426, "learning_rate": 6.853720309398316e-05, "loss": 0.7154, "step": 23733 }, { "epoch": 1.6081035300494615, "grad_norm": 4.731537342071533, "learning_rate": 6.853583407488535e-05, "loss": 0.5435, "step": 23734 }, { "epoch": 1.6081712853174335, "grad_norm": 6.596753120422363, "learning_rate": 6.853446505578753e-05, "loss": 0.7306, "step": 23735 }, { "epoch": 1.6082390405854055, "grad_norm": 4.295361042022705, "learning_rate": 6.853309603668971e-05, "loss": 0.7657, "step": 23736 }, { "epoch": 1.6083067958533777, "grad_norm": 4.113565921783447, "learning_rate": 6.853172701759189e-05, "loss": 0.5763, "step": 23737 }, { "epoch": 1.6083745511213496, "grad_norm": 6.903542518615723, "learning_rate": 6.853035799849407e-05, "loss": 0.471, "step": 23738 }, { "epoch": 1.6084423063893216, "grad_norm": 5.879730701446533, "learning_rate": 6.852898897939627e-05, "loss": 0.6507, "step": 23739 }, { "epoch": 1.6085100616572938, "grad_norm": 13.536928176879883, "learning_rate": 6.852761996029845e-05, "loss": 0.4941, "step": 23740 }, { "epoch": 1.608577816925266, "grad_norm": 6.025749206542969, "learning_rate": 6.852625094120063e-05, "loss": 0.8067, "step": 23741 }, { "epoch": 1.608645572193238, "grad_norm": 5.8799285888671875, "learning_rate": 6.852488192210281e-05, "loss": 0.6262, "step": 23742 }, { "epoch": 1.60871332746121, "grad_norm": 5.187122344970703, "learning_rate": 6.8523512903005e-05, "loss": 0.5476, "step": 23743 }, { "epoch": 1.6087810827291822, "grad_norm": 5.390186786651611, "learning_rate": 6.852214388390718e-05, "loss": 0.6905, "step": 23744 }, { "epoch": 1.6088488379971544, "grad_norm": 5.378807544708252, "learning_rate": 6.852077486480936e-05, "loss": 0.5506, "step": 23745 }, { "epoch": 1.6089165932651264, "grad_norm": 8.068032264709473, "learning_rate": 6.851940584571154e-05, "loss": 0.6122, "step": 23746 }, { "epoch": 1.6089843485330984, "grad_norm": 4.875119686126709, "learning_rate": 6.851803682661374e-05, "loss": 0.5896, "step": 23747 }, { "epoch": 1.6090521038010706, "grad_norm": 4.956336498260498, "learning_rate": 6.851666780751592e-05, "loss": 0.7224, "step": 23748 }, { "epoch": 1.6091198590690428, "grad_norm": 5.993435859680176, "learning_rate": 6.85152987884181e-05, "loss": 0.7679, "step": 23749 }, { "epoch": 1.6091876143370147, "grad_norm": 4.398682594299316, "learning_rate": 6.851392976932029e-05, "loss": 0.6127, "step": 23750 }, { "epoch": 1.6092553696049867, "grad_norm": 5.680661201477051, "learning_rate": 6.851256075022247e-05, "loss": 0.6123, "step": 23751 }, { "epoch": 1.609323124872959, "grad_norm": 5.494085311889648, "learning_rate": 6.851119173112465e-05, "loss": 0.5323, "step": 23752 }, { "epoch": 1.6093908801409311, "grad_norm": 7.635923862457275, "learning_rate": 6.850982271202685e-05, "loss": 0.5707, "step": 23753 }, { "epoch": 1.6094586354089029, "grad_norm": 7.47560977935791, "learning_rate": 6.850845369292903e-05, "loss": 0.6549, "step": 23754 }, { "epoch": 1.609526390676875, "grad_norm": 6.233575820922852, "learning_rate": 6.85070846738312e-05, "loss": 0.6693, "step": 23755 }, { "epoch": 1.6095941459448473, "grad_norm": 6.926915645599365, "learning_rate": 6.850571565473339e-05, "loss": 0.7383, "step": 23756 }, { "epoch": 1.6096619012128193, "grad_norm": 7.652651309967041, "learning_rate": 6.850434663563558e-05, "loss": 0.7258, "step": 23757 }, { "epoch": 1.6097296564807912, "grad_norm": 5.447443008422852, "learning_rate": 6.850297761653776e-05, "loss": 0.833, "step": 23758 }, { "epoch": 1.6097974117487635, "grad_norm": 4.857990741729736, "learning_rate": 6.850160859743994e-05, "loss": 0.7059, "step": 23759 }, { "epoch": 1.6098651670167357, "grad_norm": 5.3532938957214355, "learning_rate": 6.850023957834212e-05, "loss": 0.7544, "step": 23760 }, { "epoch": 1.6099329222847076, "grad_norm": 8.251593589782715, "learning_rate": 6.84988705592443e-05, "loss": 0.7966, "step": 23761 }, { "epoch": 1.6100006775526796, "grad_norm": 5.084717273712158, "learning_rate": 6.84975015401465e-05, "loss": 0.7158, "step": 23762 }, { "epoch": 1.6100684328206518, "grad_norm": 5.366341590881348, "learning_rate": 6.849613252104868e-05, "loss": 0.5121, "step": 23763 }, { "epoch": 1.610136188088624, "grad_norm": 5.483534812927246, "learning_rate": 6.849476350195086e-05, "loss": 0.6649, "step": 23764 }, { "epoch": 1.610203943356596, "grad_norm": 6.502875804901123, "learning_rate": 6.849339448285304e-05, "loss": 0.7848, "step": 23765 }, { "epoch": 1.610271698624568, "grad_norm": 5.005404472351074, "learning_rate": 6.849202546375523e-05, "loss": 0.4659, "step": 23766 }, { "epoch": 1.6103394538925402, "grad_norm": 4.2718634605407715, "learning_rate": 6.849065644465741e-05, "loss": 0.6682, "step": 23767 }, { "epoch": 1.6104072091605124, "grad_norm": 4.419189453125, "learning_rate": 6.848928742555959e-05, "loss": 0.6121, "step": 23768 }, { "epoch": 1.6104749644284844, "grad_norm": 8.196402549743652, "learning_rate": 6.848791840646177e-05, "loss": 0.7191, "step": 23769 }, { "epoch": 1.6105427196964563, "grad_norm": 7.571183204650879, "learning_rate": 6.848654938736395e-05, "loss": 0.8897, "step": 23770 }, { "epoch": 1.6106104749644286, "grad_norm": 5.516266822814941, "learning_rate": 6.848518036826615e-05, "loss": 0.6778, "step": 23771 }, { "epoch": 1.6106782302324005, "grad_norm": 4.875823497772217, "learning_rate": 6.848381134916833e-05, "loss": 0.5216, "step": 23772 }, { "epoch": 1.6107459855003725, "grad_norm": 5.361309051513672, "learning_rate": 6.84824423300705e-05, "loss": 0.6074, "step": 23773 }, { "epoch": 1.6108137407683447, "grad_norm": 6.002277374267578, "learning_rate": 6.848107331097269e-05, "loss": 0.7456, "step": 23774 }, { "epoch": 1.610881496036317, "grad_norm": 5.16649055480957, "learning_rate": 6.847970429187488e-05, "loss": 0.5701, "step": 23775 }, { "epoch": 1.610949251304289, "grad_norm": 9.408458709716797, "learning_rate": 6.847833527277706e-05, "loss": 0.7259, "step": 23776 }, { "epoch": 1.6110170065722609, "grad_norm": 5.137820243835449, "learning_rate": 6.847696625367924e-05, "loss": 0.6242, "step": 23777 }, { "epoch": 1.611084761840233, "grad_norm": 6.129801273345947, "learning_rate": 6.847559723458142e-05, "loss": 0.6513, "step": 23778 }, { "epoch": 1.6111525171082053, "grad_norm": 6.46537446975708, "learning_rate": 6.84742282154836e-05, "loss": 0.6136, "step": 23779 }, { "epoch": 1.6112202723761773, "grad_norm": 9.102548599243164, "learning_rate": 6.84728591963858e-05, "loss": 0.5972, "step": 23780 }, { "epoch": 1.6112880276441492, "grad_norm": 8.942181587219238, "learning_rate": 6.847149017728798e-05, "loss": 0.8033, "step": 23781 }, { "epoch": 1.6113557829121214, "grad_norm": 5.744556903839111, "learning_rate": 6.847012115819016e-05, "loss": 0.5306, "step": 23782 }, { "epoch": 1.6114235381800937, "grad_norm": 5.0467209815979, "learning_rate": 6.846875213909234e-05, "loss": 0.811, "step": 23783 }, { "epoch": 1.6114912934480656, "grad_norm": 4.188144207000732, "learning_rate": 6.846738311999453e-05, "loss": 0.4534, "step": 23784 }, { "epoch": 1.6115590487160376, "grad_norm": 6.345221519470215, "learning_rate": 6.846601410089671e-05, "loss": 0.6384, "step": 23785 }, { "epoch": 1.6116268039840098, "grad_norm": 6.748525619506836, "learning_rate": 6.846464508179889e-05, "loss": 0.9369, "step": 23786 }, { "epoch": 1.6116945592519818, "grad_norm": 5.665132522583008, "learning_rate": 6.846327606270107e-05, "loss": 0.578, "step": 23787 }, { "epoch": 1.6117623145199538, "grad_norm": 5.508127689361572, "learning_rate": 6.846190704360325e-05, "loss": 0.6253, "step": 23788 }, { "epoch": 1.611830069787926, "grad_norm": 6.351274013519287, "learning_rate": 6.846053802450545e-05, "loss": 0.7512, "step": 23789 }, { "epoch": 1.6118978250558982, "grad_norm": 6.486546039581299, "learning_rate": 6.845916900540763e-05, "loss": 0.6625, "step": 23790 }, { "epoch": 1.6119655803238702, "grad_norm": 5.104377746582031, "learning_rate": 6.84577999863098e-05, "loss": 0.4757, "step": 23791 }, { "epoch": 1.6120333355918421, "grad_norm": 7.352209091186523, "learning_rate": 6.845643096721199e-05, "loss": 0.8249, "step": 23792 }, { "epoch": 1.6121010908598143, "grad_norm": 7.397157192230225, "learning_rate": 6.845506194811418e-05, "loss": 0.6299, "step": 23793 }, { "epoch": 1.6121688461277865, "grad_norm": 8.84139633178711, "learning_rate": 6.845369292901636e-05, "loss": 0.7726, "step": 23794 }, { "epoch": 1.6122366013957585, "grad_norm": 4.382164001464844, "learning_rate": 6.845232390991854e-05, "loss": 0.5716, "step": 23795 }, { "epoch": 1.6123043566637305, "grad_norm": 5.04110860824585, "learning_rate": 6.845095489082074e-05, "loss": 0.5346, "step": 23796 }, { "epoch": 1.6123721119317027, "grad_norm": 9.304261207580566, "learning_rate": 6.844958587172292e-05, "loss": 0.686, "step": 23797 }, { "epoch": 1.612439867199675, "grad_norm": 5.5881571769714355, "learning_rate": 6.84482168526251e-05, "loss": 0.5722, "step": 23798 }, { "epoch": 1.612507622467647, "grad_norm": 5.473517894744873, "learning_rate": 6.844684783352729e-05, "loss": 0.8385, "step": 23799 }, { "epoch": 1.6125753777356189, "grad_norm": 9.924430847167969, "learning_rate": 6.844547881442947e-05, "loss": 0.6173, "step": 23800 }, { "epoch": 1.612643133003591, "grad_norm": 7.005767822265625, "learning_rate": 6.844410979533165e-05, "loss": 0.6784, "step": 23801 }, { "epoch": 1.612710888271563, "grad_norm": 4.457620620727539, "learning_rate": 6.844274077623383e-05, "loss": 0.6475, "step": 23802 }, { "epoch": 1.612778643539535, "grad_norm": 4.933784008026123, "learning_rate": 6.844137175713602e-05, "loss": 0.6105, "step": 23803 }, { "epoch": 1.6128463988075072, "grad_norm": 5.848489761352539, "learning_rate": 6.84400027380382e-05, "loss": 0.618, "step": 23804 }, { "epoch": 1.6129141540754794, "grad_norm": 6.031303882598877, "learning_rate": 6.843863371894039e-05, "loss": 0.5378, "step": 23805 }, { "epoch": 1.6129819093434514, "grad_norm": 7.765096187591553, "learning_rate": 6.843726469984257e-05, "loss": 0.758, "step": 23806 }, { "epoch": 1.6130496646114234, "grad_norm": 5.721081256866455, "learning_rate": 6.843589568074476e-05, "loss": 0.7712, "step": 23807 }, { "epoch": 1.6131174198793956, "grad_norm": 6.295535564422607, "learning_rate": 6.843452666164694e-05, "loss": 0.7815, "step": 23808 }, { "epoch": 1.6131851751473678, "grad_norm": 4.722443580627441, "learning_rate": 6.843315764254912e-05, "loss": 0.6329, "step": 23809 }, { "epoch": 1.6132529304153398, "grad_norm": 8.883573532104492, "learning_rate": 6.84317886234513e-05, "loss": 0.4963, "step": 23810 }, { "epoch": 1.6133206856833118, "grad_norm": 5.774490833282471, "learning_rate": 6.843041960435348e-05, "loss": 0.4397, "step": 23811 }, { "epoch": 1.613388440951284, "grad_norm": 4.005722999572754, "learning_rate": 6.842905058525568e-05, "loss": 0.6251, "step": 23812 }, { "epoch": 1.6134561962192562, "grad_norm": 7.335656642913818, "learning_rate": 6.842768156615786e-05, "loss": 0.6095, "step": 23813 }, { "epoch": 1.6135239514872282, "grad_norm": 4.361253261566162, "learning_rate": 6.842631254706004e-05, "loss": 0.6697, "step": 23814 }, { "epoch": 1.6135917067552001, "grad_norm": 7.206592082977295, "learning_rate": 6.842494352796222e-05, "loss": 0.8606, "step": 23815 }, { "epoch": 1.6136594620231723, "grad_norm": 6.8114423751831055, "learning_rate": 6.84235745088644e-05, "loss": 0.4933, "step": 23816 }, { "epoch": 1.6137272172911445, "grad_norm": 5.370650291442871, "learning_rate": 6.842220548976659e-05, "loss": 0.6282, "step": 23817 }, { "epoch": 1.6137949725591165, "grad_norm": 4.877401351928711, "learning_rate": 6.842083647066877e-05, "loss": 0.6456, "step": 23818 }, { "epoch": 1.6138627278270885, "grad_norm": 9.379902839660645, "learning_rate": 6.841946745157095e-05, "loss": 0.81, "step": 23819 }, { "epoch": 1.6139304830950607, "grad_norm": 6.37748384475708, "learning_rate": 6.841809843247313e-05, "loss": 0.5315, "step": 23820 }, { "epoch": 1.6139982383630327, "grad_norm": 4.405511379241943, "learning_rate": 6.841672941337533e-05, "loss": 0.5961, "step": 23821 }, { "epoch": 1.6140659936310047, "grad_norm": 4.763770580291748, "learning_rate": 6.84153603942775e-05, "loss": 0.6058, "step": 23822 }, { "epoch": 1.6141337488989769, "grad_norm": 4.184586524963379, "learning_rate": 6.841399137517969e-05, "loss": 0.6019, "step": 23823 }, { "epoch": 1.614201504166949, "grad_norm": 5.756418228149414, "learning_rate": 6.841262235608187e-05, "loss": 0.6114, "step": 23824 }, { "epoch": 1.614269259434921, "grad_norm": 5.306129455566406, "learning_rate": 6.841125333698405e-05, "loss": 0.6578, "step": 23825 }, { "epoch": 1.614337014702893, "grad_norm": 6.150895595550537, "learning_rate": 6.840988431788624e-05, "loss": 0.6332, "step": 23826 }, { "epoch": 1.6144047699708652, "grad_norm": 7.311967849731445, "learning_rate": 6.840851529878842e-05, "loss": 0.6819, "step": 23827 }, { "epoch": 1.6144725252388374, "grad_norm": 4.514043807983398, "learning_rate": 6.84071462796906e-05, "loss": 0.7505, "step": 23828 }, { "epoch": 1.6145402805068094, "grad_norm": 5.730600357055664, "learning_rate": 6.840577726059278e-05, "loss": 0.4188, "step": 23829 }, { "epoch": 1.6146080357747814, "grad_norm": 5.863734245300293, "learning_rate": 6.840440824149498e-05, "loss": 0.6651, "step": 23830 }, { "epoch": 1.6146757910427536, "grad_norm": 6.085780620574951, "learning_rate": 6.840303922239716e-05, "loss": 0.6164, "step": 23831 }, { "epoch": 1.6147435463107258, "grad_norm": 6.533575534820557, "learning_rate": 6.840167020329934e-05, "loss": 0.775, "step": 23832 }, { "epoch": 1.6148113015786978, "grad_norm": 5.933652400970459, "learning_rate": 6.840030118420152e-05, "loss": 0.8079, "step": 23833 }, { "epoch": 1.6148790568466698, "grad_norm": 5.6156158447265625, "learning_rate": 6.83989321651037e-05, "loss": 0.7999, "step": 23834 }, { "epoch": 1.614946812114642, "grad_norm": 7.43499231338501, "learning_rate": 6.839756314600589e-05, "loss": 0.8289, "step": 23835 }, { "epoch": 1.615014567382614, "grad_norm": 8.961986541748047, "learning_rate": 6.839619412690807e-05, "loss": 0.7801, "step": 23836 }, { "epoch": 1.615082322650586, "grad_norm": 5.603696346282959, "learning_rate": 6.839482510781025e-05, "loss": 0.9129, "step": 23837 }, { "epoch": 1.6151500779185581, "grad_norm": 5.792304992675781, "learning_rate": 6.839345608871243e-05, "loss": 0.5352, "step": 23838 }, { "epoch": 1.6152178331865303, "grad_norm": 7.990598201751709, "learning_rate": 6.839208706961463e-05, "loss": 0.8666, "step": 23839 }, { "epoch": 1.6152855884545023, "grad_norm": 6.167096138000488, "learning_rate": 6.83907180505168e-05, "loss": 0.6889, "step": 23840 }, { "epoch": 1.6153533437224743, "grad_norm": 5.31458854675293, "learning_rate": 6.838934903141899e-05, "loss": 0.6524, "step": 23841 }, { "epoch": 1.6154210989904465, "grad_norm": 6.484391212463379, "learning_rate": 6.838798001232117e-05, "loss": 0.8805, "step": 23842 }, { "epoch": 1.6154888542584187, "grad_norm": 4.827744960784912, "learning_rate": 6.838661099322336e-05, "loss": 0.6601, "step": 23843 }, { "epoch": 1.6155566095263907, "grad_norm": 6.184671401977539, "learning_rate": 6.838524197412554e-05, "loss": 0.6622, "step": 23844 }, { "epoch": 1.6156243647943627, "grad_norm": 10.181243896484375, "learning_rate": 6.838387295502772e-05, "loss": 0.7098, "step": 23845 }, { "epoch": 1.6156921200623349, "grad_norm": 7.065110206604004, "learning_rate": 6.838250393592992e-05, "loss": 0.6282, "step": 23846 }, { "epoch": 1.615759875330307, "grad_norm": 8.570528030395508, "learning_rate": 6.83811349168321e-05, "loss": 0.8339, "step": 23847 }, { "epoch": 1.615827630598279, "grad_norm": 5.577297687530518, "learning_rate": 6.837976589773428e-05, "loss": 0.5634, "step": 23848 }, { "epoch": 1.615895385866251, "grad_norm": 4.6736159324646, "learning_rate": 6.837839687863647e-05, "loss": 0.6904, "step": 23849 }, { "epoch": 1.6159631411342232, "grad_norm": 5.277221202850342, "learning_rate": 6.837702785953865e-05, "loss": 0.6509, "step": 23850 }, { "epoch": 1.6160308964021952, "grad_norm": 4.121088981628418, "learning_rate": 6.837565884044083e-05, "loss": 0.5913, "step": 23851 }, { "epoch": 1.6160986516701672, "grad_norm": 5.979040622711182, "learning_rate": 6.837428982134301e-05, "loss": 0.564, "step": 23852 }, { "epoch": 1.6161664069381394, "grad_norm": 5.014745712280273, "learning_rate": 6.83729208022452e-05, "loss": 0.631, "step": 23853 }, { "epoch": 1.6162341622061116, "grad_norm": 8.261077880859375, "learning_rate": 6.837155178314738e-05, "loss": 0.7109, "step": 23854 }, { "epoch": 1.6163019174740836, "grad_norm": 5.055196285247803, "learning_rate": 6.837018276404957e-05, "loss": 0.4879, "step": 23855 }, { "epoch": 1.6163696727420556, "grad_norm": 10.068723678588867, "learning_rate": 6.836881374495175e-05, "loss": 0.6354, "step": 23856 }, { "epoch": 1.6164374280100278, "grad_norm": 6.628906726837158, "learning_rate": 6.836744472585393e-05, "loss": 0.602, "step": 23857 }, { "epoch": 1.616505183278, "grad_norm": 4.8231940269470215, "learning_rate": 6.836607570675612e-05, "loss": 0.5487, "step": 23858 }, { "epoch": 1.616572938545972, "grad_norm": 6.190921783447266, "learning_rate": 6.83647066876583e-05, "loss": 0.6395, "step": 23859 }, { "epoch": 1.616640693813944, "grad_norm": 8.282098770141602, "learning_rate": 6.836333766856048e-05, "loss": 0.7849, "step": 23860 }, { "epoch": 1.6167084490819161, "grad_norm": 6.455921173095703, "learning_rate": 6.836196864946266e-05, "loss": 0.7027, "step": 23861 }, { "epoch": 1.6167762043498883, "grad_norm": 6.402316570281982, "learning_rate": 6.836059963036485e-05, "loss": 0.5967, "step": 23862 }, { "epoch": 1.6168439596178603, "grad_norm": 5.261387348175049, "learning_rate": 6.835923061126704e-05, "loss": 0.8105, "step": 23863 }, { "epoch": 1.6169117148858323, "grad_norm": 6.820036888122559, "learning_rate": 6.835786159216922e-05, "loss": 0.631, "step": 23864 }, { "epoch": 1.6169794701538045, "grad_norm": 5.190160751342773, "learning_rate": 6.83564925730714e-05, "loss": 0.726, "step": 23865 }, { "epoch": 1.6170472254217767, "grad_norm": 5.615127086639404, "learning_rate": 6.835512355397358e-05, "loss": 0.8115, "step": 23866 }, { "epoch": 1.6171149806897487, "grad_norm": 7.0527472496032715, "learning_rate": 6.835375453487577e-05, "loss": 0.7607, "step": 23867 }, { "epoch": 1.6171827359577207, "grad_norm": 5.898183345794678, "learning_rate": 6.835238551577795e-05, "loss": 0.7336, "step": 23868 }, { "epoch": 1.6172504912256929, "grad_norm": 4.828089237213135, "learning_rate": 6.835101649668013e-05, "loss": 0.6757, "step": 23869 }, { "epoch": 1.6173182464936648, "grad_norm": 5.885139465332031, "learning_rate": 6.834964747758231e-05, "loss": 0.7015, "step": 23870 }, { "epoch": 1.6173860017616368, "grad_norm": 4.904937267303467, "learning_rate": 6.834827845848449e-05, "loss": 0.6078, "step": 23871 }, { "epoch": 1.617453757029609, "grad_norm": 4.912672519683838, "learning_rate": 6.834690943938669e-05, "loss": 0.5463, "step": 23872 }, { "epoch": 1.6175215122975812, "grad_norm": 6.10015344619751, "learning_rate": 6.834554042028887e-05, "loss": 0.6357, "step": 23873 }, { "epoch": 1.6175892675655532, "grad_norm": 6.825981616973877, "learning_rate": 6.834417140119105e-05, "loss": 0.7905, "step": 23874 }, { "epoch": 1.6176570228335252, "grad_norm": 6.510390758514404, "learning_rate": 6.834280238209323e-05, "loss": 0.6679, "step": 23875 }, { "epoch": 1.6177247781014974, "grad_norm": 6.4297661781311035, "learning_rate": 6.834143336299542e-05, "loss": 0.7105, "step": 23876 }, { "epoch": 1.6177925333694696, "grad_norm": 6.710338115692139, "learning_rate": 6.83400643438976e-05, "loss": 0.7905, "step": 23877 }, { "epoch": 1.6178602886374416, "grad_norm": 4.445905685424805, "learning_rate": 6.833869532479978e-05, "loss": 0.4206, "step": 23878 }, { "epoch": 1.6179280439054136, "grad_norm": 5.532150745391846, "learning_rate": 6.833732630570196e-05, "loss": 0.5073, "step": 23879 }, { "epoch": 1.6179957991733858, "grad_norm": 8.812516212463379, "learning_rate": 6.833595728660414e-05, "loss": 0.7256, "step": 23880 }, { "epoch": 1.618063554441358, "grad_norm": 5.938845634460449, "learning_rate": 6.833458826750634e-05, "loss": 0.5879, "step": 23881 }, { "epoch": 1.61813130970933, "grad_norm": 4.968221664428711, "learning_rate": 6.833321924840852e-05, "loss": 0.868, "step": 23882 }, { "epoch": 1.618199064977302, "grad_norm": 6.131258010864258, "learning_rate": 6.83318502293107e-05, "loss": 0.7421, "step": 23883 }, { "epoch": 1.6182668202452741, "grad_norm": 5.287716388702393, "learning_rate": 6.833048121021288e-05, "loss": 0.6484, "step": 23884 }, { "epoch": 1.618334575513246, "grad_norm": 5.7608160972595215, "learning_rate": 6.832911219111507e-05, "loss": 0.5829, "step": 23885 }, { "epoch": 1.618402330781218, "grad_norm": 5.7163801193237305, "learning_rate": 6.832774317201725e-05, "loss": 0.6703, "step": 23886 }, { "epoch": 1.6184700860491903, "grad_norm": 8.400796890258789, "learning_rate": 6.832637415291943e-05, "loss": 0.6798, "step": 23887 }, { "epoch": 1.6185378413171625, "grad_norm": 6.43309211730957, "learning_rate": 6.832500513382161e-05, "loss": 0.5984, "step": 23888 }, { "epoch": 1.6186055965851345, "grad_norm": 10.194961547851562, "learning_rate": 6.83236361147238e-05, "loss": 0.7242, "step": 23889 }, { "epoch": 1.6186733518531065, "grad_norm": 9.0968599319458, "learning_rate": 6.832226709562599e-05, "loss": 0.7639, "step": 23890 }, { "epoch": 1.6187411071210787, "grad_norm": 4.6374897956848145, "learning_rate": 6.832089807652817e-05, "loss": 0.592, "step": 23891 }, { "epoch": 1.6188088623890509, "grad_norm": 7.7875566482543945, "learning_rate": 6.831952905743036e-05, "loss": 0.7109, "step": 23892 }, { "epoch": 1.6188766176570228, "grad_norm": 5.436453342437744, "learning_rate": 6.831816003833254e-05, "loss": 0.7742, "step": 23893 }, { "epoch": 1.6189443729249948, "grad_norm": 7.23473596572876, "learning_rate": 6.831679101923472e-05, "loss": 0.6065, "step": 23894 }, { "epoch": 1.619012128192967, "grad_norm": 5.297054290771484, "learning_rate": 6.831542200013691e-05, "loss": 0.5596, "step": 23895 }, { "epoch": 1.6190798834609392, "grad_norm": 6.013125896453857, "learning_rate": 6.83140529810391e-05, "loss": 0.6189, "step": 23896 }, { "epoch": 1.6191476387289112, "grad_norm": 6.2935872077941895, "learning_rate": 6.831268396194128e-05, "loss": 0.8457, "step": 23897 }, { "epoch": 1.6192153939968832, "grad_norm": 5.7628068923950195, "learning_rate": 6.831131494284346e-05, "loss": 0.717, "step": 23898 }, { "epoch": 1.6192831492648554, "grad_norm": 5.628773212432861, "learning_rate": 6.830994592374565e-05, "loss": 0.8138, "step": 23899 }, { "epoch": 1.6193509045328274, "grad_norm": 7.1628313064575195, "learning_rate": 6.830857690464783e-05, "loss": 0.8223, "step": 23900 }, { "epoch": 1.6194186598007994, "grad_norm": 4.552783966064453, "learning_rate": 6.830720788555001e-05, "loss": 0.5203, "step": 23901 }, { "epoch": 1.6194864150687716, "grad_norm": 6.439541816711426, "learning_rate": 6.830583886645219e-05, "loss": 0.7535, "step": 23902 }, { "epoch": 1.6195541703367438, "grad_norm": 5.402059555053711, "learning_rate": 6.830446984735437e-05, "loss": 0.6296, "step": 23903 }, { "epoch": 1.6196219256047157, "grad_norm": 6.647703647613525, "learning_rate": 6.830310082825656e-05, "loss": 0.6517, "step": 23904 }, { "epoch": 1.6196896808726877, "grad_norm": 6.102250099182129, "learning_rate": 6.830173180915874e-05, "loss": 0.7429, "step": 23905 }, { "epoch": 1.61975743614066, "grad_norm": 5.035518169403076, "learning_rate": 6.830036279006093e-05, "loss": 0.4504, "step": 23906 }, { "epoch": 1.6198251914086321, "grad_norm": 9.927040100097656, "learning_rate": 6.82989937709631e-05, "loss": 0.6785, "step": 23907 }, { "epoch": 1.619892946676604, "grad_norm": 6.0016279220581055, "learning_rate": 6.82976247518653e-05, "loss": 0.7922, "step": 23908 }, { "epoch": 1.619960701944576, "grad_norm": 5.507855415344238, "learning_rate": 6.829625573276748e-05, "loss": 0.6401, "step": 23909 }, { "epoch": 1.6200284572125483, "grad_norm": 6.164402961730957, "learning_rate": 6.829488671366966e-05, "loss": 0.9006, "step": 23910 }, { "epoch": 1.6200962124805205, "grad_norm": 5.704516887664795, "learning_rate": 6.829351769457184e-05, "loss": 0.6654, "step": 23911 }, { "epoch": 1.6201639677484925, "grad_norm": 5.613555431365967, "learning_rate": 6.829214867547402e-05, "loss": 0.7952, "step": 23912 }, { "epoch": 1.6202317230164645, "grad_norm": 6.281595230102539, "learning_rate": 6.829077965637621e-05, "loss": 0.6195, "step": 23913 }, { "epoch": 1.6202994782844367, "grad_norm": 6.544998645782471, "learning_rate": 6.82894106372784e-05, "loss": 0.5328, "step": 23914 }, { "epoch": 1.6203672335524089, "grad_norm": 5.5044989585876465, "learning_rate": 6.828804161818058e-05, "loss": 0.6403, "step": 23915 }, { "epoch": 1.6204349888203808, "grad_norm": 5.378137588500977, "learning_rate": 6.828667259908276e-05, "loss": 0.7691, "step": 23916 }, { "epoch": 1.6205027440883528, "grad_norm": 4.502652168273926, "learning_rate": 6.828530357998495e-05, "loss": 0.5852, "step": 23917 }, { "epoch": 1.620570499356325, "grad_norm": 5.2445244789123535, "learning_rate": 6.828393456088713e-05, "loss": 0.7899, "step": 23918 }, { "epoch": 1.620638254624297, "grad_norm": 4.770193576812744, "learning_rate": 6.828256554178931e-05, "loss": 0.6876, "step": 23919 }, { "epoch": 1.620706009892269, "grad_norm": 6.10335636138916, "learning_rate": 6.828119652269149e-05, "loss": 0.7487, "step": 23920 }, { "epoch": 1.6207737651602412, "grad_norm": 9.035701751708984, "learning_rate": 6.827982750359367e-05, "loss": 0.7768, "step": 23921 }, { "epoch": 1.6208415204282134, "grad_norm": 8.471132278442383, "learning_rate": 6.827845848449586e-05, "loss": 0.7399, "step": 23922 }, { "epoch": 1.6209092756961854, "grad_norm": 5.98414945602417, "learning_rate": 6.827708946539805e-05, "loss": 0.7939, "step": 23923 }, { "epoch": 1.6209770309641574, "grad_norm": 5.73787784576416, "learning_rate": 6.827572044630023e-05, "loss": 0.7048, "step": 23924 }, { "epoch": 1.6210447862321296, "grad_norm": 6.444807052612305, "learning_rate": 6.82743514272024e-05, "loss": 0.7705, "step": 23925 }, { "epoch": 1.6211125415001018, "grad_norm": 4.961414813995361, "learning_rate": 6.827298240810459e-05, "loss": 0.859, "step": 23926 }, { "epoch": 1.6211802967680737, "grad_norm": 6.858839511871338, "learning_rate": 6.827161338900678e-05, "loss": 0.6751, "step": 23927 }, { "epoch": 1.6212480520360457, "grad_norm": 4.329789638519287, "learning_rate": 6.827024436990896e-05, "loss": 0.4813, "step": 23928 }, { "epoch": 1.621315807304018, "grad_norm": 6.10967493057251, "learning_rate": 6.826887535081114e-05, "loss": 0.8057, "step": 23929 }, { "epoch": 1.6213835625719901, "grad_norm": 4.803404808044434, "learning_rate": 6.826750633171332e-05, "loss": 0.5133, "step": 23930 }, { "epoch": 1.621451317839962, "grad_norm": 6.993913650512695, "learning_rate": 6.826613731261552e-05, "loss": 0.6272, "step": 23931 }, { "epoch": 1.621519073107934, "grad_norm": 4.712862968444824, "learning_rate": 6.82647682935177e-05, "loss": 0.4955, "step": 23932 }, { "epoch": 1.6215868283759063, "grad_norm": 7.47714900970459, "learning_rate": 6.826339927441988e-05, "loss": 0.7388, "step": 23933 }, { "epoch": 1.6216545836438783, "grad_norm": 4.374339580535889, "learning_rate": 6.826203025532206e-05, "loss": 0.5138, "step": 23934 }, { "epoch": 1.6217223389118502, "grad_norm": 9.503299713134766, "learning_rate": 6.826066123622425e-05, "loss": 0.7857, "step": 23935 }, { "epoch": 1.6217900941798225, "grad_norm": 6.504443645477295, "learning_rate": 6.825929221712643e-05, "loss": 0.8115, "step": 23936 }, { "epoch": 1.6218578494477947, "grad_norm": 5.0501909255981445, "learning_rate": 6.825792319802861e-05, "loss": 0.545, "step": 23937 }, { "epoch": 1.6219256047157666, "grad_norm": 7.738496780395508, "learning_rate": 6.82565541789308e-05, "loss": 0.6158, "step": 23938 }, { "epoch": 1.6219933599837386, "grad_norm": 7.864198684692383, "learning_rate": 6.825518515983298e-05, "loss": 0.9128, "step": 23939 }, { "epoch": 1.6220611152517108, "grad_norm": 7.360284328460693, "learning_rate": 6.825381614073517e-05, "loss": 0.7504, "step": 23940 }, { "epoch": 1.622128870519683, "grad_norm": 5.104711055755615, "learning_rate": 6.825244712163736e-05, "loss": 0.4831, "step": 23941 }, { "epoch": 1.622196625787655, "grad_norm": 5.8658270835876465, "learning_rate": 6.825107810253954e-05, "loss": 0.7147, "step": 23942 }, { "epoch": 1.622264381055627, "grad_norm": 5.641109943389893, "learning_rate": 6.824970908344172e-05, "loss": 0.6668, "step": 23943 }, { "epoch": 1.6223321363235992, "grad_norm": 6.304354667663574, "learning_rate": 6.82483400643439e-05, "loss": 0.7464, "step": 23944 }, { "epoch": 1.6223998915915714, "grad_norm": 7.349808216094971, "learning_rate": 6.82469710452461e-05, "loss": 0.7468, "step": 23945 }, { "epoch": 1.6224676468595434, "grad_norm": 4.725027561187744, "learning_rate": 6.824560202614827e-05, "loss": 0.5518, "step": 23946 }, { "epoch": 1.6225354021275153, "grad_norm": 5.575742721557617, "learning_rate": 6.824423300705045e-05, "loss": 0.5652, "step": 23947 }, { "epoch": 1.6226031573954875, "grad_norm": 4.580389976501465, "learning_rate": 6.824286398795264e-05, "loss": 0.6449, "step": 23948 }, { "epoch": 1.6226709126634595, "grad_norm": 4.2550368309021, "learning_rate": 6.824149496885482e-05, "loss": 0.6037, "step": 23949 }, { "epoch": 1.6227386679314315, "grad_norm": 6.092131614685059, "learning_rate": 6.824012594975701e-05, "loss": 0.5773, "step": 23950 }, { "epoch": 1.6228064231994037, "grad_norm": 5.871645450592041, "learning_rate": 6.823875693065919e-05, "loss": 0.6494, "step": 23951 }, { "epoch": 1.622874178467376, "grad_norm": 7.979231834411621, "learning_rate": 6.823738791156137e-05, "loss": 0.6394, "step": 23952 }, { "epoch": 1.622941933735348, "grad_norm": 6.014817714691162, "learning_rate": 6.823601889246355e-05, "loss": 0.662, "step": 23953 }, { "epoch": 1.6230096890033199, "grad_norm": 9.477133750915527, "learning_rate": 6.823464987336574e-05, "loss": 0.872, "step": 23954 }, { "epoch": 1.623077444271292, "grad_norm": 7.020091533660889, "learning_rate": 6.823328085426792e-05, "loss": 0.8394, "step": 23955 }, { "epoch": 1.6231451995392643, "grad_norm": 5.339729309082031, "learning_rate": 6.82319118351701e-05, "loss": 0.5337, "step": 23956 }, { "epoch": 1.6232129548072363, "grad_norm": 5.006267070770264, "learning_rate": 6.823054281607229e-05, "loss": 0.5921, "step": 23957 }, { "epoch": 1.6232807100752082, "grad_norm": 8.158183097839355, "learning_rate": 6.822917379697447e-05, "loss": 0.5946, "step": 23958 }, { "epoch": 1.6233484653431804, "grad_norm": 7.706267356872559, "learning_rate": 6.822780477787666e-05, "loss": 0.6804, "step": 23959 }, { "epoch": 1.6234162206111526, "grad_norm": 7.326105117797852, "learning_rate": 6.822643575877884e-05, "loss": 0.7275, "step": 23960 }, { "epoch": 1.6234839758791246, "grad_norm": 6.056076526641846, "learning_rate": 6.822506673968102e-05, "loss": 0.6059, "step": 23961 }, { "epoch": 1.6235517311470966, "grad_norm": 7.934529781341553, "learning_rate": 6.82236977205832e-05, "loss": 0.5525, "step": 23962 }, { "epoch": 1.6236194864150688, "grad_norm": 7.257071018218994, "learning_rate": 6.82223287014854e-05, "loss": 0.6066, "step": 23963 }, { "epoch": 1.623687241683041, "grad_norm": 5.552701950073242, "learning_rate": 6.822095968238757e-05, "loss": 0.7324, "step": 23964 }, { "epoch": 1.623754996951013, "grad_norm": 5.003308296203613, "learning_rate": 6.821959066328976e-05, "loss": 0.618, "step": 23965 }, { "epoch": 1.623822752218985, "grad_norm": 5.381232261657715, "learning_rate": 6.821822164419194e-05, "loss": 0.6103, "step": 23966 }, { "epoch": 1.6238905074869572, "grad_norm": 3.6289281845092773, "learning_rate": 6.821685262509412e-05, "loss": 0.4855, "step": 23967 }, { "epoch": 1.6239582627549292, "grad_norm": 5.033482074737549, "learning_rate": 6.821548360599631e-05, "loss": 0.6174, "step": 23968 }, { "epoch": 1.6240260180229011, "grad_norm": 6.573016166687012, "learning_rate": 6.821411458689849e-05, "loss": 0.5949, "step": 23969 }, { "epoch": 1.6240937732908733, "grad_norm": 5.70029878616333, "learning_rate": 6.821274556780067e-05, "loss": 0.6596, "step": 23970 }, { "epoch": 1.6241615285588455, "grad_norm": 6.770984172821045, "learning_rate": 6.821137654870285e-05, "loss": 0.7827, "step": 23971 }, { "epoch": 1.6242292838268175, "grad_norm": 5.851580619812012, "learning_rate": 6.821000752960504e-05, "loss": 0.7636, "step": 23972 }, { "epoch": 1.6242970390947895, "grad_norm": 5.834631443023682, "learning_rate": 6.820863851050722e-05, "loss": 0.6227, "step": 23973 }, { "epoch": 1.6243647943627617, "grad_norm": 6.433433532714844, "learning_rate": 6.82072694914094e-05, "loss": 0.6105, "step": 23974 }, { "epoch": 1.624432549630734, "grad_norm": 5.517070293426514, "learning_rate": 6.820590047231159e-05, "loss": 0.6743, "step": 23975 }, { "epoch": 1.624500304898706, "grad_norm": 6.64608097076416, "learning_rate": 6.820453145321377e-05, "loss": 0.9558, "step": 23976 }, { "epoch": 1.6245680601666779, "grad_norm": 7.508203506469727, "learning_rate": 6.820316243411596e-05, "loss": 0.7474, "step": 23977 }, { "epoch": 1.62463581543465, "grad_norm": 8.693068504333496, "learning_rate": 6.820179341501814e-05, "loss": 0.5846, "step": 23978 }, { "epoch": 1.6247035707026223, "grad_norm": 6.619268417358398, "learning_rate": 6.820042439592032e-05, "loss": 0.9708, "step": 23979 }, { "epoch": 1.6247713259705943, "grad_norm": 6.326210975646973, "learning_rate": 6.81990553768225e-05, "loss": 0.6042, "step": 23980 }, { "epoch": 1.6248390812385662, "grad_norm": 7.453950881958008, "learning_rate": 6.81976863577247e-05, "loss": 0.9514, "step": 23981 }, { "epoch": 1.6249068365065384, "grad_norm": 6.084409713745117, "learning_rate": 6.819631733862688e-05, "loss": 0.6656, "step": 23982 }, { "epoch": 1.6249745917745104, "grad_norm": 4.641823768615723, "learning_rate": 6.819494831952906e-05, "loss": 0.5732, "step": 23983 }, { "epoch": 1.6250423470424824, "grad_norm": 7.5051188468933105, "learning_rate": 6.819357930043125e-05, "loss": 0.8195, "step": 23984 }, { "epoch": 1.6251101023104546, "grad_norm": 4.759945869445801, "learning_rate": 6.819221028133343e-05, "loss": 0.6011, "step": 23985 }, { "epoch": 1.6251778575784268, "grad_norm": 5.789501190185547, "learning_rate": 6.819084126223561e-05, "loss": 0.5848, "step": 23986 }, { "epoch": 1.6252456128463988, "grad_norm": 6.495411396026611, "learning_rate": 6.81894722431378e-05, "loss": 0.5693, "step": 23987 }, { "epoch": 1.6253133681143708, "grad_norm": 5.757691383361816, "learning_rate": 6.818810322403998e-05, "loss": 0.732, "step": 23988 }, { "epoch": 1.625381123382343, "grad_norm": 6.853086948394775, "learning_rate": 6.818673420494216e-05, "loss": 0.6044, "step": 23989 }, { "epoch": 1.6254488786503152, "grad_norm": 6.234113693237305, "learning_rate": 6.818536518584434e-05, "loss": 0.6207, "step": 23990 }, { "epoch": 1.6255166339182872, "grad_norm": 5.709715843200684, "learning_rate": 6.818399616674654e-05, "loss": 0.8096, "step": 23991 }, { "epoch": 1.6255843891862591, "grad_norm": 5.681191921234131, "learning_rate": 6.818262714764872e-05, "loss": 0.6544, "step": 23992 }, { "epoch": 1.6256521444542313, "grad_norm": 6.556152820587158, "learning_rate": 6.81812581285509e-05, "loss": 0.6014, "step": 23993 }, { "epoch": 1.6257198997222035, "grad_norm": 4.415158271789551, "learning_rate": 6.817988910945308e-05, "loss": 0.6168, "step": 23994 }, { "epoch": 1.6257876549901755, "grad_norm": 6.368795394897461, "learning_rate": 6.817852009035527e-05, "loss": 0.8043, "step": 23995 }, { "epoch": 1.6258554102581475, "grad_norm": 5.426401615142822, "learning_rate": 6.817715107125745e-05, "loss": 0.972, "step": 23996 }, { "epoch": 1.6259231655261197, "grad_norm": 5.252473831176758, "learning_rate": 6.817578205215963e-05, "loss": 0.7379, "step": 23997 }, { "epoch": 1.6259909207940917, "grad_norm": 6.258996963500977, "learning_rate": 6.817441303306181e-05, "loss": 0.6302, "step": 23998 }, { "epoch": 1.6260586760620637, "grad_norm": 4.720067501068115, "learning_rate": 6.8173044013964e-05, "loss": 0.7502, "step": 23999 }, { "epoch": 1.6261264313300359, "grad_norm": 6.674588680267334, "learning_rate": 6.817167499486619e-05, "loss": 0.6719, "step": 24000 }, { "epoch": 1.626194186598008, "grad_norm": 5.499941349029541, "learning_rate": 6.817030597576837e-05, "loss": 0.5268, "step": 24001 }, { "epoch": 1.62626194186598, "grad_norm": 5.5910725593566895, "learning_rate": 6.816893695667055e-05, "loss": 0.567, "step": 24002 }, { "epoch": 1.626329697133952, "grad_norm": 5.141921043395996, "learning_rate": 6.816756793757273e-05, "loss": 0.558, "step": 24003 }, { "epoch": 1.6263974524019242, "grad_norm": 4.82295036315918, "learning_rate": 6.816619891847491e-05, "loss": 0.7217, "step": 24004 }, { "epoch": 1.6264652076698964, "grad_norm": 5.900907516479492, "learning_rate": 6.81648298993771e-05, "loss": 0.6782, "step": 24005 }, { "epoch": 1.6265329629378684, "grad_norm": 6.2278242111206055, "learning_rate": 6.816346088027928e-05, "loss": 0.9371, "step": 24006 }, { "epoch": 1.6266007182058404, "grad_norm": 4.680534839630127, "learning_rate": 6.816209186118146e-05, "loss": 0.6838, "step": 24007 }, { "epoch": 1.6266684734738126, "grad_norm": 6.102897644042969, "learning_rate": 6.816072284208365e-05, "loss": 0.5592, "step": 24008 }, { "epoch": 1.6267362287417848, "grad_norm": 5.1831231117248535, "learning_rate": 6.815935382298584e-05, "loss": 0.5745, "step": 24009 }, { "epoch": 1.6268039840097568, "grad_norm": 4.923707485198975, "learning_rate": 6.815798480388802e-05, "loss": 0.6388, "step": 24010 }, { "epoch": 1.6268717392777288, "grad_norm": 6.223697185516357, "learning_rate": 6.81566157847902e-05, "loss": 0.8923, "step": 24011 }, { "epoch": 1.626939494545701, "grad_norm": 5.655559062957764, "learning_rate": 6.815524676569238e-05, "loss": 0.725, "step": 24012 }, { "epoch": 1.6270072498136732, "grad_norm": 10.72536563873291, "learning_rate": 6.815387774659456e-05, "loss": 0.6875, "step": 24013 }, { "epoch": 1.627075005081645, "grad_norm": 6.27579402923584, "learning_rate": 6.815250872749675e-05, "loss": 0.6393, "step": 24014 }, { "epoch": 1.6271427603496171, "grad_norm": 6.701814651489258, "learning_rate": 6.815113970839893e-05, "loss": 0.5809, "step": 24015 }, { "epoch": 1.6272105156175893, "grad_norm": 5.5416765213012695, "learning_rate": 6.814977068930112e-05, "loss": 0.6562, "step": 24016 }, { "epoch": 1.6272782708855613, "grad_norm": 6.73140287399292, "learning_rate": 6.81484016702033e-05, "loss": 0.6259, "step": 24017 }, { "epoch": 1.6273460261535333, "grad_norm": 4.827282905578613, "learning_rate": 6.814703265110549e-05, "loss": 0.6303, "step": 24018 }, { "epoch": 1.6274137814215055, "grad_norm": 5.525455474853516, "learning_rate": 6.814566363200767e-05, "loss": 0.7008, "step": 24019 }, { "epoch": 1.6274815366894777, "grad_norm": 6.778247833251953, "learning_rate": 6.814429461290985e-05, "loss": 0.6958, "step": 24020 }, { "epoch": 1.6275492919574497, "grad_norm": 12.71335506439209, "learning_rate": 6.814292559381203e-05, "loss": 0.7936, "step": 24021 }, { "epoch": 1.6276170472254217, "grad_norm": 7.772406101226807, "learning_rate": 6.814155657471421e-05, "loss": 0.6619, "step": 24022 }, { "epoch": 1.6276848024933939, "grad_norm": 4.786691188812256, "learning_rate": 6.81401875556164e-05, "loss": 0.673, "step": 24023 }, { "epoch": 1.627752557761366, "grad_norm": 7.698526859283447, "learning_rate": 6.813881853651858e-05, "loss": 0.8391, "step": 24024 }, { "epoch": 1.627820313029338, "grad_norm": 5.41485071182251, "learning_rate": 6.813744951742077e-05, "loss": 0.597, "step": 24025 }, { "epoch": 1.62788806829731, "grad_norm": 4.641302585601807, "learning_rate": 6.813608049832295e-05, "loss": 0.8424, "step": 24026 }, { "epoch": 1.6279558235652822, "grad_norm": 5.999403476715088, "learning_rate": 6.813471147922514e-05, "loss": 0.7272, "step": 24027 }, { "epoch": 1.6280235788332544, "grad_norm": 6.078277587890625, "learning_rate": 6.813334246012732e-05, "loss": 0.5378, "step": 24028 }, { "epoch": 1.6280913341012264, "grad_norm": 7.477430820465088, "learning_rate": 6.81319734410295e-05, "loss": 0.5105, "step": 24029 }, { "epoch": 1.6281590893691984, "grad_norm": 6.577996253967285, "learning_rate": 6.81306044219317e-05, "loss": 0.7035, "step": 24030 }, { "epoch": 1.6282268446371706, "grad_norm": 6.215611934661865, "learning_rate": 6.812923540283387e-05, "loss": 0.5899, "step": 24031 }, { "epoch": 1.6282945999051426, "grad_norm": 6.500051498413086, "learning_rate": 6.812786638373605e-05, "loss": 0.7206, "step": 24032 }, { "epoch": 1.6283623551731146, "grad_norm": 6.127330780029297, "learning_rate": 6.812649736463825e-05, "loss": 0.6388, "step": 24033 }, { "epoch": 1.6284301104410868, "grad_norm": 5.682804584503174, "learning_rate": 6.812512834554043e-05, "loss": 0.8058, "step": 24034 }, { "epoch": 1.628497865709059, "grad_norm": 6.487948894500732, "learning_rate": 6.812375932644261e-05, "loss": 1.022, "step": 24035 }, { "epoch": 1.628565620977031, "grad_norm": 6.872677803039551, "learning_rate": 6.812239030734479e-05, "loss": 0.5568, "step": 24036 }, { "epoch": 1.628633376245003, "grad_norm": 4.522365093231201, "learning_rate": 6.812102128824698e-05, "loss": 0.5621, "step": 24037 }, { "epoch": 1.6287011315129751, "grad_norm": 5.13101863861084, "learning_rate": 6.811965226914916e-05, "loss": 0.6355, "step": 24038 }, { "epoch": 1.6287688867809473, "grad_norm": 5.592864513397217, "learning_rate": 6.811828325005134e-05, "loss": 0.5614, "step": 24039 }, { "epoch": 1.6288366420489193, "grad_norm": 4.8060173988342285, "learning_rate": 6.811691423095352e-05, "loss": 0.6949, "step": 24040 }, { "epoch": 1.6289043973168913, "grad_norm": 5.3777265548706055, "learning_rate": 6.811554521185572e-05, "loss": 0.6474, "step": 24041 }, { "epoch": 1.6289721525848635, "grad_norm": 5.445167064666748, "learning_rate": 6.81141761927579e-05, "loss": 0.6537, "step": 24042 }, { "epoch": 1.6290399078528357, "grad_norm": 5.5675883293151855, "learning_rate": 6.811280717366008e-05, "loss": 0.6794, "step": 24043 }, { "epoch": 1.6291076631208077, "grad_norm": 5.372957229614258, "learning_rate": 6.811143815456226e-05, "loss": 0.6511, "step": 24044 }, { "epoch": 1.6291754183887797, "grad_norm": 4.630867958068848, "learning_rate": 6.811006913546444e-05, "loss": 0.5095, "step": 24045 }, { "epoch": 1.6292431736567519, "grad_norm": 5.750266075134277, "learning_rate": 6.810870011636663e-05, "loss": 0.7623, "step": 24046 }, { "epoch": 1.6293109289247238, "grad_norm": 5.9110107421875, "learning_rate": 6.810733109726881e-05, "loss": 0.8018, "step": 24047 }, { "epoch": 1.6293786841926958, "grad_norm": 5.639161109924316, "learning_rate": 6.8105962078171e-05, "loss": 0.5125, "step": 24048 }, { "epoch": 1.629446439460668, "grad_norm": 7.517844200134277, "learning_rate": 6.810459305907317e-05, "loss": 0.8172, "step": 24049 }, { "epoch": 1.6295141947286402, "grad_norm": 7.77977991104126, "learning_rate": 6.810322403997537e-05, "loss": 0.656, "step": 24050 }, { "epoch": 1.6295819499966122, "grad_norm": 5.615512847900391, "learning_rate": 6.810185502087755e-05, "loss": 0.7471, "step": 24051 }, { "epoch": 1.6296497052645842, "grad_norm": 6.471240043640137, "learning_rate": 6.810048600177973e-05, "loss": 0.8015, "step": 24052 }, { "epoch": 1.6297174605325564, "grad_norm": 8.280376434326172, "learning_rate": 6.809911698268191e-05, "loss": 0.811, "step": 24053 }, { "epoch": 1.6297852158005286, "grad_norm": 6.702836513519287, "learning_rate": 6.809774796358409e-05, "loss": 0.5352, "step": 24054 }, { "epoch": 1.6298529710685006, "grad_norm": 9.252845764160156, "learning_rate": 6.809637894448628e-05, "loss": 0.7251, "step": 24055 }, { "epoch": 1.6299207263364726, "grad_norm": 8.862089157104492, "learning_rate": 6.809500992538846e-05, "loss": 0.7688, "step": 24056 }, { "epoch": 1.6299884816044448, "grad_norm": 5.008428573608398, "learning_rate": 6.809364090629064e-05, "loss": 0.6373, "step": 24057 }, { "epoch": 1.630056236872417, "grad_norm": 7.488552093505859, "learning_rate": 6.809227188719282e-05, "loss": 0.5574, "step": 24058 }, { "epoch": 1.630123992140389, "grad_norm": 6.2802581787109375, "learning_rate": 6.8090902868095e-05, "loss": 0.8074, "step": 24059 }, { "epoch": 1.630191747408361, "grad_norm": 6.18681001663208, "learning_rate": 6.80895338489972e-05, "loss": 0.6223, "step": 24060 }, { "epoch": 1.6302595026763331, "grad_norm": 6.765928268432617, "learning_rate": 6.808816482989938e-05, "loss": 0.675, "step": 24061 }, { "epoch": 1.6303272579443053, "grad_norm": 10.19306755065918, "learning_rate": 6.808679581080156e-05, "loss": 0.7786, "step": 24062 }, { "epoch": 1.630395013212277, "grad_norm": 7.706125259399414, "learning_rate": 6.808542679170374e-05, "loss": 0.7438, "step": 24063 }, { "epoch": 1.6304627684802493, "grad_norm": 4.817500591278076, "learning_rate": 6.808405777260593e-05, "loss": 0.5175, "step": 24064 }, { "epoch": 1.6305305237482215, "grad_norm": 4.860335826873779, "learning_rate": 6.808268875350811e-05, "loss": 0.796, "step": 24065 }, { "epoch": 1.6305982790161935, "grad_norm": 5.196244716644287, "learning_rate": 6.80813197344103e-05, "loss": 0.5888, "step": 24066 }, { "epoch": 1.6306660342841655, "grad_norm": 5.5659942626953125, "learning_rate": 6.807995071531248e-05, "loss": 0.7218, "step": 24067 }, { "epoch": 1.6307337895521377, "grad_norm": 6.326866626739502, "learning_rate": 6.807858169621466e-05, "loss": 0.705, "step": 24068 }, { "epoch": 1.6308015448201099, "grad_norm": 11.754804611206055, "learning_rate": 6.807721267711685e-05, "loss": 0.635, "step": 24069 }, { "epoch": 1.6308693000880818, "grad_norm": 9.583431243896484, "learning_rate": 6.807584365801903e-05, "loss": 0.5501, "step": 24070 }, { "epoch": 1.6309370553560538, "grad_norm": 6.256405830383301, "learning_rate": 6.807447463892121e-05, "loss": 0.5606, "step": 24071 }, { "epoch": 1.631004810624026, "grad_norm": 5.633983135223389, "learning_rate": 6.807310561982339e-05, "loss": 0.5971, "step": 24072 }, { "epoch": 1.6310725658919982, "grad_norm": 5.2622480392456055, "learning_rate": 6.807173660072558e-05, "loss": 0.5246, "step": 24073 }, { "epoch": 1.6311403211599702, "grad_norm": 6.029838562011719, "learning_rate": 6.807036758162776e-05, "loss": 0.6845, "step": 24074 }, { "epoch": 1.6312080764279422, "grad_norm": 7.418111801147461, "learning_rate": 6.806899856252994e-05, "loss": 0.9011, "step": 24075 }, { "epoch": 1.6312758316959144, "grad_norm": 5.57143497467041, "learning_rate": 6.806762954343213e-05, "loss": 0.6457, "step": 24076 }, { "epoch": 1.6313435869638866, "grad_norm": 6.3404622077941895, "learning_rate": 6.806626052433432e-05, "loss": 0.7826, "step": 24077 }, { "epoch": 1.6314113422318586, "grad_norm": 7.354036331176758, "learning_rate": 6.80648915052365e-05, "loss": 1.0426, "step": 24078 }, { "epoch": 1.6314790974998306, "grad_norm": 5.7276763916015625, "learning_rate": 6.806352248613868e-05, "loss": 0.6764, "step": 24079 }, { "epoch": 1.6315468527678028, "grad_norm": 8.023699760437012, "learning_rate": 6.806215346704087e-05, "loss": 0.6964, "step": 24080 }, { "epoch": 1.6316146080357747, "grad_norm": 5.219616889953613, "learning_rate": 6.806078444794305e-05, "loss": 0.7172, "step": 24081 }, { "epoch": 1.6316823633037467, "grad_norm": 6.240829944610596, "learning_rate": 6.805941542884523e-05, "loss": 0.5057, "step": 24082 }, { "epoch": 1.631750118571719, "grad_norm": 5.241275787353516, "learning_rate": 6.805804640974743e-05, "loss": 0.5538, "step": 24083 }, { "epoch": 1.6318178738396911, "grad_norm": 4.521687984466553, "learning_rate": 6.805667739064961e-05, "loss": 0.6277, "step": 24084 }, { "epoch": 1.631885629107663, "grad_norm": 8.76430606842041, "learning_rate": 6.805530837155179e-05, "loss": 0.6032, "step": 24085 }, { "epoch": 1.631953384375635, "grad_norm": 6.455832004547119, "learning_rate": 6.805393935245397e-05, "loss": 0.6762, "step": 24086 }, { "epoch": 1.6320211396436073, "grad_norm": 5.9226460456848145, "learning_rate": 6.805257033335616e-05, "loss": 0.7357, "step": 24087 }, { "epoch": 1.6320888949115795, "grad_norm": 6.248168468475342, "learning_rate": 6.805120131425834e-05, "loss": 0.6062, "step": 24088 }, { "epoch": 1.6321566501795515, "grad_norm": 5.174620628356934, "learning_rate": 6.804983229516052e-05, "loss": 0.796, "step": 24089 }, { "epoch": 1.6322244054475235, "grad_norm": 6.029219627380371, "learning_rate": 6.80484632760627e-05, "loss": 1.0679, "step": 24090 }, { "epoch": 1.6322921607154957, "grad_norm": 7.157137393951416, "learning_rate": 6.804709425696488e-05, "loss": 0.5875, "step": 24091 }, { "epoch": 1.6323599159834679, "grad_norm": 6.587831974029541, "learning_rate": 6.804572523786708e-05, "loss": 0.4597, "step": 24092 }, { "epoch": 1.6324276712514398, "grad_norm": 6.74803352355957, "learning_rate": 6.804435621876926e-05, "loss": 0.5799, "step": 24093 }, { "epoch": 1.6324954265194118, "grad_norm": 6.8103132247924805, "learning_rate": 6.804298719967144e-05, "loss": 0.6729, "step": 24094 }, { "epoch": 1.632563181787384, "grad_norm": 6.149486064910889, "learning_rate": 6.804161818057362e-05, "loss": 0.6358, "step": 24095 }, { "epoch": 1.632630937055356, "grad_norm": 7.249516487121582, "learning_rate": 6.804024916147581e-05, "loss": 0.7142, "step": 24096 }, { "epoch": 1.632698692323328, "grad_norm": 5.139228820800781, "learning_rate": 6.8038880142378e-05, "loss": 0.5161, "step": 24097 }, { "epoch": 1.6327664475913002, "grad_norm": 7.024577617645264, "learning_rate": 6.803751112328017e-05, "loss": 0.5672, "step": 24098 }, { "epoch": 1.6328342028592724, "grad_norm": 6.652586936950684, "learning_rate": 6.803614210418235e-05, "loss": 0.9558, "step": 24099 }, { "epoch": 1.6329019581272444, "grad_norm": 4.935428142547607, "learning_rate": 6.803477308508453e-05, "loss": 0.6493, "step": 24100 }, { "epoch": 1.6329697133952163, "grad_norm": 4.074288845062256, "learning_rate": 6.803340406598673e-05, "loss": 0.4799, "step": 24101 }, { "epoch": 1.6330374686631886, "grad_norm": 5.5010457038879395, "learning_rate": 6.803203504688891e-05, "loss": 0.6387, "step": 24102 }, { "epoch": 1.6331052239311608, "grad_norm": 4.634259223937988, "learning_rate": 6.803066602779109e-05, "loss": 0.7498, "step": 24103 }, { "epoch": 1.6331729791991327, "grad_norm": 4.484640121459961, "learning_rate": 6.802929700869327e-05, "loss": 0.5057, "step": 24104 }, { "epoch": 1.6332407344671047, "grad_norm": 7.0988335609436035, "learning_rate": 6.802792798959546e-05, "loss": 0.8024, "step": 24105 }, { "epoch": 1.633308489735077, "grad_norm": 6.799429416656494, "learning_rate": 6.802655897049764e-05, "loss": 0.8922, "step": 24106 }, { "epoch": 1.6333762450030491, "grad_norm": 9.161711692810059, "learning_rate": 6.802518995139982e-05, "loss": 0.5583, "step": 24107 }, { "epoch": 1.633444000271021, "grad_norm": 7.58330774307251, "learning_rate": 6.8023820932302e-05, "loss": 0.5893, "step": 24108 }, { "epoch": 1.633511755538993, "grad_norm": 4.02595329284668, "learning_rate": 6.802245191320418e-05, "loss": 0.6532, "step": 24109 }, { "epoch": 1.6335795108069653, "grad_norm": 6.162059783935547, "learning_rate": 6.802108289410638e-05, "loss": 0.6873, "step": 24110 }, { "epoch": 1.6336472660749375, "grad_norm": 11.419020652770996, "learning_rate": 6.801971387500856e-05, "loss": 0.6285, "step": 24111 }, { "epoch": 1.6337150213429092, "grad_norm": 4.467072010040283, "learning_rate": 6.801834485591074e-05, "loss": 0.6031, "step": 24112 }, { "epoch": 1.6337827766108814, "grad_norm": 5.226381778717041, "learning_rate": 6.801697583681292e-05, "loss": 0.5378, "step": 24113 }, { "epoch": 1.6338505318788537, "grad_norm": 4.838660717010498, "learning_rate": 6.80156068177151e-05, "loss": 0.6396, "step": 24114 }, { "epoch": 1.6339182871468256, "grad_norm": 5.5012125968933105, "learning_rate": 6.80142377986173e-05, "loss": 0.7093, "step": 24115 }, { "epoch": 1.6339860424147976, "grad_norm": 4.640468597412109, "learning_rate": 6.801286877951947e-05, "loss": 0.4555, "step": 24116 }, { "epoch": 1.6340537976827698, "grad_norm": 6.30916690826416, "learning_rate": 6.801149976042165e-05, "loss": 1.052, "step": 24117 }, { "epoch": 1.634121552950742, "grad_norm": 4.963046073913574, "learning_rate": 6.801013074132384e-05, "loss": 0.7988, "step": 24118 }, { "epoch": 1.634189308218714, "grad_norm": 5.128486156463623, "learning_rate": 6.800876172222603e-05, "loss": 0.5746, "step": 24119 }, { "epoch": 1.634257063486686, "grad_norm": 8.567333221435547, "learning_rate": 6.800739270312821e-05, "loss": 0.7686, "step": 24120 }, { "epoch": 1.6343248187546582, "grad_norm": 6.26622200012207, "learning_rate": 6.800602368403039e-05, "loss": 0.7774, "step": 24121 }, { "epoch": 1.6343925740226304, "grad_norm": 4.721916675567627, "learning_rate": 6.800465466493257e-05, "loss": 0.6833, "step": 24122 }, { "epoch": 1.6344603292906024, "grad_norm": 8.423517227172852, "learning_rate": 6.800328564583476e-05, "loss": 0.7977, "step": 24123 }, { "epoch": 1.6345280845585743, "grad_norm": 8.291106224060059, "learning_rate": 6.800191662673694e-05, "loss": 0.5768, "step": 24124 }, { "epoch": 1.6345958398265465, "grad_norm": 8.155228614807129, "learning_rate": 6.800054760763912e-05, "loss": 0.8072, "step": 24125 }, { "epoch": 1.6346635950945188, "grad_norm": 5.015176296234131, "learning_rate": 6.799917858854132e-05, "loss": 0.6725, "step": 24126 }, { "epoch": 1.6347313503624907, "grad_norm": 7.453382968902588, "learning_rate": 6.79978095694435e-05, "loss": 0.6321, "step": 24127 }, { "epoch": 1.6347991056304627, "grad_norm": 5.344571113586426, "learning_rate": 6.799644055034568e-05, "loss": 0.7337, "step": 24128 }, { "epoch": 1.634866860898435, "grad_norm": 6.831218719482422, "learning_rate": 6.799507153124787e-05, "loss": 0.7238, "step": 24129 }, { "epoch": 1.634934616166407, "grad_norm": 4.112753868103027, "learning_rate": 6.799370251215005e-05, "loss": 0.5484, "step": 24130 }, { "epoch": 1.6350023714343789, "grad_norm": 5.594783306121826, "learning_rate": 6.799233349305223e-05, "loss": 0.6175, "step": 24131 }, { "epoch": 1.635070126702351, "grad_norm": 5.525727272033691, "learning_rate": 6.799096447395441e-05, "loss": 0.8044, "step": 24132 }, { "epoch": 1.6351378819703233, "grad_norm": 4.288642883300781, "learning_rate": 6.798959545485661e-05, "loss": 0.5375, "step": 24133 }, { "epoch": 1.6352056372382953, "grad_norm": 5.728660583496094, "learning_rate": 6.798822643575879e-05, "loss": 0.6759, "step": 24134 }, { "epoch": 1.6352733925062672, "grad_norm": 4.8501811027526855, "learning_rate": 6.798685741666097e-05, "loss": 0.6427, "step": 24135 }, { "epoch": 1.6353411477742394, "grad_norm": 5.315530776977539, "learning_rate": 6.798548839756315e-05, "loss": 0.5644, "step": 24136 }, { "epoch": 1.6354089030422116, "grad_norm": 4.787325382232666, "learning_rate": 6.798411937846533e-05, "loss": 0.5494, "step": 24137 }, { "epoch": 1.6354766583101836, "grad_norm": 6.549224853515625, "learning_rate": 6.798275035936752e-05, "loss": 0.7252, "step": 24138 }, { "epoch": 1.6355444135781556, "grad_norm": 5.952584743499756, "learning_rate": 6.79813813402697e-05, "loss": 0.8399, "step": 24139 }, { "epoch": 1.6356121688461278, "grad_norm": 5.021061420440674, "learning_rate": 6.798001232117188e-05, "loss": 0.6113, "step": 24140 }, { "epoch": 1.6356799241141, "grad_norm": 6.679224967956543, "learning_rate": 6.797864330207406e-05, "loss": 0.5736, "step": 24141 }, { "epoch": 1.635747679382072, "grad_norm": 6.521693706512451, "learning_rate": 6.797727428297626e-05, "loss": 0.6415, "step": 24142 }, { "epoch": 1.635815434650044, "grad_norm": 4.414869785308838, "learning_rate": 6.797590526387844e-05, "loss": 0.2866, "step": 24143 }, { "epoch": 1.6358831899180162, "grad_norm": 6.58344841003418, "learning_rate": 6.797453624478062e-05, "loss": 0.6608, "step": 24144 }, { "epoch": 1.6359509451859882, "grad_norm": 5.889130115509033, "learning_rate": 6.79731672256828e-05, "loss": 0.7077, "step": 24145 }, { "epoch": 1.6360187004539601, "grad_norm": 6.106472492218018, "learning_rate": 6.797179820658498e-05, "loss": 0.6398, "step": 24146 }, { "epoch": 1.6360864557219323, "grad_norm": 8.835082054138184, "learning_rate": 6.797042918748717e-05, "loss": 0.624, "step": 24147 }, { "epoch": 1.6361542109899045, "grad_norm": 5.716066360473633, "learning_rate": 6.796906016838935e-05, "loss": 0.7478, "step": 24148 }, { "epoch": 1.6362219662578765, "grad_norm": 4.854745864868164, "learning_rate": 6.796769114929153e-05, "loss": 0.6239, "step": 24149 }, { "epoch": 1.6362897215258485, "grad_norm": 7.935161113739014, "learning_rate": 6.796632213019371e-05, "loss": 0.5458, "step": 24150 }, { "epoch": 1.6363574767938207, "grad_norm": 5.933248043060303, "learning_rate": 6.796495311109591e-05, "loss": 0.5315, "step": 24151 }, { "epoch": 1.636425232061793, "grad_norm": 6.454056739807129, "learning_rate": 6.796358409199809e-05, "loss": 0.524, "step": 24152 }, { "epoch": 1.636492987329765, "grad_norm": 5.368408679962158, "learning_rate": 6.796221507290027e-05, "loss": 0.7365, "step": 24153 }, { "epoch": 1.6365607425977369, "grad_norm": 6.333790302276611, "learning_rate": 6.796084605380245e-05, "loss": 0.8045, "step": 24154 }, { "epoch": 1.636628497865709, "grad_norm": 6.892549514770508, "learning_rate": 6.795947703470463e-05, "loss": 0.8057, "step": 24155 }, { "epoch": 1.6366962531336813, "grad_norm": 4.66261100769043, "learning_rate": 6.795810801560682e-05, "loss": 0.5941, "step": 24156 }, { "epoch": 1.6367640084016533, "grad_norm": 7.2031779289245605, "learning_rate": 6.7956738996509e-05, "loss": 0.554, "step": 24157 }, { "epoch": 1.6368317636696252, "grad_norm": 8.632057189941406, "learning_rate": 6.795536997741118e-05, "loss": 0.6803, "step": 24158 }, { "epoch": 1.6368995189375974, "grad_norm": 5.224258899688721, "learning_rate": 6.795400095831336e-05, "loss": 0.7045, "step": 24159 }, { "epoch": 1.6369672742055696, "grad_norm": 5.105559825897217, "learning_rate": 6.795263193921556e-05, "loss": 0.7571, "step": 24160 }, { "epoch": 1.6370350294735414, "grad_norm": 5.365443706512451, "learning_rate": 6.795126292011774e-05, "loss": 0.6124, "step": 24161 }, { "epoch": 1.6371027847415136, "grad_norm": 6.53223180770874, "learning_rate": 6.794989390101992e-05, "loss": 0.7434, "step": 24162 }, { "epoch": 1.6371705400094858, "grad_norm": 5.018377304077148, "learning_rate": 6.79485248819221e-05, "loss": 0.6529, "step": 24163 }, { "epoch": 1.6372382952774578, "grad_norm": 6.031050205230713, "learning_rate": 6.794715586282428e-05, "loss": 0.73, "step": 24164 }, { "epoch": 1.6373060505454298, "grad_norm": 5.891262531280518, "learning_rate": 6.794578684372647e-05, "loss": 0.6776, "step": 24165 }, { "epoch": 1.637373805813402, "grad_norm": 6.380420684814453, "learning_rate": 6.794441782462865e-05, "loss": 0.5782, "step": 24166 }, { "epoch": 1.6374415610813742, "grad_norm": 4.2644877433776855, "learning_rate": 6.794304880553083e-05, "loss": 0.4868, "step": 24167 }, { "epoch": 1.6375093163493462, "grad_norm": 5.330821990966797, "learning_rate": 6.794167978643301e-05, "loss": 0.6328, "step": 24168 }, { "epoch": 1.6375770716173181, "grad_norm": 5.32259464263916, "learning_rate": 6.794031076733521e-05, "loss": 0.7069, "step": 24169 }, { "epoch": 1.6376448268852903, "grad_norm": 5.1258721351623535, "learning_rate": 6.793894174823739e-05, "loss": 0.6583, "step": 24170 }, { "epoch": 1.6377125821532625, "grad_norm": 4.144199371337891, "learning_rate": 6.793757272913957e-05, "loss": 0.6443, "step": 24171 }, { "epoch": 1.6377803374212345, "grad_norm": 6.666686534881592, "learning_rate": 6.793620371004176e-05, "loss": 0.664, "step": 24172 }, { "epoch": 1.6378480926892065, "grad_norm": 7.890723705291748, "learning_rate": 6.793483469094394e-05, "loss": 0.8551, "step": 24173 }, { "epoch": 1.6379158479571787, "grad_norm": 5.384733200073242, "learning_rate": 6.793346567184612e-05, "loss": 0.6801, "step": 24174 }, { "epoch": 1.637983603225151, "grad_norm": 7.035154819488525, "learning_rate": 6.793209665274832e-05, "loss": 0.6509, "step": 24175 }, { "epoch": 1.6380513584931229, "grad_norm": 4.173189640045166, "learning_rate": 6.79307276336505e-05, "loss": 0.5843, "step": 24176 }, { "epoch": 1.6381191137610949, "grad_norm": 6.35711145401001, "learning_rate": 6.792935861455268e-05, "loss": 0.6255, "step": 24177 }, { "epoch": 1.638186869029067, "grad_norm": 5.236429691314697, "learning_rate": 6.792798959545486e-05, "loss": 0.7503, "step": 24178 }, { "epoch": 1.638254624297039, "grad_norm": 7.117665767669678, "learning_rate": 6.792662057635705e-05, "loss": 0.7084, "step": 24179 }, { "epoch": 1.638322379565011, "grad_norm": 6.502695083618164, "learning_rate": 6.792525155725923e-05, "loss": 0.634, "step": 24180 }, { "epoch": 1.6383901348329832, "grad_norm": 5.582119464874268, "learning_rate": 6.792388253816141e-05, "loss": 0.6052, "step": 24181 }, { "epoch": 1.6384578901009554, "grad_norm": 5.496036529541016, "learning_rate": 6.79225135190636e-05, "loss": 0.6879, "step": 24182 }, { "epoch": 1.6385256453689274, "grad_norm": 6.9800286293029785, "learning_rate": 6.792114449996579e-05, "loss": 0.5618, "step": 24183 }, { "epoch": 1.6385934006368994, "grad_norm": 5.580502033233643, "learning_rate": 6.791977548086797e-05, "loss": 0.8404, "step": 24184 }, { "epoch": 1.6386611559048716, "grad_norm": 6.46726655960083, "learning_rate": 6.791840646177015e-05, "loss": 0.6244, "step": 24185 }, { "epoch": 1.6387289111728438, "grad_norm": 7.373898983001709, "learning_rate": 6.791703744267233e-05, "loss": 0.6123, "step": 24186 }, { "epoch": 1.6387966664408158, "grad_norm": 5.142141819000244, "learning_rate": 6.791566842357451e-05, "loss": 0.6282, "step": 24187 }, { "epoch": 1.6388644217087878, "grad_norm": 4.569967746734619, "learning_rate": 6.79142994044767e-05, "loss": 0.5959, "step": 24188 }, { "epoch": 1.63893217697676, "grad_norm": 6.718389511108398, "learning_rate": 6.791293038537888e-05, "loss": 0.8419, "step": 24189 }, { "epoch": 1.6389999322447322, "grad_norm": 8.094079971313477, "learning_rate": 6.791156136628106e-05, "loss": 0.8344, "step": 24190 }, { "epoch": 1.6390676875127042, "grad_norm": 6.398443698883057, "learning_rate": 6.791019234718324e-05, "loss": 0.7357, "step": 24191 }, { "epoch": 1.6391354427806761, "grad_norm": 5.9509711265563965, "learning_rate": 6.790882332808542e-05, "loss": 0.8463, "step": 24192 }, { "epoch": 1.6392031980486483, "grad_norm": 4.9299211502075195, "learning_rate": 6.790745430898762e-05, "loss": 0.7021, "step": 24193 }, { "epoch": 1.6392709533166203, "grad_norm": 6.04346227645874, "learning_rate": 6.79060852898898e-05, "loss": 0.7183, "step": 24194 }, { "epoch": 1.6393387085845923, "grad_norm": 5.788272857666016, "learning_rate": 6.790471627079198e-05, "loss": 0.663, "step": 24195 }, { "epoch": 1.6394064638525645, "grad_norm": 5.105538845062256, "learning_rate": 6.790334725169416e-05, "loss": 0.5911, "step": 24196 }, { "epoch": 1.6394742191205367, "grad_norm": 5.058742523193359, "learning_rate": 6.790197823259635e-05, "loss": 0.5294, "step": 24197 }, { "epoch": 1.6395419743885087, "grad_norm": 5.312749862670898, "learning_rate": 6.790060921349853e-05, "loss": 0.5479, "step": 24198 }, { "epoch": 1.6396097296564807, "grad_norm": 5.735808849334717, "learning_rate": 6.789924019440071e-05, "loss": 0.8587, "step": 24199 }, { "epoch": 1.6396774849244529, "grad_norm": 4.144581317901611, "learning_rate": 6.78978711753029e-05, "loss": 0.5546, "step": 24200 }, { "epoch": 1.639745240192425, "grad_norm": 4.176133632659912, "learning_rate": 6.789650215620507e-05, "loss": 0.597, "step": 24201 }, { "epoch": 1.639812995460397, "grad_norm": 4.637962341308594, "learning_rate": 6.789513313710727e-05, "loss": 0.67, "step": 24202 }, { "epoch": 1.639880750728369, "grad_norm": 4.6249680519104, "learning_rate": 6.789376411800945e-05, "loss": 0.704, "step": 24203 }, { "epoch": 1.6399485059963412, "grad_norm": 8.267187118530273, "learning_rate": 6.789239509891163e-05, "loss": 0.6646, "step": 24204 }, { "epoch": 1.6400162612643134, "grad_norm": 5.139389514923096, "learning_rate": 6.789102607981381e-05, "loss": 0.7056, "step": 24205 }, { "epoch": 1.6400840165322854, "grad_norm": 5.090887069702148, "learning_rate": 6.7889657060716e-05, "loss": 0.5437, "step": 24206 }, { "epoch": 1.6401517718002574, "grad_norm": 4.637491226196289, "learning_rate": 6.788828804161818e-05, "loss": 0.6657, "step": 24207 }, { "epoch": 1.6402195270682296, "grad_norm": 5.22127103805542, "learning_rate": 6.788691902252036e-05, "loss": 0.6629, "step": 24208 }, { "epoch": 1.6402872823362018, "grad_norm": 7.03727388381958, "learning_rate": 6.788555000342254e-05, "loss": 0.8242, "step": 24209 }, { "epoch": 1.6403550376041736, "grad_norm": 4.818588733673096, "learning_rate": 6.788418098432472e-05, "loss": 0.6649, "step": 24210 }, { "epoch": 1.6404227928721458, "grad_norm": 5.017003536224365, "learning_rate": 6.788281196522692e-05, "loss": 0.6123, "step": 24211 }, { "epoch": 1.640490548140118, "grad_norm": 7.082085609436035, "learning_rate": 6.78814429461291e-05, "loss": 0.7929, "step": 24212 }, { "epoch": 1.64055830340809, "grad_norm": 4.122866153717041, "learning_rate": 6.788007392703128e-05, "loss": 0.5013, "step": 24213 }, { "epoch": 1.640626058676062, "grad_norm": 5.980356216430664, "learning_rate": 6.787870490793346e-05, "loss": 0.5808, "step": 24214 }, { "epoch": 1.6406938139440341, "grad_norm": 5.507762908935547, "learning_rate": 6.787733588883565e-05, "loss": 0.7642, "step": 24215 }, { "epoch": 1.6407615692120063, "grad_norm": 5.553664207458496, "learning_rate": 6.787596686973783e-05, "loss": 0.5394, "step": 24216 }, { "epoch": 1.6408293244799783, "grad_norm": 10.505146980285645, "learning_rate": 6.787459785064001e-05, "loss": 0.673, "step": 24217 }, { "epoch": 1.6408970797479503, "grad_norm": 5.448019504547119, "learning_rate": 6.787322883154221e-05, "loss": 0.6102, "step": 24218 }, { "epoch": 1.6409648350159225, "grad_norm": 6.489044189453125, "learning_rate": 6.787185981244439e-05, "loss": 0.8788, "step": 24219 }, { "epoch": 1.6410325902838947, "grad_norm": 6.777378559112549, "learning_rate": 6.787049079334657e-05, "loss": 0.5816, "step": 24220 }, { "epoch": 1.6411003455518667, "grad_norm": 5.945206165313721, "learning_rate": 6.786912177424876e-05, "loss": 0.495, "step": 24221 }, { "epoch": 1.6411681008198387, "grad_norm": 9.089851379394531, "learning_rate": 6.786775275515094e-05, "loss": 0.7618, "step": 24222 }, { "epoch": 1.6412358560878109, "grad_norm": 5.507379055023193, "learning_rate": 6.786638373605312e-05, "loss": 0.582, "step": 24223 }, { "epoch": 1.641303611355783, "grad_norm": 6.640849590301514, "learning_rate": 6.78650147169553e-05, "loss": 0.736, "step": 24224 }, { "epoch": 1.641371366623755, "grad_norm": 6.408087730407715, "learning_rate": 6.78636456978575e-05, "loss": 0.8408, "step": 24225 }, { "epoch": 1.641439121891727, "grad_norm": 5.363539218902588, "learning_rate": 6.786227667875968e-05, "loss": 0.6526, "step": 24226 }, { "epoch": 1.6415068771596992, "grad_norm": 6.496897220611572, "learning_rate": 6.786090765966186e-05, "loss": 0.5694, "step": 24227 }, { "epoch": 1.6415746324276712, "grad_norm": 4.513779640197754, "learning_rate": 6.785953864056404e-05, "loss": 0.4704, "step": 24228 }, { "epoch": 1.6416423876956432, "grad_norm": 5.056511402130127, "learning_rate": 6.785816962146623e-05, "loss": 0.5191, "step": 24229 }, { "epoch": 1.6417101429636154, "grad_norm": 3.974031686782837, "learning_rate": 6.785680060236841e-05, "loss": 0.63, "step": 24230 }, { "epoch": 1.6417778982315876, "grad_norm": 6.047205448150635, "learning_rate": 6.785543158327059e-05, "loss": 0.5364, "step": 24231 }, { "epoch": 1.6418456534995596, "grad_norm": 6.220521926879883, "learning_rate": 6.785406256417277e-05, "loss": 0.669, "step": 24232 }, { "epoch": 1.6419134087675316, "grad_norm": 5.975975036621094, "learning_rate": 6.785269354507495e-05, "loss": 0.6181, "step": 24233 }, { "epoch": 1.6419811640355038, "grad_norm": 5.393496036529541, "learning_rate": 6.785132452597715e-05, "loss": 0.4907, "step": 24234 }, { "epoch": 1.642048919303476, "grad_norm": 4.472797393798828, "learning_rate": 6.784995550687933e-05, "loss": 0.5727, "step": 24235 }, { "epoch": 1.642116674571448, "grad_norm": 6.253902435302734, "learning_rate": 6.784858648778151e-05, "loss": 0.6869, "step": 24236 }, { "epoch": 1.64218442983942, "grad_norm": 7.87222957611084, "learning_rate": 6.784721746868369e-05, "loss": 0.5371, "step": 24237 }, { "epoch": 1.6422521851073921, "grad_norm": 4.861351490020752, "learning_rate": 6.784584844958588e-05, "loss": 0.5305, "step": 24238 }, { "epoch": 1.6423199403753643, "grad_norm": 5.576367378234863, "learning_rate": 6.784447943048806e-05, "loss": 0.7108, "step": 24239 }, { "epoch": 1.6423876956433363, "grad_norm": 6.302206039428711, "learning_rate": 6.784311041139024e-05, "loss": 0.6026, "step": 24240 }, { "epoch": 1.6424554509113083, "grad_norm": 6.220592498779297, "learning_rate": 6.784174139229242e-05, "loss": 0.722, "step": 24241 }, { "epoch": 1.6425232061792805, "grad_norm": 7.6559343338012695, "learning_rate": 6.78403723731946e-05, "loss": 0.7205, "step": 24242 }, { "epoch": 1.6425909614472525, "grad_norm": 5.881769180297852, "learning_rate": 6.78390033540968e-05, "loss": 0.6253, "step": 24243 }, { "epoch": 1.6426587167152245, "grad_norm": 6.244131088256836, "learning_rate": 6.783763433499898e-05, "loss": 0.551, "step": 24244 }, { "epoch": 1.6427264719831967, "grad_norm": 4.607573986053467, "learning_rate": 6.783626531590116e-05, "loss": 0.6028, "step": 24245 }, { "epoch": 1.6427942272511689, "grad_norm": 6.293932914733887, "learning_rate": 6.783489629680334e-05, "loss": 0.7329, "step": 24246 }, { "epoch": 1.6428619825191408, "grad_norm": 6.597748279571533, "learning_rate": 6.783352727770552e-05, "loss": 0.7233, "step": 24247 }, { "epoch": 1.6429297377871128, "grad_norm": 6.356225490570068, "learning_rate": 6.783215825860771e-05, "loss": 0.8375, "step": 24248 }, { "epoch": 1.642997493055085, "grad_norm": 6.935635089874268, "learning_rate": 6.78307892395099e-05, "loss": 0.6294, "step": 24249 }, { "epoch": 1.6430652483230572, "grad_norm": 5.559404373168945, "learning_rate": 6.782942022041207e-05, "loss": 0.6879, "step": 24250 }, { "epoch": 1.6431330035910292, "grad_norm": 6.295341968536377, "learning_rate": 6.782805120131425e-05, "loss": 0.7746, "step": 24251 }, { "epoch": 1.6432007588590012, "grad_norm": 4.830505847930908, "learning_rate": 6.782668218221645e-05, "loss": 0.6955, "step": 24252 }, { "epoch": 1.6432685141269734, "grad_norm": 6.680715560913086, "learning_rate": 6.782531316311863e-05, "loss": 0.7912, "step": 24253 }, { "epoch": 1.6433362693949456, "grad_norm": 7.827907085418701, "learning_rate": 6.782394414402081e-05, "loss": 0.8349, "step": 24254 }, { "epoch": 1.6434040246629176, "grad_norm": 5.651306629180908, "learning_rate": 6.782257512492299e-05, "loss": 0.6455, "step": 24255 }, { "epoch": 1.6434717799308896, "grad_norm": 5.18255090713501, "learning_rate": 6.782120610582517e-05, "loss": 0.8131, "step": 24256 }, { "epoch": 1.6435395351988618, "grad_norm": 5.603715419769287, "learning_rate": 6.781983708672736e-05, "loss": 0.6443, "step": 24257 }, { "epoch": 1.643607290466834, "grad_norm": 5.536622047424316, "learning_rate": 6.781846806762954e-05, "loss": 0.5644, "step": 24258 }, { "epoch": 1.6436750457348057, "grad_norm": 4.860998630523682, "learning_rate": 6.781709904853172e-05, "loss": 0.5635, "step": 24259 }, { "epoch": 1.643742801002778, "grad_norm": 4.787585258483887, "learning_rate": 6.78157300294339e-05, "loss": 0.6647, "step": 24260 }, { "epoch": 1.6438105562707501, "grad_norm": 6.469789505004883, "learning_rate": 6.78143610103361e-05, "loss": 0.4645, "step": 24261 }, { "epoch": 1.643878311538722, "grad_norm": 5.807828426361084, "learning_rate": 6.781299199123828e-05, "loss": 0.5775, "step": 24262 }, { "epoch": 1.643946066806694, "grad_norm": 5.633342742919922, "learning_rate": 6.781162297214046e-05, "loss": 0.6193, "step": 24263 }, { "epoch": 1.6440138220746663, "grad_norm": 10.108819007873535, "learning_rate": 6.781025395304265e-05, "loss": 0.6499, "step": 24264 }, { "epoch": 1.6440815773426385, "grad_norm": 8.979475975036621, "learning_rate": 6.780888493394483e-05, "loss": 0.8285, "step": 24265 }, { "epoch": 1.6441493326106105, "grad_norm": 5.745243072509766, "learning_rate": 6.780751591484701e-05, "loss": 0.494, "step": 24266 }, { "epoch": 1.6442170878785825, "grad_norm": 4.739820957183838, "learning_rate": 6.780614689574921e-05, "loss": 0.6374, "step": 24267 }, { "epoch": 1.6442848431465547, "grad_norm": 6.330533981323242, "learning_rate": 6.780477787665139e-05, "loss": 0.7026, "step": 24268 }, { "epoch": 1.6443525984145269, "grad_norm": 5.970046043395996, "learning_rate": 6.780340885755357e-05, "loss": 0.5476, "step": 24269 }, { "epoch": 1.6444203536824988, "grad_norm": 6.5760884284973145, "learning_rate": 6.780203983845575e-05, "loss": 0.6391, "step": 24270 }, { "epoch": 1.6444881089504708, "grad_norm": 5.119706153869629, "learning_rate": 6.780067081935794e-05, "loss": 0.677, "step": 24271 }, { "epoch": 1.644555864218443, "grad_norm": 4.547575950622559, "learning_rate": 6.779930180026012e-05, "loss": 0.5961, "step": 24272 }, { "epoch": 1.6446236194864152, "grad_norm": 5.272701263427734, "learning_rate": 6.77979327811623e-05, "loss": 0.7176, "step": 24273 }, { "epoch": 1.6446913747543872, "grad_norm": 6.390678405761719, "learning_rate": 6.779656376206448e-05, "loss": 0.6434, "step": 24274 }, { "epoch": 1.6447591300223592, "grad_norm": 6.06951904296875, "learning_rate": 6.779519474296668e-05, "loss": 0.9049, "step": 24275 }, { "epoch": 1.6448268852903314, "grad_norm": 4.334080219268799, "learning_rate": 6.779382572386886e-05, "loss": 0.512, "step": 24276 }, { "epoch": 1.6448946405583034, "grad_norm": 8.017261505126953, "learning_rate": 6.779245670477104e-05, "loss": 0.5549, "step": 24277 }, { "epoch": 1.6449623958262753, "grad_norm": 6.2129669189453125, "learning_rate": 6.779108768567322e-05, "loss": 0.751, "step": 24278 }, { "epoch": 1.6450301510942476, "grad_norm": 5.5928778648376465, "learning_rate": 6.77897186665754e-05, "loss": 0.5415, "step": 24279 }, { "epoch": 1.6450979063622198, "grad_norm": 8.923968315124512, "learning_rate": 6.778834964747759e-05, "loss": 0.6753, "step": 24280 }, { "epoch": 1.6451656616301917, "grad_norm": 6.647210121154785, "learning_rate": 6.778698062837977e-05, "loss": 0.5774, "step": 24281 }, { "epoch": 1.6452334168981637, "grad_norm": 5.204244613647461, "learning_rate": 6.778561160928195e-05, "loss": 0.6982, "step": 24282 }, { "epoch": 1.645301172166136, "grad_norm": 7.22610330581665, "learning_rate": 6.778424259018413e-05, "loss": 0.7336, "step": 24283 }, { "epoch": 1.6453689274341081, "grad_norm": 4.962253570556641, "learning_rate": 6.778287357108633e-05, "loss": 0.7742, "step": 24284 }, { "epoch": 1.64543668270208, "grad_norm": 6.0928802490234375, "learning_rate": 6.778150455198851e-05, "loss": 0.662, "step": 24285 }, { "epoch": 1.645504437970052, "grad_norm": 4.776394367218018, "learning_rate": 6.778013553289069e-05, "loss": 0.5622, "step": 24286 }, { "epoch": 1.6455721932380243, "grad_norm": 5.310511112213135, "learning_rate": 6.777876651379287e-05, "loss": 0.7068, "step": 24287 }, { "epoch": 1.6456399485059965, "grad_norm": 7.133009433746338, "learning_rate": 6.777739749469505e-05, "loss": 0.6611, "step": 24288 }, { "epoch": 1.6457077037739685, "grad_norm": 6.594620704650879, "learning_rate": 6.777602847559724e-05, "loss": 0.6355, "step": 24289 }, { "epoch": 1.6457754590419404, "grad_norm": 10.675470352172852, "learning_rate": 6.777465945649942e-05, "loss": 0.5075, "step": 24290 }, { "epoch": 1.6458432143099126, "grad_norm": 5.05479097366333, "learning_rate": 6.77732904374016e-05, "loss": 0.7321, "step": 24291 }, { "epoch": 1.6459109695778846, "grad_norm": 7.713277816772461, "learning_rate": 6.777192141830378e-05, "loss": 0.661, "step": 24292 }, { "epoch": 1.6459787248458566, "grad_norm": 4.849925518035889, "learning_rate": 6.777055239920598e-05, "loss": 0.7004, "step": 24293 }, { "epoch": 1.6460464801138288, "grad_norm": 7.701627254486084, "learning_rate": 6.776918338010816e-05, "loss": 0.5963, "step": 24294 }, { "epoch": 1.646114235381801, "grad_norm": 7.040931224822998, "learning_rate": 6.776781436101034e-05, "loss": 0.7039, "step": 24295 }, { "epoch": 1.646181990649773, "grad_norm": 5.114952087402344, "learning_rate": 6.776644534191252e-05, "loss": 0.65, "step": 24296 }, { "epoch": 1.646249745917745, "grad_norm": 6.780006408691406, "learning_rate": 6.77650763228147e-05, "loss": 0.7591, "step": 24297 }, { "epoch": 1.6463175011857172, "grad_norm": 5.041226387023926, "learning_rate": 6.776370730371689e-05, "loss": 0.6543, "step": 24298 }, { "epoch": 1.6463852564536894, "grad_norm": 6.3083295822143555, "learning_rate": 6.776233828461907e-05, "loss": 0.6376, "step": 24299 }, { "epoch": 1.6464530117216614, "grad_norm": 6.508799076080322, "learning_rate": 6.776096926552125e-05, "loss": 0.5442, "step": 24300 }, { "epoch": 1.6465207669896333, "grad_norm": 4.307370185852051, "learning_rate": 6.775960024642343e-05, "loss": 0.5261, "step": 24301 }, { "epoch": 1.6465885222576055, "grad_norm": 5.466468811035156, "learning_rate": 6.775823122732561e-05, "loss": 0.6281, "step": 24302 }, { "epoch": 1.6466562775255777, "grad_norm": 4.68580436706543, "learning_rate": 6.775686220822781e-05, "loss": 0.5248, "step": 24303 }, { "epoch": 1.6467240327935497, "grad_norm": 6.591990947723389, "learning_rate": 6.775549318912999e-05, "loss": 0.6979, "step": 24304 }, { "epoch": 1.6467917880615217, "grad_norm": 5.817078590393066, "learning_rate": 6.775412417003217e-05, "loss": 0.6628, "step": 24305 }, { "epoch": 1.646859543329494, "grad_norm": 6.484322547912598, "learning_rate": 6.775275515093435e-05, "loss": 0.6911, "step": 24306 }, { "epoch": 1.6469272985974661, "grad_norm": 4.945478916168213, "learning_rate": 6.775138613183654e-05, "loss": 0.7554, "step": 24307 }, { "epoch": 1.6469950538654379, "grad_norm": 5.687304973602295, "learning_rate": 6.775001711273872e-05, "loss": 0.7461, "step": 24308 }, { "epoch": 1.64706280913341, "grad_norm": 7.082693576812744, "learning_rate": 6.77486480936409e-05, "loss": 0.6445, "step": 24309 }, { "epoch": 1.6471305644013823, "grad_norm": 7.967297554016113, "learning_rate": 6.77472790745431e-05, "loss": 0.6129, "step": 24310 }, { "epoch": 1.6471983196693543, "grad_norm": 5.099714279174805, "learning_rate": 6.774591005544528e-05, "loss": 0.65, "step": 24311 }, { "epoch": 1.6472660749373262, "grad_norm": 7.267844200134277, "learning_rate": 6.774454103634746e-05, "loss": 0.5875, "step": 24312 }, { "epoch": 1.6473338302052984, "grad_norm": 4.902021884918213, "learning_rate": 6.774317201724965e-05, "loss": 0.5442, "step": 24313 }, { "epoch": 1.6474015854732706, "grad_norm": 6.218705654144287, "learning_rate": 6.774180299815183e-05, "loss": 0.8667, "step": 24314 }, { "epoch": 1.6474693407412426, "grad_norm": 4.4718122482299805, "learning_rate": 6.774043397905401e-05, "loss": 0.6507, "step": 24315 }, { "epoch": 1.6475370960092146, "grad_norm": 7.686657905578613, "learning_rate": 6.77390649599562e-05, "loss": 0.7591, "step": 24316 }, { "epoch": 1.6476048512771868, "grad_norm": 6.538692951202393, "learning_rate": 6.773769594085839e-05, "loss": 0.9303, "step": 24317 }, { "epoch": 1.647672606545159, "grad_norm": 6.610010623931885, "learning_rate": 6.773632692176057e-05, "loss": 0.6199, "step": 24318 }, { "epoch": 1.647740361813131, "grad_norm": 6.025900363922119, "learning_rate": 6.773495790266275e-05, "loss": 0.7115, "step": 24319 }, { "epoch": 1.647808117081103, "grad_norm": 6.423794746398926, "learning_rate": 6.773358888356493e-05, "loss": 0.5976, "step": 24320 }, { "epoch": 1.6478758723490752, "grad_norm": 5.231936454772949, "learning_rate": 6.773221986446712e-05, "loss": 0.9007, "step": 24321 }, { "epoch": 1.6479436276170474, "grad_norm": 4.85367488861084, "learning_rate": 6.77308508453693e-05, "loss": 0.5704, "step": 24322 }, { "epoch": 1.6480113828850194, "grad_norm": 7.679934501647949, "learning_rate": 6.772948182627148e-05, "loss": 0.5894, "step": 24323 }, { "epoch": 1.6480791381529913, "grad_norm": 6.42826509475708, "learning_rate": 6.772811280717366e-05, "loss": 0.6, "step": 24324 }, { "epoch": 1.6481468934209635, "grad_norm": 8.387645721435547, "learning_rate": 6.772674378807584e-05, "loss": 0.8285, "step": 24325 }, { "epoch": 1.6482146486889355, "grad_norm": 4.975339412689209, "learning_rate": 6.772537476897804e-05, "loss": 0.5709, "step": 24326 }, { "epoch": 1.6482824039569075, "grad_norm": 7.744299411773682, "learning_rate": 6.772400574988022e-05, "loss": 0.7091, "step": 24327 }, { "epoch": 1.6483501592248797, "grad_norm": 7.331718444824219, "learning_rate": 6.77226367307824e-05, "loss": 0.733, "step": 24328 }, { "epoch": 1.648417914492852, "grad_norm": 6.657279968261719, "learning_rate": 6.772126771168458e-05, "loss": 0.5488, "step": 24329 }, { "epoch": 1.648485669760824, "grad_norm": 5.142001628875732, "learning_rate": 6.771989869258677e-05, "loss": 0.5474, "step": 24330 }, { "epoch": 1.6485534250287959, "grad_norm": 8.318038940429688, "learning_rate": 6.771852967348895e-05, "loss": 0.5641, "step": 24331 }, { "epoch": 1.648621180296768, "grad_norm": 5.246788024902344, "learning_rate": 6.771716065439113e-05, "loss": 0.8232, "step": 24332 }, { "epoch": 1.6486889355647403, "grad_norm": 5.616459369659424, "learning_rate": 6.771579163529331e-05, "loss": 0.6939, "step": 24333 }, { "epoch": 1.6487566908327123, "grad_norm": 5.557784080505371, "learning_rate": 6.77144226161955e-05, "loss": 0.6083, "step": 24334 }, { "epoch": 1.6488244461006842, "grad_norm": 5.60261344909668, "learning_rate": 6.771305359709769e-05, "loss": 0.8302, "step": 24335 }, { "epoch": 1.6488922013686564, "grad_norm": 5.317370414733887, "learning_rate": 6.771168457799987e-05, "loss": 0.5393, "step": 24336 }, { "epoch": 1.6489599566366286, "grad_norm": 6.739059925079346, "learning_rate": 6.771031555890205e-05, "loss": 0.7184, "step": 24337 }, { "epoch": 1.6490277119046006, "grad_norm": 6.066035747528076, "learning_rate": 6.770894653980423e-05, "loss": 0.7621, "step": 24338 }, { "epoch": 1.6490954671725726, "grad_norm": 7.333264350891113, "learning_rate": 6.770757752070642e-05, "loss": 0.6409, "step": 24339 }, { "epoch": 1.6491632224405448, "grad_norm": 4.5347394943237305, "learning_rate": 6.77062085016086e-05, "loss": 0.5689, "step": 24340 }, { "epoch": 1.6492309777085168, "grad_norm": 10.720396995544434, "learning_rate": 6.770483948251078e-05, "loss": 0.5867, "step": 24341 }, { "epoch": 1.6492987329764888, "grad_norm": 6.178685665130615, "learning_rate": 6.770347046341296e-05, "loss": 0.835, "step": 24342 }, { "epoch": 1.649366488244461, "grad_norm": 6.256001949310303, "learning_rate": 6.770210144431514e-05, "loss": 0.7432, "step": 24343 }, { "epoch": 1.6494342435124332, "grad_norm": 5.342929363250732, "learning_rate": 6.770073242521734e-05, "loss": 0.661, "step": 24344 }, { "epoch": 1.6495019987804052, "grad_norm": 6.409214496612549, "learning_rate": 6.769936340611952e-05, "loss": 0.778, "step": 24345 }, { "epoch": 1.6495697540483771, "grad_norm": 4.1953125, "learning_rate": 6.76979943870217e-05, "loss": 0.7187, "step": 24346 }, { "epoch": 1.6496375093163493, "grad_norm": 8.151801109313965, "learning_rate": 6.769662536792388e-05, "loss": 0.8593, "step": 24347 }, { "epoch": 1.6497052645843215, "grad_norm": 5.232264995574951, "learning_rate": 6.769525634882606e-05, "loss": 0.8831, "step": 24348 }, { "epoch": 1.6497730198522935, "grad_norm": 6.759119510650635, "learning_rate": 6.769388732972825e-05, "loss": 0.6941, "step": 24349 }, { "epoch": 1.6498407751202655, "grad_norm": 4.775367736816406, "learning_rate": 6.769251831063043e-05, "loss": 0.7491, "step": 24350 }, { "epoch": 1.6499085303882377, "grad_norm": 5.923184871673584, "learning_rate": 6.769114929153261e-05, "loss": 0.7476, "step": 24351 }, { "epoch": 1.64997628565621, "grad_norm": 4.110563278198242, "learning_rate": 6.76897802724348e-05, "loss": 0.5614, "step": 24352 }, { "epoch": 1.6500440409241819, "grad_norm": 5.103874206542969, "learning_rate": 6.768841125333699e-05, "loss": 0.6384, "step": 24353 }, { "epoch": 1.6501117961921539, "grad_norm": 4.499089241027832, "learning_rate": 6.768704223423917e-05, "loss": 0.6061, "step": 24354 }, { "epoch": 1.650179551460126, "grad_norm": 6.463099956512451, "learning_rate": 6.768567321514135e-05, "loss": 0.832, "step": 24355 }, { "epoch": 1.6502473067280983, "grad_norm": 6.191483974456787, "learning_rate": 6.768430419604353e-05, "loss": 0.6586, "step": 24356 }, { "epoch": 1.65031506199607, "grad_norm": 4.705417156219482, "learning_rate": 6.768293517694572e-05, "loss": 0.7288, "step": 24357 }, { "epoch": 1.6503828172640422, "grad_norm": 4.977295398712158, "learning_rate": 6.76815661578479e-05, "loss": 0.5554, "step": 24358 }, { "epoch": 1.6504505725320144, "grad_norm": 6.465582370758057, "learning_rate": 6.768019713875008e-05, "loss": 0.6932, "step": 24359 }, { "epoch": 1.6505183277999864, "grad_norm": 8.984996795654297, "learning_rate": 6.767882811965228e-05, "loss": 0.6691, "step": 24360 }, { "epoch": 1.6505860830679584, "grad_norm": 4.540802001953125, "learning_rate": 6.767745910055446e-05, "loss": 0.5037, "step": 24361 }, { "epoch": 1.6506538383359306, "grad_norm": 6.547104358673096, "learning_rate": 6.767609008145664e-05, "loss": 0.725, "step": 24362 }, { "epoch": 1.6507215936039028, "grad_norm": 3.8877882957458496, "learning_rate": 6.767472106235883e-05, "loss": 0.4592, "step": 24363 }, { "epoch": 1.6507893488718748, "grad_norm": 6.326330184936523, "learning_rate": 6.767335204326101e-05, "loss": 0.9282, "step": 24364 }, { "epoch": 1.6508571041398468, "grad_norm": 6.171874523162842, "learning_rate": 6.767198302416319e-05, "loss": 0.6738, "step": 24365 }, { "epoch": 1.650924859407819, "grad_norm": 5.602095127105713, "learning_rate": 6.767061400506537e-05, "loss": 0.6334, "step": 24366 }, { "epoch": 1.6509926146757912, "grad_norm": 9.760573387145996, "learning_rate": 6.766924498596757e-05, "loss": 0.9031, "step": 24367 }, { "epoch": 1.6510603699437632, "grad_norm": 8.027615547180176, "learning_rate": 6.766787596686975e-05, "loss": 0.6517, "step": 24368 }, { "epoch": 1.6511281252117351, "grad_norm": 6.640181064605713, "learning_rate": 6.766650694777193e-05, "loss": 0.6378, "step": 24369 }, { "epoch": 1.6511958804797073, "grad_norm": 8.053338050842285, "learning_rate": 6.766513792867411e-05, "loss": 0.8603, "step": 24370 }, { "epoch": 1.6512636357476795, "grad_norm": 4.960137844085693, "learning_rate": 6.76637689095763e-05, "loss": 0.6322, "step": 24371 }, { "epoch": 1.6513313910156515, "grad_norm": 5.265758991241455, "learning_rate": 6.766239989047848e-05, "loss": 0.7536, "step": 24372 }, { "epoch": 1.6513991462836235, "grad_norm": 5.06497859954834, "learning_rate": 6.766103087138066e-05, "loss": 0.6735, "step": 24373 }, { "epoch": 1.6514669015515957, "grad_norm": 7.542830467224121, "learning_rate": 6.765966185228284e-05, "loss": 0.759, "step": 24374 }, { "epoch": 1.6515346568195677, "grad_norm": 5.251003265380859, "learning_rate": 6.765829283318502e-05, "loss": 0.6085, "step": 24375 }, { "epoch": 1.6516024120875397, "grad_norm": 5.040851593017578, "learning_rate": 6.765692381408722e-05, "loss": 0.7052, "step": 24376 }, { "epoch": 1.6516701673555119, "grad_norm": 7.217167377471924, "learning_rate": 6.76555547949894e-05, "loss": 0.8275, "step": 24377 }, { "epoch": 1.651737922623484, "grad_norm": 5.767691612243652, "learning_rate": 6.765418577589158e-05, "loss": 0.799, "step": 24378 }, { "epoch": 1.651805677891456, "grad_norm": 5.5566864013671875, "learning_rate": 6.765281675679376e-05, "loss": 0.4102, "step": 24379 }, { "epoch": 1.651873433159428, "grad_norm": 5.427443504333496, "learning_rate": 6.765144773769594e-05, "loss": 0.576, "step": 24380 }, { "epoch": 1.6519411884274002, "grad_norm": 5.664072513580322, "learning_rate": 6.765007871859813e-05, "loss": 0.9112, "step": 24381 }, { "epoch": 1.6520089436953724, "grad_norm": 5.0744452476501465, "learning_rate": 6.764870969950031e-05, "loss": 0.7229, "step": 24382 }, { "epoch": 1.6520766989633444, "grad_norm": 5.456031322479248, "learning_rate": 6.764734068040249e-05, "loss": 0.6899, "step": 24383 }, { "epoch": 1.6521444542313164, "grad_norm": 4.237999439239502, "learning_rate": 6.764597166130467e-05, "loss": 0.6705, "step": 24384 }, { "epoch": 1.6522122094992886, "grad_norm": 6.46606969833374, "learning_rate": 6.764460264220687e-05, "loss": 0.4739, "step": 24385 }, { "epoch": 1.6522799647672608, "grad_norm": 6.321880340576172, "learning_rate": 6.764323362310905e-05, "loss": 0.7203, "step": 24386 }, { "epoch": 1.6523477200352328, "grad_norm": 5.426930904388428, "learning_rate": 6.764186460401123e-05, "loss": 0.8461, "step": 24387 }, { "epoch": 1.6524154753032048, "grad_norm": 5.599241733551025, "learning_rate": 6.764049558491341e-05, "loss": 0.695, "step": 24388 }, { "epoch": 1.652483230571177, "grad_norm": 5.427164077758789, "learning_rate": 6.763912656581559e-05, "loss": 0.6803, "step": 24389 }, { "epoch": 1.652550985839149, "grad_norm": 5.186045169830322, "learning_rate": 6.763775754671778e-05, "loss": 0.7217, "step": 24390 }, { "epoch": 1.652618741107121, "grad_norm": 6.375591278076172, "learning_rate": 6.763638852761996e-05, "loss": 0.5617, "step": 24391 }, { "epoch": 1.6526864963750931, "grad_norm": 5.0230207443237305, "learning_rate": 6.763501950852214e-05, "loss": 0.614, "step": 24392 }, { "epoch": 1.6527542516430653, "grad_norm": 5.186682224273682, "learning_rate": 6.763365048942432e-05, "loss": 0.5616, "step": 24393 }, { "epoch": 1.6528220069110373, "grad_norm": 9.034564018249512, "learning_rate": 6.763228147032652e-05, "loss": 0.616, "step": 24394 }, { "epoch": 1.6528897621790093, "grad_norm": 6.9548845291137695, "learning_rate": 6.76309124512287e-05, "loss": 0.8689, "step": 24395 }, { "epoch": 1.6529575174469815, "grad_norm": 6.371827602386475, "learning_rate": 6.762954343213088e-05, "loss": 0.6421, "step": 24396 }, { "epoch": 1.6530252727149537, "grad_norm": 5.694929599761963, "learning_rate": 6.762817441303306e-05, "loss": 0.6464, "step": 24397 }, { "epoch": 1.6530930279829257, "grad_norm": 5.560603618621826, "learning_rate": 6.762680539393524e-05, "loss": 0.6407, "step": 24398 }, { "epoch": 1.6531607832508977, "grad_norm": 4.594111442565918, "learning_rate": 6.762543637483743e-05, "loss": 0.5502, "step": 24399 }, { "epoch": 1.6532285385188699, "grad_norm": 5.498208045959473, "learning_rate": 6.762406735573961e-05, "loss": 0.6123, "step": 24400 }, { "epoch": 1.653296293786842, "grad_norm": 5.903857707977295, "learning_rate": 6.762269833664179e-05, "loss": 0.5573, "step": 24401 }, { "epoch": 1.653364049054814, "grad_norm": 5.922566890716553, "learning_rate": 6.762132931754397e-05, "loss": 0.5568, "step": 24402 }, { "epoch": 1.653431804322786, "grad_norm": 4.86178731918335, "learning_rate": 6.761996029844617e-05, "loss": 0.6432, "step": 24403 }, { "epoch": 1.6534995595907582, "grad_norm": 3.93855357170105, "learning_rate": 6.761859127934835e-05, "loss": 0.609, "step": 24404 }, { "epoch": 1.6535673148587304, "grad_norm": 6.262641429901123, "learning_rate": 6.761722226025053e-05, "loss": 0.7842, "step": 24405 }, { "epoch": 1.6536350701267022, "grad_norm": 5.708234786987305, "learning_rate": 6.761585324115272e-05, "loss": 0.7151, "step": 24406 }, { "epoch": 1.6537028253946744, "grad_norm": 4.011505603790283, "learning_rate": 6.76144842220549e-05, "loss": 0.7077, "step": 24407 }, { "epoch": 1.6537705806626466, "grad_norm": 6.191923141479492, "learning_rate": 6.761311520295708e-05, "loss": 0.8793, "step": 24408 }, { "epoch": 1.6538383359306186, "grad_norm": 7.275065898895264, "learning_rate": 6.761174618385928e-05, "loss": 0.78, "step": 24409 }, { "epoch": 1.6539060911985906, "grad_norm": 5.661311149597168, "learning_rate": 6.761037716476146e-05, "loss": 0.6582, "step": 24410 }, { "epoch": 1.6539738464665628, "grad_norm": 6.59944486618042, "learning_rate": 6.760900814566364e-05, "loss": 0.5936, "step": 24411 }, { "epoch": 1.654041601734535, "grad_norm": 6.010465621948242, "learning_rate": 6.760763912656582e-05, "loss": 0.8633, "step": 24412 }, { "epoch": 1.654109357002507, "grad_norm": 6.3292765617370605, "learning_rate": 6.760627010746801e-05, "loss": 0.647, "step": 24413 }, { "epoch": 1.654177112270479, "grad_norm": 4.425679683685303, "learning_rate": 6.760490108837019e-05, "loss": 0.6146, "step": 24414 }, { "epoch": 1.6542448675384511, "grad_norm": 4.763862133026123, "learning_rate": 6.760353206927237e-05, "loss": 0.5668, "step": 24415 }, { "epoch": 1.6543126228064233, "grad_norm": 4.682102680206299, "learning_rate": 6.760216305017455e-05, "loss": 0.512, "step": 24416 }, { "epoch": 1.6543803780743953, "grad_norm": 6.45216178894043, "learning_rate": 6.760079403107675e-05, "loss": 0.6172, "step": 24417 }, { "epoch": 1.6544481333423673, "grad_norm": 5.510179042816162, "learning_rate": 6.759942501197893e-05, "loss": 0.5681, "step": 24418 }, { "epoch": 1.6545158886103395, "grad_norm": 6.38509464263916, "learning_rate": 6.759805599288111e-05, "loss": 0.6518, "step": 24419 }, { "epoch": 1.6545836438783117, "grad_norm": 6.253263473510742, "learning_rate": 6.759668697378329e-05, "loss": 0.7675, "step": 24420 }, { "epoch": 1.6546513991462837, "grad_norm": 5.528787612915039, "learning_rate": 6.759531795468547e-05, "loss": 0.5553, "step": 24421 }, { "epoch": 1.6547191544142557, "grad_norm": 6.141293525695801, "learning_rate": 6.759394893558766e-05, "loss": 0.5608, "step": 24422 }, { "epoch": 1.6547869096822279, "grad_norm": 5.291539192199707, "learning_rate": 6.759257991648984e-05, "loss": 0.8749, "step": 24423 }, { "epoch": 1.6548546649501998, "grad_norm": 3.767702102661133, "learning_rate": 6.759121089739202e-05, "loss": 0.4895, "step": 24424 }, { "epoch": 1.6549224202181718, "grad_norm": 6.595837116241455, "learning_rate": 6.75898418782942e-05, "loss": 0.633, "step": 24425 }, { "epoch": 1.654990175486144, "grad_norm": 4.0051093101501465, "learning_rate": 6.75884728591964e-05, "loss": 0.485, "step": 24426 }, { "epoch": 1.6550579307541162, "grad_norm": 5.052982330322266, "learning_rate": 6.758710384009858e-05, "loss": 0.7948, "step": 24427 }, { "epoch": 1.6551256860220882, "grad_norm": 5.43797492980957, "learning_rate": 6.758573482100076e-05, "loss": 0.7054, "step": 24428 }, { "epoch": 1.6551934412900602, "grad_norm": 5.872108459472656, "learning_rate": 6.758436580190294e-05, "loss": 0.6102, "step": 24429 }, { "epoch": 1.6552611965580324, "grad_norm": 5.509016036987305, "learning_rate": 6.758299678280512e-05, "loss": 0.7443, "step": 24430 }, { "epoch": 1.6553289518260046, "grad_norm": 7.306905746459961, "learning_rate": 6.758162776370731e-05, "loss": 0.5623, "step": 24431 }, { "epoch": 1.6553967070939766, "grad_norm": 5.564026355743408, "learning_rate": 6.758025874460949e-05, "loss": 0.6936, "step": 24432 }, { "epoch": 1.6554644623619486, "grad_norm": 6.671096324920654, "learning_rate": 6.757888972551167e-05, "loss": 0.6622, "step": 24433 }, { "epoch": 1.6555322176299208, "grad_norm": 5.722023010253906, "learning_rate": 6.757752070641385e-05, "loss": 0.6554, "step": 24434 }, { "epoch": 1.655599972897893, "grad_norm": 4.068141937255859, "learning_rate": 6.757615168731603e-05, "loss": 0.4647, "step": 24435 }, { "epoch": 1.655667728165865, "grad_norm": 5.937231540679932, "learning_rate": 6.757478266821823e-05, "loss": 0.7645, "step": 24436 }, { "epoch": 1.655735483433837, "grad_norm": 5.894315719604492, "learning_rate": 6.757341364912041e-05, "loss": 0.5732, "step": 24437 }, { "epoch": 1.6558032387018091, "grad_norm": 4.478759288787842, "learning_rate": 6.757204463002259e-05, "loss": 0.5718, "step": 24438 }, { "epoch": 1.655870993969781, "grad_norm": 5.530681133270264, "learning_rate": 6.757067561092477e-05, "loss": 0.6748, "step": 24439 }, { "epoch": 1.655938749237753, "grad_norm": 5.157857418060303, "learning_rate": 6.756930659182696e-05, "loss": 0.7102, "step": 24440 }, { "epoch": 1.6560065045057253, "grad_norm": 6.609468460083008, "learning_rate": 6.756793757272914e-05, "loss": 0.8769, "step": 24441 }, { "epoch": 1.6560742597736975, "grad_norm": 7.53725528717041, "learning_rate": 6.756656855363132e-05, "loss": 0.5036, "step": 24442 }, { "epoch": 1.6561420150416695, "grad_norm": 7.699795722961426, "learning_rate": 6.75651995345335e-05, "loss": 0.7205, "step": 24443 }, { "epoch": 1.6562097703096414, "grad_norm": 4.745132923126221, "learning_rate": 6.756383051543568e-05, "loss": 0.5888, "step": 24444 }, { "epoch": 1.6562775255776137, "grad_norm": 6.253511428833008, "learning_rate": 6.756246149633788e-05, "loss": 0.7205, "step": 24445 }, { "epoch": 1.6563452808455859, "grad_norm": 5.205974578857422, "learning_rate": 6.756109247724006e-05, "loss": 0.4816, "step": 24446 }, { "epoch": 1.6564130361135578, "grad_norm": 7.59642219543457, "learning_rate": 6.755972345814224e-05, "loss": 0.5505, "step": 24447 }, { "epoch": 1.6564807913815298, "grad_norm": 4.729750633239746, "learning_rate": 6.755835443904442e-05, "loss": 0.6193, "step": 24448 }, { "epoch": 1.656548546649502, "grad_norm": 5.1383056640625, "learning_rate": 6.755698541994661e-05, "loss": 0.6299, "step": 24449 }, { "epoch": 1.6566163019174742, "grad_norm": 6.057745456695557, "learning_rate": 6.755561640084879e-05, "loss": 0.8819, "step": 24450 }, { "epoch": 1.6566840571854462, "grad_norm": 5.751891136169434, "learning_rate": 6.755424738175097e-05, "loss": 0.6008, "step": 24451 }, { "epoch": 1.6567518124534182, "grad_norm": 7.9424543380737305, "learning_rate": 6.755287836265317e-05, "loss": 0.852, "step": 24452 }, { "epoch": 1.6568195677213904, "grad_norm": 4.748288631439209, "learning_rate": 6.755150934355535e-05, "loss": 0.6157, "step": 24453 }, { "epoch": 1.6568873229893626, "grad_norm": 12.3513765335083, "learning_rate": 6.755014032445753e-05, "loss": 0.6241, "step": 24454 }, { "epoch": 1.6569550782573343, "grad_norm": 8.525871276855469, "learning_rate": 6.754877130535972e-05, "loss": 0.6173, "step": 24455 }, { "epoch": 1.6570228335253065, "grad_norm": 7.144158363342285, "learning_rate": 6.75474022862619e-05, "loss": 1.0364, "step": 24456 }, { "epoch": 1.6570905887932788, "grad_norm": 6.5613484382629395, "learning_rate": 6.754603326716408e-05, "loss": 0.6137, "step": 24457 }, { "epoch": 1.6571583440612507, "grad_norm": 7.2061076164245605, "learning_rate": 6.754466424806626e-05, "loss": 0.5691, "step": 24458 }, { "epoch": 1.6572260993292227, "grad_norm": 4.621383190155029, "learning_rate": 6.754329522896846e-05, "loss": 0.71, "step": 24459 }, { "epoch": 1.657293854597195, "grad_norm": 5.927046298980713, "learning_rate": 6.754192620987064e-05, "loss": 0.7722, "step": 24460 }, { "epoch": 1.6573616098651671, "grad_norm": 6.084840297698975, "learning_rate": 6.754055719077282e-05, "loss": 0.5688, "step": 24461 }, { "epoch": 1.657429365133139, "grad_norm": 4.350424766540527, "learning_rate": 6.7539188171675e-05, "loss": 0.5258, "step": 24462 }, { "epoch": 1.657497120401111, "grad_norm": 5.692440986633301, "learning_rate": 6.753781915257719e-05, "loss": 0.7472, "step": 24463 }, { "epoch": 1.6575648756690833, "grad_norm": 4.691887378692627, "learning_rate": 6.753645013347937e-05, "loss": 0.6456, "step": 24464 }, { "epoch": 1.6576326309370555, "grad_norm": 6.305905342102051, "learning_rate": 6.753508111438155e-05, "loss": 0.6281, "step": 24465 }, { "epoch": 1.6577003862050275, "grad_norm": 5.645185947418213, "learning_rate": 6.753371209528373e-05, "loss": 0.6084, "step": 24466 }, { "epoch": 1.6577681414729994, "grad_norm": 5.8027801513671875, "learning_rate": 6.753234307618591e-05, "loss": 0.4905, "step": 24467 }, { "epoch": 1.6578358967409716, "grad_norm": 5.385025501251221, "learning_rate": 6.75309740570881e-05, "loss": 0.5143, "step": 24468 }, { "epoch": 1.6579036520089439, "grad_norm": 5.010136127471924, "learning_rate": 6.752960503799029e-05, "loss": 0.6982, "step": 24469 }, { "epoch": 1.6579714072769158, "grad_norm": 5.755368709564209, "learning_rate": 6.752823601889247e-05, "loss": 0.6709, "step": 24470 }, { "epoch": 1.6580391625448878, "grad_norm": 5.618014335632324, "learning_rate": 6.752686699979465e-05, "loss": 0.624, "step": 24471 }, { "epoch": 1.65810691781286, "grad_norm": 5.465909004211426, "learning_rate": 6.752549798069684e-05, "loss": 0.7623, "step": 24472 }, { "epoch": 1.658174673080832, "grad_norm": 4.5822224617004395, "learning_rate": 6.752412896159902e-05, "loss": 0.7897, "step": 24473 }, { "epoch": 1.658242428348804, "grad_norm": 6.153263092041016, "learning_rate": 6.75227599425012e-05, "loss": 0.7163, "step": 24474 }, { "epoch": 1.6583101836167762, "grad_norm": 5.918305397033691, "learning_rate": 6.752139092340338e-05, "loss": 0.9131, "step": 24475 }, { "epoch": 1.6583779388847484, "grad_norm": 4.863615036010742, "learning_rate": 6.752002190430556e-05, "loss": 0.7118, "step": 24476 }, { "epoch": 1.6584456941527204, "grad_norm": 6.715060710906982, "learning_rate": 6.751865288520776e-05, "loss": 0.7228, "step": 24477 }, { "epoch": 1.6585134494206923, "grad_norm": 5.53355073928833, "learning_rate": 6.751728386610994e-05, "loss": 0.7169, "step": 24478 }, { "epoch": 1.6585812046886645, "grad_norm": 4.42555046081543, "learning_rate": 6.751591484701212e-05, "loss": 0.6048, "step": 24479 }, { "epoch": 1.6586489599566367, "grad_norm": 4.511417865753174, "learning_rate": 6.75145458279143e-05, "loss": 0.6961, "step": 24480 }, { "epoch": 1.6587167152246087, "grad_norm": 6.197656154632568, "learning_rate": 6.751317680881649e-05, "loss": 0.6918, "step": 24481 }, { "epoch": 1.6587844704925807, "grad_norm": 6.084252834320068, "learning_rate": 6.751180778971867e-05, "loss": 0.7823, "step": 24482 }, { "epoch": 1.658852225760553, "grad_norm": 10.445891380310059, "learning_rate": 6.751043877062085e-05, "loss": 0.4997, "step": 24483 }, { "epoch": 1.6589199810285251, "grad_norm": 7.24600887298584, "learning_rate": 6.750906975152303e-05, "loss": 0.53, "step": 24484 }, { "epoch": 1.658987736296497, "grad_norm": 4.841829776763916, "learning_rate": 6.750770073242521e-05, "loss": 0.5796, "step": 24485 }, { "epoch": 1.659055491564469, "grad_norm": 8.213785171508789, "learning_rate": 6.75063317133274e-05, "loss": 0.5248, "step": 24486 }, { "epoch": 1.6591232468324413, "grad_norm": 4.3658246994018555, "learning_rate": 6.750496269422959e-05, "loss": 0.6501, "step": 24487 }, { "epoch": 1.6591910021004133, "grad_norm": 6.076489448547363, "learning_rate": 6.750359367513177e-05, "loss": 0.7031, "step": 24488 }, { "epoch": 1.6592587573683852, "grad_norm": 4.605579376220703, "learning_rate": 6.750222465603395e-05, "loss": 0.6185, "step": 24489 }, { "epoch": 1.6593265126363574, "grad_norm": 5.070891857147217, "learning_rate": 6.750085563693613e-05, "loss": 0.5779, "step": 24490 }, { "epoch": 1.6593942679043296, "grad_norm": 8.079299926757812, "learning_rate": 6.749948661783832e-05, "loss": 0.6409, "step": 24491 }, { "epoch": 1.6594620231723016, "grad_norm": 6.830527305603027, "learning_rate": 6.74981175987405e-05, "loss": 0.4849, "step": 24492 }, { "epoch": 1.6595297784402736, "grad_norm": 4.7256693840026855, "learning_rate": 6.749674857964268e-05, "loss": 0.6618, "step": 24493 }, { "epoch": 1.6595975337082458, "grad_norm": 6.006547451019287, "learning_rate": 6.749537956054486e-05, "loss": 0.7231, "step": 24494 }, { "epoch": 1.659665288976218, "grad_norm": 4.5257062911987305, "learning_rate": 6.749401054144706e-05, "loss": 0.6609, "step": 24495 }, { "epoch": 1.65973304424419, "grad_norm": 5.389584064483643, "learning_rate": 6.749264152234924e-05, "loss": 0.718, "step": 24496 }, { "epoch": 1.659800799512162, "grad_norm": 6.29978084564209, "learning_rate": 6.749127250325142e-05, "loss": 0.637, "step": 24497 }, { "epoch": 1.6598685547801342, "grad_norm": 4.019192218780518, "learning_rate": 6.748990348415361e-05, "loss": 0.4201, "step": 24498 }, { "epoch": 1.6599363100481064, "grad_norm": 6.128885269165039, "learning_rate": 6.748853446505579e-05, "loss": 0.6657, "step": 24499 }, { "epoch": 1.6600040653160784, "grad_norm": 9.172197341918945, "learning_rate": 6.748716544595797e-05, "loss": 0.5947, "step": 24500 }, { "epoch": 1.6600718205840503, "grad_norm": 8.296952247619629, "learning_rate": 6.748579642686017e-05, "loss": 0.7125, "step": 24501 }, { "epoch": 1.6601395758520225, "grad_norm": 5.2460527420043945, "learning_rate": 6.748442740776235e-05, "loss": 0.8946, "step": 24502 }, { "epoch": 1.6602073311199947, "grad_norm": 5.20157527923584, "learning_rate": 6.748305838866453e-05, "loss": 0.5173, "step": 24503 }, { "epoch": 1.6602750863879665, "grad_norm": 10.520641326904297, "learning_rate": 6.748168936956672e-05, "loss": 0.7445, "step": 24504 }, { "epoch": 1.6603428416559387, "grad_norm": 6.852914810180664, "learning_rate": 6.74803203504689e-05, "loss": 0.6132, "step": 24505 }, { "epoch": 1.660410596923911, "grad_norm": 6.64964485168457, "learning_rate": 6.747895133137108e-05, "loss": 0.6804, "step": 24506 }, { "epoch": 1.660478352191883, "grad_norm": 5.977122783660889, "learning_rate": 6.747758231227326e-05, "loss": 0.8549, "step": 24507 }, { "epoch": 1.6605461074598549, "grad_norm": 10.278204917907715, "learning_rate": 6.747621329317544e-05, "loss": 0.6766, "step": 24508 }, { "epoch": 1.660613862727827, "grad_norm": 5.756875514984131, "learning_rate": 6.747484427407764e-05, "loss": 0.5493, "step": 24509 }, { "epoch": 1.6606816179957993, "grad_norm": 5.066632270812988, "learning_rate": 6.747347525497982e-05, "loss": 0.5095, "step": 24510 }, { "epoch": 1.6607493732637713, "grad_norm": 4.904911994934082, "learning_rate": 6.7472106235882e-05, "loss": 0.561, "step": 24511 }, { "epoch": 1.6608171285317432, "grad_norm": 9.042449951171875, "learning_rate": 6.747073721678418e-05, "loss": 0.7488, "step": 24512 }, { "epoch": 1.6608848837997154, "grad_norm": 6.30387020111084, "learning_rate": 6.746936819768636e-05, "loss": 0.7607, "step": 24513 }, { "epoch": 1.6609526390676876, "grad_norm": 5.879385948181152, "learning_rate": 6.746799917858855e-05, "loss": 0.6172, "step": 24514 }, { "epoch": 1.6610203943356596, "grad_norm": 7.398876190185547, "learning_rate": 6.746663015949073e-05, "loss": 0.8103, "step": 24515 }, { "epoch": 1.6610881496036316, "grad_norm": 5.517085552215576, "learning_rate": 6.746526114039291e-05, "loss": 0.6136, "step": 24516 }, { "epoch": 1.6611559048716038, "grad_norm": 6.120765209197998, "learning_rate": 6.746389212129509e-05, "loss": 0.8203, "step": 24517 }, { "epoch": 1.661223660139576, "grad_norm": 4.425470352172852, "learning_rate": 6.746252310219729e-05, "loss": 0.5445, "step": 24518 }, { "epoch": 1.661291415407548, "grad_norm": 9.338996887207031, "learning_rate": 6.746115408309947e-05, "loss": 0.4575, "step": 24519 }, { "epoch": 1.66135917067552, "grad_norm": 5.827417850494385, "learning_rate": 6.745978506400165e-05, "loss": 0.5297, "step": 24520 }, { "epoch": 1.6614269259434922, "grad_norm": 7.172245025634766, "learning_rate": 6.745841604490383e-05, "loss": 0.5714, "step": 24521 }, { "epoch": 1.6614946812114642, "grad_norm": 5.636802673339844, "learning_rate": 6.745704702580601e-05, "loss": 0.5309, "step": 24522 }, { "epoch": 1.6615624364794361, "grad_norm": 4.9574127197265625, "learning_rate": 6.74556780067082e-05, "loss": 0.5883, "step": 24523 }, { "epoch": 1.6616301917474083, "grad_norm": 6.479262828826904, "learning_rate": 6.745430898761038e-05, "loss": 0.6362, "step": 24524 }, { "epoch": 1.6616979470153805, "grad_norm": 3.9825568199157715, "learning_rate": 6.745293996851256e-05, "loss": 0.5589, "step": 24525 }, { "epoch": 1.6617657022833525, "grad_norm": 4.280409336090088, "learning_rate": 6.745157094941474e-05, "loss": 0.6626, "step": 24526 }, { "epoch": 1.6618334575513245, "grad_norm": 5.925407409667969, "learning_rate": 6.745020193031694e-05, "loss": 0.9944, "step": 24527 }, { "epoch": 1.6619012128192967, "grad_norm": 9.160504341125488, "learning_rate": 6.744883291121912e-05, "loss": 0.599, "step": 24528 }, { "epoch": 1.661968968087269, "grad_norm": 7.601298809051514, "learning_rate": 6.74474638921213e-05, "loss": 0.6506, "step": 24529 }, { "epoch": 1.6620367233552409, "grad_norm": 6.457617282867432, "learning_rate": 6.744609487302348e-05, "loss": 0.593, "step": 24530 }, { "epoch": 1.6621044786232129, "grad_norm": 6.612271308898926, "learning_rate": 6.744472585392566e-05, "loss": 0.5783, "step": 24531 }, { "epoch": 1.662172233891185, "grad_norm": 6.315708160400391, "learning_rate": 6.744335683482785e-05, "loss": 0.7353, "step": 24532 }, { "epoch": 1.6622399891591573, "grad_norm": 5.941262722015381, "learning_rate": 6.744198781573003e-05, "loss": 0.4993, "step": 24533 }, { "epoch": 1.6623077444271293, "grad_norm": 6.960399150848389, "learning_rate": 6.744061879663221e-05, "loss": 0.733, "step": 24534 }, { "epoch": 1.6623754996951012, "grad_norm": 8.980619430541992, "learning_rate": 6.743924977753439e-05, "loss": 0.6288, "step": 24535 }, { "epoch": 1.6624432549630734, "grad_norm": 5.786811828613281, "learning_rate": 6.743788075843657e-05, "loss": 0.5549, "step": 24536 }, { "epoch": 1.6625110102310454, "grad_norm": 6.247079372406006, "learning_rate": 6.743651173933877e-05, "loss": 0.716, "step": 24537 }, { "epoch": 1.6625787654990174, "grad_norm": 4.888821601867676, "learning_rate": 6.743514272024095e-05, "loss": 0.6065, "step": 24538 }, { "epoch": 1.6626465207669896, "grad_norm": 6.884281158447266, "learning_rate": 6.743377370114313e-05, "loss": 0.707, "step": 24539 }, { "epoch": 1.6627142760349618, "grad_norm": 6.064231872558594, "learning_rate": 6.743240468204531e-05, "loss": 0.7346, "step": 24540 }, { "epoch": 1.6627820313029338, "grad_norm": 4.371703624725342, "learning_rate": 6.74310356629475e-05, "loss": 0.4447, "step": 24541 }, { "epoch": 1.6628497865709058, "grad_norm": 5.762826442718506, "learning_rate": 6.742966664384968e-05, "loss": 0.5781, "step": 24542 }, { "epoch": 1.662917541838878, "grad_norm": 5.545205593109131, "learning_rate": 6.742829762475186e-05, "loss": 0.6893, "step": 24543 }, { "epoch": 1.6629852971068502, "grad_norm": 7.45306396484375, "learning_rate": 6.742692860565406e-05, "loss": 0.7193, "step": 24544 }, { "epoch": 1.6630530523748221, "grad_norm": 7.963557243347168, "learning_rate": 6.742555958655624e-05, "loss": 0.463, "step": 24545 }, { "epoch": 1.6631208076427941, "grad_norm": 6.598818778991699, "learning_rate": 6.742419056745842e-05, "loss": 0.8174, "step": 24546 }, { "epoch": 1.6631885629107663, "grad_norm": 6.192900657653809, "learning_rate": 6.742282154836061e-05, "loss": 0.5652, "step": 24547 }, { "epoch": 1.6632563181787385, "grad_norm": 6.336206436157227, "learning_rate": 6.742145252926279e-05, "loss": 0.814, "step": 24548 }, { "epoch": 1.6633240734467105, "grad_norm": 8.596609115600586, "learning_rate": 6.742008351016497e-05, "loss": 0.6901, "step": 24549 }, { "epoch": 1.6633918287146825, "grad_norm": 6.564549922943115, "learning_rate": 6.741871449106717e-05, "loss": 0.9241, "step": 24550 }, { "epoch": 1.6634595839826547, "grad_norm": 5.012625217437744, "learning_rate": 6.741734547196935e-05, "loss": 0.8974, "step": 24551 }, { "epoch": 1.663527339250627, "grad_norm": 5.087366580963135, "learning_rate": 6.741597645287153e-05, "loss": 0.6023, "step": 24552 }, { "epoch": 1.6635950945185987, "grad_norm": 5.166167736053467, "learning_rate": 6.74146074337737e-05, "loss": 0.6923, "step": 24553 }, { "epoch": 1.6636628497865709, "grad_norm": 5.287250995635986, "learning_rate": 6.741323841467589e-05, "loss": 0.4779, "step": 24554 }, { "epoch": 1.663730605054543, "grad_norm": 4.552809715270996, "learning_rate": 6.741186939557808e-05, "loss": 0.6349, "step": 24555 }, { "epoch": 1.663798360322515, "grad_norm": 5.26357889175415, "learning_rate": 6.741050037648026e-05, "loss": 0.6755, "step": 24556 }, { "epoch": 1.663866115590487, "grad_norm": 7.202183246612549, "learning_rate": 6.740913135738244e-05, "loss": 0.7149, "step": 24557 }, { "epoch": 1.6639338708584592, "grad_norm": 7.838168144226074, "learning_rate": 6.740776233828462e-05, "loss": 0.613, "step": 24558 }, { "epoch": 1.6640016261264314, "grad_norm": 7.283451080322266, "learning_rate": 6.740639331918682e-05, "loss": 0.7237, "step": 24559 }, { "epoch": 1.6640693813944034, "grad_norm": 6.499590873718262, "learning_rate": 6.7405024300089e-05, "loss": 0.6147, "step": 24560 }, { "epoch": 1.6641371366623754, "grad_norm": 4.975700855255127, "learning_rate": 6.740365528099118e-05, "loss": 0.7823, "step": 24561 }, { "epoch": 1.6642048919303476, "grad_norm": 8.424015998840332, "learning_rate": 6.740228626189336e-05, "loss": 0.6812, "step": 24562 }, { "epoch": 1.6642726471983198, "grad_norm": 5.4898176193237305, "learning_rate": 6.740091724279554e-05, "loss": 0.4883, "step": 24563 }, { "epoch": 1.6643404024662918, "grad_norm": 5.439981460571289, "learning_rate": 6.739954822369773e-05, "loss": 0.4674, "step": 24564 }, { "epoch": 1.6644081577342638, "grad_norm": 4.54274320602417, "learning_rate": 6.739817920459991e-05, "loss": 0.7166, "step": 24565 }, { "epoch": 1.664475913002236, "grad_norm": 9.251104354858398, "learning_rate": 6.739681018550209e-05, "loss": 0.7914, "step": 24566 }, { "epoch": 1.6645436682702082, "grad_norm": 4.876403331756592, "learning_rate": 6.739544116640427e-05, "loss": 0.8098, "step": 24567 }, { "epoch": 1.6646114235381801, "grad_norm": 4.602072715759277, "learning_rate": 6.739407214730645e-05, "loss": 0.5897, "step": 24568 }, { "epoch": 1.6646791788061521, "grad_norm": 7.687628746032715, "learning_rate": 6.739270312820865e-05, "loss": 0.7267, "step": 24569 }, { "epoch": 1.6647469340741243, "grad_norm": 5.833706378936768, "learning_rate": 6.739133410911083e-05, "loss": 0.5197, "step": 24570 }, { "epoch": 1.6648146893420963, "grad_norm": 4.732761383056641, "learning_rate": 6.7389965090013e-05, "loss": 0.607, "step": 24571 }, { "epoch": 1.6648824446100683, "grad_norm": 5.815356254577637, "learning_rate": 6.738859607091519e-05, "loss": 0.5984, "step": 24572 }, { "epoch": 1.6649501998780405, "grad_norm": 4.832042217254639, "learning_rate": 6.738722705181738e-05, "loss": 0.6185, "step": 24573 }, { "epoch": 1.6650179551460127, "grad_norm": 6.887304782867432, "learning_rate": 6.738585803271956e-05, "loss": 0.6425, "step": 24574 }, { "epoch": 1.6650857104139847, "grad_norm": 8.607890129089355, "learning_rate": 6.738448901362174e-05, "loss": 0.5408, "step": 24575 }, { "epoch": 1.6651534656819567, "grad_norm": 6.832543849945068, "learning_rate": 6.738311999452392e-05, "loss": 0.6299, "step": 24576 }, { "epoch": 1.6652212209499289, "grad_norm": 6.121936321258545, "learning_rate": 6.73817509754261e-05, "loss": 0.5027, "step": 24577 }, { "epoch": 1.665288976217901, "grad_norm": 7.36810302734375, "learning_rate": 6.73803819563283e-05, "loss": 0.7083, "step": 24578 }, { "epoch": 1.665356731485873, "grad_norm": 5.197346210479736, "learning_rate": 6.737901293723048e-05, "loss": 0.8065, "step": 24579 }, { "epoch": 1.665424486753845, "grad_norm": 4.27253532409668, "learning_rate": 6.737764391813266e-05, "loss": 0.4887, "step": 24580 }, { "epoch": 1.6654922420218172, "grad_norm": 5.456480026245117, "learning_rate": 6.737627489903484e-05, "loss": 0.7807, "step": 24581 }, { "epoch": 1.6655599972897894, "grad_norm": 6.385209083557129, "learning_rate": 6.737490587993703e-05, "loss": 0.5637, "step": 24582 }, { "epoch": 1.6656277525577614, "grad_norm": 6.5706787109375, "learning_rate": 6.737353686083921e-05, "loss": 0.7574, "step": 24583 }, { "epoch": 1.6656955078257334, "grad_norm": 5.188122272491455, "learning_rate": 6.737216784174139e-05, "loss": 0.4551, "step": 24584 }, { "epoch": 1.6657632630937056, "grad_norm": 16.571386337280273, "learning_rate": 6.737079882264357e-05, "loss": 0.6981, "step": 24585 }, { "epoch": 1.6658310183616776, "grad_norm": 5.37255859375, "learning_rate": 6.736942980354575e-05, "loss": 0.5789, "step": 24586 }, { "epoch": 1.6658987736296496, "grad_norm": 7.983510494232178, "learning_rate": 6.736806078444795e-05, "loss": 0.6786, "step": 24587 }, { "epoch": 1.6659665288976218, "grad_norm": 6.391076564788818, "learning_rate": 6.736669176535013e-05, "loss": 0.7234, "step": 24588 }, { "epoch": 1.666034284165594, "grad_norm": 7.125083923339844, "learning_rate": 6.736532274625231e-05, "loss": 0.6749, "step": 24589 }, { "epoch": 1.666102039433566, "grad_norm": 6.803764343261719, "learning_rate": 6.736395372715449e-05, "loss": 0.8882, "step": 24590 }, { "epoch": 1.666169794701538, "grad_norm": 6.769619464874268, "learning_rate": 6.736258470805668e-05, "loss": 0.9067, "step": 24591 }, { "epoch": 1.6662375499695101, "grad_norm": 6.065954208374023, "learning_rate": 6.736121568895886e-05, "loss": 0.6446, "step": 24592 }, { "epoch": 1.6663053052374823, "grad_norm": 6.695490837097168, "learning_rate": 6.735984666986104e-05, "loss": 0.6926, "step": 24593 }, { "epoch": 1.6663730605054543, "grad_norm": 6.444540500640869, "learning_rate": 6.735847765076324e-05, "loss": 0.8215, "step": 24594 }, { "epoch": 1.6664408157734263, "grad_norm": 5.681356430053711, "learning_rate": 6.735710863166542e-05, "loss": 0.6143, "step": 24595 }, { "epoch": 1.6665085710413985, "grad_norm": 7.688793182373047, "learning_rate": 6.735573961256761e-05, "loss": 0.6376, "step": 24596 }, { "epoch": 1.6665763263093707, "grad_norm": 7.795339107513428, "learning_rate": 6.735437059346979e-05, "loss": 0.7272, "step": 24597 }, { "epoch": 1.6666440815773427, "grad_norm": 6.399454593658447, "learning_rate": 6.735300157437197e-05, "loss": 0.7501, "step": 24598 }, { "epoch": 1.6667118368453147, "grad_norm": 4.591368198394775, "learning_rate": 6.735163255527415e-05, "loss": 0.6005, "step": 24599 }, { "epoch": 1.6667795921132869, "grad_norm": 5.991671085357666, "learning_rate": 6.735026353617633e-05, "loss": 0.5761, "step": 24600 }, { "epoch": 1.666847347381259, "grad_norm": 6.410189628601074, "learning_rate": 6.734889451707853e-05, "loss": 0.7008, "step": 24601 }, { "epoch": 1.6669151026492308, "grad_norm": 8.000137329101562, "learning_rate": 6.73475254979807e-05, "loss": 0.7722, "step": 24602 }, { "epoch": 1.666982857917203, "grad_norm": 5.834362983703613, "learning_rate": 6.734615647888289e-05, "loss": 0.7319, "step": 24603 }, { "epoch": 1.6670506131851752, "grad_norm": 5.057834148406982, "learning_rate": 6.734478745978507e-05, "loss": 0.789, "step": 24604 }, { "epoch": 1.6671183684531472, "grad_norm": 3.6628360748291016, "learning_rate": 6.734341844068726e-05, "loss": 0.5385, "step": 24605 }, { "epoch": 1.6671861237211192, "grad_norm": 5.895676612854004, "learning_rate": 6.734204942158944e-05, "loss": 0.6779, "step": 24606 }, { "epoch": 1.6672538789890914, "grad_norm": 5.744756698608398, "learning_rate": 6.734068040249162e-05, "loss": 0.8134, "step": 24607 }, { "epoch": 1.6673216342570636, "grad_norm": 8.161149024963379, "learning_rate": 6.73393113833938e-05, "loss": 0.6139, "step": 24608 }, { "epoch": 1.6673893895250356, "grad_norm": 6.274159908294678, "learning_rate": 6.733794236429598e-05, "loss": 0.6251, "step": 24609 }, { "epoch": 1.6674571447930076, "grad_norm": 6.848295211791992, "learning_rate": 6.733657334519818e-05, "loss": 0.5317, "step": 24610 }, { "epoch": 1.6675249000609798, "grad_norm": 5.8660197257995605, "learning_rate": 6.733520432610036e-05, "loss": 0.6968, "step": 24611 }, { "epoch": 1.667592655328952, "grad_norm": 6.1010284423828125, "learning_rate": 6.733383530700254e-05, "loss": 0.6758, "step": 24612 }, { "epoch": 1.667660410596924, "grad_norm": 5.016493320465088, "learning_rate": 6.733246628790472e-05, "loss": 0.5968, "step": 24613 }, { "epoch": 1.667728165864896, "grad_norm": 5.642788887023926, "learning_rate": 6.733109726880691e-05, "loss": 0.6106, "step": 24614 }, { "epoch": 1.6677959211328681, "grad_norm": 3.5635533332824707, "learning_rate": 6.732972824970909e-05, "loss": 0.5575, "step": 24615 }, { "epoch": 1.6678636764008403, "grad_norm": 4.609151840209961, "learning_rate": 6.732835923061127e-05, "loss": 0.5913, "step": 24616 }, { "epoch": 1.6679314316688123, "grad_norm": 10.149181365966797, "learning_rate": 6.732699021151345e-05, "loss": 0.6544, "step": 24617 }, { "epoch": 1.6679991869367843, "grad_norm": 5.9454827308654785, "learning_rate": 6.732562119241563e-05, "loss": 0.6553, "step": 24618 }, { "epoch": 1.6680669422047565, "grad_norm": 5.886265277862549, "learning_rate": 6.732425217331783e-05, "loss": 0.9068, "step": 24619 }, { "epoch": 1.6681346974727285, "grad_norm": 7.245805263519287, "learning_rate": 6.732288315422e-05, "loss": 0.4793, "step": 24620 }, { "epoch": 1.6682024527407004, "grad_norm": 6.943418025970459, "learning_rate": 6.732151413512219e-05, "loss": 0.5762, "step": 24621 }, { "epoch": 1.6682702080086727, "grad_norm": 6.966055393218994, "learning_rate": 6.732014511602437e-05, "loss": 0.7251, "step": 24622 }, { "epoch": 1.6683379632766449, "grad_norm": 6.61965274810791, "learning_rate": 6.731877609692655e-05, "loss": 0.6925, "step": 24623 }, { "epoch": 1.6684057185446168, "grad_norm": 5.3798418045043945, "learning_rate": 6.731740707782874e-05, "loss": 0.7082, "step": 24624 }, { "epoch": 1.6684734738125888, "grad_norm": 10.396533012390137, "learning_rate": 6.731603805873092e-05, "loss": 0.6509, "step": 24625 }, { "epoch": 1.668541229080561, "grad_norm": 9.991857528686523, "learning_rate": 6.73146690396331e-05, "loss": 0.9762, "step": 24626 }, { "epoch": 1.6686089843485332, "grad_norm": 5.119079113006592, "learning_rate": 6.731330002053528e-05, "loss": 0.6896, "step": 24627 }, { "epoch": 1.6686767396165052, "grad_norm": 5.284431457519531, "learning_rate": 6.731193100143748e-05, "loss": 0.5897, "step": 24628 }, { "epoch": 1.6687444948844772, "grad_norm": 7.343466281890869, "learning_rate": 6.731056198233966e-05, "loss": 0.7583, "step": 24629 }, { "epoch": 1.6688122501524494, "grad_norm": 4.639831066131592, "learning_rate": 6.730919296324184e-05, "loss": 0.6573, "step": 24630 }, { "epoch": 1.6688800054204216, "grad_norm": 5.887128829956055, "learning_rate": 6.730782394414402e-05, "loss": 0.8319, "step": 24631 }, { "epoch": 1.6689477606883936, "grad_norm": 8.97950267791748, "learning_rate": 6.73064549250462e-05, "loss": 0.658, "step": 24632 }, { "epoch": 1.6690155159563655, "grad_norm": 5.73762321472168, "learning_rate": 6.730508590594839e-05, "loss": 0.7467, "step": 24633 }, { "epoch": 1.6690832712243378, "grad_norm": 5.710646152496338, "learning_rate": 6.730371688685057e-05, "loss": 0.5551, "step": 24634 }, { "epoch": 1.6691510264923097, "grad_norm": 6.035006046295166, "learning_rate": 6.730234786775275e-05, "loss": 0.6224, "step": 24635 }, { "epoch": 1.6692187817602817, "grad_norm": 8.181713104248047, "learning_rate": 6.730097884865493e-05, "loss": 0.9811, "step": 24636 }, { "epoch": 1.669286537028254, "grad_norm": 4.746059417724609, "learning_rate": 6.729960982955713e-05, "loss": 0.4793, "step": 24637 }, { "epoch": 1.6693542922962261, "grad_norm": 6.107590675354004, "learning_rate": 6.72982408104593e-05, "loss": 0.6852, "step": 24638 }, { "epoch": 1.669422047564198, "grad_norm": 5.4570136070251465, "learning_rate": 6.729687179136149e-05, "loss": 0.9077, "step": 24639 }, { "epoch": 1.66948980283217, "grad_norm": 5.439845085144043, "learning_rate": 6.729550277226368e-05, "loss": 0.7675, "step": 24640 }, { "epoch": 1.6695575581001423, "grad_norm": 4.006040096282959, "learning_rate": 6.729413375316586e-05, "loss": 0.5952, "step": 24641 }, { "epoch": 1.6696253133681145, "grad_norm": 5.006824970245361, "learning_rate": 6.729276473406804e-05, "loss": 0.6389, "step": 24642 }, { "epoch": 1.6696930686360865, "grad_norm": 4.865530014038086, "learning_rate": 6.729139571497024e-05, "loss": 0.6251, "step": 24643 }, { "epoch": 1.6697608239040584, "grad_norm": 5.440446376800537, "learning_rate": 6.729002669587242e-05, "loss": 0.5921, "step": 24644 }, { "epoch": 1.6698285791720306, "grad_norm": 6.0058913230896, "learning_rate": 6.72886576767746e-05, "loss": 0.8113, "step": 24645 }, { "epoch": 1.6698963344400028, "grad_norm": 5.258696556091309, "learning_rate": 6.728728865767678e-05, "loss": 0.6715, "step": 24646 }, { "epoch": 1.6699640897079748, "grad_norm": 6.046141147613525, "learning_rate": 6.728591963857897e-05, "loss": 0.7354, "step": 24647 }, { "epoch": 1.6700318449759468, "grad_norm": 5.165596008300781, "learning_rate": 6.728455061948115e-05, "loss": 0.7167, "step": 24648 }, { "epoch": 1.670099600243919, "grad_norm": 7.342933654785156, "learning_rate": 6.728318160038333e-05, "loss": 0.6317, "step": 24649 }, { "epoch": 1.670167355511891, "grad_norm": 6.482247352600098, "learning_rate": 6.728181258128551e-05, "loss": 0.7582, "step": 24650 }, { "epoch": 1.670235110779863, "grad_norm": 6.833373546600342, "learning_rate": 6.72804435621877e-05, "loss": 0.6143, "step": 24651 }, { "epoch": 1.6703028660478352, "grad_norm": 6.711480140686035, "learning_rate": 6.727907454308989e-05, "loss": 0.5795, "step": 24652 }, { "epoch": 1.6703706213158074, "grad_norm": 4.333495616912842, "learning_rate": 6.727770552399207e-05, "loss": 0.6611, "step": 24653 }, { "epoch": 1.6704383765837794, "grad_norm": 7.345907211303711, "learning_rate": 6.727633650489425e-05, "loss": 0.5893, "step": 24654 }, { "epoch": 1.6705061318517513, "grad_norm": 6.9593071937561035, "learning_rate": 6.727496748579643e-05, "loss": 0.5602, "step": 24655 }, { "epoch": 1.6705738871197235, "grad_norm": 5.054182052612305, "learning_rate": 6.727359846669862e-05, "loss": 0.693, "step": 24656 }, { "epoch": 1.6706416423876957, "grad_norm": 5.341734886169434, "learning_rate": 6.72722294476008e-05, "loss": 0.8318, "step": 24657 }, { "epoch": 1.6707093976556677, "grad_norm": 6.043889999389648, "learning_rate": 6.727086042850298e-05, "loss": 0.547, "step": 24658 }, { "epoch": 1.6707771529236397, "grad_norm": 5.9776129722595215, "learning_rate": 6.726949140940516e-05, "loss": 0.7362, "step": 24659 }, { "epoch": 1.670844908191612, "grad_norm": 5.753301620483398, "learning_rate": 6.726812239030736e-05, "loss": 0.5096, "step": 24660 }, { "epoch": 1.6709126634595841, "grad_norm": 5.793787956237793, "learning_rate": 6.726675337120954e-05, "loss": 0.6834, "step": 24661 }, { "epoch": 1.670980418727556, "grad_norm": 5.7630615234375, "learning_rate": 6.726538435211172e-05, "loss": 0.799, "step": 24662 }, { "epoch": 1.671048173995528, "grad_norm": 6.4958696365356445, "learning_rate": 6.72640153330139e-05, "loss": 0.5234, "step": 24663 }, { "epoch": 1.6711159292635003, "grad_norm": 6.986758708953857, "learning_rate": 6.726264631391608e-05, "loss": 0.744, "step": 24664 }, { "epoch": 1.6711836845314725, "grad_norm": 7.861841201782227, "learning_rate": 6.726127729481827e-05, "loss": 0.9275, "step": 24665 }, { "epoch": 1.6712514397994445, "grad_norm": 4.937579154968262, "learning_rate": 6.725990827572045e-05, "loss": 0.653, "step": 24666 }, { "epoch": 1.6713191950674164, "grad_norm": 4.938409328460693, "learning_rate": 6.725853925662263e-05, "loss": 0.7198, "step": 24667 }, { "epoch": 1.6713869503353886, "grad_norm": 7.9748921394348145, "learning_rate": 6.725717023752481e-05, "loss": 0.5623, "step": 24668 }, { "epoch": 1.6714547056033606, "grad_norm": 6.909602642059326, "learning_rate": 6.725580121842699e-05, "loss": 0.8704, "step": 24669 }, { "epoch": 1.6715224608713326, "grad_norm": 5.854140758514404, "learning_rate": 6.725443219932919e-05, "loss": 0.7811, "step": 24670 }, { "epoch": 1.6715902161393048, "grad_norm": 11.455551147460938, "learning_rate": 6.725306318023137e-05, "loss": 0.5756, "step": 24671 }, { "epoch": 1.671657971407277, "grad_norm": 7.715889930725098, "learning_rate": 6.725169416113355e-05, "loss": 0.6491, "step": 24672 }, { "epoch": 1.671725726675249, "grad_norm": 5.4265241622924805, "learning_rate": 6.725032514203573e-05, "loss": 0.6374, "step": 24673 }, { "epoch": 1.671793481943221, "grad_norm": 5.941964626312256, "learning_rate": 6.724895612293792e-05, "loss": 0.562, "step": 24674 }, { "epoch": 1.6718612372111932, "grad_norm": 6.446934700012207, "learning_rate": 6.72475871038401e-05, "loss": 0.4536, "step": 24675 }, { "epoch": 1.6719289924791654, "grad_norm": 6.0498528480529785, "learning_rate": 6.724621808474228e-05, "loss": 0.8691, "step": 24676 }, { "epoch": 1.6719967477471374, "grad_norm": 7.687225341796875, "learning_rate": 6.724484906564446e-05, "loss": 0.8194, "step": 24677 }, { "epoch": 1.6720645030151093, "grad_norm": 7.30818510055542, "learning_rate": 6.724348004654664e-05, "loss": 0.7705, "step": 24678 }, { "epoch": 1.6721322582830815, "grad_norm": 5.159082889556885, "learning_rate": 6.724211102744884e-05, "loss": 0.6854, "step": 24679 }, { "epoch": 1.6722000135510537, "grad_norm": 5.6689043045043945, "learning_rate": 6.724074200835102e-05, "loss": 0.6216, "step": 24680 }, { "epoch": 1.6722677688190257, "grad_norm": 7.243398189544678, "learning_rate": 6.72393729892532e-05, "loss": 0.572, "step": 24681 }, { "epoch": 1.6723355240869977, "grad_norm": 5.448641300201416, "learning_rate": 6.723800397015538e-05, "loss": 0.5237, "step": 24682 }, { "epoch": 1.67240327935497, "grad_norm": 7.783251762390137, "learning_rate": 6.723663495105757e-05, "loss": 0.6928, "step": 24683 }, { "epoch": 1.6724710346229419, "grad_norm": 5.084480285644531, "learning_rate": 6.723526593195975e-05, "loss": 0.6965, "step": 24684 }, { "epoch": 1.6725387898909139, "grad_norm": 5.882480621337891, "learning_rate": 6.723389691286193e-05, "loss": 0.7306, "step": 24685 }, { "epoch": 1.672606545158886, "grad_norm": 6.304037094116211, "learning_rate": 6.723252789376413e-05, "loss": 0.5828, "step": 24686 }, { "epoch": 1.6726743004268583, "grad_norm": 5.700740337371826, "learning_rate": 6.72311588746663e-05, "loss": 0.6622, "step": 24687 }, { "epoch": 1.6727420556948303, "grad_norm": 6.251616477966309, "learning_rate": 6.722978985556849e-05, "loss": 0.6634, "step": 24688 }, { "epoch": 1.6728098109628022, "grad_norm": 7.738149166107178, "learning_rate": 6.722842083647068e-05, "loss": 0.6027, "step": 24689 }, { "epoch": 1.6728775662307744, "grad_norm": 7.7930779457092285, "learning_rate": 6.722705181737286e-05, "loss": 0.6914, "step": 24690 }, { "epoch": 1.6729453214987466, "grad_norm": 4.586068153381348, "learning_rate": 6.722568279827504e-05, "loss": 0.6405, "step": 24691 }, { "epoch": 1.6730130767667186, "grad_norm": 6.308026313781738, "learning_rate": 6.722431377917723e-05, "loss": 0.6847, "step": 24692 }, { "epoch": 1.6730808320346906, "grad_norm": 6.953598976135254, "learning_rate": 6.722294476007941e-05, "loss": 0.7971, "step": 24693 }, { "epoch": 1.6731485873026628, "grad_norm": 5.198323726654053, "learning_rate": 6.72215757409816e-05, "loss": 0.5522, "step": 24694 }, { "epoch": 1.673216342570635, "grad_norm": 8.505494117736816, "learning_rate": 6.722020672188378e-05, "loss": 0.7233, "step": 24695 }, { "epoch": 1.673284097838607, "grad_norm": 6.391080856323242, "learning_rate": 6.721883770278596e-05, "loss": 0.7485, "step": 24696 }, { "epoch": 1.673351853106579, "grad_norm": 4.855282783508301, "learning_rate": 6.721746868368815e-05, "loss": 0.404, "step": 24697 }, { "epoch": 1.6734196083745512, "grad_norm": 5.252887725830078, "learning_rate": 6.721609966459033e-05, "loss": 0.8855, "step": 24698 }, { "epoch": 1.6734873636425232, "grad_norm": 7.222199440002441, "learning_rate": 6.721473064549251e-05, "loss": 0.6904, "step": 24699 }, { "epoch": 1.6735551189104951, "grad_norm": 5.327620029449463, "learning_rate": 6.721336162639469e-05, "loss": 0.7533, "step": 24700 }, { "epoch": 1.6736228741784673, "grad_norm": 7.05602502822876, "learning_rate": 6.721199260729687e-05, "loss": 1.0125, "step": 24701 }, { "epoch": 1.6736906294464395, "grad_norm": 6.316939353942871, "learning_rate": 6.721062358819906e-05, "loss": 0.6061, "step": 24702 }, { "epoch": 1.6737583847144115, "grad_norm": 8.834161758422852, "learning_rate": 6.720925456910125e-05, "loss": 0.7561, "step": 24703 }, { "epoch": 1.6738261399823835, "grad_norm": 6.740616321563721, "learning_rate": 6.720788555000343e-05, "loss": 0.7598, "step": 24704 }, { "epoch": 1.6738938952503557, "grad_norm": 7.415405750274658, "learning_rate": 6.72065165309056e-05, "loss": 0.7703, "step": 24705 }, { "epoch": 1.673961650518328, "grad_norm": 7.811276912689209, "learning_rate": 6.72051475118078e-05, "loss": 0.6454, "step": 24706 }, { "epoch": 1.6740294057862999, "grad_norm": 6.21217679977417, "learning_rate": 6.720377849270998e-05, "loss": 0.7582, "step": 24707 }, { "epoch": 1.6740971610542719, "grad_norm": 7.929867744445801, "learning_rate": 6.720240947361216e-05, "loss": 0.8246, "step": 24708 }, { "epoch": 1.674164916322244, "grad_norm": 5.142968654632568, "learning_rate": 6.720104045451434e-05, "loss": 0.7323, "step": 24709 }, { "epoch": 1.6742326715902163, "grad_norm": 4.773557662963867, "learning_rate": 6.719967143541652e-05, "loss": 0.5923, "step": 24710 }, { "epoch": 1.6743004268581883, "grad_norm": 5.606270790100098, "learning_rate": 6.719830241631872e-05, "loss": 0.8142, "step": 24711 }, { "epoch": 1.6743681821261602, "grad_norm": 6.676942825317383, "learning_rate": 6.71969333972209e-05, "loss": 0.7451, "step": 24712 }, { "epoch": 1.6744359373941324, "grad_norm": 4.528724193572998, "learning_rate": 6.719556437812308e-05, "loss": 0.5037, "step": 24713 }, { "epoch": 1.6745036926621046, "grad_norm": 6.769326686859131, "learning_rate": 6.719419535902526e-05, "loss": 0.7125, "step": 24714 }, { "epoch": 1.6745714479300766, "grad_norm": 6.826852321624756, "learning_rate": 6.719282633992745e-05, "loss": 0.6696, "step": 24715 }, { "epoch": 1.6746392031980486, "grad_norm": 5.247282028198242, "learning_rate": 6.719145732082963e-05, "loss": 0.4927, "step": 24716 }, { "epoch": 1.6747069584660208, "grad_norm": 5.567564487457275, "learning_rate": 6.719008830173181e-05, "loss": 0.7109, "step": 24717 }, { "epoch": 1.6747747137339928, "grad_norm": 7.4550018310546875, "learning_rate": 6.718871928263399e-05, "loss": 0.7323, "step": 24718 }, { "epoch": 1.6748424690019648, "grad_norm": 4.923420429229736, "learning_rate": 6.718735026353617e-05, "loss": 0.6912, "step": 24719 }, { "epoch": 1.674910224269937, "grad_norm": 5.7151360511779785, "learning_rate": 6.718598124443837e-05, "loss": 0.5477, "step": 24720 }, { "epoch": 1.6749779795379092, "grad_norm": 8.106683731079102, "learning_rate": 6.718461222534055e-05, "loss": 0.6679, "step": 24721 }, { "epoch": 1.6750457348058811, "grad_norm": 7.50091028213501, "learning_rate": 6.718324320624273e-05, "loss": 0.5276, "step": 24722 }, { "epoch": 1.6751134900738531, "grad_norm": 8.3933687210083, "learning_rate": 6.71818741871449e-05, "loss": 0.6117, "step": 24723 }, { "epoch": 1.6751812453418253, "grad_norm": 8.987381935119629, "learning_rate": 6.718050516804709e-05, "loss": 0.8249, "step": 24724 }, { "epoch": 1.6752490006097975, "grad_norm": 5.556356430053711, "learning_rate": 6.717913614894928e-05, "loss": 0.6871, "step": 24725 }, { "epoch": 1.6753167558777695, "grad_norm": 5.366272926330566, "learning_rate": 6.717776712985146e-05, "loss": 0.5881, "step": 24726 }, { "epoch": 1.6753845111457415, "grad_norm": 5.1666741371154785, "learning_rate": 6.717639811075364e-05, "loss": 0.6403, "step": 24727 }, { "epoch": 1.6754522664137137, "grad_norm": 4.858229637145996, "learning_rate": 6.717502909165582e-05, "loss": 0.786, "step": 24728 }, { "epoch": 1.675520021681686, "grad_norm": 6.019446849822998, "learning_rate": 6.717366007255802e-05, "loss": 0.5589, "step": 24729 }, { "epoch": 1.6755877769496579, "grad_norm": 10.520013809204102, "learning_rate": 6.71722910534602e-05, "loss": 0.7308, "step": 24730 }, { "epoch": 1.6756555322176299, "grad_norm": 5.286378383636475, "learning_rate": 6.717092203436238e-05, "loss": 0.6375, "step": 24731 }, { "epoch": 1.675723287485602, "grad_norm": 8.790345191955566, "learning_rate": 6.716955301526457e-05, "loss": 0.7936, "step": 24732 }, { "epoch": 1.675791042753574, "grad_norm": 4.952531814575195, "learning_rate": 6.716818399616675e-05, "loss": 0.6697, "step": 24733 }, { "epoch": 1.675858798021546, "grad_norm": 6.857656002044678, "learning_rate": 6.716681497706893e-05, "loss": 0.8714, "step": 24734 }, { "epoch": 1.6759265532895182, "grad_norm": 5.582533359527588, "learning_rate": 6.716544595797112e-05, "loss": 0.6523, "step": 24735 }, { "epoch": 1.6759943085574904, "grad_norm": 5.3017425537109375, "learning_rate": 6.71640769388733e-05, "loss": 0.5708, "step": 24736 }, { "epoch": 1.6760620638254624, "grad_norm": 7.885801315307617, "learning_rate": 6.716270791977549e-05, "loss": 0.7845, "step": 24737 }, { "epoch": 1.6761298190934344, "grad_norm": 4.967432498931885, "learning_rate": 6.716133890067768e-05, "loss": 0.7199, "step": 24738 }, { "epoch": 1.6761975743614066, "grad_norm": 6.330526828765869, "learning_rate": 6.715996988157986e-05, "loss": 0.6689, "step": 24739 }, { "epoch": 1.6762653296293788, "grad_norm": 5.163723945617676, "learning_rate": 6.715860086248204e-05, "loss": 0.7097, "step": 24740 }, { "epoch": 1.6763330848973508, "grad_norm": 6.017573356628418, "learning_rate": 6.715723184338422e-05, "loss": 0.8498, "step": 24741 }, { "epoch": 1.6764008401653228, "grad_norm": 5.817440986633301, "learning_rate": 6.71558628242864e-05, "loss": 0.6906, "step": 24742 }, { "epoch": 1.676468595433295, "grad_norm": 8.72060489654541, "learning_rate": 6.71544938051886e-05, "loss": 0.7478, "step": 24743 }, { "epoch": 1.6765363507012672, "grad_norm": 7.682286262512207, "learning_rate": 6.715312478609077e-05, "loss": 0.5593, "step": 24744 }, { "epoch": 1.6766041059692391, "grad_norm": 5.456862449645996, "learning_rate": 6.715175576699296e-05, "loss": 0.3124, "step": 24745 }, { "epoch": 1.6766718612372111, "grad_norm": 5.088461399078369, "learning_rate": 6.715038674789514e-05, "loss": 0.4169, "step": 24746 }, { "epoch": 1.6767396165051833, "grad_norm": 5.0297064781188965, "learning_rate": 6.714901772879733e-05, "loss": 0.6519, "step": 24747 }, { "epoch": 1.6768073717731553, "grad_norm": 4.285942077636719, "learning_rate": 6.714764870969951e-05, "loss": 0.5676, "step": 24748 }, { "epoch": 1.6768751270411273, "grad_norm": 7.083550453186035, "learning_rate": 6.714627969060169e-05, "loss": 0.5792, "step": 24749 }, { "epoch": 1.6769428823090995, "grad_norm": 5.234084606170654, "learning_rate": 6.714491067150387e-05, "loss": 0.5203, "step": 24750 }, { "epoch": 1.6770106375770717, "grad_norm": 6.183786392211914, "learning_rate": 6.714354165240605e-05, "loss": 0.7266, "step": 24751 }, { "epoch": 1.6770783928450437, "grad_norm": 8.108631134033203, "learning_rate": 6.714217263330824e-05, "loss": 0.5754, "step": 24752 }, { "epoch": 1.6771461481130157, "grad_norm": 9.125577926635742, "learning_rate": 6.714080361421042e-05, "loss": 0.7545, "step": 24753 }, { "epoch": 1.6772139033809879, "grad_norm": 6.246669292449951, "learning_rate": 6.71394345951126e-05, "loss": 0.609, "step": 24754 }, { "epoch": 1.67728165864896, "grad_norm": 4.499879837036133, "learning_rate": 6.713806557601479e-05, "loss": 0.7067, "step": 24755 }, { "epoch": 1.677349413916932, "grad_norm": 4.4509992599487305, "learning_rate": 6.713669655691697e-05, "loss": 0.4813, "step": 24756 }, { "epoch": 1.677417169184904, "grad_norm": 5.880279064178467, "learning_rate": 6.713532753781916e-05, "loss": 0.6727, "step": 24757 }, { "epoch": 1.6774849244528762, "grad_norm": 6.166238784790039, "learning_rate": 6.713395851872134e-05, "loss": 0.6671, "step": 24758 }, { "epoch": 1.6775526797208484, "grad_norm": 8.547955513000488, "learning_rate": 6.713258949962352e-05, "loss": 0.6746, "step": 24759 }, { "epoch": 1.6776204349888204, "grad_norm": 4.928234100341797, "learning_rate": 6.71312204805257e-05, "loss": 0.7342, "step": 24760 }, { "epoch": 1.6776881902567924, "grad_norm": 6.626360893249512, "learning_rate": 6.71298514614279e-05, "loss": 0.6875, "step": 24761 }, { "epoch": 1.6777559455247646, "grad_norm": 5.715549468994141, "learning_rate": 6.712848244233008e-05, "loss": 0.7002, "step": 24762 }, { "epoch": 1.6778237007927368, "grad_norm": 5.817517280578613, "learning_rate": 6.712711342323226e-05, "loss": 0.779, "step": 24763 }, { "epoch": 1.6778914560607088, "grad_norm": 5.83760929107666, "learning_rate": 6.712574440413444e-05, "loss": 0.8132, "step": 24764 }, { "epoch": 1.6779592113286808, "grad_norm": 5.836846828460693, "learning_rate": 6.712437538503662e-05, "loss": 0.575, "step": 24765 }, { "epoch": 1.678026966596653, "grad_norm": 4.079193592071533, "learning_rate": 6.712300636593881e-05, "loss": 0.5269, "step": 24766 }, { "epoch": 1.678094721864625, "grad_norm": 5.382716655731201, "learning_rate": 6.712163734684099e-05, "loss": 0.799, "step": 24767 }, { "epoch": 1.678162477132597, "grad_norm": 7.469902515411377, "learning_rate": 6.712026832774317e-05, "loss": 0.6266, "step": 24768 }, { "epoch": 1.6782302324005691, "grad_norm": 6.325541973114014, "learning_rate": 6.711889930864535e-05, "loss": 0.5656, "step": 24769 }, { "epoch": 1.6782979876685413, "grad_norm": 5.8535356521606445, "learning_rate": 6.711753028954754e-05, "loss": 0.9297, "step": 24770 }, { "epoch": 1.6783657429365133, "grad_norm": 7.224210262298584, "learning_rate": 6.711616127044973e-05, "loss": 0.9978, "step": 24771 }, { "epoch": 1.6784334982044853, "grad_norm": 4.862548351287842, "learning_rate": 6.71147922513519e-05, "loss": 0.6733, "step": 24772 }, { "epoch": 1.6785012534724575, "grad_norm": 4.642857551574707, "learning_rate": 6.711342323225409e-05, "loss": 0.7455, "step": 24773 }, { "epoch": 1.6785690087404297, "grad_norm": 8.513935089111328, "learning_rate": 6.711205421315627e-05, "loss": 0.601, "step": 24774 }, { "epoch": 1.6786367640084017, "grad_norm": 4.755556583404541, "learning_rate": 6.711068519405846e-05, "loss": 0.7216, "step": 24775 }, { "epoch": 1.6787045192763737, "grad_norm": 8.14183521270752, "learning_rate": 6.710931617496064e-05, "loss": 0.4335, "step": 24776 }, { "epoch": 1.6787722745443459, "grad_norm": 7.020402908325195, "learning_rate": 6.710794715586282e-05, "loss": 0.6062, "step": 24777 }, { "epoch": 1.678840029812318, "grad_norm": 5.9442853927612305, "learning_rate": 6.710657813676501e-05, "loss": 0.7216, "step": 24778 }, { "epoch": 1.67890778508029, "grad_norm": 6.801060676574707, "learning_rate": 6.71052091176672e-05, "loss": 0.6231, "step": 24779 }, { "epoch": 1.678975540348262, "grad_norm": 6.130906581878662, "learning_rate": 6.710384009856938e-05, "loss": 0.866, "step": 24780 }, { "epoch": 1.6790432956162342, "grad_norm": 4.870330333709717, "learning_rate": 6.710247107947157e-05, "loss": 0.6968, "step": 24781 }, { "epoch": 1.6791110508842062, "grad_norm": 5.370277404785156, "learning_rate": 6.710110206037375e-05, "loss": 0.7647, "step": 24782 }, { "epoch": 1.6791788061521782, "grad_norm": 7.842683792114258, "learning_rate": 6.709973304127593e-05, "loss": 0.6148, "step": 24783 }, { "epoch": 1.6792465614201504, "grad_norm": 6.285684108734131, "learning_rate": 6.709836402217812e-05, "loss": 0.7716, "step": 24784 }, { "epoch": 1.6793143166881226, "grad_norm": 5.6140642166137695, "learning_rate": 6.70969950030803e-05, "loss": 0.5458, "step": 24785 }, { "epoch": 1.6793820719560946, "grad_norm": 7.604360103607178, "learning_rate": 6.709562598398248e-05, "loss": 0.559, "step": 24786 }, { "epoch": 1.6794498272240665, "grad_norm": 5.330725193023682, "learning_rate": 6.709425696488466e-05, "loss": 0.7807, "step": 24787 }, { "epoch": 1.6795175824920388, "grad_norm": 6.386613368988037, "learning_rate": 6.709288794578685e-05, "loss": 0.6498, "step": 24788 }, { "epoch": 1.679585337760011, "grad_norm": 5.857038974761963, "learning_rate": 6.709151892668904e-05, "loss": 0.6684, "step": 24789 }, { "epoch": 1.679653093027983, "grad_norm": 5.3184919357299805, "learning_rate": 6.709014990759122e-05, "loss": 0.704, "step": 24790 }, { "epoch": 1.679720848295955, "grad_norm": 5.409330368041992, "learning_rate": 6.70887808884934e-05, "loss": 0.7285, "step": 24791 }, { "epoch": 1.6797886035639271, "grad_norm": 5.811436653137207, "learning_rate": 6.708741186939558e-05, "loss": 0.5875, "step": 24792 }, { "epoch": 1.6798563588318993, "grad_norm": 5.600065231323242, "learning_rate": 6.708604285029777e-05, "loss": 0.6336, "step": 24793 }, { "epoch": 1.6799241140998713, "grad_norm": 6.102730751037598, "learning_rate": 6.708467383119995e-05, "loss": 0.4488, "step": 24794 }, { "epoch": 1.6799918693678433, "grad_norm": 5.5457444190979, "learning_rate": 6.708330481210213e-05, "loss": 0.7299, "step": 24795 }, { "epoch": 1.6800596246358155, "grad_norm": 6.609248161315918, "learning_rate": 6.708193579300432e-05, "loss": 0.7016, "step": 24796 }, { "epoch": 1.6801273799037875, "grad_norm": 6.676745414733887, "learning_rate": 6.70805667739065e-05, "loss": 0.7448, "step": 24797 }, { "epoch": 1.6801951351717594, "grad_norm": 5.496711254119873, "learning_rate": 6.707919775480869e-05, "loss": 0.6337, "step": 24798 }, { "epoch": 1.6802628904397316, "grad_norm": 5.9552507400512695, "learning_rate": 6.707782873571087e-05, "loss": 0.6263, "step": 24799 }, { "epoch": 1.6803306457077039, "grad_norm": 4.842757701873779, "learning_rate": 6.707645971661305e-05, "loss": 0.6484, "step": 24800 }, { "epoch": 1.6803984009756758, "grad_norm": 6.580752849578857, "learning_rate": 6.707509069751523e-05, "loss": 0.5786, "step": 24801 }, { "epoch": 1.6804661562436478, "grad_norm": 4.990447044372559, "learning_rate": 6.707372167841741e-05, "loss": 0.7492, "step": 24802 }, { "epoch": 1.68053391151162, "grad_norm": 9.573478698730469, "learning_rate": 6.70723526593196e-05, "loss": 0.7176, "step": 24803 }, { "epoch": 1.6806016667795922, "grad_norm": 5.841297149658203, "learning_rate": 6.707098364022178e-05, "loss": 0.6496, "step": 24804 }, { "epoch": 1.6806694220475642, "grad_norm": 6.168092250823975, "learning_rate": 6.706961462112397e-05, "loss": 0.7166, "step": 24805 }, { "epoch": 1.6807371773155362, "grad_norm": 4.61181116104126, "learning_rate": 6.706824560202615e-05, "loss": 0.67, "step": 24806 }, { "epoch": 1.6808049325835084, "grad_norm": 9.332901000976562, "learning_rate": 6.706687658292834e-05, "loss": 0.5387, "step": 24807 }, { "epoch": 1.6808726878514806, "grad_norm": 5.8189697265625, "learning_rate": 6.706550756383052e-05, "loss": 0.7294, "step": 24808 }, { "epoch": 1.6809404431194526, "grad_norm": 8.06373119354248, "learning_rate": 6.70641385447327e-05, "loss": 0.5309, "step": 24809 }, { "epoch": 1.6810081983874245, "grad_norm": 7.241672992706299, "learning_rate": 6.706276952563488e-05, "loss": 0.7016, "step": 24810 }, { "epoch": 1.6810759536553967, "grad_norm": 5.645560264587402, "learning_rate": 6.706140050653706e-05, "loss": 0.7102, "step": 24811 }, { "epoch": 1.681143708923369, "grad_norm": 5.1911396980285645, "learning_rate": 6.706003148743925e-05, "loss": 0.5455, "step": 24812 }, { "epoch": 1.681211464191341, "grad_norm": 6.8172101974487305, "learning_rate": 6.705866246834144e-05, "loss": 0.753, "step": 24813 }, { "epoch": 1.681279219459313, "grad_norm": 8.302848815917969, "learning_rate": 6.705729344924362e-05, "loss": 0.8284, "step": 24814 }, { "epoch": 1.6813469747272851, "grad_norm": 3.754272222518921, "learning_rate": 6.70559244301458e-05, "loss": 0.347, "step": 24815 }, { "epoch": 1.681414729995257, "grad_norm": 4.8704609870910645, "learning_rate": 6.705455541104799e-05, "loss": 0.6916, "step": 24816 }, { "epoch": 1.681482485263229, "grad_norm": 5.928276062011719, "learning_rate": 6.705318639195017e-05, "loss": 0.6388, "step": 24817 }, { "epoch": 1.6815502405312013, "grad_norm": 6.14915657043457, "learning_rate": 6.705181737285235e-05, "loss": 0.7155, "step": 24818 }, { "epoch": 1.6816179957991735, "grad_norm": 5.186965465545654, "learning_rate": 6.705044835375453e-05, "loss": 0.5234, "step": 24819 }, { "epoch": 1.6816857510671455, "grad_norm": 6.06053352355957, "learning_rate": 6.704907933465671e-05, "loss": 0.7035, "step": 24820 }, { "epoch": 1.6817535063351174, "grad_norm": 6.018510818481445, "learning_rate": 6.70477103155589e-05, "loss": 0.7267, "step": 24821 }, { "epoch": 1.6818212616030896, "grad_norm": 6.024012565612793, "learning_rate": 6.704634129646109e-05, "loss": 0.6537, "step": 24822 }, { "epoch": 1.6818890168710618, "grad_norm": 7.035159587860107, "learning_rate": 6.704497227736327e-05, "loss": 0.8328, "step": 24823 }, { "epoch": 1.6819567721390338, "grad_norm": 6.555884838104248, "learning_rate": 6.704360325826546e-05, "loss": 0.5135, "step": 24824 }, { "epoch": 1.6820245274070058, "grad_norm": 5.398142337799072, "learning_rate": 6.704223423916764e-05, "loss": 0.6497, "step": 24825 }, { "epoch": 1.682092282674978, "grad_norm": 4.494560241699219, "learning_rate": 6.704086522006982e-05, "loss": 0.579, "step": 24826 }, { "epoch": 1.6821600379429502, "grad_norm": 6.072434425354004, "learning_rate": 6.703949620097201e-05, "loss": 0.6285, "step": 24827 }, { "epoch": 1.6822277932109222, "grad_norm": 6.314568996429443, "learning_rate": 6.70381271818742e-05, "loss": 0.7058, "step": 24828 }, { "epoch": 1.6822955484788942, "grad_norm": 5.264143466949463, "learning_rate": 6.703675816277637e-05, "loss": 0.8142, "step": 24829 }, { "epoch": 1.6823633037468664, "grad_norm": 6.3176703453063965, "learning_rate": 6.703538914367857e-05, "loss": 0.881, "step": 24830 }, { "epoch": 1.6824310590148384, "grad_norm": 5.5052690505981445, "learning_rate": 6.703402012458075e-05, "loss": 0.7648, "step": 24831 }, { "epoch": 1.6824988142828103, "grad_norm": 4.935929775238037, "learning_rate": 6.703265110548293e-05, "loss": 0.5854, "step": 24832 }, { "epoch": 1.6825665695507825, "grad_norm": 6.856173515319824, "learning_rate": 6.703128208638511e-05, "loss": 0.4969, "step": 24833 }, { "epoch": 1.6826343248187547, "grad_norm": 6.953483581542969, "learning_rate": 6.702991306728729e-05, "loss": 0.603, "step": 24834 }, { "epoch": 1.6827020800867267, "grad_norm": 5.088242530822754, "learning_rate": 6.702854404818948e-05, "loss": 0.5989, "step": 24835 }, { "epoch": 1.6827698353546987, "grad_norm": 5.894502639770508, "learning_rate": 6.702717502909166e-05, "loss": 0.6469, "step": 24836 }, { "epoch": 1.682837590622671, "grad_norm": 3.9701390266418457, "learning_rate": 6.702580600999384e-05, "loss": 0.5304, "step": 24837 }, { "epoch": 1.6829053458906431, "grad_norm": 5.064632415771484, "learning_rate": 6.702443699089602e-05, "loss": 0.5726, "step": 24838 }, { "epoch": 1.682973101158615, "grad_norm": 4.826971530914307, "learning_rate": 6.702306797179822e-05, "loss": 0.5592, "step": 24839 }, { "epoch": 1.683040856426587, "grad_norm": 8.492714881896973, "learning_rate": 6.70216989527004e-05, "loss": 0.5109, "step": 24840 }, { "epoch": 1.6831086116945593, "grad_norm": 7.207273483276367, "learning_rate": 6.702032993360258e-05, "loss": 0.76, "step": 24841 }, { "epoch": 1.6831763669625315, "grad_norm": 4.59871768951416, "learning_rate": 6.701896091450476e-05, "loss": 0.6648, "step": 24842 }, { "epoch": 1.6832441222305035, "grad_norm": 6.087802410125732, "learning_rate": 6.701759189540694e-05, "loss": 0.5418, "step": 24843 }, { "epoch": 1.6833118774984754, "grad_norm": 8.289163589477539, "learning_rate": 6.701622287630913e-05, "loss": 0.8906, "step": 24844 }, { "epoch": 1.6833796327664476, "grad_norm": 7.885702610015869, "learning_rate": 6.701485385721131e-05, "loss": 0.7011, "step": 24845 }, { "epoch": 1.6834473880344196, "grad_norm": 5.164073467254639, "learning_rate": 6.70134848381135e-05, "loss": 0.6313, "step": 24846 }, { "epoch": 1.6835151433023916, "grad_norm": 6.702796459197998, "learning_rate": 6.701211581901568e-05, "loss": 0.711, "step": 24847 }, { "epoch": 1.6835828985703638, "grad_norm": 6.18422269821167, "learning_rate": 6.701074679991787e-05, "loss": 0.7477, "step": 24848 }, { "epoch": 1.683650653838336, "grad_norm": 6.2550153732299805, "learning_rate": 6.700937778082005e-05, "loss": 0.7135, "step": 24849 }, { "epoch": 1.683718409106308, "grad_norm": 5.551297187805176, "learning_rate": 6.700800876172223e-05, "loss": 0.4674, "step": 24850 }, { "epoch": 1.68378616437428, "grad_norm": 5.166830062866211, "learning_rate": 6.700663974262441e-05, "loss": 0.695, "step": 24851 }, { "epoch": 1.6838539196422522, "grad_norm": 5.987070083618164, "learning_rate": 6.700527072352659e-05, "loss": 0.6848, "step": 24852 }, { "epoch": 1.6839216749102244, "grad_norm": 6.842796325683594, "learning_rate": 6.700390170442878e-05, "loss": 0.7161, "step": 24853 }, { "epoch": 1.6839894301781964, "grad_norm": 5.434830188751221, "learning_rate": 6.700253268533096e-05, "loss": 0.5453, "step": 24854 }, { "epoch": 1.6840571854461683, "grad_norm": 4.709153175354004, "learning_rate": 6.700116366623314e-05, "loss": 0.6629, "step": 24855 }, { "epoch": 1.6841249407141405, "grad_norm": 6.848333358764648, "learning_rate": 6.699979464713533e-05, "loss": 0.5505, "step": 24856 }, { "epoch": 1.6841926959821127, "grad_norm": 5.671286582946777, "learning_rate": 6.69984256280375e-05, "loss": 0.7091, "step": 24857 }, { "epoch": 1.6842604512500847, "grad_norm": 4.4879150390625, "learning_rate": 6.69970566089397e-05, "loss": 0.7012, "step": 24858 }, { "epoch": 1.6843282065180567, "grad_norm": 7.751883506774902, "learning_rate": 6.699568758984188e-05, "loss": 0.658, "step": 24859 }, { "epoch": 1.684395961786029, "grad_norm": 6.729503154754639, "learning_rate": 6.699431857074406e-05, "loss": 0.7526, "step": 24860 }, { "epoch": 1.684463717054001, "grad_norm": 5.290975093841553, "learning_rate": 6.699294955164624e-05, "loss": 0.547, "step": 24861 }, { "epoch": 1.6845314723219729, "grad_norm": 4.149670600891113, "learning_rate": 6.699158053254843e-05, "loss": 0.599, "step": 24862 }, { "epoch": 1.684599227589945, "grad_norm": 6.902742862701416, "learning_rate": 6.699021151345061e-05, "loss": 0.6581, "step": 24863 }, { "epoch": 1.6846669828579173, "grad_norm": 6.066167831420898, "learning_rate": 6.69888424943528e-05, "loss": 0.767, "step": 24864 }, { "epoch": 1.6847347381258893, "grad_norm": 4.975260257720947, "learning_rate": 6.698747347525498e-05, "loss": 0.6768, "step": 24865 }, { "epoch": 1.6848024933938612, "grad_norm": 6.647672653198242, "learning_rate": 6.698610445615716e-05, "loss": 0.7267, "step": 24866 }, { "epoch": 1.6848702486618334, "grad_norm": 6.1413960456848145, "learning_rate": 6.698473543705935e-05, "loss": 0.812, "step": 24867 }, { "epoch": 1.6849380039298056, "grad_norm": 4.434695243835449, "learning_rate": 6.698336641796153e-05, "loss": 0.5218, "step": 24868 }, { "epoch": 1.6850057591977776, "grad_norm": 4.933608055114746, "learning_rate": 6.698199739886371e-05, "loss": 0.8086, "step": 24869 }, { "epoch": 1.6850735144657496, "grad_norm": 6.237240314483643, "learning_rate": 6.698062837976589e-05, "loss": 0.5863, "step": 24870 }, { "epoch": 1.6851412697337218, "grad_norm": 6.954333305358887, "learning_rate": 6.697925936066808e-05, "loss": 0.7897, "step": 24871 }, { "epoch": 1.685209025001694, "grad_norm": 5.047242641448975, "learning_rate": 6.697789034157026e-05, "loss": 0.6205, "step": 24872 }, { "epoch": 1.685276780269666, "grad_norm": 5.288197994232178, "learning_rate": 6.697652132247245e-05, "loss": 0.6295, "step": 24873 }, { "epoch": 1.685344535537638, "grad_norm": 4.627950191497803, "learning_rate": 6.697515230337464e-05, "loss": 0.8034, "step": 24874 }, { "epoch": 1.6854122908056102, "grad_norm": 5.392894268035889, "learning_rate": 6.697378328427682e-05, "loss": 0.7237, "step": 24875 }, { "epoch": 1.6854800460735824, "grad_norm": 4.237034797668457, "learning_rate": 6.6972414265179e-05, "loss": 0.7015, "step": 24876 }, { "epoch": 1.6855478013415544, "grad_norm": 5.652274131774902, "learning_rate": 6.69710452460812e-05, "loss": 0.5259, "step": 24877 }, { "epoch": 1.6856155566095263, "grad_norm": 6.13096809387207, "learning_rate": 6.696967622698337e-05, "loss": 0.4902, "step": 24878 }, { "epoch": 1.6856833118774985, "grad_norm": 4.9084391593933105, "learning_rate": 6.696830720788555e-05, "loss": 0.5454, "step": 24879 }, { "epoch": 1.6857510671454705, "grad_norm": 5.867831707000732, "learning_rate": 6.696693818878775e-05, "loss": 0.4763, "step": 24880 }, { "epoch": 1.6858188224134425, "grad_norm": 5.096686840057373, "learning_rate": 6.696556916968993e-05, "loss": 0.643, "step": 24881 }, { "epoch": 1.6858865776814147, "grad_norm": 5.903698444366455, "learning_rate": 6.696420015059211e-05, "loss": 0.7266, "step": 24882 }, { "epoch": 1.685954332949387, "grad_norm": 4.862792015075684, "learning_rate": 6.696283113149429e-05, "loss": 0.6793, "step": 24883 }, { "epoch": 1.6860220882173589, "grad_norm": 4.557000637054443, "learning_rate": 6.696146211239647e-05, "loss": 0.6275, "step": 24884 }, { "epoch": 1.6860898434853309, "grad_norm": 6.688540458679199, "learning_rate": 6.696009309329866e-05, "loss": 0.5988, "step": 24885 }, { "epoch": 1.686157598753303, "grad_norm": 6.319003582000732, "learning_rate": 6.695872407420084e-05, "loss": 0.614, "step": 24886 }, { "epoch": 1.6862253540212753, "grad_norm": 5.599967956542969, "learning_rate": 6.695735505510302e-05, "loss": 0.7727, "step": 24887 }, { "epoch": 1.6862931092892472, "grad_norm": 4.638634204864502, "learning_rate": 6.69559860360052e-05, "loss": 0.5849, "step": 24888 }, { "epoch": 1.6863608645572192, "grad_norm": 8.904260635375977, "learning_rate": 6.695461701690738e-05, "loss": 0.9098, "step": 24889 }, { "epoch": 1.6864286198251914, "grad_norm": 7.689660549163818, "learning_rate": 6.695324799780958e-05, "loss": 0.6647, "step": 24890 }, { "epoch": 1.6864963750931636, "grad_norm": 6.8741230964660645, "learning_rate": 6.695187897871176e-05, "loss": 0.5197, "step": 24891 }, { "epoch": 1.6865641303611356, "grad_norm": 6.397198677062988, "learning_rate": 6.695050995961394e-05, "loss": 0.7739, "step": 24892 }, { "epoch": 1.6866318856291076, "grad_norm": 8.197042465209961, "learning_rate": 6.694914094051612e-05, "loss": 0.655, "step": 24893 }, { "epoch": 1.6866996408970798, "grad_norm": 8.166677474975586, "learning_rate": 6.694777192141831e-05, "loss": 0.8336, "step": 24894 }, { "epoch": 1.6867673961650518, "grad_norm": 6.747457981109619, "learning_rate": 6.69464029023205e-05, "loss": 0.5758, "step": 24895 }, { "epoch": 1.6868351514330238, "grad_norm": 5.457634449005127, "learning_rate": 6.694503388322267e-05, "loss": 0.7678, "step": 24896 }, { "epoch": 1.686902906700996, "grad_norm": 4.788967132568359, "learning_rate": 6.694366486412485e-05, "loss": 0.7063, "step": 24897 }, { "epoch": 1.6869706619689682, "grad_norm": 6.535849571228027, "learning_rate": 6.694229584502704e-05, "loss": 1.0037, "step": 24898 }, { "epoch": 1.6870384172369401, "grad_norm": 5.165244102478027, "learning_rate": 6.694092682592923e-05, "loss": 0.8088, "step": 24899 }, { "epoch": 1.6871061725049121, "grad_norm": 6.168712615966797, "learning_rate": 6.693955780683141e-05, "loss": 0.7681, "step": 24900 }, { "epoch": 1.6871739277728843, "grad_norm": 5.878838539123535, "learning_rate": 6.693818878773359e-05, "loss": 0.8525, "step": 24901 }, { "epoch": 1.6872416830408565, "grad_norm": 14.397567749023438, "learning_rate": 6.693681976863577e-05, "loss": 0.7416, "step": 24902 }, { "epoch": 1.6873094383088285, "grad_norm": 5.229006290435791, "learning_rate": 6.693545074953796e-05, "loss": 0.4552, "step": 24903 }, { "epoch": 1.6873771935768005, "grad_norm": 4.621506214141846, "learning_rate": 6.693408173044014e-05, "loss": 0.6569, "step": 24904 }, { "epoch": 1.6874449488447727, "grad_norm": 5.735963344573975, "learning_rate": 6.693271271134232e-05, "loss": 0.7441, "step": 24905 }, { "epoch": 1.687512704112745, "grad_norm": 4.835507869720459, "learning_rate": 6.69313436922445e-05, "loss": 0.6051, "step": 24906 }, { "epoch": 1.6875804593807169, "grad_norm": 5.483896732330322, "learning_rate": 6.692997467314669e-05, "loss": 0.6034, "step": 24907 }, { "epoch": 1.6876482146486889, "grad_norm": 6.128487586975098, "learning_rate": 6.692860565404888e-05, "loss": 0.5838, "step": 24908 }, { "epoch": 1.687715969916661, "grad_norm": 6.20028829574585, "learning_rate": 6.692723663495106e-05, "loss": 0.5792, "step": 24909 }, { "epoch": 1.6877837251846333, "grad_norm": 6.177369594573975, "learning_rate": 6.692586761585324e-05, "loss": 0.753, "step": 24910 }, { "epoch": 1.687851480452605, "grad_norm": 6.82077169418335, "learning_rate": 6.692449859675542e-05, "loss": 0.5844, "step": 24911 }, { "epoch": 1.6879192357205772, "grad_norm": 6.214969158172607, "learning_rate": 6.69231295776576e-05, "loss": 0.5654, "step": 24912 }, { "epoch": 1.6879869909885494, "grad_norm": 3.587223768234253, "learning_rate": 6.69217605585598e-05, "loss": 0.5531, "step": 24913 }, { "epoch": 1.6880547462565214, "grad_norm": 5.979495525360107, "learning_rate": 6.692039153946197e-05, "loss": 0.6536, "step": 24914 }, { "epoch": 1.6881225015244934, "grad_norm": 7.275570869445801, "learning_rate": 6.691902252036416e-05, "loss": 0.6187, "step": 24915 }, { "epoch": 1.6881902567924656, "grad_norm": 11.01311206817627, "learning_rate": 6.691765350126634e-05, "loss": 0.7534, "step": 24916 }, { "epoch": 1.6882580120604378, "grad_norm": 9.063957214355469, "learning_rate": 6.691628448216853e-05, "loss": 0.7655, "step": 24917 }, { "epoch": 1.6883257673284098, "grad_norm": 6.801476001739502, "learning_rate": 6.691491546307071e-05, "loss": 0.7913, "step": 24918 }, { "epoch": 1.6883935225963818, "grad_norm": 5.127294063568115, "learning_rate": 6.691354644397289e-05, "loss": 0.5853, "step": 24919 }, { "epoch": 1.688461277864354, "grad_norm": 7.4261064529418945, "learning_rate": 6.691217742487508e-05, "loss": 0.6926, "step": 24920 }, { "epoch": 1.6885290331323262, "grad_norm": 5.3057379722595215, "learning_rate": 6.691080840577726e-05, "loss": 0.7037, "step": 24921 }, { "epoch": 1.6885967884002981, "grad_norm": 6.727664947509766, "learning_rate": 6.690943938667944e-05, "loss": 0.7684, "step": 24922 }, { "epoch": 1.6886645436682701, "grad_norm": 4.622579097747803, "learning_rate": 6.690807036758164e-05, "loss": 0.4509, "step": 24923 }, { "epoch": 1.6887322989362423, "grad_norm": 6.05672550201416, "learning_rate": 6.690670134848382e-05, "loss": 0.6677, "step": 24924 }, { "epoch": 1.6888000542042145, "grad_norm": 6.385519504547119, "learning_rate": 6.6905332329386e-05, "loss": 0.951, "step": 24925 }, { "epoch": 1.6888678094721865, "grad_norm": 6.9078474044799805, "learning_rate": 6.690396331028819e-05, "loss": 0.7104, "step": 24926 }, { "epoch": 1.6889355647401585, "grad_norm": 5.600029945373535, "learning_rate": 6.690259429119037e-05, "loss": 0.7326, "step": 24927 }, { "epoch": 1.6890033200081307, "grad_norm": 5.599486351013184, "learning_rate": 6.690122527209255e-05, "loss": 0.871, "step": 24928 }, { "epoch": 1.6890710752761027, "grad_norm": 4.861745357513428, "learning_rate": 6.689985625299473e-05, "loss": 0.6091, "step": 24929 }, { "epoch": 1.6891388305440747, "grad_norm": 5.198245525360107, "learning_rate": 6.689848723389691e-05, "loss": 0.918, "step": 24930 }, { "epoch": 1.6892065858120469, "grad_norm": 5.151970863342285, "learning_rate": 6.689711821479911e-05, "loss": 0.6698, "step": 24931 }, { "epoch": 1.689274341080019, "grad_norm": 4.277858257293701, "learning_rate": 6.689574919570129e-05, "loss": 0.7025, "step": 24932 }, { "epoch": 1.689342096347991, "grad_norm": 5.5644965171813965, "learning_rate": 6.689438017660347e-05, "loss": 0.7327, "step": 24933 }, { "epoch": 1.689409851615963, "grad_norm": 9.408788681030273, "learning_rate": 6.689301115750565e-05, "loss": 0.4757, "step": 24934 }, { "epoch": 1.6894776068839352, "grad_norm": 6.718297004699707, "learning_rate": 6.689164213840783e-05, "loss": 0.6955, "step": 24935 }, { "epoch": 1.6895453621519074, "grad_norm": 5.6667914390563965, "learning_rate": 6.689027311931002e-05, "loss": 0.7928, "step": 24936 }, { "epoch": 1.6896131174198794, "grad_norm": 6.2902350425720215, "learning_rate": 6.68889041002122e-05, "loss": 0.7133, "step": 24937 }, { "epoch": 1.6896808726878514, "grad_norm": 4.84864616394043, "learning_rate": 6.688753508111438e-05, "loss": 0.5483, "step": 24938 }, { "epoch": 1.6897486279558236, "grad_norm": 6.141127586364746, "learning_rate": 6.688616606201656e-05, "loss": 0.8613, "step": 24939 }, { "epoch": 1.6898163832237958, "grad_norm": 5.653985023498535, "learning_rate": 6.688479704291876e-05, "loss": 0.6451, "step": 24940 }, { "epoch": 1.6898841384917678, "grad_norm": 6.482390403747559, "learning_rate": 6.688342802382094e-05, "loss": 0.7379, "step": 24941 }, { "epoch": 1.6899518937597398, "grad_norm": 6.123814582824707, "learning_rate": 6.688205900472312e-05, "loss": 0.8789, "step": 24942 }, { "epoch": 1.690019649027712, "grad_norm": 5.852234363555908, "learning_rate": 6.68806899856253e-05, "loss": 0.5812, "step": 24943 }, { "epoch": 1.690087404295684, "grad_norm": 4.462472915649414, "learning_rate": 6.687932096652748e-05, "loss": 0.665, "step": 24944 }, { "epoch": 1.690155159563656, "grad_norm": 8.73554515838623, "learning_rate": 6.687795194742967e-05, "loss": 0.6047, "step": 24945 }, { "epoch": 1.6902229148316281, "grad_norm": 7.231584072113037, "learning_rate": 6.687658292833185e-05, "loss": 0.8475, "step": 24946 }, { "epoch": 1.6902906700996003, "grad_norm": 5.229128837585449, "learning_rate": 6.687521390923403e-05, "loss": 0.5574, "step": 24947 }, { "epoch": 1.6903584253675723, "grad_norm": 4.485491752624512, "learning_rate": 6.687384489013621e-05, "loss": 0.5788, "step": 24948 }, { "epoch": 1.6904261806355443, "grad_norm": 5.944050312042236, "learning_rate": 6.687247587103841e-05, "loss": 0.5524, "step": 24949 }, { "epoch": 1.6904939359035165, "grad_norm": 5.185772895812988, "learning_rate": 6.687110685194059e-05, "loss": 0.6423, "step": 24950 }, { "epoch": 1.6905616911714887, "grad_norm": 5.588585376739502, "learning_rate": 6.686973783284277e-05, "loss": 0.8166, "step": 24951 }, { "epoch": 1.6906294464394607, "grad_norm": 5.4802775382995605, "learning_rate": 6.686836881374495e-05, "loss": 0.7423, "step": 24952 }, { "epoch": 1.6906972017074327, "grad_norm": 5.514636039733887, "learning_rate": 6.686699979464713e-05, "loss": 0.644, "step": 24953 }, { "epoch": 1.6907649569754049, "grad_norm": 6.138681411743164, "learning_rate": 6.686563077554932e-05, "loss": 0.4849, "step": 24954 }, { "epoch": 1.690832712243377, "grad_norm": 7.31122350692749, "learning_rate": 6.68642617564515e-05, "loss": 0.8128, "step": 24955 }, { "epoch": 1.690900467511349, "grad_norm": 7.142463684082031, "learning_rate": 6.686289273735368e-05, "loss": 0.8252, "step": 24956 }, { "epoch": 1.690968222779321, "grad_norm": 7.5308051109313965, "learning_rate": 6.686152371825586e-05, "loss": 0.6052, "step": 24957 }, { "epoch": 1.6910359780472932, "grad_norm": 6.993082523345947, "learning_rate": 6.686015469915806e-05, "loss": 0.6824, "step": 24958 }, { "epoch": 1.6911037333152654, "grad_norm": 11.312100410461426, "learning_rate": 6.685878568006024e-05, "loss": 0.5961, "step": 24959 }, { "epoch": 1.6911714885832372, "grad_norm": 5.5023603439331055, "learning_rate": 6.685741666096242e-05, "loss": 0.5458, "step": 24960 }, { "epoch": 1.6912392438512094, "grad_norm": 5.324767589569092, "learning_rate": 6.68560476418646e-05, "loss": 0.8021, "step": 24961 }, { "epoch": 1.6913069991191816, "grad_norm": 4.075429916381836, "learning_rate": 6.685467862276678e-05, "loss": 0.4283, "step": 24962 }, { "epoch": 1.6913747543871536, "grad_norm": 5.746015548706055, "learning_rate": 6.685330960366897e-05, "loss": 0.6243, "step": 24963 }, { "epoch": 1.6914425096551255, "grad_norm": 6.30375337600708, "learning_rate": 6.685194058457115e-05, "loss": 0.6802, "step": 24964 }, { "epoch": 1.6915102649230978, "grad_norm": 10.493048667907715, "learning_rate": 6.685057156547333e-05, "loss": 0.536, "step": 24965 }, { "epoch": 1.69157802019107, "grad_norm": 4.894591331481934, "learning_rate": 6.684920254637553e-05, "loss": 0.7296, "step": 24966 }, { "epoch": 1.691645775459042, "grad_norm": 5.362181186676025, "learning_rate": 6.684783352727771e-05, "loss": 0.75, "step": 24967 }, { "epoch": 1.691713530727014, "grad_norm": 7.37858772277832, "learning_rate": 6.684646450817989e-05, "loss": 0.6739, "step": 24968 }, { "epoch": 1.6917812859949861, "grad_norm": 5.00982666015625, "learning_rate": 6.684509548908208e-05, "loss": 0.5111, "step": 24969 }, { "epoch": 1.6918490412629583, "grad_norm": 7.925934791564941, "learning_rate": 6.684372646998426e-05, "loss": 0.5813, "step": 24970 }, { "epoch": 1.6919167965309303, "grad_norm": 5.851308822631836, "learning_rate": 6.684235745088644e-05, "loss": 0.6239, "step": 24971 }, { "epoch": 1.6919845517989023, "grad_norm": 8.066901206970215, "learning_rate": 6.684098843178864e-05, "loss": 0.6169, "step": 24972 }, { "epoch": 1.6920523070668745, "grad_norm": 5.917819023132324, "learning_rate": 6.683961941269082e-05, "loss": 0.6478, "step": 24973 }, { "epoch": 1.6921200623348467, "grad_norm": 6.636482238769531, "learning_rate": 6.6838250393593e-05, "loss": 0.5531, "step": 24974 }, { "epoch": 1.6921878176028187, "grad_norm": 4.874712944030762, "learning_rate": 6.683688137449518e-05, "loss": 0.6324, "step": 24975 }, { "epoch": 1.6922555728707906, "grad_norm": 4.387469291687012, "learning_rate": 6.683551235539736e-05, "loss": 0.6359, "step": 24976 }, { "epoch": 1.6923233281387629, "grad_norm": 5.966111183166504, "learning_rate": 6.683414333629955e-05, "loss": 0.7845, "step": 24977 }, { "epoch": 1.6923910834067348, "grad_norm": 4.725979328155518, "learning_rate": 6.683277431720173e-05, "loss": 0.7, "step": 24978 }, { "epoch": 1.6924588386747068, "grad_norm": 6.364614009857178, "learning_rate": 6.683140529810391e-05, "loss": 0.6731, "step": 24979 }, { "epoch": 1.692526593942679, "grad_norm": 5.935843467712402, "learning_rate": 6.68300362790061e-05, "loss": 0.656, "step": 24980 }, { "epoch": 1.6925943492106512, "grad_norm": 5.656540393829346, "learning_rate": 6.682866725990829e-05, "loss": 0.5569, "step": 24981 }, { "epoch": 1.6926621044786232, "grad_norm": 6.562291145324707, "learning_rate": 6.682729824081047e-05, "loss": 0.653, "step": 24982 }, { "epoch": 1.6927298597465952, "grad_norm": 6.7120680809021, "learning_rate": 6.682592922171265e-05, "loss": 0.6003, "step": 24983 }, { "epoch": 1.6927976150145674, "grad_norm": 5.34395694732666, "learning_rate": 6.682456020261483e-05, "loss": 0.5808, "step": 24984 }, { "epoch": 1.6928653702825396, "grad_norm": 5.375433444976807, "learning_rate": 6.682319118351701e-05, "loss": 0.8197, "step": 24985 }, { "epoch": 1.6929331255505116, "grad_norm": 5.848938465118408, "learning_rate": 6.68218221644192e-05, "loss": 0.521, "step": 24986 }, { "epoch": 1.6930008808184835, "grad_norm": 6.726578235626221, "learning_rate": 6.682045314532138e-05, "loss": 0.5764, "step": 24987 }, { "epoch": 1.6930686360864557, "grad_norm": 5.905828475952148, "learning_rate": 6.681908412622356e-05, "loss": 0.5815, "step": 24988 }, { "epoch": 1.693136391354428, "grad_norm": 4.964723110198975, "learning_rate": 6.681771510712574e-05, "loss": 0.5265, "step": 24989 }, { "epoch": 1.6932041466224, "grad_norm": 4.783426761627197, "learning_rate": 6.681634608802792e-05, "loss": 0.4794, "step": 24990 }, { "epoch": 1.693271901890372, "grad_norm": 4.553286552429199, "learning_rate": 6.681497706893012e-05, "loss": 0.7023, "step": 24991 }, { "epoch": 1.6933396571583441, "grad_norm": 5.5387067794799805, "learning_rate": 6.68136080498323e-05, "loss": 0.8912, "step": 24992 }, { "epoch": 1.693407412426316, "grad_norm": 4.615874767303467, "learning_rate": 6.681223903073448e-05, "loss": 0.7518, "step": 24993 }, { "epoch": 1.693475167694288, "grad_norm": 6.070943832397461, "learning_rate": 6.681087001163666e-05, "loss": 0.8717, "step": 24994 }, { "epoch": 1.6935429229622603, "grad_norm": 6.163576602935791, "learning_rate": 6.680950099253885e-05, "loss": 0.8177, "step": 24995 }, { "epoch": 1.6936106782302325, "grad_norm": 4.435528755187988, "learning_rate": 6.680813197344103e-05, "loss": 0.4842, "step": 24996 }, { "epoch": 1.6936784334982045, "grad_norm": 5.618228435516357, "learning_rate": 6.680676295434321e-05, "loss": 0.6117, "step": 24997 }, { "epoch": 1.6937461887661764, "grad_norm": 5.315666675567627, "learning_rate": 6.68053939352454e-05, "loss": 0.5958, "step": 24998 }, { "epoch": 1.6938139440341486, "grad_norm": 5.618652820587158, "learning_rate": 6.680402491614757e-05, "loss": 0.6805, "step": 24999 }, { "epoch": 1.6938816993021208, "grad_norm": 6.597662448883057, "learning_rate": 6.680265589704977e-05, "loss": 0.6677, "step": 25000 }, { "epoch": 1.6939494545700928, "grad_norm": 6.490018367767334, "learning_rate": 6.680128687795195e-05, "loss": 0.6123, "step": 25001 }, { "epoch": 1.6940172098380648, "grad_norm": 6.568131446838379, "learning_rate": 6.679991785885413e-05, "loss": 0.6507, "step": 25002 }, { "epoch": 1.694084965106037, "grad_norm": 6.233468055725098, "learning_rate": 6.679854883975631e-05, "loss": 0.6664, "step": 25003 }, { "epoch": 1.6941527203740092, "grad_norm": 5.0560197830200195, "learning_rate": 6.67971798206585e-05, "loss": 0.771, "step": 25004 }, { "epoch": 1.6942204756419812, "grad_norm": 4.404244899749756, "learning_rate": 6.679581080156068e-05, "loss": 0.5469, "step": 25005 }, { "epoch": 1.6942882309099532, "grad_norm": 5.570150375366211, "learning_rate": 6.679444178246286e-05, "loss": 0.6487, "step": 25006 }, { "epoch": 1.6943559861779254, "grad_norm": 5.646016597747803, "learning_rate": 6.679307276336504e-05, "loss": 0.3939, "step": 25007 }, { "epoch": 1.6944237414458976, "grad_norm": 7.90758752822876, "learning_rate": 6.679170374426722e-05, "loss": 0.7296, "step": 25008 }, { "epoch": 1.6944914967138693, "grad_norm": 6.168480396270752, "learning_rate": 6.679033472516942e-05, "loss": 0.7473, "step": 25009 }, { "epoch": 1.6945592519818415, "grad_norm": 4.895854949951172, "learning_rate": 6.67889657060716e-05, "loss": 0.6113, "step": 25010 }, { "epoch": 1.6946270072498137, "grad_norm": 7.343755722045898, "learning_rate": 6.678759668697378e-05, "loss": 0.7478, "step": 25011 }, { "epoch": 1.6946947625177857, "grad_norm": 6.956760883331299, "learning_rate": 6.678622766787597e-05, "loss": 0.7509, "step": 25012 }, { "epoch": 1.6947625177857577, "grad_norm": 5.971587181091309, "learning_rate": 6.678485864877815e-05, "loss": 0.7668, "step": 25013 }, { "epoch": 1.69483027305373, "grad_norm": 5.395804405212402, "learning_rate": 6.678348962968033e-05, "loss": 0.746, "step": 25014 }, { "epoch": 1.694898028321702, "grad_norm": 5.930509090423584, "learning_rate": 6.678212061058253e-05, "loss": 0.7364, "step": 25015 }, { "epoch": 1.694965783589674, "grad_norm": 6.7752275466918945, "learning_rate": 6.678075159148471e-05, "loss": 0.6286, "step": 25016 }, { "epoch": 1.695033538857646, "grad_norm": 5.970183849334717, "learning_rate": 6.677938257238689e-05, "loss": 0.7258, "step": 25017 }, { "epoch": 1.6951012941256183, "grad_norm": 6.386739253997803, "learning_rate": 6.677801355328908e-05, "loss": 0.8416, "step": 25018 }, { "epoch": 1.6951690493935905, "grad_norm": 4.7961201667785645, "learning_rate": 6.677664453419126e-05, "loss": 0.7274, "step": 25019 }, { "epoch": 1.6952368046615625, "grad_norm": 7.113206386566162, "learning_rate": 6.677527551509344e-05, "loss": 0.6677, "step": 25020 }, { "epoch": 1.6953045599295344, "grad_norm": 5.150187969207764, "learning_rate": 6.677390649599562e-05, "loss": 0.433, "step": 25021 }, { "epoch": 1.6953723151975066, "grad_norm": 5.292994499206543, "learning_rate": 6.67725374768978e-05, "loss": 0.8501, "step": 25022 }, { "epoch": 1.6954400704654788, "grad_norm": 5.369309902191162, "learning_rate": 6.67711684578e-05, "loss": 0.6215, "step": 25023 }, { "epoch": 1.6955078257334508, "grad_norm": 4.938060283660889, "learning_rate": 6.676979943870218e-05, "loss": 0.7577, "step": 25024 }, { "epoch": 1.6955755810014228, "grad_norm": 8.528769493103027, "learning_rate": 6.676843041960436e-05, "loss": 0.8652, "step": 25025 }, { "epoch": 1.695643336269395, "grad_norm": 5.736057281494141, "learning_rate": 6.676706140050654e-05, "loss": 0.652, "step": 25026 }, { "epoch": 1.695711091537367, "grad_norm": 7.560767650604248, "learning_rate": 6.676569238140873e-05, "loss": 0.5378, "step": 25027 }, { "epoch": 1.695778846805339, "grad_norm": 7.506313800811768, "learning_rate": 6.676432336231091e-05, "loss": 0.6263, "step": 25028 }, { "epoch": 1.6958466020733112, "grad_norm": 4.98671293258667, "learning_rate": 6.67629543432131e-05, "loss": 0.7146, "step": 25029 }, { "epoch": 1.6959143573412834, "grad_norm": 6.105992794036865, "learning_rate": 6.676158532411527e-05, "loss": 0.7536, "step": 25030 }, { "epoch": 1.6959821126092554, "grad_norm": 5.915815353393555, "learning_rate": 6.676021630501745e-05, "loss": 0.7493, "step": 25031 }, { "epoch": 1.6960498678772273, "grad_norm": 8.413450241088867, "learning_rate": 6.675884728591965e-05, "loss": 0.7883, "step": 25032 }, { "epoch": 1.6961176231451995, "grad_norm": 7.179708480834961, "learning_rate": 6.675747826682183e-05, "loss": 0.8011, "step": 25033 }, { "epoch": 1.6961853784131717, "grad_norm": 4.407844543457031, "learning_rate": 6.675610924772401e-05, "loss": 0.515, "step": 25034 }, { "epoch": 1.6962531336811437, "grad_norm": 8.476737976074219, "learning_rate": 6.675474022862619e-05, "loss": 0.6089, "step": 25035 }, { "epoch": 1.6963208889491157, "grad_norm": 6.964690685272217, "learning_rate": 6.675337120952838e-05, "loss": 0.6354, "step": 25036 }, { "epoch": 1.696388644217088, "grad_norm": 6.092772006988525, "learning_rate": 6.675200219043056e-05, "loss": 0.8462, "step": 25037 }, { "epoch": 1.69645639948506, "grad_norm": 4.893397331237793, "learning_rate": 6.675063317133274e-05, "loss": 0.6175, "step": 25038 }, { "epoch": 1.696524154753032, "grad_norm": 7.26146125793457, "learning_rate": 6.674926415223492e-05, "loss": 0.6571, "step": 25039 }, { "epoch": 1.696591910021004, "grad_norm": 4.923627853393555, "learning_rate": 6.67478951331371e-05, "loss": 0.7146, "step": 25040 }, { "epoch": 1.6966596652889763, "grad_norm": 21.53935432434082, "learning_rate": 6.67465261140393e-05, "loss": 0.5466, "step": 25041 }, { "epoch": 1.6967274205569483, "grad_norm": 4.476802349090576, "learning_rate": 6.674515709494148e-05, "loss": 0.6287, "step": 25042 }, { "epoch": 1.6967951758249202, "grad_norm": 7.479814529418945, "learning_rate": 6.674378807584366e-05, "loss": 0.6224, "step": 25043 }, { "epoch": 1.6968629310928924, "grad_norm": 5.696097373962402, "learning_rate": 6.674241905674584e-05, "loss": 0.6883, "step": 25044 }, { "epoch": 1.6969306863608646, "grad_norm": 6.17031717300415, "learning_rate": 6.674105003764802e-05, "loss": 0.7834, "step": 25045 }, { "epoch": 1.6969984416288366, "grad_norm": 8.674389839172363, "learning_rate": 6.673968101855021e-05, "loss": 0.4694, "step": 25046 }, { "epoch": 1.6970661968968086, "grad_norm": 5.602910995483398, "learning_rate": 6.67383119994524e-05, "loss": 0.6794, "step": 25047 }, { "epoch": 1.6971339521647808, "grad_norm": 5.066091537475586, "learning_rate": 6.673694298035457e-05, "loss": 0.6402, "step": 25048 }, { "epoch": 1.697201707432753, "grad_norm": 5.837429046630859, "learning_rate": 6.673557396125675e-05, "loss": 0.6843, "step": 25049 }, { "epoch": 1.697269462700725, "grad_norm": 5.450244903564453, "learning_rate": 6.673420494215895e-05, "loss": 0.7418, "step": 25050 }, { "epoch": 1.697337217968697, "grad_norm": 10.047053337097168, "learning_rate": 6.673283592306113e-05, "loss": 0.6979, "step": 25051 }, { "epoch": 1.6974049732366692, "grad_norm": 6.650867462158203, "learning_rate": 6.673146690396331e-05, "loss": 0.4796, "step": 25052 }, { "epoch": 1.6974727285046414, "grad_norm": 7.11015510559082, "learning_rate": 6.673009788486549e-05, "loss": 0.7487, "step": 25053 }, { "epoch": 1.6975404837726134, "grad_norm": 6.171910762786865, "learning_rate": 6.672872886576767e-05, "loss": 0.6081, "step": 25054 }, { "epoch": 1.6976082390405853, "grad_norm": 4.794114112854004, "learning_rate": 6.672735984666986e-05, "loss": 0.78, "step": 25055 }, { "epoch": 1.6976759943085575, "grad_norm": 7.26409387588501, "learning_rate": 6.672599082757204e-05, "loss": 0.436, "step": 25056 }, { "epoch": 1.6977437495765297, "grad_norm": 6.492715358734131, "learning_rate": 6.672462180847422e-05, "loss": 0.7539, "step": 25057 }, { "epoch": 1.6978115048445015, "grad_norm": 5.042111396789551, "learning_rate": 6.672325278937642e-05, "loss": 0.7252, "step": 25058 }, { "epoch": 1.6978792601124737, "grad_norm": 6.541836261749268, "learning_rate": 6.67218837702786e-05, "loss": 0.7263, "step": 25059 }, { "epoch": 1.697947015380446, "grad_norm": 5.090924263000488, "learning_rate": 6.672051475118078e-05, "loss": 0.8169, "step": 25060 }, { "epoch": 1.6980147706484179, "grad_norm": 5.2295823097229, "learning_rate": 6.671914573208297e-05, "loss": 0.838, "step": 25061 }, { "epoch": 1.6980825259163899, "grad_norm": 5.839616298675537, "learning_rate": 6.671777671298515e-05, "loss": 0.6765, "step": 25062 }, { "epoch": 1.698150281184362, "grad_norm": 7.393653392791748, "learning_rate": 6.671640769388733e-05, "loss": 0.9168, "step": 25063 }, { "epoch": 1.6982180364523343, "grad_norm": 4.483394622802734, "learning_rate": 6.671503867478953e-05, "loss": 0.6987, "step": 25064 }, { "epoch": 1.6982857917203062, "grad_norm": 5.610342502593994, "learning_rate": 6.671366965569171e-05, "loss": 0.5738, "step": 25065 }, { "epoch": 1.6983535469882782, "grad_norm": 7.7207512855529785, "learning_rate": 6.671230063659389e-05, "loss": 0.6466, "step": 25066 }, { "epoch": 1.6984213022562504, "grad_norm": 4.860000133514404, "learning_rate": 6.671093161749607e-05, "loss": 0.5785, "step": 25067 }, { "epoch": 1.6984890575242226, "grad_norm": 4.805925369262695, "learning_rate": 6.670956259839826e-05, "loss": 0.5183, "step": 25068 }, { "epoch": 1.6985568127921946, "grad_norm": 6.04840087890625, "learning_rate": 6.670819357930044e-05, "loss": 0.7135, "step": 25069 }, { "epoch": 1.6986245680601666, "grad_norm": 4.658386707305908, "learning_rate": 6.670682456020262e-05, "loss": 0.492, "step": 25070 }, { "epoch": 1.6986923233281388, "grad_norm": 5.519449710845947, "learning_rate": 6.67054555411048e-05, "loss": 0.5083, "step": 25071 }, { "epoch": 1.698760078596111, "grad_norm": 5.582700729370117, "learning_rate": 6.670408652200698e-05, "loss": 0.627, "step": 25072 }, { "epoch": 1.698827833864083, "grad_norm": 5.467287540435791, "learning_rate": 6.670271750290918e-05, "loss": 0.6692, "step": 25073 }, { "epoch": 1.698895589132055, "grad_norm": 9.410444259643555, "learning_rate": 6.670134848381136e-05, "loss": 0.7016, "step": 25074 }, { "epoch": 1.6989633444000272, "grad_norm": 6.962559223175049, "learning_rate": 6.669997946471354e-05, "loss": 0.5754, "step": 25075 }, { "epoch": 1.6990310996679991, "grad_norm": 5.768285751342773, "learning_rate": 6.669861044561572e-05, "loss": 0.8199, "step": 25076 }, { "epoch": 1.6990988549359711, "grad_norm": 4.832527160644531, "learning_rate": 6.66972414265179e-05, "loss": 0.6726, "step": 25077 }, { "epoch": 1.6991666102039433, "grad_norm": 7.724383354187012, "learning_rate": 6.669587240742009e-05, "loss": 0.5829, "step": 25078 }, { "epoch": 1.6992343654719155, "grad_norm": 5.707870006561279, "learning_rate": 6.669450338832227e-05, "loss": 0.8021, "step": 25079 }, { "epoch": 1.6993021207398875, "grad_norm": 5.641940593719482, "learning_rate": 6.669313436922445e-05, "loss": 0.7021, "step": 25080 }, { "epoch": 1.6993698760078595, "grad_norm": 6.0799689292907715, "learning_rate": 6.669176535012663e-05, "loss": 0.7064, "step": 25081 }, { "epoch": 1.6994376312758317, "grad_norm": 4.57113790512085, "learning_rate": 6.669039633102883e-05, "loss": 0.577, "step": 25082 }, { "epoch": 1.699505386543804, "grad_norm": 6.111658096313477, "learning_rate": 6.668902731193101e-05, "loss": 0.615, "step": 25083 }, { "epoch": 1.6995731418117759, "grad_norm": 7.082512855529785, "learning_rate": 6.668765829283319e-05, "loss": 0.7178, "step": 25084 }, { "epoch": 1.6996408970797479, "grad_norm": 5.076742649078369, "learning_rate": 6.668628927373537e-05, "loss": 0.8098, "step": 25085 }, { "epoch": 1.69970865234772, "grad_norm": 5.483794689178467, "learning_rate": 6.668492025463755e-05, "loss": 0.5249, "step": 25086 }, { "epoch": 1.6997764076156923, "grad_norm": 5.167780876159668, "learning_rate": 6.668355123553974e-05, "loss": 0.5718, "step": 25087 }, { "epoch": 1.6998441628836642, "grad_norm": 6.602892875671387, "learning_rate": 6.668218221644192e-05, "loss": 0.6511, "step": 25088 }, { "epoch": 1.6999119181516362, "grad_norm": 7.5922932624816895, "learning_rate": 6.66808131973441e-05, "loss": 0.7268, "step": 25089 }, { "epoch": 1.6999796734196084, "grad_norm": 4.7661614418029785, "learning_rate": 6.667944417824628e-05, "loss": 0.6133, "step": 25090 }, { "epoch": 1.7000474286875804, "grad_norm": 6.029719352722168, "learning_rate": 6.667807515914848e-05, "loss": 0.512, "step": 25091 }, { "epoch": 1.7001151839555524, "grad_norm": 4.6257123947143555, "learning_rate": 6.667670614005066e-05, "loss": 0.6222, "step": 25092 }, { "epoch": 1.7001829392235246, "grad_norm": 5.662489414215088, "learning_rate": 6.667533712095284e-05, "loss": 0.8041, "step": 25093 }, { "epoch": 1.7002506944914968, "grad_norm": 5.64774751663208, "learning_rate": 6.667396810185502e-05, "loss": 0.6182, "step": 25094 }, { "epoch": 1.7003184497594688, "grad_norm": 5.898162364959717, "learning_rate": 6.66725990827572e-05, "loss": 0.4836, "step": 25095 }, { "epoch": 1.7003862050274408, "grad_norm": 5.754024982452393, "learning_rate": 6.667123006365939e-05, "loss": 0.928, "step": 25096 }, { "epoch": 1.700453960295413, "grad_norm": 4.857344150543213, "learning_rate": 6.666986104456157e-05, "loss": 0.4798, "step": 25097 }, { "epoch": 1.7005217155633852, "grad_norm": 5.415783405303955, "learning_rate": 6.666849202546375e-05, "loss": 0.6582, "step": 25098 }, { "epoch": 1.7005894708313571, "grad_norm": 6.53661584854126, "learning_rate": 6.666712300636593e-05, "loss": 0.8871, "step": 25099 }, { "epoch": 1.7006572260993291, "grad_norm": 4.5701003074646, "learning_rate": 6.666575398726811e-05, "loss": 0.455, "step": 25100 }, { "epoch": 1.7007249813673013, "grad_norm": 5.380633354187012, "learning_rate": 6.666438496817031e-05, "loss": 0.7442, "step": 25101 }, { "epoch": 1.7007927366352735, "grad_norm": 5.642620086669922, "learning_rate": 6.666301594907249e-05, "loss": 0.5865, "step": 25102 }, { "epoch": 1.7008604919032455, "grad_norm": 4.6340742111206055, "learning_rate": 6.666164692997467e-05, "loss": 0.7186, "step": 25103 }, { "epoch": 1.7009282471712175, "grad_norm": 5.192450046539307, "learning_rate": 6.666027791087685e-05, "loss": 0.6581, "step": 25104 }, { "epoch": 1.7009960024391897, "grad_norm": 5.390408992767334, "learning_rate": 6.665890889177904e-05, "loss": 0.5296, "step": 25105 }, { "epoch": 1.701063757707162, "grad_norm": 5.711587905883789, "learning_rate": 6.665753987268122e-05, "loss": 0.6533, "step": 25106 }, { "epoch": 1.7011315129751337, "grad_norm": 6.297998905181885, "learning_rate": 6.66561708535834e-05, "loss": 0.8487, "step": 25107 }, { "epoch": 1.7011992682431059, "grad_norm": 10.715620040893555, "learning_rate": 6.66548018344856e-05, "loss": 0.7749, "step": 25108 }, { "epoch": 1.701267023511078, "grad_norm": 6.410741329193115, "learning_rate": 6.665343281538778e-05, "loss": 0.6889, "step": 25109 }, { "epoch": 1.70133477877905, "grad_norm": 6.305758476257324, "learning_rate": 6.665206379628996e-05, "loss": 0.5733, "step": 25110 }, { "epoch": 1.701402534047022, "grad_norm": 5.705240726470947, "learning_rate": 6.665069477719215e-05, "loss": 0.4983, "step": 25111 }, { "epoch": 1.7014702893149942, "grad_norm": 6.896918773651123, "learning_rate": 6.664932575809433e-05, "loss": 0.8002, "step": 25112 }, { "epoch": 1.7015380445829664, "grad_norm": 7.052534580230713, "learning_rate": 6.664795673899651e-05, "loss": 0.6317, "step": 25113 }, { "epoch": 1.7016057998509384, "grad_norm": 4.875044822692871, "learning_rate": 6.664658771989871e-05, "loss": 0.689, "step": 25114 }, { "epoch": 1.7016735551189104, "grad_norm": 6.122916221618652, "learning_rate": 6.664521870080089e-05, "loss": 0.7686, "step": 25115 }, { "epoch": 1.7017413103868826, "grad_norm": 8.155141830444336, "learning_rate": 6.664384968170307e-05, "loss": 0.5949, "step": 25116 }, { "epoch": 1.7018090656548548, "grad_norm": 5.382815361022949, "learning_rate": 6.664248066260525e-05, "loss": 0.547, "step": 25117 }, { "epoch": 1.7018768209228268, "grad_norm": 4.705789566040039, "learning_rate": 6.664111164350743e-05, "loss": 0.5915, "step": 25118 }, { "epoch": 1.7019445761907988, "grad_norm": 6.8611555099487305, "learning_rate": 6.663974262440962e-05, "loss": 0.666, "step": 25119 }, { "epoch": 1.702012331458771, "grad_norm": 7.928563594818115, "learning_rate": 6.66383736053118e-05, "loss": 0.6834, "step": 25120 }, { "epoch": 1.7020800867267432, "grad_norm": 4.79546594619751, "learning_rate": 6.663700458621398e-05, "loss": 0.6245, "step": 25121 }, { "epoch": 1.7021478419947151, "grad_norm": 4.704895496368408, "learning_rate": 6.663563556711616e-05, "loss": 0.6599, "step": 25122 }, { "epoch": 1.7022155972626871, "grad_norm": 5.289593696594238, "learning_rate": 6.663426654801834e-05, "loss": 0.599, "step": 25123 }, { "epoch": 1.7022833525306593, "grad_norm": 7.883758068084717, "learning_rate": 6.663289752892054e-05, "loss": 0.6522, "step": 25124 }, { "epoch": 1.7023511077986313, "grad_norm": 8.820324897766113, "learning_rate": 6.663152850982272e-05, "loss": 0.8254, "step": 25125 }, { "epoch": 1.7024188630666033, "grad_norm": 5.120791912078857, "learning_rate": 6.66301594907249e-05, "loss": 0.6344, "step": 25126 }, { "epoch": 1.7024866183345755, "grad_norm": 6.220513343811035, "learning_rate": 6.662879047162708e-05, "loss": 0.6514, "step": 25127 }, { "epoch": 1.7025543736025477, "grad_norm": 5.6624908447265625, "learning_rate": 6.662742145252927e-05, "loss": 0.5248, "step": 25128 }, { "epoch": 1.7026221288705197, "grad_norm": 6.824403762817383, "learning_rate": 6.662605243343145e-05, "loss": 0.7813, "step": 25129 }, { "epoch": 1.7026898841384916, "grad_norm": 4.396014213562012, "learning_rate": 6.662468341433363e-05, "loss": 0.5242, "step": 25130 }, { "epoch": 1.7027576394064639, "grad_norm": 5.098602294921875, "learning_rate": 6.662331439523581e-05, "loss": 0.5222, "step": 25131 }, { "epoch": 1.702825394674436, "grad_norm": 5.101288795471191, "learning_rate": 6.6621945376138e-05, "loss": 0.4971, "step": 25132 }, { "epoch": 1.702893149942408, "grad_norm": 5.360189437866211, "learning_rate": 6.662057635704019e-05, "loss": 0.4739, "step": 25133 }, { "epoch": 1.70296090521038, "grad_norm": 4.778810501098633, "learning_rate": 6.661920733794237e-05, "loss": 0.6314, "step": 25134 }, { "epoch": 1.7030286604783522, "grad_norm": 7.048381328582764, "learning_rate": 6.661783831884455e-05, "loss": 0.4691, "step": 25135 }, { "epoch": 1.7030964157463244, "grad_norm": 6.749027729034424, "learning_rate": 6.661646929974673e-05, "loss": 0.8057, "step": 25136 }, { "epoch": 1.7031641710142964, "grad_norm": 7.460659027099609, "learning_rate": 6.661510028064892e-05, "loss": 0.7833, "step": 25137 }, { "epoch": 1.7032319262822684, "grad_norm": 6.4059906005859375, "learning_rate": 6.66137312615511e-05, "loss": 0.6605, "step": 25138 }, { "epoch": 1.7032996815502406, "grad_norm": 9.687125205993652, "learning_rate": 6.661236224245328e-05, "loss": 0.9259, "step": 25139 }, { "epoch": 1.7033674368182126, "grad_norm": 4.778316020965576, "learning_rate": 6.661099322335546e-05, "loss": 0.5454, "step": 25140 }, { "epoch": 1.7034351920861845, "grad_norm": 4.617629051208496, "learning_rate": 6.660962420425764e-05, "loss": 0.7942, "step": 25141 }, { "epoch": 1.7035029473541567, "grad_norm": 6.270170211791992, "learning_rate": 6.660825518515984e-05, "loss": 0.7469, "step": 25142 }, { "epoch": 1.703570702622129, "grad_norm": 4.3465046882629395, "learning_rate": 6.660688616606202e-05, "loss": 0.6386, "step": 25143 }, { "epoch": 1.703638457890101, "grad_norm": 4.751098155975342, "learning_rate": 6.66055171469642e-05, "loss": 0.7116, "step": 25144 }, { "epoch": 1.703706213158073, "grad_norm": 9.499298095703125, "learning_rate": 6.660414812786638e-05, "loss": 0.6477, "step": 25145 }, { "epoch": 1.7037739684260451, "grad_norm": 9.350540161132812, "learning_rate": 6.660277910876857e-05, "loss": 0.6545, "step": 25146 }, { "epoch": 1.7038417236940173, "grad_norm": 5.168809413909912, "learning_rate": 6.660141008967075e-05, "loss": 0.5458, "step": 25147 }, { "epoch": 1.7039094789619893, "grad_norm": 4.894460678100586, "learning_rate": 6.660004107057293e-05, "loss": 0.4541, "step": 25148 }, { "epoch": 1.7039772342299613, "grad_norm": 7.977293968200684, "learning_rate": 6.659867205147511e-05, "loss": 0.7319, "step": 25149 }, { "epoch": 1.7040449894979335, "grad_norm": 4.470638751983643, "learning_rate": 6.65973030323773e-05, "loss": 0.6613, "step": 25150 }, { "epoch": 1.7041127447659057, "grad_norm": 8.05654239654541, "learning_rate": 6.659593401327949e-05, "loss": 0.7961, "step": 25151 }, { "epoch": 1.7041805000338777, "grad_norm": 4.60984992980957, "learning_rate": 6.659456499418167e-05, "loss": 0.5552, "step": 25152 }, { "epoch": 1.7042482553018496, "grad_norm": 5.158641338348389, "learning_rate": 6.659319597508385e-05, "loss": 0.551, "step": 25153 }, { "epoch": 1.7043160105698218, "grad_norm": 5.989535808563232, "learning_rate": 6.659182695598604e-05, "loss": 0.6591, "step": 25154 }, { "epoch": 1.704383765837794, "grad_norm": 5.058272361755371, "learning_rate": 6.659045793688822e-05, "loss": 0.4781, "step": 25155 }, { "epoch": 1.7044515211057658, "grad_norm": 4.714028835296631, "learning_rate": 6.65890889177904e-05, "loss": 0.5914, "step": 25156 }, { "epoch": 1.704519276373738, "grad_norm": 6.332169055938721, "learning_rate": 6.65877198986926e-05, "loss": 0.5824, "step": 25157 }, { "epoch": 1.7045870316417102, "grad_norm": 6.392367839813232, "learning_rate": 6.658635087959478e-05, "loss": 0.8769, "step": 25158 }, { "epoch": 1.7046547869096822, "grad_norm": 6.537968158721924, "learning_rate": 6.658498186049696e-05, "loss": 0.6556, "step": 25159 }, { "epoch": 1.7047225421776542, "grad_norm": 6.044924259185791, "learning_rate": 6.658361284139915e-05, "loss": 0.6802, "step": 25160 }, { "epoch": 1.7047902974456264, "grad_norm": 5.711536407470703, "learning_rate": 6.658224382230133e-05, "loss": 0.7925, "step": 25161 }, { "epoch": 1.7048580527135986, "grad_norm": 5.176628112792969, "learning_rate": 6.658087480320351e-05, "loss": 0.7638, "step": 25162 }, { "epoch": 1.7049258079815706, "grad_norm": 6.446534156799316, "learning_rate": 6.657950578410569e-05, "loss": 0.8099, "step": 25163 }, { "epoch": 1.7049935632495425, "grad_norm": 8.979019165039062, "learning_rate": 6.657813676500787e-05, "loss": 0.7947, "step": 25164 }, { "epoch": 1.7050613185175147, "grad_norm": 6.896387100219727, "learning_rate": 6.657676774591007e-05, "loss": 0.7908, "step": 25165 }, { "epoch": 1.705129073785487, "grad_norm": 5.322388172149658, "learning_rate": 6.657539872681225e-05, "loss": 0.6911, "step": 25166 }, { "epoch": 1.705196829053459, "grad_norm": 5.595407962799072, "learning_rate": 6.657402970771443e-05, "loss": 0.5786, "step": 25167 }, { "epoch": 1.705264584321431, "grad_norm": 5.507988452911377, "learning_rate": 6.657266068861661e-05, "loss": 0.6008, "step": 25168 }, { "epoch": 1.7053323395894031, "grad_norm": 6.382274150848389, "learning_rate": 6.65712916695188e-05, "loss": 0.7311, "step": 25169 }, { "epoch": 1.7054000948573753, "grad_norm": 7.819905757904053, "learning_rate": 6.656992265042098e-05, "loss": 0.6743, "step": 25170 }, { "epoch": 1.7054678501253473, "grad_norm": 7.996311664581299, "learning_rate": 6.656855363132316e-05, "loss": 0.599, "step": 25171 }, { "epoch": 1.7055356053933193, "grad_norm": 5.352550506591797, "learning_rate": 6.656718461222534e-05, "loss": 0.6117, "step": 25172 }, { "epoch": 1.7056033606612915, "grad_norm": 4.766674518585205, "learning_rate": 6.656581559312752e-05, "loss": 0.7874, "step": 25173 }, { "epoch": 1.7056711159292635, "grad_norm": 4.985347270965576, "learning_rate": 6.656444657402972e-05, "loss": 0.5769, "step": 25174 }, { "epoch": 1.7057388711972354, "grad_norm": 4.997335433959961, "learning_rate": 6.65630775549319e-05, "loss": 0.8528, "step": 25175 }, { "epoch": 1.7058066264652076, "grad_norm": 7.112738132476807, "learning_rate": 6.656170853583408e-05, "loss": 0.6668, "step": 25176 }, { "epoch": 1.7058743817331798, "grad_norm": 5.382721900939941, "learning_rate": 6.656033951673626e-05, "loss": 0.7197, "step": 25177 }, { "epoch": 1.7059421370011518, "grad_norm": 4.662708759307861, "learning_rate": 6.655897049763844e-05, "loss": 0.5784, "step": 25178 }, { "epoch": 1.7060098922691238, "grad_norm": 5.162871360778809, "learning_rate": 6.655760147854063e-05, "loss": 0.5799, "step": 25179 }, { "epoch": 1.706077647537096, "grad_norm": 8.029646873474121, "learning_rate": 6.655623245944281e-05, "loss": 0.6924, "step": 25180 }, { "epoch": 1.7061454028050682, "grad_norm": 5.90971040725708, "learning_rate": 6.655486344034499e-05, "loss": 0.7114, "step": 25181 }, { "epoch": 1.7062131580730402, "grad_norm": 4.961408615112305, "learning_rate": 6.655349442124717e-05, "loss": 0.6107, "step": 25182 }, { "epoch": 1.7062809133410122, "grad_norm": 8.74201774597168, "learning_rate": 6.655212540214937e-05, "loss": 0.6854, "step": 25183 }, { "epoch": 1.7063486686089844, "grad_norm": 4.519627094268799, "learning_rate": 6.655075638305155e-05, "loss": 0.5421, "step": 25184 }, { "epoch": 1.7064164238769566, "grad_norm": 5.223120212554932, "learning_rate": 6.654938736395373e-05, "loss": 0.7777, "step": 25185 }, { "epoch": 1.7064841791449286, "grad_norm": 7.99864387512207, "learning_rate": 6.654801834485591e-05, "loss": 0.5445, "step": 25186 }, { "epoch": 1.7065519344129005, "grad_norm": 5.048466682434082, "learning_rate": 6.654664932575809e-05, "loss": 0.6213, "step": 25187 }, { "epoch": 1.7066196896808727, "grad_norm": 6.145540237426758, "learning_rate": 6.654528030666028e-05, "loss": 0.622, "step": 25188 }, { "epoch": 1.7066874449488447, "grad_norm": 13.854827880859375, "learning_rate": 6.654391128756246e-05, "loss": 0.7011, "step": 25189 }, { "epoch": 1.7067552002168167, "grad_norm": 5.093112468719482, "learning_rate": 6.654254226846464e-05, "loss": 0.6784, "step": 25190 }, { "epoch": 1.706822955484789, "grad_norm": 5.111222267150879, "learning_rate": 6.654117324936682e-05, "loss": 0.711, "step": 25191 }, { "epoch": 1.706890710752761, "grad_norm": 9.729315757751465, "learning_rate": 6.653980423026902e-05, "loss": 0.6875, "step": 25192 }, { "epoch": 1.706958466020733, "grad_norm": 6.342761516571045, "learning_rate": 6.65384352111712e-05, "loss": 0.7672, "step": 25193 }, { "epoch": 1.707026221288705, "grad_norm": 4.545104503631592, "learning_rate": 6.653706619207338e-05, "loss": 0.4905, "step": 25194 }, { "epoch": 1.7070939765566773, "grad_norm": 7.424709796905518, "learning_rate": 6.653569717297556e-05, "loss": 0.6293, "step": 25195 }, { "epoch": 1.7071617318246495, "grad_norm": 5.225128650665283, "learning_rate": 6.653432815387774e-05, "loss": 0.7098, "step": 25196 }, { "epoch": 1.7072294870926215, "grad_norm": 7.209833145141602, "learning_rate": 6.653295913477993e-05, "loss": 0.7606, "step": 25197 }, { "epoch": 1.7072972423605934, "grad_norm": 4.8738017082214355, "learning_rate": 6.653159011568211e-05, "loss": 0.6018, "step": 25198 }, { "epoch": 1.7073649976285656, "grad_norm": 4.468564510345459, "learning_rate": 6.65302210965843e-05, "loss": 0.6363, "step": 25199 }, { "epoch": 1.7074327528965378, "grad_norm": 8.647342681884766, "learning_rate": 6.652885207748649e-05, "loss": 0.6149, "step": 25200 }, { "epoch": 1.7075005081645098, "grad_norm": 5.655155658721924, "learning_rate": 6.652748305838867e-05, "loss": 0.674, "step": 25201 }, { "epoch": 1.7075682634324818, "grad_norm": 6.163032054901123, "learning_rate": 6.652611403929085e-05, "loss": 0.7645, "step": 25202 }, { "epoch": 1.707636018700454, "grad_norm": 5.0263752937316895, "learning_rate": 6.652474502019304e-05, "loss": 0.5547, "step": 25203 }, { "epoch": 1.7077037739684262, "grad_norm": 4.6002349853515625, "learning_rate": 6.652337600109522e-05, "loss": 0.6694, "step": 25204 }, { "epoch": 1.707771529236398, "grad_norm": 5.1185221672058105, "learning_rate": 6.65220069819974e-05, "loss": 0.5701, "step": 25205 }, { "epoch": 1.7078392845043702, "grad_norm": 6.237091064453125, "learning_rate": 6.65206379628996e-05, "loss": 0.9233, "step": 25206 }, { "epoch": 1.7079070397723424, "grad_norm": 4.9598388671875, "learning_rate": 6.651926894380178e-05, "loss": 0.7191, "step": 25207 }, { "epoch": 1.7079747950403144, "grad_norm": 5.828880786895752, "learning_rate": 6.651789992470396e-05, "loss": 0.8995, "step": 25208 }, { "epoch": 1.7080425503082863, "grad_norm": 11.371097564697266, "learning_rate": 6.651653090560614e-05, "loss": 0.757, "step": 25209 }, { "epoch": 1.7081103055762585, "grad_norm": 6.213018894195557, "learning_rate": 6.651516188650832e-05, "loss": 0.6906, "step": 25210 }, { "epoch": 1.7081780608442307, "grad_norm": 7.175138473510742, "learning_rate": 6.651379286741051e-05, "loss": 0.5495, "step": 25211 }, { "epoch": 1.7082458161122027, "grad_norm": 7.287075519561768, "learning_rate": 6.651242384831269e-05, "loss": 0.646, "step": 25212 }, { "epoch": 1.7083135713801747, "grad_norm": 9.925505638122559, "learning_rate": 6.651105482921487e-05, "loss": 0.8945, "step": 25213 }, { "epoch": 1.708381326648147, "grad_norm": 4.956737041473389, "learning_rate": 6.650968581011705e-05, "loss": 0.581, "step": 25214 }, { "epoch": 1.708449081916119, "grad_norm": 6.450089454650879, "learning_rate": 6.650831679101925e-05, "loss": 0.7662, "step": 25215 }, { "epoch": 1.708516837184091, "grad_norm": 5.378121376037598, "learning_rate": 6.650694777192143e-05, "loss": 0.766, "step": 25216 }, { "epoch": 1.708584592452063, "grad_norm": 5.5494818687438965, "learning_rate": 6.650557875282361e-05, "loss": 0.6367, "step": 25217 }, { "epoch": 1.7086523477200353, "grad_norm": 7.0169854164123535, "learning_rate": 6.650420973372579e-05, "loss": 0.808, "step": 25218 }, { "epoch": 1.7087201029880075, "grad_norm": 4.7100372314453125, "learning_rate": 6.650284071462797e-05, "loss": 0.7198, "step": 25219 }, { "epoch": 1.7087878582559795, "grad_norm": 5.919319152832031, "learning_rate": 6.650147169553016e-05, "loss": 0.79, "step": 25220 }, { "epoch": 1.7088556135239514, "grad_norm": 6.218656539916992, "learning_rate": 6.650010267643234e-05, "loss": 0.6289, "step": 25221 }, { "epoch": 1.7089233687919236, "grad_norm": 5.458550453186035, "learning_rate": 6.649873365733452e-05, "loss": 0.8402, "step": 25222 }, { "epoch": 1.7089911240598956, "grad_norm": 5.951737403869629, "learning_rate": 6.64973646382367e-05, "loss": 0.6095, "step": 25223 }, { "epoch": 1.7090588793278676, "grad_norm": 12.815885543823242, "learning_rate": 6.64959956191389e-05, "loss": 0.5079, "step": 25224 }, { "epoch": 1.7091266345958398, "grad_norm": 5.068390846252441, "learning_rate": 6.649462660004108e-05, "loss": 0.746, "step": 25225 }, { "epoch": 1.709194389863812, "grad_norm": 5.200613975524902, "learning_rate": 6.649325758094326e-05, "loss": 0.7337, "step": 25226 }, { "epoch": 1.709262145131784, "grad_norm": 4.219897747039795, "learning_rate": 6.649188856184544e-05, "loss": 0.642, "step": 25227 }, { "epoch": 1.709329900399756, "grad_norm": 5.093502044677734, "learning_rate": 6.649051954274762e-05, "loss": 0.6153, "step": 25228 }, { "epoch": 1.7093976556677282, "grad_norm": 5.9794230461120605, "learning_rate": 6.648915052364981e-05, "loss": 0.6795, "step": 25229 }, { "epoch": 1.7094654109357004, "grad_norm": 5.782684326171875, "learning_rate": 6.648778150455199e-05, "loss": 0.7065, "step": 25230 }, { "epoch": 1.7095331662036723, "grad_norm": 4.205543518066406, "learning_rate": 6.648641248545417e-05, "loss": 0.5617, "step": 25231 }, { "epoch": 1.7096009214716443, "grad_norm": 9.775875091552734, "learning_rate": 6.648504346635635e-05, "loss": 0.7769, "step": 25232 }, { "epoch": 1.7096686767396165, "grad_norm": 5.259836196899414, "learning_rate": 6.648367444725853e-05, "loss": 0.5387, "step": 25233 }, { "epoch": 1.7097364320075887, "grad_norm": 4.589788436889648, "learning_rate": 6.648230542816073e-05, "loss": 0.5289, "step": 25234 }, { "epoch": 1.7098041872755607, "grad_norm": 7.346560478210449, "learning_rate": 6.648093640906291e-05, "loss": 0.649, "step": 25235 }, { "epoch": 1.7098719425435327, "grad_norm": 4.670863628387451, "learning_rate": 6.647956738996509e-05, "loss": 0.4803, "step": 25236 }, { "epoch": 1.709939697811505, "grad_norm": 6.524815082550049, "learning_rate": 6.647819837086727e-05, "loss": 0.7417, "step": 25237 }, { "epoch": 1.7100074530794769, "grad_norm": 6.423871040344238, "learning_rate": 6.647682935176946e-05, "loss": 0.3859, "step": 25238 }, { "epoch": 1.7100752083474489, "grad_norm": 7.756137371063232, "learning_rate": 6.647546033267164e-05, "loss": 0.7903, "step": 25239 }, { "epoch": 1.710142963615421, "grad_norm": 6.443990707397461, "learning_rate": 6.647409131357382e-05, "loss": 0.8692, "step": 25240 }, { "epoch": 1.7102107188833933, "grad_norm": 6.455026626586914, "learning_rate": 6.6472722294476e-05, "loss": 0.7607, "step": 25241 }, { "epoch": 1.7102784741513652, "grad_norm": 6.908333778381348, "learning_rate": 6.647135327537818e-05, "loss": 0.6899, "step": 25242 }, { "epoch": 1.7103462294193372, "grad_norm": 5.375857353210449, "learning_rate": 6.646998425628038e-05, "loss": 0.6616, "step": 25243 }, { "epoch": 1.7104139846873094, "grad_norm": 5.769772052764893, "learning_rate": 6.646861523718256e-05, "loss": 0.7986, "step": 25244 }, { "epoch": 1.7104817399552816, "grad_norm": 5.7268548011779785, "learning_rate": 6.646724621808474e-05, "loss": 0.6943, "step": 25245 }, { "epoch": 1.7105494952232536, "grad_norm": 4.8177103996276855, "learning_rate": 6.646587719898693e-05, "loss": 0.479, "step": 25246 }, { "epoch": 1.7106172504912256, "grad_norm": 4.8151469230651855, "learning_rate": 6.646450817988911e-05, "loss": 0.5962, "step": 25247 }, { "epoch": 1.7106850057591978, "grad_norm": 5.500788688659668, "learning_rate": 6.646313916079129e-05, "loss": 0.5299, "step": 25248 }, { "epoch": 1.71075276102717, "grad_norm": 5.966324806213379, "learning_rate": 6.646177014169349e-05, "loss": 0.7091, "step": 25249 }, { "epoch": 1.710820516295142, "grad_norm": 5.179985523223877, "learning_rate": 6.646040112259567e-05, "loss": 0.6414, "step": 25250 }, { "epoch": 1.710888271563114, "grad_norm": 6.603748321533203, "learning_rate": 6.645903210349785e-05, "loss": 0.5895, "step": 25251 }, { "epoch": 1.7109560268310862, "grad_norm": 4.698010444641113, "learning_rate": 6.645766308440004e-05, "loss": 0.7105, "step": 25252 }, { "epoch": 1.7110237820990584, "grad_norm": 5.542472839355469, "learning_rate": 6.645629406530222e-05, "loss": 0.7143, "step": 25253 }, { "epoch": 1.7110915373670301, "grad_norm": 9.139184951782227, "learning_rate": 6.64549250462044e-05, "loss": 0.6336, "step": 25254 }, { "epoch": 1.7111592926350023, "grad_norm": 6.416548252105713, "learning_rate": 6.645355602710658e-05, "loss": 0.6782, "step": 25255 }, { "epoch": 1.7112270479029745, "grad_norm": 4.629354476928711, "learning_rate": 6.645218700800876e-05, "loss": 0.6744, "step": 25256 }, { "epoch": 1.7112948031709465, "grad_norm": 6.584391117095947, "learning_rate": 6.645081798891096e-05, "loss": 0.6201, "step": 25257 }, { "epoch": 1.7113625584389185, "grad_norm": 7.482619762420654, "learning_rate": 6.644944896981314e-05, "loss": 0.6626, "step": 25258 }, { "epoch": 1.7114303137068907, "grad_norm": 5.3369221687316895, "learning_rate": 6.644807995071532e-05, "loss": 0.5969, "step": 25259 }, { "epoch": 1.711498068974863, "grad_norm": 5.3874406814575195, "learning_rate": 6.64467109316175e-05, "loss": 0.8196, "step": 25260 }, { "epoch": 1.7115658242428349, "grad_norm": 7.338797092437744, "learning_rate": 6.644534191251969e-05, "loss": 0.7417, "step": 25261 }, { "epoch": 1.7116335795108069, "grad_norm": 4.324355602264404, "learning_rate": 6.644397289342187e-05, "loss": 0.5484, "step": 25262 }, { "epoch": 1.711701334778779, "grad_norm": 4.097921371459961, "learning_rate": 6.644260387432405e-05, "loss": 0.5786, "step": 25263 }, { "epoch": 1.7117690900467513, "grad_norm": 4.977634429931641, "learning_rate": 6.644123485522623e-05, "loss": 0.6556, "step": 25264 }, { "epoch": 1.7118368453147232, "grad_norm": 5.438275337219238, "learning_rate": 6.643986583612841e-05, "loss": 0.7197, "step": 25265 }, { "epoch": 1.7119046005826952, "grad_norm": 5.184929847717285, "learning_rate": 6.64384968170306e-05, "loss": 0.7175, "step": 25266 }, { "epoch": 1.7119723558506674, "grad_norm": 5.006080627441406, "learning_rate": 6.643712779793279e-05, "loss": 0.7527, "step": 25267 }, { "epoch": 1.7120401111186396, "grad_norm": 5.085095405578613, "learning_rate": 6.643575877883497e-05, "loss": 0.8457, "step": 25268 }, { "epoch": 1.7121078663866116, "grad_norm": 4.48478889465332, "learning_rate": 6.643438975973715e-05, "loss": 0.7009, "step": 25269 }, { "epoch": 1.7121756216545836, "grad_norm": 6.085383892059326, "learning_rate": 6.643302074063934e-05, "loss": 0.5469, "step": 25270 }, { "epoch": 1.7122433769225558, "grad_norm": 4.564999103546143, "learning_rate": 6.643165172154152e-05, "loss": 0.6435, "step": 25271 }, { "epoch": 1.7123111321905278, "grad_norm": 6.073796272277832, "learning_rate": 6.64302827024437e-05, "loss": 0.5979, "step": 25272 }, { "epoch": 1.7123788874584998, "grad_norm": 7.018272876739502, "learning_rate": 6.642891368334588e-05, "loss": 0.8298, "step": 25273 }, { "epoch": 1.712446642726472, "grad_norm": 4.818720817565918, "learning_rate": 6.642754466424806e-05, "loss": 0.5274, "step": 25274 }, { "epoch": 1.7125143979944442, "grad_norm": 5.730863094329834, "learning_rate": 6.642617564515026e-05, "loss": 0.6804, "step": 25275 }, { "epoch": 1.7125821532624161, "grad_norm": 6.076482772827148, "learning_rate": 6.642480662605244e-05, "loss": 0.6556, "step": 25276 }, { "epoch": 1.7126499085303881, "grad_norm": 7.042006015777588, "learning_rate": 6.642343760695462e-05, "loss": 0.6566, "step": 25277 }, { "epoch": 1.7127176637983603, "grad_norm": 7.721678256988525, "learning_rate": 6.64220685878568e-05, "loss": 0.7689, "step": 25278 }, { "epoch": 1.7127854190663325, "grad_norm": 5.426868438720703, "learning_rate": 6.642069956875899e-05, "loss": 0.7284, "step": 25279 }, { "epoch": 1.7128531743343045, "grad_norm": 5.953659534454346, "learning_rate": 6.641933054966117e-05, "loss": 0.4441, "step": 25280 }, { "epoch": 1.7129209296022765, "grad_norm": 5.0832037925720215, "learning_rate": 6.641796153056335e-05, "loss": 0.7351, "step": 25281 }, { "epoch": 1.7129886848702487, "grad_norm": 6.067591667175293, "learning_rate": 6.641659251146553e-05, "loss": 0.7475, "step": 25282 }, { "epoch": 1.713056440138221, "grad_norm": 6.437488555908203, "learning_rate": 6.641522349236771e-05, "loss": 0.8685, "step": 25283 }, { "epoch": 1.7131241954061929, "grad_norm": 12.06867504119873, "learning_rate": 6.641385447326991e-05, "loss": 0.4711, "step": 25284 }, { "epoch": 1.7131919506741649, "grad_norm": 5.3627142906188965, "learning_rate": 6.641248545417209e-05, "loss": 0.6566, "step": 25285 }, { "epoch": 1.713259705942137, "grad_norm": 4.464273929595947, "learning_rate": 6.641111643507427e-05, "loss": 0.6397, "step": 25286 }, { "epoch": 1.713327461210109, "grad_norm": 5.218535900115967, "learning_rate": 6.640974741597645e-05, "loss": 0.5393, "step": 25287 }, { "epoch": 1.713395216478081, "grad_norm": 5.4146409034729, "learning_rate": 6.640837839687863e-05, "loss": 0.5638, "step": 25288 }, { "epoch": 1.7134629717460532, "grad_norm": 7.773714542388916, "learning_rate": 6.640700937778082e-05, "loss": 0.6074, "step": 25289 }, { "epoch": 1.7135307270140254, "grad_norm": 5.551184177398682, "learning_rate": 6.6405640358683e-05, "loss": 0.5854, "step": 25290 }, { "epoch": 1.7135984822819974, "grad_norm": 6.420908451080322, "learning_rate": 6.640427133958518e-05, "loss": 0.568, "step": 25291 }, { "epoch": 1.7136662375499694, "grad_norm": 8.335471153259277, "learning_rate": 6.640290232048738e-05, "loss": 0.7888, "step": 25292 }, { "epoch": 1.7137339928179416, "grad_norm": 7.040469169616699, "learning_rate": 6.640153330138956e-05, "loss": 0.5804, "step": 25293 }, { "epoch": 1.7138017480859138, "grad_norm": 6.2573933601379395, "learning_rate": 6.640016428229174e-05, "loss": 0.6632, "step": 25294 }, { "epoch": 1.7138695033538858, "grad_norm": 5.821872711181641, "learning_rate": 6.639879526319393e-05, "loss": 0.9143, "step": 25295 }, { "epoch": 1.7139372586218578, "grad_norm": 5.972662448883057, "learning_rate": 6.639742624409611e-05, "loss": 0.6522, "step": 25296 }, { "epoch": 1.71400501388983, "grad_norm": 5.182466983795166, "learning_rate": 6.639605722499829e-05, "loss": 0.5991, "step": 25297 }, { "epoch": 1.7140727691578022, "grad_norm": 6.030015468597412, "learning_rate": 6.639468820590049e-05, "loss": 0.5446, "step": 25298 }, { "epoch": 1.7141405244257741, "grad_norm": 4.939944744110107, "learning_rate": 6.639331918680267e-05, "loss": 0.5304, "step": 25299 }, { "epoch": 1.7142082796937461, "grad_norm": 6.02308464050293, "learning_rate": 6.639195016770485e-05, "loss": 0.7429, "step": 25300 }, { "epoch": 1.7142760349617183, "grad_norm": 4.9091715812683105, "learning_rate": 6.639058114860703e-05, "loss": 0.6556, "step": 25301 }, { "epoch": 1.7143437902296905, "grad_norm": 5.56158971786499, "learning_rate": 6.638921212950922e-05, "loss": 0.7258, "step": 25302 }, { "epoch": 1.7144115454976623, "grad_norm": 5.482598781585693, "learning_rate": 6.63878431104114e-05, "loss": 0.8539, "step": 25303 }, { "epoch": 1.7144793007656345, "grad_norm": 5.062831878662109, "learning_rate": 6.638647409131358e-05, "loss": 0.6676, "step": 25304 }, { "epoch": 1.7145470560336067, "grad_norm": 5.437139511108398, "learning_rate": 6.638510507221576e-05, "loss": 0.6697, "step": 25305 }, { "epoch": 1.7146148113015787, "grad_norm": 5.999005317687988, "learning_rate": 6.638373605311794e-05, "loss": 0.8362, "step": 25306 }, { "epoch": 1.7146825665695506, "grad_norm": 6.181070804595947, "learning_rate": 6.638236703402014e-05, "loss": 0.7114, "step": 25307 }, { "epoch": 1.7147503218375229, "grad_norm": 5.272585391998291, "learning_rate": 6.638099801492232e-05, "loss": 0.6248, "step": 25308 }, { "epoch": 1.714818077105495, "grad_norm": 4.343507289886475, "learning_rate": 6.63796289958245e-05, "loss": 0.5272, "step": 25309 }, { "epoch": 1.714885832373467, "grad_norm": 4.722428321838379, "learning_rate": 6.637825997672668e-05, "loss": 0.6176, "step": 25310 }, { "epoch": 1.714953587641439, "grad_norm": 6.7411065101623535, "learning_rate": 6.637689095762886e-05, "loss": 0.6661, "step": 25311 }, { "epoch": 1.7150213429094112, "grad_norm": 6.539120197296143, "learning_rate": 6.637552193853105e-05, "loss": 0.6218, "step": 25312 }, { "epoch": 1.7150890981773834, "grad_norm": 6.582669258117676, "learning_rate": 6.637415291943323e-05, "loss": 0.7431, "step": 25313 }, { "epoch": 1.7151568534453554, "grad_norm": 7.64914608001709, "learning_rate": 6.637278390033541e-05, "loss": 0.5059, "step": 25314 }, { "epoch": 1.7152246087133274, "grad_norm": 4.711778163909912, "learning_rate": 6.637141488123759e-05, "loss": 0.6219, "step": 25315 }, { "epoch": 1.7152923639812996, "grad_norm": 6.153880596160889, "learning_rate": 6.637004586213979e-05, "loss": 0.6732, "step": 25316 }, { "epoch": 1.7153601192492718, "grad_norm": 6.789356708526611, "learning_rate": 6.636867684304197e-05, "loss": 0.5224, "step": 25317 }, { "epoch": 1.7154278745172438, "grad_norm": 6.725674152374268, "learning_rate": 6.636730782394415e-05, "loss": 0.6753, "step": 25318 }, { "epoch": 1.7154956297852157, "grad_norm": 5.121606826782227, "learning_rate": 6.636593880484633e-05, "loss": 0.5675, "step": 25319 }, { "epoch": 1.715563385053188, "grad_norm": 4.3813886642456055, "learning_rate": 6.636456978574851e-05, "loss": 0.7304, "step": 25320 }, { "epoch": 1.71563114032116, "grad_norm": 5.306639671325684, "learning_rate": 6.63632007666507e-05, "loss": 0.6976, "step": 25321 }, { "epoch": 1.715698895589132, "grad_norm": 6.2787017822265625, "learning_rate": 6.636183174755288e-05, "loss": 0.7301, "step": 25322 }, { "epoch": 1.7157666508571041, "grad_norm": 5.06599760055542, "learning_rate": 6.636046272845506e-05, "loss": 0.8064, "step": 25323 }, { "epoch": 1.7158344061250763, "grad_norm": 5.7801289558410645, "learning_rate": 6.635909370935724e-05, "loss": 0.7098, "step": 25324 }, { "epoch": 1.7159021613930483, "grad_norm": 6.501038551330566, "learning_rate": 6.635772469025944e-05, "loss": 0.8625, "step": 25325 }, { "epoch": 1.7159699166610203, "grad_norm": 5.537795066833496, "learning_rate": 6.635635567116162e-05, "loss": 0.6601, "step": 25326 }, { "epoch": 1.7160376719289925, "grad_norm": 4.825695037841797, "learning_rate": 6.63549866520638e-05, "loss": 0.5477, "step": 25327 }, { "epoch": 1.7161054271969647, "grad_norm": 5.185371398925781, "learning_rate": 6.635361763296598e-05, "loss": 0.6617, "step": 25328 }, { "epoch": 1.7161731824649367, "grad_norm": 6.14351749420166, "learning_rate": 6.635224861386816e-05, "loss": 0.663, "step": 25329 }, { "epoch": 1.7162409377329086, "grad_norm": 4.828489303588867, "learning_rate": 6.635087959477035e-05, "loss": 0.594, "step": 25330 }, { "epoch": 1.7163086930008808, "grad_norm": 5.777277946472168, "learning_rate": 6.634951057567253e-05, "loss": 0.6339, "step": 25331 }, { "epoch": 1.716376448268853, "grad_norm": 6.487437725067139, "learning_rate": 6.634814155657471e-05, "loss": 0.5729, "step": 25332 }, { "epoch": 1.716444203536825, "grad_norm": 7.094808101654053, "learning_rate": 6.634677253747689e-05, "loss": 0.7831, "step": 25333 }, { "epoch": 1.716511958804797, "grad_norm": 6.366550445556641, "learning_rate": 6.634540351837909e-05, "loss": 0.7861, "step": 25334 }, { "epoch": 1.7165797140727692, "grad_norm": 7.03055477142334, "learning_rate": 6.634403449928127e-05, "loss": 0.8068, "step": 25335 }, { "epoch": 1.7166474693407412, "grad_norm": 5.3598175048828125, "learning_rate": 6.634266548018345e-05, "loss": 0.7327, "step": 25336 }, { "epoch": 1.7167152246087132, "grad_norm": 4.355727672576904, "learning_rate": 6.634129646108563e-05, "loss": 0.7314, "step": 25337 }, { "epoch": 1.7167829798766854, "grad_norm": 5.82249641418457, "learning_rate": 6.633992744198782e-05, "loss": 0.8232, "step": 25338 }, { "epoch": 1.7168507351446576, "grad_norm": 6.8153204917907715, "learning_rate": 6.633855842289e-05, "loss": 0.6053, "step": 25339 }, { "epoch": 1.7169184904126296, "grad_norm": 6.596570014953613, "learning_rate": 6.633718940379218e-05, "loss": 0.6152, "step": 25340 }, { "epoch": 1.7169862456806015, "grad_norm": 5.723672866821289, "learning_rate": 6.633582038469438e-05, "loss": 0.4495, "step": 25341 }, { "epoch": 1.7170540009485737, "grad_norm": 5.475369453430176, "learning_rate": 6.633445136559656e-05, "loss": 0.5914, "step": 25342 }, { "epoch": 1.717121756216546, "grad_norm": 5.410866737365723, "learning_rate": 6.633308234649874e-05, "loss": 0.5952, "step": 25343 }, { "epoch": 1.717189511484518, "grad_norm": 7.038705348968506, "learning_rate": 6.633171332740093e-05, "loss": 0.7802, "step": 25344 }, { "epoch": 1.71725726675249, "grad_norm": 4.617842197418213, "learning_rate": 6.633034430830311e-05, "loss": 0.4934, "step": 25345 }, { "epoch": 1.717325022020462, "grad_norm": 11.013154983520508, "learning_rate": 6.632897528920529e-05, "loss": 0.7826, "step": 25346 }, { "epoch": 1.7173927772884343, "grad_norm": 4.711155891418457, "learning_rate": 6.632760627010747e-05, "loss": 0.6448, "step": 25347 }, { "epoch": 1.7174605325564063, "grad_norm": 5.023144245147705, "learning_rate": 6.632623725100967e-05, "loss": 0.5302, "step": 25348 }, { "epoch": 1.7175282878243783, "grad_norm": 4.838811874389648, "learning_rate": 6.632486823191185e-05, "loss": 0.6337, "step": 25349 }, { "epoch": 1.7175960430923505, "grad_norm": 6.346977233886719, "learning_rate": 6.632349921281403e-05, "loss": 0.8024, "step": 25350 }, { "epoch": 1.7176637983603227, "grad_norm": 5.509746551513672, "learning_rate": 6.63221301937162e-05, "loss": 0.6952, "step": 25351 }, { "epoch": 1.7177315536282944, "grad_norm": 6.620658874511719, "learning_rate": 6.632076117461839e-05, "loss": 0.5091, "step": 25352 }, { "epoch": 1.7177993088962666, "grad_norm": 6.012679100036621, "learning_rate": 6.631939215552058e-05, "loss": 0.6452, "step": 25353 }, { "epoch": 1.7178670641642388, "grad_norm": 6.055393695831299, "learning_rate": 6.631802313642276e-05, "loss": 0.8707, "step": 25354 }, { "epoch": 1.7179348194322108, "grad_norm": 7.165378093719482, "learning_rate": 6.631665411732494e-05, "loss": 0.8322, "step": 25355 }, { "epoch": 1.7180025747001828, "grad_norm": 8.406464576721191, "learning_rate": 6.631528509822712e-05, "loss": 0.8755, "step": 25356 }, { "epoch": 1.718070329968155, "grad_norm": 6.370200157165527, "learning_rate": 6.631391607912932e-05, "loss": 0.6729, "step": 25357 }, { "epoch": 1.7181380852361272, "grad_norm": 3.3284027576446533, "learning_rate": 6.63125470600315e-05, "loss": 0.4167, "step": 25358 }, { "epoch": 1.7182058405040992, "grad_norm": 7.9139885902404785, "learning_rate": 6.631117804093368e-05, "loss": 0.8175, "step": 25359 }, { "epoch": 1.7182735957720712, "grad_norm": 6.61963415145874, "learning_rate": 6.630980902183586e-05, "loss": 0.6286, "step": 25360 }, { "epoch": 1.7183413510400434, "grad_norm": 5.56082010269165, "learning_rate": 6.630844000273804e-05, "loss": 0.5724, "step": 25361 }, { "epoch": 1.7184091063080156, "grad_norm": 4.787256240844727, "learning_rate": 6.630707098364023e-05, "loss": 0.7107, "step": 25362 }, { "epoch": 1.7184768615759876, "grad_norm": 6.39119815826416, "learning_rate": 6.630570196454241e-05, "loss": 0.6333, "step": 25363 }, { "epoch": 1.7185446168439595, "grad_norm": 5.109806537628174, "learning_rate": 6.630433294544459e-05, "loss": 0.6426, "step": 25364 }, { "epoch": 1.7186123721119317, "grad_norm": 5.763960838317871, "learning_rate": 6.630296392634677e-05, "loss": 0.703, "step": 25365 }, { "epoch": 1.718680127379904, "grad_norm": 5.919710636138916, "learning_rate": 6.630159490724895e-05, "loss": 0.7014, "step": 25366 }, { "epoch": 1.718747882647876, "grad_norm": 6.22981595993042, "learning_rate": 6.630022588815115e-05, "loss": 0.5968, "step": 25367 }, { "epoch": 1.718815637915848, "grad_norm": 6.107847690582275, "learning_rate": 6.629885686905333e-05, "loss": 0.5964, "step": 25368 }, { "epoch": 1.71888339318382, "grad_norm": 6.277917861938477, "learning_rate": 6.629748784995551e-05, "loss": 0.5531, "step": 25369 }, { "epoch": 1.718951148451792, "grad_norm": 9.817192077636719, "learning_rate": 6.629611883085769e-05, "loss": 0.6268, "step": 25370 }, { "epoch": 1.719018903719764, "grad_norm": 5.404964447021484, "learning_rate": 6.629474981175988e-05, "loss": 0.483, "step": 25371 }, { "epoch": 1.7190866589877363, "grad_norm": 7.520089149475098, "learning_rate": 6.629338079266206e-05, "loss": 0.7698, "step": 25372 }, { "epoch": 1.7191544142557085, "grad_norm": 6.482958793640137, "learning_rate": 6.629201177356424e-05, "loss": 0.6843, "step": 25373 }, { "epoch": 1.7192221695236805, "grad_norm": 4.84513521194458, "learning_rate": 6.629064275446642e-05, "loss": 0.7313, "step": 25374 }, { "epoch": 1.7192899247916524, "grad_norm": 7.228399753570557, "learning_rate": 6.62892737353686e-05, "loss": 0.7604, "step": 25375 }, { "epoch": 1.7193576800596246, "grad_norm": 5.282284736633301, "learning_rate": 6.62879047162708e-05, "loss": 0.9689, "step": 25376 }, { "epoch": 1.7194254353275968, "grad_norm": 8.054612159729004, "learning_rate": 6.628653569717298e-05, "loss": 0.8538, "step": 25377 }, { "epoch": 1.7194931905955688, "grad_norm": 5.238274574279785, "learning_rate": 6.628516667807516e-05, "loss": 0.662, "step": 25378 }, { "epoch": 1.7195609458635408, "grad_norm": 4.299121379852295, "learning_rate": 6.628379765897734e-05, "loss": 0.4753, "step": 25379 }, { "epoch": 1.719628701131513, "grad_norm": 4.379901885986328, "learning_rate": 6.628242863987953e-05, "loss": 0.6473, "step": 25380 }, { "epoch": 1.7196964563994852, "grad_norm": 5.230880260467529, "learning_rate": 6.628105962078171e-05, "loss": 0.552, "step": 25381 }, { "epoch": 1.7197642116674572, "grad_norm": 6.485786437988281, "learning_rate": 6.627969060168389e-05, "loss": 0.8829, "step": 25382 }, { "epoch": 1.7198319669354292, "grad_norm": 5.315577983856201, "learning_rate": 6.627832158258607e-05, "loss": 0.6305, "step": 25383 }, { "epoch": 1.7198997222034014, "grad_norm": 4.20033073425293, "learning_rate": 6.627695256348825e-05, "loss": 0.5822, "step": 25384 }, { "epoch": 1.7199674774713734, "grad_norm": 7.093398571014404, "learning_rate": 6.627558354439045e-05, "loss": 0.5262, "step": 25385 }, { "epoch": 1.7200352327393453, "grad_norm": 7.710385322570801, "learning_rate": 6.627421452529263e-05, "loss": 0.6543, "step": 25386 }, { "epoch": 1.7201029880073175, "grad_norm": 6.586761474609375, "learning_rate": 6.627284550619481e-05, "loss": 1.1755, "step": 25387 }, { "epoch": 1.7201707432752897, "grad_norm": 5.712710380554199, "learning_rate": 6.6271476487097e-05, "loss": 0.6102, "step": 25388 }, { "epoch": 1.7202384985432617, "grad_norm": 6.044922351837158, "learning_rate": 6.627010746799918e-05, "loss": 0.8422, "step": 25389 }, { "epoch": 1.7203062538112337, "grad_norm": 7.212240695953369, "learning_rate": 6.626873844890136e-05, "loss": 0.6451, "step": 25390 }, { "epoch": 1.720374009079206, "grad_norm": 9.255131721496582, "learning_rate": 6.626736942980356e-05, "loss": 0.9304, "step": 25391 }, { "epoch": 1.720441764347178, "grad_norm": 5.764744281768799, "learning_rate": 6.626600041070574e-05, "loss": 0.7575, "step": 25392 }, { "epoch": 1.72050951961515, "grad_norm": 6.72718620300293, "learning_rate": 6.626463139160792e-05, "loss": 0.6336, "step": 25393 }, { "epoch": 1.720577274883122, "grad_norm": 5.808223247528076, "learning_rate": 6.626326237251011e-05, "loss": 0.6533, "step": 25394 }, { "epoch": 1.7206450301510943, "grad_norm": 5.7843708992004395, "learning_rate": 6.626189335341229e-05, "loss": 0.6517, "step": 25395 }, { "epoch": 1.7207127854190665, "grad_norm": 4.947727680206299, "learning_rate": 6.626052433431447e-05, "loss": 0.7481, "step": 25396 }, { "epoch": 1.7207805406870385, "grad_norm": 5.479191780090332, "learning_rate": 6.625915531521665e-05, "loss": 0.657, "step": 25397 }, { "epoch": 1.7208482959550104, "grad_norm": 8.69981861114502, "learning_rate": 6.625778629611883e-05, "loss": 0.6097, "step": 25398 }, { "epoch": 1.7209160512229826, "grad_norm": 5.088558673858643, "learning_rate": 6.625641727702103e-05, "loss": 0.5428, "step": 25399 }, { "epoch": 1.7209838064909548, "grad_norm": 5.537509441375732, "learning_rate": 6.62550482579232e-05, "loss": 0.5492, "step": 25400 }, { "epoch": 1.7210515617589266, "grad_norm": 6.57325553894043, "learning_rate": 6.625367923882539e-05, "loss": 0.6109, "step": 25401 }, { "epoch": 1.7211193170268988, "grad_norm": 4.940507411956787, "learning_rate": 6.625231021972757e-05, "loss": 0.5136, "step": 25402 }, { "epoch": 1.721187072294871, "grad_norm": 4.566998481750488, "learning_rate": 6.625094120062976e-05, "loss": 0.6874, "step": 25403 }, { "epoch": 1.721254827562843, "grad_norm": 11.984018325805664, "learning_rate": 6.624957218153194e-05, "loss": 0.4978, "step": 25404 }, { "epoch": 1.721322582830815, "grad_norm": 7.004611968994141, "learning_rate": 6.624820316243412e-05, "loss": 0.8297, "step": 25405 }, { "epoch": 1.7213903380987872, "grad_norm": 6.502382755279541, "learning_rate": 6.62468341433363e-05, "loss": 0.487, "step": 25406 }, { "epoch": 1.7214580933667594, "grad_norm": 4.445919036865234, "learning_rate": 6.624546512423848e-05, "loss": 0.6465, "step": 25407 }, { "epoch": 1.7215258486347313, "grad_norm": 7.075725078582764, "learning_rate": 6.624409610514068e-05, "loss": 0.768, "step": 25408 }, { "epoch": 1.7215936039027033, "grad_norm": 5.922426223754883, "learning_rate": 6.624272708604286e-05, "loss": 0.544, "step": 25409 }, { "epoch": 1.7216613591706755, "grad_norm": 4.648435115814209, "learning_rate": 6.624135806694504e-05, "loss": 0.5963, "step": 25410 }, { "epoch": 1.7217291144386477, "grad_norm": 5.526916027069092, "learning_rate": 6.623998904784722e-05, "loss": 0.6432, "step": 25411 }, { "epoch": 1.7217968697066197, "grad_norm": 6.0919294357299805, "learning_rate": 6.623862002874941e-05, "loss": 0.6518, "step": 25412 }, { "epoch": 1.7218646249745917, "grad_norm": 4.40625, "learning_rate": 6.623725100965159e-05, "loss": 0.6227, "step": 25413 }, { "epoch": 1.721932380242564, "grad_norm": 7.712887763977051, "learning_rate": 6.623588199055377e-05, "loss": 0.7119, "step": 25414 }, { "epoch": 1.722000135510536, "grad_norm": 4.101988792419434, "learning_rate": 6.623451297145595e-05, "loss": 0.4379, "step": 25415 }, { "epoch": 1.722067890778508, "grad_norm": 9.257396697998047, "learning_rate": 6.623314395235813e-05, "loss": 0.694, "step": 25416 }, { "epoch": 1.72213564604648, "grad_norm": 5.768160343170166, "learning_rate": 6.623177493326033e-05, "loss": 0.5787, "step": 25417 }, { "epoch": 1.7222034013144523, "grad_norm": 7.3374199867248535, "learning_rate": 6.62304059141625e-05, "loss": 0.9615, "step": 25418 }, { "epoch": 1.7222711565824242, "grad_norm": 8.389261245727539, "learning_rate": 6.622903689506469e-05, "loss": 0.6976, "step": 25419 }, { "epoch": 1.7223389118503962, "grad_norm": 4.0525078773498535, "learning_rate": 6.622766787596687e-05, "loss": 0.5907, "step": 25420 }, { "epoch": 1.7224066671183684, "grad_norm": 5.287519931793213, "learning_rate": 6.622629885686905e-05, "loss": 0.6608, "step": 25421 }, { "epoch": 1.7224744223863406, "grad_norm": 7.4944257736206055, "learning_rate": 6.622492983777124e-05, "loss": 0.7896, "step": 25422 }, { "epoch": 1.7225421776543126, "grad_norm": 5.338679313659668, "learning_rate": 6.622356081867342e-05, "loss": 0.8118, "step": 25423 }, { "epoch": 1.7226099329222846, "grad_norm": 6.001691818237305, "learning_rate": 6.62221917995756e-05, "loss": 0.6656, "step": 25424 }, { "epoch": 1.7226776881902568, "grad_norm": 10.13070297241211, "learning_rate": 6.622082278047778e-05, "loss": 0.6307, "step": 25425 }, { "epoch": 1.722745443458229, "grad_norm": 6.4793829917907715, "learning_rate": 6.621945376137998e-05, "loss": 0.5715, "step": 25426 }, { "epoch": 1.722813198726201, "grad_norm": 5.937607288360596, "learning_rate": 6.621808474228216e-05, "loss": 0.6526, "step": 25427 }, { "epoch": 1.722880953994173, "grad_norm": 9.407894134521484, "learning_rate": 6.621671572318434e-05, "loss": 0.7242, "step": 25428 }, { "epoch": 1.7229487092621452, "grad_norm": 6.129759311676025, "learning_rate": 6.621534670408652e-05, "loss": 0.5541, "step": 25429 }, { "epoch": 1.7230164645301174, "grad_norm": 5.3438239097595215, "learning_rate": 6.62139776849887e-05, "loss": 0.5699, "step": 25430 }, { "epoch": 1.7230842197980893, "grad_norm": 6.556332111358643, "learning_rate": 6.621260866589089e-05, "loss": 0.5365, "step": 25431 }, { "epoch": 1.7231519750660613, "grad_norm": 5.53242826461792, "learning_rate": 6.621123964679307e-05, "loss": 0.626, "step": 25432 }, { "epoch": 1.7232197303340335, "grad_norm": 10.87756061553955, "learning_rate": 6.620987062769525e-05, "loss": 0.7036, "step": 25433 }, { "epoch": 1.7232874856020055, "grad_norm": 5.833924770355225, "learning_rate": 6.620850160859745e-05, "loss": 0.556, "step": 25434 }, { "epoch": 1.7233552408699775, "grad_norm": 7.258885860443115, "learning_rate": 6.620713258949963e-05, "loss": 0.7481, "step": 25435 }, { "epoch": 1.7234229961379497, "grad_norm": 3.829523801803589, "learning_rate": 6.62057635704018e-05, "loss": 0.5206, "step": 25436 }, { "epoch": 1.723490751405922, "grad_norm": 6.127206802368164, "learning_rate": 6.6204394551304e-05, "loss": 0.5821, "step": 25437 }, { "epoch": 1.7235585066738939, "grad_norm": 7.086686611175537, "learning_rate": 6.620302553220618e-05, "loss": 1.0119, "step": 25438 }, { "epoch": 1.7236262619418659, "grad_norm": 6.202488899230957, "learning_rate": 6.620165651310836e-05, "loss": 0.8254, "step": 25439 }, { "epoch": 1.723694017209838, "grad_norm": 5.026772499084473, "learning_rate": 6.620028749401056e-05, "loss": 0.8302, "step": 25440 }, { "epoch": 1.7237617724778103, "grad_norm": 9.662817001342773, "learning_rate": 6.619891847491274e-05, "loss": 0.6882, "step": 25441 }, { "epoch": 1.7238295277457822, "grad_norm": 5.611625671386719, "learning_rate": 6.619754945581492e-05, "loss": 0.5647, "step": 25442 }, { "epoch": 1.7238972830137542, "grad_norm": 6.766626358032227, "learning_rate": 6.61961804367171e-05, "loss": 0.6644, "step": 25443 }, { "epoch": 1.7239650382817264, "grad_norm": 11.601972579956055, "learning_rate": 6.619481141761928e-05, "loss": 0.6002, "step": 25444 }, { "epoch": 1.7240327935496986, "grad_norm": 7.153971195220947, "learning_rate": 6.619344239852147e-05, "loss": 0.7761, "step": 25445 }, { "epoch": 1.7241005488176706, "grad_norm": 4.911805152893066, "learning_rate": 6.619207337942365e-05, "loss": 0.5861, "step": 25446 }, { "epoch": 1.7241683040856426, "grad_norm": 5.558289051055908, "learning_rate": 6.619070436032583e-05, "loss": 0.6274, "step": 25447 }, { "epoch": 1.7242360593536148, "grad_norm": 6.112437725067139, "learning_rate": 6.618933534122801e-05, "loss": 0.5781, "step": 25448 }, { "epoch": 1.724303814621587, "grad_norm": 9.456572532653809, "learning_rate": 6.61879663221302e-05, "loss": 0.5678, "step": 25449 }, { "epoch": 1.7243715698895588, "grad_norm": 5.268760681152344, "learning_rate": 6.618659730303239e-05, "loss": 0.7139, "step": 25450 }, { "epoch": 1.724439325157531, "grad_norm": 9.074788093566895, "learning_rate": 6.618522828393457e-05, "loss": 0.6243, "step": 25451 }, { "epoch": 1.7245070804255032, "grad_norm": 7.539365768432617, "learning_rate": 6.618385926483675e-05, "loss": 0.6464, "step": 25452 }, { "epoch": 1.7245748356934751, "grad_norm": 5.249877452850342, "learning_rate": 6.618249024573893e-05, "loss": 0.6301, "step": 25453 }, { "epoch": 1.7246425909614471, "grad_norm": 5.0561699867248535, "learning_rate": 6.618112122664112e-05, "loss": 0.6398, "step": 25454 }, { "epoch": 1.7247103462294193, "grad_norm": 5.784193992614746, "learning_rate": 6.61797522075433e-05, "loss": 0.7451, "step": 25455 }, { "epoch": 1.7247781014973915, "grad_norm": 3.977977991104126, "learning_rate": 6.617838318844548e-05, "loss": 0.5568, "step": 25456 }, { "epoch": 1.7248458567653635, "grad_norm": 6.700736999511719, "learning_rate": 6.617701416934766e-05, "loss": 0.606, "step": 25457 }, { "epoch": 1.7249136120333355, "grad_norm": 5.459366798400879, "learning_rate": 6.617564515024986e-05, "loss": 0.7217, "step": 25458 }, { "epoch": 1.7249813673013077, "grad_norm": 7.069524765014648, "learning_rate": 6.617427613115204e-05, "loss": 0.5606, "step": 25459 }, { "epoch": 1.72504912256928, "grad_norm": 5.072666645050049, "learning_rate": 6.617290711205422e-05, "loss": 0.622, "step": 25460 }, { "epoch": 1.7251168778372519, "grad_norm": 4.277806758880615, "learning_rate": 6.61715380929564e-05, "loss": 0.599, "step": 25461 }, { "epoch": 1.7251846331052239, "grad_norm": 8.46384334564209, "learning_rate": 6.617016907385858e-05, "loss": 0.8162, "step": 25462 }, { "epoch": 1.725252388373196, "grad_norm": 9.81196117401123, "learning_rate": 6.616880005476077e-05, "loss": 0.7768, "step": 25463 }, { "epoch": 1.7253201436411683, "grad_norm": 5.368473052978516, "learning_rate": 6.616743103566295e-05, "loss": 0.7776, "step": 25464 }, { "epoch": 1.7253878989091402, "grad_norm": 4.814304351806641, "learning_rate": 6.616606201656513e-05, "loss": 0.653, "step": 25465 }, { "epoch": 1.7254556541771122, "grad_norm": 4.36046028137207, "learning_rate": 6.616469299746731e-05, "loss": 0.5849, "step": 25466 }, { "epoch": 1.7255234094450844, "grad_norm": 5.101994037628174, "learning_rate": 6.61633239783695e-05, "loss": 0.5846, "step": 25467 }, { "epoch": 1.7255911647130564, "grad_norm": 7.251347064971924, "learning_rate": 6.616195495927169e-05, "loss": 0.9334, "step": 25468 }, { "epoch": 1.7256589199810284, "grad_norm": 4.606649398803711, "learning_rate": 6.616058594017387e-05, "loss": 0.5857, "step": 25469 }, { "epoch": 1.7257266752490006, "grad_norm": 4.097899436950684, "learning_rate": 6.615921692107605e-05, "loss": 0.4924, "step": 25470 }, { "epoch": 1.7257944305169728, "grad_norm": 7.059034824371338, "learning_rate": 6.615784790197823e-05, "loss": 0.8741, "step": 25471 }, { "epoch": 1.7258621857849448, "grad_norm": 5.44326114654541, "learning_rate": 6.615647888288042e-05, "loss": 0.6721, "step": 25472 }, { "epoch": 1.7259299410529167, "grad_norm": 4.339067459106445, "learning_rate": 6.61551098637826e-05, "loss": 0.628, "step": 25473 }, { "epoch": 1.725997696320889, "grad_norm": 6.472447395324707, "learning_rate": 6.615374084468478e-05, "loss": 0.792, "step": 25474 }, { "epoch": 1.7260654515888612, "grad_norm": 5.704373836517334, "learning_rate": 6.615237182558696e-05, "loss": 0.7927, "step": 25475 }, { "epoch": 1.7261332068568331, "grad_norm": 7.93548583984375, "learning_rate": 6.615100280648914e-05, "loss": 0.725, "step": 25476 }, { "epoch": 1.7262009621248051, "grad_norm": 7.904815673828125, "learning_rate": 6.614963378739134e-05, "loss": 0.7954, "step": 25477 }, { "epoch": 1.7262687173927773, "grad_norm": 4.029458999633789, "learning_rate": 6.614826476829352e-05, "loss": 0.5968, "step": 25478 }, { "epoch": 1.7263364726607495, "grad_norm": 5.739626407623291, "learning_rate": 6.61468957491957e-05, "loss": 0.7128, "step": 25479 }, { "epoch": 1.7264042279287215, "grad_norm": 6.798091888427734, "learning_rate": 6.614552673009789e-05, "loss": 0.6978, "step": 25480 }, { "epoch": 1.7264719831966935, "grad_norm": 5.676067352294922, "learning_rate": 6.614415771100007e-05, "loss": 0.6441, "step": 25481 }, { "epoch": 1.7265397384646657, "grad_norm": 9.20374584197998, "learning_rate": 6.614278869190225e-05, "loss": 0.8011, "step": 25482 }, { "epoch": 1.7266074937326377, "grad_norm": 5.085662841796875, "learning_rate": 6.614141967280445e-05, "loss": 0.7052, "step": 25483 }, { "epoch": 1.7266752490006096, "grad_norm": 6.026432514190674, "learning_rate": 6.614005065370663e-05, "loss": 0.6581, "step": 25484 }, { "epoch": 1.7267430042685818, "grad_norm": 5.153714656829834, "learning_rate": 6.61386816346088e-05, "loss": 0.4684, "step": 25485 }, { "epoch": 1.726810759536554, "grad_norm": 4.191116809844971, "learning_rate": 6.6137312615511e-05, "loss": 0.5675, "step": 25486 }, { "epoch": 1.726878514804526, "grad_norm": 6.342469215393066, "learning_rate": 6.613594359641318e-05, "loss": 0.647, "step": 25487 }, { "epoch": 1.726946270072498, "grad_norm": 5.064770698547363, "learning_rate": 6.613457457731536e-05, "loss": 0.6358, "step": 25488 }, { "epoch": 1.7270140253404702, "grad_norm": 6.1624674797058105, "learning_rate": 6.613320555821754e-05, "loss": 0.5853, "step": 25489 }, { "epoch": 1.7270817806084424, "grad_norm": 4.990175724029541, "learning_rate": 6.613183653911973e-05, "loss": 0.7549, "step": 25490 }, { "epoch": 1.7271495358764144, "grad_norm": 5.86323356628418, "learning_rate": 6.613046752002192e-05, "loss": 0.5968, "step": 25491 }, { "epoch": 1.7272172911443864, "grad_norm": 3.9464385509490967, "learning_rate": 6.61290985009241e-05, "loss": 0.537, "step": 25492 }, { "epoch": 1.7272850464123586, "grad_norm": 4.628739833831787, "learning_rate": 6.612772948182628e-05, "loss": 0.6114, "step": 25493 }, { "epoch": 1.7273528016803308, "grad_norm": 4.1892547607421875, "learning_rate": 6.612636046272846e-05, "loss": 0.6128, "step": 25494 }, { "epoch": 1.7274205569483028, "grad_norm": 5.610450744628906, "learning_rate": 6.612499144363065e-05, "loss": 0.7415, "step": 25495 }, { "epoch": 1.7274883122162747, "grad_norm": 4.359115123748779, "learning_rate": 6.612362242453283e-05, "loss": 0.5607, "step": 25496 }, { "epoch": 1.727556067484247, "grad_norm": 4.843179225921631, "learning_rate": 6.612225340543501e-05, "loss": 0.6672, "step": 25497 }, { "epoch": 1.7276238227522192, "grad_norm": 5.8828043937683105, "learning_rate": 6.612088438633719e-05, "loss": 0.5181, "step": 25498 }, { "epoch": 1.727691578020191, "grad_norm": 6.844254016876221, "learning_rate": 6.611951536723937e-05, "loss": 0.7768, "step": 25499 }, { "epoch": 1.7277593332881631, "grad_norm": 8.466080665588379, "learning_rate": 6.611814634814157e-05, "loss": 0.7158, "step": 25500 }, { "epoch": 1.7278270885561353, "grad_norm": 5.9885334968566895, "learning_rate": 6.611677732904375e-05, "loss": 0.6497, "step": 25501 }, { "epoch": 1.7278948438241073, "grad_norm": 6.577659606933594, "learning_rate": 6.611540830994593e-05, "loss": 0.8415, "step": 25502 }, { "epoch": 1.7279625990920793, "grad_norm": 7.631778717041016, "learning_rate": 6.61140392908481e-05, "loss": 0.6021, "step": 25503 }, { "epoch": 1.7280303543600515, "grad_norm": 5.264751434326172, "learning_rate": 6.61126702717503e-05, "loss": 0.6291, "step": 25504 }, { "epoch": 1.7280981096280237, "grad_norm": 6.281976222991943, "learning_rate": 6.611130125265248e-05, "loss": 0.6524, "step": 25505 }, { "epoch": 1.7281658648959957, "grad_norm": 4.564767837524414, "learning_rate": 6.610993223355466e-05, "loss": 0.6484, "step": 25506 }, { "epoch": 1.7282336201639676, "grad_norm": 13.045504570007324, "learning_rate": 6.610856321445684e-05, "loss": 0.5025, "step": 25507 }, { "epoch": 1.7283013754319398, "grad_norm": 8.208609580993652, "learning_rate": 6.610719419535902e-05, "loss": 0.7429, "step": 25508 }, { "epoch": 1.728369130699912, "grad_norm": 6.3931779861450195, "learning_rate": 6.610582517626122e-05, "loss": 0.7664, "step": 25509 }, { "epoch": 1.728436885967884, "grad_norm": 8.046426773071289, "learning_rate": 6.61044561571634e-05, "loss": 0.7346, "step": 25510 }, { "epoch": 1.728504641235856, "grad_norm": 7.956863880157471, "learning_rate": 6.610308713806558e-05, "loss": 0.6307, "step": 25511 }, { "epoch": 1.7285723965038282, "grad_norm": 5.110636234283447, "learning_rate": 6.610171811896776e-05, "loss": 0.828, "step": 25512 }, { "epoch": 1.7286401517718004, "grad_norm": 5.019425868988037, "learning_rate": 6.610034909986995e-05, "loss": 0.6368, "step": 25513 }, { "epoch": 1.7287079070397724, "grad_norm": 5.0337066650390625, "learning_rate": 6.609898008077213e-05, "loss": 0.7152, "step": 25514 }, { "epoch": 1.7287756623077444, "grad_norm": 5.6866278648376465, "learning_rate": 6.609761106167431e-05, "loss": 0.8177, "step": 25515 }, { "epoch": 1.7288434175757166, "grad_norm": 7.072933673858643, "learning_rate": 6.609624204257649e-05, "loss": 0.7237, "step": 25516 }, { "epoch": 1.7289111728436886, "grad_norm": 7.999522686004639, "learning_rate": 6.609487302347867e-05, "loss": 0.6278, "step": 25517 }, { "epoch": 1.7289789281116605, "grad_norm": 5.4227447509765625, "learning_rate": 6.609350400438087e-05, "loss": 0.7588, "step": 25518 }, { "epoch": 1.7290466833796327, "grad_norm": 5.141941070556641, "learning_rate": 6.609213498528305e-05, "loss": 0.5313, "step": 25519 }, { "epoch": 1.729114438647605, "grad_norm": 6.300066947937012, "learning_rate": 6.609076596618523e-05, "loss": 0.7503, "step": 25520 }, { "epoch": 1.729182193915577, "grad_norm": 5.978633403778076, "learning_rate": 6.60893969470874e-05, "loss": 0.8036, "step": 25521 }, { "epoch": 1.729249949183549, "grad_norm": 5.071855545043945, "learning_rate": 6.608802792798959e-05, "loss": 0.764, "step": 25522 }, { "epoch": 1.729317704451521, "grad_norm": 8.546841621398926, "learning_rate": 6.608665890889178e-05, "loss": 0.7277, "step": 25523 }, { "epoch": 1.7293854597194933, "grad_norm": 6.327869892120361, "learning_rate": 6.608528988979396e-05, "loss": 0.8734, "step": 25524 }, { "epoch": 1.7294532149874653, "grad_norm": 4.923703193664551, "learning_rate": 6.608392087069614e-05, "loss": 0.5984, "step": 25525 }, { "epoch": 1.7295209702554373, "grad_norm": 5.71288537979126, "learning_rate": 6.608255185159834e-05, "loss": 0.7085, "step": 25526 }, { "epoch": 1.7295887255234095, "grad_norm": 5.394615173339844, "learning_rate": 6.608118283250052e-05, "loss": 0.9196, "step": 25527 }, { "epoch": 1.7296564807913817, "grad_norm": 5.945186138153076, "learning_rate": 6.60798138134027e-05, "loss": 0.5807, "step": 25528 }, { "epoch": 1.7297242360593537, "grad_norm": 7.364042282104492, "learning_rate": 6.607844479430489e-05, "loss": 0.7627, "step": 25529 }, { "epoch": 1.7297919913273256, "grad_norm": 6.241464614868164, "learning_rate": 6.607707577520707e-05, "loss": 0.7364, "step": 25530 }, { "epoch": 1.7298597465952978, "grad_norm": 4.333892345428467, "learning_rate": 6.607570675610925e-05, "loss": 0.4848, "step": 25531 }, { "epoch": 1.7299275018632698, "grad_norm": 4.047687530517578, "learning_rate": 6.607433773701144e-05, "loss": 0.6378, "step": 25532 }, { "epoch": 1.7299952571312418, "grad_norm": 7.485939025878906, "learning_rate": 6.607296871791362e-05, "loss": 0.8113, "step": 25533 }, { "epoch": 1.730063012399214, "grad_norm": 5.466124534606934, "learning_rate": 6.60715996988158e-05, "loss": 0.6269, "step": 25534 }, { "epoch": 1.7301307676671862, "grad_norm": 7.076124668121338, "learning_rate": 6.607023067971799e-05, "loss": 1.0288, "step": 25535 }, { "epoch": 1.7301985229351582, "grad_norm": 5.781157493591309, "learning_rate": 6.606886166062018e-05, "loss": 0.6794, "step": 25536 }, { "epoch": 1.7302662782031302, "grad_norm": 5.529167175292969, "learning_rate": 6.606749264152236e-05, "loss": 0.7348, "step": 25537 }, { "epoch": 1.7303340334711024, "grad_norm": 6.342505931854248, "learning_rate": 6.606612362242454e-05, "loss": 0.8036, "step": 25538 }, { "epoch": 1.7304017887390746, "grad_norm": 9.971144676208496, "learning_rate": 6.606475460332672e-05, "loss": 0.6215, "step": 25539 }, { "epoch": 1.7304695440070466, "grad_norm": 5.385395050048828, "learning_rate": 6.60633855842289e-05, "loss": 0.4776, "step": 25540 }, { "epoch": 1.7305372992750185, "grad_norm": 4.292444229125977, "learning_rate": 6.60620165651311e-05, "loss": 0.5806, "step": 25541 }, { "epoch": 1.7306050545429907, "grad_norm": 5.333913803100586, "learning_rate": 6.606064754603328e-05, "loss": 0.547, "step": 25542 }, { "epoch": 1.730672809810963, "grad_norm": 5.855260372161865, "learning_rate": 6.605927852693546e-05, "loss": 0.7613, "step": 25543 }, { "epoch": 1.730740565078935, "grad_norm": 5.685798645019531, "learning_rate": 6.605790950783764e-05, "loss": 0.8177, "step": 25544 }, { "epoch": 1.730808320346907, "grad_norm": 10.089472770690918, "learning_rate": 6.605654048873983e-05, "loss": 0.6602, "step": 25545 }, { "epoch": 1.730876075614879, "grad_norm": 5.080878734588623, "learning_rate": 6.605517146964201e-05, "loss": 0.6784, "step": 25546 }, { "epoch": 1.730943830882851, "grad_norm": 6.259892463684082, "learning_rate": 6.605380245054419e-05, "loss": 1.0384, "step": 25547 }, { "epoch": 1.731011586150823, "grad_norm": 4.891244888305664, "learning_rate": 6.605243343144637e-05, "loss": 0.7596, "step": 25548 }, { "epoch": 1.7310793414187953, "grad_norm": 8.010686874389648, "learning_rate": 6.605106441234855e-05, "loss": 0.6127, "step": 25549 }, { "epoch": 1.7311470966867675, "grad_norm": 4.290126323699951, "learning_rate": 6.604969539325074e-05, "loss": 0.6274, "step": 25550 }, { "epoch": 1.7312148519547395, "grad_norm": 5.422752857208252, "learning_rate": 6.604832637415293e-05, "loss": 0.7276, "step": 25551 }, { "epoch": 1.7312826072227114, "grad_norm": 6.219233989715576, "learning_rate": 6.60469573550551e-05, "loss": 0.6077, "step": 25552 }, { "epoch": 1.7313503624906836, "grad_norm": 8.631197929382324, "learning_rate": 6.604558833595729e-05, "loss": 0.7607, "step": 25553 }, { "epoch": 1.7314181177586558, "grad_norm": 3.9303784370422363, "learning_rate": 6.604421931685947e-05, "loss": 0.6347, "step": 25554 }, { "epoch": 1.7314858730266278, "grad_norm": 7.819691181182861, "learning_rate": 6.604285029776166e-05, "loss": 0.5791, "step": 25555 }, { "epoch": 1.7315536282945998, "grad_norm": 6.480127334594727, "learning_rate": 6.604148127866384e-05, "loss": 0.6953, "step": 25556 }, { "epoch": 1.731621383562572, "grad_norm": 5.88939094543457, "learning_rate": 6.604011225956602e-05, "loss": 0.7188, "step": 25557 }, { "epoch": 1.7316891388305442, "grad_norm": 8.970102310180664, "learning_rate": 6.60387432404682e-05, "loss": 0.6682, "step": 25558 }, { "epoch": 1.7317568940985162, "grad_norm": 7.166840553283691, "learning_rate": 6.60373742213704e-05, "loss": 0.639, "step": 25559 }, { "epoch": 1.7318246493664882, "grad_norm": 4.317883014678955, "learning_rate": 6.603600520227258e-05, "loss": 0.6008, "step": 25560 }, { "epoch": 1.7318924046344604, "grad_norm": 5.493449687957764, "learning_rate": 6.603463618317476e-05, "loss": 0.7625, "step": 25561 }, { "epoch": 1.7319601599024326, "grad_norm": 5.4304585456848145, "learning_rate": 6.603326716407694e-05, "loss": 0.6846, "step": 25562 }, { "epoch": 1.7320279151704046, "grad_norm": 5.879843235015869, "learning_rate": 6.603189814497912e-05, "loss": 0.5059, "step": 25563 }, { "epoch": 1.7320956704383765, "grad_norm": 6.495933532714844, "learning_rate": 6.603052912588131e-05, "loss": 0.5487, "step": 25564 }, { "epoch": 1.7321634257063487, "grad_norm": 4.096617698669434, "learning_rate": 6.602916010678349e-05, "loss": 0.5062, "step": 25565 }, { "epoch": 1.7322311809743207, "grad_norm": 5.678598403930664, "learning_rate": 6.602779108768567e-05, "loss": 0.8047, "step": 25566 }, { "epoch": 1.7322989362422927, "grad_norm": 3.6455793380737305, "learning_rate": 6.602642206858785e-05, "loss": 0.4227, "step": 25567 }, { "epoch": 1.732366691510265, "grad_norm": 6.752712249755859, "learning_rate": 6.602505304949005e-05, "loss": 0.5284, "step": 25568 }, { "epoch": 1.732434446778237, "grad_norm": 7.161937236785889, "learning_rate": 6.602368403039223e-05, "loss": 0.7129, "step": 25569 }, { "epoch": 1.732502202046209, "grad_norm": 5.397581100463867, "learning_rate": 6.60223150112944e-05, "loss": 0.6942, "step": 25570 }, { "epoch": 1.732569957314181, "grad_norm": 6.088955879211426, "learning_rate": 6.602094599219659e-05, "loss": 0.6465, "step": 25571 }, { "epoch": 1.7326377125821533, "grad_norm": 4.9699907302856445, "learning_rate": 6.601957697309878e-05, "loss": 0.5428, "step": 25572 }, { "epoch": 1.7327054678501255, "grad_norm": 4.542644500732422, "learning_rate": 6.601820795400096e-05, "loss": 0.8327, "step": 25573 }, { "epoch": 1.7327732231180974, "grad_norm": 6.418863296508789, "learning_rate": 6.601683893490314e-05, "loss": 0.6976, "step": 25574 }, { "epoch": 1.7328409783860694, "grad_norm": 3.9686763286590576, "learning_rate": 6.601546991580533e-05, "loss": 0.4487, "step": 25575 }, { "epoch": 1.7329087336540416, "grad_norm": 7.125677585601807, "learning_rate": 6.601410089670752e-05, "loss": 0.8326, "step": 25576 }, { "epoch": 1.7329764889220138, "grad_norm": 4.034999847412109, "learning_rate": 6.60127318776097e-05, "loss": 0.5583, "step": 25577 }, { "epoch": 1.7330442441899858, "grad_norm": 6.634951591491699, "learning_rate": 6.601136285851189e-05, "loss": 0.7984, "step": 25578 }, { "epoch": 1.7331119994579578, "grad_norm": 5.881780624389648, "learning_rate": 6.600999383941407e-05, "loss": 0.9082, "step": 25579 }, { "epoch": 1.73317975472593, "grad_norm": 5.644124984741211, "learning_rate": 6.600862482031625e-05, "loss": 0.7317, "step": 25580 }, { "epoch": 1.733247509993902, "grad_norm": 4.997689723968506, "learning_rate": 6.600725580121843e-05, "loss": 0.5033, "step": 25581 }, { "epoch": 1.733315265261874, "grad_norm": 7.692959308624268, "learning_rate": 6.600588678212062e-05, "loss": 0.773, "step": 25582 }, { "epoch": 1.7333830205298462, "grad_norm": 4.65723180770874, "learning_rate": 6.60045177630228e-05, "loss": 0.6071, "step": 25583 }, { "epoch": 1.7334507757978184, "grad_norm": 5.419930458068848, "learning_rate": 6.600314874392498e-05, "loss": 0.6157, "step": 25584 }, { "epoch": 1.7335185310657903, "grad_norm": 5.910074710845947, "learning_rate": 6.600177972482717e-05, "loss": 0.6017, "step": 25585 }, { "epoch": 1.7335862863337623, "grad_norm": 7.27717399597168, "learning_rate": 6.600041070572935e-05, "loss": 0.6495, "step": 25586 }, { "epoch": 1.7336540416017345, "grad_norm": 5.735796928405762, "learning_rate": 6.599904168663154e-05, "loss": 0.6343, "step": 25587 }, { "epoch": 1.7337217968697067, "grad_norm": 8.899347305297852, "learning_rate": 6.599767266753372e-05, "loss": 0.762, "step": 25588 }, { "epoch": 1.7337895521376787, "grad_norm": 4.980855464935303, "learning_rate": 6.59963036484359e-05, "loss": 0.6138, "step": 25589 }, { "epoch": 1.7338573074056507, "grad_norm": 7.619093894958496, "learning_rate": 6.599493462933808e-05, "loss": 0.6438, "step": 25590 }, { "epoch": 1.733925062673623, "grad_norm": 4.853003978729248, "learning_rate": 6.599356561024027e-05, "loss": 0.5973, "step": 25591 }, { "epoch": 1.733992817941595, "grad_norm": 8.52735424041748, "learning_rate": 6.599219659114245e-05, "loss": 1.0052, "step": 25592 }, { "epoch": 1.734060573209567, "grad_norm": 5.967507362365723, "learning_rate": 6.599082757204464e-05, "loss": 0.7718, "step": 25593 }, { "epoch": 1.734128328477539, "grad_norm": 5.472686290740967, "learning_rate": 6.598945855294682e-05, "loss": 0.7531, "step": 25594 }, { "epoch": 1.7341960837455113, "grad_norm": 5.608329772949219, "learning_rate": 6.5988089533849e-05, "loss": 0.6275, "step": 25595 }, { "epoch": 1.7342638390134832, "grad_norm": 5.340178489685059, "learning_rate": 6.598672051475119e-05, "loss": 0.5889, "step": 25596 }, { "epoch": 1.7343315942814552, "grad_norm": 5.973979473114014, "learning_rate": 6.598535149565337e-05, "loss": 0.5147, "step": 25597 }, { "epoch": 1.7343993495494274, "grad_norm": 4.145613670349121, "learning_rate": 6.598398247655555e-05, "loss": 0.6503, "step": 25598 }, { "epoch": 1.7344671048173996, "grad_norm": 5.668124198913574, "learning_rate": 6.598261345745773e-05, "loss": 0.7005, "step": 25599 }, { "epoch": 1.7345348600853716, "grad_norm": 5.004129886627197, "learning_rate": 6.598124443835992e-05, "loss": 0.612, "step": 25600 }, { "epoch": 1.7346026153533436, "grad_norm": 9.957189559936523, "learning_rate": 6.59798754192621e-05, "loss": 0.5934, "step": 25601 }, { "epoch": 1.7346703706213158, "grad_norm": 6.933279514312744, "learning_rate": 6.597850640016429e-05, "loss": 0.6202, "step": 25602 }, { "epoch": 1.734738125889288, "grad_norm": 6.362627983093262, "learning_rate": 6.597713738106647e-05, "loss": 0.653, "step": 25603 }, { "epoch": 1.73480588115726, "grad_norm": 5.552439212799072, "learning_rate": 6.597576836196865e-05, "loss": 0.8684, "step": 25604 }, { "epoch": 1.734873636425232, "grad_norm": 7.008484840393066, "learning_rate": 6.597439934287084e-05, "loss": 0.6752, "step": 25605 }, { "epoch": 1.7349413916932042, "grad_norm": 4.557954788208008, "learning_rate": 6.597303032377302e-05, "loss": 0.5517, "step": 25606 }, { "epoch": 1.7350091469611764, "grad_norm": 5.365555286407471, "learning_rate": 6.59716613046752e-05, "loss": 0.7555, "step": 25607 }, { "epoch": 1.7350769022291483, "grad_norm": 6.421380043029785, "learning_rate": 6.597029228557738e-05, "loss": 0.63, "step": 25608 }, { "epoch": 1.7351446574971203, "grad_norm": 6.179976463317871, "learning_rate": 6.596892326647956e-05, "loss": 0.6495, "step": 25609 }, { "epoch": 1.7352124127650925, "grad_norm": 8.540979385375977, "learning_rate": 6.596755424738176e-05, "loss": 0.6377, "step": 25610 }, { "epoch": 1.7352801680330647, "grad_norm": 10.045397758483887, "learning_rate": 6.596618522828394e-05, "loss": 0.8016, "step": 25611 }, { "epoch": 1.7353479233010367, "grad_norm": 4.574528217315674, "learning_rate": 6.596481620918612e-05, "loss": 0.6432, "step": 25612 }, { "epoch": 1.7354156785690087, "grad_norm": 7.613458156585693, "learning_rate": 6.59634471900883e-05, "loss": 0.641, "step": 25613 }, { "epoch": 1.735483433836981, "grad_norm": 5.976986408233643, "learning_rate": 6.596207817099049e-05, "loss": 0.6086, "step": 25614 }, { "epoch": 1.7355511891049529, "grad_norm": 6.945934295654297, "learning_rate": 6.596070915189267e-05, "loss": 0.6341, "step": 25615 }, { "epoch": 1.7356189443729249, "grad_norm": 4.878197193145752, "learning_rate": 6.595934013279485e-05, "loss": 0.5538, "step": 25616 }, { "epoch": 1.735686699640897, "grad_norm": 4.438850402832031, "learning_rate": 6.595797111369703e-05, "loss": 0.6563, "step": 25617 }, { "epoch": 1.7357544549088693, "grad_norm": 7.127029895782471, "learning_rate": 6.595660209459921e-05, "loss": 0.7289, "step": 25618 }, { "epoch": 1.7358222101768412, "grad_norm": 4.609349250793457, "learning_rate": 6.59552330755014e-05, "loss": 0.6017, "step": 25619 }, { "epoch": 1.7358899654448132, "grad_norm": 9.90104866027832, "learning_rate": 6.595386405640359e-05, "loss": 0.6659, "step": 25620 }, { "epoch": 1.7359577207127854, "grad_norm": 5.373215675354004, "learning_rate": 6.595249503730577e-05, "loss": 0.6135, "step": 25621 }, { "epoch": 1.7360254759807576, "grad_norm": 6.092944145202637, "learning_rate": 6.595112601820796e-05, "loss": 0.5728, "step": 25622 }, { "epoch": 1.7360932312487296, "grad_norm": 4.976345539093018, "learning_rate": 6.594975699911014e-05, "loss": 0.6653, "step": 25623 }, { "epoch": 1.7361609865167016, "grad_norm": 5.963764190673828, "learning_rate": 6.594838798001232e-05, "loss": 0.6008, "step": 25624 }, { "epoch": 1.7362287417846738, "grad_norm": 5.621171951293945, "learning_rate": 6.594701896091451e-05, "loss": 0.673, "step": 25625 }, { "epoch": 1.736296497052646, "grad_norm": 8.661623001098633, "learning_rate": 6.59456499418167e-05, "loss": 0.6659, "step": 25626 }, { "epoch": 1.736364252320618, "grad_norm": 8.060325622558594, "learning_rate": 6.594428092271888e-05, "loss": 0.8357, "step": 25627 }, { "epoch": 1.73643200758859, "grad_norm": 5.744637489318848, "learning_rate": 6.594291190362107e-05, "loss": 0.646, "step": 25628 }, { "epoch": 1.7364997628565622, "grad_norm": 6.1005778312683105, "learning_rate": 6.594154288452325e-05, "loss": 0.6844, "step": 25629 }, { "epoch": 1.7365675181245341, "grad_norm": 6.877413749694824, "learning_rate": 6.594017386542543e-05, "loss": 0.7134, "step": 25630 }, { "epoch": 1.7366352733925061, "grad_norm": 5.398859977722168, "learning_rate": 6.593880484632761e-05, "loss": 0.6381, "step": 25631 }, { "epoch": 1.7367030286604783, "grad_norm": 8.131665229797363, "learning_rate": 6.593743582722979e-05, "loss": 0.6835, "step": 25632 }, { "epoch": 1.7367707839284505, "grad_norm": 6.656064987182617, "learning_rate": 6.593606680813198e-05, "loss": 0.7121, "step": 25633 }, { "epoch": 1.7368385391964225, "grad_norm": 5.487008094787598, "learning_rate": 6.593469778903416e-05, "loss": 0.4743, "step": 25634 }, { "epoch": 1.7369062944643945, "grad_norm": 6.9027557373046875, "learning_rate": 6.593332876993634e-05, "loss": 0.6098, "step": 25635 }, { "epoch": 1.7369740497323667, "grad_norm": 4.393783092498779, "learning_rate": 6.593195975083853e-05, "loss": 0.5852, "step": 25636 }, { "epoch": 1.737041805000339, "grad_norm": 4.939314842224121, "learning_rate": 6.593059073174072e-05, "loss": 0.795, "step": 25637 }, { "epoch": 1.7371095602683109, "grad_norm": 8.355628967285156, "learning_rate": 6.59292217126429e-05, "loss": 0.7241, "step": 25638 }, { "epoch": 1.7371773155362829, "grad_norm": 5.048741340637207, "learning_rate": 6.592785269354508e-05, "loss": 0.737, "step": 25639 }, { "epoch": 1.737245070804255, "grad_norm": 5.074414253234863, "learning_rate": 6.592648367444726e-05, "loss": 0.4803, "step": 25640 }, { "epoch": 1.7373128260722273, "grad_norm": 5.173471927642822, "learning_rate": 6.592511465534944e-05, "loss": 0.6624, "step": 25641 }, { "epoch": 1.7373805813401992, "grad_norm": 5.589393615722656, "learning_rate": 6.592374563625163e-05, "loss": 0.7542, "step": 25642 }, { "epoch": 1.7374483366081712, "grad_norm": 6.596447944641113, "learning_rate": 6.592237661715381e-05, "loss": 0.4961, "step": 25643 }, { "epoch": 1.7375160918761434, "grad_norm": 5.236518859863281, "learning_rate": 6.5921007598056e-05, "loss": 0.6762, "step": 25644 }, { "epoch": 1.7375838471441154, "grad_norm": 7.286501884460449, "learning_rate": 6.591963857895818e-05, "loss": 0.5411, "step": 25645 }, { "epoch": 1.7376516024120874, "grad_norm": 5.120194911956787, "learning_rate": 6.591826955986037e-05, "loss": 0.6789, "step": 25646 }, { "epoch": 1.7377193576800596, "grad_norm": 11.137944221496582, "learning_rate": 6.591690054076255e-05, "loss": 0.5202, "step": 25647 }, { "epoch": 1.7377871129480318, "grad_norm": 5.928827285766602, "learning_rate": 6.591553152166473e-05, "loss": 0.7185, "step": 25648 }, { "epoch": 1.7378548682160038, "grad_norm": 7.244656085968018, "learning_rate": 6.591416250256691e-05, "loss": 1.0238, "step": 25649 }, { "epoch": 1.7379226234839757, "grad_norm": 6.669746398925781, "learning_rate": 6.591279348346909e-05, "loss": 0.578, "step": 25650 }, { "epoch": 1.737990378751948, "grad_norm": 5.049881935119629, "learning_rate": 6.591142446437128e-05, "loss": 0.7445, "step": 25651 }, { "epoch": 1.7380581340199202, "grad_norm": 7.4142746925354, "learning_rate": 6.591005544527346e-05, "loss": 0.744, "step": 25652 }, { "epoch": 1.7381258892878921, "grad_norm": 5.661040306091309, "learning_rate": 6.590868642617565e-05, "loss": 0.7853, "step": 25653 }, { "epoch": 1.7381936445558641, "grad_norm": 5.498867511749268, "learning_rate": 6.590731740707783e-05, "loss": 0.6948, "step": 25654 }, { "epoch": 1.7382613998238363, "grad_norm": 5.211367130279541, "learning_rate": 6.590594838798002e-05, "loss": 0.6745, "step": 25655 }, { "epoch": 1.7383291550918085, "grad_norm": 6.378080368041992, "learning_rate": 6.59045793688822e-05, "loss": 0.6938, "step": 25656 }, { "epoch": 1.7383969103597805, "grad_norm": 5.6606764793396, "learning_rate": 6.590321034978438e-05, "loss": 0.8094, "step": 25657 }, { "epoch": 1.7384646656277525, "grad_norm": 4.091710567474365, "learning_rate": 6.590184133068656e-05, "loss": 0.5208, "step": 25658 }, { "epoch": 1.7385324208957247, "grad_norm": 6.197096824645996, "learning_rate": 6.590047231158874e-05, "loss": 0.8154, "step": 25659 }, { "epoch": 1.7386001761636969, "grad_norm": 6.5755085945129395, "learning_rate": 6.589910329249093e-05, "loss": 0.597, "step": 25660 }, { "epoch": 1.7386679314316689, "grad_norm": 6.068049907684326, "learning_rate": 6.589773427339312e-05, "loss": 0.6156, "step": 25661 }, { "epoch": 1.7387356866996408, "grad_norm": 6.1400580406188965, "learning_rate": 6.58963652542953e-05, "loss": 0.8085, "step": 25662 }, { "epoch": 1.738803441967613, "grad_norm": 9.01591968536377, "learning_rate": 6.589499623519748e-05, "loss": 0.8206, "step": 25663 }, { "epoch": 1.738871197235585, "grad_norm": 6.319591999053955, "learning_rate": 6.589362721609966e-05, "loss": 0.4648, "step": 25664 }, { "epoch": 1.738938952503557, "grad_norm": 7.403785705566406, "learning_rate": 6.589225819700185e-05, "loss": 0.7265, "step": 25665 }, { "epoch": 1.7390067077715292, "grad_norm": 5.1548566818237305, "learning_rate": 6.589088917790403e-05, "loss": 0.5086, "step": 25666 }, { "epoch": 1.7390744630395014, "grad_norm": 5.482895374298096, "learning_rate": 6.588952015880621e-05, "loss": 0.637, "step": 25667 }, { "epoch": 1.7391422183074734, "grad_norm": 4.438100337982178, "learning_rate": 6.58881511397084e-05, "loss": 0.7223, "step": 25668 }, { "epoch": 1.7392099735754454, "grad_norm": 6.8635406494140625, "learning_rate": 6.588678212061058e-05, "loss": 0.9955, "step": 25669 }, { "epoch": 1.7392777288434176, "grad_norm": 5.0605573654174805, "learning_rate": 6.588541310151277e-05, "loss": 0.6451, "step": 25670 }, { "epoch": 1.7393454841113898, "grad_norm": 4.826018810272217, "learning_rate": 6.588404408241496e-05, "loss": 0.6077, "step": 25671 }, { "epoch": 1.7394132393793618, "grad_norm": 5.520399570465088, "learning_rate": 6.588267506331714e-05, "loss": 0.7008, "step": 25672 }, { "epoch": 1.7394809946473337, "grad_norm": 7.003137588500977, "learning_rate": 6.588130604421932e-05, "loss": 0.7857, "step": 25673 }, { "epoch": 1.739548749915306, "grad_norm": 4.968471050262451, "learning_rate": 6.587993702512151e-05, "loss": 0.4135, "step": 25674 }, { "epoch": 1.7396165051832782, "grad_norm": 6.582738399505615, "learning_rate": 6.58785680060237e-05, "loss": 0.9358, "step": 25675 }, { "epoch": 1.7396842604512501, "grad_norm": 4.99366569519043, "learning_rate": 6.587719898692587e-05, "loss": 0.56, "step": 25676 }, { "epoch": 1.7397520157192221, "grad_norm": 8.163372039794922, "learning_rate": 6.587582996782805e-05, "loss": 0.6495, "step": 25677 }, { "epoch": 1.7398197709871943, "grad_norm": 5.233698844909668, "learning_rate": 6.587446094873025e-05, "loss": 0.6177, "step": 25678 }, { "epoch": 1.7398875262551663, "grad_norm": 4.807053565979004, "learning_rate": 6.587309192963243e-05, "loss": 0.6176, "step": 25679 }, { "epoch": 1.7399552815231383, "grad_norm": 5.539912223815918, "learning_rate": 6.587172291053461e-05, "loss": 0.7477, "step": 25680 }, { "epoch": 1.7400230367911105, "grad_norm": 9.708006858825684, "learning_rate": 6.587035389143679e-05, "loss": 0.7479, "step": 25681 }, { "epoch": 1.7400907920590827, "grad_norm": 7.313464641571045, "learning_rate": 6.586898487233897e-05, "loss": 0.7512, "step": 25682 }, { "epoch": 1.7401585473270547, "grad_norm": 4.152487754821777, "learning_rate": 6.586761585324116e-05, "loss": 0.6113, "step": 25683 }, { "epoch": 1.7402263025950266, "grad_norm": 4.523417949676514, "learning_rate": 6.586624683414334e-05, "loss": 0.5814, "step": 25684 }, { "epoch": 1.7402940578629988, "grad_norm": 4.555922031402588, "learning_rate": 6.586487781504552e-05, "loss": 0.5193, "step": 25685 }, { "epoch": 1.740361813130971, "grad_norm": 5.965786457061768, "learning_rate": 6.58635087959477e-05, "loss": 0.6394, "step": 25686 }, { "epoch": 1.740429568398943, "grad_norm": 4.879258155822754, "learning_rate": 6.586213977684989e-05, "loss": 0.6792, "step": 25687 }, { "epoch": 1.740497323666915, "grad_norm": 5.5089874267578125, "learning_rate": 6.586077075775208e-05, "loss": 0.504, "step": 25688 }, { "epoch": 1.7405650789348872, "grad_norm": 4.974884510040283, "learning_rate": 6.585940173865426e-05, "loss": 0.5702, "step": 25689 }, { "epoch": 1.7406328342028594, "grad_norm": 8.558534622192383, "learning_rate": 6.585803271955644e-05, "loss": 0.6053, "step": 25690 }, { "epoch": 1.7407005894708314, "grad_norm": 5.625752925872803, "learning_rate": 6.585666370045862e-05, "loss": 0.6097, "step": 25691 }, { "epoch": 1.7407683447388034, "grad_norm": 6.389636039733887, "learning_rate": 6.585529468136081e-05, "loss": 0.8236, "step": 25692 }, { "epoch": 1.7408361000067756, "grad_norm": 4.013599872589111, "learning_rate": 6.5853925662263e-05, "loss": 0.5112, "step": 25693 }, { "epoch": 1.7409038552747476, "grad_norm": 6.346076965332031, "learning_rate": 6.585255664316517e-05, "loss": 0.6327, "step": 25694 }, { "epoch": 1.7409716105427195, "grad_norm": 6.322297096252441, "learning_rate": 6.585118762406736e-05, "loss": 0.7021, "step": 25695 }, { "epoch": 1.7410393658106917, "grad_norm": 5.099515914916992, "learning_rate": 6.584981860496954e-05, "loss": 0.6257, "step": 25696 }, { "epoch": 1.741107121078664, "grad_norm": 5.058123588562012, "learning_rate": 6.584844958587173e-05, "loss": 0.7955, "step": 25697 }, { "epoch": 1.741174876346636, "grad_norm": 6.9045257568359375, "learning_rate": 6.584708056677391e-05, "loss": 1.0253, "step": 25698 }, { "epoch": 1.741242631614608, "grad_norm": 7.135466575622559, "learning_rate": 6.584571154767609e-05, "loss": 0.6022, "step": 25699 }, { "epoch": 1.74131038688258, "grad_norm": 4.111330509185791, "learning_rate": 6.584434252857827e-05, "loss": 0.5623, "step": 25700 }, { "epoch": 1.7413781421505523, "grad_norm": 7.536962032318115, "learning_rate": 6.584297350948046e-05, "loss": 0.6023, "step": 25701 }, { "epoch": 1.7414458974185243, "grad_norm": 4.943263053894043, "learning_rate": 6.584160449038264e-05, "loss": 0.62, "step": 25702 }, { "epoch": 1.7415136526864963, "grad_norm": 7.599701404571533, "learning_rate": 6.584023547128482e-05, "loss": 0.6206, "step": 25703 }, { "epoch": 1.7415814079544685, "grad_norm": 8.920269966125488, "learning_rate": 6.5838866452187e-05, "loss": 0.5892, "step": 25704 }, { "epoch": 1.7416491632224407, "grad_norm": 5.76598596572876, "learning_rate": 6.583749743308919e-05, "loss": 0.6179, "step": 25705 }, { "epoch": 1.7417169184904127, "grad_norm": 5.664372444152832, "learning_rate": 6.583612841399138e-05, "loss": 0.6032, "step": 25706 }, { "epoch": 1.7417846737583846, "grad_norm": 5.803332805633545, "learning_rate": 6.583475939489356e-05, "loss": 0.7896, "step": 25707 }, { "epoch": 1.7418524290263568, "grad_norm": 4.637600898742676, "learning_rate": 6.583339037579574e-05, "loss": 0.5905, "step": 25708 }, { "epoch": 1.741920184294329, "grad_norm": 9.818278312683105, "learning_rate": 6.583202135669792e-05, "loss": 0.5407, "step": 25709 }, { "epoch": 1.741987939562301, "grad_norm": 6.853373050689697, "learning_rate": 6.58306523376001e-05, "loss": 0.5095, "step": 25710 }, { "epoch": 1.742055694830273, "grad_norm": 6.308809757232666, "learning_rate": 6.58292833185023e-05, "loss": 0.9032, "step": 25711 }, { "epoch": 1.7421234500982452, "grad_norm": 8.974072456359863, "learning_rate": 6.582791429940448e-05, "loss": 0.7538, "step": 25712 }, { "epoch": 1.7421912053662172, "grad_norm": 6.9478936195373535, "learning_rate": 6.582654528030666e-05, "loss": 1.0491, "step": 25713 }, { "epoch": 1.7422589606341892, "grad_norm": 4.284901142120361, "learning_rate": 6.582517626120885e-05, "loss": 0.5409, "step": 25714 }, { "epoch": 1.7423267159021614, "grad_norm": 6.749875545501709, "learning_rate": 6.582380724211103e-05, "loss": 0.5878, "step": 25715 }, { "epoch": 1.7423944711701336, "grad_norm": 5.000622272491455, "learning_rate": 6.582243822301321e-05, "loss": 0.5889, "step": 25716 }, { "epoch": 1.7424622264381056, "grad_norm": 6.760974407196045, "learning_rate": 6.58210692039154e-05, "loss": 0.8801, "step": 25717 }, { "epoch": 1.7425299817060775, "grad_norm": 5.439220905303955, "learning_rate": 6.581970018481758e-05, "loss": 0.6164, "step": 25718 }, { "epoch": 1.7425977369740497, "grad_norm": 6.161888599395752, "learning_rate": 6.581833116571976e-05, "loss": 0.6949, "step": 25719 }, { "epoch": 1.742665492242022, "grad_norm": 4.546342849731445, "learning_rate": 6.581696214662196e-05, "loss": 0.6252, "step": 25720 }, { "epoch": 1.742733247509994, "grad_norm": 6.467803955078125, "learning_rate": 6.581559312752414e-05, "loss": 0.7239, "step": 25721 }, { "epoch": 1.742801002777966, "grad_norm": 14.28783893585205, "learning_rate": 6.581422410842632e-05, "loss": 0.7483, "step": 25722 }, { "epoch": 1.742868758045938, "grad_norm": 4.8071770668029785, "learning_rate": 6.58128550893285e-05, "loss": 0.5007, "step": 25723 }, { "epoch": 1.7429365133139103, "grad_norm": 5.195301055908203, "learning_rate": 6.58114860702307e-05, "loss": 0.6506, "step": 25724 }, { "epoch": 1.7430042685818823, "grad_norm": 6.599780082702637, "learning_rate": 6.581011705113287e-05, "loss": 0.9542, "step": 25725 }, { "epoch": 1.7430720238498543, "grad_norm": 8.823718070983887, "learning_rate": 6.580874803203505e-05, "loss": 0.6473, "step": 25726 }, { "epoch": 1.7431397791178265, "grad_norm": 7.684313774108887, "learning_rate": 6.580737901293723e-05, "loss": 0.6391, "step": 25727 }, { "epoch": 1.7432075343857985, "grad_norm": 4.98506498336792, "learning_rate": 6.580600999383941e-05, "loss": 0.6565, "step": 25728 }, { "epoch": 1.7432752896537704, "grad_norm": 6.592257022857666, "learning_rate": 6.580464097474161e-05, "loss": 0.7835, "step": 25729 }, { "epoch": 1.7433430449217426, "grad_norm": 4.955665588378906, "learning_rate": 6.580327195564379e-05, "loss": 0.6094, "step": 25730 }, { "epoch": 1.7434108001897148, "grad_norm": 4.702327251434326, "learning_rate": 6.580190293654597e-05, "loss": 0.591, "step": 25731 }, { "epoch": 1.7434785554576868, "grad_norm": 5.489253997802734, "learning_rate": 6.580053391744815e-05, "loss": 0.6381, "step": 25732 }, { "epoch": 1.7435463107256588, "grad_norm": 5.20759916305542, "learning_rate": 6.579916489835034e-05, "loss": 0.588, "step": 25733 }, { "epoch": 1.743614065993631, "grad_norm": 7.177090644836426, "learning_rate": 6.579779587925252e-05, "loss": 0.5499, "step": 25734 }, { "epoch": 1.7436818212616032, "grad_norm": 5.366008758544922, "learning_rate": 6.57964268601547e-05, "loss": 0.6292, "step": 25735 }, { "epoch": 1.7437495765295752, "grad_norm": 7.24453067779541, "learning_rate": 6.579505784105688e-05, "loss": 0.6931, "step": 25736 }, { "epoch": 1.7438173317975472, "grad_norm": 6.688304424285889, "learning_rate": 6.579368882195906e-05, "loss": 0.8245, "step": 25737 }, { "epoch": 1.7438850870655194, "grad_norm": 5.532419204711914, "learning_rate": 6.579231980286126e-05, "loss": 0.6847, "step": 25738 }, { "epoch": 1.7439528423334916, "grad_norm": 6.385082721710205, "learning_rate": 6.579095078376344e-05, "loss": 0.6303, "step": 25739 }, { "epoch": 1.7440205976014636, "grad_norm": 6.279954433441162, "learning_rate": 6.578958176466562e-05, "loss": 0.5892, "step": 25740 }, { "epoch": 1.7440883528694355, "grad_norm": 6.359553337097168, "learning_rate": 6.57882127455678e-05, "loss": 0.7152, "step": 25741 }, { "epoch": 1.7441561081374077, "grad_norm": 4.301605701446533, "learning_rate": 6.578684372646998e-05, "loss": 0.6074, "step": 25742 }, { "epoch": 1.7442238634053797, "grad_norm": 6.902194499969482, "learning_rate": 6.578547470737217e-05, "loss": 0.7942, "step": 25743 }, { "epoch": 1.7442916186733517, "grad_norm": 6.907533168792725, "learning_rate": 6.578410568827435e-05, "loss": 0.5675, "step": 25744 }, { "epoch": 1.744359373941324, "grad_norm": 5.956394672393799, "learning_rate": 6.578273666917653e-05, "loss": 0.5172, "step": 25745 }, { "epoch": 1.744427129209296, "grad_norm": 6.77935791015625, "learning_rate": 6.578136765007872e-05, "loss": 0.7991, "step": 25746 }, { "epoch": 1.744494884477268, "grad_norm": 5.546535491943359, "learning_rate": 6.577999863098091e-05, "loss": 0.8067, "step": 25747 }, { "epoch": 1.74456263974524, "grad_norm": 4.969619274139404, "learning_rate": 6.577862961188309e-05, "loss": 0.6359, "step": 25748 }, { "epoch": 1.7446303950132123, "grad_norm": 6.4722137451171875, "learning_rate": 6.577726059278527e-05, "loss": 0.6328, "step": 25749 }, { "epoch": 1.7446981502811845, "grad_norm": 8.694899559020996, "learning_rate": 6.577589157368745e-05, "loss": 0.7885, "step": 25750 }, { "epoch": 1.7447659055491564, "grad_norm": 5.355075359344482, "learning_rate": 6.577452255458963e-05, "loss": 0.6964, "step": 25751 }, { "epoch": 1.7448336608171284, "grad_norm": 7.220716953277588, "learning_rate": 6.577315353549182e-05, "loss": 0.6302, "step": 25752 }, { "epoch": 1.7449014160851006, "grad_norm": 4.214052200317383, "learning_rate": 6.5771784516394e-05, "loss": 0.5508, "step": 25753 }, { "epoch": 1.7449691713530728, "grad_norm": 5.318426609039307, "learning_rate": 6.577041549729618e-05, "loss": 0.6204, "step": 25754 }, { "epoch": 1.7450369266210448, "grad_norm": 5.618316173553467, "learning_rate": 6.576904647819837e-05, "loss": 0.6274, "step": 25755 }, { "epoch": 1.7451046818890168, "grad_norm": 7.088129997253418, "learning_rate": 6.576767745910056e-05, "loss": 0.6306, "step": 25756 }, { "epoch": 1.745172437156989, "grad_norm": 7.021555423736572, "learning_rate": 6.576630844000274e-05, "loss": 0.7008, "step": 25757 }, { "epoch": 1.7452401924249612, "grad_norm": 5.483371257781982, "learning_rate": 6.576493942090492e-05, "loss": 0.4843, "step": 25758 }, { "epoch": 1.745307947692933, "grad_norm": 7.425747394561768, "learning_rate": 6.57635704018071e-05, "loss": 0.7701, "step": 25759 }, { "epoch": 1.7453757029609052, "grad_norm": 4.503190040588379, "learning_rate": 6.57622013827093e-05, "loss": 0.5189, "step": 25760 }, { "epoch": 1.7454434582288774, "grad_norm": 4.866891384124756, "learning_rate": 6.576083236361147e-05, "loss": 0.5303, "step": 25761 }, { "epoch": 1.7455112134968493, "grad_norm": 4.321117877960205, "learning_rate": 6.575946334451365e-05, "loss": 0.4822, "step": 25762 }, { "epoch": 1.7455789687648213, "grad_norm": 4.874314308166504, "learning_rate": 6.575809432541585e-05, "loss": 0.6901, "step": 25763 }, { "epoch": 1.7456467240327935, "grad_norm": 4.888028621673584, "learning_rate": 6.575672530631803e-05, "loss": 0.6453, "step": 25764 }, { "epoch": 1.7457144793007657, "grad_norm": 7.0384602546691895, "learning_rate": 6.575535628722021e-05, "loss": 0.5793, "step": 25765 }, { "epoch": 1.7457822345687377, "grad_norm": 7.243376731872559, "learning_rate": 6.57539872681224e-05, "loss": 0.5827, "step": 25766 }, { "epoch": 1.7458499898367097, "grad_norm": 5.692538261413574, "learning_rate": 6.575261824902458e-05, "loss": 0.7676, "step": 25767 }, { "epoch": 1.745917745104682, "grad_norm": 6.968898296356201, "learning_rate": 6.575124922992676e-05, "loss": 0.7488, "step": 25768 }, { "epoch": 1.745985500372654, "grad_norm": 6.26676607131958, "learning_rate": 6.574988021082894e-05, "loss": 0.5392, "step": 25769 }, { "epoch": 1.746053255640626, "grad_norm": 5.47633695602417, "learning_rate": 6.574851119173114e-05, "loss": 0.5346, "step": 25770 }, { "epoch": 1.746121010908598, "grad_norm": 5.262749195098877, "learning_rate": 6.574714217263332e-05, "loss": 0.7475, "step": 25771 }, { "epoch": 1.7461887661765703, "grad_norm": 8.45853042602539, "learning_rate": 6.57457731535355e-05, "loss": 0.5415, "step": 25772 }, { "epoch": 1.7462565214445425, "grad_norm": 4.522602081298828, "learning_rate": 6.574440413443768e-05, "loss": 0.4504, "step": 25773 }, { "epoch": 1.7463242767125144, "grad_norm": 4.278589725494385, "learning_rate": 6.574303511533986e-05, "loss": 0.5815, "step": 25774 }, { "epoch": 1.7463920319804864, "grad_norm": 7.9681572914123535, "learning_rate": 6.574166609624205e-05, "loss": 0.7919, "step": 25775 }, { "epoch": 1.7464597872484586, "grad_norm": 6.9029130935668945, "learning_rate": 6.574029707714423e-05, "loss": 0.7275, "step": 25776 }, { "epoch": 1.7465275425164306, "grad_norm": 5.71829891204834, "learning_rate": 6.573892805804641e-05, "loss": 0.6722, "step": 25777 }, { "epoch": 1.7465952977844026, "grad_norm": 4.974033355712891, "learning_rate": 6.57375590389486e-05, "loss": 0.6065, "step": 25778 }, { "epoch": 1.7466630530523748, "grad_norm": 5.5474066734313965, "learning_rate": 6.573619001985079e-05, "loss": 0.6182, "step": 25779 }, { "epoch": 1.746730808320347, "grad_norm": 6.233824253082275, "learning_rate": 6.573482100075297e-05, "loss": 0.7043, "step": 25780 }, { "epoch": 1.746798563588319, "grad_norm": 5.908051013946533, "learning_rate": 6.573345198165515e-05, "loss": 0.7733, "step": 25781 }, { "epoch": 1.746866318856291, "grad_norm": 6.083704948425293, "learning_rate": 6.573208296255733e-05, "loss": 0.6927, "step": 25782 }, { "epoch": 1.7469340741242632, "grad_norm": 4.982226848602295, "learning_rate": 6.573071394345951e-05, "loss": 0.4754, "step": 25783 }, { "epoch": 1.7470018293922354, "grad_norm": 5.122231483459473, "learning_rate": 6.57293449243617e-05, "loss": 0.6595, "step": 25784 }, { "epoch": 1.7470695846602073, "grad_norm": 7.763484001159668, "learning_rate": 6.572797590526388e-05, "loss": 0.8699, "step": 25785 }, { "epoch": 1.7471373399281793, "grad_norm": 4.307711124420166, "learning_rate": 6.572660688616606e-05, "loss": 0.5804, "step": 25786 }, { "epoch": 1.7472050951961515, "grad_norm": 6.894066333770752, "learning_rate": 6.572523786706824e-05, "loss": 0.8208, "step": 25787 }, { "epoch": 1.7472728504641237, "grad_norm": 4.315510272979736, "learning_rate": 6.572386884797044e-05, "loss": 0.7237, "step": 25788 }, { "epoch": 1.7473406057320957, "grad_norm": 5.104374885559082, "learning_rate": 6.572249982887262e-05, "loss": 0.7501, "step": 25789 }, { "epoch": 1.7474083610000677, "grad_norm": 5.595174312591553, "learning_rate": 6.57211308097748e-05, "loss": 0.5619, "step": 25790 }, { "epoch": 1.74747611626804, "grad_norm": 5.287594795227051, "learning_rate": 6.571976179067698e-05, "loss": 0.5281, "step": 25791 }, { "epoch": 1.7475438715360119, "grad_norm": 6.272217273712158, "learning_rate": 6.571839277157916e-05, "loss": 0.5419, "step": 25792 }, { "epoch": 1.7476116268039839, "grad_norm": 5.096037864685059, "learning_rate": 6.571702375248135e-05, "loss": 0.5876, "step": 25793 }, { "epoch": 1.747679382071956, "grad_norm": 8.194091796875, "learning_rate": 6.571565473338353e-05, "loss": 0.787, "step": 25794 }, { "epoch": 1.7477471373399283, "grad_norm": 6.367763996124268, "learning_rate": 6.571428571428571e-05, "loss": 0.5308, "step": 25795 }, { "epoch": 1.7478148926079002, "grad_norm": 8.519259452819824, "learning_rate": 6.57129166951879e-05, "loss": 0.6007, "step": 25796 }, { "epoch": 1.7478826478758722, "grad_norm": 6.417860507965088, "learning_rate": 6.571154767609008e-05, "loss": 0.7045, "step": 25797 }, { "epoch": 1.7479504031438444, "grad_norm": 6.260969638824463, "learning_rate": 6.571017865699227e-05, "loss": 0.6863, "step": 25798 }, { "epoch": 1.7480181584118166, "grad_norm": 5.760765552520752, "learning_rate": 6.570880963789445e-05, "loss": 0.6834, "step": 25799 }, { "epoch": 1.7480859136797886, "grad_norm": 5.959381580352783, "learning_rate": 6.570744061879663e-05, "loss": 0.7146, "step": 25800 }, { "epoch": 1.7481536689477606, "grad_norm": 6.284053325653076, "learning_rate": 6.570607159969881e-05, "loss": 0.7872, "step": 25801 }, { "epoch": 1.7482214242157328, "grad_norm": 5.263003349304199, "learning_rate": 6.5704702580601e-05, "loss": 0.6354, "step": 25802 }, { "epoch": 1.748289179483705, "grad_norm": 6.9170098304748535, "learning_rate": 6.570333356150318e-05, "loss": 0.834, "step": 25803 }, { "epoch": 1.748356934751677, "grad_norm": 4.882582187652588, "learning_rate": 6.570196454240536e-05, "loss": 0.8357, "step": 25804 }, { "epoch": 1.748424690019649, "grad_norm": 6.475587844848633, "learning_rate": 6.570059552330754e-05, "loss": 0.5403, "step": 25805 }, { "epoch": 1.7484924452876212, "grad_norm": 5.631176948547363, "learning_rate": 6.569922650420974e-05, "loss": 0.6456, "step": 25806 }, { "epoch": 1.7485602005555934, "grad_norm": 5.645204544067383, "learning_rate": 6.569785748511192e-05, "loss": 0.771, "step": 25807 }, { "epoch": 1.7486279558235651, "grad_norm": 5.516275882720947, "learning_rate": 6.56964884660141e-05, "loss": 0.6057, "step": 25808 }, { "epoch": 1.7486957110915373, "grad_norm": 7.613132953643799, "learning_rate": 6.56951194469163e-05, "loss": 0.6088, "step": 25809 }, { "epoch": 1.7487634663595095, "grad_norm": 8.744385719299316, "learning_rate": 6.569375042781847e-05, "loss": 0.5719, "step": 25810 }, { "epoch": 1.7488312216274815, "grad_norm": 8.922464370727539, "learning_rate": 6.569238140872065e-05, "loss": 0.6407, "step": 25811 }, { "epoch": 1.7488989768954535, "grad_norm": 7.267102241516113, "learning_rate": 6.569101238962285e-05, "loss": 0.596, "step": 25812 }, { "epoch": 1.7489667321634257, "grad_norm": 13.050261497497559, "learning_rate": 6.568964337052503e-05, "loss": 0.8438, "step": 25813 }, { "epoch": 1.7490344874313979, "grad_norm": 5.910432815551758, "learning_rate": 6.568827435142721e-05, "loss": 0.5265, "step": 25814 }, { "epoch": 1.7491022426993699, "grad_norm": 6.482776641845703, "learning_rate": 6.568690533232939e-05, "loss": 0.5244, "step": 25815 }, { "epoch": 1.7491699979673418, "grad_norm": 4.9004998207092285, "learning_rate": 6.568553631323158e-05, "loss": 0.6094, "step": 25816 }, { "epoch": 1.749237753235314, "grad_norm": 4.36956262588501, "learning_rate": 6.568416729413376e-05, "loss": 0.6113, "step": 25817 }, { "epoch": 1.7493055085032863, "grad_norm": 5.168211460113525, "learning_rate": 6.568279827503594e-05, "loss": 0.6276, "step": 25818 }, { "epoch": 1.7493732637712582, "grad_norm": 4.960877895355225, "learning_rate": 6.568142925593812e-05, "loss": 0.8379, "step": 25819 }, { "epoch": 1.7494410190392302, "grad_norm": 6.219804286956787, "learning_rate": 6.56800602368403e-05, "loss": 0.5687, "step": 25820 }, { "epoch": 1.7495087743072024, "grad_norm": 5.516842842102051, "learning_rate": 6.56786912177425e-05, "loss": 0.688, "step": 25821 }, { "epoch": 1.7495765295751746, "grad_norm": 7.586067199707031, "learning_rate": 6.567732219864468e-05, "loss": 0.5607, "step": 25822 }, { "epoch": 1.7496442848431466, "grad_norm": 5.0128374099731445, "learning_rate": 6.567595317954686e-05, "loss": 0.4364, "step": 25823 }, { "epoch": 1.7496442848431466, "eval_loss": 0.6726692318916321, "eval_noise_accuracy": 0.0, "eval_runtime": 1417.668, "eval_samples_per_second": 3.625, "eval_steps_per_second": 0.227, "eval_wer": 62.51616631353018, "step": 25823 }, { "epoch": 1.7497120401111186, "grad_norm": 7.227599620819092, "learning_rate": 6.567458416044904e-05, "loss": 0.6701, "step": 25824 }, { "epoch": 1.7497797953790908, "grad_norm": 6.076612949371338, "learning_rate": 6.567321514135123e-05, "loss": 0.6672, "step": 25825 }, { "epoch": 1.7498475506470628, "grad_norm": 4.20572566986084, "learning_rate": 6.567184612225341e-05, "loss": 0.7687, "step": 25826 }, { "epoch": 1.7499153059150347, "grad_norm": 7.4938225746154785, "learning_rate": 6.56704771031556e-05, "loss": 0.7531, "step": 25827 }, { "epoch": 1.749983061183007, "grad_norm": 5.82895040512085, "learning_rate": 6.566910808405777e-05, "loss": 0.6977, "step": 25828 }, { "epoch": 1.7500508164509792, "grad_norm": 5.042722225189209, "learning_rate": 6.566773906495995e-05, "loss": 0.6183, "step": 25829 }, { "epoch": 1.7501185717189511, "grad_norm": 5.512479782104492, "learning_rate": 6.566637004586215e-05, "loss": 0.7587, "step": 25830 }, { "epoch": 1.7501863269869231, "grad_norm": 5.67332124710083, "learning_rate": 6.566500102676433e-05, "loss": 0.8625, "step": 25831 }, { "epoch": 1.7502540822548953, "grad_norm": 5.60905647277832, "learning_rate": 6.566363200766651e-05, "loss": 0.7421, "step": 25832 }, { "epoch": 1.7503218375228675, "grad_norm": 4.8533759117126465, "learning_rate": 6.566226298856869e-05, "loss": 0.7826, "step": 25833 }, { "epoch": 1.7503895927908395, "grad_norm": 9.238015174865723, "learning_rate": 6.566089396947088e-05, "loss": 0.5548, "step": 25834 }, { "epoch": 1.7504573480588115, "grad_norm": 5.150877952575684, "learning_rate": 6.565952495037306e-05, "loss": 0.5286, "step": 25835 }, { "epoch": 1.7505251033267837, "grad_norm": 4.8785223960876465, "learning_rate": 6.565815593127524e-05, "loss": 0.5903, "step": 25836 }, { "epoch": 1.7505928585947559, "grad_norm": 6.5917816162109375, "learning_rate": 6.565678691217742e-05, "loss": 0.7785, "step": 25837 }, { "epoch": 1.7506606138627279, "grad_norm": 8.778099060058594, "learning_rate": 6.56554178930796e-05, "loss": 0.6501, "step": 25838 }, { "epoch": 1.7507283691306998, "grad_norm": 8.85311222076416, "learning_rate": 6.56540488739818e-05, "loss": 0.9583, "step": 25839 }, { "epoch": 1.750796124398672, "grad_norm": 5.388113975524902, "learning_rate": 6.565267985488398e-05, "loss": 0.4516, "step": 25840 }, { "epoch": 1.750863879666644, "grad_norm": 6.071376800537109, "learning_rate": 6.565131083578616e-05, "loss": 0.6396, "step": 25841 }, { "epoch": 1.750931634934616, "grad_norm": 5.863883018493652, "learning_rate": 6.564994181668834e-05, "loss": 0.6984, "step": 25842 }, { "epoch": 1.7509993902025882, "grad_norm": 6.308466911315918, "learning_rate": 6.564857279759052e-05, "loss": 0.6775, "step": 25843 }, { "epoch": 1.7510671454705604, "grad_norm": 5.295975208282471, "learning_rate": 6.564720377849271e-05, "loss": 0.7101, "step": 25844 }, { "epoch": 1.7511349007385324, "grad_norm": 5.230875492095947, "learning_rate": 6.56458347593949e-05, "loss": 0.6848, "step": 25845 }, { "epoch": 1.7512026560065044, "grad_norm": 4.298776626586914, "learning_rate": 6.564446574029707e-05, "loss": 0.6024, "step": 25846 }, { "epoch": 1.7512704112744766, "grad_norm": 4.550160884857178, "learning_rate": 6.564309672119925e-05, "loss": 0.6311, "step": 25847 }, { "epoch": 1.7513381665424488, "grad_norm": 4.570974349975586, "learning_rate": 6.564172770210145e-05, "loss": 0.5935, "step": 25848 }, { "epoch": 1.7514059218104208, "grad_norm": 5.91491174697876, "learning_rate": 6.564035868300363e-05, "loss": 0.6835, "step": 25849 }, { "epoch": 1.7514736770783927, "grad_norm": 7.910042762756348, "learning_rate": 6.563898966390581e-05, "loss": 0.7051, "step": 25850 }, { "epoch": 1.751541432346365, "grad_norm": 7.475001335144043, "learning_rate": 6.563762064480799e-05, "loss": 0.8045, "step": 25851 }, { "epoch": 1.7516091876143371, "grad_norm": 4.963623523712158, "learning_rate": 6.563625162571018e-05, "loss": 0.4366, "step": 25852 }, { "epoch": 1.7516769428823091, "grad_norm": 5.219460964202881, "learning_rate": 6.563488260661236e-05, "loss": 0.5778, "step": 25853 }, { "epoch": 1.751744698150281, "grad_norm": 5.359859943389893, "learning_rate": 6.563351358751454e-05, "loss": 0.5859, "step": 25854 }, { "epoch": 1.7518124534182533, "grad_norm": 5.4195556640625, "learning_rate": 6.563214456841674e-05, "loss": 0.6537, "step": 25855 }, { "epoch": 1.7518802086862255, "grad_norm": 5.123684406280518, "learning_rate": 6.563077554931892e-05, "loss": 0.5845, "step": 25856 }, { "epoch": 1.7519479639541973, "grad_norm": 5.569446086883545, "learning_rate": 6.56294065302211e-05, "loss": 0.519, "step": 25857 }, { "epoch": 1.7520157192221695, "grad_norm": 5.365669250488281, "learning_rate": 6.562803751112329e-05, "loss": 0.6877, "step": 25858 }, { "epoch": 1.7520834744901417, "grad_norm": 6.426041126251221, "learning_rate": 6.562666849202547e-05, "loss": 0.6624, "step": 25859 }, { "epoch": 1.7521512297581137, "grad_norm": 4.9448065757751465, "learning_rate": 6.562529947292765e-05, "loss": 0.4891, "step": 25860 }, { "epoch": 1.7522189850260856, "grad_norm": 8.940189361572266, "learning_rate": 6.562393045382983e-05, "loss": 0.5728, "step": 25861 }, { "epoch": 1.7522867402940578, "grad_norm": 7.54421329498291, "learning_rate": 6.562256143473203e-05, "loss": 0.6376, "step": 25862 }, { "epoch": 1.75235449556203, "grad_norm": 10.732321739196777, "learning_rate": 6.562119241563421e-05, "loss": 0.7383, "step": 25863 }, { "epoch": 1.752422250830002, "grad_norm": 5.4037017822265625, "learning_rate": 6.561982339653639e-05, "loss": 0.6331, "step": 25864 }, { "epoch": 1.752490006097974, "grad_norm": 5.664796829223633, "learning_rate": 6.561845437743857e-05, "loss": 0.4815, "step": 25865 }, { "epoch": 1.7525577613659462, "grad_norm": 7.327459812164307, "learning_rate": 6.561708535834076e-05, "loss": 0.4462, "step": 25866 }, { "epoch": 1.7526255166339184, "grad_norm": 4.969393730163574, "learning_rate": 6.561571633924294e-05, "loss": 0.6739, "step": 25867 }, { "epoch": 1.7526932719018904, "grad_norm": 5.012089252471924, "learning_rate": 6.561434732014512e-05, "loss": 0.658, "step": 25868 }, { "epoch": 1.7527610271698624, "grad_norm": 5.766871452331543, "learning_rate": 6.56129783010473e-05, "loss": 0.7148, "step": 25869 }, { "epoch": 1.7528287824378346, "grad_norm": 6.2534499168396, "learning_rate": 6.561160928194948e-05, "loss": 0.5337, "step": 25870 }, { "epoch": 1.7528965377058068, "grad_norm": 6.493129730224609, "learning_rate": 6.561024026285168e-05, "loss": 0.5148, "step": 25871 }, { "epoch": 1.7529642929737788, "grad_norm": 5.754786968231201, "learning_rate": 6.560887124375386e-05, "loss": 0.6828, "step": 25872 }, { "epoch": 1.7530320482417507, "grad_norm": 6.791285037994385, "learning_rate": 6.560750222465604e-05, "loss": 0.6611, "step": 25873 }, { "epoch": 1.753099803509723, "grad_norm": 5.3094635009765625, "learning_rate": 6.560613320555822e-05, "loss": 0.8367, "step": 25874 }, { "epoch": 1.753167558777695, "grad_norm": 5.729187488555908, "learning_rate": 6.56047641864604e-05, "loss": 0.4959, "step": 25875 }, { "epoch": 1.753235314045667, "grad_norm": 9.17248821258545, "learning_rate": 6.560339516736259e-05, "loss": 0.6412, "step": 25876 }, { "epoch": 1.753303069313639, "grad_norm": 6.588944911956787, "learning_rate": 6.560202614826477e-05, "loss": 0.619, "step": 25877 }, { "epoch": 1.7533708245816113, "grad_norm": 5.483118534088135, "learning_rate": 6.560065712916695e-05, "loss": 0.8028, "step": 25878 }, { "epoch": 1.7534385798495833, "grad_norm": 5.239889621734619, "learning_rate": 6.559928811006913e-05, "loss": 0.8002, "step": 25879 }, { "epoch": 1.7535063351175553, "grad_norm": 6.084550857543945, "learning_rate": 6.559791909097133e-05, "loss": 0.7418, "step": 25880 }, { "epoch": 1.7535740903855275, "grad_norm": 4.720691204071045, "learning_rate": 6.559655007187351e-05, "loss": 0.6879, "step": 25881 }, { "epoch": 1.7536418456534997, "grad_norm": 5.284574508666992, "learning_rate": 6.559518105277569e-05, "loss": 0.6904, "step": 25882 }, { "epoch": 1.7537096009214717, "grad_norm": 5.831902027130127, "learning_rate": 6.559381203367787e-05, "loss": 0.5757, "step": 25883 }, { "epoch": 1.7537773561894436, "grad_norm": 6.249098300933838, "learning_rate": 6.559244301458005e-05, "loss": 0.469, "step": 25884 }, { "epoch": 1.7538451114574158, "grad_norm": 5.684015274047852, "learning_rate": 6.559107399548224e-05, "loss": 0.5294, "step": 25885 }, { "epoch": 1.753912866725388, "grad_norm": 6.418021202087402, "learning_rate": 6.558970497638442e-05, "loss": 0.6001, "step": 25886 }, { "epoch": 1.75398062199336, "grad_norm": 7.681850910186768, "learning_rate": 6.55883359572866e-05, "loss": 0.8063, "step": 25887 }, { "epoch": 1.754048377261332, "grad_norm": 6.86993408203125, "learning_rate": 6.558696693818878e-05, "loss": 0.7978, "step": 25888 }, { "epoch": 1.7541161325293042, "grad_norm": 6.945374965667725, "learning_rate": 6.558559791909098e-05, "loss": 0.5818, "step": 25889 }, { "epoch": 1.7541838877972762, "grad_norm": 5.693384647369385, "learning_rate": 6.558422889999316e-05, "loss": 0.6464, "step": 25890 }, { "epoch": 1.7542516430652482, "grad_norm": 7.113884449005127, "learning_rate": 6.558285988089534e-05, "loss": 0.8236, "step": 25891 }, { "epoch": 1.7543193983332204, "grad_norm": 5.733856678009033, "learning_rate": 6.558149086179752e-05, "loss": 0.855, "step": 25892 }, { "epoch": 1.7543871536011926, "grad_norm": 5.061638832092285, "learning_rate": 6.55801218426997e-05, "loss": 0.5937, "step": 25893 }, { "epoch": 1.7544549088691646, "grad_norm": 6.240810394287109, "learning_rate": 6.55787528236019e-05, "loss": 0.4901, "step": 25894 }, { "epoch": 1.7545226641371365, "grad_norm": 6.464528560638428, "learning_rate": 6.557738380450407e-05, "loss": 0.6934, "step": 25895 }, { "epoch": 1.7545904194051087, "grad_norm": 7.034316062927246, "learning_rate": 6.557601478540625e-05, "loss": 0.6395, "step": 25896 }, { "epoch": 1.754658174673081, "grad_norm": 4.611041069030762, "learning_rate": 6.557464576630843e-05, "loss": 0.6211, "step": 25897 }, { "epoch": 1.754725929941053, "grad_norm": 5.231209754943848, "learning_rate": 6.557327674721061e-05, "loss": 0.5168, "step": 25898 }, { "epoch": 1.754793685209025, "grad_norm": 7.0618414878845215, "learning_rate": 6.557190772811281e-05, "loss": 0.5725, "step": 25899 }, { "epoch": 1.754861440476997, "grad_norm": 4.872448921203613, "learning_rate": 6.557053870901499e-05, "loss": 0.6318, "step": 25900 }, { "epoch": 1.7549291957449693, "grad_norm": 6.79256010055542, "learning_rate": 6.556916968991717e-05, "loss": 0.6354, "step": 25901 }, { "epoch": 1.7549969510129413, "grad_norm": 4.897211074829102, "learning_rate": 6.556780067081936e-05, "loss": 0.5833, "step": 25902 }, { "epoch": 1.7550647062809133, "grad_norm": 5.017561912536621, "learning_rate": 6.556643165172154e-05, "loss": 0.5764, "step": 25903 }, { "epoch": 1.7551324615488855, "grad_norm": 5.571432113647461, "learning_rate": 6.556506263262372e-05, "loss": 0.7145, "step": 25904 }, { "epoch": 1.7552002168168577, "grad_norm": 8.148841857910156, "learning_rate": 6.556369361352592e-05, "loss": 0.7032, "step": 25905 }, { "epoch": 1.7552679720848294, "grad_norm": 8.837944984436035, "learning_rate": 6.55623245944281e-05, "loss": 0.5588, "step": 25906 }, { "epoch": 1.7553357273528016, "grad_norm": 8.073527336120605, "learning_rate": 6.556095557533028e-05, "loss": 0.8427, "step": 25907 }, { "epoch": 1.7554034826207738, "grad_norm": 6.408937454223633, "learning_rate": 6.555958655623247e-05, "loss": 0.4789, "step": 25908 }, { "epoch": 1.7554712378887458, "grad_norm": 5.238623142242432, "learning_rate": 6.555821753713465e-05, "loss": 0.6374, "step": 25909 }, { "epoch": 1.7555389931567178, "grad_norm": 4.905627250671387, "learning_rate": 6.555684851803683e-05, "loss": 0.5617, "step": 25910 }, { "epoch": 1.75560674842469, "grad_norm": 4.94779109954834, "learning_rate": 6.555547949893901e-05, "loss": 0.7721, "step": 25911 }, { "epoch": 1.7556745036926622, "grad_norm": 5.058548450469971, "learning_rate": 6.555411047984121e-05, "loss": 0.791, "step": 25912 }, { "epoch": 1.7557422589606342, "grad_norm": 11.479742050170898, "learning_rate": 6.555274146074339e-05, "loss": 0.8485, "step": 25913 }, { "epoch": 1.7558100142286062, "grad_norm": 5.697183132171631, "learning_rate": 6.555137244164557e-05, "loss": 0.8121, "step": 25914 }, { "epoch": 1.7558777694965784, "grad_norm": 7.386518478393555, "learning_rate": 6.555000342254775e-05, "loss": 0.8268, "step": 25915 }, { "epoch": 1.7559455247645506, "grad_norm": 5.550824165344238, "learning_rate": 6.554863440344993e-05, "loss": 0.7796, "step": 25916 }, { "epoch": 1.7560132800325225, "grad_norm": 6.624141216278076, "learning_rate": 6.554726538435212e-05, "loss": 0.7535, "step": 25917 }, { "epoch": 1.7560810353004945, "grad_norm": 3.842359781265259, "learning_rate": 6.55458963652543e-05, "loss": 0.4686, "step": 25918 }, { "epoch": 1.7561487905684667, "grad_norm": 4.325156211853027, "learning_rate": 6.554452734615648e-05, "loss": 0.6454, "step": 25919 }, { "epoch": 1.756216545836439, "grad_norm": 5.042502403259277, "learning_rate": 6.554315832705866e-05, "loss": 0.6831, "step": 25920 }, { "epoch": 1.756284301104411, "grad_norm": 5.607142448425293, "learning_rate": 6.554178930796086e-05, "loss": 0.6935, "step": 25921 }, { "epoch": 1.756352056372383, "grad_norm": 6.299644947052002, "learning_rate": 6.554042028886304e-05, "loss": 0.8701, "step": 25922 }, { "epoch": 1.756419811640355, "grad_norm": 6.45064115524292, "learning_rate": 6.553905126976522e-05, "loss": 0.6632, "step": 25923 }, { "epoch": 1.756487566908327, "grad_norm": 5.421267986297607, "learning_rate": 6.55376822506674e-05, "loss": 0.5757, "step": 25924 }, { "epoch": 1.756555322176299, "grad_norm": 8.824456214904785, "learning_rate": 6.553631323156958e-05, "loss": 0.6644, "step": 25925 }, { "epoch": 1.7566230774442713, "grad_norm": 4.192281723022461, "learning_rate": 6.553494421247177e-05, "loss": 0.5373, "step": 25926 }, { "epoch": 1.7566908327122435, "grad_norm": 5.330092906951904, "learning_rate": 6.553357519337395e-05, "loss": 0.6172, "step": 25927 }, { "epoch": 1.7567585879802154, "grad_norm": 6.326024055480957, "learning_rate": 6.553220617427613e-05, "loss": 0.6789, "step": 25928 }, { "epoch": 1.7568263432481874, "grad_norm": 5.052905082702637, "learning_rate": 6.553083715517831e-05, "loss": 0.6637, "step": 25929 }, { "epoch": 1.7568940985161596, "grad_norm": 4.339183807373047, "learning_rate": 6.55294681360805e-05, "loss": 0.6897, "step": 25930 }, { "epoch": 1.7569618537841318, "grad_norm": 6.826128959655762, "learning_rate": 6.552809911698269e-05, "loss": 0.6564, "step": 25931 }, { "epoch": 1.7570296090521038, "grad_norm": 5.7809882164001465, "learning_rate": 6.552673009788487e-05, "loss": 0.6, "step": 25932 }, { "epoch": 1.7570973643200758, "grad_norm": 4.941299915313721, "learning_rate": 6.552536107878705e-05, "loss": 0.5965, "step": 25933 }, { "epoch": 1.757165119588048, "grad_norm": 6.07199764251709, "learning_rate": 6.552399205968923e-05, "loss": 0.5287, "step": 25934 }, { "epoch": 1.7572328748560202, "grad_norm": 5.547092437744141, "learning_rate": 6.552262304059142e-05, "loss": 0.8183, "step": 25935 }, { "epoch": 1.7573006301239922, "grad_norm": 6.366233825683594, "learning_rate": 6.55212540214936e-05, "loss": 0.7728, "step": 25936 }, { "epoch": 1.7573683853919642, "grad_norm": 4.93941593170166, "learning_rate": 6.551988500239578e-05, "loss": 0.4907, "step": 25937 }, { "epoch": 1.7574361406599364, "grad_norm": 6.378958225250244, "learning_rate": 6.551851598329796e-05, "loss": 0.6069, "step": 25938 }, { "epoch": 1.7575038959279083, "grad_norm": 5.258844375610352, "learning_rate": 6.551714696420014e-05, "loss": 0.6675, "step": 25939 }, { "epoch": 1.7575716511958803, "grad_norm": 6.539587020874023, "learning_rate": 6.551577794510234e-05, "loss": 0.6066, "step": 25940 }, { "epoch": 1.7576394064638525, "grad_norm": 5.255248069763184, "learning_rate": 6.551440892600452e-05, "loss": 0.4826, "step": 25941 }, { "epoch": 1.7577071617318247, "grad_norm": 8.701440811157227, "learning_rate": 6.55130399069067e-05, "loss": 0.6865, "step": 25942 }, { "epoch": 1.7577749169997967, "grad_norm": 6.247589111328125, "learning_rate": 6.551167088780888e-05, "loss": 0.651, "step": 25943 }, { "epoch": 1.7578426722677687, "grad_norm": 5.293149471282959, "learning_rate": 6.551030186871107e-05, "loss": 0.7506, "step": 25944 }, { "epoch": 1.757910427535741, "grad_norm": 5.162432670593262, "learning_rate": 6.550893284961325e-05, "loss": 0.8462, "step": 25945 }, { "epoch": 1.757978182803713, "grad_norm": 5.762948989868164, "learning_rate": 6.550756383051543e-05, "loss": 0.711, "step": 25946 }, { "epoch": 1.758045938071685, "grad_norm": 5.000258445739746, "learning_rate": 6.550619481141761e-05, "loss": 0.5515, "step": 25947 }, { "epoch": 1.758113693339657, "grad_norm": 6.04584264755249, "learning_rate": 6.550482579231981e-05, "loss": 0.9808, "step": 25948 }, { "epoch": 1.7581814486076293, "grad_norm": 4.598662853240967, "learning_rate": 6.550345677322199e-05, "loss": 0.5982, "step": 25949 }, { "epoch": 1.7582492038756015, "grad_norm": 4.166588306427002, "learning_rate": 6.550208775412417e-05, "loss": 0.585, "step": 25950 }, { "epoch": 1.7583169591435734, "grad_norm": 10.352437019348145, "learning_rate": 6.550071873502636e-05, "loss": 0.6245, "step": 25951 }, { "epoch": 1.7583847144115454, "grad_norm": 6.7580246925354, "learning_rate": 6.549934971592854e-05, "loss": 0.5846, "step": 25952 }, { "epoch": 1.7584524696795176, "grad_norm": 9.031633377075195, "learning_rate": 6.549798069683072e-05, "loss": 0.8116, "step": 25953 }, { "epoch": 1.7585202249474898, "grad_norm": 4.842124938964844, "learning_rate": 6.549661167773292e-05, "loss": 0.5459, "step": 25954 }, { "epoch": 1.7585879802154616, "grad_norm": 9.152101516723633, "learning_rate": 6.54952426586351e-05, "loss": 0.5521, "step": 25955 }, { "epoch": 1.7586557354834338, "grad_norm": 5.48805570602417, "learning_rate": 6.549387363953728e-05, "loss": 0.611, "step": 25956 }, { "epoch": 1.758723490751406, "grad_norm": 7.045570373535156, "learning_rate": 6.549250462043946e-05, "loss": 0.529, "step": 25957 }, { "epoch": 1.758791246019378, "grad_norm": 6.056333065032959, "learning_rate": 6.549113560134165e-05, "loss": 0.6044, "step": 25958 }, { "epoch": 1.75885900128735, "grad_norm": 5.139931678771973, "learning_rate": 6.548976658224383e-05, "loss": 0.8135, "step": 25959 }, { "epoch": 1.7589267565553222, "grad_norm": 5.4833879470825195, "learning_rate": 6.548839756314601e-05, "loss": 0.6127, "step": 25960 }, { "epoch": 1.7589945118232944, "grad_norm": 6.661175727844238, "learning_rate": 6.548702854404819e-05, "loss": 0.747, "step": 25961 }, { "epoch": 1.7590622670912663, "grad_norm": 5.838383197784424, "learning_rate": 6.548565952495037e-05, "loss": 0.7507, "step": 25962 }, { "epoch": 1.7591300223592383, "grad_norm": 5.641793727874756, "learning_rate": 6.548429050585257e-05, "loss": 0.5802, "step": 25963 }, { "epoch": 1.7591977776272105, "grad_norm": 7.197784423828125, "learning_rate": 6.548292148675475e-05, "loss": 0.8249, "step": 25964 }, { "epoch": 1.7592655328951827, "grad_norm": 6.6994524002075195, "learning_rate": 6.548155246765693e-05, "loss": 0.5054, "step": 25965 }, { "epoch": 1.7593332881631547, "grad_norm": 4.117260932922363, "learning_rate": 6.548018344855911e-05, "loss": 0.5483, "step": 25966 }, { "epoch": 1.7594010434311267, "grad_norm": 7.471590042114258, "learning_rate": 6.54788144294613e-05, "loss": 0.724, "step": 25967 }, { "epoch": 1.759468798699099, "grad_norm": 4.678570747375488, "learning_rate": 6.547744541036348e-05, "loss": 0.5948, "step": 25968 }, { "epoch": 1.759536553967071, "grad_norm": 4.723426342010498, "learning_rate": 6.547607639126566e-05, "loss": 0.6196, "step": 25969 }, { "epoch": 1.759604309235043, "grad_norm": 6.850006580352783, "learning_rate": 6.547470737216784e-05, "loss": 0.661, "step": 25970 }, { "epoch": 1.759672064503015, "grad_norm": 4.958638668060303, "learning_rate": 6.547333835307002e-05, "loss": 0.6395, "step": 25971 }, { "epoch": 1.7597398197709873, "grad_norm": 7.698724269866943, "learning_rate": 6.547196933397222e-05, "loss": 0.5712, "step": 25972 }, { "epoch": 1.7598075750389592, "grad_norm": 6.176445484161377, "learning_rate": 6.54706003148744e-05, "loss": 0.5211, "step": 25973 }, { "epoch": 1.7598753303069312, "grad_norm": 4.893373966217041, "learning_rate": 6.546923129577658e-05, "loss": 0.5816, "step": 25974 }, { "epoch": 1.7599430855749034, "grad_norm": 5.526969909667969, "learning_rate": 6.546786227667876e-05, "loss": 0.6235, "step": 25975 }, { "epoch": 1.7600108408428756, "grad_norm": 9.844045639038086, "learning_rate": 6.546649325758094e-05, "loss": 0.5773, "step": 25976 }, { "epoch": 1.7600785961108476, "grad_norm": 4.320882320404053, "learning_rate": 6.546512423848313e-05, "loss": 0.57, "step": 25977 }, { "epoch": 1.7601463513788196, "grad_norm": 5.783230304718018, "learning_rate": 6.546375521938531e-05, "loss": 0.7559, "step": 25978 }, { "epoch": 1.7602141066467918, "grad_norm": 10.476959228515625, "learning_rate": 6.54623862002875e-05, "loss": 0.6016, "step": 25979 }, { "epoch": 1.760281861914764, "grad_norm": 5.971802234649658, "learning_rate": 6.546101718118967e-05, "loss": 0.6716, "step": 25980 }, { "epoch": 1.760349617182736, "grad_norm": 4.906615257263184, "learning_rate": 6.545964816209187e-05, "loss": 0.633, "step": 25981 }, { "epoch": 1.760417372450708, "grad_norm": 7.26517391204834, "learning_rate": 6.545827914299405e-05, "loss": 0.8422, "step": 25982 }, { "epoch": 1.7604851277186802, "grad_norm": 7.254943370819092, "learning_rate": 6.545691012389623e-05, "loss": 0.6588, "step": 25983 }, { "epoch": 1.7605528829866524, "grad_norm": 6.568024635314941, "learning_rate": 6.545554110479841e-05, "loss": 0.6129, "step": 25984 }, { "epoch": 1.7606206382546243, "grad_norm": 4.65602445602417, "learning_rate": 6.545417208570059e-05, "loss": 0.5578, "step": 25985 }, { "epoch": 1.7606883935225963, "grad_norm": 9.815917015075684, "learning_rate": 6.545280306660278e-05, "loss": 0.5827, "step": 25986 }, { "epoch": 1.7607561487905685, "grad_norm": 6.27549409866333, "learning_rate": 6.545143404750496e-05, "loss": 0.692, "step": 25987 }, { "epoch": 1.7608239040585405, "grad_norm": 4.6234869956970215, "learning_rate": 6.545006502840714e-05, "loss": 0.563, "step": 25988 }, { "epoch": 1.7608916593265125, "grad_norm": 3.814049482345581, "learning_rate": 6.544869600930932e-05, "loss": 0.5276, "step": 25989 }, { "epoch": 1.7609594145944847, "grad_norm": 5.100503921508789, "learning_rate": 6.544732699021152e-05, "loss": 0.7471, "step": 25990 }, { "epoch": 1.7610271698624569, "grad_norm": 4.298525810241699, "learning_rate": 6.54459579711137e-05, "loss": 0.5211, "step": 25991 }, { "epoch": 1.7610949251304289, "grad_norm": 8.046632766723633, "learning_rate": 6.544458895201588e-05, "loss": 0.7405, "step": 25992 }, { "epoch": 1.7611626803984008, "grad_norm": 4.898441791534424, "learning_rate": 6.544321993291806e-05, "loss": 0.5633, "step": 25993 }, { "epoch": 1.761230435666373, "grad_norm": 4.6432785987854, "learning_rate": 6.544185091382025e-05, "loss": 0.54, "step": 25994 }, { "epoch": 1.7612981909343453, "grad_norm": 5.308745861053467, "learning_rate": 6.544048189472243e-05, "loss": 0.7353, "step": 25995 }, { "epoch": 1.7613659462023172, "grad_norm": 4.590399265289307, "learning_rate": 6.543911287562461e-05, "loss": 0.5773, "step": 25996 }, { "epoch": 1.7614337014702892, "grad_norm": 6.676545143127441, "learning_rate": 6.543774385652681e-05, "loss": 0.573, "step": 25997 }, { "epoch": 1.7615014567382614, "grad_norm": 6.637362480163574, "learning_rate": 6.543637483742899e-05, "loss": 0.652, "step": 25998 }, { "epoch": 1.7615692120062336, "grad_norm": 5.561255931854248, "learning_rate": 6.543500581833117e-05, "loss": 0.654, "step": 25999 }, { "epoch": 1.7616369672742056, "grad_norm": 7.007732391357422, "learning_rate": 6.543363679923336e-05, "loss": 0.6883, "step": 26000 }, { "epoch": 1.7617047225421776, "grad_norm": 5.393604278564453, "learning_rate": 6.543226778013554e-05, "loss": 0.5698, "step": 26001 }, { "epoch": 1.7617724778101498, "grad_norm": 6.897402286529541, "learning_rate": 6.543089876103772e-05, "loss": 0.6326, "step": 26002 }, { "epoch": 1.761840233078122, "grad_norm": 5.251434803009033, "learning_rate": 6.54295297419399e-05, "loss": 0.6659, "step": 26003 }, { "epoch": 1.7619079883460937, "grad_norm": 5.828359603881836, "learning_rate": 6.54281607228421e-05, "loss": 0.5814, "step": 26004 }, { "epoch": 1.761975743614066, "grad_norm": 7.657083988189697, "learning_rate": 6.542679170374428e-05, "loss": 0.8882, "step": 26005 }, { "epoch": 1.7620434988820382, "grad_norm": 6.106318950653076, "learning_rate": 6.542542268464646e-05, "loss": 0.7612, "step": 26006 }, { "epoch": 1.7621112541500101, "grad_norm": 5.010497570037842, "learning_rate": 6.542405366554864e-05, "loss": 0.5696, "step": 26007 }, { "epoch": 1.7621790094179821, "grad_norm": 8.421164512634277, "learning_rate": 6.542268464645082e-05, "loss": 0.7194, "step": 26008 }, { "epoch": 1.7622467646859543, "grad_norm": 7.1616902351379395, "learning_rate": 6.542131562735301e-05, "loss": 0.6983, "step": 26009 }, { "epoch": 1.7623145199539265, "grad_norm": 9.530068397521973, "learning_rate": 6.541994660825519e-05, "loss": 0.5873, "step": 26010 }, { "epoch": 1.7623822752218985, "grad_norm": 9.944863319396973, "learning_rate": 6.541857758915737e-05, "loss": 0.6993, "step": 26011 }, { "epoch": 1.7624500304898705, "grad_norm": 5.367223262786865, "learning_rate": 6.541720857005955e-05, "loss": 0.674, "step": 26012 }, { "epoch": 1.7625177857578427, "grad_norm": 6.0474138259887695, "learning_rate": 6.541583955096175e-05, "loss": 0.6119, "step": 26013 }, { "epoch": 1.7625855410258149, "grad_norm": 6.8727569580078125, "learning_rate": 6.541447053186393e-05, "loss": 0.6148, "step": 26014 }, { "epoch": 1.7626532962937869, "grad_norm": 8.032042503356934, "learning_rate": 6.541310151276611e-05, "loss": 0.5611, "step": 26015 }, { "epoch": 1.7627210515617588, "grad_norm": 4.887080192565918, "learning_rate": 6.541173249366829e-05, "loss": 0.5864, "step": 26016 }, { "epoch": 1.762788806829731, "grad_norm": 7.992802143096924, "learning_rate": 6.541036347457047e-05, "loss": 0.7493, "step": 26017 }, { "epoch": 1.7628565620977033, "grad_norm": 4.735798358917236, "learning_rate": 6.540899445547266e-05, "loss": 0.605, "step": 26018 }, { "epoch": 1.7629243173656752, "grad_norm": 6.112973213195801, "learning_rate": 6.540762543637484e-05, "loss": 0.6626, "step": 26019 }, { "epoch": 1.7629920726336472, "grad_norm": 6.254942893981934, "learning_rate": 6.540625641727702e-05, "loss": 0.7196, "step": 26020 }, { "epoch": 1.7630598279016194, "grad_norm": 8.520581245422363, "learning_rate": 6.54048873981792e-05, "loss": 0.4977, "step": 26021 }, { "epoch": 1.7631275831695914, "grad_norm": 5.8406901359558105, "learning_rate": 6.54035183790814e-05, "loss": 0.7756, "step": 26022 }, { "epoch": 1.7631953384375634, "grad_norm": 10.93246841430664, "learning_rate": 6.540214935998358e-05, "loss": 0.8368, "step": 26023 }, { "epoch": 1.7632630937055356, "grad_norm": 6.0172224044799805, "learning_rate": 6.540078034088576e-05, "loss": 0.5544, "step": 26024 }, { "epoch": 1.7633308489735078, "grad_norm": 5.282917499542236, "learning_rate": 6.539941132178794e-05, "loss": 0.7205, "step": 26025 }, { "epoch": 1.7633986042414798, "grad_norm": 10.382974624633789, "learning_rate": 6.539804230269012e-05, "loss": 0.4527, "step": 26026 }, { "epoch": 1.7634663595094517, "grad_norm": 5.5675764083862305, "learning_rate": 6.539667328359231e-05, "loss": 0.6895, "step": 26027 }, { "epoch": 1.763534114777424, "grad_norm": 5.941452503204346, "learning_rate": 6.539530426449449e-05, "loss": 0.5432, "step": 26028 }, { "epoch": 1.7636018700453961, "grad_norm": 7.118484973907471, "learning_rate": 6.539393524539667e-05, "loss": 0.8546, "step": 26029 }, { "epoch": 1.7636696253133681, "grad_norm": 5.190835475921631, "learning_rate": 6.539256622629885e-05, "loss": 0.5955, "step": 26030 }, { "epoch": 1.76373738058134, "grad_norm": 6.062416076660156, "learning_rate": 6.539119720720103e-05, "loss": 0.6003, "step": 26031 }, { "epoch": 1.7638051358493123, "grad_norm": 5.884268283843994, "learning_rate": 6.538982818810323e-05, "loss": 0.4987, "step": 26032 }, { "epoch": 1.7638728911172845, "grad_norm": 11.770475387573242, "learning_rate": 6.538845916900541e-05, "loss": 0.5638, "step": 26033 }, { "epoch": 1.7639406463852565, "grad_norm": 5.32235860824585, "learning_rate": 6.538709014990759e-05, "loss": 0.6356, "step": 26034 }, { "epoch": 1.7640084016532285, "grad_norm": 5.284451961517334, "learning_rate": 6.538572113080977e-05, "loss": 0.8046, "step": 26035 }, { "epoch": 1.7640761569212007, "grad_norm": 6.055844306945801, "learning_rate": 6.538435211171196e-05, "loss": 0.5743, "step": 26036 }, { "epoch": 1.7641439121891727, "grad_norm": 4.777540683746338, "learning_rate": 6.538298309261414e-05, "loss": 0.6828, "step": 26037 }, { "epoch": 1.7642116674571446, "grad_norm": 5.87257194519043, "learning_rate": 6.538161407351632e-05, "loss": 0.743, "step": 26038 }, { "epoch": 1.7642794227251168, "grad_norm": 5.612257957458496, "learning_rate": 6.53802450544185e-05, "loss": 0.7301, "step": 26039 }, { "epoch": 1.764347177993089, "grad_norm": 4.901982307434082, "learning_rate": 6.53788760353207e-05, "loss": 0.697, "step": 26040 }, { "epoch": 1.764414933261061, "grad_norm": 4.504999160766602, "learning_rate": 6.537750701622288e-05, "loss": 0.5678, "step": 26041 }, { "epoch": 1.764482688529033, "grad_norm": 7.39354944229126, "learning_rate": 6.537613799712506e-05, "loss": 0.5451, "step": 26042 }, { "epoch": 1.7645504437970052, "grad_norm": 5.382939338684082, "learning_rate": 6.537476897802725e-05, "loss": 0.7785, "step": 26043 }, { "epoch": 1.7646181990649774, "grad_norm": 5.103958606719971, "learning_rate": 6.537339995892943e-05, "loss": 0.6338, "step": 26044 }, { "epoch": 1.7646859543329494, "grad_norm": 7.330012321472168, "learning_rate": 6.537203093983161e-05, "loss": 0.7256, "step": 26045 }, { "epoch": 1.7647537096009214, "grad_norm": 7.930817127227783, "learning_rate": 6.53706619207338e-05, "loss": 0.4925, "step": 26046 }, { "epoch": 1.7648214648688936, "grad_norm": 4.290006637573242, "learning_rate": 6.536929290163599e-05, "loss": 0.6182, "step": 26047 }, { "epoch": 1.7648892201368658, "grad_norm": 7.0014424324035645, "learning_rate": 6.536792388253817e-05, "loss": 0.4437, "step": 26048 }, { "epoch": 1.7649569754048378, "grad_norm": 6.501501560211182, "learning_rate": 6.536655486344035e-05, "loss": 0.6309, "step": 26049 }, { "epoch": 1.7650247306728097, "grad_norm": 9.471246719360352, "learning_rate": 6.536518584434254e-05, "loss": 0.5698, "step": 26050 }, { "epoch": 1.765092485940782, "grad_norm": 6.281744480133057, "learning_rate": 6.536381682524472e-05, "loss": 0.6205, "step": 26051 }, { "epoch": 1.7651602412087541, "grad_norm": 4.783389568328857, "learning_rate": 6.53624478061469e-05, "loss": 0.5099, "step": 26052 }, { "epoch": 1.765227996476726, "grad_norm": 9.8426513671875, "learning_rate": 6.536107878704908e-05, "loss": 0.678, "step": 26053 }, { "epoch": 1.765295751744698, "grad_norm": 5.765985012054443, "learning_rate": 6.535970976795128e-05, "loss": 0.6766, "step": 26054 }, { "epoch": 1.7653635070126703, "grad_norm": 4.655985355377197, "learning_rate": 6.535834074885346e-05, "loss": 0.5367, "step": 26055 }, { "epoch": 1.7654312622806423, "grad_norm": 5.177961826324463, "learning_rate": 6.535697172975564e-05, "loss": 0.5559, "step": 26056 }, { "epoch": 1.7654990175486143, "grad_norm": 5.326662540435791, "learning_rate": 6.535560271065782e-05, "loss": 0.7873, "step": 26057 }, { "epoch": 1.7655667728165865, "grad_norm": 7.835726737976074, "learning_rate": 6.535423369156e-05, "loss": 0.5462, "step": 26058 }, { "epoch": 1.7656345280845587, "grad_norm": 4.577127933502197, "learning_rate": 6.535286467246219e-05, "loss": 0.7398, "step": 26059 }, { "epoch": 1.7657022833525307, "grad_norm": 5.871502876281738, "learning_rate": 6.535149565336437e-05, "loss": 0.7327, "step": 26060 }, { "epoch": 1.7657700386205026, "grad_norm": 5.115939617156982, "learning_rate": 6.535012663426655e-05, "loss": 0.6191, "step": 26061 }, { "epoch": 1.7658377938884748, "grad_norm": 3.876553535461426, "learning_rate": 6.534875761516873e-05, "loss": 0.5889, "step": 26062 }, { "epoch": 1.765905549156447, "grad_norm": 5.687398910522461, "learning_rate": 6.534738859607091e-05, "loss": 0.6737, "step": 26063 }, { "epoch": 1.765973304424419, "grad_norm": 5.404374122619629, "learning_rate": 6.534601957697311e-05, "loss": 0.744, "step": 26064 }, { "epoch": 1.766041059692391, "grad_norm": 5.287882328033447, "learning_rate": 6.534465055787529e-05, "loss": 0.696, "step": 26065 }, { "epoch": 1.7661088149603632, "grad_norm": 4.97260856628418, "learning_rate": 6.534328153877747e-05, "loss": 0.7562, "step": 26066 }, { "epoch": 1.7661765702283354, "grad_norm": 6.113078594207764, "learning_rate": 6.534191251967965e-05, "loss": 0.6772, "step": 26067 }, { "epoch": 1.7662443254963074, "grad_norm": 6.604419708251953, "learning_rate": 6.534054350058184e-05, "loss": 0.6028, "step": 26068 }, { "epoch": 1.7663120807642794, "grad_norm": 6.609261512756348, "learning_rate": 6.533917448148402e-05, "loss": 0.8738, "step": 26069 }, { "epoch": 1.7663798360322516, "grad_norm": 5.951005458831787, "learning_rate": 6.53378054623862e-05, "loss": 0.5643, "step": 26070 }, { "epoch": 1.7664475913002236, "grad_norm": 7.544666290283203, "learning_rate": 6.533643644328838e-05, "loss": 0.5708, "step": 26071 }, { "epoch": 1.7665153465681955, "grad_norm": 4.604458332061768, "learning_rate": 6.533506742419056e-05, "loss": 0.5754, "step": 26072 }, { "epoch": 1.7665831018361677, "grad_norm": 4.461895942687988, "learning_rate": 6.533369840509276e-05, "loss": 0.6377, "step": 26073 }, { "epoch": 1.76665085710414, "grad_norm": 4.388378620147705, "learning_rate": 6.533232938599494e-05, "loss": 0.5078, "step": 26074 }, { "epoch": 1.766718612372112, "grad_norm": 5.902589797973633, "learning_rate": 6.533096036689712e-05, "loss": 0.6055, "step": 26075 }, { "epoch": 1.766786367640084, "grad_norm": 4.487920761108398, "learning_rate": 6.53295913477993e-05, "loss": 0.6244, "step": 26076 }, { "epoch": 1.766854122908056, "grad_norm": 8.149316787719727, "learning_rate": 6.532822232870149e-05, "loss": 0.6511, "step": 26077 }, { "epoch": 1.7669218781760283, "grad_norm": 6.544806003570557, "learning_rate": 6.532685330960367e-05, "loss": 0.6353, "step": 26078 }, { "epoch": 1.7669896334440003, "grad_norm": 8.300387382507324, "learning_rate": 6.532548429050585e-05, "loss": 0.7692, "step": 26079 }, { "epoch": 1.7670573887119723, "grad_norm": 6.352061748504639, "learning_rate": 6.532411527140803e-05, "loss": 0.7005, "step": 26080 }, { "epoch": 1.7671251439799445, "grad_norm": 5.491880893707275, "learning_rate": 6.532274625231021e-05, "loss": 0.7571, "step": 26081 }, { "epoch": 1.7671928992479167, "grad_norm": 5.847635269165039, "learning_rate": 6.532137723321241e-05, "loss": 0.6368, "step": 26082 }, { "epoch": 1.7672606545158887, "grad_norm": 5.043829441070557, "learning_rate": 6.532000821411459e-05, "loss": 0.6826, "step": 26083 }, { "epoch": 1.7673284097838606, "grad_norm": 4.656184196472168, "learning_rate": 6.531863919501677e-05, "loss": 0.6134, "step": 26084 }, { "epoch": 1.7673961650518328, "grad_norm": 5.013433933258057, "learning_rate": 6.531727017591895e-05, "loss": 0.4294, "step": 26085 }, { "epoch": 1.7674639203198048, "grad_norm": 4.183717727661133, "learning_rate": 6.531590115682114e-05, "loss": 0.5381, "step": 26086 }, { "epoch": 1.7675316755877768, "grad_norm": 6.94572639465332, "learning_rate": 6.531453213772332e-05, "loss": 0.7359, "step": 26087 }, { "epoch": 1.767599430855749, "grad_norm": 5.487498760223389, "learning_rate": 6.53131631186255e-05, "loss": 0.5451, "step": 26088 }, { "epoch": 1.7676671861237212, "grad_norm": 8.08267879486084, "learning_rate": 6.53117940995277e-05, "loss": 0.7425, "step": 26089 }, { "epoch": 1.7677349413916932, "grad_norm": 3.9045464992523193, "learning_rate": 6.531042508042988e-05, "loss": 0.5496, "step": 26090 }, { "epoch": 1.7678026966596652, "grad_norm": 5.489101409912109, "learning_rate": 6.530905606133206e-05, "loss": 0.6907, "step": 26091 }, { "epoch": 1.7678704519276374, "grad_norm": 6.696456432342529, "learning_rate": 6.530768704223425e-05, "loss": 0.6272, "step": 26092 }, { "epoch": 1.7679382071956096, "grad_norm": 6.150305271148682, "learning_rate": 6.530631802313643e-05, "loss": 0.7311, "step": 26093 }, { "epoch": 1.7680059624635815, "grad_norm": 4.522466659545898, "learning_rate": 6.530494900403861e-05, "loss": 0.4945, "step": 26094 }, { "epoch": 1.7680737177315535, "grad_norm": 8.462139129638672, "learning_rate": 6.530357998494079e-05, "loss": 0.8046, "step": 26095 }, { "epoch": 1.7681414729995257, "grad_norm": 3.515920877456665, "learning_rate": 6.530221096584299e-05, "loss": 0.4933, "step": 26096 }, { "epoch": 1.768209228267498, "grad_norm": 5.681598663330078, "learning_rate": 6.530084194674517e-05, "loss": 0.6736, "step": 26097 }, { "epoch": 1.76827698353547, "grad_norm": 6.083437919616699, "learning_rate": 6.529947292764735e-05, "loss": 0.6017, "step": 26098 }, { "epoch": 1.768344738803442, "grad_norm": 4.953409194946289, "learning_rate": 6.529810390854953e-05, "loss": 0.7449, "step": 26099 }, { "epoch": 1.768412494071414, "grad_norm": 5.798496723175049, "learning_rate": 6.529673488945172e-05, "loss": 0.8422, "step": 26100 }, { "epoch": 1.7684802493393863, "grad_norm": 5.354043483734131, "learning_rate": 6.52953658703539e-05, "loss": 0.5749, "step": 26101 }, { "epoch": 1.768548004607358, "grad_norm": 7.558041095733643, "learning_rate": 6.529399685125608e-05, "loss": 0.7098, "step": 26102 }, { "epoch": 1.7686157598753303, "grad_norm": 7.559915065765381, "learning_rate": 6.529262783215826e-05, "loss": 0.8049, "step": 26103 }, { "epoch": 1.7686835151433025, "grad_norm": 7.357734203338623, "learning_rate": 6.529125881306044e-05, "loss": 0.6352, "step": 26104 }, { "epoch": 1.7687512704112744, "grad_norm": 6.6557512283325195, "learning_rate": 6.528988979396264e-05, "loss": 0.8462, "step": 26105 }, { "epoch": 1.7688190256792464, "grad_norm": 4.447732448577881, "learning_rate": 6.528852077486482e-05, "loss": 0.6324, "step": 26106 }, { "epoch": 1.7688867809472186, "grad_norm": 6.347873687744141, "learning_rate": 6.5287151755767e-05, "loss": 0.7366, "step": 26107 }, { "epoch": 1.7689545362151908, "grad_norm": 6.684083938598633, "learning_rate": 6.528578273666918e-05, "loss": 0.7047, "step": 26108 }, { "epoch": 1.7690222914831628, "grad_norm": 4.173487663269043, "learning_rate": 6.528441371757136e-05, "loss": 0.5987, "step": 26109 }, { "epoch": 1.7690900467511348, "grad_norm": 8.687338829040527, "learning_rate": 6.528304469847355e-05, "loss": 0.763, "step": 26110 }, { "epoch": 1.769157802019107, "grad_norm": 6.043261528015137, "learning_rate": 6.528167567937573e-05, "loss": 0.7625, "step": 26111 }, { "epoch": 1.7692255572870792, "grad_norm": 6.925621032714844, "learning_rate": 6.528030666027791e-05, "loss": 0.6787, "step": 26112 }, { "epoch": 1.7692933125550512, "grad_norm": 4.519059181213379, "learning_rate": 6.527893764118009e-05, "loss": 0.5957, "step": 26113 }, { "epoch": 1.7693610678230232, "grad_norm": 4.737812042236328, "learning_rate": 6.527756862208229e-05, "loss": 0.5229, "step": 26114 }, { "epoch": 1.7694288230909954, "grad_norm": 9.810925483703613, "learning_rate": 6.527619960298447e-05, "loss": 0.667, "step": 26115 }, { "epoch": 1.7694965783589676, "grad_norm": 5.9565110206604, "learning_rate": 6.527483058388665e-05, "loss": 0.6851, "step": 26116 }, { "epoch": 1.7695643336269395, "grad_norm": 8.545305252075195, "learning_rate": 6.527346156478883e-05, "loss": 0.7418, "step": 26117 }, { "epoch": 1.7696320888949115, "grad_norm": 7.720947265625, "learning_rate": 6.527209254569101e-05, "loss": 0.8674, "step": 26118 }, { "epoch": 1.7696998441628837, "grad_norm": 4.52553653717041, "learning_rate": 6.52707235265932e-05, "loss": 0.7093, "step": 26119 }, { "epoch": 1.7697675994308557, "grad_norm": 7.120404243469238, "learning_rate": 6.526935450749538e-05, "loss": 0.6471, "step": 26120 }, { "epoch": 1.7698353546988277, "grad_norm": 4.984893321990967, "learning_rate": 6.526798548839756e-05, "loss": 0.7788, "step": 26121 }, { "epoch": 1.7699031099668, "grad_norm": 5.900517463684082, "learning_rate": 6.526661646929974e-05, "loss": 0.5472, "step": 26122 }, { "epoch": 1.769970865234772, "grad_norm": 6.761430263519287, "learning_rate": 6.526524745020194e-05, "loss": 0.7416, "step": 26123 }, { "epoch": 1.770038620502744, "grad_norm": 7.630609035491943, "learning_rate": 6.526387843110412e-05, "loss": 0.841, "step": 26124 }, { "epoch": 1.770106375770716, "grad_norm": 8.53343391418457, "learning_rate": 6.52625094120063e-05, "loss": 0.5926, "step": 26125 }, { "epoch": 1.7701741310386883, "grad_norm": 9.503962516784668, "learning_rate": 6.526114039290848e-05, "loss": 0.5311, "step": 26126 }, { "epoch": 1.7702418863066605, "grad_norm": 4.967596530914307, "learning_rate": 6.525977137381066e-05, "loss": 0.6613, "step": 26127 }, { "epoch": 1.7703096415746324, "grad_norm": 4.744167327880859, "learning_rate": 6.525840235471285e-05, "loss": 0.413, "step": 26128 }, { "epoch": 1.7703773968426044, "grad_norm": 7.1107330322265625, "learning_rate": 6.525703333561503e-05, "loss": 0.5792, "step": 26129 }, { "epoch": 1.7704451521105766, "grad_norm": 4.659130573272705, "learning_rate": 6.525566431651721e-05, "loss": 0.6459, "step": 26130 }, { "epoch": 1.7705129073785488, "grad_norm": 6.676963806152344, "learning_rate": 6.525429529741939e-05, "loss": 0.7779, "step": 26131 }, { "epoch": 1.7705806626465208, "grad_norm": 5.8365302085876465, "learning_rate": 6.525292627832159e-05, "loss": 0.5228, "step": 26132 }, { "epoch": 1.7706484179144928, "grad_norm": 5.15347957611084, "learning_rate": 6.525155725922377e-05, "loss": 0.9751, "step": 26133 }, { "epoch": 1.770716173182465, "grad_norm": 5.298765182495117, "learning_rate": 6.525018824012595e-05, "loss": 0.6286, "step": 26134 }, { "epoch": 1.770783928450437, "grad_norm": 4.704442024230957, "learning_rate": 6.524881922102813e-05, "loss": 0.6309, "step": 26135 }, { "epoch": 1.770851683718409, "grad_norm": 7.75271463394165, "learning_rate": 6.524745020193032e-05, "loss": 0.6694, "step": 26136 }, { "epoch": 1.7709194389863812, "grad_norm": 5.687178134918213, "learning_rate": 6.52460811828325e-05, "loss": 0.5857, "step": 26137 }, { "epoch": 1.7709871942543534, "grad_norm": 5.414909839630127, "learning_rate": 6.524471216373468e-05, "loss": 0.5293, "step": 26138 }, { "epoch": 1.7710549495223253, "grad_norm": 4.726587295532227, "learning_rate": 6.524334314463688e-05, "loss": 0.8845, "step": 26139 }, { "epoch": 1.7711227047902973, "grad_norm": 6.3334126472473145, "learning_rate": 6.524197412553906e-05, "loss": 0.6752, "step": 26140 }, { "epoch": 1.7711904600582695, "grad_norm": 4.45259428024292, "learning_rate": 6.524060510644124e-05, "loss": 0.7507, "step": 26141 }, { "epoch": 1.7712582153262417, "grad_norm": 7.780733585357666, "learning_rate": 6.523923608734343e-05, "loss": 0.7182, "step": 26142 }, { "epoch": 1.7713259705942137, "grad_norm": 5.367558479309082, "learning_rate": 6.523786706824561e-05, "loss": 0.593, "step": 26143 }, { "epoch": 1.7713937258621857, "grad_norm": 5.670482635498047, "learning_rate": 6.523649804914779e-05, "loss": 0.6147, "step": 26144 }, { "epoch": 1.7714614811301579, "grad_norm": 5.766420364379883, "learning_rate": 6.523512903004997e-05, "loss": 0.5324, "step": 26145 }, { "epoch": 1.77152923639813, "grad_norm": 5.280393123626709, "learning_rate": 6.523376001095217e-05, "loss": 0.5168, "step": 26146 }, { "epoch": 1.771596991666102, "grad_norm": 6.21720552444458, "learning_rate": 6.523239099185435e-05, "loss": 0.7388, "step": 26147 }, { "epoch": 1.771664746934074, "grad_norm": 5.296938419342041, "learning_rate": 6.523102197275653e-05, "loss": 0.5421, "step": 26148 }, { "epoch": 1.7717325022020463, "grad_norm": 5.694173812866211, "learning_rate": 6.522965295365871e-05, "loss": 0.594, "step": 26149 }, { "epoch": 1.7718002574700185, "grad_norm": 6.852183818817139, "learning_rate": 6.522828393456089e-05, "loss": 0.6218, "step": 26150 }, { "epoch": 1.7718680127379902, "grad_norm": 4.314418792724609, "learning_rate": 6.522691491546308e-05, "loss": 0.7649, "step": 26151 }, { "epoch": 1.7719357680059624, "grad_norm": 8.232078552246094, "learning_rate": 6.522554589636526e-05, "loss": 0.5937, "step": 26152 }, { "epoch": 1.7720035232739346, "grad_norm": 7.118101596832275, "learning_rate": 6.522417687726744e-05, "loss": 0.672, "step": 26153 }, { "epoch": 1.7720712785419066, "grad_norm": 5.644408702850342, "learning_rate": 6.522280785816962e-05, "loss": 0.6611, "step": 26154 }, { "epoch": 1.7721390338098786, "grad_norm": 8.544456481933594, "learning_rate": 6.522143883907182e-05, "loss": 0.5539, "step": 26155 }, { "epoch": 1.7722067890778508, "grad_norm": 5.19674825668335, "learning_rate": 6.5220069819974e-05, "loss": 0.5238, "step": 26156 }, { "epoch": 1.772274544345823, "grad_norm": 4.830060005187988, "learning_rate": 6.521870080087618e-05, "loss": 0.71, "step": 26157 }, { "epoch": 1.772342299613795, "grad_norm": 5.08637809753418, "learning_rate": 6.521733178177836e-05, "loss": 0.8073, "step": 26158 }, { "epoch": 1.772410054881767, "grad_norm": 6.047713756561279, "learning_rate": 6.521596276268054e-05, "loss": 0.5291, "step": 26159 }, { "epoch": 1.7724778101497392, "grad_norm": 7.1006646156311035, "learning_rate": 6.521459374358273e-05, "loss": 0.5221, "step": 26160 }, { "epoch": 1.7725455654177114, "grad_norm": 5.528575420379639, "learning_rate": 6.521322472448491e-05, "loss": 0.5233, "step": 26161 }, { "epoch": 1.7726133206856833, "grad_norm": 5.498785495758057, "learning_rate": 6.521185570538709e-05, "loss": 0.5493, "step": 26162 }, { "epoch": 1.7726810759536553, "grad_norm": 5.042159080505371, "learning_rate": 6.521048668628927e-05, "loss": 0.5306, "step": 26163 }, { "epoch": 1.7727488312216275, "grad_norm": 5.094602584838867, "learning_rate": 6.520911766719145e-05, "loss": 0.5878, "step": 26164 }, { "epoch": 1.7728165864895997, "grad_norm": 5.207982540130615, "learning_rate": 6.520774864809365e-05, "loss": 0.7632, "step": 26165 }, { "epoch": 1.7728843417575717, "grad_norm": 7.10983419418335, "learning_rate": 6.520637962899583e-05, "loss": 0.5854, "step": 26166 }, { "epoch": 1.7729520970255437, "grad_norm": 4.622167587280273, "learning_rate": 6.520501060989801e-05, "loss": 0.5575, "step": 26167 }, { "epoch": 1.7730198522935159, "grad_norm": 11.48950481414795, "learning_rate": 6.520364159080019e-05, "loss": 0.5127, "step": 26168 }, { "epoch": 1.7730876075614879, "grad_norm": 7.422914505004883, "learning_rate": 6.520227257170238e-05, "loss": 0.6737, "step": 26169 }, { "epoch": 1.7731553628294598, "grad_norm": 6.35203218460083, "learning_rate": 6.520090355260456e-05, "loss": 0.7607, "step": 26170 }, { "epoch": 1.773223118097432, "grad_norm": 5.909194469451904, "learning_rate": 6.519953453350674e-05, "loss": 0.649, "step": 26171 }, { "epoch": 1.7732908733654043, "grad_norm": 4.9144697189331055, "learning_rate": 6.519816551440892e-05, "loss": 0.6652, "step": 26172 }, { "epoch": 1.7733586286333762, "grad_norm": 8.570989608764648, "learning_rate": 6.51967964953111e-05, "loss": 0.5561, "step": 26173 }, { "epoch": 1.7734263839013482, "grad_norm": 4.87061071395874, "learning_rate": 6.51954274762133e-05, "loss": 0.5939, "step": 26174 }, { "epoch": 1.7734941391693204, "grad_norm": 6.447973251342773, "learning_rate": 6.519405845711548e-05, "loss": 0.4696, "step": 26175 }, { "epoch": 1.7735618944372926, "grad_norm": 5.259974479675293, "learning_rate": 6.519268943801766e-05, "loss": 0.8093, "step": 26176 }, { "epoch": 1.7736296497052646, "grad_norm": 5.42963171005249, "learning_rate": 6.519132041891984e-05, "loss": 0.7018, "step": 26177 }, { "epoch": 1.7736974049732366, "grad_norm": 5.374091625213623, "learning_rate": 6.518995139982203e-05, "loss": 0.7823, "step": 26178 }, { "epoch": 1.7737651602412088, "grad_norm": 5.888201713562012, "learning_rate": 6.518858238072421e-05, "loss": 0.5387, "step": 26179 }, { "epoch": 1.773832915509181, "grad_norm": 6.8430070877075195, "learning_rate": 6.518721336162639e-05, "loss": 0.5932, "step": 26180 }, { "epoch": 1.773900670777153, "grad_norm": 5.4822869300842285, "learning_rate": 6.518584434252857e-05, "loss": 0.6067, "step": 26181 }, { "epoch": 1.773968426045125, "grad_norm": 6.931448459625244, "learning_rate": 6.518447532343077e-05, "loss": 0.6294, "step": 26182 }, { "epoch": 1.7740361813130971, "grad_norm": 8.675823211669922, "learning_rate": 6.518310630433295e-05, "loss": 0.8787, "step": 26183 }, { "epoch": 1.7741039365810691, "grad_norm": 7.754086017608643, "learning_rate": 6.518173728523513e-05, "loss": 0.6469, "step": 26184 }, { "epoch": 1.774171691849041, "grad_norm": 8.591416358947754, "learning_rate": 6.518036826613732e-05, "loss": 0.7977, "step": 26185 }, { "epoch": 1.7742394471170133, "grad_norm": 5.790739059448242, "learning_rate": 6.51789992470395e-05, "loss": 0.5929, "step": 26186 }, { "epoch": 1.7743072023849855, "grad_norm": 5.52567720413208, "learning_rate": 6.517763022794168e-05, "loss": 0.9134, "step": 26187 }, { "epoch": 1.7743749576529575, "grad_norm": 7.404950141906738, "learning_rate": 6.517626120884388e-05, "loss": 0.6455, "step": 26188 }, { "epoch": 1.7744427129209295, "grad_norm": 5.871044158935547, "learning_rate": 6.517489218974606e-05, "loss": 0.6736, "step": 26189 }, { "epoch": 1.7745104681889017, "grad_norm": 4.9674248695373535, "learning_rate": 6.517352317064824e-05, "loss": 0.8013, "step": 26190 }, { "epoch": 1.7745782234568739, "grad_norm": 3.70339298248291, "learning_rate": 6.517215415155042e-05, "loss": 0.5647, "step": 26191 }, { "epoch": 1.7746459787248459, "grad_norm": 7.396569728851318, "learning_rate": 6.517078513245261e-05, "loss": 0.7312, "step": 26192 }, { "epoch": 1.7747137339928178, "grad_norm": 4.398367404937744, "learning_rate": 6.516941611335479e-05, "loss": 0.5974, "step": 26193 }, { "epoch": 1.77478148926079, "grad_norm": 6.495832443237305, "learning_rate": 6.516804709425697e-05, "loss": 0.9031, "step": 26194 }, { "epoch": 1.7748492445287622, "grad_norm": 5.91717529296875, "learning_rate": 6.516667807515915e-05, "loss": 0.7518, "step": 26195 }, { "epoch": 1.7749169997967342, "grad_norm": 6.6416730880737305, "learning_rate": 6.516530905606133e-05, "loss": 0.6949, "step": 26196 }, { "epoch": 1.7749847550647062, "grad_norm": 6.709065914154053, "learning_rate": 6.516394003696353e-05, "loss": 0.5256, "step": 26197 }, { "epoch": 1.7750525103326784, "grad_norm": 6.62344217300415, "learning_rate": 6.51625710178657e-05, "loss": 0.6235, "step": 26198 }, { "epoch": 1.7751202656006506, "grad_norm": 7.148166179656982, "learning_rate": 6.516120199876789e-05, "loss": 0.733, "step": 26199 }, { "epoch": 1.7751880208686224, "grad_norm": 4.76890754699707, "learning_rate": 6.515983297967007e-05, "loss": 0.4758, "step": 26200 }, { "epoch": 1.7752557761365946, "grad_norm": 6.986993312835693, "learning_rate": 6.515846396057226e-05, "loss": 0.659, "step": 26201 }, { "epoch": 1.7753235314045668, "grad_norm": 6.321896553039551, "learning_rate": 6.515709494147444e-05, "loss": 0.6797, "step": 26202 }, { "epoch": 1.7753912866725388, "grad_norm": 6.699182987213135, "learning_rate": 6.515572592237662e-05, "loss": 0.5503, "step": 26203 }, { "epoch": 1.7754590419405107, "grad_norm": 6.909707069396973, "learning_rate": 6.51543569032788e-05, "loss": 0.9312, "step": 26204 }, { "epoch": 1.775526797208483, "grad_norm": 7.392991542816162, "learning_rate": 6.515298788418098e-05, "loss": 0.6072, "step": 26205 }, { "epoch": 1.7755945524764551, "grad_norm": 5.242347717285156, "learning_rate": 6.515161886508318e-05, "loss": 0.7417, "step": 26206 }, { "epoch": 1.7756623077444271, "grad_norm": 4.8919148445129395, "learning_rate": 6.515024984598536e-05, "loss": 0.6361, "step": 26207 }, { "epoch": 1.775730063012399, "grad_norm": 4.8805999755859375, "learning_rate": 6.514888082688754e-05, "loss": 0.8457, "step": 26208 }, { "epoch": 1.7757978182803713, "grad_norm": 9.665910720825195, "learning_rate": 6.514751180778972e-05, "loss": 0.5371, "step": 26209 }, { "epoch": 1.7758655735483435, "grad_norm": 6.253188133239746, "learning_rate": 6.514614278869191e-05, "loss": 0.6577, "step": 26210 }, { "epoch": 1.7759333288163155, "grad_norm": 5.116734027862549, "learning_rate": 6.514477376959409e-05, "loss": 0.7176, "step": 26211 }, { "epoch": 1.7760010840842875, "grad_norm": 6.382299900054932, "learning_rate": 6.514340475049627e-05, "loss": 0.8357, "step": 26212 }, { "epoch": 1.7760688393522597, "grad_norm": 4.87203311920166, "learning_rate": 6.514203573139845e-05, "loss": 0.6068, "step": 26213 }, { "epoch": 1.7761365946202319, "grad_norm": 5.826707363128662, "learning_rate": 6.514066671230063e-05, "loss": 0.7918, "step": 26214 }, { "epoch": 1.7762043498882039, "grad_norm": 4.952813625335693, "learning_rate": 6.513929769320283e-05, "loss": 0.5833, "step": 26215 }, { "epoch": 1.7762721051561758, "grad_norm": 8.016603469848633, "learning_rate": 6.5137928674105e-05, "loss": 0.8779, "step": 26216 }, { "epoch": 1.776339860424148, "grad_norm": 7.262516021728516, "learning_rate": 6.513655965500719e-05, "loss": 0.5986, "step": 26217 }, { "epoch": 1.77640761569212, "grad_norm": 6.761621475219727, "learning_rate": 6.513519063590937e-05, "loss": 0.4318, "step": 26218 }, { "epoch": 1.776475370960092, "grad_norm": 4.797771453857422, "learning_rate": 6.513382161681155e-05, "loss": 0.4833, "step": 26219 }, { "epoch": 1.7765431262280642, "grad_norm": 5.192538738250732, "learning_rate": 6.513245259771374e-05, "loss": 0.6104, "step": 26220 }, { "epoch": 1.7766108814960364, "grad_norm": 6.4591145515441895, "learning_rate": 6.513108357861592e-05, "loss": 0.6351, "step": 26221 }, { "epoch": 1.7766786367640084, "grad_norm": 4.765796661376953, "learning_rate": 6.51297145595181e-05, "loss": 0.6718, "step": 26222 }, { "epoch": 1.7767463920319804, "grad_norm": 8.628193855285645, "learning_rate": 6.512834554042028e-05, "loss": 0.6747, "step": 26223 }, { "epoch": 1.7768141472999526, "grad_norm": 7.504236698150635, "learning_rate": 6.512697652132248e-05, "loss": 0.4679, "step": 26224 }, { "epoch": 1.7768819025679248, "grad_norm": 4.729029655456543, "learning_rate": 6.512560750222466e-05, "loss": 0.6914, "step": 26225 }, { "epoch": 1.7769496578358968, "grad_norm": 6.43047571182251, "learning_rate": 6.512423848312684e-05, "loss": 0.6329, "step": 26226 }, { "epoch": 1.7770174131038687, "grad_norm": 6.321591377258301, "learning_rate": 6.512286946402902e-05, "loss": 0.5174, "step": 26227 }, { "epoch": 1.777085168371841, "grad_norm": 10.580113410949707, "learning_rate": 6.512150044493121e-05, "loss": 0.6472, "step": 26228 }, { "epoch": 1.7771529236398131, "grad_norm": 7.805088996887207, "learning_rate": 6.512013142583339e-05, "loss": 0.8694, "step": 26229 }, { "epoch": 1.7772206789077851, "grad_norm": 5.341179847717285, "learning_rate": 6.511876240673557e-05, "loss": 0.828, "step": 26230 }, { "epoch": 1.777288434175757, "grad_norm": 5.669384479522705, "learning_rate": 6.511739338763777e-05, "loss": 0.7664, "step": 26231 }, { "epoch": 1.7773561894437293, "grad_norm": 6.7767133712768555, "learning_rate": 6.511602436853995e-05, "loss": 0.5804, "step": 26232 }, { "epoch": 1.7774239447117013, "grad_norm": 8.451361656188965, "learning_rate": 6.511465534944213e-05, "loss": 0.6138, "step": 26233 }, { "epoch": 1.7774916999796733, "grad_norm": 7.171023845672607, "learning_rate": 6.511328633034432e-05, "loss": 0.6284, "step": 26234 }, { "epoch": 1.7775594552476455, "grad_norm": 5.079052448272705, "learning_rate": 6.51119173112465e-05, "loss": 0.5733, "step": 26235 }, { "epoch": 1.7776272105156177, "grad_norm": 8.652579307556152, "learning_rate": 6.511054829214868e-05, "loss": 0.8178, "step": 26236 }, { "epoch": 1.7776949657835897, "grad_norm": 4.48960018157959, "learning_rate": 6.510917927305086e-05, "loss": 0.5918, "step": 26237 }, { "epoch": 1.7777627210515616, "grad_norm": 5.389339923858643, "learning_rate": 6.510781025395306e-05, "loss": 0.7196, "step": 26238 }, { "epoch": 1.7778304763195338, "grad_norm": 7.6502275466918945, "learning_rate": 6.510644123485524e-05, "loss": 0.4765, "step": 26239 }, { "epoch": 1.777898231587506, "grad_norm": 4.492560386657715, "learning_rate": 6.510507221575742e-05, "loss": 0.5438, "step": 26240 }, { "epoch": 1.777965986855478, "grad_norm": 5.092094898223877, "learning_rate": 6.51037031966596e-05, "loss": 0.7108, "step": 26241 }, { "epoch": 1.77803374212345, "grad_norm": 5.550102233886719, "learning_rate": 6.510233417756179e-05, "loss": 0.5486, "step": 26242 }, { "epoch": 1.7781014973914222, "grad_norm": 5.876474380493164, "learning_rate": 6.510096515846397e-05, "loss": 0.6285, "step": 26243 }, { "epoch": 1.7781692526593944, "grad_norm": 5.287566661834717, "learning_rate": 6.509959613936615e-05, "loss": 0.6298, "step": 26244 }, { "epoch": 1.7782370079273664, "grad_norm": 5.401659965515137, "learning_rate": 6.509822712026833e-05, "loss": 0.5691, "step": 26245 }, { "epoch": 1.7783047631953384, "grad_norm": 5.325063705444336, "learning_rate": 6.509685810117051e-05, "loss": 0.4554, "step": 26246 }, { "epoch": 1.7783725184633106, "grad_norm": 8.371828079223633, "learning_rate": 6.50954890820727e-05, "loss": 0.5348, "step": 26247 }, { "epoch": 1.7784402737312828, "grad_norm": 5.0574951171875, "learning_rate": 6.509412006297489e-05, "loss": 0.572, "step": 26248 }, { "epoch": 1.7785080289992545, "grad_norm": 7.302107810974121, "learning_rate": 6.509275104387707e-05, "loss": 0.782, "step": 26249 }, { "epoch": 1.7785757842672267, "grad_norm": 8.27893352508545, "learning_rate": 6.509138202477925e-05, "loss": 0.6515, "step": 26250 }, { "epoch": 1.778643539535199, "grad_norm": 10.665140151977539, "learning_rate": 6.509001300568143e-05, "loss": 0.724, "step": 26251 }, { "epoch": 1.778711294803171, "grad_norm": 5.2150092124938965, "learning_rate": 6.508864398658362e-05, "loss": 0.5225, "step": 26252 }, { "epoch": 1.778779050071143, "grad_norm": 6.470101833343506, "learning_rate": 6.50872749674858e-05, "loss": 0.4577, "step": 26253 }, { "epoch": 1.778846805339115, "grad_norm": 7.054866790771484, "learning_rate": 6.508590594838798e-05, "loss": 0.602, "step": 26254 }, { "epoch": 1.7789145606070873, "grad_norm": 6.059296607971191, "learning_rate": 6.508453692929016e-05, "loss": 0.6417, "step": 26255 }, { "epoch": 1.7789823158750593, "grad_norm": 5.093813896179199, "learning_rate": 6.508316791019236e-05, "loss": 0.5812, "step": 26256 }, { "epoch": 1.7790500711430313, "grad_norm": 5.006677150726318, "learning_rate": 6.508179889109454e-05, "loss": 0.806, "step": 26257 }, { "epoch": 1.7791178264110035, "grad_norm": 5.218171119689941, "learning_rate": 6.508042987199672e-05, "loss": 0.5814, "step": 26258 }, { "epoch": 1.7791855816789757, "grad_norm": 6.636211395263672, "learning_rate": 6.50790608528989e-05, "loss": 0.6272, "step": 26259 }, { "epoch": 1.7792533369469477, "grad_norm": 4.272821426391602, "learning_rate": 6.507769183380108e-05, "loss": 0.5109, "step": 26260 }, { "epoch": 1.7793210922149196, "grad_norm": 5.905057430267334, "learning_rate": 6.507632281470327e-05, "loss": 0.543, "step": 26261 }, { "epoch": 1.7793888474828918, "grad_norm": 4.297147274017334, "learning_rate": 6.507495379560545e-05, "loss": 0.4999, "step": 26262 }, { "epoch": 1.779456602750864, "grad_norm": 5.627222537994385, "learning_rate": 6.507358477650763e-05, "loss": 0.6131, "step": 26263 }, { "epoch": 1.779524358018836, "grad_norm": 4.548015117645264, "learning_rate": 6.507221575740981e-05, "loss": 0.5991, "step": 26264 }, { "epoch": 1.779592113286808, "grad_norm": 4.507772445678711, "learning_rate": 6.5070846738312e-05, "loss": 0.5305, "step": 26265 }, { "epoch": 1.7796598685547802, "grad_norm": 6.157375812530518, "learning_rate": 6.506947771921419e-05, "loss": 0.7176, "step": 26266 }, { "epoch": 1.7797276238227522, "grad_norm": 7.357705116271973, "learning_rate": 6.506810870011637e-05, "loss": 0.5386, "step": 26267 }, { "epoch": 1.7797953790907242, "grad_norm": 5.527599811553955, "learning_rate": 6.506673968101855e-05, "loss": 0.4654, "step": 26268 }, { "epoch": 1.7798631343586964, "grad_norm": 6.004730224609375, "learning_rate": 6.506537066192073e-05, "loss": 0.72, "step": 26269 }, { "epoch": 1.7799308896266686, "grad_norm": 5.90570592880249, "learning_rate": 6.506400164282292e-05, "loss": 0.6022, "step": 26270 }, { "epoch": 1.7799986448946405, "grad_norm": 6.858707427978516, "learning_rate": 6.50626326237251e-05, "loss": 0.6354, "step": 26271 }, { "epoch": 1.7800664001626125, "grad_norm": 5.700782299041748, "learning_rate": 6.506126360462728e-05, "loss": 0.5334, "step": 26272 }, { "epoch": 1.7801341554305847, "grad_norm": 5.820164680480957, "learning_rate": 6.505989458552946e-05, "loss": 0.5211, "step": 26273 }, { "epoch": 1.780201910698557, "grad_norm": 5.285854816436768, "learning_rate": 6.505852556643166e-05, "loss": 0.5345, "step": 26274 }, { "epoch": 1.780269665966529, "grad_norm": 6.2082929611206055, "learning_rate": 6.505715654733384e-05, "loss": 0.5069, "step": 26275 }, { "epoch": 1.780337421234501, "grad_norm": 9.771315574645996, "learning_rate": 6.505578752823602e-05, "loss": 0.6025, "step": 26276 }, { "epoch": 1.780405176502473, "grad_norm": 6.296723365783691, "learning_rate": 6.505441850913821e-05, "loss": 0.6225, "step": 26277 }, { "epoch": 1.7804729317704453, "grad_norm": 4.941160678863525, "learning_rate": 6.505304949004039e-05, "loss": 0.7894, "step": 26278 }, { "epoch": 1.7805406870384173, "grad_norm": 6.271059989929199, "learning_rate": 6.505168047094257e-05, "loss": 0.5523, "step": 26279 }, { "epoch": 1.7806084423063893, "grad_norm": 5.674217700958252, "learning_rate": 6.505031145184477e-05, "loss": 0.5092, "step": 26280 }, { "epoch": 1.7806761975743615, "grad_norm": 4.911231994628906, "learning_rate": 6.504894243274695e-05, "loss": 0.6116, "step": 26281 }, { "epoch": 1.7807439528423334, "grad_norm": 4.3681416511535645, "learning_rate": 6.504757341364913e-05, "loss": 0.5119, "step": 26282 }, { "epoch": 1.7808117081103054, "grad_norm": 5.417566776275635, "learning_rate": 6.50462043945513e-05, "loss": 0.8336, "step": 26283 }, { "epoch": 1.7808794633782776, "grad_norm": 4.877775192260742, "learning_rate": 6.50448353754535e-05, "loss": 0.5978, "step": 26284 }, { "epoch": 1.7809472186462498, "grad_norm": 4.768303394317627, "learning_rate": 6.504346635635568e-05, "loss": 0.6708, "step": 26285 }, { "epoch": 1.7810149739142218, "grad_norm": 5.215206623077393, "learning_rate": 6.504209733725786e-05, "loss": 0.698, "step": 26286 }, { "epoch": 1.7810827291821938, "grad_norm": 4.246512413024902, "learning_rate": 6.504072831816004e-05, "loss": 0.4556, "step": 26287 }, { "epoch": 1.781150484450166, "grad_norm": 5.616784572601318, "learning_rate": 6.503935929906223e-05, "loss": 0.5392, "step": 26288 }, { "epoch": 1.7812182397181382, "grad_norm": 7.141454696655273, "learning_rate": 6.503799027996442e-05, "loss": 0.5588, "step": 26289 }, { "epoch": 1.7812859949861102, "grad_norm": 6.097990989685059, "learning_rate": 6.50366212608666e-05, "loss": 0.7071, "step": 26290 }, { "epoch": 1.7813537502540822, "grad_norm": 5.133143424987793, "learning_rate": 6.503525224176878e-05, "loss": 0.7326, "step": 26291 }, { "epoch": 1.7814215055220544, "grad_norm": 5.6267218589782715, "learning_rate": 6.503388322267096e-05, "loss": 0.7864, "step": 26292 }, { "epoch": 1.7814892607900266, "grad_norm": 4.385286808013916, "learning_rate": 6.503251420357315e-05, "loss": 0.5803, "step": 26293 }, { "epoch": 1.7815570160579985, "grad_norm": 5.683904647827148, "learning_rate": 6.503114518447533e-05, "loss": 0.7281, "step": 26294 }, { "epoch": 1.7816247713259705, "grad_norm": 7.223691463470459, "learning_rate": 6.502977616537751e-05, "loss": 0.6332, "step": 26295 }, { "epoch": 1.7816925265939427, "grad_norm": 6.437406539916992, "learning_rate": 6.502840714627969e-05, "loss": 0.7518, "step": 26296 }, { "epoch": 1.781760281861915, "grad_norm": 5.91798734664917, "learning_rate": 6.502703812718187e-05, "loss": 0.6554, "step": 26297 }, { "epoch": 1.7818280371298867, "grad_norm": 4.883409023284912, "learning_rate": 6.502566910808407e-05, "loss": 0.4712, "step": 26298 }, { "epoch": 1.781895792397859, "grad_norm": 5.011134147644043, "learning_rate": 6.502430008898625e-05, "loss": 0.7166, "step": 26299 }, { "epoch": 1.781963547665831, "grad_norm": 7.341510772705078, "learning_rate": 6.502293106988843e-05, "loss": 0.6639, "step": 26300 }, { "epoch": 1.782031302933803, "grad_norm": 6.135472297668457, "learning_rate": 6.50215620507906e-05, "loss": 0.7651, "step": 26301 }, { "epoch": 1.782099058201775, "grad_norm": 4.695068836212158, "learning_rate": 6.50201930316928e-05, "loss": 0.5716, "step": 26302 }, { "epoch": 1.7821668134697473, "grad_norm": 5.978828430175781, "learning_rate": 6.501882401259498e-05, "loss": 0.8204, "step": 26303 }, { "epoch": 1.7822345687377195, "grad_norm": 4.523531436920166, "learning_rate": 6.501745499349716e-05, "loss": 0.5388, "step": 26304 }, { "epoch": 1.7823023240056914, "grad_norm": 5.662416458129883, "learning_rate": 6.501608597439934e-05, "loss": 0.8604, "step": 26305 }, { "epoch": 1.7823700792736634, "grad_norm": 4.932500839233398, "learning_rate": 6.501471695530152e-05, "loss": 0.5202, "step": 26306 }, { "epoch": 1.7824378345416356, "grad_norm": 5.360942840576172, "learning_rate": 6.501334793620372e-05, "loss": 0.5271, "step": 26307 }, { "epoch": 1.7825055898096078, "grad_norm": 4.16403341293335, "learning_rate": 6.50119789171059e-05, "loss": 0.5311, "step": 26308 }, { "epoch": 1.7825733450775798, "grad_norm": 5.083327293395996, "learning_rate": 6.501060989800808e-05, "loss": 0.7051, "step": 26309 }, { "epoch": 1.7826411003455518, "grad_norm": 6.07045316696167, "learning_rate": 6.500924087891026e-05, "loss": 0.6193, "step": 26310 }, { "epoch": 1.782708855613524, "grad_norm": 6.427214622497559, "learning_rate": 6.500787185981245e-05, "loss": 0.6106, "step": 26311 }, { "epoch": 1.7827766108814962, "grad_norm": 11.0379638671875, "learning_rate": 6.500650284071463e-05, "loss": 0.5715, "step": 26312 }, { "epoch": 1.7828443661494682, "grad_norm": 4.899604320526123, "learning_rate": 6.500513382161681e-05, "loss": 0.7425, "step": 26313 }, { "epoch": 1.7829121214174402, "grad_norm": 4.783871173858643, "learning_rate": 6.500376480251899e-05, "loss": 0.5747, "step": 26314 }, { "epoch": 1.7829798766854124, "grad_norm": 6.395834922790527, "learning_rate": 6.500239578342117e-05, "loss": 0.7771, "step": 26315 }, { "epoch": 1.7830476319533843, "grad_norm": 5.068416595458984, "learning_rate": 6.500102676432337e-05, "loss": 0.6036, "step": 26316 }, { "epoch": 1.7831153872213563, "grad_norm": 6.595072269439697, "learning_rate": 6.499965774522555e-05, "loss": 0.9191, "step": 26317 }, { "epoch": 1.7831831424893285, "grad_norm": 7.667177200317383, "learning_rate": 6.499828872612773e-05, "loss": 0.5307, "step": 26318 }, { "epoch": 1.7832508977573007, "grad_norm": 5.6841912269592285, "learning_rate": 6.499691970702991e-05, "loss": 0.6656, "step": 26319 }, { "epoch": 1.7833186530252727, "grad_norm": 7.168448448181152, "learning_rate": 6.49955506879321e-05, "loss": 0.5229, "step": 26320 }, { "epoch": 1.7833864082932447, "grad_norm": 5.1867356300354, "learning_rate": 6.499418166883428e-05, "loss": 0.5718, "step": 26321 }, { "epoch": 1.7834541635612169, "grad_norm": 5.567361831665039, "learning_rate": 6.499281264973646e-05, "loss": 0.7633, "step": 26322 }, { "epoch": 1.783521918829189, "grad_norm": 5.368721961975098, "learning_rate": 6.499144363063866e-05, "loss": 0.5399, "step": 26323 }, { "epoch": 1.783589674097161, "grad_norm": 6.433285236358643, "learning_rate": 6.499007461154084e-05, "loss": 0.9767, "step": 26324 }, { "epoch": 1.783657429365133, "grad_norm": 5.6672773361206055, "learning_rate": 6.498870559244302e-05, "loss": 0.7548, "step": 26325 }, { "epoch": 1.7837251846331053, "grad_norm": 4.4783196449279785, "learning_rate": 6.498733657334521e-05, "loss": 0.7151, "step": 26326 }, { "epoch": 1.7837929399010775, "grad_norm": 5.901156425476074, "learning_rate": 6.498596755424739e-05, "loss": 0.6145, "step": 26327 }, { "epoch": 1.7838606951690494, "grad_norm": 6.009035110473633, "learning_rate": 6.498459853514957e-05, "loss": 0.7804, "step": 26328 }, { "epoch": 1.7839284504370214, "grad_norm": 5.1344804763793945, "learning_rate": 6.498322951605175e-05, "loss": 0.6365, "step": 26329 }, { "epoch": 1.7839962057049936, "grad_norm": 10.51190185546875, "learning_rate": 6.498186049695394e-05, "loss": 0.6456, "step": 26330 }, { "epoch": 1.7840639609729656, "grad_norm": 7.6890692710876465, "learning_rate": 6.498049147785613e-05, "loss": 0.6568, "step": 26331 }, { "epoch": 1.7841317162409376, "grad_norm": 6.185900688171387, "learning_rate": 6.49791224587583e-05, "loss": 0.8142, "step": 26332 }, { "epoch": 1.7841994715089098, "grad_norm": 5.532405853271484, "learning_rate": 6.497775343966049e-05, "loss": 0.3972, "step": 26333 }, { "epoch": 1.784267226776882, "grad_norm": 6.95060396194458, "learning_rate": 6.497638442056268e-05, "loss": 0.5303, "step": 26334 }, { "epoch": 1.784334982044854, "grad_norm": 4.956532001495361, "learning_rate": 6.497501540146486e-05, "loss": 0.6008, "step": 26335 }, { "epoch": 1.784402737312826, "grad_norm": 5.985325813293457, "learning_rate": 6.497364638236704e-05, "loss": 0.6, "step": 26336 }, { "epoch": 1.7844704925807982, "grad_norm": 8.809942245483398, "learning_rate": 6.497227736326922e-05, "loss": 0.6775, "step": 26337 }, { "epoch": 1.7845382478487704, "grad_norm": 7.035124778747559, "learning_rate": 6.49709083441714e-05, "loss": 0.7345, "step": 26338 }, { "epoch": 1.7846060031167423, "grad_norm": 6.113848686218262, "learning_rate": 6.49695393250736e-05, "loss": 0.5588, "step": 26339 }, { "epoch": 1.7846737583847143, "grad_norm": 4.031193733215332, "learning_rate": 6.496817030597578e-05, "loss": 0.6087, "step": 26340 }, { "epoch": 1.7847415136526865, "grad_norm": 6.964221477508545, "learning_rate": 6.496680128687796e-05, "loss": 0.5812, "step": 26341 }, { "epoch": 1.7848092689206587, "grad_norm": 5.153870105743408, "learning_rate": 6.496543226778014e-05, "loss": 0.6416, "step": 26342 }, { "epoch": 1.7848770241886307, "grad_norm": 4.133979320526123, "learning_rate": 6.496406324868233e-05, "loss": 0.492, "step": 26343 }, { "epoch": 1.7849447794566027, "grad_norm": 4.951413631439209, "learning_rate": 6.496269422958451e-05, "loss": 0.5686, "step": 26344 }, { "epoch": 1.7850125347245749, "grad_norm": 5.242880344390869, "learning_rate": 6.496132521048669e-05, "loss": 0.5574, "step": 26345 }, { "epoch": 1.785080289992547, "grad_norm": 6.982253551483154, "learning_rate": 6.495995619138887e-05, "loss": 0.5025, "step": 26346 }, { "epoch": 1.7851480452605188, "grad_norm": 6.126225471496582, "learning_rate": 6.495858717229105e-05, "loss": 0.6791, "step": 26347 }, { "epoch": 1.785215800528491, "grad_norm": 6.48276948928833, "learning_rate": 6.495721815319325e-05, "loss": 0.8133, "step": 26348 }, { "epoch": 1.7852835557964633, "grad_norm": 6.315218925476074, "learning_rate": 6.495584913409543e-05, "loss": 0.6597, "step": 26349 }, { "epoch": 1.7853513110644352, "grad_norm": 5.54972505569458, "learning_rate": 6.49544801149976e-05, "loss": 0.5861, "step": 26350 }, { "epoch": 1.7854190663324072, "grad_norm": 5.744149684906006, "learning_rate": 6.495311109589979e-05, "loss": 0.7507, "step": 26351 }, { "epoch": 1.7854868216003794, "grad_norm": 8.468841552734375, "learning_rate": 6.495174207680197e-05, "loss": 0.6146, "step": 26352 }, { "epoch": 1.7855545768683516, "grad_norm": 7.37338924407959, "learning_rate": 6.495037305770416e-05, "loss": 0.8406, "step": 26353 }, { "epoch": 1.7856223321363236, "grad_norm": 6.369899749755859, "learning_rate": 6.494900403860634e-05, "loss": 0.5272, "step": 26354 }, { "epoch": 1.7856900874042956, "grad_norm": 7.2992048263549805, "learning_rate": 6.494763501950852e-05, "loss": 0.7414, "step": 26355 }, { "epoch": 1.7857578426722678, "grad_norm": 5.574035167694092, "learning_rate": 6.49462660004107e-05, "loss": 0.7736, "step": 26356 }, { "epoch": 1.78582559794024, "grad_norm": 4.762753486633301, "learning_rate": 6.49448969813129e-05, "loss": 0.5895, "step": 26357 }, { "epoch": 1.785893353208212, "grad_norm": 5.722200393676758, "learning_rate": 6.494352796221508e-05, "loss": 0.5501, "step": 26358 }, { "epoch": 1.785961108476184, "grad_norm": 4.765664577484131, "learning_rate": 6.494215894311726e-05, "loss": 0.6768, "step": 26359 }, { "epoch": 1.7860288637441561, "grad_norm": 5.618216037750244, "learning_rate": 6.494078992401944e-05, "loss": 0.6274, "step": 26360 }, { "epoch": 1.7860966190121284, "grad_norm": 10.237839698791504, "learning_rate": 6.493942090492162e-05, "loss": 0.8119, "step": 26361 }, { "epoch": 1.7861643742801003, "grad_norm": 4.806180477142334, "learning_rate": 6.493805188582381e-05, "loss": 0.7364, "step": 26362 }, { "epoch": 1.7862321295480723, "grad_norm": 4.758591175079346, "learning_rate": 6.493668286672599e-05, "loss": 0.4931, "step": 26363 }, { "epoch": 1.7862998848160445, "grad_norm": 5.142162799835205, "learning_rate": 6.493531384762817e-05, "loss": 0.6636, "step": 26364 }, { "epoch": 1.7863676400840165, "grad_norm": 5.005941867828369, "learning_rate": 6.493394482853035e-05, "loss": 0.7622, "step": 26365 }, { "epoch": 1.7864353953519885, "grad_norm": 6.8068766593933105, "learning_rate": 6.493257580943255e-05, "loss": 0.5161, "step": 26366 }, { "epoch": 1.7865031506199607, "grad_norm": 4.8328142166137695, "learning_rate": 6.493120679033473e-05, "loss": 0.5629, "step": 26367 }, { "epoch": 1.7865709058879329, "grad_norm": 5.449564456939697, "learning_rate": 6.49298377712369e-05, "loss": 0.6236, "step": 26368 }, { "epoch": 1.7866386611559049, "grad_norm": 7.089264392852783, "learning_rate": 6.49284687521391e-05, "loss": 0.6652, "step": 26369 }, { "epoch": 1.7867064164238768, "grad_norm": 4.994874954223633, "learning_rate": 6.492709973304128e-05, "loss": 0.531, "step": 26370 }, { "epoch": 1.786774171691849, "grad_norm": 5.724993705749512, "learning_rate": 6.492573071394346e-05, "loss": 0.8294, "step": 26371 }, { "epoch": 1.7868419269598212, "grad_norm": 4.761093616485596, "learning_rate": 6.492436169484565e-05, "loss": 0.575, "step": 26372 }, { "epoch": 1.7869096822277932, "grad_norm": 5.527770042419434, "learning_rate": 6.492299267574783e-05, "loss": 0.518, "step": 26373 }, { "epoch": 1.7869774374957652, "grad_norm": 6.27854061126709, "learning_rate": 6.492162365665002e-05, "loss": 0.7232, "step": 26374 }, { "epoch": 1.7870451927637374, "grad_norm": 4.220062732696533, "learning_rate": 6.492025463755221e-05, "loss": 0.529, "step": 26375 }, { "epoch": 1.7871129480317096, "grad_norm": 9.990065574645996, "learning_rate": 6.491888561845439e-05, "loss": 0.7258, "step": 26376 }, { "epoch": 1.7871807032996816, "grad_norm": 6.87020206451416, "learning_rate": 6.491751659935657e-05, "loss": 0.5307, "step": 26377 }, { "epoch": 1.7872484585676536, "grad_norm": 5.108802318572998, "learning_rate": 6.491614758025875e-05, "loss": 0.9455, "step": 26378 }, { "epoch": 1.7873162138356258, "grad_norm": 7.117612361907959, "learning_rate": 6.491477856116093e-05, "loss": 0.8715, "step": 26379 }, { "epoch": 1.7873839691035978, "grad_norm": 4.6693196296691895, "learning_rate": 6.491340954206312e-05, "loss": 0.6337, "step": 26380 }, { "epoch": 1.7874517243715697, "grad_norm": 5.6354875564575195, "learning_rate": 6.49120405229653e-05, "loss": 0.65, "step": 26381 }, { "epoch": 1.787519479639542, "grad_norm": 5.027455806732178, "learning_rate": 6.491067150386749e-05, "loss": 0.5752, "step": 26382 }, { "epoch": 1.7875872349075141, "grad_norm": 4.050728797912598, "learning_rate": 6.490930248476967e-05, "loss": 0.506, "step": 26383 }, { "epoch": 1.7876549901754861, "grad_norm": 5.359255313873291, "learning_rate": 6.490793346567185e-05, "loss": 0.4139, "step": 26384 }, { "epoch": 1.787722745443458, "grad_norm": 6.576174736022949, "learning_rate": 6.490656444657404e-05, "loss": 0.6187, "step": 26385 }, { "epoch": 1.7877905007114303, "grad_norm": 8.634109497070312, "learning_rate": 6.490519542747622e-05, "loss": 0.7281, "step": 26386 }, { "epoch": 1.7878582559794025, "grad_norm": 5.379361152648926, "learning_rate": 6.49038264083784e-05, "loss": 0.6943, "step": 26387 }, { "epoch": 1.7879260112473745, "grad_norm": 5.9948039054870605, "learning_rate": 6.490245738928058e-05, "loss": 0.7506, "step": 26388 }, { "epoch": 1.7879937665153465, "grad_norm": 3.681732654571533, "learning_rate": 6.490108837018277e-05, "loss": 0.4758, "step": 26389 }, { "epoch": 1.7880615217833187, "grad_norm": 3.7350714206695557, "learning_rate": 6.489971935108495e-05, "loss": 0.5386, "step": 26390 }, { "epoch": 1.7881292770512909, "grad_norm": 5.570841312408447, "learning_rate": 6.489835033198714e-05, "loss": 0.6617, "step": 26391 }, { "epoch": 1.7881970323192629, "grad_norm": 5.250325679779053, "learning_rate": 6.489698131288932e-05, "loss": 0.4629, "step": 26392 }, { "epoch": 1.7882647875872348, "grad_norm": 5.652355670928955, "learning_rate": 6.48956122937915e-05, "loss": 0.6559, "step": 26393 }, { "epoch": 1.788332542855207, "grad_norm": 6.612611293792725, "learning_rate": 6.489424327469369e-05, "loss": 0.6847, "step": 26394 }, { "epoch": 1.788400298123179, "grad_norm": 6.216116428375244, "learning_rate": 6.489287425559587e-05, "loss": 0.4877, "step": 26395 }, { "epoch": 1.788468053391151, "grad_norm": 8.196573257446289, "learning_rate": 6.489150523649805e-05, "loss": 1.0331, "step": 26396 }, { "epoch": 1.7885358086591232, "grad_norm": 6.992516994476318, "learning_rate": 6.489013621740023e-05, "loss": 0.5445, "step": 26397 }, { "epoch": 1.7886035639270954, "grad_norm": 5.786468505859375, "learning_rate": 6.488876719830242e-05, "loss": 0.5726, "step": 26398 }, { "epoch": 1.7886713191950674, "grad_norm": 4.655817031860352, "learning_rate": 6.48873981792046e-05, "loss": 0.6573, "step": 26399 }, { "epoch": 1.7887390744630394, "grad_norm": 6.914945125579834, "learning_rate": 6.488602916010679e-05, "loss": 0.6381, "step": 26400 }, { "epoch": 1.7888068297310116, "grad_norm": 6.051506042480469, "learning_rate": 6.488466014100897e-05, "loss": 0.5943, "step": 26401 }, { "epoch": 1.7888745849989838, "grad_norm": 5.392162322998047, "learning_rate": 6.488329112191115e-05, "loss": 0.6961, "step": 26402 }, { "epoch": 1.7889423402669558, "grad_norm": 6.171052932739258, "learning_rate": 6.488192210281334e-05, "loss": 0.6106, "step": 26403 }, { "epoch": 1.7890100955349277, "grad_norm": 9.967434883117676, "learning_rate": 6.488055308371552e-05, "loss": 0.6564, "step": 26404 }, { "epoch": 1.7890778508029, "grad_norm": 9.935561180114746, "learning_rate": 6.48791840646177e-05, "loss": 0.5065, "step": 26405 }, { "epoch": 1.7891456060708721, "grad_norm": 6.003063678741455, "learning_rate": 6.487781504551988e-05, "loss": 0.6716, "step": 26406 }, { "epoch": 1.7892133613388441, "grad_norm": 9.65091609954834, "learning_rate": 6.487644602642206e-05, "loss": 0.7475, "step": 26407 }, { "epoch": 1.789281116606816, "grad_norm": 4.953720569610596, "learning_rate": 6.487507700732426e-05, "loss": 0.5188, "step": 26408 }, { "epoch": 1.7893488718747883, "grad_norm": 4.507436275482178, "learning_rate": 6.487370798822644e-05, "loss": 0.6341, "step": 26409 }, { "epoch": 1.7894166271427605, "grad_norm": 4.783552646636963, "learning_rate": 6.487233896912862e-05, "loss": 0.6584, "step": 26410 }, { "epoch": 1.7894843824107325, "grad_norm": 4.97083044052124, "learning_rate": 6.48709699500308e-05, "loss": 0.5379, "step": 26411 }, { "epoch": 1.7895521376787045, "grad_norm": 7.35077428817749, "learning_rate": 6.486960093093299e-05, "loss": 0.423, "step": 26412 }, { "epoch": 1.7896198929466767, "grad_norm": 5.893283843994141, "learning_rate": 6.486823191183517e-05, "loss": 0.6411, "step": 26413 }, { "epoch": 1.7896876482146487, "grad_norm": 9.143628120422363, "learning_rate": 6.486686289273735e-05, "loss": 0.5173, "step": 26414 }, { "epoch": 1.7897554034826206, "grad_norm": 5.5789947509765625, "learning_rate": 6.486549387363953e-05, "loss": 0.6164, "step": 26415 }, { "epoch": 1.7898231587505928, "grad_norm": 4.776392936706543, "learning_rate": 6.486412485454173e-05, "loss": 0.6629, "step": 26416 }, { "epoch": 1.789890914018565, "grad_norm": 8.20186710357666, "learning_rate": 6.48627558354439e-05, "loss": 0.5706, "step": 26417 }, { "epoch": 1.789958669286537, "grad_norm": 7.451145648956299, "learning_rate": 6.486138681634609e-05, "loss": 0.6785, "step": 26418 }, { "epoch": 1.790026424554509, "grad_norm": 4.824932098388672, "learning_rate": 6.486001779724828e-05, "loss": 0.5898, "step": 26419 }, { "epoch": 1.7900941798224812, "grad_norm": 4.962063312530518, "learning_rate": 6.485864877815046e-05, "loss": 0.5289, "step": 26420 }, { "epoch": 1.7901619350904534, "grad_norm": 4.4916672706604, "learning_rate": 6.485727975905264e-05, "loss": 0.4736, "step": 26421 }, { "epoch": 1.7902296903584254, "grad_norm": 5.037089824676514, "learning_rate": 6.485591073995483e-05, "loss": 0.4219, "step": 26422 }, { "epoch": 1.7902974456263974, "grad_norm": 5.25237512588501, "learning_rate": 6.485454172085701e-05, "loss": 0.6346, "step": 26423 }, { "epoch": 1.7903652008943696, "grad_norm": 5.309401512145996, "learning_rate": 6.48531727017592e-05, "loss": 0.7037, "step": 26424 }, { "epoch": 1.7904329561623418, "grad_norm": 7.015481948852539, "learning_rate": 6.485180368266138e-05, "loss": 0.558, "step": 26425 }, { "epoch": 1.7905007114303138, "grad_norm": 5.2350358963012695, "learning_rate": 6.485043466356357e-05, "loss": 0.6907, "step": 26426 }, { "epoch": 1.7905684666982857, "grad_norm": 8.046436309814453, "learning_rate": 6.484906564446575e-05, "loss": 0.8123, "step": 26427 }, { "epoch": 1.790636221966258, "grad_norm": 6.032464981079102, "learning_rate": 6.484769662536793e-05, "loss": 0.7201, "step": 26428 }, { "epoch": 1.79070397723423, "grad_norm": 8.338666915893555, "learning_rate": 6.484632760627011e-05, "loss": 0.7385, "step": 26429 }, { "epoch": 1.790771732502202, "grad_norm": 4.253366470336914, "learning_rate": 6.484495858717229e-05, "loss": 0.5497, "step": 26430 }, { "epoch": 1.790839487770174, "grad_norm": 5.106434345245361, "learning_rate": 6.484358956807448e-05, "loss": 0.5141, "step": 26431 }, { "epoch": 1.7909072430381463, "grad_norm": 3.8568952083587646, "learning_rate": 6.484222054897666e-05, "loss": 0.5326, "step": 26432 }, { "epoch": 1.7909749983061183, "grad_norm": 5.514882564544678, "learning_rate": 6.484085152987885e-05, "loss": 0.756, "step": 26433 }, { "epoch": 1.7910427535740903, "grad_norm": 5.388563632965088, "learning_rate": 6.483948251078103e-05, "loss": 0.7191, "step": 26434 }, { "epoch": 1.7911105088420625, "grad_norm": 6.257626533508301, "learning_rate": 6.483811349168322e-05, "loss": 0.4426, "step": 26435 }, { "epoch": 1.7911782641100347, "grad_norm": 5.926671504974365, "learning_rate": 6.48367444725854e-05, "loss": 0.5242, "step": 26436 }, { "epoch": 1.7912460193780066, "grad_norm": 5.302340030670166, "learning_rate": 6.483537545348758e-05, "loss": 0.8692, "step": 26437 }, { "epoch": 1.7913137746459786, "grad_norm": 4.704530239105225, "learning_rate": 6.483400643438976e-05, "loss": 0.7067, "step": 26438 }, { "epoch": 1.7913815299139508, "grad_norm": 5.011970043182373, "learning_rate": 6.483263741529194e-05, "loss": 0.5468, "step": 26439 }, { "epoch": 1.791449285181923, "grad_norm": 4.53717565536499, "learning_rate": 6.483126839619413e-05, "loss": 0.513, "step": 26440 }, { "epoch": 1.791517040449895, "grad_norm": 4.554178714752197, "learning_rate": 6.482989937709631e-05, "loss": 0.5917, "step": 26441 }, { "epoch": 1.791584795717867, "grad_norm": 4.665304183959961, "learning_rate": 6.48285303579985e-05, "loss": 0.6778, "step": 26442 }, { "epoch": 1.7916525509858392, "grad_norm": 6.469430923461914, "learning_rate": 6.482716133890068e-05, "loss": 0.5902, "step": 26443 }, { "epoch": 1.7917203062538112, "grad_norm": 4.606096267700195, "learning_rate": 6.482579231980287e-05, "loss": 0.5843, "step": 26444 }, { "epoch": 1.7917880615217832, "grad_norm": 4.674129009246826, "learning_rate": 6.482442330070505e-05, "loss": 0.4787, "step": 26445 }, { "epoch": 1.7918558167897554, "grad_norm": 9.590033531188965, "learning_rate": 6.482305428160723e-05, "loss": 0.7909, "step": 26446 }, { "epoch": 1.7919235720577276, "grad_norm": 4.760642051696777, "learning_rate": 6.482168526250941e-05, "loss": 0.5469, "step": 26447 }, { "epoch": 1.7919913273256995, "grad_norm": 4.928704738616943, "learning_rate": 6.482031624341159e-05, "loss": 0.7847, "step": 26448 }, { "epoch": 1.7920590825936715, "grad_norm": 5.956216812133789, "learning_rate": 6.481894722431378e-05, "loss": 0.6129, "step": 26449 }, { "epoch": 1.7921268378616437, "grad_norm": 5.261460304260254, "learning_rate": 6.481757820521597e-05, "loss": 0.6793, "step": 26450 }, { "epoch": 1.792194593129616, "grad_norm": 6.220026969909668, "learning_rate": 6.481620918611815e-05, "loss": 0.6183, "step": 26451 }, { "epoch": 1.792262348397588, "grad_norm": 10.501867294311523, "learning_rate": 6.481484016702033e-05, "loss": 0.7157, "step": 26452 }, { "epoch": 1.79233010366556, "grad_norm": 4.8075995445251465, "learning_rate": 6.481347114792252e-05, "loss": 0.5604, "step": 26453 }, { "epoch": 1.792397858933532, "grad_norm": 7.208893775939941, "learning_rate": 6.48121021288247e-05, "loss": 0.6519, "step": 26454 }, { "epoch": 1.7924656142015043, "grad_norm": 5.528526306152344, "learning_rate": 6.481073310972688e-05, "loss": 0.4895, "step": 26455 }, { "epoch": 1.7925333694694763, "grad_norm": 5.144335746765137, "learning_rate": 6.480936409062906e-05, "loss": 0.5076, "step": 26456 }, { "epoch": 1.7926011247374483, "grad_norm": 5.386043548583984, "learning_rate": 6.480799507153124e-05, "loss": 0.5324, "step": 26457 }, { "epoch": 1.7926688800054205, "grad_norm": 5.487115859985352, "learning_rate": 6.480662605243343e-05, "loss": 0.6723, "step": 26458 }, { "epoch": 1.7927366352733927, "grad_norm": 5.967433929443359, "learning_rate": 6.480525703333562e-05, "loss": 0.6928, "step": 26459 }, { "epoch": 1.7928043905413646, "grad_norm": 5.330333709716797, "learning_rate": 6.48038880142378e-05, "loss": 0.8019, "step": 26460 }, { "epoch": 1.7928721458093366, "grad_norm": 8.557965278625488, "learning_rate": 6.480251899513998e-05, "loss": 0.7394, "step": 26461 }, { "epoch": 1.7929399010773088, "grad_norm": 15.408960342407227, "learning_rate": 6.480114997604217e-05, "loss": 0.7096, "step": 26462 }, { "epoch": 1.7930076563452808, "grad_norm": 6.923033714294434, "learning_rate": 6.479978095694435e-05, "loss": 0.5457, "step": 26463 }, { "epoch": 1.7930754116132528, "grad_norm": 4.936722755432129, "learning_rate": 6.479841193784653e-05, "loss": 0.6432, "step": 26464 }, { "epoch": 1.793143166881225, "grad_norm": 6.209729194641113, "learning_rate": 6.479704291874872e-05, "loss": 0.7559, "step": 26465 }, { "epoch": 1.7932109221491972, "grad_norm": 5.772585868835449, "learning_rate": 6.47956738996509e-05, "loss": 0.5113, "step": 26466 }, { "epoch": 1.7932786774171692, "grad_norm": 5.662108421325684, "learning_rate": 6.479430488055309e-05, "loss": 0.7638, "step": 26467 }, { "epoch": 1.7933464326851412, "grad_norm": 5.3496527671813965, "learning_rate": 6.479293586145528e-05, "loss": 0.7197, "step": 26468 }, { "epoch": 1.7934141879531134, "grad_norm": 5.405253887176514, "learning_rate": 6.479156684235746e-05, "loss": 0.6277, "step": 26469 }, { "epoch": 1.7934819432210856, "grad_norm": 6.205084323883057, "learning_rate": 6.479019782325964e-05, "loss": 0.525, "step": 26470 }, { "epoch": 1.7935496984890575, "grad_norm": 6.950950622558594, "learning_rate": 6.478882880416182e-05, "loss": 0.5347, "step": 26471 }, { "epoch": 1.7936174537570295, "grad_norm": 5.890125751495361, "learning_rate": 6.478745978506401e-05, "loss": 0.5795, "step": 26472 }, { "epoch": 1.7936852090250017, "grad_norm": 5.911766529083252, "learning_rate": 6.47860907659662e-05, "loss": 0.523, "step": 26473 }, { "epoch": 1.793752964292974, "grad_norm": 7.1140875816345215, "learning_rate": 6.478472174686837e-05, "loss": 0.6725, "step": 26474 }, { "epoch": 1.793820719560946, "grad_norm": 5.0741496086120605, "learning_rate": 6.478335272777055e-05, "loss": 0.8458, "step": 26475 }, { "epoch": 1.793888474828918, "grad_norm": 5.016864776611328, "learning_rate": 6.478198370867275e-05, "loss": 0.5249, "step": 26476 }, { "epoch": 1.79395623009689, "grad_norm": 5.342867374420166, "learning_rate": 6.478061468957493e-05, "loss": 0.5858, "step": 26477 }, { "epoch": 1.794023985364862, "grad_norm": 6.175942897796631, "learning_rate": 6.477924567047711e-05, "loss": 0.6113, "step": 26478 }, { "epoch": 1.794091740632834, "grad_norm": 5.073520660400391, "learning_rate": 6.477787665137929e-05, "loss": 0.4152, "step": 26479 }, { "epoch": 1.7941594959008063, "grad_norm": 6.900640964508057, "learning_rate": 6.477650763228147e-05, "loss": 0.5232, "step": 26480 }, { "epoch": 1.7942272511687785, "grad_norm": 6.278518199920654, "learning_rate": 6.477513861318366e-05, "loss": 0.6106, "step": 26481 }, { "epoch": 1.7942950064367504, "grad_norm": 6.664700031280518, "learning_rate": 6.477376959408584e-05, "loss": 0.5881, "step": 26482 }, { "epoch": 1.7943627617047224, "grad_norm": 6.38070821762085, "learning_rate": 6.477240057498802e-05, "loss": 0.6671, "step": 26483 }, { "epoch": 1.7944305169726946, "grad_norm": 5.4179277420043945, "learning_rate": 6.47710315558902e-05, "loss": 0.4431, "step": 26484 }, { "epoch": 1.7944982722406668, "grad_norm": 4.815109729766846, "learning_rate": 6.476966253679239e-05, "loss": 0.7775, "step": 26485 }, { "epoch": 1.7945660275086388, "grad_norm": 6.67338228225708, "learning_rate": 6.476829351769458e-05, "loss": 0.6454, "step": 26486 }, { "epoch": 1.7946337827766108, "grad_norm": 6.016955375671387, "learning_rate": 6.476692449859676e-05, "loss": 0.6208, "step": 26487 }, { "epoch": 1.794701538044583, "grad_norm": 6.311379432678223, "learning_rate": 6.476555547949894e-05, "loss": 0.7121, "step": 26488 }, { "epoch": 1.7947692933125552, "grad_norm": 6.455601692199707, "learning_rate": 6.476418646040112e-05, "loss": 0.6557, "step": 26489 }, { "epoch": 1.7948370485805272, "grad_norm": 4.883986473083496, "learning_rate": 6.476281744130331e-05, "loss": 0.7066, "step": 26490 }, { "epoch": 1.7949048038484992, "grad_norm": 6.17501163482666, "learning_rate": 6.47614484222055e-05, "loss": 0.6069, "step": 26491 }, { "epoch": 1.7949725591164714, "grad_norm": 7.240769863128662, "learning_rate": 6.476007940310767e-05, "loss": 0.5492, "step": 26492 }, { "epoch": 1.7950403143844433, "grad_norm": 5.634336948394775, "learning_rate": 6.475871038400986e-05, "loss": 0.779, "step": 26493 }, { "epoch": 1.7951080696524153, "grad_norm": 6.642146110534668, "learning_rate": 6.475734136491204e-05, "loss": 0.8299, "step": 26494 }, { "epoch": 1.7951758249203875, "grad_norm": 5.608401298522949, "learning_rate": 6.475597234581423e-05, "loss": 0.6411, "step": 26495 }, { "epoch": 1.7952435801883597, "grad_norm": 4.544772624969482, "learning_rate": 6.475460332671641e-05, "loss": 0.6067, "step": 26496 }, { "epoch": 1.7953113354563317, "grad_norm": 5.051577091217041, "learning_rate": 6.475323430761859e-05, "loss": 0.5062, "step": 26497 }, { "epoch": 1.7953790907243037, "grad_norm": 6.298222541809082, "learning_rate": 6.475186528852077e-05, "loss": 0.5332, "step": 26498 }, { "epoch": 1.7954468459922759, "grad_norm": 6.149299621582031, "learning_rate": 6.475049626942296e-05, "loss": 0.9144, "step": 26499 }, { "epoch": 1.795514601260248, "grad_norm": 5.899142265319824, "learning_rate": 6.474912725032514e-05, "loss": 0.4973, "step": 26500 }, { "epoch": 1.79558235652822, "grad_norm": 6.300529956817627, "learning_rate": 6.474775823122733e-05, "loss": 0.5333, "step": 26501 }, { "epoch": 1.795650111796192, "grad_norm": 6.748919486999512, "learning_rate": 6.47463892121295e-05, "loss": 0.8797, "step": 26502 }, { "epoch": 1.7957178670641643, "grad_norm": 9.374682426452637, "learning_rate": 6.474502019303169e-05, "loss": 0.4555, "step": 26503 }, { "epoch": 1.7957856223321365, "grad_norm": 5.463624000549316, "learning_rate": 6.474365117393388e-05, "loss": 0.7813, "step": 26504 }, { "epoch": 1.7958533776001084, "grad_norm": 7.498270511627197, "learning_rate": 6.474228215483606e-05, "loss": 0.8315, "step": 26505 }, { "epoch": 1.7959211328680804, "grad_norm": 4.882760047912598, "learning_rate": 6.474091313573824e-05, "loss": 0.59, "step": 26506 }, { "epoch": 1.7959888881360526, "grad_norm": 6.245075702667236, "learning_rate": 6.473954411664042e-05, "loss": 0.6496, "step": 26507 }, { "epoch": 1.7960566434040248, "grad_norm": 5.731497764587402, "learning_rate": 6.473817509754261e-05, "loss": 0.8126, "step": 26508 }, { "epoch": 1.7961243986719968, "grad_norm": 6.280736923217773, "learning_rate": 6.47368060784448e-05, "loss": 0.6649, "step": 26509 }, { "epoch": 1.7961921539399688, "grad_norm": 4.965966701507568, "learning_rate": 6.473543705934698e-05, "loss": 0.5842, "step": 26510 }, { "epoch": 1.796259909207941, "grad_norm": 4.613831520080566, "learning_rate": 6.473406804024917e-05, "loss": 0.5611, "step": 26511 }, { "epoch": 1.796327664475913, "grad_norm": 5.441418647766113, "learning_rate": 6.473269902115135e-05, "loss": 0.7925, "step": 26512 }, { "epoch": 1.796395419743885, "grad_norm": 5.757138729095459, "learning_rate": 6.473133000205353e-05, "loss": 0.7135, "step": 26513 }, { "epoch": 1.7964631750118571, "grad_norm": 6.918154239654541, "learning_rate": 6.472996098295572e-05, "loss": 0.716, "step": 26514 }, { "epoch": 1.7965309302798294, "grad_norm": 6.256191730499268, "learning_rate": 6.47285919638579e-05, "loss": 0.5513, "step": 26515 }, { "epoch": 1.7965986855478013, "grad_norm": 5.0834784507751465, "learning_rate": 6.472722294476008e-05, "loss": 0.6331, "step": 26516 }, { "epoch": 1.7966664408157733, "grad_norm": 6.043743133544922, "learning_rate": 6.472585392566226e-05, "loss": 0.7205, "step": 26517 }, { "epoch": 1.7967341960837455, "grad_norm": 5.448409080505371, "learning_rate": 6.472448490656446e-05, "loss": 0.5172, "step": 26518 }, { "epoch": 1.7968019513517177, "grad_norm": 7.727349758148193, "learning_rate": 6.472311588746664e-05, "loss": 0.5832, "step": 26519 }, { "epoch": 1.7968697066196897, "grad_norm": 8.997537612915039, "learning_rate": 6.472174686836882e-05, "loss": 0.9855, "step": 26520 }, { "epoch": 1.7969374618876617, "grad_norm": 4.254085540771484, "learning_rate": 6.4720377849271e-05, "loss": 0.5432, "step": 26521 }, { "epoch": 1.7970052171556339, "grad_norm": 5.776826858520508, "learning_rate": 6.47190088301732e-05, "loss": 0.7538, "step": 26522 }, { "epoch": 1.797072972423606, "grad_norm": 5.046884536743164, "learning_rate": 6.471763981107537e-05, "loss": 0.5555, "step": 26523 }, { "epoch": 1.797140727691578, "grad_norm": 4.67003870010376, "learning_rate": 6.471627079197755e-05, "loss": 0.7027, "step": 26524 }, { "epoch": 1.79720848295955, "grad_norm": 6.470618724822998, "learning_rate": 6.471490177287973e-05, "loss": 0.6377, "step": 26525 }, { "epoch": 1.7972762382275222, "grad_norm": 7.124924659729004, "learning_rate": 6.471353275378191e-05, "loss": 0.9904, "step": 26526 }, { "epoch": 1.7973439934954942, "grad_norm": 5.9909491539001465, "learning_rate": 6.471216373468411e-05, "loss": 0.9398, "step": 26527 }, { "epoch": 1.7974117487634662, "grad_norm": 6.01777982711792, "learning_rate": 6.471079471558629e-05, "loss": 0.7513, "step": 26528 }, { "epoch": 1.7974795040314384, "grad_norm": 4.469691276550293, "learning_rate": 6.470942569648847e-05, "loss": 0.5834, "step": 26529 }, { "epoch": 1.7975472592994106, "grad_norm": 6.170987606048584, "learning_rate": 6.470805667739065e-05, "loss": 0.4829, "step": 26530 }, { "epoch": 1.7976150145673826, "grad_norm": 8.11898136138916, "learning_rate": 6.470668765829284e-05, "loss": 0.6731, "step": 26531 }, { "epoch": 1.7976827698353546, "grad_norm": 9.631454467773438, "learning_rate": 6.470531863919502e-05, "loss": 0.6059, "step": 26532 }, { "epoch": 1.7977505251033268, "grad_norm": 9.488521575927734, "learning_rate": 6.47039496200972e-05, "loss": 0.7617, "step": 26533 }, { "epoch": 1.797818280371299, "grad_norm": 6.981712818145752, "learning_rate": 6.470258060099938e-05, "loss": 0.5256, "step": 26534 }, { "epoch": 1.797886035639271, "grad_norm": 9.017019271850586, "learning_rate": 6.470121158190157e-05, "loss": 0.6109, "step": 26535 }, { "epoch": 1.797953790907243, "grad_norm": 6.677830219268799, "learning_rate": 6.469984256280376e-05, "loss": 0.6089, "step": 26536 }, { "epoch": 1.7980215461752151, "grad_norm": 4.918286323547363, "learning_rate": 6.469847354370594e-05, "loss": 0.5634, "step": 26537 }, { "epoch": 1.7980893014431873, "grad_norm": 4.861996173858643, "learning_rate": 6.469710452460812e-05, "loss": 0.5706, "step": 26538 }, { "epoch": 1.7981570567111593, "grad_norm": 9.288019180297852, "learning_rate": 6.46957355055103e-05, "loss": 0.9268, "step": 26539 }, { "epoch": 1.7982248119791313, "grad_norm": 6.122613430023193, "learning_rate": 6.469436648641248e-05, "loss": 0.757, "step": 26540 }, { "epoch": 1.7982925672471035, "grad_norm": 7.2641191482543945, "learning_rate": 6.469299746731467e-05, "loss": 0.5974, "step": 26541 }, { "epoch": 1.7983603225150755, "grad_norm": 5.84812593460083, "learning_rate": 6.469162844821685e-05, "loss": 0.7169, "step": 26542 }, { "epoch": 1.7984280777830475, "grad_norm": 4.586693286895752, "learning_rate": 6.469025942911903e-05, "loss": 0.6772, "step": 26543 }, { "epoch": 1.7984958330510197, "grad_norm": 6.699120044708252, "learning_rate": 6.468889041002122e-05, "loss": 0.7054, "step": 26544 }, { "epoch": 1.7985635883189919, "grad_norm": 5.690792560577393, "learning_rate": 6.468752139092341e-05, "loss": 0.6623, "step": 26545 }, { "epoch": 1.7986313435869639, "grad_norm": 7.463400363922119, "learning_rate": 6.468615237182559e-05, "loss": 0.9621, "step": 26546 }, { "epoch": 1.7986990988549358, "grad_norm": 6.837425708770752, "learning_rate": 6.468478335272777e-05, "loss": 0.6292, "step": 26547 }, { "epoch": 1.798766854122908, "grad_norm": 6.503660678863525, "learning_rate": 6.468341433362995e-05, "loss": 0.7486, "step": 26548 }, { "epoch": 1.7988346093908802, "grad_norm": 8.214045524597168, "learning_rate": 6.468204531453213e-05, "loss": 0.763, "step": 26549 }, { "epoch": 1.7989023646588522, "grad_norm": 6.540403366088867, "learning_rate": 6.468067629543432e-05, "loss": 0.7941, "step": 26550 }, { "epoch": 1.7989701199268242, "grad_norm": 5.374214172363281, "learning_rate": 6.46793072763365e-05, "loss": 0.6, "step": 26551 }, { "epoch": 1.7990378751947964, "grad_norm": 6.324252128601074, "learning_rate": 6.467793825723869e-05, "loss": 0.8992, "step": 26552 }, { "epoch": 1.7991056304627686, "grad_norm": 8.818258285522461, "learning_rate": 6.467656923814087e-05, "loss": 0.8208, "step": 26553 }, { "epoch": 1.7991733857307406, "grad_norm": 6.762580871582031, "learning_rate": 6.467520021904306e-05, "loss": 0.6053, "step": 26554 }, { "epoch": 1.7992411409987126, "grad_norm": 5.99172830581665, "learning_rate": 6.467383119994524e-05, "loss": 0.6317, "step": 26555 }, { "epoch": 1.7993088962666848, "grad_norm": 5.259061813354492, "learning_rate": 6.467246218084742e-05, "loss": 0.5466, "step": 26556 }, { "epoch": 1.799376651534657, "grad_norm": 7.074782848358154, "learning_rate": 6.467109316174961e-05, "loss": 0.6525, "step": 26557 }, { "epoch": 1.799444406802629, "grad_norm": 6.076554775238037, "learning_rate": 6.46697241426518e-05, "loss": 0.6784, "step": 26558 }, { "epoch": 1.799512162070601, "grad_norm": 5.036582946777344, "learning_rate": 6.466835512355397e-05, "loss": 0.5646, "step": 26559 }, { "epoch": 1.7995799173385731, "grad_norm": 6.355813026428223, "learning_rate": 6.466698610445617e-05, "loss": 0.6583, "step": 26560 }, { "epoch": 1.7996476726065451, "grad_norm": 4.809903621673584, "learning_rate": 6.466561708535835e-05, "loss": 0.5925, "step": 26561 }, { "epoch": 1.799715427874517, "grad_norm": 4.481884002685547, "learning_rate": 6.466424806626053e-05, "loss": 0.6977, "step": 26562 }, { "epoch": 1.7997831831424893, "grad_norm": 4.985859394073486, "learning_rate": 6.466287904716271e-05, "loss": 0.6854, "step": 26563 }, { "epoch": 1.7998509384104615, "grad_norm": 5.616401195526123, "learning_rate": 6.46615100280649e-05, "loss": 0.6965, "step": 26564 }, { "epoch": 1.7999186936784335, "grad_norm": 9.009173393249512, "learning_rate": 6.466014100896708e-05, "loss": 0.5857, "step": 26565 }, { "epoch": 1.7999864489464055, "grad_norm": 4.98852014541626, "learning_rate": 6.465877198986926e-05, "loss": 0.6878, "step": 26566 }, { "epoch": 1.8000542042143777, "grad_norm": 6.359554290771484, "learning_rate": 6.465740297077144e-05, "loss": 0.5963, "step": 26567 }, { "epoch": 1.8001219594823499, "grad_norm": 5.698772430419922, "learning_rate": 6.465603395167364e-05, "loss": 0.7196, "step": 26568 }, { "epoch": 1.8001897147503219, "grad_norm": 4.061408519744873, "learning_rate": 6.465466493257582e-05, "loss": 0.6843, "step": 26569 }, { "epoch": 1.8002574700182938, "grad_norm": 5.215244770050049, "learning_rate": 6.4653295913478e-05, "loss": 0.6046, "step": 26570 }, { "epoch": 1.800325225286266, "grad_norm": 6.018982887268066, "learning_rate": 6.465192689438018e-05, "loss": 0.8371, "step": 26571 }, { "epoch": 1.8003929805542382, "grad_norm": 7.803177356719971, "learning_rate": 6.465055787528236e-05, "loss": 0.8552, "step": 26572 }, { "epoch": 1.8004607358222102, "grad_norm": 6.1324005126953125, "learning_rate": 6.464918885618455e-05, "loss": 0.8102, "step": 26573 }, { "epoch": 1.8005284910901822, "grad_norm": 5.621084690093994, "learning_rate": 6.464781983708673e-05, "loss": 0.6757, "step": 26574 }, { "epoch": 1.8005962463581544, "grad_norm": 9.661255836486816, "learning_rate": 6.464645081798891e-05, "loss": 0.47, "step": 26575 }, { "epoch": 1.8006640016261264, "grad_norm": 5.457442760467529, "learning_rate": 6.46450817988911e-05, "loss": 0.5571, "step": 26576 }, { "epoch": 1.8007317568940984, "grad_norm": 5.952642440795898, "learning_rate": 6.464371277979329e-05, "loss": 0.761, "step": 26577 }, { "epoch": 1.8007995121620706, "grad_norm": 5.3689680099487305, "learning_rate": 6.464234376069547e-05, "loss": 0.6339, "step": 26578 }, { "epoch": 1.8008672674300428, "grad_norm": 7.067813873291016, "learning_rate": 6.464097474159765e-05, "loss": 0.6727, "step": 26579 }, { "epoch": 1.8009350226980148, "grad_norm": 4.067531585693359, "learning_rate": 6.463960572249983e-05, "loss": 0.6167, "step": 26580 }, { "epoch": 1.8010027779659867, "grad_norm": 4.539855003356934, "learning_rate": 6.463823670340201e-05, "loss": 0.565, "step": 26581 }, { "epoch": 1.801070533233959, "grad_norm": 3.961587429046631, "learning_rate": 6.46368676843042e-05, "loss": 0.5188, "step": 26582 }, { "epoch": 1.8011382885019311, "grad_norm": 5.065338611602783, "learning_rate": 6.463549866520638e-05, "loss": 0.6298, "step": 26583 }, { "epoch": 1.8012060437699031, "grad_norm": 4.988255500793457, "learning_rate": 6.463412964610856e-05, "loss": 0.5653, "step": 26584 }, { "epoch": 1.801273799037875, "grad_norm": 5.641452312469482, "learning_rate": 6.463276062701074e-05, "loss": 0.6304, "step": 26585 }, { "epoch": 1.8013415543058473, "grad_norm": 5.1581034660339355, "learning_rate": 6.463139160791294e-05, "loss": 0.4741, "step": 26586 }, { "epoch": 1.8014093095738195, "grad_norm": 5.798873424530029, "learning_rate": 6.463002258881512e-05, "loss": 0.6889, "step": 26587 }, { "epoch": 1.8014770648417915, "grad_norm": 5.460486888885498, "learning_rate": 6.46286535697173e-05, "loss": 0.6604, "step": 26588 }, { "epoch": 1.8015448201097635, "grad_norm": 6.950523376464844, "learning_rate": 6.462728455061948e-05, "loss": 0.6573, "step": 26589 }, { "epoch": 1.8016125753777357, "grad_norm": 5.717700958251953, "learning_rate": 6.462591553152166e-05, "loss": 0.6488, "step": 26590 }, { "epoch": 1.8016803306457077, "grad_norm": 6.389710903167725, "learning_rate": 6.462454651242385e-05, "loss": 0.5811, "step": 26591 }, { "epoch": 1.8017480859136796, "grad_norm": 5.163828372955322, "learning_rate": 6.462317749332603e-05, "loss": 0.8307, "step": 26592 }, { "epoch": 1.8018158411816518, "grad_norm": 5.820754051208496, "learning_rate": 6.462180847422821e-05, "loss": 0.9844, "step": 26593 }, { "epoch": 1.801883596449624, "grad_norm": 6.573385238647461, "learning_rate": 6.46204394551304e-05, "loss": 0.7063, "step": 26594 }, { "epoch": 1.801951351717596, "grad_norm": 4.557989120483398, "learning_rate": 6.461907043603258e-05, "loss": 0.4319, "step": 26595 }, { "epoch": 1.802019106985568, "grad_norm": 6.516071319580078, "learning_rate": 6.461770141693477e-05, "loss": 0.6751, "step": 26596 }, { "epoch": 1.8020868622535402, "grad_norm": 7.508298873901367, "learning_rate": 6.461633239783695e-05, "loss": 0.7591, "step": 26597 }, { "epoch": 1.8021546175215124, "grad_norm": 8.688054084777832, "learning_rate": 6.461496337873913e-05, "loss": 0.745, "step": 26598 }, { "epoch": 1.8022223727894844, "grad_norm": 6.245868682861328, "learning_rate": 6.461359435964131e-05, "loss": 0.776, "step": 26599 }, { "epoch": 1.8022901280574564, "grad_norm": 4.948495864868164, "learning_rate": 6.46122253405435e-05, "loss": 0.6623, "step": 26600 }, { "epoch": 1.8023578833254286, "grad_norm": 6.351430892944336, "learning_rate": 6.461085632144568e-05, "loss": 0.7274, "step": 26601 }, { "epoch": 1.8024256385934008, "grad_norm": 6.893420696258545, "learning_rate": 6.460948730234786e-05, "loss": 0.8598, "step": 26602 }, { "epoch": 1.8024933938613728, "grad_norm": 6.316067218780518, "learning_rate": 6.460811828325006e-05, "loss": 0.6597, "step": 26603 }, { "epoch": 1.8025611491293447, "grad_norm": 5.7982001304626465, "learning_rate": 6.460674926415224e-05, "loss": 0.6529, "step": 26604 }, { "epoch": 1.802628904397317, "grad_norm": 5.980959415435791, "learning_rate": 6.460538024505442e-05, "loss": 0.7541, "step": 26605 }, { "epoch": 1.8026966596652891, "grad_norm": 5.760681629180908, "learning_rate": 6.460401122595661e-05, "loss": 0.6315, "step": 26606 }, { "epoch": 1.802764414933261, "grad_norm": 7.992841720581055, "learning_rate": 6.46026422068588e-05, "loss": 0.8285, "step": 26607 }, { "epoch": 1.802832170201233, "grad_norm": 5.729654788970947, "learning_rate": 6.460127318776097e-05, "loss": 0.6211, "step": 26608 }, { "epoch": 1.8028999254692053, "grad_norm": 4.4032368659973145, "learning_rate": 6.459990416866317e-05, "loss": 0.4703, "step": 26609 }, { "epoch": 1.8029676807371773, "grad_norm": 5.871094226837158, "learning_rate": 6.459853514956535e-05, "loss": 0.5748, "step": 26610 }, { "epoch": 1.8030354360051493, "grad_norm": 8.289984703063965, "learning_rate": 6.459716613046753e-05, "loss": 0.6571, "step": 26611 }, { "epoch": 1.8031031912731215, "grad_norm": 7.255340576171875, "learning_rate": 6.459579711136971e-05, "loss": 0.8978, "step": 26612 }, { "epoch": 1.8031709465410937, "grad_norm": 5.88706111907959, "learning_rate": 6.459442809227189e-05, "loss": 0.7288, "step": 26613 }, { "epoch": 1.8032387018090656, "grad_norm": 5.174073696136475, "learning_rate": 6.459305907317408e-05, "loss": 0.69, "step": 26614 }, { "epoch": 1.8033064570770376, "grad_norm": 5.952917575836182, "learning_rate": 6.459169005407626e-05, "loss": 0.4171, "step": 26615 }, { "epoch": 1.8033742123450098, "grad_norm": 5.535572528839111, "learning_rate": 6.459032103497844e-05, "loss": 0.704, "step": 26616 }, { "epoch": 1.803441967612982, "grad_norm": 5.8505048751831055, "learning_rate": 6.458895201588062e-05, "loss": 0.686, "step": 26617 }, { "epoch": 1.803509722880954, "grad_norm": 5.648918151855469, "learning_rate": 6.45875829967828e-05, "loss": 0.6585, "step": 26618 }, { "epoch": 1.803577478148926, "grad_norm": 4.893547534942627, "learning_rate": 6.4586213977685e-05, "loss": 0.6417, "step": 26619 }, { "epoch": 1.8036452334168982, "grad_norm": 7.191371440887451, "learning_rate": 6.458484495858718e-05, "loss": 0.7256, "step": 26620 }, { "epoch": 1.8037129886848704, "grad_norm": 5.298569679260254, "learning_rate": 6.458347593948936e-05, "loss": 0.6138, "step": 26621 }, { "epoch": 1.8037807439528424, "grad_norm": 5.8029890060424805, "learning_rate": 6.458210692039154e-05, "loss": 0.7385, "step": 26622 }, { "epoch": 1.8038484992208144, "grad_norm": 7.555717945098877, "learning_rate": 6.458073790129373e-05, "loss": 0.7266, "step": 26623 }, { "epoch": 1.8039162544887866, "grad_norm": 5.26461124420166, "learning_rate": 6.457936888219591e-05, "loss": 0.5604, "step": 26624 }, { "epoch": 1.8039840097567585, "grad_norm": 6.112181186676025, "learning_rate": 6.45779998630981e-05, "loss": 0.9723, "step": 26625 }, { "epoch": 1.8040517650247305, "grad_norm": 4.49717903137207, "learning_rate": 6.457663084400027e-05, "loss": 0.6352, "step": 26626 }, { "epoch": 1.8041195202927027, "grad_norm": 4.145341396331787, "learning_rate": 6.457526182490245e-05, "loss": 0.5797, "step": 26627 }, { "epoch": 1.804187275560675, "grad_norm": 6.019824504852295, "learning_rate": 6.457389280580465e-05, "loss": 0.835, "step": 26628 }, { "epoch": 1.804255030828647, "grad_norm": 7.168060302734375, "learning_rate": 6.457252378670683e-05, "loss": 0.6774, "step": 26629 }, { "epoch": 1.804322786096619, "grad_norm": 8.427329063415527, "learning_rate": 6.457115476760901e-05, "loss": 0.7754, "step": 26630 }, { "epoch": 1.804390541364591, "grad_norm": 4.158406734466553, "learning_rate": 6.456978574851119e-05, "loss": 0.6869, "step": 26631 }, { "epoch": 1.8044582966325633, "grad_norm": 5.0957865715026855, "learning_rate": 6.456841672941338e-05, "loss": 0.6993, "step": 26632 }, { "epoch": 1.8045260519005353, "grad_norm": 5.387447357177734, "learning_rate": 6.456704771031556e-05, "loss": 0.5744, "step": 26633 }, { "epoch": 1.8045938071685073, "grad_norm": 4.888663291931152, "learning_rate": 6.456567869121774e-05, "loss": 0.5801, "step": 26634 }, { "epoch": 1.8046615624364795, "grad_norm": 8.10501480102539, "learning_rate": 6.456430967211992e-05, "loss": 0.7303, "step": 26635 }, { "epoch": 1.8047293177044517, "grad_norm": 4.1412272453308105, "learning_rate": 6.45629406530221e-05, "loss": 0.4478, "step": 26636 }, { "epoch": 1.8047970729724236, "grad_norm": 6.040284156799316, "learning_rate": 6.45615716339243e-05, "loss": 0.8867, "step": 26637 }, { "epoch": 1.8048648282403956, "grad_norm": 4.134230613708496, "learning_rate": 6.456020261482648e-05, "loss": 0.5829, "step": 26638 }, { "epoch": 1.8049325835083678, "grad_norm": 7.536664962768555, "learning_rate": 6.455883359572866e-05, "loss": 0.7935, "step": 26639 }, { "epoch": 1.8050003387763398, "grad_norm": 4.033829212188721, "learning_rate": 6.455746457663084e-05, "loss": 0.5104, "step": 26640 }, { "epoch": 1.8050680940443118, "grad_norm": 6.248331546783447, "learning_rate": 6.455609555753303e-05, "loss": 0.5793, "step": 26641 }, { "epoch": 1.805135849312284, "grad_norm": 5.885889053344727, "learning_rate": 6.455472653843521e-05, "loss": 0.7101, "step": 26642 }, { "epoch": 1.8052036045802562, "grad_norm": 5.447513580322266, "learning_rate": 6.45533575193374e-05, "loss": 0.7314, "step": 26643 }, { "epoch": 1.8052713598482282, "grad_norm": 5.033900737762451, "learning_rate": 6.455198850023957e-05, "loss": 0.5788, "step": 26644 }, { "epoch": 1.8053391151162002, "grad_norm": 4.742642879486084, "learning_rate": 6.455061948114175e-05, "loss": 0.4979, "step": 26645 }, { "epoch": 1.8054068703841724, "grad_norm": 9.610888481140137, "learning_rate": 6.454925046204395e-05, "loss": 0.9125, "step": 26646 }, { "epoch": 1.8054746256521446, "grad_norm": 4.593472003936768, "learning_rate": 6.454788144294613e-05, "loss": 0.628, "step": 26647 }, { "epoch": 1.8055423809201165, "grad_norm": 4.266874313354492, "learning_rate": 6.454651242384831e-05, "loss": 0.5571, "step": 26648 }, { "epoch": 1.8056101361880885, "grad_norm": 5.551753044128418, "learning_rate": 6.454514340475049e-05, "loss": 0.6587, "step": 26649 }, { "epoch": 1.8056778914560607, "grad_norm": 5.672576427459717, "learning_rate": 6.454377438565268e-05, "loss": 0.8113, "step": 26650 }, { "epoch": 1.805745646724033, "grad_norm": 8.664983749389648, "learning_rate": 6.454240536655486e-05, "loss": 0.8978, "step": 26651 }, { "epoch": 1.805813401992005, "grad_norm": 6.065309524536133, "learning_rate": 6.454103634745704e-05, "loss": 0.7266, "step": 26652 }, { "epoch": 1.8058811572599769, "grad_norm": 5.543305397033691, "learning_rate": 6.453966732835924e-05, "loss": 0.7754, "step": 26653 }, { "epoch": 1.805948912527949, "grad_norm": 5.135695457458496, "learning_rate": 6.453829830926142e-05, "loss": 0.5851, "step": 26654 }, { "epoch": 1.8060166677959213, "grad_norm": 7.3984270095825195, "learning_rate": 6.45369292901636e-05, "loss": 0.5804, "step": 26655 }, { "epoch": 1.806084423063893, "grad_norm": 7.026412487030029, "learning_rate": 6.453556027106579e-05, "loss": 0.607, "step": 26656 }, { "epoch": 1.8061521783318653, "grad_norm": 4.901999473571777, "learning_rate": 6.453419125196797e-05, "loss": 0.803, "step": 26657 }, { "epoch": 1.8062199335998375, "grad_norm": 5.573629856109619, "learning_rate": 6.453282223287015e-05, "loss": 0.5523, "step": 26658 }, { "epoch": 1.8062876888678094, "grad_norm": 4.794769763946533, "learning_rate": 6.453145321377233e-05, "loss": 0.6039, "step": 26659 }, { "epoch": 1.8063554441357814, "grad_norm": 9.877415657043457, "learning_rate": 6.453008419467453e-05, "loss": 0.5238, "step": 26660 }, { "epoch": 1.8064231994037536, "grad_norm": 3.544771194458008, "learning_rate": 6.452871517557671e-05, "loss": 0.5056, "step": 26661 }, { "epoch": 1.8064909546717258, "grad_norm": 6.887268543243408, "learning_rate": 6.452734615647889e-05, "loss": 0.7541, "step": 26662 }, { "epoch": 1.8065587099396978, "grad_norm": 8.100629806518555, "learning_rate": 6.452597713738107e-05, "loss": 0.7298, "step": 26663 }, { "epoch": 1.8066264652076698, "grad_norm": 5.358435153961182, "learning_rate": 6.452460811828326e-05, "loss": 0.7197, "step": 26664 }, { "epoch": 1.806694220475642, "grad_norm": 7.067539691925049, "learning_rate": 6.452323909918544e-05, "loss": 0.5941, "step": 26665 }, { "epoch": 1.8067619757436142, "grad_norm": 8.059318542480469, "learning_rate": 6.452187008008762e-05, "loss": 0.7721, "step": 26666 }, { "epoch": 1.8068297310115862, "grad_norm": 7.265447616577148, "learning_rate": 6.45205010609898e-05, "loss": 0.7893, "step": 26667 }, { "epoch": 1.8068974862795582, "grad_norm": 9.578680038452148, "learning_rate": 6.451913204189198e-05, "loss": 0.4923, "step": 26668 }, { "epoch": 1.8069652415475304, "grad_norm": 4.649520397186279, "learning_rate": 6.451776302279418e-05, "loss": 0.4493, "step": 26669 }, { "epoch": 1.8070329968155026, "grad_norm": 6.034356594085693, "learning_rate": 6.451639400369636e-05, "loss": 0.7154, "step": 26670 }, { "epoch": 1.8071007520834745, "grad_norm": 4.412425518035889, "learning_rate": 6.451502498459854e-05, "loss": 0.5599, "step": 26671 }, { "epoch": 1.8071685073514465, "grad_norm": 8.468602180480957, "learning_rate": 6.451365596550072e-05, "loss": 0.9691, "step": 26672 }, { "epoch": 1.8072362626194187, "grad_norm": 6.271977424621582, "learning_rate": 6.45122869464029e-05, "loss": 0.668, "step": 26673 }, { "epoch": 1.8073040178873907, "grad_norm": 4.649438381195068, "learning_rate": 6.45109179273051e-05, "loss": 0.5779, "step": 26674 }, { "epoch": 1.8073717731553627, "grad_norm": 6.293813705444336, "learning_rate": 6.450954890820727e-05, "loss": 0.4399, "step": 26675 }, { "epoch": 1.8074395284233349, "grad_norm": 6.513540267944336, "learning_rate": 6.450817988910945e-05, "loss": 0.4249, "step": 26676 }, { "epoch": 1.807507283691307, "grad_norm": 7.556334495544434, "learning_rate": 6.450681087001163e-05, "loss": 0.529, "step": 26677 }, { "epoch": 1.807575038959279, "grad_norm": 6.386952877044678, "learning_rate": 6.450544185091383e-05, "loss": 0.5599, "step": 26678 }, { "epoch": 1.807642794227251, "grad_norm": 7.544146537780762, "learning_rate": 6.450407283181601e-05, "loss": 0.6528, "step": 26679 }, { "epoch": 1.8077105494952233, "grad_norm": 6.671665668487549, "learning_rate": 6.450270381271819e-05, "loss": 0.575, "step": 26680 }, { "epoch": 1.8077783047631955, "grad_norm": 5.898190975189209, "learning_rate": 6.450133479362037e-05, "loss": 0.8126, "step": 26681 }, { "epoch": 1.8078460600311674, "grad_norm": 6.859700679779053, "learning_rate": 6.449996577452255e-05, "loss": 0.5955, "step": 26682 }, { "epoch": 1.8079138152991394, "grad_norm": 5.492820739746094, "learning_rate": 6.449859675542474e-05, "loss": 0.7447, "step": 26683 }, { "epoch": 1.8079815705671116, "grad_norm": 6.010580539703369, "learning_rate": 6.449722773632692e-05, "loss": 0.6246, "step": 26684 }, { "epoch": 1.8080493258350838, "grad_norm": 5.697756767272949, "learning_rate": 6.44958587172291e-05, "loss": 0.5983, "step": 26685 }, { "epoch": 1.8081170811030558, "grad_norm": 7.863930702209473, "learning_rate": 6.449448969813128e-05, "loss": 0.7022, "step": 26686 }, { "epoch": 1.8081848363710278, "grad_norm": 6.813071250915527, "learning_rate": 6.449312067903348e-05, "loss": 0.7993, "step": 26687 }, { "epoch": 1.808252591639, "grad_norm": 5.41215705871582, "learning_rate": 6.449175165993566e-05, "loss": 0.7762, "step": 26688 }, { "epoch": 1.808320346906972, "grad_norm": 6.559473037719727, "learning_rate": 6.449038264083784e-05, "loss": 0.7103, "step": 26689 }, { "epoch": 1.808388102174944, "grad_norm": 4.254727363586426, "learning_rate": 6.448901362174002e-05, "loss": 0.7443, "step": 26690 }, { "epoch": 1.8084558574429161, "grad_norm": 8.600076675415039, "learning_rate": 6.44876446026422e-05, "loss": 0.4983, "step": 26691 }, { "epoch": 1.8085236127108884, "grad_norm": 6.5350165367126465, "learning_rate": 6.44862755835444e-05, "loss": 0.5312, "step": 26692 }, { "epoch": 1.8085913679788603, "grad_norm": 4.9250688552856445, "learning_rate": 6.448490656444657e-05, "loss": 0.6038, "step": 26693 }, { "epoch": 1.8086591232468323, "grad_norm": 4.169704914093018, "learning_rate": 6.448353754534875e-05, "loss": 0.5641, "step": 26694 }, { "epoch": 1.8087268785148045, "grad_norm": 5.04357385635376, "learning_rate": 6.448216852625093e-05, "loss": 0.5334, "step": 26695 }, { "epoch": 1.8087946337827767, "grad_norm": 7.931650161743164, "learning_rate": 6.448079950715313e-05, "loss": 0.6855, "step": 26696 }, { "epoch": 1.8088623890507487, "grad_norm": 5.08057165145874, "learning_rate": 6.447943048805531e-05, "loss": 0.4288, "step": 26697 }, { "epoch": 1.8089301443187207, "grad_norm": 7.3420796394348145, "learning_rate": 6.447806146895749e-05, "loss": 0.6827, "step": 26698 }, { "epoch": 1.8089978995866929, "grad_norm": 5.660425186157227, "learning_rate": 6.447669244985968e-05, "loss": 0.6113, "step": 26699 }, { "epoch": 1.809065654854665, "grad_norm": 5.526769161224365, "learning_rate": 6.447532343076186e-05, "loss": 0.699, "step": 26700 }, { "epoch": 1.809133410122637, "grad_norm": 8.094581604003906, "learning_rate": 6.447395441166404e-05, "loss": 0.5058, "step": 26701 }, { "epoch": 1.809201165390609, "grad_norm": 6.102320671081543, "learning_rate": 6.447258539256624e-05, "loss": 0.5671, "step": 26702 }, { "epoch": 1.8092689206585812, "grad_norm": 6.886892318725586, "learning_rate": 6.447121637346842e-05, "loss": 0.4643, "step": 26703 }, { "epoch": 1.8093366759265535, "grad_norm": 5.078798770904541, "learning_rate": 6.44698473543706e-05, "loss": 0.5002, "step": 26704 }, { "epoch": 1.8094044311945252, "grad_norm": 5.563132286071777, "learning_rate": 6.446847833527278e-05, "loss": 0.6681, "step": 26705 }, { "epoch": 1.8094721864624974, "grad_norm": 6.463866710662842, "learning_rate": 6.446710931617497e-05, "loss": 0.728, "step": 26706 }, { "epoch": 1.8095399417304696, "grad_norm": 4.308879852294922, "learning_rate": 6.446574029707715e-05, "loss": 0.4979, "step": 26707 }, { "epoch": 1.8096076969984416, "grad_norm": 7.237813472747803, "learning_rate": 6.446437127797933e-05, "loss": 0.4622, "step": 26708 }, { "epoch": 1.8096754522664136, "grad_norm": 4.842174053192139, "learning_rate": 6.446300225888151e-05, "loss": 0.7286, "step": 26709 }, { "epoch": 1.8097432075343858, "grad_norm": 6.156591892242432, "learning_rate": 6.446163323978371e-05, "loss": 0.7151, "step": 26710 }, { "epoch": 1.809810962802358, "grad_norm": 9.411527633666992, "learning_rate": 6.446026422068589e-05, "loss": 0.7027, "step": 26711 }, { "epoch": 1.80987871807033, "grad_norm": 5.296900749206543, "learning_rate": 6.445889520158807e-05, "loss": 0.5842, "step": 26712 }, { "epoch": 1.809946473338302, "grad_norm": 5.718865871429443, "learning_rate": 6.445752618249025e-05, "loss": 0.5461, "step": 26713 }, { "epoch": 1.8100142286062741, "grad_norm": 5.576772212982178, "learning_rate": 6.445615716339243e-05, "loss": 0.5557, "step": 26714 }, { "epoch": 1.8100819838742463, "grad_norm": 4.056344509124756, "learning_rate": 6.445478814429462e-05, "loss": 0.4581, "step": 26715 }, { "epoch": 1.8101497391422183, "grad_norm": 4.021970748901367, "learning_rate": 6.44534191251968e-05, "loss": 0.4897, "step": 26716 }, { "epoch": 1.8102174944101903, "grad_norm": 5.890685081481934, "learning_rate": 6.445205010609898e-05, "loss": 0.7064, "step": 26717 }, { "epoch": 1.8102852496781625, "grad_norm": 8.370976448059082, "learning_rate": 6.445068108700116e-05, "loss": 0.8584, "step": 26718 }, { "epoch": 1.8103530049461347, "grad_norm": 7.068817615509033, "learning_rate": 6.444931206790336e-05, "loss": 0.698, "step": 26719 }, { "epoch": 1.8104207602141067, "grad_norm": 7.319885730743408, "learning_rate": 6.444794304880554e-05, "loss": 0.6886, "step": 26720 }, { "epoch": 1.8104885154820787, "grad_norm": 5.1313700675964355, "learning_rate": 6.444657402970772e-05, "loss": 0.6442, "step": 26721 }, { "epoch": 1.8105562707500509, "grad_norm": 5.411067008972168, "learning_rate": 6.44452050106099e-05, "loss": 0.533, "step": 26722 }, { "epoch": 1.8106240260180229, "grad_norm": 4.981649398803711, "learning_rate": 6.444383599151208e-05, "loss": 0.7701, "step": 26723 }, { "epoch": 1.8106917812859948, "grad_norm": 6.775236129760742, "learning_rate": 6.444246697241427e-05, "loss": 0.9041, "step": 26724 }, { "epoch": 1.810759536553967, "grad_norm": 4.964432716369629, "learning_rate": 6.444109795331645e-05, "loss": 0.7585, "step": 26725 }, { "epoch": 1.8108272918219392, "grad_norm": 4.480970859527588, "learning_rate": 6.443972893421863e-05, "loss": 0.5431, "step": 26726 }, { "epoch": 1.8108950470899112, "grad_norm": 6.073116779327393, "learning_rate": 6.443835991512081e-05, "loss": 0.7699, "step": 26727 }, { "epoch": 1.8109628023578832, "grad_norm": 5.585526943206787, "learning_rate": 6.4436990896023e-05, "loss": 0.665, "step": 26728 }, { "epoch": 1.8110305576258554, "grad_norm": 5.802382946014404, "learning_rate": 6.443562187692519e-05, "loss": 0.6967, "step": 26729 }, { "epoch": 1.8110983128938276, "grad_norm": 6.993733882904053, "learning_rate": 6.443425285782737e-05, "loss": 0.7171, "step": 26730 }, { "epoch": 1.8111660681617996, "grad_norm": 4.545501232147217, "learning_rate": 6.443288383872955e-05, "loss": 0.658, "step": 26731 }, { "epoch": 1.8112338234297716, "grad_norm": 5.580200672149658, "learning_rate": 6.443151481963173e-05, "loss": 0.6378, "step": 26732 }, { "epoch": 1.8113015786977438, "grad_norm": 4.307887077331543, "learning_rate": 6.443014580053392e-05, "loss": 0.5219, "step": 26733 }, { "epoch": 1.811369333965716, "grad_norm": 4.933224678039551, "learning_rate": 6.44287767814361e-05, "loss": 0.5946, "step": 26734 }, { "epoch": 1.811437089233688, "grad_norm": 6.994917869567871, "learning_rate": 6.442740776233828e-05, "loss": 0.5942, "step": 26735 }, { "epoch": 1.81150484450166, "grad_norm": 4.319359302520752, "learning_rate": 6.442603874324046e-05, "loss": 0.6546, "step": 26736 }, { "epoch": 1.8115725997696321, "grad_norm": 5.631087303161621, "learning_rate": 6.442466972414264e-05, "loss": 0.5247, "step": 26737 }, { "epoch": 1.8116403550376041, "grad_norm": 6.209167003631592, "learning_rate": 6.442330070504484e-05, "loss": 0.6188, "step": 26738 }, { "epoch": 1.811708110305576, "grad_norm": 5.736749649047852, "learning_rate": 6.442193168594702e-05, "loss": 0.8434, "step": 26739 }, { "epoch": 1.8117758655735483, "grad_norm": 6.1277666091918945, "learning_rate": 6.44205626668492e-05, "loss": 0.7997, "step": 26740 }, { "epoch": 1.8118436208415205, "grad_norm": 4.894939422607422, "learning_rate": 6.441919364775138e-05, "loss": 0.5566, "step": 26741 }, { "epoch": 1.8119113761094925, "grad_norm": 9.176277160644531, "learning_rate": 6.441782462865357e-05, "loss": 0.8058, "step": 26742 }, { "epoch": 1.8119791313774645, "grad_norm": 5.639243125915527, "learning_rate": 6.441645560955575e-05, "loss": 0.6999, "step": 26743 }, { "epoch": 1.8120468866454367, "grad_norm": 4.586820602416992, "learning_rate": 6.441508659045793e-05, "loss": 0.5781, "step": 26744 }, { "epoch": 1.8121146419134089, "grad_norm": 6.362236022949219, "learning_rate": 6.441371757136013e-05, "loss": 0.5678, "step": 26745 }, { "epoch": 1.8121823971813809, "grad_norm": 10.11884593963623, "learning_rate": 6.441234855226231e-05, "loss": 0.5483, "step": 26746 }, { "epoch": 1.8122501524493528, "grad_norm": 7.34290075302124, "learning_rate": 6.441097953316449e-05, "loss": 0.5388, "step": 26747 }, { "epoch": 1.812317907717325, "grad_norm": 6.0892014503479, "learning_rate": 6.440961051406668e-05, "loss": 0.6437, "step": 26748 }, { "epoch": 1.8123856629852972, "grad_norm": 9.570404052734375, "learning_rate": 6.440824149496886e-05, "loss": 0.7032, "step": 26749 }, { "epoch": 1.8124534182532692, "grad_norm": 7.4562788009643555, "learning_rate": 6.440687247587104e-05, "loss": 0.7963, "step": 26750 }, { "epoch": 1.8125211735212412, "grad_norm": 10.692671775817871, "learning_rate": 6.440550345677322e-05, "loss": 0.6065, "step": 26751 }, { "epoch": 1.8125889287892134, "grad_norm": 6.532835483551025, "learning_rate": 6.440413443767542e-05, "loss": 0.6824, "step": 26752 }, { "epoch": 1.8126566840571856, "grad_norm": 6.075977802276611, "learning_rate": 6.44027654185776e-05, "loss": 0.6521, "step": 26753 }, { "epoch": 1.8127244393251574, "grad_norm": 6.9657883644104, "learning_rate": 6.440139639947978e-05, "loss": 0.7387, "step": 26754 }, { "epoch": 1.8127921945931296, "grad_norm": 8.127182006835938, "learning_rate": 6.440002738038196e-05, "loss": 0.7441, "step": 26755 }, { "epoch": 1.8128599498611018, "grad_norm": 4.852051734924316, "learning_rate": 6.439865836128415e-05, "loss": 0.6406, "step": 26756 }, { "epoch": 1.8129277051290738, "grad_norm": 4.3676228523254395, "learning_rate": 6.439728934218633e-05, "loss": 0.5068, "step": 26757 }, { "epoch": 1.8129954603970457, "grad_norm": 5.912296295166016, "learning_rate": 6.439592032308851e-05, "loss": 0.6796, "step": 26758 }, { "epoch": 1.813063215665018, "grad_norm": 7.6113200187683105, "learning_rate": 6.43945513039907e-05, "loss": 0.5794, "step": 26759 }, { "epoch": 1.8131309709329901, "grad_norm": 5.5016350746154785, "learning_rate": 6.439318228489287e-05, "loss": 0.636, "step": 26760 }, { "epoch": 1.8131987262009621, "grad_norm": 5.8604326248168945, "learning_rate": 6.439181326579507e-05, "loss": 0.7049, "step": 26761 }, { "epoch": 1.813266481468934, "grad_norm": 5.603643894195557, "learning_rate": 6.439044424669725e-05, "loss": 0.6631, "step": 26762 }, { "epoch": 1.8133342367369063, "grad_norm": 5.980537414550781, "learning_rate": 6.438907522759943e-05, "loss": 0.5581, "step": 26763 }, { "epoch": 1.8134019920048785, "grad_norm": 5.573897361755371, "learning_rate": 6.438770620850161e-05, "loss": 0.6234, "step": 26764 }, { "epoch": 1.8134697472728505, "grad_norm": 6.370614528656006, "learning_rate": 6.43863371894038e-05, "loss": 0.8828, "step": 26765 }, { "epoch": 1.8135375025408225, "grad_norm": 10.662313461303711, "learning_rate": 6.438496817030598e-05, "loss": 0.5375, "step": 26766 }, { "epoch": 1.8136052578087947, "grad_norm": 9.693385124206543, "learning_rate": 6.438359915120816e-05, "loss": 0.5614, "step": 26767 }, { "epoch": 1.8136730130767669, "grad_norm": 8.361624717712402, "learning_rate": 6.438223013211034e-05, "loss": 0.8321, "step": 26768 }, { "epoch": 1.8137407683447389, "grad_norm": 6.96463680267334, "learning_rate": 6.438086111301252e-05, "loss": 0.7869, "step": 26769 }, { "epoch": 1.8138085236127108, "grad_norm": 7.088846206665039, "learning_rate": 6.437949209391472e-05, "loss": 0.7808, "step": 26770 }, { "epoch": 1.813876278880683, "grad_norm": 6.646789073944092, "learning_rate": 6.43781230748169e-05, "loss": 0.7288, "step": 26771 }, { "epoch": 1.813944034148655, "grad_norm": 6.715269088745117, "learning_rate": 6.437675405571908e-05, "loss": 0.8429, "step": 26772 }, { "epoch": 1.814011789416627, "grad_norm": 5.272633075714111, "learning_rate": 6.437538503662126e-05, "loss": 0.5514, "step": 26773 }, { "epoch": 1.8140795446845992, "grad_norm": 6.690631866455078, "learning_rate": 6.437401601752345e-05, "loss": 0.73, "step": 26774 }, { "epoch": 1.8141472999525714, "grad_norm": 6.6043853759765625, "learning_rate": 6.437264699842563e-05, "loss": 0.8896, "step": 26775 }, { "epoch": 1.8142150552205434, "grad_norm": 7.21596097946167, "learning_rate": 6.437127797932781e-05, "loss": 0.6688, "step": 26776 }, { "epoch": 1.8142828104885154, "grad_norm": 8.567174911499023, "learning_rate": 6.436990896023e-05, "loss": 0.6049, "step": 26777 }, { "epoch": 1.8143505657564876, "grad_norm": 7.774494647979736, "learning_rate": 6.436853994113217e-05, "loss": 0.5412, "step": 26778 }, { "epoch": 1.8144183210244598, "grad_norm": 6.627426624298096, "learning_rate": 6.436717092203437e-05, "loss": 0.5851, "step": 26779 }, { "epoch": 1.8144860762924317, "grad_norm": 5.826731204986572, "learning_rate": 6.436580190293655e-05, "loss": 0.7251, "step": 26780 }, { "epoch": 1.8145538315604037, "grad_norm": 6.535499572753906, "learning_rate": 6.436443288383873e-05, "loss": 0.5091, "step": 26781 }, { "epoch": 1.814621586828376, "grad_norm": 6.5149641036987305, "learning_rate": 6.436306386474091e-05, "loss": 0.6396, "step": 26782 }, { "epoch": 1.8146893420963481, "grad_norm": 5.846710205078125, "learning_rate": 6.436169484564309e-05, "loss": 0.7927, "step": 26783 }, { "epoch": 1.8147570973643201, "grad_norm": 4.809659957885742, "learning_rate": 6.436032582654528e-05, "loss": 0.7074, "step": 26784 }, { "epoch": 1.814824852632292, "grad_norm": 5.15207576751709, "learning_rate": 6.435895680744746e-05, "loss": 0.7254, "step": 26785 }, { "epoch": 1.8148926079002643, "grad_norm": 5.333955764770508, "learning_rate": 6.435758778834964e-05, "loss": 0.5817, "step": 26786 }, { "epoch": 1.8149603631682363, "grad_norm": 7.035273551940918, "learning_rate": 6.435621876925182e-05, "loss": 0.7293, "step": 26787 }, { "epoch": 1.8150281184362083, "grad_norm": 6.120570659637451, "learning_rate": 6.435484975015402e-05, "loss": 0.5948, "step": 26788 }, { "epoch": 1.8150958737041805, "grad_norm": 6.132373809814453, "learning_rate": 6.43534807310562e-05, "loss": 0.6462, "step": 26789 }, { "epoch": 1.8151636289721527, "grad_norm": 6.487470626831055, "learning_rate": 6.435211171195838e-05, "loss": 0.7499, "step": 26790 }, { "epoch": 1.8152313842401246, "grad_norm": 5.5331549644470215, "learning_rate": 6.435074269286057e-05, "loss": 0.7447, "step": 26791 }, { "epoch": 1.8152991395080966, "grad_norm": 6.806013107299805, "learning_rate": 6.434937367376275e-05, "loss": 0.708, "step": 26792 }, { "epoch": 1.8153668947760688, "grad_norm": 5.616776943206787, "learning_rate": 6.434800465466493e-05, "loss": 0.9344, "step": 26793 }, { "epoch": 1.815434650044041, "grad_norm": 4.844578742980957, "learning_rate": 6.434663563556713e-05, "loss": 0.6033, "step": 26794 }, { "epoch": 1.815502405312013, "grad_norm": 4.1602559089660645, "learning_rate": 6.434526661646931e-05, "loss": 0.6561, "step": 26795 }, { "epoch": 1.815570160579985, "grad_norm": 5.875583171844482, "learning_rate": 6.434389759737149e-05, "loss": 0.6788, "step": 26796 }, { "epoch": 1.8156379158479572, "grad_norm": 6.238223075866699, "learning_rate": 6.434252857827368e-05, "loss": 0.5923, "step": 26797 }, { "epoch": 1.8157056711159294, "grad_norm": 7.049952983856201, "learning_rate": 6.434115955917586e-05, "loss": 0.7706, "step": 26798 }, { "epoch": 1.8157734263839014, "grad_norm": 5.727673530578613, "learning_rate": 6.433979054007804e-05, "loss": 0.6504, "step": 26799 }, { "epoch": 1.8158411816518734, "grad_norm": 4.021955966949463, "learning_rate": 6.433842152098022e-05, "loss": 0.6595, "step": 26800 }, { "epoch": 1.8159089369198456, "grad_norm": 6.199460506439209, "learning_rate": 6.43370525018824e-05, "loss": 0.7684, "step": 26801 }, { "epoch": 1.8159766921878178, "grad_norm": 7.943406105041504, "learning_rate": 6.43356834827846e-05, "loss": 0.5824, "step": 26802 }, { "epoch": 1.8160444474557895, "grad_norm": 4.654277801513672, "learning_rate": 6.433431446368678e-05, "loss": 0.845, "step": 26803 }, { "epoch": 1.8161122027237617, "grad_norm": 6.458272457122803, "learning_rate": 6.433294544458896e-05, "loss": 0.7574, "step": 26804 }, { "epoch": 1.816179957991734, "grad_norm": 4.04094934463501, "learning_rate": 6.433157642549114e-05, "loss": 0.3929, "step": 26805 }, { "epoch": 1.816247713259706, "grad_norm": 6.782951831817627, "learning_rate": 6.433020740639332e-05, "loss": 0.7204, "step": 26806 }, { "epoch": 1.816315468527678, "grad_norm": 5.652131080627441, "learning_rate": 6.432883838729551e-05, "loss": 0.589, "step": 26807 }, { "epoch": 1.81638322379565, "grad_norm": 6.933178901672363, "learning_rate": 6.432746936819769e-05, "loss": 0.6676, "step": 26808 }, { "epoch": 1.8164509790636223, "grad_norm": 6.712334156036377, "learning_rate": 6.432610034909987e-05, "loss": 0.6327, "step": 26809 }, { "epoch": 1.8165187343315943, "grad_norm": 4.471114635467529, "learning_rate": 6.432473133000205e-05, "loss": 0.5463, "step": 26810 }, { "epoch": 1.8165864895995663, "grad_norm": 5.284713268280029, "learning_rate": 6.432336231090425e-05, "loss": 0.6851, "step": 26811 }, { "epoch": 1.8166542448675385, "grad_norm": 5.724893093109131, "learning_rate": 6.432199329180643e-05, "loss": 0.6955, "step": 26812 }, { "epoch": 1.8167220001355107, "grad_norm": 5.086476802825928, "learning_rate": 6.432062427270861e-05, "loss": 0.651, "step": 26813 }, { "epoch": 1.8167897554034826, "grad_norm": 4.483304500579834, "learning_rate": 6.431925525361079e-05, "loss": 0.4861, "step": 26814 }, { "epoch": 1.8168575106714546, "grad_norm": 4.835921764373779, "learning_rate": 6.431788623451297e-05, "loss": 0.5256, "step": 26815 }, { "epoch": 1.8169252659394268, "grad_norm": 5.97222900390625, "learning_rate": 6.431651721541516e-05, "loss": 0.6655, "step": 26816 }, { "epoch": 1.816993021207399, "grad_norm": 8.557904243469238, "learning_rate": 6.431514819631734e-05, "loss": 0.7602, "step": 26817 }, { "epoch": 1.817060776475371, "grad_norm": 9.205791473388672, "learning_rate": 6.431377917721952e-05, "loss": 0.8115, "step": 26818 }, { "epoch": 1.817128531743343, "grad_norm": 6.321309566497803, "learning_rate": 6.43124101581217e-05, "loss": 0.4963, "step": 26819 }, { "epoch": 1.8171962870113152, "grad_norm": 5.1774163246154785, "learning_rate": 6.43110411390239e-05, "loss": 0.5835, "step": 26820 }, { "epoch": 1.8172640422792872, "grad_norm": 7.056092739105225, "learning_rate": 6.430967211992608e-05, "loss": 1.1252, "step": 26821 }, { "epoch": 1.8173317975472592, "grad_norm": 5.346447944641113, "learning_rate": 6.430830310082826e-05, "loss": 0.7471, "step": 26822 }, { "epoch": 1.8173995528152314, "grad_norm": 6.9109015464782715, "learning_rate": 6.430693408173044e-05, "loss": 0.8658, "step": 26823 }, { "epoch": 1.8174673080832036, "grad_norm": 5.000592231750488, "learning_rate": 6.430556506263262e-05, "loss": 0.63, "step": 26824 }, { "epoch": 1.8175350633511755, "grad_norm": 5.131752014160156, "learning_rate": 6.430419604353481e-05, "loss": 0.7951, "step": 26825 }, { "epoch": 1.8176028186191475, "grad_norm": 5.373676776885986, "learning_rate": 6.430282702443699e-05, "loss": 0.5351, "step": 26826 }, { "epoch": 1.8176705738871197, "grad_norm": 5.54384708404541, "learning_rate": 6.430145800533917e-05, "loss": 0.7754, "step": 26827 }, { "epoch": 1.817738329155092, "grad_norm": 5.991764545440674, "learning_rate": 6.430008898624135e-05, "loss": 0.6369, "step": 26828 }, { "epoch": 1.817806084423064, "grad_norm": 6.774628639221191, "learning_rate": 6.429871996714353e-05, "loss": 0.6626, "step": 26829 }, { "epoch": 1.8178738396910359, "grad_norm": 6.1143479347229, "learning_rate": 6.429735094804573e-05, "loss": 0.5739, "step": 26830 }, { "epoch": 1.817941594959008, "grad_norm": 5.588479518890381, "learning_rate": 6.429598192894791e-05, "loss": 0.9135, "step": 26831 }, { "epoch": 1.8180093502269803, "grad_norm": 4.15416145324707, "learning_rate": 6.429461290985009e-05, "loss": 0.507, "step": 26832 }, { "epoch": 1.8180771054949523, "grad_norm": 4.868810176849365, "learning_rate": 6.429324389075227e-05, "loss": 0.7227, "step": 26833 }, { "epoch": 1.8181448607629243, "grad_norm": 4.594121932983398, "learning_rate": 6.429187487165446e-05, "loss": 0.7051, "step": 26834 }, { "epoch": 1.8182126160308965, "grad_norm": 8.40695858001709, "learning_rate": 6.429050585255664e-05, "loss": 0.7387, "step": 26835 }, { "epoch": 1.8182803712988684, "grad_norm": 6.064061164855957, "learning_rate": 6.428913683345882e-05, "loss": 0.821, "step": 26836 }, { "epoch": 1.8183481265668404, "grad_norm": 5.062285900115967, "learning_rate": 6.428776781436102e-05, "loss": 0.7031, "step": 26837 }, { "epoch": 1.8184158818348126, "grad_norm": 5.871172904968262, "learning_rate": 6.42863987952632e-05, "loss": 0.7735, "step": 26838 }, { "epoch": 1.8184836371027848, "grad_norm": 4.496103286743164, "learning_rate": 6.428502977616538e-05, "loss": 0.5738, "step": 26839 }, { "epoch": 1.8185513923707568, "grad_norm": 5.968371391296387, "learning_rate": 6.428366075706757e-05, "loss": 0.7147, "step": 26840 }, { "epoch": 1.8186191476387288, "grad_norm": 7.5802788734436035, "learning_rate": 6.428229173796975e-05, "loss": 0.5577, "step": 26841 }, { "epoch": 1.818686902906701, "grad_norm": 4.212392807006836, "learning_rate": 6.428092271887193e-05, "loss": 0.5768, "step": 26842 }, { "epoch": 1.8187546581746732, "grad_norm": 5.193695545196533, "learning_rate": 6.427955369977413e-05, "loss": 0.5791, "step": 26843 }, { "epoch": 1.8188224134426452, "grad_norm": 5.580350875854492, "learning_rate": 6.427818468067631e-05, "loss": 0.6195, "step": 26844 }, { "epoch": 1.8188901687106172, "grad_norm": 4.380248546600342, "learning_rate": 6.427681566157849e-05, "loss": 0.6944, "step": 26845 }, { "epoch": 1.8189579239785894, "grad_norm": 4.986954689025879, "learning_rate": 6.427544664248067e-05, "loss": 0.6336, "step": 26846 }, { "epoch": 1.8190256792465616, "grad_norm": 4.710751056671143, "learning_rate": 6.427407762338285e-05, "loss": 0.5652, "step": 26847 }, { "epoch": 1.8190934345145335, "grad_norm": 8.558531761169434, "learning_rate": 6.427270860428504e-05, "loss": 0.4399, "step": 26848 }, { "epoch": 1.8191611897825055, "grad_norm": 6.772984504699707, "learning_rate": 6.427133958518722e-05, "loss": 0.798, "step": 26849 }, { "epoch": 1.8192289450504777, "grad_norm": 4.505115509033203, "learning_rate": 6.42699705660894e-05, "loss": 0.4706, "step": 26850 }, { "epoch": 1.81929670031845, "grad_norm": 6.115203380584717, "learning_rate": 6.426860154699158e-05, "loss": 0.6037, "step": 26851 }, { "epoch": 1.8193644555864217, "grad_norm": 5.214086055755615, "learning_rate": 6.426723252789378e-05, "loss": 0.7594, "step": 26852 }, { "epoch": 1.8194322108543939, "grad_norm": 5.495413780212402, "learning_rate": 6.426586350879596e-05, "loss": 0.6066, "step": 26853 }, { "epoch": 1.819499966122366, "grad_norm": 5.874922752380371, "learning_rate": 6.426449448969814e-05, "loss": 0.6537, "step": 26854 }, { "epoch": 1.819567721390338, "grad_norm": 5.272257328033447, "learning_rate": 6.426312547060032e-05, "loss": 0.5504, "step": 26855 }, { "epoch": 1.81963547665831, "grad_norm": 5.65789270401001, "learning_rate": 6.42617564515025e-05, "loss": 0.8557, "step": 26856 }, { "epoch": 1.8197032319262822, "grad_norm": 5.834033489227295, "learning_rate": 6.426038743240469e-05, "loss": 0.4307, "step": 26857 }, { "epoch": 1.8197709871942545, "grad_norm": 6.105058193206787, "learning_rate": 6.425901841330687e-05, "loss": 0.7788, "step": 26858 }, { "epoch": 1.8198387424622264, "grad_norm": 5.53648567199707, "learning_rate": 6.425764939420905e-05, "loss": 0.6943, "step": 26859 }, { "epoch": 1.8199064977301984, "grad_norm": 9.916837692260742, "learning_rate": 6.425628037511123e-05, "loss": 0.6313, "step": 26860 }, { "epoch": 1.8199742529981706, "grad_norm": 6.8560638427734375, "learning_rate": 6.425491135601341e-05, "loss": 0.6722, "step": 26861 }, { "epoch": 1.8200420082661428, "grad_norm": 4.567662715911865, "learning_rate": 6.425354233691561e-05, "loss": 0.5911, "step": 26862 }, { "epoch": 1.8201097635341148, "grad_norm": 5.696816444396973, "learning_rate": 6.425217331781779e-05, "loss": 0.6058, "step": 26863 }, { "epoch": 1.8201775188020868, "grad_norm": 5.012820720672607, "learning_rate": 6.425080429871997e-05, "loss": 0.5629, "step": 26864 }, { "epoch": 1.820245274070059, "grad_norm": 6.324100494384766, "learning_rate": 6.424943527962215e-05, "loss": 0.4665, "step": 26865 }, { "epoch": 1.8203130293380312, "grad_norm": 4.943985462188721, "learning_rate": 6.424806626052434e-05, "loss": 0.5342, "step": 26866 }, { "epoch": 1.8203807846060032, "grad_norm": 5.81069803237915, "learning_rate": 6.424669724142652e-05, "loss": 0.744, "step": 26867 }, { "epoch": 1.8204485398739751, "grad_norm": 8.049765586853027, "learning_rate": 6.42453282223287e-05, "loss": 0.5717, "step": 26868 }, { "epoch": 1.8205162951419473, "grad_norm": 8.365617752075195, "learning_rate": 6.424395920323088e-05, "loss": 0.6421, "step": 26869 }, { "epoch": 1.8205840504099193, "grad_norm": 5.931880950927734, "learning_rate": 6.424259018413306e-05, "loss": 0.644, "step": 26870 }, { "epoch": 1.8206518056778913, "grad_norm": 5.31236457824707, "learning_rate": 6.424122116503526e-05, "loss": 0.6598, "step": 26871 }, { "epoch": 1.8207195609458635, "grad_norm": 4.936864376068115, "learning_rate": 6.423985214593744e-05, "loss": 0.6771, "step": 26872 }, { "epoch": 1.8207873162138357, "grad_norm": 4.9129509925842285, "learning_rate": 6.423848312683962e-05, "loss": 0.6896, "step": 26873 }, { "epoch": 1.8208550714818077, "grad_norm": 5.862224578857422, "learning_rate": 6.42371141077418e-05, "loss": 0.7889, "step": 26874 }, { "epoch": 1.8209228267497797, "grad_norm": 8.524252891540527, "learning_rate": 6.423574508864399e-05, "loss": 0.6247, "step": 26875 }, { "epoch": 1.8209905820177519, "grad_norm": 8.955757141113281, "learning_rate": 6.423437606954617e-05, "loss": 0.8649, "step": 26876 }, { "epoch": 1.821058337285724, "grad_norm": 5.182361602783203, "learning_rate": 6.423300705044835e-05, "loss": 0.4863, "step": 26877 }, { "epoch": 1.821126092553696, "grad_norm": 8.005090713500977, "learning_rate": 6.423163803135053e-05, "loss": 0.8362, "step": 26878 }, { "epoch": 1.821193847821668, "grad_norm": 8.498146057128906, "learning_rate": 6.423026901225271e-05, "loss": 0.4503, "step": 26879 }, { "epoch": 1.8212616030896402, "grad_norm": 5.614807605743408, "learning_rate": 6.422889999315491e-05, "loss": 0.6538, "step": 26880 }, { "epoch": 1.8213293583576124, "grad_norm": 7.498589038848877, "learning_rate": 6.422753097405709e-05, "loss": 0.7441, "step": 26881 }, { "epoch": 1.8213971136255844, "grad_norm": 4.812827110290527, "learning_rate": 6.422616195495927e-05, "loss": 0.7186, "step": 26882 }, { "epoch": 1.8214648688935564, "grad_norm": 9.303362846374512, "learning_rate": 6.422479293586146e-05, "loss": 0.6587, "step": 26883 }, { "epoch": 1.8215326241615286, "grad_norm": 4.434160232543945, "learning_rate": 6.422342391676364e-05, "loss": 0.6887, "step": 26884 }, { "epoch": 1.8216003794295006, "grad_norm": 6.16200590133667, "learning_rate": 6.422205489766582e-05, "loss": 0.6104, "step": 26885 }, { "epoch": 1.8216681346974726, "grad_norm": 6.920750141143799, "learning_rate": 6.422068587856802e-05, "loss": 0.5485, "step": 26886 }, { "epoch": 1.8217358899654448, "grad_norm": 7.931760787963867, "learning_rate": 6.42193168594702e-05, "loss": 0.7042, "step": 26887 }, { "epoch": 1.821803645233417, "grad_norm": 6.881309986114502, "learning_rate": 6.421794784037238e-05, "loss": 0.7257, "step": 26888 }, { "epoch": 1.821871400501389, "grad_norm": 5.458394527435303, "learning_rate": 6.421657882127457e-05, "loss": 0.6933, "step": 26889 }, { "epoch": 1.821939155769361, "grad_norm": 5.543918609619141, "learning_rate": 6.421520980217675e-05, "loss": 0.6925, "step": 26890 }, { "epoch": 1.8220069110373331, "grad_norm": 7.882980823516846, "learning_rate": 6.421384078307893e-05, "loss": 0.6895, "step": 26891 }, { "epoch": 1.8220746663053053, "grad_norm": 5.929370880126953, "learning_rate": 6.421247176398111e-05, "loss": 0.5887, "step": 26892 }, { "epoch": 1.8221424215732773, "grad_norm": 8.399518013000488, "learning_rate": 6.421110274488329e-05, "loss": 0.6868, "step": 26893 }, { "epoch": 1.8222101768412493, "grad_norm": 5.345746040344238, "learning_rate": 6.420973372578549e-05, "loss": 0.6598, "step": 26894 }, { "epoch": 1.8222779321092215, "grad_norm": 5.6372880935668945, "learning_rate": 6.420836470668767e-05, "loss": 0.7604, "step": 26895 }, { "epoch": 1.8223456873771937, "grad_norm": 8.29430866241455, "learning_rate": 6.420699568758985e-05, "loss": 0.8143, "step": 26896 }, { "epoch": 1.8224134426451657, "grad_norm": 5.910583972930908, "learning_rate": 6.420562666849203e-05, "loss": 0.7133, "step": 26897 }, { "epoch": 1.8224811979131377, "grad_norm": 7.054952621459961, "learning_rate": 6.420425764939422e-05, "loss": 0.6232, "step": 26898 }, { "epoch": 1.8225489531811099, "grad_norm": 8.68067741394043, "learning_rate": 6.42028886302964e-05, "loss": 0.6582, "step": 26899 }, { "epoch": 1.822616708449082, "grad_norm": 5.167630195617676, "learning_rate": 6.420151961119858e-05, "loss": 0.5518, "step": 26900 }, { "epoch": 1.8226844637170538, "grad_norm": 9.366568565368652, "learning_rate": 6.420015059210076e-05, "loss": 0.4744, "step": 26901 }, { "epoch": 1.822752218985026, "grad_norm": 8.826454162597656, "learning_rate": 6.419878157300294e-05, "loss": 0.7846, "step": 26902 }, { "epoch": 1.8228199742529982, "grad_norm": 4.773290634155273, "learning_rate": 6.419741255390514e-05, "loss": 0.5836, "step": 26903 }, { "epoch": 1.8228877295209702, "grad_norm": 5.867455959320068, "learning_rate": 6.419604353480732e-05, "loss": 0.7549, "step": 26904 }, { "epoch": 1.8229554847889422, "grad_norm": 4.339199066162109, "learning_rate": 6.41946745157095e-05, "loss": 0.585, "step": 26905 }, { "epoch": 1.8230232400569144, "grad_norm": 10.257946968078613, "learning_rate": 6.419330549661168e-05, "loss": 0.6898, "step": 26906 }, { "epoch": 1.8230909953248866, "grad_norm": 6.219809055328369, "learning_rate": 6.419193647751387e-05, "loss": 0.6128, "step": 26907 }, { "epoch": 1.8231587505928586, "grad_norm": 7.880582809448242, "learning_rate": 6.419056745841605e-05, "loss": 0.5522, "step": 26908 }, { "epoch": 1.8232265058608306, "grad_norm": 6.833552837371826, "learning_rate": 6.418919843931823e-05, "loss": 0.6702, "step": 26909 }, { "epoch": 1.8232942611288028, "grad_norm": 5.3744988441467285, "learning_rate": 6.418782942022041e-05, "loss": 0.6979, "step": 26910 }, { "epoch": 1.823362016396775, "grad_norm": 8.949766159057617, "learning_rate": 6.418646040112259e-05, "loss": 0.543, "step": 26911 }, { "epoch": 1.823429771664747, "grad_norm": 4.5017852783203125, "learning_rate": 6.418509138202479e-05, "loss": 0.5045, "step": 26912 }, { "epoch": 1.823497526932719, "grad_norm": 5.969408988952637, "learning_rate": 6.418372236292697e-05, "loss": 0.7842, "step": 26913 }, { "epoch": 1.8235652822006911, "grad_norm": 6.075934410095215, "learning_rate": 6.418235334382915e-05, "loss": 0.6815, "step": 26914 }, { "epoch": 1.8236330374686633, "grad_norm": 5.513145923614502, "learning_rate": 6.418098432473133e-05, "loss": 0.8548, "step": 26915 }, { "epoch": 1.8237007927366353, "grad_norm": 5.735189914703369, "learning_rate": 6.417961530563351e-05, "loss": 0.6377, "step": 26916 }, { "epoch": 1.8237685480046073, "grad_norm": 4.3350510597229, "learning_rate": 6.41782462865357e-05, "loss": 0.657, "step": 26917 }, { "epoch": 1.8238363032725795, "grad_norm": 6.060817241668701, "learning_rate": 6.417687726743788e-05, "loss": 0.6871, "step": 26918 }, { "epoch": 1.8239040585405515, "grad_norm": 5.571325778961182, "learning_rate": 6.417550824834006e-05, "loss": 0.6959, "step": 26919 }, { "epoch": 1.8239718138085235, "grad_norm": 6.155787944793701, "learning_rate": 6.417413922924224e-05, "loss": 0.527, "step": 26920 }, { "epoch": 1.8240395690764957, "grad_norm": 5.891366958618164, "learning_rate": 6.417277021014444e-05, "loss": 0.6957, "step": 26921 }, { "epoch": 1.8241073243444679, "grad_norm": 6.51685094833374, "learning_rate": 6.417140119104662e-05, "loss": 0.8319, "step": 26922 }, { "epoch": 1.8241750796124399, "grad_norm": 4.492886066436768, "learning_rate": 6.41700321719488e-05, "loss": 0.5719, "step": 26923 }, { "epoch": 1.8242428348804118, "grad_norm": 6.633151054382324, "learning_rate": 6.416866315285098e-05, "loss": 0.6983, "step": 26924 }, { "epoch": 1.824310590148384, "grad_norm": 3.7205300331115723, "learning_rate": 6.416729413375316e-05, "loss": 0.5829, "step": 26925 }, { "epoch": 1.8243783454163562, "grad_norm": 7.058459758758545, "learning_rate": 6.416592511465535e-05, "loss": 0.5829, "step": 26926 }, { "epoch": 1.8244461006843282, "grad_norm": 6.693596363067627, "learning_rate": 6.416455609555753e-05, "loss": 0.6567, "step": 26927 }, { "epoch": 1.8245138559523002, "grad_norm": 5.36549711227417, "learning_rate": 6.416318707645971e-05, "loss": 0.5685, "step": 26928 }, { "epoch": 1.8245816112202724, "grad_norm": 6.21834135055542, "learning_rate": 6.41618180573619e-05, "loss": 0.5284, "step": 26929 }, { "epoch": 1.8246493664882446, "grad_norm": 6.121436595916748, "learning_rate": 6.416044903826409e-05, "loss": 0.6134, "step": 26930 }, { "epoch": 1.8247171217562166, "grad_norm": 4.8863043785095215, "learning_rate": 6.415908001916627e-05, "loss": 0.4969, "step": 26931 }, { "epoch": 1.8247848770241886, "grad_norm": 7.209650039672852, "learning_rate": 6.415771100006845e-05, "loss": 0.8434, "step": 26932 }, { "epoch": 1.8248526322921608, "grad_norm": 4.393049240112305, "learning_rate": 6.415634198097064e-05, "loss": 0.5362, "step": 26933 }, { "epoch": 1.8249203875601328, "grad_norm": 5.873647212982178, "learning_rate": 6.415497296187282e-05, "loss": 0.8002, "step": 26934 }, { "epoch": 1.8249881428281047, "grad_norm": 5.379234790802002, "learning_rate": 6.4153603942775e-05, "loss": 0.6589, "step": 26935 }, { "epoch": 1.825055898096077, "grad_norm": 4.735995292663574, "learning_rate": 6.41522349236772e-05, "loss": 0.6109, "step": 26936 }, { "epoch": 1.8251236533640491, "grad_norm": 6.90614652633667, "learning_rate": 6.415086590457938e-05, "loss": 0.68, "step": 26937 }, { "epoch": 1.8251914086320211, "grad_norm": 5.389228820800781, "learning_rate": 6.414949688548156e-05, "loss": 0.5866, "step": 26938 }, { "epoch": 1.825259163899993, "grad_norm": 8.4229736328125, "learning_rate": 6.414812786638374e-05, "loss": 0.809, "step": 26939 }, { "epoch": 1.8253269191679653, "grad_norm": 5.2330121994018555, "learning_rate": 6.414675884728593e-05, "loss": 0.5874, "step": 26940 }, { "epoch": 1.8253946744359375, "grad_norm": 5.0487141609191895, "learning_rate": 6.414538982818811e-05, "loss": 0.7264, "step": 26941 }, { "epoch": 1.8254624297039095, "grad_norm": 4.621920108795166, "learning_rate": 6.414402080909029e-05, "loss": 0.471, "step": 26942 }, { "epoch": 1.8255301849718815, "grad_norm": 5.247325897216797, "learning_rate": 6.414265178999247e-05, "loss": 0.7786, "step": 26943 }, { "epoch": 1.8255979402398537, "grad_norm": 5.230954647064209, "learning_rate": 6.414128277089467e-05, "loss": 0.6541, "step": 26944 }, { "epoch": 1.8256656955078259, "grad_norm": 6.193086624145508, "learning_rate": 6.413991375179685e-05, "loss": 0.8888, "step": 26945 }, { "epoch": 1.8257334507757979, "grad_norm": 4.489074230194092, "learning_rate": 6.413854473269903e-05, "loss": 0.5255, "step": 26946 }, { "epoch": 1.8258012060437698, "grad_norm": 9.20129108428955, "learning_rate": 6.413717571360121e-05, "loss": 0.6889, "step": 26947 }, { "epoch": 1.825868961311742, "grad_norm": 5.677957534790039, "learning_rate": 6.413580669450339e-05, "loss": 0.7654, "step": 26948 }, { "epoch": 1.8259367165797142, "grad_norm": 5.151188850402832, "learning_rate": 6.413443767540558e-05, "loss": 0.6573, "step": 26949 }, { "epoch": 1.826004471847686, "grad_norm": 5.070529937744141, "learning_rate": 6.413306865630776e-05, "loss": 0.7751, "step": 26950 }, { "epoch": 1.8260722271156582, "grad_norm": 9.593429565429688, "learning_rate": 6.413169963720994e-05, "loss": 0.6607, "step": 26951 }, { "epoch": 1.8261399823836304, "grad_norm": 4.855633735656738, "learning_rate": 6.413033061811212e-05, "loss": 0.6231, "step": 26952 }, { "epoch": 1.8262077376516024, "grad_norm": 7.323613166809082, "learning_rate": 6.412896159901432e-05, "loss": 0.7254, "step": 26953 }, { "epoch": 1.8262754929195744, "grad_norm": 12.425178527832031, "learning_rate": 6.41275925799165e-05, "loss": 0.739, "step": 26954 }, { "epoch": 1.8263432481875466, "grad_norm": 7.491691589355469, "learning_rate": 6.412622356081868e-05, "loss": 0.7017, "step": 26955 }, { "epoch": 1.8264110034555188, "grad_norm": 6.335961818695068, "learning_rate": 6.412485454172086e-05, "loss": 0.6649, "step": 26956 }, { "epoch": 1.8264787587234907, "grad_norm": 6.87590217590332, "learning_rate": 6.412348552262304e-05, "loss": 0.8192, "step": 26957 }, { "epoch": 1.8265465139914627, "grad_norm": 6.224260330200195, "learning_rate": 6.412211650352523e-05, "loss": 0.7282, "step": 26958 }, { "epoch": 1.826614269259435, "grad_norm": 7.220107078552246, "learning_rate": 6.412074748442741e-05, "loss": 0.6591, "step": 26959 }, { "epoch": 1.8266820245274071, "grad_norm": 6.2804341316223145, "learning_rate": 6.411937846532959e-05, "loss": 0.6233, "step": 26960 }, { "epoch": 1.8267497797953791, "grad_norm": 5.34904670715332, "learning_rate": 6.411800944623177e-05, "loss": 0.7223, "step": 26961 }, { "epoch": 1.826817535063351, "grad_norm": 4.861424446105957, "learning_rate": 6.411664042713397e-05, "loss": 0.5573, "step": 26962 }, { "epoch": 1.8268852903313233, "grad_norm": 5.9581522941589355, "learning_rate": 6.411527140803615e-05, "loss": 0.6992, "step": 26963 }, { "epoch": 1.8269530455992955, "grad_norm": 5.566277980804443, "learning_rate": 6.411390238893833e-05, "loss": 0.6453, "step": 26964 }, { "epoch": 1.8270208008672675, "grad_norm": 5.890635967254639, "learning_rate": 6.411253336984051e-05, "loss": 0.8441, "step": 26965 }, { "epoch": 1.8270885561352395, "grad_norm": 6.2591753005981445, "learning_rate": 6.411116435074269e-05, "loss": 0.6785, "step": 26966 }, { "epoch": 1.8271563114032117, "grad_norm": 5.018440246582031, "learning_rate": 6.410979533164488e-05, "loss": 0.7852, "step": 26967 }, { "epoch": 1.8272240666711836, "grad_norm": 5.77724027633667, "learning_rate": 6.410842631254706e-05, "loss": 0.8152, "step": 26968 }, { "epoch": 1.8272918219391556, "grad_norm": 4.979044437408447, "learning_rate": 6.410705729344924e-05, "loss": 0.8171, "step": 26969 }, { "epoch": 1.8273595772071278, "grad_norm": 5.348381996154785, "learning_rate": 6.410568827435142e-05, "loss": 0.638, "step": 26970 }, { "epoch": 1.8274273324751, "grad_norm": 9.234869956970215, "learning_rate": 6.41043192552536e-05, "loss": 0.6912, "step": 26971 }, { "epoch": 1.827495087743072, "grad_norm": 5.540833950042725, "learning_rate": 6.41029502361558e-05, "loss": 0.4706, "step": 26972 }, { "epoch": 1.827562843011044, "grad_norm": 4.905838966369629, "learning_rate": 6.410158121705798e-05, "loss": 0.8874, "step": 26973 }, { "epoch": 1.8276305982790162, "grad_norm": 7.751222610473633, "learning_rate": 6.410021219796016e-05, "loss": 0.5258, "step": 26974 }, { "epoch": 1.8276983535469884, "grad_norm": 6.102903842926025, "learning_rate": 6.409884317886234e-05, "loss": 0.6203, "step": 26975 }, { "epoch": 1.8277661088149604, "grad_norm": 5.799428462982178, "learning_rate": 6.409747415976453e-05, "loss": 0.6989, "step": 26976 }, { "epoch": 1.8278338640829324, "grad_norm": 5.58146858215332, "learning_rate": 6.409610514066671e-05, "loss": 0.4457, "step": 26977 }, { "epoch": 1.8279016193509046, "grad_norm": 8.655208587646484, "learning_rate": 6.409473612156889e-05, "loss": 0.921, "step": 26978 }, { "epoch": 1.8279693746188768, "grad_norm": 8.314556121826172, "learning_rate": 6.409336710247109e-05, "loss": 0.6584, "step": 26979 }, { "epoch": 1.8280371298868487, "grad_norm": 7.256601810455322, "learning_rate": 6.409199808337327e-05, "loss": 0.6863, "step": 26980 }, { "epoch": 1.8281048851548207, "grad_norm": 4.994773864746094, "learning_rate": 6.409062906427545e-05, "loss": 0.5215, "step": 26981 }, { "epoch": 1.828172640422793, "grad_norm": 6.1205058097839355, "learning_rate": 6.408926004517764e-05, "loss": 0.6166, "step": 26982 }, { "epoch": 1.828240395690765, "grad_norm": 6.192052364349365, "learning_rate": 6.408789102607982e-05, "loss": 0.9072, "step": 26983 }, { "epoch": 1.8283081509587369, "grad_norm": 5.821140766143799, "learning_rate": 6.4086522006982e-05, "loss": 0.5227, "step": 26984 }, { "epoch": 1.828375906226709, "grad_norm": 6.481544017791748, "learning_rate": 6.40851529878842e-05, "loss": 0.6292, "step": 26985 }, { "epoch": 1.8284436614946813, "grad_norm": 6.02473258972168, "learning_rate": 6.408378396878638e-05, "loss": 0.7241, "step": 26986 }, { "epoch": 1.8285114167626533, "grad_norm": 4.747802734375, "learning_rate": 6.408241494968856e-05, "loss": 0.4797, "step": 26987 }, { "epoch": 1.8285791720306253, "grad_norm": 4.692590713500977, "learning_rate": 6.408104593059074e-05, "loss": 0.5038, "step": 26988 }, { "epoch": 1.8286469272985975, "grad_norm": 4.784687042236328, "learning_rate": 6.407967691149292e-05, "loss": 0.6441, "step": 26989 }, { "epoch": 1.8287146825665697, "grad_norm": 6.401617050170898, "learning_rate": 6.407830789239511e-05, "loss": 0.6369, "step": 26990 }, { "epoch": 1.8287824378345416, "grad_norm": 6.50145959854126, "learning_rate": 6.407693887329729e-05, "loss": 0.5476, "step": 26991 }, { "epoch": 1.8288501931025136, "grad_norm": 5.1334733963012695, "learning_rate": 6.407556985419947e-05, "loss": 0.6473, "step": 26992 }, { "epoch": 1.8289179483704858, "grad_norm": 7.163140296936035, "learning_rate": 6.407420083510165e-05, "loss": 0.5846, "step": 26993 }, { "epoch": 1.828985703638458, "grad_norm": 5.882272243499756, "learning_rate": 6.407283181600383e-05, "loss": 0.8721, "step": 26994 }, { "epoch": 1.82905345890643, "grad_norm": 4.345631122589111, "learning_rate": 6.407146279690603e-05, "loss": 0.6124, "step": 26995 }, { "epoch": 1.829121214174402, "grad_norm": 6.0518012046813965, "learning_rate": 6.40700937778082e-05, "loss": 0.7258, "step": 26996 }, { "epoch": 1.8291889694423742, "grad_norm": 10.405317306518555, "learning_rate": 6.406872475871039e-05, "loss": 0.6924, "step": 26997 }, { "epoch": 1.8292567247103464, "grad_norm": 6.6871137619018555, "learning_rate": 6.406735573961257e-05, "loss": 0.7213, "step": 26998 }, { "epoch": 1.8293244799783182, "grad_norm": 7.200775146484375, "learning_rate": 6.406598672051476e-05, "loss": 0.7284, "step": 26999 }, { "epoch": 1.8293922352462904, "grad_norm": 7.090208530426025, "learning_rate": 6.406461770141694e-05, "loss": 0.747, "step": 27000 }, { "epoch": 1.8294599905142626, "grad_norm": 8.028387069702148, "learning_rate": 6.406324868231912e-05, "loss": 0.6143, "step": 27001 }, { "epoch": 1.8295277457822345, "grad_norm": 6.212949275970459, "learning_rate": 6.40618796632213e-05, "loss": 0.7512, "step": 27002 }, { "epoch": 1.8295955010502065, "grad_norm": 5.00569486618042, "learning_rate": 6.406051064412348e-05, "loss": 0.4813, "step": 27003 }, { "epoch": 1.8296632563181787, "grad_norm": 6.806384086608887, "learning_rate": 6.405914162502568e-05, "loss": 0.6581, "step": 27004 }, { "epoch": 1.829731011586151, "grad_norm": 7.819924354553223, "learning_rate": 6.405777260592786e-05, "loss": 0.6099, "step": 27005 }, { "epoch": 1.829798766854123, "grad_norm": 5.931289196014404, "learning_rate": 6.405640358683004e-05, "loss": 0.8588, "step": 27006 }, { "epoch": 1.8298665221220949, "grad_norm": 6.23674201965332, "learning_rate": 6.405503456773222e-05, "loss": 0.8011, "step": 27007 }, { "epoch": 1.829934277390067, "grad_norm": 6.8386383056640625, "learning_rate": 6.405366554863441e-05, "loss": 0.801, "step": 27008 }, { "epoch": 1.8300020326580393, "grad_norm": 7.78073787689209, "learning_rate": 6.405229652953659e-05, "loss": 0.591, "step": 27009 }, { "epoch": 1.8300697879260113, "grad_norm": 6.295107841491699, "learning_rate": 6.405092751043877e-05, "loss": 0.866, "step": 27010 }, { "epoch": 1.8301375431939833, "grad_norm": 4.190281867980957, "learning_rate": 6.404955849134095e-05, "loss": 0.5806, "step": 27011 }, { "epoch": 1.8302052984619555, "grad_norm": 5.526933193206787, "learning_rate": 6.404818947224313e-05, "loss": 0.6643, "step": 27012 }, { "epoch": 1.8302730537299277, "grad_norm": 9.667847633361816, "learning_rate": 6.404682045314533e-05, "loss": 0.8612, "step": 27013 }, { "epoch": 1.8303408089978996, "grad_norm": 5.015357494354248, "learning_rate": 6.404545143404751e-05, "loss": 0.7618, "step": 27014 }, { "epoch": 1.8304085642658716, "grad_norm": 5.642210960388184, "learning_rate": 6.404408241494969e-05, "loss": 0.7235, "step": 27015 }, { "epoch": 1.8304763195338438, "grad_norm": 5.983831405639648, "learning_rate": 6.404271339585187e-05, "loss": 0.4964, "step": 27016 }, { "epoch": 1.8305440748018158, "grad_norm": 5.916496276855469, "learning_rate": 6.404134437675405e-05, "loss": 0.6606, "step": 27017 }, { "epoch": 1.8306118300697878, "grad_norm": 5.996575832366943, "learning_rate": 6.403997535765624e-05, "loss": 0.6849, "step": 27018 }, { "epoch": 1.83067958533776, "grad_norm": 5.760493278503418, "learning_rate": 6.403860633855842e-05, "loss": 0.6324, "step": 27019 }, { "epoch": 1.8307473406057322, "grad_norm": 5.115372180938721, "learning_rate": 6.40372373194606e-05, "loss": 0.8033, "step": 27020 }, { "epoch": 1.8308150958737042, "grad_norm": 8.37394905090332, "learning_rate": 6.403586830036278e-05, "loss": 0.5806, "step": 27021 }, { "epoch": 1.8308828511416761, "grad_norm": 6.826961040496826, "learning_rate": 6.403449928126498e-05, "loss": 0.7289, "step": 27022 }, { "epoch": 1.8309506064096484, "grad_norm": 4.898519039154053, "learning_rate": 6.403313026216716e-05, "loss": 0.7147, "step": 27023 }, { "epoch": 1.8310183616776206, "grad_norm": 4.685663223266602, "learning_rate": 6.403176124306934e-05, "loss": 0.5217, "step": 27024 }, { "epoch": 1.8310861169455925, "grad_norm": 4.9772419929504395, "learning_rate": 6.403039222397153e-05, "loss": 0.6135, "step": 27025 }, { "epoch": 1.8311538722135645, "grad_norm": 6.4903740882873535, "learning_rate": 6.402902320487371e-05, "loss": 0.5683, "step": 27026 }, { "epoch": 1.8312216274815367, "grad_norm": 5.804243087768555, "learning_rate": 6.402765418577589e-05, "loss": 0.6519, "step": 27027 }, { "epoch": 1.831289382749509, "grad_norm": 5.7462944984436035, "learning_rate": 6.402628516667809e-05, "loss": 0.6994, "step": 27028 }, { "epoch": 1.831357138017481, "grad_norm": 6.7851362228393555, "learning_rate": 6.402491614758027e-05, "loss": 0.8892, "step": 27029 }, { "epoch": 1.8314248932854529, "grad_norm": 4.842338562011719, "learning_rate": 6.402354712848245e-05, "loss": 0.5477, "step": 27030 }, { "epoch": 1.831492648553425, "grad_norm": 4.671584129333496, "learning_rate": 6.402217810938464e-05, "loss": 0.6777, "step": 27031 }, { "epoch": 1.831560403821397, "grad_norm": 6.08165979385376, "learning_rate": 6.402080909028682e-05, "loss": 0.6355, "step": 27032 }, { "epoch": 1.831628159089369, "grad_norm": 6.45645809173584, "learning_rate": 6.4019440071189e-05, "loss": 0.6636, "step": 27033 }, { "epoch": 1.8316959143573412, "grad_norm": 5.685486316680908, "learning_rate": 6.401807105209118e-05, "loss": 0.5661, "step": 27034 }, { "epoch": 1.8317636696253135, "grad_norm": 6.7784647941589355, "learning_rate": 6.401670203299336e-05, "loss": 0.7328, "step": 27035 }, { "epoch": 1.8318314248932854, "grad_norm": 6.640045166015625, "learning_rate": 6.401533301389556e-05, "loss": 0.7097, "step": 27036 }, { "epoch": 1.8318991801612574, "grad_norm": 6.570594787597656, "learning_rate": 6.401396399479774e-05, "loss": 0.6009, "step": 27037 }, { "epoch": 1.8319669354292296, "grad_norm": 5.127500534057617, "learning_rate": 6.401259497569992e-05, "loss": 0.5751, "step": 27038 }, { "epoch": 1.8320346906972018, "grad_norm": 7.923624038696289, "learning_rate": 6.40112259566021e-05, "loss": 0.6297, "step": 27039 }, { "epoch": 1.8321024459651738, "grad_norm": 4.042380332946777, "learning_rate": 6.400985693750429e-05, "loss": 0.5798, "step": 27040 }, { "epoch": 1.8321702012331458, "grad_norm": 4.216054916381836, "learning_rate": 6.400848791840647e-05, "loss": 0.6215, "step": 27041 }, { "epoch": 1.832237956501118, "grad_norm": 4.337714672088623, "learning_rate": 6.400711889930865e-05, "loss": 0.4545, "step": 27042 }, { "epoch": 1.8323057117690902, "grad_norm": 5.690299987792969, "learning_rate": 6.400574988021083e-05, "loss": 0.4669, "step": 27043 }, { "epoch": 1.8323734670370622, "grad_norm": 7.744217395782471, "learning_rate": 6.400438086111301e-05, "loss": 0.639, "step": 27044 }, { "epoch": 1.8324412223050341, "grad_norm": 5.5576677322387695, "learning_rate": 6.40030118420152e-05, "loss": 0.6992, "step": 27045 }, { "epoch": 1.8325089775730063, "grad_norm": 4.3674821853637695, "learning_rate": 6.400164282291739e-05, "loss": 0.5946, "step": 27046 }, { "epoch": 1.8325767328409786, "grad_norm": 7.169052600860596, "learning_rate": 6.400027380381957e-05, "loss": 0.7153, "step": 27047 }, { "epoch": 1.8326444881089503, "grad_norm": 4.440925121307373, "learning_rate": 6.399890478472175e-05, "loss": 0.6253, "step": 27048 }, { "epoch": 1.8327122433769225, "grad_norm": 6.660823822021484, "learning_rate": 6.399753576562393e-05, "loss": 0.6886, "step": 27049 }, { "epoch": 1.8327799986448947, "grad_norm": 5.153602600097656, "learning_rate": 6.399616674652612e-05, "loss": 0.5792, "step": 27050 }, { "epoch": 1.8328477539128667, "grad_norm": 6.479100227355957, "learning_rate": 6.39947977274283e-05, "loss": 0.8238, "step": 27051 }, { "epoch": 1.8329155091808387, "grad_norm": 5.222026348114014, "learning_rate": 6.399342870833048e-05, "loss": 0.6738, "step": 27052 }, { "epoch": 1.8329832644488109, "grad_norm": 5.560079574584961, "learning_rate": 6.399205968923266e-05, "loss": 0.6105, "step": 27053 }, { "epoch": 1.833051019716783, "grad_norm": 9.557579040527344, "learning_rate": 6.399069067013486e-05, "loss": 0.6399, "step": 27054 }, { "epoch": 1.833118774984755, "grad_norm": 4.676896572113037, "learning_rate": 6.398932165103704e-05, "loss": 0.6849, "step": 27055 }, { "epoch": 1.833186530252727, "grad_norm": 6.9437994956970215, "learning_rate": 6.398795263193922e-05, "loss": 0.8253, "step": 27056 }, { "epoch": 1.8332542855206992, "grad_norm": 5.391374588012695, "learning_rate": 6.39865836128414e-05, "loss": 0.7191, "step": 27057 }, { "epoch": 1.8333220407886714, "grad_norm": 5.727509021759033, "learning_rate": 6.398521459374358e-05, "loss": 0.5553, "step": 27058 }, { "epoch": 1.8333897960566434, "grad_norm": 8.656184196472168, "learning_rate": 6.398384557464577e-05, "loss": 0.5963, "step": 27059 }, { "epoch": 1.8334575513246154, "grad_norm": 5.220839023590088, "learning_rate": 6.398247655554795e-05, "loss": 0.7957, "step": 27060 }, { "epoch": 1.8335253065925876, "grad_norm": 6.602926731109619, "learning_rate": 6.398110753645013e-05, "loss": 0.6537, "step": 27061 }, { "epoch": 1.8335930618605598, "grad_norm": 3.6636953353881836, "learning_rate": 6.397973851735231e-05, "loss": 0.7836, "step": 27062 }, { "epoch": 1.8336608171285318, "grad_norm": 10.129796981811523, "learning_rate": 6.39783694982545e-05, "loss": 0.6911, "step": 27063 }, { "epoch": 1.8337285723965038, "grad_norm": 5.320174217224121, "learning_rate": 6.397700047915669e-05, "loss": 0.7047, "step": 27064 }, { "epoch": 1.833796327664476, "grad_norm": 7.714473724365234, "learning_rate": 6.397563146005887e-05, "loss": 0.6122, "step": 27065 }, { "epoch": 1.833864082932448, "grad_norm": 5.299808025360107, "learning_rate": 6.397426244096105e-05, "loss": 0.478, "step": 27066 }, { "epoch": 1.83393183820042, "grad_norm": 5.774031639099121, "learning_rate": 6.397289342186323e-05, "loss": 0.5519, "step": 27067 }, { "epoch": 1.8339995934683921, "grad_norm": 6.557982921600342, "learning_rate": 6.397152440276542e-05, "loss": 0.68, "step": 27068 }, { "epoch": 1.8340673487363643, "grad_norm": 8.897318840026855, "learning_rate": 6.39701553836676e-05, "loss": 0.7005, "step": 27069 }, { "epoch": 1.8341351040043363, "grad_norm": 4.923262119293213, "learning_rate": 6.396878636456978e-05, "loss": 0.6291, "step": 27070 }, { "epoch": 1.8342028592723083, "grad_norm": 5.590880393981934, "learning_rate": 6.396741734547198e-05, "loss": 0.7349, "step": 27071 }, { "epoch": 1.8342706145402805, "grad_norm": 5.689929962158203, "learning_rate": 6.396604832637416e-05, "loss": 0.7117, "step": 27072 }, { "epoch": 1.8343383698082527, "grad_norm": 4.350910663604736, "learning_rate": 6.396467930727634e-05, "loss": 0.6253, "step": 27073 }, { "epoch": 1.8344061250762247, "grad_norm": 6.703629016876221, "learning_rate": 6.396331028817853e-05, "loss": 0.6563, "step": 27074 }, { "epoch": 1.8344738803441967, "grad_norm": 11.001628875732422, "learning_rate": 6.396194126908071e-05, "loss": 0.4706, "step": 27075 }, { "epoch": 1.8345416356121689, "grad_norm": 6.387265205383301, "learning_rate": 6.396057224998289e-05, "loss": 0.6145, "step": 27076 }, { "epoch": 1.834609390880141, "grad_norm": 5.874007701873779, "learning_rate": 6.395920323088509e-05, "loss": 0.6737, "step": 27077 }, { "epoch": 1.834677146148113, "grad_norm": 6.399379253387451, "learning_rate": 6.395783421178727e-05, "loss": 0.6994, "step": 27078 }, { "epoch": 1.834744901416085, "grad_norm": 6.696014881134033, "learning_rate": 6.395646519268945e-05, "loss": 0.7136, "step": 27079 }, { "epoch": 1.8348126566840572, "grad_norm": 7.934556484222412, "learning_rate": 6.395509617359163e-05, "loss": 0.7292, "step": 27080 }, { "epoch": 1.8348804119520292, "grad_norm": 6.404906272888184, "learning_rate": 6.39537271544938e-05, "loss": 0.6635, "step": 27081 }, { "epoch": 1.8349481672200012, "grad_norm": 6.134159088134766, "learning_rate": 6.3952358135396e-05, "loss": 0.8699, "step": 27082 }, { "epoch": 1.8350159224879734, "grad_norm": 8.87403392791748, "learning_rate": 6.395098911629818e-05, "loss": 0.6233, "step": 27083 }, { "epoch": 1.8350836777559456, "grad_norm": 4.6083149909973145, "learning_rate": 6.394962009720036e-05, "loss": 0.5614, "step": 27084 }, { "epoch": 1.8351514330239176, "grad_norm": 5.596293926239014, "learning_rate": 6.394825107810254e-05, "loss": 0.7423, "step": 27085 }, { "epoch": 1.8352191882918896, "grad_norm": 4.901249885559082, "learning_rate": 6.394688205900474e-05, "loss": 0.7232, "step": 27086 }, { "epoch": 1.8352869435598618, "grad_norm": 5.825705528259277, "learning_rate": 6.394551303990692e-05, "loss": 0.6866, "step": 27087 }, { "epoch": 1.835354698827834, "grad_norm": 4.893263339996338, "learning_rate": 6.39441440208091e-05, "loss": 0.665, "step": 27088 }, { "epoch": 1.835422454095806, "grad_norm": 5.927305221557617, "learning_rate": 6.394277500171128e-05, "loss": 0.6703, "step": 27089 }, { "epoch": 1.835490209363778, "grad_norm": 6.780624866485596, "learning_rate": 6.394140598261346e-05, "loss": 0.6677, "step": 27090 }, { "epoch": 1.8355579646317501, "grad_norm": 6.262970447540283, "learning_rate": 6.394003696351565e-05, "loss": 1.0669, "step": 27091 }, { "epoch": 1.8356257198997223, "grad_norm": 4.726997375488281, "learning_rate": 6.393866794441783e-05, "loss": 0.7209, "step": 27092 }, { "epoch": 1.8356934751676943, "grad_norm": 5.94981050491333, "learning_rate": 6.393729892532001e-05, "loss": 0.4734, "step": 27093 }, { "epoch": 1.8357612304356663, "grad_norm": 4.843165397644043, "learning_rate": 6.393592990622219e-05, "loss": 0.6202, "step": 27094 }, { "epoch": 1.8358289857036385, "grad_norm": 6.516410827636719, "learning_rate": 6.393456088712439e-05, "loss": 0.7649, "step": 27095 }, { "epoch": 1.8358967409716107, "grad_norm": 6.110748767852783, "learning_rate": 6.393319186802657e-05, "loss": 0.4996, "step": 27096 }, { "epoch": 1.8359644962395825, "grad_norm": 4.608067989349365, "learning_rate": 6.393182284892875e-05, "loss": 0.6759, "step": 27097 }, { "epoch": 1.8360322515075547, "grad_norm": 6.62278413772583, "learning_rate": 6.393045382983093e-05, "loss": 0.6082, "step": 27098 }, { "epoch": 1.8361000067755269, "grad_norm": 4.574203968048096, "learning_rate": 6.392908481073311e-05, "loss": 0.657, "step": 27099 }, { "epoch": 1.8361677620434989, "grad_norm": 4.2443742752075195, "learning_rate": 6.39277157916353e-05, "loss": 0.6377, "step": 27100 }, { "epoch": 1.8362355173114708, "grad_norm": 4.650674819946289, "learning_rate": 6.392634677253748e-05, "loss": 0.4449, "step": 27101 }, { "epoch": 1.836303272579443, "grad_norm": 4.396097660064697, "learning_rate": 6.392497775343966e-05, "loss": 0.5545, "step": 27102 }, { "epoch": 1.8363710278474152, "grad_norm": 5.159193992614746, "learning_rate": 6.392360873434184e-05, "loss": 0.8793, "step": 27103 }, { "epoch": 1.8364387831153872, "grad_norm": 4.396710395812988, "learning_rate": 6.392223971524402e-05, "loss": 0.7038, "step": 27104 }, { "epoch": 1.8365065383833592, "grad_norm": 5.943559646606445, "learning_rate": 6.392087069614622e-05, "loss": 0.6529, "step": 27105 }, { "epoch": 1.8365742936513314, "grad_norm": 5.786361217498779, "learning_rate": 6.39195016770484e-05, "loss": 0.8427, "step": 27106 }, { "epoch": 1.8366420489193036, "grad_norm": 4.7800774574279785, "learning_rate": 6.391813265795058e-05, "loss": 0.7367, "step": 27107 }, { "epoch": 1.8367098041872756, "grad_norm": 6.204615116119385, "learning_rate": 6.391676363885276e-05, "loss": 0.6768, "step": 27108 }, { "epoch": 1.8367775594552476, "grad_norm": 6.143183708190918, "learning_rate": 6.391539461975495e-05, "loss": 0.7598, "step": 27109 }, { "epoch": 1.8368453147232198, "grad_norm": 5.547792911529541, "learning_rate": 6.391402560065713e-05, "loss": 0.6179, "step": 27110 }, { "epoch": 1.836913069991192, "grad_norm": 4.83444356918335, "learning_rate": 6.391265658155931e-05, "loss": 0.6423, "step": 27111 }, { "epoch": 1.836980825259164, "grad_norm": 7.766525745391846, "learning_rate": 6.391128756246149e-05, "loss": 0.8261, "step": 27112 }, { "epoch": 1.837048580527136, "grad_norm": 7.094886302947998, "learning_rate": 6.390991854336367e-05, "loss": 0.8914, "step": 27113 }, { "epoch": 1.8371163357951081, "grad_norm": 5.593029975891113, "learning_rate": 6.390854952426587e-05, "loss": 0.8365, "step": 27114 }, { "epoch": 1.8371840910630801, "grad_norm": 4.874145030975342, "learning_rate": 6.390718050516805e-05, "loss": 0.5787, "step": 27115 }, { "epoch": 1.837251846331052, "grad_norm": 5.7997331619262695, "learning_rate": 6.390581148607023e-05, "loss": 0.6945, "step": 27116 }, { "epoch": 1.8373196015990243, "grad_norm": 6.465807914733887, "learning_rate": 6.390444246697242e-05, "loss": 0.786, "step": 27117 }, { "epoch": 1.8373873568669965, "grad_norm": 4.830003261566162, "learning_rate": 6.39030734478746e-05, "loss": 0.6153, "step": 27118 }, { "epoch": 1.8374551121349685, "grad_norm": 4.534160614013672, "learning_rate": 6.390170442877678e-05, "loss": 0.5561, "step": 27119 }, { "epoch": 1.8375228674029405, "grad_norm": 7.364757061004639, "learning_rate": 6.390033540967898e-05, "loss": 0.503, "step": 27120 }, { "epoch": 1.8375906226709127, "grad_norm": 6.061070919036865, "learning_rate": 6.389896639058116e-05, "loss": 0.62, "step": 27121 }, { "epoch": 1.8376583779388849, "grad_norm": 6.377957820892334, "learning_rate": 6.389759737148334e-05, "loss": 0.6305, "step": 27122 }, { "epoch": 1.8377261332068568, "grad_norm": 5.186578750610352, "learning_rate": 6.389622835238553e-05, "loss": 0.7422, "step": 27123 }, { "epoch": 1.8377938884748288, "grad_norm": 4.198673248291016, "learning_rate": 6.389485933328771e-05, "loss": 0.6638, "step": 27124 }, { "epoch": 1.837861643742801, "grad_norm": 5.7820258140563965, "learning_rate": 6.389349031418989e-05, "loss": 0.5406, "step": 27125 }, { "epoch": 1.8379293990107732, "grad_norm": 7.425137996673584, "learning_rate": 6.389212129509207e-05, "loss": 0.64, "step": 27126 }, { "epoch": 1.8379971542787452, "grad_norm": 6.651361465454102, "learning_rate": 6.389075227599425e-05, "loss": 0.6403, "step": 27127 }, { "epoch": 1.8380649095467172, "grad_norm": 6.0719733238220215, "learning_rate": 6.388938325689645e-05, "loss": 0.824, "step": 27128 }, { "epoch": 1.8381326648146894, "grad_norm": 6.191911220550537, "learning_rate": 6.388801423779863e-05, "loss": 0.7056, "step": 27129 }, { "epoch": 1.8382004200826614, "grad_norm": 4.558618545532227, "learning_rate": 6.38866452187008e-05, "loss": 0.6392, "step": 27130 }, { "epoch": 1.8382681753506334, "grad_norm": 4.800431251525879, "learning_rate": 6.388527619960299e-05, "loss": 0.7138, "step": 27131 }, { "epoch": 1.8383359306186056, "grad_norm": 5.098847389221191, "learning_rate": 6.388390718050518e-05, "loss": 0.6193, "step": 27132 }, { "epoch": 1.8384036858865778, "grad_norm": 4.847878456115723, "learning_rate": 6.388253816140736e-05, "loss": 0.5993, "step": 27133 }, { "epoch": 1.8384714411545497, "grad_norm": 8.10627269744873, "learning_rate": 6.388116914230954e-05, "loss": 0.7259, "step": 27134 }, { "epoch": 1.8385391964225217, "grad_norm": 5.610559940338135, "learning_rate": 6.387980012321172e-05, "loss": 0.6919, "step": 27135 }, { "epoch": 1.838606951690494, "grad_norm": 5.2679033279418945, "learning_rate": 6.38784311041139e-05, "loss": 0.6883, "step": 27136 }, { "epoch": 1.8386747069584661, "grad_norm": 4.892630577087402, "learning_rate": 6.38770620850161e-05, "loss": 0.6058, "step": 27137 }, { "epoch": 1.8387424622264381, "grad_norm": 7.031632423400879, "learning_rate": 6.387569306591828e-05, "loss": 0.895, "step": 27138 }, { "epoch": 1.83881021749441, "grad_norm": 5.677679538726807, "learning_rate": 6.387432404682046e-05, "loss": 0.6503, "step": 27139 }, { "epoch": 1.8388779727623823, "grad_norm": 4.14378023147583, "learning_rate": 6.387295502772264e-05, "loss": 0.4477, "step": 27140 }, { "epoch": 1.8389457280303545, "grad_norm": 6.540284156799316, "learning_rate": 6.387158600862483e-05, "loss": 0.7397, "step": 27141 }, { "epoch": 1.8390134832983265, "grad_norm": 7.307000637054443, "learning_rate": 6.387021698952701e-05, "loss": 0.765, "step": 27142 }, { "epoch": 1.8390812385662985, "grad_norm": 5.611281394958496, "learning_rate": 6.386884797042919e-05, "loss": 0.578, "step": 27143 }, { "epoch": 1.8391489938342707, "grad_norm": 5.757426738739014, "learning_rate": 6.386747895133137e-05, "loss": 0.8585, "step": 27144 }, { "epoch": 1.8392167491022429, "grad_norm": 6.5514349937438965, "learning_rate": 6.386610993223355e-05, "loss": 0.9399, "step": 27145 }, { "epoch": 1.8392845043702146, "grad_norm": 5.2238311767578125, "learning_rate": 6.386474091313575e-05, "loss": 0.6201, "step": 27146 }, { "epoch": 1.8393522596381868, "grad_norm": 4.355088710784912, "learning_rate": 6.386337189403793e-05, "loss": 0.5092, "step": 27147 }, { "epoch": 1.839420014906159, "grad_norm": 5.973000526428223, "learning_rate": 6.38620028749401e-05, "loss": 0.8749, "step": 27148 }, { "epoch": 1.839487770174131, "grad_norm": 6.912758827209473, "learning_rate": 6.386063385584229e-05, "loss": 0.4982, "step": 27149 }, { "epoch": 1.839555525442103, "grad_norm": 5.01729154586792, "learning_rate": 6.385926483674447e-05, "loss": 0.53, "step": 27150 }, { "epoch": 1.8396232807100752, "grad_norm": 5.161027431488037, "learning_rate": 6.385789581764666e-05, "loss": 0.664, "step": 27151 }, { "epoch": 1.8396910359780474, "grad_norm": 5.726491928100586, "learning_rate": 6.385652679854884e-05, "loss": 0.5998, "step": 27152 }, { "epoch": 1.8397587912460194, "grad_norm": 4.707332134246826, "learning_rate": 6.385515777945102e-05, "loss": 0.633, "step": 27153 }, { "epoch": 1.8398265465139914, "grad_norm": 5.075165748596191, "learning_rate": 6.38537887603532e-05, "loss": 0.7313, "step": 27154 }, { "epoch": 1.8398943017819636, "grad_norm": 4.26729679107666, "learning_rate": 6.38524197412554e-05, "loss": 0.5671, "step": 27155 }, { "epoch": 1.8399620570499358, "grad_norm": 4.236566066741943, "learning_rate": 6.385105072215758e-05, "loss": 0.642, "step": 27156 }, { "epoch": 1.8400298123179077, "grad_norm": 6.68916654586792, "learning_rate": 6.384968170305976e-05, "loss": 0.8603, "step": 27157 }, { "epoch": 1.8400975675858797, "grad_norm": 5.118523597717285, "learning_rate": 6.384831268396194e-05, "loss": 0.6821, "step": 27158 }, { "epoch": 1.840165322853852, "grad_norm": 6.067781448364258, "learning_rate": 6.384694366486412e-05, "loss": 0.6319, "step": 27159 }, { "epoch": 1.8402330781218241, "grad_norm": 4.439019680023193, "learning_rate": 6.384557464576631e-05, "loss": 0.7385, "step": 27160 }, { "epoch": 1.840300833389796, "grad_norm": 7.919618606567383, "learning_rate": 6.384420562666849e-05, "loss": 0.5013, "step": 27161 }, { "epoch": 1.840368588657768, "grad_norm": 5.464724540710449, "learning_rate": 6.384283660757067e-05, "loss": 0.9655, "step": 27162 }, { "epoch": 1.8404363439257403, "grad_norm": 7.131372928619385, "learning_rate": 6.384146758847285e-05, "loss": 0.5069, "step": 27163 }, { "epoch": 1.8405040991937123, "grad_norm": 6.207808494567871, "learning_rate": 6.384009856937505e-05, "loss": 0.6736, "step": 27164 }, { "epoch": 1.8405718544616843, "grad_norm": 7.220529079437256, "learning_rate": 6.383872955027723e-05, "loss": 0.7916, "step": 27165 }, { "epoch": 1.8406396097296565, "grad_norm": 6.629493713378906, "learning_rate": 6.38373605311794e-05, "loss": 0.7053, "step": 27166 }, { "epoch": 1.8407073649976287, "grad_norm": 5.980734825134277, "learning_rate": 6.38359915120816e-05, "loss": 0.5917, "step": 27167 }, { "epoch": 1.8407751202656006, "grad_norm": 5.605178356170654, "learning_rate": 6.383462249298378e-05, "loss": 0.6872, "step": 27168 }, { "epoch": 1.8408428755335726, "grad_norm": 6.21562385559082, "learning_rate": 6.383325347388596e-05, "loss": 0.5389, "step": 27169 }, { "epoch": 1.8409106308015448, "grad_norm": 5.654512882232666, "learning_rate": 6.383188445478815e-05, "loss": 0.6534, "step": 27170 }, { "epoch": 1.840978386069517, "grad_norm": 5.547878265380859, "learning_rate": 6.383051543569034e-05, "loss": 0.846, "step": 27171 }, { "epoch": 1.841046141337489, "grad_norm": 5.61244010925293, "learning_rate": 6.382914641659252e-05, "loss": 0.6848, "step": 27172 }, { "epoch": 1.841113896605461, "grad_norm": 7.1599812507629395, "learning_rate": 6.382777739749471e-05, "loss": 0.8033, "step": 27173 }, { "epoch": 1.8411816518734332, "grad_norm": 5.274214744567871, "learning_rate": 6.382640837839689e-05, "loss": 0.7354, "step": 27174 }, { "epoch": 1.8412494071414054, "grad_norm": 6.403132915496826, "learning_rate": 6.382503935929907e-05, "loss": 0.6285, "step": 27175 }, { "epoch": 1.8413171624093774, "grad_norm": 5.905534744262695, "learning_rate": 6.382367034020125e-05, "loss": 0.6663, "step": 27176 }, { "epoch": 1.8413849176773494, "grad_norm": 4.439425945281982, "learning_rate": 6.382230132110343e-05, "loss": 0.4893, "step": 27177 }, { "epoch": 1.8414526729453216, "grad_norm": 10.236502647399902, "learning_rate": 6.382093230200562e-05, "loss": 0.5672, "step": 27178 }, { "epoch": 1.8415204282132935, "grad_norm": 5.910647392272949, "learning_rate": 6.38195632829078e-05, "loss": 0.8369, "step": 27179 }, { "epoch": 1.8415881834812655, "grad_norm": 4.8260722160339355, "learning_rate": 6.381819426380999e-05, "loss": 0.6168, "step": 27180 }, { "epoch": 1.8416559387492377, "grad_norm": 7.156643390655518, "learning_rate": 6.381682524471217e-05, "loss": 0.6032, "step": 27181 }, { "epoch": 1.84172369401721, "grad_norm": 9.88762092590332, "learning_rate": 6.381545622561435e-05, "loss": 0.5745, "step": 27182 }, { "epoch": 1.841791449285182, "grad_norm": 6.390614032745361, "learning_rate": 6.381408720651654e-05, "loss": 0.5789, "step": 27183 }, { "epoch": 1.8418592045531539, "grad_norm": 3.8802597522735596, "learning_rate": 6.381271818741872e-05, "loss": 0.6438, "step": 27184 }, { "epoch": 1.841926959821126, "grad_norm": 5.190890312194824, "learning_rate": 6.38113491683209e-05, "loss": 0.6418, "step": 27185 }, { "epoch": 1.8419947150890983, "grad_norm": 4.825675964355469, "learning_rate": 6.380998014922308e-05, "loss": 0.6216, "step": 27186 }, { "epoch": 1.8420624703570703, "grad_norm": 4.351223945617676, "learning_rate": 6.380861113012527e-05, "loss": 0.6623, "step": 27187 }, { "epoch": 1.8421302256250423, "grad_norm": 6.349198341369629, "learning_rate": 6.380724211102746e-05, "loss": 0.6646, "step": 27188 }, { "epoch": 1.8421979808930145, "grad_norm": 4.6931843757629395, "learning_rate": 6.380587309192964e-05, "loss": 0.6439, "step": 27189 }, { "epoch": 1.8422657361609867, "grad_norm": 7.774212837219238, "learning_rate": 6.380450407283182e-05, "loss": 0.7867, "step": 27190 }, { "epoch": 1.8423334914289586, "grad_norm": 5.207257270812988, "learning_rate": 6.3803135053734e-05, "loss": 0.829, "step": 27191 }, { "epoch": 1.8424012466969306, "grad_norm": 6.146117687225342, "learning_rate": 6.380176603463619e-05, "loss": 0.5151, "step": 27192 }, { "epoch": 1.8424690019649028, "grad_norm": 7.030690670013428, "learning_rate": 6.380039701553837e-05, "loss": 0.5987, "step": 27193 }, { "epoch": 1.842536757232875, "grad_norm": 4.610107898712158, "learning_rate": 6.379902799644055e-05, "loss": 0.6009, "step": 27194 }, { "epoch": 1.8426045125008468, "grad_norm": 5.732295989990234, "learning_rate": 6.379765897734273e-05, "loss": 0.7996, "step": 27195 }, { "epoch": 1.842672267768819, "grad_norm": 5.715400218963623, "learning_rate": 6.379628995824493e-05, "loss": 0.7113, "step": 27196 }, { "epoch": 1.8427400230367912, "grad_norm": 4.745540142059326, "learning_rate": 6.37949209391471e-05, "loss": 0.7557, "step": 27197 }, { "epoch": 1.8428077783047632, "grad_norm": 5.092692852020264, "learning_rate": 6.379355192004929e-05, "loss": 0.5179, "step": 27198 }, { "epoch": 1.8428755335727351, "grad_norm": 5.640484809875488, "learning_rate": 6.379218290095147e-05, "loss": 0.542, "step": 27199 }, { "epoch": 1.8429432888407073, "grad_norm": 7.4174017906188965, "learning_rate": 6.379081388185365e-05, "loss": 0.8544, "step": 27200 }, { "epoch": 1.8430110441086796, "grad_norm": 9.441333770751953, "learning_rate": 6.378944486275584e-05, "loss": 0.6669, "step": 27201 }, { "epoch": 1.8430787993766515, "grad_norm": 5.900002479553223, "learning_rate": 6.378807584365802e-05, "loss": 0.4846, "step": 27202 }, { "epoch": 1.8431465546446235, "grad_norm": 4.485546112060547, "learning_rate": 6.37867068245602e-05, "loss": 0.653, "step": 27203 }, { "epoch": 1.8432143099125957, "grad_norm": 6.221130847930908, "learning_rate": 6.378533780546238e-05, "loss": 0.8438, "step": 27204 }, { "epoch": 1.843282065180568, "grad_norm": 5.402370452880859, "learning_rate": 6.378396878636456e-05, "loss": 0.6887, "step": 27205 }, { "epoch": 1.84334982044854, "grad_norm": 5.096292495727539, "learning_rate": 6.378259976726676e-05, "loss": 0.499, "step": 27206 }, { "epoch": 1.8434175757165119, "grad_norm": 5.338291168212891, "learning_rate": 6.378123074816894e-05, "loss": 0.6466, "step": 27207 }, { "epoch": 1.843485330984484, "grad_norm": 7.704479217529297, "learning_rate": 6.377986172907112e-05, "loss": 0.6626, "step": 27208 }, { "epoch": 1.8435530862524563, "grad_norm": 4.516578674316406, "learning_rate": 6.37784927099733e-05, "loss": 0.4436, "step": 27209 }, { "epoch": 1.8436208415204283, "grad_norm": 7.9580397605896, "learning_rate": 6.377712369087549e-05, "loss": 0.6321, "step": 27210 }, { "epoch": 1.8436885967884002, "grad_norm": 4.41426944732666, "learning_rate": 6.377575467177767e-05, "loss": 0.6738, "step": 27211 }, { "epoch": 1.8437563520563724, "grad_norm": 5.765415191650391, "learning_rate": 6.377438565267985e-05, "loss": 0.6718, "step": 27212 }, { "epoch": 1.8438241073243444, "grad_norm": 5.658130168914795, "learning_rate": 6.377301663358205e-05, "loss": 0.681, "step": 27213 }, { "epoch": 1.8438918625923164, "grad_norm": 4.679565906524658, "learning_rate": 6.377164761448423e-05, "loss": 0.5013, "step": 27214 }, { "epoch": 1.8439596178602886, "grad_norm": 8.153345108032227, "learning_rate": 6.37702785953864e-05, "loss": 0.5654, "step": 27215 }, { "epoch": 1.8440273731282608, "grad_norm": 4.831034183502197, "learning_rate": 6.37689095762886e-05, "loss": 0.6661, "step": 27216 }, { "epoch": 1.8440951283962328, "grad_norm": 6.71080207824707, "learning_rate": 6.376754055719078e-05, "loss": 0.6891, "step": 27217 }, { "epoch": 1.8441628836642048, "grad_norm": 4.777334213256836, "learning_rate": 6.376617153809296e-05, "loss": 0.6543, "step": 27218 }, { "epoch": 1.844230638932177, "grad_norm": 6.528384685516357, "learning_rate": 6.376480251899515e-05, "loss": 0.588, "step": 27219 }, { "epoch": 1.8442983942001492, "grad_norm": 7.911769390106201, "learning_rate": 6.376343349989733e-05, "loss": 0.7107, "step": 27220 }, { "epoch": 1.8443661494681212, "grad_norm": 5.2077860832214355, "learning_rate": 6.376206448079951e-05, "loss": 0.5972, "step": 27221 }, { "epoch": 1.8444339047360931, "grad_norm": 5.480731010437012, "learning_rate": 6.37606954617017e-05, "loss": 0.5687, "step": 27222 }, { "epoch": 1.8445016600040653, "grad_norm": 4.657915115356445, "learning_rate": 6.375932644260388e-05, "loss": 0.6566, "step": 27223 }, { "epoch": 1.8445694152720375, "grad_norm": 7.52306604385376, "learning_rate": 6.375795742350607e-05, "loss": 1.0508, "step": 27224 }, { "epoch": 1.8446371705400095, "grad_norm": 4.840890884399414, "learning_rate": 6.375658840440825e-05, "loss": 0.7028, "step": 27225 }, { "epoch": 1.8447049258079815, "grad_norm": 7.088278293609619, "learning_rate": 6.375521938531043e-05, "loss": 0.7096, "step": 27226 }, { "epoch": 1.8447726810759537, "grad_norm": 6.657862663269043, "learning_rate": 6.375385036621261e-05, "loss": 0.8553, "step": 27227 }, { "epoch": 1.8448404363439257, "grad_norm": 4.305265426635742, "learning_rate": 6.37524813471148e-05, "loss": 0.4676, "step": 27228 }, { "epoch": 1.8449081916118977, "grad_norm": 6.675529956817627, "learning_rate": 6.375111232801698e-05, "loss": 0.6975, "step": 27229 }, { "epoch": 1.8449759468798699, "grad_norm": 7.283346176147461, "learning_rate": 6.374974330891917e-05, "loss": 0.7351, "step": 27230 }, { "epoch": 1.845043702147842, "grad_norm": 5.355958938598633, "learning_rate": 6.374837428982135e-05, "loss": 0.8886, "step": 27231 }, { "epoch": 1.845111457415814, "grad_norm": 5.871514320373535, "learning_rate": 6.374700527072353e-05, "loss": 0.7631, "step": 27232 }, { "epoch": 1.845179212683786, "grad_norm": 5.2196502685546875, "learning_rate": 6.374563625162572e-05, "loss": 0.6316, "step": 27233 }, { "epoch": 1.8452469679517582, "grad_norm": 5.123522758483887, "learning_rate": 6.37442672325279e-05, "loss": 0.6855, "step": 27234 }, { "epoch": 1.8453147232197304, "grad_norm": 4.887501239776611, "learning_rate": 6.374289821343008e-05, "loss": 0.576, "step": 27235 }, { "epoch": 1.8453824784877024, "grad_norm": 5.347650051116943, "learning_rate": 6.374152919433226e-05, "loss": 0.5584, "step": 27236 }, { "epoch": 1.8454502337556744, "grad_norm": 5.271849155426025, "learning_rate": 6.374016017523444e-05, "loss": 0.6104, "step": 27237 }, { "epoch": 1.8455179890236466, "grad_norm": 7.209766387939453, "learning_rate": 6.373879115613663e-05, "loss": 0.6179, "step": 27238 }, { "epoch": 1.8455857442916188, "grad_norm": 7.59173583984375, "learning_rate": 6.373742213703882e-05, "loss": 0.5191, "step": 27239 }, { "epoch": 1.8456534995595908, "grad_norm": 8.281730651855469, "learning_rate": 6.3736053117941e-05, "loss": 0.416, "step": 27240 }, { "epoch": 1.8457212548275628, "grad_norm": 5.340824127197266, "learning_rate": 6.373468409884318e-05, "loss": 0.6017, "step": 27241 }, { "epoch": 1.845789010095535, "grad_norm": 4.2464599609375, "learning_rate": 6.373331507974537e-05, "loss": 0.6916, "step": 27242 }, { "epoch": 1.845856765363507, "grad_norm": 6.593756675720215, "learning_rate": 6.373194606064755e-05, "loss": 0.8323, "step": 27243 }, { "epoch": 1.845924520631479, "grad_norm": 6.0877861976623535, "learning_rate": 6.373057704154973e-05, "loss": 0.7362, "step": 27244 }, { "epoch": 1.8459922758994511, "grad_norm": 4.741452693939209, "learning_rate": 6.372920802245191e-05, "loss": 0.637, "step": 27245 }, { "epoch": 1.8460600311674233, "grad_norm": 4.9500813484191895, "learning_rate": 6.372783900335409e-05, "loss": 0.7221, "step": 27246 }, { "epoch": 1.8461277864353953, "grad_norm": 8.915060043334961, "learning_rate": 6.372646998425629e-05, "loss": 0.4946, "step": 27247 }, { "epoch": 1.8461955417033673, "grad_norm": 5.91048526763916, "learning_rate": 6.372510096515847e-05, "loss": 0.6178, "step": 27248 }, { "epoch": 1.8462632969713395, "grad_norm": 6.160606861114502, "learning_rate": 6.372373194606065e-05, "loss": 0.665, "step": 27249 }, { "epoch": 1.8463310522393117, "grad_norm": 6.125753879547119, "learning_rate": 6.372236292696283e-05, "loss": 0.7054, "step": 27250 }, { "epoch": 1.8463988075072837, "grad_norm": 6.176767826080322, "learning_rate": 6.372099390786502e-05, "loss": 0.7628, "step": 27251 }, { "epoch": 1.8464665627752557, "grad_norm": 7.957395076751709, "learning_rate": 6.37196248887672e-05, "loss": 0.9647, "step": 27252 }, { "epoch": 1.8465343180432279, "grad_norm": 6.996708869934082, "learning_rate": 6.371825586966938e-05, "loss": 0.627, "step": 27253 }, { "epoch": 1.8466020733112, "grad_norm": 4.916651248931885, "learning_rate": 6.371688685057156e-05, "loss": 0.5691, "step": 27254 }, { "epoch": 1.846669828579172, "grad_norm": 5.062841415405273, "learning_rate": 6.371551783147374e-05, "loss": 0.5555, "step": 27255 }, { "epoch": 1.846737583847144, "grad_norm": 6.414377212524414, "learning_rate": 6.371414881237594e-05, "loss": 0.706, "step": 27256 }, { "epoch": 1.8468053391151162, "grad_norm": 5.882386684417725, "learning_rate": 6.371277979327812e-05, "loss": 0.6797, "step": 27257 }, { "epoch": 1.8468730943830884, "grad_norm": 4.906846046447754, "learning_rate": 6.37114107741803e-05, "loss": 0.601, "step": 27258 }, { "epoch": 1.8469408496510604, "grad_norm": 4.856419086456299, "learning_rate": 6.371004175508249e-05, "loss": 0.6372, "step": 27259 }, { "epoch": 1.8470086049190324, "grad_norm": 4.769770622253418, "learning_rate": 6.370867273598467e-05, "loss": 0.4912, "step": 27260 }, { "epoch": 1.8470763601870046, "grad_norm": 5.710936069488525, "learning_rate": 6.370730371688685e-05, "loss": 0.5366, "step": 27261 }, { "epoch": 1.8471441154549766, "grad_norm": 6.524085521697998, "learning_rate": 6.370593469778904e-05, "loss": 0.6769, "step": 27262 }, { "epoch": 1.8472118707229486, "grad_norm": 8.097933769226074, "learning_rate": 6.370456567869122e-05, "loss": 0.7485, "step": 27263 }, { "epoch": 1.8472796259909208, "grad_norm": 5.118399620056152, "learning_rate": 6.37031966595934e-05, "loss": 0.6439, "step": 27264 }, { "epoch": 1.847347381258893, "grad_norm": 5.130964756011963, "learning_rate": 6.37018276404956e-05, "loss": 0.5796, "step": 27265 }, { "epoch": 1.847415136526865, "grad_norm": 6.9567694664001465, "learning_rate": 6.370045862139778e-05, "loss": 0.6227, "step": 27266 }, { "epoch": 1.847482891794837, "grad_norm": 6.147199630737305, "learning_rate": 6.369908960229996e-05, "loss": 0.6198, "step": 27267 }, { "epoch": 1.8475506470628091, "grad_norm": 6.827304363250732, "learning_rate": 6.369772058320214e-05, "loss": 0.8015, "step": 27268 }, { "epoch": 1.8476184023307813, "grad_norm": 5.746049880981445, "learning_rate": 6.369635156410432e-05, "loss": 0.6196, "step": 27269 }, { "epoch": 1.8476861575987533, "grad_norm": 5.408587455749512, "learning_rate": 6.369498254500651e-05, "loss": 0.6948, "step": 27270 }, { "epoch": 1.8477539128667253, "grad_norm": 6.556030750274658, "learning_rate": 6.36936135259087e-05, "loss": 0.5461, "step": 27271 }, { "epoch": 1.8478216681346975, "grad_norm": 4.876744747161865, "learning_rate": 6.369224450681087e-05, "loss": 0.7387, "step": 27272 }, { "epoch": 1.8478894234026697, "grad_norm": 7.352886199951172, "learning_rate": 6.369087548771306e-05, "loss": 0.7012, "step": 27273 }, { "epoch": 1.8479571786706417, "grad_norm": 5.337966442108154, "learning_rate": 6.368950646861525e-05, "loss": 0.6795, "step": 27274 }, { "epoch": 1.8480249339386137, "grad_norm": 4.234106540679932, "learning_rate": 6.368813744951743e-05, "loss": 0.5507, "step": 27275 }, { "epoch": 1.8480926892065859, "grad_norm": 6.158735752105713, "learning_rate": 6.368676843041961e-05, "loss": 0.6043, "step": 27276 }, { "epoch": 1.8481604444745579, "grad_norm": 5.66843318939209, "learning_rate": 6.368539941132179e-05, "loss": 0.5871, "step": 27277 }, { "epoch": 1.8482281997425298, "grad_norm": 6.930666923522949, "learning_rate": 6.368403039222397e-05, "loss": 0.7413, "step": 27278 }, { "epoch": 1.848295955010502, "grad_norm": 7.251629829406738, "learning_rate": 6.368266137312616e-05, "loss": 0.6268, "step": 27279 }, { "epoch": 1.8483637102784742, "grad_norm": 5.876374244689941, "learning_rate": 6.368129235402834e-05, "loss": 0.6098, "step": 27280 }, { "epoch": 1.8484314655464462, "grad_norm": 4.424190998077393, "learning_rate": 6.367992333493053e-05, "loss": 0.5825, "step": 27281 }, { "epoch": 1.8484992208144182, "grad_norm": 6.5628228187561035, "learning_rate": 6.36785543158327e-05, "loss": 0.6589, "step": 27282 }, { "epoch": 1.8485669760823904, "grad_norm": 5.401718616485596, "learning_rate": 6.367718529673489e-05, "loss": 0.6024, "step": 27283 }, { "epoch": 1.8486347313503626, "grad_norm": 7.6457343101501465, "learning_rate": 6.367581627763708e-05, "loss": 0.6854, "step": 27284 }, { "epoch": 1.8487024866183346, "grad_norm": 9.27796745300293, "learning_rate": 6.367444725853926e-05, "loss": 0.7498, "step": 27285 }, { "epoch": 1.8487702418863066, "grad_norm": 5.034204006195068, "learning_rate": 6.367307823944144e-05, "loss": 0.6011, "step": 27286 }, { "epoch": 1.8488379971542788, "grad_norm": 6.607077121734619, "learning_rate": 6.367170922034362e-05, "loss": 0.6759, "step": 27287 }, { "epoch": 1.848905752422251, "grad_norm": 4.362952709197998, "learning_rate": 6.367034020124581e-05, "loss": 0.6721, "step": 27288 }, { "epoch": 1.848973507690223, "grad_norm": 4.469357013702393, "learning_rate": 6.3668971182148e-05, "loss": 0.5127, "step": 27289 }, { "epoch": 1.849041262958195, "grad_norm": 7.249838829040527, "learning_rate": 6.366760216305018e-05, "loss": 0.5577, "step": 27290 }, { "epoch": 1.8491090182261671, "grad_norm": 4.217117786407471, "learning_rate": 6.366623314395236e-05, "loss": 0.6289, "step": 27291 }, { "epoch": 1.8491767734941391, "grad_norm": 4.958245754241943, "learning_rate": 6.366486412485454e-05, "loss": 0.8431, "step": 27292 }, { "epoch": 1.849244528762111, "grad_norm": 3.9064323902130127, "learning_rate": 6.366349510575673e-05, "loss": 0.5287, "step": 27293 }, { "epoch": 1.8493122840300833, "grad_norm": 4.463814735412598, "learning_rate": 6.366212608665891e-05, "loss": 0.411, "step": 27294 }, { "epoch": 1.8493800392980555, "grad_norm": 7.430866241455078, "learning_rate": 6.366075706756109e-05, "loss": 0.7882, "step": 27295 }, { "epoch": 1.8494477945660275, "grad_norm": 7.235048770904541, "learning_rate": 6.365938804846327e-05, "loss": 0.6859, "step": 27296 }, { "epoch": 1.8495155498339995, "grad_norm": 4.099121570587158, "learning_rate": 6.365801902936546e-05, "loss": 0.4855, "step": 27297 }, { "epoch": 1.8495833051019717, "grad_norm": 5.911378383636475, "learning_rate": 6.365665001026765e-05, "loss": 0.8448, "step": 27298 }, { "epoch": 1.8496510603699439, "grad_norm": 6.584514141082764, "learning_rate": 6.365528099116983e-05, "loss": 0.8111, "step": 27299 }, { "epoch": 1.8497188156379158, "grad_norm": 6.399921894073486, "learning_rate": 6.3653911972072e-05, "loss": 0.7091, "step": 27300 }, { "epoch": 1.8497865709058878, "grad_norm": 6.056392669677734, "learning_rate": 6.365254295297419e-05, "loss": 0.8396, "step": 27301 }, { "epoch": 1.84985432617386, "grad_norm": 6.509528160095215, "learning_rate": 6.365117393387638e-05, "loss": 0.8983, "step": 27302 }, { "epoch": 1.8499220814418322, "grad_norm": 5.756089210510254, "learning_rate": 6.364980491477856e-05, "loss": 0.4498, "step": 27303 }, { "epoch": 1.8499898367098042, "grad_norm": 5.259481906890869, "learning_rate": 6.364843589568074e-05, "loss": 0.5964, "step": 27304 }, { "epoch": 1.8500575919777762, "grad_norm": 4.819509029388428, "learning_rate": 6.364706687658293e-05, "loss": 0.779, "step": 27305 }, { "epoch": 1.8501253472457484, "grad_norm": 5.050518035888672, "learning_rate": 6.364569785748511e-05, "loss": 0.621, "step": 27306 }, { "epoch": 1.8501931025137206, "grad_norm": 4.945389270782471, "learning_rate": 6.36443288383873e-05, "loss": 0.6204, "step": 27307 }, { "epoch": 1.8502608577816926, "grad_norm": 5.4422736167907715, "learning_rate": 6.364295981928949e-05, "loss": 0.7986, "step": 27308 }, { "epoch": 1.8503286130496646, "grad_norm": 4.408063888549805, "learning_rate": 6.364159080019167e-05, "loss": 0.506, "step": 27309 }, { "epoch": 1.8503963683176368, "grad_norm": 4.614170551300049, "learning_rate": 6.364022178109385e-05, "loss": 0.4339, "step": 27310 }, { "epoch": 1.8504641235856087, "grad_norm": 7.3041276931762695, "learning_rate": 6.363885276199604e-05, "loss": 0.7409, "step": 27311 }, { "epoch": 1.8505318788535807, "grad_norm": 5.567177772521973, "learning_rate": 6.363748374289822e-05, "loss": 0.6989, "step": 27312 }, { "epoch": 1.850599634121553, "grad_norm": 8.493524551391602, "learning_rate": 6.36361147238004e-05, "loss": 0.6291, "step": 27313 }, { "epoch": 1.8506673893895251, "grad_norm": 4.622281074523926, "learning_rate": 6.363474570470258e-05, "loss": 0.6496, "step": 27314 }, { "epoch": 1.850735144657497, "grad_norm": 4.407711029052734, "learning_rate": 6.363337668560477e-05, "loss": 0.5226, "step": 27315 }, { "epoch": 1.850802899925469, "grad_norm": 6.123640060424805, "learning_rate": 6.363200766650696e-05, "loss": 0.5228, "step": 27316 }, { "epoch": 1.8508706551934413, "grad_norm": 5.437078475952148, "learning_rate": 6.363063864740914e-05, "loss": 0.5585, "step": 27317 }, { "epoch": 1.8509384104614135, "grad_norm": 4.838663101196289, "learning_rate": 6.362926962831132e-05, "loss": 0.5486, "step": 27318 }, { "epoch": 1.8510061657293855, "grad_norm": 7.062039852142334, "learning_rate": 6.36279006092135e-05, "loss": 0.8208, "step": 27319 }, { "epoch": 1.8510739209973575, "grad_norm": 6.570778846740723, "learning_rate": 6.36265315901157e-05, "loss": 0.581, "step": 27320 }, { "epoch": 1.8511416762653297, "grad_norm": 5.243046760559082, "learning_rate": 6.362516257101787e-05, "loss": 0.4384, "step": 27321 }, { "epoch": 1.8512094315333019, "grad_norm": 9.272966384887695, "learning_rate": 6.362379355192005e-05, "loss": 0.579, "step": 27322 }, { "epoch": 1.8512771868012738, "grad_norm": 3.7065258026123047, "learning_rate": 6.362242453282223e-05, "loss": 0.4431, "step": 27323 }, { "epoch": 1.8513449420692458, "grad_norm": 6.572539806365967, "learning_rate": 6.362105551372442e-05, "loss": 0.5641, "step": 27324 }, { "epoch": 1.851412697337218, "grad_norm": 5.313950061798096, "learning_rate": 6.361968649462661e-05, "loss": 0.6205, "step": 27325 }, { "epoch": 1.85148045260519, "grad_norm": 4.911096096038818, "learning_rate": 6.361831747552879e-05, "loss": 0.5131, "step": 27326 }, { "epoch": 1.851548207873162, "grad_norm": 5.58803653717041, "learning_rate": 6.361694845643097e-05, "loss": 0.6395, "step": 27327 }, { "epoch": 1.8516159631411342, "grad_norm": 7.310421466827393, "learning_rate": 6.361557943733315e-05, "loss": 0.5262, "step": 27328 }, { "epoch": 1.8516837184091064, "grad_norm": 4.941981315612793, "learning_rate": 6.361421041823534e-05, "loss": 0.5872, "step": 27329 }, { "epoch": 1.8517514736770784, "grad_norm": 4.61047887802124, "learning_rate": 6.361284139913752e-05, "loss": 0.7227, "step": 27330 }, { "epoch": 1.8518192289450504, "grad_norm": 4.741776943206787, "learning_rate": 6.36114723800397e-05, "loss": 0.5834, "step": 27331 }, { "epoch": 1.8518869842130226, "grad_norm": 7.017233371734619, "learning_rate": 6.361010336094189e-05, "loss": 0.739, "step": 27332 }, { "epoch": 1.8519547394809948, "grad_norm": 7.026426315307617, "learning_rate": 6.360873434184407e-05, "loss": 0.7106, "step": 27333 }, { "epoch": 1.8520224947489667, "grad_norm": 4.270300388336182, "learning_rate": 6.360736532274626e-05, "loss": 0.5543, "step": 27334 }, { "epoch": 1.8520902500169387, "grad_norm": 6.484553337097168, "learning_rate": 6.360599630364844e-05, "loss": 0.6582, "step": 27335 }, { "epoch": 1.852158005284911, "grad_norm": 5.423399925231934, "learning_rate": 6.360462728455062e-05, "loss": 0.5746, "step": 27336 }, { "epoch": 1.8522257605528831, "grad_norm": 4.573421001434326, "learning_rate": 6.36032582654528e-05, "loss": 0.7216, "step": 27337 }, { "epoch": 1.852293515820855, "grad_norm": 13.776273727416992, "learning_rate": 6.360188924635498e-05, "loss": 0.6545, "step": 27338 }, { "epoch": 1.852361271088827, "grad_norm": 7.646167755126953, "learning_rate": 6.360052022725717e-05, "loss": 0.8296, "step": 27339 }, { "epoch": 1.8524290263567993, "grad_norm": 5.871392726898193, "learning_rate": 6.359915120815935e-05, "loss": 0.6882, "step": 27340 }, { "epoch": 1.8524967816247713, "grad_norm": 7.552643299102783, "learning_rate": 6.359778218906154e-05, "loss": 0.5037, "step": 27341 }, { "epoch": 1.8525645368927433, "grad_norm": 5.449880123138428, "learning_rate": 6.359641316996372e-05, "loss": 0.6198, "step": 27342 }, { "epoch": 1.8526322921607155, "grad_norm": 5.327448844909668, "learning_rate": 6.359504415086591e-05, "loss": 0.5621, "step": 27343 }, { "epoch": 1.8527000474286877, "grad_norm": 4.673593997955322, "learning_rate": 6.359367513176809e-05, "loss": 0.5164, "step": 27344 }, { "epoch": 1.8527678026966596, "grad_norm": 6.391227722167969, "learning_rate": 6.359230611267027e-05, "loss": 0.6985, "step": 27345 }, { "epoch": 1.8528355579646316, "grad_norm": 4.4343390464782715, "learning_rate": 6.359093709357245e-05, "loss": 0.4835, "step": 27346 }, { "epoch": 1.8529033132326038, "grad_norm": 5.450658321380615, "learning_rate": 6.358956807447463e-05, "loss": 0.7195, "step": 27347 }, { "epoch": 1.852971068500576, "grad_norm": 5.276173114776611, "learning_rate": 6.358819905537682e-05, "loss": 0.4989, "step": 27348 }, { "epoch": 1.853038823768548, "grad_norm": 9.247002601623535, "learning_rate": 6.3586830036279e-05, "loss": 0.6132, "step": 27349 }, { "epoch": 1.85310657903652, "grad_norm": 6.21991491317749, "learning_rate": 6.358546101718119e-05, "loss": 0.5354, "step": 27350 }, { "epoch": 1.8531743343044922, "grad_norm": 5.062180995941162, "learning_rate": 6.358409199808338e-05, "loss": 0.6442, "step": 27351 }, { "epoch": 1.8532420895724644, "grad_norm": 5.623244762420654, "learning_rate": 6.358272297898556e-05, "loss": 0.9594, "step": 27352 }, { "epoch": 1.8533098448404364, "grad_norm": 7.558547496795654, "learning_rate": 6.358135395988774e-05, "loss": 0.6075, "step": 27353 }, { "epoch": 1.8533776001084084, "grad_norm": 5.582084655761719, "learning_rate": 6.357998494078993e-05, "loss": 0.6956, "step": 27354 }, { "epoch": 1.8534453553763806, "grad_norm": 6.831633567810059, "learning_rate": 6.357861592169211e-05, "loss": 0.6701, "step": 27355 }, { "epoch": 1.8535131106443528, "grad_norm": 4.830446243286133, "learning_rate": 6.35772469025943e-05, "loss": 0.8312, "step": 27356 }, { "epoch": 1.8535808659123247, "grad_norm": 4.984294414520264, "learning_rate": 6.357587788349649e-05, "loss": 0.4073, "step": 27357 }, { "epoch": 1.8536486211802967, "grad_norm": 6.982358932495117, "learning_rate": 6.357450886439867e-05, "loss": 0.7405, "step": 27358 }, { "epoch": 1.853716376448269, "grad_norm": 5.725995063781738, "learning_rate": 6.357313984530085e-05, "loss": 0.577, "step": 27359 }, { "epoch": 1.853784131716241, "grad_norm": 5.237644672393799, "learning_rate": 6.357177082620303e-05, "loss": 0.6281, "step": 27360 }, { "epoch": 1.8538518869842129, "grad_norm": 4.636541366577148, "learning_rate": 6.357040180710522e-05, "loss": 0.7373, "step": 27361 }, { "epoch": 1.853919642252185, "grad_norm": 6.322758197784424, "learning_rate": 6.35690327880074e-05, "loss": 0.5409, "step": 27362 }, { "epoch": 1.8539873975201573, "grad_norm": 7.68882417678833, "learning_rate": 6.356766376890958e-05, "loss": 0.6769, "step": 27363 }, { "epoch": 1.8540551527881293, "grad_norm": 6.354411602020264, "learning_rate": 6.356629474981176e-05, "loss": 0.5237, "step": 27364 }, { "epoch": 1.8541229080561012, "grad_norm": 6.746979713439941, "learning_rate": 6.356492573071394e-05, "loss": 0.903, "step": 27365 }, { "epoch": 1.8541906633240735, "grad_norm": 6.164494514465332, "learning_rate": 6.356355671161614e-05, "loss": 0.8598, "step": 27366 }, { "epoch": 1.8542584185920457, "grad_norm": 4.801336765289307, "learning_rate": 6.356218769251832e-05, "loss": 0.7046, "step": 27367 }, { "epoch": 1.8543261738600176, "grad_norm": 4.869281768798828, "learning_rate": 6.35608186734205e-05, "loss": 0.6237, "step": 27368 }, { "epoch": 1.8543939291279896, "grad_norm": 4.915579319000244, "learning_rate": 6.355944965432268e-05, "loss": 0.6431, "step": 27369 }, { "epoch": 1.8544616843959618, "grad_norm": 5.74111270904541, "learning_rate": 6.355808063522486e-05, "loss": 0.5631, "step": 27370 }, { "epoch": 1.854529439663934, "grad_norm": 5.344593524932861, "learning_rate": 6.355671161612705e-05, "loss": 0.4961, "step": 27371 }, { "epoch": 1.854597194931906, "grad_norm": 8.426936149597168, "learning_rate": 6.355534259702923e-05, "loss": 0.8655, "step": 27372 }, { "epoch": 1.854664950199878, "grad_norm": 4.48435115814209, "learning_rate": 6.355397357793141e-05, "loss": 0.6053, "step": 27373 }, { "epoch": 1.8547327054678502, "grad_norm": 5.457202911376953, "learning_rate": 6.35526045588336e-05, "loss": 0.5076, "step": 27374 }, { "epoch": 1.8548004607358222, "grad_norm": 4.937319755554199, "learning_rate": 6.355123553973579e-05, "loss": 0.6934, "step": 27375 }, { "epoch": 1.8548682160037941, "grad_norm": 4.500487327575684, "learning_rate": 6.354986652063797e-05, "loss": 0.6523, "step": 27376 }, { "epoch": 1.8549359712717663, "grad_norm": 4.4783759117126465, "learning_rate": 6.354849750154015e-05, "loss": 0.4815, "step": 27377 }, { "epoch": 1.8550037265397386, "grad_norm": 10.28587532043457, "learning_rate": 6.354712848244233e-05, "loss": 0.5234, "step": 27378 }, { "epoch": 1.8550714818077105, "grad_norm": 8.73853588104248, "learning_rate": 6.354575946334451e-05, "loss": 0.541, "step": 27379 }, { "epoch": 1.8551392370756825, "grad_norm": 4.327946186065674, "learning_rate": 6.35443904442467e-05, "loss": 0.5391, "step": 27380 }, { "epoch": 1.8552069923436547, "grad_norm": 6.12151575088501, "learning_rate": 6.354302142514888e-05, "loss": 0.6003, "step": 27381 }, { "epoch": 1.855274747611627, "grad_norm": 6.210278034210205, "learning_rate": 6.354165240605106e-05, "loss": 0.724, "step": 27382 }, { "epoch": 1.855342502879599, "grad_norm": 6.4067487716674805, "learning_rate": 6.354028338695325e-05, "loss": 0.5818, "step": 27383 }, { "epoch": 1.8554102581475709, "grad_norm": 5.090939998626709, "learning_rate": 6.353891436785544e-05, "loss": 0.5646, "step": 27384 }, { "epoch": 1.855478013415543, "grad_norm": 5.1198410987854, "learning_rate": 6.353754534875762e-05, "loss": 0.5747, "step": 27385 }, { "epoch": 1.8555457686835153, "grad_norm": 6.894594192504883, "learning_rate": 6.35361763296598e-05, "loss": 0.8017, "step": 27386 }, { "epoch": 1.8556135239514873, "grad_norm": 4.41778039932251, "learning_rate": 6.353480731056198e-05, "loss": 0.5225, "step": 27387 }, { "epoch": 1.8556812792194592, "grad_norm": 5.774452209472656, "learning_rate": 6.353343829146416e-05, "loss": 0.7573, "step": 27388 }, { "epoch": 1.8557490344874314, "grad_norm": 4.999820709228516, "learning_rate": 6.353206927236635e-05, "loss": 0.685, "step": 27389 }, { "epoch": 1.8558167897554034, "grad_norm": 4.346382141113281, "learning_rate": 6.353070025326853e-05, "loss": 0.5328, "step": 27390 }, { "epoch": 1.8558845450233754, "grad_norm": 3.8927834033966064, "learning_rate": 6.352933123417071e-05, "loss": 0.5368, "step": 27391 }, { "epoch": 1.8559523002913476, "grad_norm": 5.548489570617676, "learning_rate": 6.35279622150729e-05, "loss": 0.6206, "step": 27392 }, { "epoch": 1.8560200555593198, "grad_norm": 3.9222590923309326, "learning_rate": 6.352659319597508e-05, "loss": 0.5224, "step": 27393 }, { "epoch": 1.8560878108272918, "grad_norm": 5.585320949554443, "learning_rate": 6.352522417687727e-05, "loss": 0.5466, "step": 27394 }, { "epoch": 1.8561555660952638, "grad_norm": 6.57012939453125, "learning_rate": 6.352385515777945e-05, "loss": 0.603, "step": 27395 }, { "epoch": 1.856223321363236, "grad_norm": 4.979565143585205, "learning_rate": 6.352248613868163e-05, "loss": 0.6659, "step": 27396 }, { "epoch": 1.8562910766312082, "grad_norm": 5.931491851806641, "learning_rate": 6.352111711958381e-05, "loss": 0.4857, "step": 27397 }, { "epoch": 1.8563588318991802, "grad_norm": 6.6327409744262695, "learning_rate": 6.3519748100486e-05, "loss": 0.5586, "step": 27398 }, { "epoch": 1.8564265871671521, "grad_norm": 6.132262229919434, "learning_rate": 6.351837908138818e-05, "loss": 0.7533, "step": 27399 }, { "epoch": 1.8564943424351243, "grad_norm": 7.199448108673096, "learning_rate": 6.351701006229038e-05, "loss": 0.7212, "step": 27400 }, { "epoch": 1.8565620977030965, "grad_norm": 6.919311046600342, "learning_rate": 6.351564104319256e-05, "loss": 0.6529, "step": 27401 }, { "epoch": 1.8566298529710685, "grad_norm": 8.360771179199219, "learning_rate": 6.351427202409474e-05, "loss": 0.5225, "step": 27402 }, { "epoch": 1.8566976082390405, "grad_norm": 6.093073844909668, "learning_rate": 6.351290300499693e-05, "loss": 0.6264, "step": 27403 }, { "epoch": 1.8567653635070127, "grad_norm": 6.476813793182373, "learning_rate": 6.351153398589911e-05, "loss": 0.6023, "step": 27404 }, { "epoch": 1.856833118774985, "grad_norm": 5.322724342346191, "learning_rate": 6.35101649668013e-05, "loss": 0.635, "step": 27405 }, { "epoch": 1.856900874042957, "grad_norm": 4.329344749450684, "learning_rate": 6.350879594770347e-05, "loss": 0.5481, "step": 27406 }, { "epoch": 1.8569686293109289, "grad_norm": 9.201658248901367, "learning_rate": 6.350742692860567e-05, "loss": 0.8689, "step": 27407 }, { "epoch": 1.857036384578901, "grad_norm": 6.191984176635742, "learning_rate": 6.350605790950785e-05, "loss": 0.6586, "step": 27408 }, { "epoch": 1.857104139846873, "grad_norm": 5.749955654144287, "learning_rate": 6.350468889041003e-05, "loss": 0.5267, "step": 27409 }, { "epoch": 1.857171895114845, "grad_norm": 4.96457576751709, "learning_rate": 6.350331987131221e-05, "loss": 0.6487, "step": 27410 }, { "epoch": 1.8572396503828172, "grad_norm": 7.008732795715332, "learning_rate": 6.350195085221439e-05, "loss": 0.6814, "step": 27411 }, { "epoch": 1.8573074056507894, "grad_norm": 5.415872097015381, "learning_rate": 6.350058183311658e-05, "loss": 0.7267, "step": 27412 }, { "epoch": 1.8573751609187614, "grad_norm": 7.083207130432129, "learning_rate": 6.349921281401876e-05, "loss": 0.7548, "step": 27413 }, { "epoch": 1.8574429161867334, "grad_norm": 8.648002624511719, "learning_rate": 6.349784379492094e-05, "loss": 0.6119, "step": 27414 }, { "epoch": 1.8575106714547056, "grad_norm": 6.584985733032227, "learning_rate": 6.349647477582312e-05, "loss": 0.6992, "step": 27415 }, { "epoch": 1.8575784267226778, "grad_norm": 6.528139591217041, "learning_rate": 6.34951057567253e-05, "loss": 0.7645, "step": 27416 }, { "epoch": 1.8576461819906498, "grad_norm": 5.8630852699279785, "learning_rate": 6.34937367376275e-05, "loss": 0.7785, "step": 27417 }, { "epoch": 1.8577139372586218, "grad_norm": 4.976963996887207, "learning_rate": 6.349236771852968e-05, "loss": 0.5168, "step": 27418 }, { "epoch": 1.857781692526594, "grad_norm": 7.131485939025879, "learning_rate": 6.349099869943186e-05, "loss": 0.713, "step": 27419 }, { "epoch": 1.8578494477945662, "grad_norm": 4.122857093811035, "learning_rate": 6.348962968033404e-05, "loss": 0.6652, "step": 27420 }, { "epoch": 1.8579172030625382, "grad_norm": 6.096546173095703, "learning_rate": 6.348826066123623e-05, "loss": 0.6349, "step": 27421 }, { "epoch": 1.8579849583305101, "grad_norm": 6.647857189178467, "learning_rate": 6.348689164213841e-05, "loss": 0.8348, "step": 27422 }, { "epoch": 1.8580527135984823, "grad_norm": 5.542960166931152, "learning_rate": 6.34855226230406e-05, "loss": 0.6956, "step": 27423 }, { "epoch": 1.8581204688664543, "grad_norm": 6.000484466552734, "learning_rate": 6.348415360394277e-05, "loss": 0.7898, "step": 27424 }, { "epoch": 1.8581882241344263, "grad_norm": 5.6986284255981445, "learning_rate": 6.348278458484495e-05, "loss": 0.8872, "step": 27425 }, { "epoch": 1.8582559794023985, "grad_norm": 6.247173309326172, "learning_rate": 6.348141556574715e-05, "loss": 0.6922, "step": 27426 }, { "epoch": 1.8583237346703707, "grad_norm": 6.019658088684082, "learning_rate": 6.348004654664933e-05, "loss": 0.6635, "step": 27427 }, { "epoch": 1.8583914899383427, "grad_norm": 5.187469005584717, "learning_rate": 6.347867752755151e-05, "loss": 0.6467, "step": 27428 }, { "epoch": 1.8584592452063147, "grad_norm": 8.710208892822266, "learning_rate": 6.347730850845369e-05, "loss": 0.4713, "step": 27429 }, { "epoch": 1.8585270004742869, "grad_norm": 8.6785888671875, "learning_rate": 6.347593948935588e-05, "loss": 0.6769, "step": 27430 }, { "epoch": 1.858594755742259, "grad_norm": 6.435037612915039, "learning_rate": 6.347457047025806e-05, "loss": 0.6165, "step": 27431 }, { "epoch": 1.858662511010231, "grad_norm": 5.052980899810791, "learning_rate": 6.347320145116024e-05, "loss": 0.5973, "step": 27432 }, { "epoch": 1.858730266278203, "grad_norm": 5.869048118591309, "learning_rate": 6.347183243206242e-05, "loss": 0.876, "step": 27433 }, { "epoch": 1.8587980215461752, "grad_norm": 4.583013534545898, "learning_rate": 6.34704634129646e-05, "loss": 0.6434, "step": 27434 }, { "epoch": 1.8588657768141474, "grad_norm": 6.383969306945801, "learning_rate": 6.34690943938668e-05, "loss": 0.9968, "step": 27435 }, { "epoch": 1.8589335320821194, "grad_norm": 7.021505355834961, "learning_rate": 6.346772537476898e-05, "loss": 0.5182, "step": 27436 }, { "epoch": 1.8590012873500914, "grad_norm": 4.752154350280762, "learning_rate": 6.346635635567116e-05, "loss": 0.6979, "step": 27437 }, { "epoch": 1.8590690426180636, "grad_norm": 5.68402099609375, "learning_rate": 6.346498733657334e-05, "loss": 0.7744, "step": 27438 }, { "epoch": 1.8591367978860356, "grad_norm": 6.555962085723877, "learning_rate": 6.346361831747553e-05, "loss": 0.7762, "step": 27439 }, { "epoch": 1.8592045531540076, "grad_norm": 5.884278297424316, "learning_rate": 6.346224929837771e-05, "loss": 0.7809, "step": 27440 }, { "epoch": 1.8592723084219798, "grad_norm": 6.9058613777160645, "learning_rate": 6.34608802792799e-05, "loss": 0.7164, "step": 27441 }, { "epoch": 1.859340063689952, "grad_norm": 8.071953773498535, "learning_rate": 6.345951126018207e-05, "loss": 0.4951, "step": 27442 }, { "epoch": 1.859407818957924, "grad_norm": 6.780834674835205, "learning_rate": 6.345814224108426e-05, "loss": 0.7843, "step": 27443 }, { "epoch": 1.859475574225896, "grad_norm": 6.857516288757324, "learning_rate": 6.345677322198645e-05, "loss": 0.5576, "step": 27444 }, { "epoch": 1.8595433294938681, "grad_norm": 6.23760986328125, "learning_rate": 6.345540420288863e-05, "loss": 0.6751, "step": 27445 }, { "epoch": 1.8596110847618403, "grad_norm": 5.3638458251953125, "learning_rate": 6.345403518379081e-05, "loss": 0.6531, "step": 27446 }, { "epoch": 1.8596788400298123, "grad_norm": 4.731656551361084, "learning_rate": 6.3452666164693e-05, "loss": 0.639, "step": 27447 }, { "epoch": 1.8597465952977843, "grad_norm": 5.725217342376709, "learning_rate": 6.345129714559518e-05, "loss": 0.7359, "step": 27448 }, { "epoch": 1.8598143505657565, "grad_norm": 4.49294376373291, "learning_rate": 6.344992812649736e-05, "loss": 0.4767, "step": 27449 }, { "epoch": 1.8598821058337287, "grad_norm": 6.838014125823975, "learning_rate": 6.344855910739956e-05, "loss": 0.6365, "step": 27450 }, { "epoch": 1.8599498611017007, "grad_norm": 7.335923671722412, "learning_rate": 6.344719008830174e-05, "loss": 0.9139, "step": 27451 }, { "epoch": 1.8600176163696727, "grad_norm": 4.797834873199463, "learning_rate": 6.344582106920392e-05, "loss": 0.5721, "step": 27452 }, { "epoch": 1.8600853716376449, "grad_norm": 4.923839569091797, "learning_rate": 6.344445205010611e-05, "loss": 0.4886, "step": 27453 }, { "epoch": 1.860153126905617, "grad_norm": 4.291384696960449, "learning_rate": 6.344308303100829e-05, "loss": 0.5915, "step": 27454 }, { "epoch": 1.860220882173589, "grad_norm": 10.862751960754395, "learning_rate": 6.344171401191047e-05, "loss": 0.7376, "step": 27455 }, { "epoch": 1.860288637441561, "grad_norm": 7.912614345550537, "learning_rate": 6.344034499281265e-05, "loss": 0.698, "step": 27456 }, { "epoch": 1.8603563927095332, "grad_norm": 8.359848022460938, "learning_rate": 6.343897597371483e-05, "loss": 0.7441, "step": 27457 }, { "epoch": 1.8604241479775052, "grad_norm": 6.462465763092041, "learning_rate": 6.343760695461703e-05, "loss": 0.6437, "step": 27458 }, { "epoch": 1.8604919032454772, "grad_norm": 5.56485652923584, "learning_rate": 6.343623793551921e-05, "loss": 0.5771, "step": 27459 }, { "epoch": 1.8605596585134494, "grad_norm": 8.585254669189453, "learning_rate": 6.343486891642139e-05, "loss": 0.6168, "step": 27460 }, { "epoch": 1.8606274137814216, "grad_norm": 5.145244121551514, "learning_rate": 6.343349989732357e-05, "loss": 0.7488, "step": 27461 }, { "epoch": 1.8606951690493936, "grad_norm": 6.185863971710205, "learning_rate": 6.343213087822576e-05, "loss": 0.8715, "step": 27462 }, { "epoch": 1.8607629243173656, "grad_norm": 7.251806735992432, "learning_rate": 6.343076185912794e-05, "loss": 0.7178, "step": 27463 }, { "epoch": 1.8608306795853378, "grad_norm": 6.879580497741699, "learning_rate": 6.342939284003012e-05, "loss": 0.6186, "step": 27464 }, { "epoch": 1.86089843485331, "grad_norm": 5.017488956451416, "learning_rate": 6.34280238209323e-05, "loss": 0.6563, "step": 27465 }, { "epoch": 1.860966190121282, "grad_norm": 8.587684631347656, "learning_rate": 6.342665480183448e-05, "loss": 0.679, "step": 27466 }, { "epoch": 1.861033945389254, "grad_norm": 7.058719635009766, "learning_rate": 6.342528578273668e-05, "loss": 0.6198, "step": 27467 }, { "epoch": 1.8611017006572261, "grad_norm": 8.960915565490723, "learning_rate": 6.342391676363886e-05, "loss": 0.5395, "step": 27468 }, { "epoch": 1.8611694559251983, "grad_norm": 8.864168167114258, "learning_rate": 6.342254774454104e-05, "loss": 0.7512, "step": 27469 }, { "epoch": 1.8612372111931703, "grad_norm": 6.252296447753906, "learning_rate": 6.342117872544322e-05, "loss": 0.6893, "step": 27470 }, { "epoch": 1.8613049664611423, "grad_norm": 5.1161017417907715, "learning_rate": 6.34198097063454e-05, "loss": 0.5752, "step": 27471 }, { "epoch": 1.8613727217291145, "grad_norm": 5.370450973510742, "learning_rate": 6.34184406872476e-05, "loss": 0.4814, "step": 27472 }, { "epoch": 1.8614404769970865, "grad_norm": 9.893977165222168, "learning_rate": 6.341707166814977e-05, "loss": 0.6027, "step": 27473 }, { "epoch": 1.8615082322650585, "grad_norm": 5.268862724304199, "learning_rate": 6.341570264905195e-05, "loss": 0.66, "step": 27474 }, { "epoch": 1.8615759875330307, "grad_norm": 8.013508796691895, "learning_rate": 6.341433362995413e-05, "loss": 0.4637, "step": 27475 }, { "epoch": 1.8616437428010029, "grad_norm": 9.281586647033691, "learning_rate": 6.341296461085633e-05, "loss": 1.0553, "step": 27476 }, { "epoch": 1.8617114980689748, "grad_norm": 8.454440116882324, "learning_rate": 6.341159559175851e-05, "loss": 0.5317, "step": 27477 }, { "epoch": 1.8617792533369468, "grad_norm": 7.9012041091918945, "learning_rate": 6.341022657266069e-05, "loss": 0.7423, "step": 27478 }, { "epoch": 1.861847008604919, "grad_norm": 5.583664894104004, "learning_rate": 6.340885755356287e-05, "loss": 0.7331, "step": 27479 }, { "epoch": 1.8619147638728912, "grad_norm": 4.838959693908691, "learning_rate": 6.340748853446505e-05, "loss": 0.5627, "step": 27480 }, { "epoch": 1.8619825191408632, "grad_norm": 9.809456825256348, "learning_rate": 6.340611951536724e-05, "loss": 0.5376, "step": 27481 }, { "epoch": 1.8620502744088352, "grad_norm": 6.430886268615723, "learning_rate": 6.340475049626942e-05, "loss": 0.5611, "step": 27482 }, { "epoch": 1.8621180296768074, "grad_norm": 6.066258907318115, "learning_rate": 6.34033814771716e-05, "loss": 0.7584, "step": 27483 }, { "epoch": 1.8621857849447796, "grad_norm": 4.257765769958496, "learning_rate": 6.340201245807378e-05, "loss": 0.4482, "step": 27484 }, { "epoch": 1.8622535402127516, "grad_norm": 4.936148166656494, "learning_rate": 6.340064343897598e-05, "loss": 0.8437, "step": 27485 }, { "epoch": 1.8623212954807236, "grad_norm": 6.235775470733643, "learning_rate": 6.339927441987816e-05, "loss": 0.7762, "step": 27486 }, { "epoch": 1.8623890507486958, "grad_norm": 5.4720330238342285, "learning_rate": 6.339790540078034e-05, "loss": 0.5973, "step": 27487 }, { "epoch": 1.8624568060166677, "grad_norm": 6.396482944488525, "learning_rate": 6.339653638168252e-05, "loss": 0.6618, "step": 27488 }, { "epoch": 1.8625245612846397, "grad_norm": 5.926846027374268, "learning_rate": 6.33951673625847e-05, "loss": 0.9018, "step": 27489 }, { "epoch": 1.862592316552612, "grad_norm": 6.477115154266357, "learning_rate": 6.33937983434869e-05, "loss": 0.6185, "step": 27490 }, { "epoch": 1.8626600718205841, "grad_norm": 7.842959880828857, "learning_rate": 6.339242932438907e-05, "loss": 0.5973, "step": 27491 }, { "epoch": 1.862727827088556, "grad_norm": 5.8213982582092285, "learning_rate": 6.339106030529125e-05, "loss": 0.4736, "step": 27492 }, { "epoch": 1.862795582356528, "grad_norm": 4.521148681640625, "learning_rate": 6.338969128619345e-05, "loss": 0.4561, "step": 27493 }, { "epoch": 1.8628633376245003, "grad_norm": 5.3785400390625, "learning_rate": 6.338832226709563e-05, "loss": 0.64, "step": 27494 }, { "epoch": 1.8629310928924725, "grad_norm": 8.941308975219727, "learning_rate": 6.338695324799781e-05, "loss": 0.5262, "step": 27495 }, { "epoch": 1.8629988481604445, "grad_norm": 6.348376274108887, "learning_rate": 6.33855842289e-05, "loss": 0.5321, "step": 27496 }, { "epoch": 1.8630666034284165, "grad_norm": 7.614035606384277, "learning_rate": 6.338421520980218e-05, "loss": 0.7335, "step": 27497 }, { "epoch": 1.8631343586963887, "grad_norm": 6.328239917755127, "learning_rate": 6.338284619070436e-05, "loss": 0.6698, "step": 27498 }, { "epoch": 1.8632021139643609, "grad_norm": 4.844901084899902, "learning_rate": 6.338147717160656e-05, "loss": 0.6613, "step": 27499 }, { "epoch": 1.8632698692323328, "grad_norm": 5.244931697845459, "learning_rate": 6.338010815250874e-05, "loss": 0.7399, "step": 27500 }, { "epoch": 1.8633376245003048, "grad_norm": 4.91608190536499, "learning_rate": 6.337873913341092e-05, "loss": 0.5988, "step": 27501 }, { "epoch": 1.863405379768277, "grad_norm": 5.051263809204102, "learning_rate": 6.33773701143131e-05, "loss": 0.5617, "step": 27502 }, { "epoch": 1.8634731350362492, "grad_norm": 6.216585636138916, "learning_rate": 6.337600109521528e-05, "loss": 0.6756, "step": 27503 }, { "epoch": 1.863540890304221, "grad_norm": 5.489158630371094, "learning_rate": 6.337463207611747e-05, "loss": 0.7392, "step": 27504 }, { "epoch": 1.8636086455721932, "grad_norm": 5.453476428985596, "learning_rate": 6.337326305701965e-05, "loss": 0.6395, "step": 27505 }, { "epoch": 1.8636764008401654, "grad_norm": 5.6178765296936035, "learning_rate": 6.337189403792183e-05, "loss": 0.6805, "step": 27506 }, { "epoch": 1.8637441561081374, "grad_norm": 5.014954090118408, "learning_rate": 6.337052501882401e-05, "loss": 0.7604, "step": 27507 }, { "epoch": 1.8638119113761094, "grad_norm": 8.24957275390625, "learning_rate": 6.336915599972621e-05, "loss": 1.0151, "step": 27508 }, { "epoch": 1.8638796666440816, "grad_norm": 5.401782512664795, "learning_rate": 6.336778698062839e-05, "loss": 0.4904, "step": 27509 }, { "epoch": 1.8639474219120538, "grad_norm": 5.713493824005127, "learning_rate": 6.336641796153057e-05, "loss": 0.7614, "step": 27510 }, { "epoch": 1.8640151771800257, "grad_norm": 8.810641288757324, "learning_rate": 6.336504894243275e-05, "loss": 0.7489, "step": 27511 }, { "epoch": 1.8640829324479977, "grad_norm": 4.702940940856934, "learning_rate": 6.336367992333493e-05, "loss": 0.6259, "step": 27512 }, { "epoch": 1.86415068771597, "grad_norm": 5.29288911819458, "learning_rate": 6.336231090423712e-05, "loss": 0.533, "step": 27513 }, { "epoch": 1.8642184429839421, "grad_norm": 6.737634658813477, "learning_rate": 6.33609418851393e-05, "loss": 0.61, "step": 27514 }, { "epoch": 1.864286198251914, "grad_norm": 6.840268135070801, "learning_rate": 6.335957286604148e-05, "loss": 0.6276, "step": 27515 }, { "epoch": 1.864353953519886, "grad_norm": 5.003899097442627, "learning_rate": 6.335820384694366e-05, "loss": 0.7093, "step": 27516 }, { "epoch": 1.8644217087878583, "grad_norm": 6.776587009429932, "learning_rate": 6.335683482784586e-05, "loss": 0.6821, "step": 27517 }, { "epoch": 1.8644894640558305, "grad_norm": 11.110896110534668, "learning_rate": 6.335546580874804e-05, "loss": 0.6128, "step": 27518 }, { "epoch": 1.8645572193238025, "grad_norm": 7.699942111968994, "learning_rate": 6.335409678965022e-05, "loss": 0.705, "step": 27519 }, { "epoch": 1.8646249745917745, "grad_norm": 5.7837371826171875, "learning_rate": 6.33527277705524e-05, "loss": 0.5557, "step": 27520 }, { "epoch": 1.8646927298597467, "grad_norm": 4.587590217590332, "learning_rate": 6.335135875145458e-05, "loss": 0.5177, "step": 27521 }, { "epoch": 1.8647604851277186, "grad_norm": 4.3267083168029785, "learning_rate": 6.334998973235677e-05, "loss": 0.3771, "step": 27522 }, { "epoch": 1.8648282403956906, "grad_norm": 5.273138046264648, "learning_rate": 6.334862071325895e-05, "loss": 0.6586, "step": 27523 }, { "epoch": 1.8648959956636628, "grad_norm": 7.248533248901367, "learning_rate": 6.334725169416113e-05, "loss": 0.7499, "step": 27524 }, { "epoch": 1.864963750931635, "grad_norm": 4.818221569061279, "learning_rate": 6.334588267506331e-05, "loss": 0.6871, "step": 27525 }, { "epoch": 1.865031506199607, "grad_norm": 5.056493282318115, "learning_rate": 6.33445136559655e-05, "loss": 0.6244, "step": 27526 }, { "epoch": 1.865099261467579, "grad_norm": 6.085416793823242, "learning_rate": 6.334314463686769e-05, "loss": 0.6461, "step": 27527 }, { "epoch": 1.8651670167355512, "grad_norm": 6.23557710647583, "learning_rate": 6.334177561776987e-05, "loss": 0.5157, "step": 27528 }, { "epoch": 1.8652347720035234, "grad_norm": 6.963679313659668, "learning_rate": 6.334040659867205e-05, "loss": 0.8752, "step": 27529 }, { "epoch": 1.8653025272714954, "grad_norm": 5.019575119018555, "learning_rate": 6.333903757957423e-05, "loss": 0.4809, "step": 27530 }, { "epoch": 1.8653702825394674, "grad_norm": 5.5090742111206055, "learning_rate": 6.333766856047642e-05, "loss": 0.6475, "step": 27531 }, { "epoch": 1.8654380378074396, "grad_norm": 5.594353199005127, "learning_rate": 6.33362995413786e-05, "loss": 0.7321, "step": 27532 }, { "epoch": 1.8655057930754118, "grad_norm": 5.266883850097656, "learning_rate": 6.333493052228078e-05, "loss": 0.712, "step": 27533 }, { "epoch": 1.8655735483433837, "grad_norm": 5.156014442443848, "learning_rate": 6.333356150318296e-05, "loss": 0.5358, "step": 27534 }, { "epoch": 1.8656413036113557, "grad_norm": 8.361031532287598, "learning_rate": 6.333219248408514e-05, "loss": 0.7801, "step": 27535 }, { "epoch": 1.865709058879328, "grad_norm": 6.438535213470459, "learning_rate": 6.333082346498734e-05, "loss": 0.6702, "step": 27536 }, { "epoch": 1.8657768141473, "grad_norm": 6.253293991088867, "learning_rate": 6.332945444588952e-05, "loss": 0.6285, "step": 27537 }, { "epoch": 1.8658445694152719, "grad_norm": 5.79615592956543, "learning_rate": 6.33280854267917e-05, "loss": 0.6007, "step": 27538 }, { "epoch": 1.865912324683244, "grad_norm": 4.508249282836914, "learning_rate": 6.332671640769389e-05, "loss": 0.7284, "step": 27539 }, { "epoch": 1.8659800799512163, "grad_norm": 7.839498519897461, "learning_rate": 6.332534738859607e-05, "loss": 0.5137, "step": 27540 }, { "epoch": 1.8660478352191883, "grad_norm": 6.532543659210205, "learning_rate": 6.332397836949825e-05, "loss": 0.6612, "step": 27541 }, { "epoch": 1.8661155904871602, "grad_norm": 4.765163898468018, "learning_rate": 6.332260935040045e-05, "loss": 0.7231, "step": 27542 }, { "epoch": 1.8661833457551324, "grad_norm": 6.229429244995117, "learning_rate": 6.332124033130263e-05, "loss": 0.3815, "step": 27543 }, { "epoch": 1.8662511010231047, "grad_norm": 4.934325695037842, "learning_rate": 6.331987131220481e-05, "loss": 0.5077, "step": 27544 }, { "epoch": 1.8663188562910766, "grad_norm": 5.246899127960205, "learning_rate": 6.3318502293107e-05, "loss": 0.6816, "step": 27545 }, { "epoch": 1.8663866115590486, "grad_norm": 7.447319507598877, "learning_rate": 6.331713327400918e-05, "loss": 0.8169, "step": 27546 }, { "epoch": 1.8664543668270208, "grad_norm": 6.7778000831604, "learning_rate": 6.331576425491136e-05, "loss": 0.5133, "step": 27547 }, { "epoch": 1.866522122094993, "grad_norm": 7.6582207679748535, "learning_rate": 6.331439523581354e-05, "loss": 0.726, "step": 27548 }, { "epoch": 1.866589877362965, "grad_norm": 4.94448184967041, "learning_rate": 6.331302621671574e-05, "loss": 0.6578, "step": 27549 }, { "epoch": 1.866657632630937, "grad_norm": 4.380932807922363, "learning_rate": 6.331165719761792e-05, "loss": 0.6329, "step": 27550 }, { "epoch": 1.8667253878989092, "grad_norm": 8.570854187011719, "learning_rate": 6.33102881785201e-05, "loss": 0.6394, "step": 27551 }, { "epoch": 1.8667931431668814, "grad_norm": 6.0751953125, "learning_rate": 6.330891915942228e-05, "loss": 0.6093, "step": 27552 }, { "epoch": 1.8668608984348531, "grad_norm": 5.206480503082275, "learning_rate": 6.330755014032446e-05, "loss": 0.609, "step": 27553 }, { "epoch": 1.8669286537028253, "grad_norm": 10.385674476623535, "learning_rate": 6.330618112122665e-05, "loss": 0.624, "step": 27554 }, { "epoch": 1.8669964089707975, "grad_norm": 5.524631023406982, "learning_rate": 6.330481210212883e-05, "loss": 0.671, "step": 27555 }, { "epoch": 1.8670641642387695, "grad_norm": 6.066066741943359, "learning_rate": 6.330344308303101e-05, "loss": 0.9129, "step": 27556 }, { "epoch": 1.8671319195067415, "grad_norm": 6.176371097564697, "learning_rate": 6.33020740639332e-05, "loss": 0.5983, "step": 27557 }, { "epoch": 1.8671996747747137, "grad_norm": 5.846497058868408, "learning_rate": 6.330070504483537e-05, "loss": 0.556, "step": 27558 }, { "epoch": 1.867267430042686, "grad_norm": 6.619709014892578, "learning_rate": 6.329933602573757e-05, "loss": 0.615, "step": 27559 }, { "epoch": 1.867335185310658, "grad_norm": 6.307657718658447, "learning_rate": 6.329796700663975e-05, "loss": 0.5037, "step": 27560 }, { "epoch": 1.8674029405786299, "grad_norm": 4.41392707824707, "learning_rate": 6.329659798754193e-05, "loss": 0.6664, "step": 27561 }, { "epoch": 1.867470695846602, "grad_norm": 6.220268249511719, "learning_rate": 6.329522896844411e-05, "loss": 0.5445, "step": 27562 }, { "epoch": 1.8675384511145743, "grad_norm": 5.679828643798828, "learning_rate": 6.32938599493463e-05, "loss": 0.5877, "step": 27563 }, { "epoch": 1.8676062063825463, "grad_norm": 5.238972187042236, "learning_rate": 6.329249093024848e-05, "loss": 0.5732, "step": 27564 }, { "epoch": 1.8676739616505182, "grad_norm": 5.531563758850098, "learning_rate": 6.329112191115066e-05, "loss": 0.5633, "step": 27565 }, { "epoch": 1.8677417169184904, "grad_norm": 6.132482051849365, "learning_rate": 6.328975289205284e-05, "loss": 0.656, "step": 27566 }, { "epoch": 1.8678094721864626, "grad_norm": 4.97045373916626, "learning_rate": 6.328838387295502e-05, "loss": 0.6594, "step": 27567 }, { "epoch": 1.8678772274544346, "grad_norm": 5.431466579437256, "learning_rate": 6.328701485385722e-05, "loss": 0.7123, "step": 27568 }, { "epoch": 1.8679449827224066, "grad_norm": 10.782968521118164, "learning_rate": 6.32856458347594e-05, "loss": 0.7027, "step": 27569 }, { "epoch": 1.8680127379903788, "grad_norm": 9.726080894470215, "learning_rate": 6.328427681566158e-05, "loss": 0.6635, "step": 27570 }, { "epoch": 1.8680804932583508, "grad_norm": 6.330414772033691, "learning_rate": 6.328290779656376e-05, "loss": 0.7098, "step": 27571 }, { "epoch": 1.8681482485263228, "grad_norm": 5.863759994506836, "learning_rate": 6.328153877746595e-05, "loss": 0.5253, "step": 27572 }, { "epoch": 1.868216003794295, "grad_norm": 6.357123851776123, "learning_rate": 6.328016975836813e-05, "loss": 0.4679, "step": 27573 }, { "epoch": 1.8682837590622672, "grad_norm": 4.4874396324157715, "learning_rate": 6.327880073927031e-05, "loss": 0.5357, "step": 27574 }, { "epoch": 1.8683515143302392, "grad_norm": 5.162113189697266, "learning_rate": 6.32774317201725e-05, "loss": 0.6434, "step": 27575 }, { "epoch": 1.8684192695982111, "grad_norm": 6.1192402839660645, "learning_rate": 6.327606270107467e-05, "loss": 0.673, "step": 27576 }, { "epoch": 1.8684870248661833, "grad_norm": 6.052635192871094, "learning_rate": 6.327469368197687e-05, "loss": 0.9042, "step": 27577 }, { "epoch": 1.8685547801341555, "grad_norm": 7.540661811828613, "learning_rate": 6.327332466287905e-05, "loss": 0.6738, "step": 27578 }, { "epoch": 1.8686225354021275, "grad_norm": 7.101236343383789, "learning_rate": 6.327195564378123e-05, "loss": 0.7266, "step": 27579 }, { "epoch": 1.8686902906700995, "grad_norm": 3.7665467262268066, "learning_rate": 6.327058662468341e-05, "loss": 0.56, "step": 27580 }, { "epoch": 1.8687580459380717, "grad_norm": 7.175234794616699, "learning_rate": 6.326921760558559e-05, "loss": 0.6842, "step": 27581 }, { "epoch": 1.868825801206044, "grad_norm": 5.582401275634766, "learning_rate": 6.326784858648778e-05, "loss": 0.7106, "step": 27582 }, { "epoch": 1.868893556474016, "grad_norm": 6.703815937042236, "learning_rate": 6.326647956738996e-05, "loss": 0.6742, "step": 27583 }, { "epoch": 1.8689613117419879, "grad_norm": 7.135458946228027, "learning_rate": 6.326511054829214e-05, "loss": 0.6265, "step": 27584 }, { "epoch": 1.86902906700996, "grad_norm": 8.373826026916504, "learning_rate": 6.326374152919434e-05, "loss": 0.6332, "step": 27585 }, { "epoch": 1.869096822277932, "grad_norm": 6.4368696212768555, "learning_rate": 6.326237251009652e-05, "loss": 0.627, "step": 27586 }, { "epoch": 1.869164577545904, "grad_norm": 7.972250938415527, "learning_rate": 6.32610034909987e-05, "loss": 0.5196, "step": 27587 }, { "epoch": 1.8692323328138762, "grad_norm": 5.925262928009033, "learning_rate": 6.325963447190089e-05, "loss": 0.7968, "step": 27588 }, { "epoch": 1.8693000880818484, "grad_norm": 4.48094367980957, "learning_rate": 6.325826545280307e-05, "loss": 0.5081, "step": 27589 }, { "epoch": 1.8693678433498204, "grad_norm": 4.9163103103637695, "learning_rate": 6.325689643370525e-05, "loss": 0.5825, "step": 27590 }, { "epoch": 1.8694355986177924, "grad_norm": 5.697823524475098, "learning_rate": 6.325552741460745e-05, "loss": 0.5705, "step": 27591 }, { "epoch": 1.8695033538857646, "grad_norm": 6.15464973449707, "learning_rate": 6.325415839550963e-05, "loss": 0.6357, "step": 27592 }, { "epoch": 1.8695711091537368, "grad_norm": 4.470996379852295, "learning_rate": 6.325278937641181e-05, "loss": 0.6342, "step": 27593 }, { "epoch": 1.8696388644217088, "grad_norm": 6.393310070037842, "learning_rate": 6.325142035731399e-05, "loss": 0.7824, "step": 27594 }, { "epoch": 1.8697066196896808, "grad_norm": 7.8723015785217285, "learning_rate": 6.325005133821618e-05, "loss": 0.8213, "step": 27595 }, { "epoch": 1.869774374957653, "grad_norm": 3.4930808544158936, "learning_rate": 6.324868231911836e-05, "loss": 0.4314, "step": 27596 }, { "epoch": 1.8698421302256252, "grad_norm": 7.379387378692627, "learning_rate": 6.324731330002054e-05, "loss": 0.6784, "step": 27597 }, { "epoch": 1.8699098854935972, "grad_norm": 8.626168251037598, "learning_rate": 6.324594428092272e-05, "loss": 0.5364, "step": 27598 }, { "epoch": 1.8699776407615691, "grad_norm": 5.449434280395508, "learning_rate": 6.32445752618249e-05, "loss": 0.7607, "step": 27599 }, { "epoch": 1.8700453960295413, "grad_norm": 6.095484256744385, "learning_rate": 6.32432062427271e-05, "loss": 0.6019, "step": 27600 }, { "epoch": 1.8701131512975135, "grad_norm": 5.368495464324951, "learning_rate": 6.324183722362928e-05, "loss": 0.6088, "step": 27601 }, { "epoch": 1.8701809065654853, "grad_norm": 7.877405166625977, "learning_rate": 6.324046820453146e-05, "loss": 0.5621, "step": 27602 }, { "epoch": 1.8702486618334575, "grad_norm": 5.503072738647461, "learning_rate": 6.323909918543364e-05, "loss": 0.7354, "step": 27603 }, { "epoch": 1.8703164171014297, "grad_norm": 5.963362216949463, "learning_rate": 6.323773016633582e-05, "loss": 0.6148, "step": 27604 }, { "epoch": 1.8703841723694017, "grad_norm": 5.848011016845703, "learning_rate": 6.323636114723801e-05, "loss": 0.6348, "step": 27605 }, { "epoch": 1.8704519276373737, "grad_norm": 7.314944267272949, "learning_rate": 6.323499212814019e-05, "loss": 0.7918, "step": 27606 }, { "epoch": 1.8705196829053459, "grad_norm": 6.513854503631592, "learning_rate": 6.323362310904237e-05, "loss": 0.7255, "step": 27607 }, { "epoch": 1.870587438173318, "grad_norm": 6.1126580238342285, "learning_rate": 6.323225408994455e-05, "loss": 0.5964, "step": 27608 }, { "epoch": 1.87065519344129, "grad_norm": 8.589181900024414, "learning_rate": 6.323088507084675e-05, "loss": 0.665, "step": 27609 }, { "epoch": 1.870722948709262, "grad_norm": 4.772172451019287, "learning_rate": 6.322951605174893e-05, "loss": 0.4764, "step": 27610 }, { "epoch": 1.8707907039772342, "grad_norm": 5.804854393005371, "learning_rate": 6.322814703265111e-05, "loss": 0.6995, "step": 27611 }, { "epoch": 1.8708584592452064, "grad_norm": 8.268136024475098, "learning_rate": 6.322677801355329e-05, "loss": 0.6965, "step": 27612 }, { "epoch": 1.8709262145131784, "grad_norm": 4.57783317565918, "learning_rate": 6.322540899445547e-05, "loss": 0.5294, "step": 27613 }, { "epoch": 1.8709939697811504, "grad_norm": 5.626828193664551, "learning_rate": 6.322403997535766e-05, "loss": 0.69, "step": 27614 }, { "epoch": 1.8710617250491226, "grad_norm": 7.793335914611816, "learning_rate": 6.322267095625984e-05, "loss": 0.6251, "step": 27615 }, { "epoch": 1.8711294803170948, "grad_norm": 5.945993423461914, "learning_rate": 6.322130193716202e-05, "loss": 0.5285, "step": 27616 }, { "epoch": 1.8711972355850668, "grad_norm": 5.081297874450684, "learning_rate": 6.32199329180642e-05, "loss": 0.5604, "step": 27617 }, { "epoch": 1.8712649908530388, "grad_norm": 7.597188472747803, "learning_rate": 6.32185638989664e-05, "loss": 0.5667, "step": 27618 }, { "epoch": 1.871332746121011, "grad_norm": 4.571447372436523, "learning_rate": 6.321719487986858e-05, "loss": 0.582, "step": 27619 }, { "epoch": 1.871400501388983, "grad_norm": 5.77311372756958, "learning_rate": 6.321582586077076e-05, "loss": 0.7752, "step": 27620 }, { "epoch": 1.871468256656955, "grad_norm": 3.9829463958740234, "learning_rate": 6.321445684167294e-05, "loss": 0.5098, "step": 27621 }, { "epoch": 1.8715360119249271, "grad_norm": 5.793895721435547, "learning_rate": 6.321308782257512e-05, "loss": 0.6633, "step": 27622 }, { "epoch": 1.8716037671928993, "grad_norm": 4.404062271118164, "learning_rate": 6.321171880347731e-05, "loss": 0.4825, "step": 27623 }, { "epoch": 1.8716715224608713, "grad_norm": 5.341352939605713, "learning_rate": 6.321034978437949e-05, "loss": 0.9318, "step": 27624 }, { "epoch": 1.8717392777288433, "grad_norm": 6.436113357543945, "learning_rate": 6.320898076528167e-05, "loss": 0.4978, "step": 27625 }, { "epoch": 1.8718070329968155, "grad_norm": 6.073765754699707, "learning_rate": 6.320761174618385e-05, "loss": 0.6259, "step": 27626 }, { "epoch": 1.8718747882647877, "grad_norm": 6.041072368621826, "learning_rate": 6.320624272708605e-05, "loss": 0.698, "step": 27627 }, { "epoch": 1.8719425435327597, "grad_norm": 5.603363513946533, "learning_rate": 6.320487370798823e-05, "loss": 0.5446, "step": 27628 }, { "epoch": 1.8720102988007317, "grad_norm": 7.812607765197754, "learning_rate": 6.320350468889041e-05, "loss": 0.5666, "step": 27629 }, { "epoch": 1.8720780540687039, "grad_norm": 4.379048824310303, "learning_rate": 6.320213566979259e-05, "loss": 0.6064, "step": 27630 }, { "epoch": 1.872145809336676, "grad_norm": 5.9303507804870605, "learning_rate": 6.320076665069478e-05, "loss": 0.6508, "step": 27631 }, { "epoch": 1.872213564604648, "grad_norm": 4.688441276550293, "learning_rate": 6.319939763159696e-05, "loss": 0.5297, "step": 27632 }, { "epoch": 1.87228131987262, "grad_norm": 6.080013751983643, "learning_rate": 6.319802861249914e-05, "loss": 0.6958, "step": 27633 }, { "epoch": 1.8723490751405922, "grad_norm": 7.3791985511779785, "learning_rate": 6.319665959340134e-05, "loss": 0.6289, "step": 27634 }, { "epoch": 1.8724168304085642, "grad_norm": 5.630093097686768, "learning_rate": 6.319529057430352e-05, "loss": 0.647, "step": 27635 }, { "epoch": 1.8724845856765362, "grad_norm": 4.811784744262695, "learning_rate": 6.31939215552057e-05, "loss": 0.5946, "step": 27636 }, { "epoch": 1.8725523409445084, "grad_norm": 5.44603967666626, "learning_rate": 6.319255253610789e-05, "loss": 0.6471, "step": 27637 }, { "epoch": 1.8726200962124806, "grad_norm": 4.257768630981445, "learning_rate": 6.319118351701007e-05, "loss": 0.5682, "step": 27638 }, { "epoch": 1.8726878514804526, "grad_norm": 6.343385696411133, "learning_rate": 6.318981449791225e-05, "loss": 0.6729, "step": 27639 }, { "epoch": 1.8727556067484246, "grad_norm": 7.715961456298828, "learning_rate": 6.318844547881443e-05, "loss": 0.5739, "step": 27640 }, { "epoch": 1.8728233620163968, "grad_norm": 4.259825706481934, "learning_rate": 6.318707645971663e-05, "loss": 0.5166, "step": 27641 }, { "epoch": 1.872891117284369, "grad_norm": 5.661103248596191, "learning_rate": 6.318570744061881e-05, "loss": 0.6133, "step": 27642 }, { "epoch": 1.872958872552341, "grad_norm": 5.630831718444824, "learning_rate": 6.318433842152099e-05, "loss": 0.6126, "step": 27643 }, { "epoch": 1.873026627820313, "grad_norm": 5.7166056632995605, "learning_rate": 6.318296940242317e-05, "loss": 0.6548, "step": 27644 }, { "epoch": 1.8730943830882851, "grad_norm": 6.629462718963623, "learning_rate": 6.318160038332535e-05, "loss": 0.5414, "step": 27645 }, { "epoch": 1.8731621383562573, "grad_norm": 5.528544902801514, "learning_rate": 6.318023136422754e-05, "loss": 0.721, "step": 27646 }, { "epoch": 1.8732298936242293, "grad_norm": 6.485620975494385, "learning_rate": 6.317886234512972e-05, "loss": 0.6957, "step": 27647 }, { "epoch": 1.8732976488922013, "grad_norm": 7.911180019378662, "learning_rate": 6.31774933260319e-05, "loss": 0.8079, "step": 27648 }, { "epoch": 1.8733654041601735, "grad_norm": 6.4991679191589355, "learning_rate": 6.317612430693408e-05, "loss": 0.7295, "step": 27649 }, { "epoch": 1.8734331594281457, "grad_norm": 4.90471887588501, "learning_rate": 6.317475528783628e-05, "loss": 0.3855, "step": 27650 }, { "epoch": 1.8735009146961175, "grad_norm": 6.923444747924805, "learning_rate": 6.317338626873846e-05, "loss": 0.7044, "step": 27651 }, { "epoch": 1.8735686699640897, "grad_norm": 9.125747680664062, "learning_rate": 6.317201724964064e-05, "loss": 0.5634, "step": 27652 }, { "epoch": 1.8736364252320619, "grad_norm": 5.692586421966553, "learning_rate": 6.317064823054282e-05, "loss": 0.8148, "step": 27653 }, { "epoch": 1.8737041805000338, "grad_norm": 5.6882710456848145, "learning_rate": 6.3169279211445e-05, "loss": 0.6392, "step": 27654 }, { "epoch": 1.8737719357680058, "grad_norm": 5.58971643447876, "learning_rate": 6.316791019234719e-05, "loss": 0.8332, "step": 27655 }, { "epoch": 1.873839691035978, "grad_norm": 6.572085380554199, "learning_rate": 6.316654117324937e-05, "loss": 0.7238, "step": 27656 }, { "epoch": 1.8739074463039502, "grad_norm": 5.4601263999938965, "learning_rate": 6.316517215415155e-05, "loss": 0.5742, "step": 27657 }, { "epoch": 1.8739752015719222, "grad_norm": 5.338827133178711, "learning_rate": 6.316380313505373e-05, "loss": 0.8464, "step": 27658 }, { "epoch": 1.8740429568398942, "grad_norm": 6.2741498947143555, "learning_rate": 6.316243411595591e-05, "loss": 0.7781, "step": 27659 }, { "epoch": 1.8741107121078664, "grad_norm": 5.905333518981934, "learning_rate": 6.316106509685811e-05, "loss": 0.6721, "step": 27660 }, { "epoch": 1.8741784673758386, "grad_norm": 7.028719902038574, "learning_rate": 6.315969607776029e-05, "loss": 0.5994, "step": 27661 }, { "epoch": 1.8742462226438106, "grad_norm": 7.493400573730469, "learning_rate": 6.315832705866247e-05, "loss": 0.7094, "step": 27662 }, { "epoch": 1.8743139779117826, "grad_norm": 5.389708518981934, "learning_rate": 6.315695803956465e-05, "loss": 0.9868, "step": 27663 }, { "epoch": 1.8743817331797548, "grad_norm": 6.144076347351074, "learning_rate": 6.315558902046684e-05, "loss": 0.7837, "step": 27664 }, { "epoch": 1.874449488447727, "grad_norm": 5.857853889465332, "learning_rate": 6.315422000136902e-05, "loss": 0.5986, "step": 27665 }, { "epoch": 1.874517243715699, "grad_norm": 6.104315280914307, "learning_rate": 6.31528509822712e-05, "loss": 0.6893, "step": 27666 }, { "epoch": 1.874584998983671, "grad_norm": 5.558208465576172, "learning_rate": 6.315148196317338e-05, "loss": 0.6674, "step": 27667 }, { "epoch": 1.8746527542516431, "grad_norm": 5.267580509185791, "learning_rate": 6.315011294407556e-05, "loss": 0.5952, "step": 27668 }, { "epoch": 1.874720509519615, "grad_norm": 4.04619026184082, "learning_rate": 6.314874392497776e-05, "loss": 0.5726, "step": 27669 }, { "epoch": 1.874788264787587, "grad_norm": 8.548932075500488, "learning_rate": 6.314737490587994e-05, "loss": 0.4923, "step": 27670 }, { "epoch": 1.8748560200555593, "grad_norm": 4.811182975769043, "learning_rate": 6.314600588678212e-05, "loss": 0.5514, "step": 27671 }, { "epoch": 1.8749237753235315, "grad_norm": 4.701756000518799, "learning_rate": 6.31446368676843e-05, "loss": 0.6763, "step": 27672 }, { "epoch": 1.8749915305915035, "grad_norm": 6.024022579193115, "learning_rate": 6.314326784858649e-05, "loss": 0.4827, "step": 27673 }, { "epoch": 1.8750592858594755, "grad_norm": 6.7600016593933105, "learning_rate": 6.314189882948867e-05, "loss": 0.5979, "step": 27674 }, { "epoch": 1.8751270411274477, "grad_norm": 4.895082473754883, "learning_rate": 6.314052981039085e-05, "loss": 0.7011, "step": 27675 }, { "epoch": 1.8751947963954199, "grad_norm": 5.55872917175293, "learning_rate": 6.313916079129303e-05, "loss": 0.5859, "step": 27676 }, { "epoch": 1.8752625516633918, "grad_norm": 6.713132858276367, "learning_rate": 6.313779177219521e-05, "loss": 0.7916, "step": 27677 }, { "epoch": 1.8753303069313638, "grad_norm": 5.122043132781982, "learning_rate": 6.313642275309741e-05, "loss": 0.6621, "step": 27678 }, { "epoch": 1.875398062199336, "grad_norm": 5.220146179199219, "learning_rate": 6.313505373399959e-05, "loss": 0.6534, "step": 27679 }, { "epoch": 1.8754658174673082, "grad_norm": 8.564950942993164, "learning_rate": 6.313368471490177e-05, "loss": 0.7945, "step": 27680 }, { "epoch": 1.8755335727352802, "grad_norm": 5.617307186126709, "learning_rate": 6.313231569580396e-05, "loss": 0.7702, "step": 27681 }, { "epoch": 1.8756013280032522, "grad_norm": 5.0468902587890625, "learning_rate": 6.313094667670614e-05, "loss": 0.627, "step": 27682 }, { "epoch": 1.8756690832712244, "grad_norm": 7.348795413970947, "learning_rate": 6.312957765760832e-05, "loss": 0.7568, "step": 27683 }, { "epoch": 1.8757368385391964, "grad_norm": 5.685968399047852, "learning_rate": 6.312820863851052e-05, "loss": 0.6377, "step": 27684 }, { "epoch": 1.8758045938071684, "grad_norm": 6.052116394042969, "learning_rate": 6.31268396194127e-05, "loss": 0.6678, "step": 27685 }, { "epoch": 1.8758723490751406, "grad_norm": 4.644410133361816, "learning_rate": 6.312547060031488e-05, "loss": 0.5755, "step": 27686 }, { "epoch": 1.8759401043431128, "grad_norm": 5.561807632446289, "learning_rate": 6.312410158121707e-05, "loss": 0.4107, "step": 27687 }, { "epoch": 1.8760078596110847, "grad_norm": 7.285003662109375, "learning_rate": 6.312273256211925e-05, "loss": 0.6708, "step": 27688 }, { "epoch": 1.8760756148790567, "grad_norm": 7.577326774597168, "learning_rate": 6.312136354302143e-05, "loss": 0.6312, "step": 27689 }, { "epoch": 1.876143370147029, "grad_norm": 6.981543064117432, "learning_rate": 6.311999452392361e-05, "loss": 0.7904, "step": 27690 }, { "epoch": 1.8762111254150011, "grad_norm": 7.03598690032959, "learning_rate": 6.311862550482579e-05, "loss": 0.7296, "step": 27691 }, { "epoch": 1.876278880682973, "grad_norm": 5.692619323730469, "learning_rate": 6.311725648572799e-05, "loss": 0.6503, "step": 27692 }, { "epoch": 1.876346635950945, "grad_norm": 7.297091484069824, "learning_rate": 6.311588746663017e-05, "loss": 0.6787, "step": 27693 }, { "epoch": 1.8764143912189173, "grad_norm": 4.956392765045166, "learning_rate": 6.311451844753235e-05, "loss": 0.6123, "step": 27694 }, { "epoch": 1.8764821464868895, "grad_norm": 8.860595703125, "learning_rate": 6.311314942843453e-05, "loss": 0.5468, "step": 27695 }, { "epoch": 1.8765499017548615, "grad_norm": 6.402701377868652, "learning_rate": 6.311178040933672e-05, "loss": 0.7543, "step": 27696 }, { "epoch": 1.8766176570228335, "grad_norm": 5.509602069854736, "learning_rate": 6.31104113902389e-05, "loss": 0.5654, "step": 27697 }, { "epoch": 1.8766854122908057, "grad_norm": 5.92568302154541, "learning_rate": 6.310904237114108e-05, "loss": 0.6782, "step": 27698 }, { "epoch": 1.8767531675587779, "grad_norm": 7.864884853363037, "learning_rate": 6.310767335204326e-05, "loss": 0.5696, "step": 27699 }, { "epoch": 1.8768209228267496, "grad_norm": 5.446737766265869, "learning_rate": 6.310630433294544e-05, "loss": 0.5372, "step": 27700 }, { "epoch": 1.8768886780947218, "grad_norm": 5.075801849365234, "learning_rate": 6.310493531384764e-05, "loss": 0.4995, "step": 27701 }, { "epoch": 1.876956433362694, "grad_norm": 5.3325419425964355, "learning_rate": 6.310356629474982e-05, "loss": 0.5364, "step": 27702 }, { "epoch": 1.877024188630666, "grad_norm": 4.913942337036133, "learning_rate": 6.3102197275652e-05, "loss": 0.6728, "step": 27703 }, { "epoch": 1.877091943898638, "grad_norm": 9.252853393554688, "learning_rate": 6.310082825655418e-05, "loss": 0.6297, "step": 27704 }, { "epoch": 1.8771596991666102, "grad_norm": 7.867688179016113, "learning_rate": 6.309945923745637e-05, "loss": 0.5698, "step": 27705 }, { "epoch": 1.8772274544345824, "grad_norm": 6.115978240966797, "learning_rate": 6.309809021835855e-05, "loss": 0.6309, "step": 27706 }, { "epoch": 1.8772952097025544, "grad_norm": 4.818838596343994, "learning_rate": 6.309672119926073e-05, "loss": 0.6313, "step": 27707 }, { "epoch": 1.8773629649705263, "grad_norm": 6.907614231109619, "learning_rate": 6.309535218016291e-05, "loss": 0.4904, "step": 27708 }, { "epoch": 1.8774307202384986, "grad_norm": 4.625635147094727, "learning_rate": 6.309398316106509e-05, "loss": 0.5579, "step": 27709 }, { "epoch": 1.8774984755064708, "grad_norm": 5.9245405197143555, "learning_rate": 6.309261414196729e-05, "loss": 0.62, "step": 27710 }, { "epoch": 1.8775662307744427, "grad_norm": 7.240972518920898, "learning_rate": 6.309124512286947e-05, "loss": 0.64, "step": 27711 }, { "epoch": 1.8776339860424147, "grad_norm": 9.308841705322266, "learning_rate": 6.308987610377165e-05, "loss": 0.7956, "step": 27712 }, { "epoch": 1.877701741310387, "grad_norm": 7.538474082946777, "learning_rate": 6.308850708467383e-05, "loss": 0.7125, "step": 27713 }, { "epoch": 1.8777694965783591, "grad_norm": 5.775266647338867, "learning_rate": 6.308713806557601e-05, "loss": 0.6632, "step": 27714 }, { "epoch": 1.877837251846331, "grad_norm": 6.167921543121338, "learning_rate": 6.30857690464782e-05, "loss": 0.5848, "step": 27715 }, { "epoch": 1.877905007114303, "grad_norm": 5.392101287841797, "learning_rate": 6.308440002738038e-05, "loss": 0.6515, "step": 27716 }, { "epoch": 1.8779727623822753, "grad_norm": 4.693084716796875, "learning_rate": 6.308303100828256e-05, "loss": 0.5928, "step": 27717 }, { "epoch": 1.8780405176502473, "grad_norm": 7.290642738342285, "learning_rate": 6.308166198918474e-05, "loss": 0.8857, "step": 27718 }, { "epoch": 1.8781082729182192, "grad_norm": 5.563359260559082, "learning_rate": 6.308029297008694e-05, "loss": 0.7006, "step": 27719 }, { "epoch": 1.8781760281861914, "grad_norm": 10.064821243286133, "learning_rate": 6.307892395098912e-05, "loss": 0.5922, "step": 27720 }, { "epoch": 1.8782437834541637, "grad_norm": 5.412308692932129, "learning_rate": 6.30775549318913e-05, "loss": 0.8891, "step": 27721 }, { "epoch": 1.8783115387221356, "grad_norm": 5.160074710845947, "learning_rate": 6.307618591279348e-05, "loss": 0.6862, "step": 27722 }, { "epoch": 1.8783792939901076, "grad_norm": 6.202085494995117, "learning_rate": 6.307481689369566e-05, "loss": 0.4975, "step": 27723 }, { "epoch": 1.8784470492580798, "grad_norm": 5.247751235961914, "learning_rate": 6.307344787459785e-05, "loss": 0.7376, "step": 27724 }, { "epoch": 1.878514804526052, "grad_norm": 5.984557628631592, "learning_rate": 6.307207885550003e-05, "loss": 0.5867, "step": 27725 }, { "epoch": 1.878582559794024, "grad_norm": 6.24952507019043, "learning_rate": 6.307070983640221e-05, "loss": 0.7009, "step": 27726 }, { "epoch": 1.878650315061996, "grad_norm": 7.430344104766846, "learning_rate": 6.306934081730441e-05, "loss": 0.8437, "step": 27727 }, { "epoch": 1.8787180703299682, "grad_norm": 7.423604965209961, "learning_rate": 6.306797179820659e-05, "loss": 0.4807, "step": 27728 }, { "epoch": 1.8787858255979404, "grad_norm": 4.904209136962891, "learning_rate": 6.306660277910877e-05, "loss": 0.5291, "step": 27729 }, { "epoch": 1.8788535808659124, "grad_norm": 5.0410990715026855, "learning_rate": 6.306523376001096e-05, "loss": 0.5532, "step": 27730 }, { "epoch": 1.8789213361338843, "grad_norm": 6.520369052886963, "learning_rate": 6.306386474091314e-05, "loss": 0.8261, "step": 27731 }, { "epoch": 1.8789890914018565, "grad_norm": 6.740850925445557, "learning_rate": 6.306249572181532e-05, "loss": 0.6368, "step": 27732 }, { "epoch": 1.8790568466698285, "grad_norm": 4.268787860870361, "learning_rate": 6.306112670271752e-05, "loss": 0.5355, "step": 27733 }, { "epoch": 1.8791246019378005, "grad_norm": 5.29947566986084, "learning_rate": 6.30597576836197e-05, "loss": 0.6901, "step": 27734 }, { "epoch": 1.8791923572057727, "grad_norm": 6.35125207901001, "learning_rate": 6.305838866452188e-05, "loss": 0.6996, "step": 27735 }, { "epoch": 1.879260112473745, "grad_norm": 7.95774507522583, "learning_rate": 6.305701964542406e-05, "loss": 0.5396, "step": 27736 }, { "epoch": 1.879327867741717, "grad_norm": 5.3757710456848145, "learning_rate": 6.305565062632624e-05, "loss": 0.6044, "step": 27737 }, { "epoch": 1.8793956230096889, "grad_norm": 5.934232711791992, "learning_rate": 6.305428160722843e-05, "loss": 0.5736, "step": 27738 }, { "epoch": 1.879463378277661, "grad_norm": 5.422888278961182, "learning_rate": 6.305291258813061e-05, "loss": 0.5648, "step": 27739 }, { "epoch": 1.8795311335456333, "grad_norm": 5.318690776824951, "learning_rate": 6.305154356903279e-05, "loss": 0.611, "step": 27740 }, { "epoch": 1.8795988888136053, "grad_norm": 8.277127265930176, "learning_rate": 6.305017454993497e-05, "loss": 0.5131, "step": 27741 }, { "epoch": 1.8796666440815772, "grad_norm": 6.5871148109436035, "learning_rate": 6.304880553083717e-05, "loss": 0.8458, "step": 27742 }, { "epoch": 1.8797343993495494, "grad_norm": 5.361516952514648, "learning_rate": 6.304743651173935e-05, "loss": 0.4487, "step": 27743 }, { "epoch": 1.8798021546175216, "grad_norm": 6.524036407470703, "learning_rate": 6.304606749264153e-05, "loss": 0.601, "step": 27744 }, { "epoch": 1.8798699098854936, "grad_norm": 5.621700286865234, "learning_rate": 6.304469847354371e-05, "loss": 0.7091, "step": 27745 }, { "epoch": 1.8799376651534656, "grad_norm": 5.093276500701904, "learning_rate": 6.304332945444589e-05, "loss": 0.7739, "step": 27746 }, { "epoch": 1.8800054204214378, "grad_norm": 4.841097354888916, "learning_rate": 6.304196043534808e-05, "loss": 0.6498, "step": 27747 }, { "epoch": 1.88007317568941, "grad_norm": 6.036467552185059, "learning_rate": 6.304059141625026e-05, "loss": 0.6075, "step": 27748 }, { "epoch": 1.8801409309573818, "grad_norm": 5.061807632446289, "learning_rate": 6.303922239715244e-05, "loss": 0.6634, "step": 27749 }, { "epoch": 1.880208686225354, "grad_norm": 6.396430015563965, "learning_rate": 6.303785337805462e-05, "loss": 0.6394, "step": 27750 }, { "epoch": 1.8802764414933262, "grad_norm": 8.381585121154785, "learning_rate": 6.303648435895682e-05, "loss": 0.5395, "step": 27751 }, { "epoch": 1.8803441967612982, "grad_norm": 5.380007266998291, "learning_rate": 6.3035115339859e-05, "loss": 0.7283, "step": 27752 }, { "epoch": 1.8804119520292701, "grad_norm": 7.17773962020874, "learning_rate": 6.303374632076118e-05, "loss": 0.5774, "step": 27753 }, { "epoch": 1.8804797072972423, "grad_norm": 7.463109493255615, "learning_rate": 6.303237730166336e-05, "loss": 0.638, "step": 27754 }, { "epoch": 1.8805474625652145, "grad_norm": 6.505912780761719, "learning_rate": 6.303100828256554e-05, "loss": 0.6356, "step": 27755 }, { "epoch": 1.8806152178331865, "grad_norm": 5.124770641326904, "learning_rate": 6.302963926346773e-05, "loss": 0.5635, "step": 27756 }, { "epoch": 1.8806829731011585, "grad_norm": 6.4556474685668945, "learning_rate": 6.302827024436991e-05, "loss": 0.6483, "step": 27757 }, { "epoch": 1.8807507283691307, "grad_norm": 6.184944152832031, "learning_rate": 6.302690122527209e-05, "loss": 0.7537, "step": 27758 }, { "epoch": 1.880818483637103, "grad_norm": 6.299713611602783, "learning_rate": 6.302553220617427e-05, "loss": 0.587, "step": 27759 }, { "epoch": 1.880886238905075, "grad_norm": 5.858841419219971, "learning_rate": 6.302416318707647e-05, "loss": 0.7515, "step": 27760 }, { "epoch": 1.8809539941730469, "grad_norm": 5.586334705352783, "learning_rate": 6.302279416797865e-05, "loss": 0.7079, "step": 27761 }, { "epoch": 1.881021749441019, "grad_norm": 5.737678050994873, "learning_rate": 6.302142514888083e-05, "loss": 0.6909, "step": 27762 }, { "epoch": 1.8810895047089913, "grad_norm": 5.704142093658447, "learning_rate": 6.302005612978301e-05, "loss": 0.6227, "step": 27763 }, { "epoch": 1.8811572599769633, "grad_norm": 4.308501720428467, "learning_rate": 6.301868711068519e-05, "loss": 0.601, "step": 27764 }, { "epoch": 1.8812250152449352, "grad_norm": 9.007975578308105, "learning_rate": 6.301731809158738e-05, "loss": 0.9042, "step": 27765 }, { "epoch": 1.8812927705129074, "grad_norm": 4.238919258117676, "learning_rate": 6.301594907248956e-05, "loss": 0.6458, "step": 27766 }, { "epoch": 1.8813605257808794, "grad_norm": 6.253438949584961, "learning_rate": 6.301458005339174e-05, "loss": 0.7256, "step": 27767 }, { "epoch": 1.8814282810488514, "grad_norm": 5.897993564605713, "learning_rate": 6.301321103429392e-05, "loss": 1.0179, "step": 27768 }, { "epoch": 1.8814960363168236, "grad_norm": 5.591862201690674, "learning_rate": 6.30118420151961e-05, "loss": 0.6122, "step": 27769 }, { "epoch": 1.8815637915847958, "grad_norm": 9.267468452453613, "learning_rate": 6.30104729960983e-05, "loss": 0.7103, "step": 27770 }, { "epoch": 1.8816315468527678, "grad_norm": 4.131267547607422, "learning_rate": 6.300910397700048e-05, "loss": 0.6858, "step": 27771 }, { "epoch": 1.8816993021207398, "grad_norm": 7.605987548828125, "learning_rate": 6.300773495790266e-05, "loss": 0.589, "step": 27772 }, { "epoch": 1.881767057388712, "grad_norm": 4.588912487030029, "learning_rate": 6.300636593880485e-05, "loss": 0.7035, "step": 27773 }, { "epoch": 1.8818348126566842, "grad_norm": 5.934086799621582, "learning_rate": 6.300499691970703e-05, "loss": 0.5067, "step": 27774 }, { "epoch": 1.8819025679246562, "grad_norm": 4.782838344573975, "learning_rate": 6.300362790060921e-05, "loss": 0.6483, "step": 27775 }, { "epoch": 1.8819703231926281, "grad_norm": 4.159089088439941, "learning_rate": 6.30022588815114e-05, "loss": 0.5623, "step": 27776 }, { "epoch": 1.8820380784606003, "grad_norm": 6.216699123382568, "learning_rate": 6.300088986241359e-05, "loss": 0.7901, "step": 27777 }, { "epoch": 1.8821058337285725, "grad_norm": 5.774432182312012, "learning_rate": 6.299952084331577e-05, "loss": 0.5702, "step": 27778 }, { "epoch": 1.8821735889965445, "grad_norm": 7.069362640380859, "learning_rate": 6.299815182421796e-05, "loss": 0.8542, "step": 27779 }, { "epoch": 1.8822413442645165, "grad_norm": 5.779545307159424, "learning_rate": 6.299678280512014e-05, "loss": 0.8343, "step": 27780 }, { "epoch": 1.8823090995324887, "grad_norm": 4.833282947540283, "learning_rate": 6.299541378602232e-05, "loss": 0.496, "step": 27781 }, { "epoch": 1.8823768548004607, "grad_norm": 5.607405662536621, "learning_rate": 6.29940447669245e-05, "loss": 0.4973, "step": 27782 }, { "epoch": 1.8824446100684327, "grad_norm": 5.071746826171875, "learning_rate": 6.29926757478267e-05, "loss": 0.5641, "step": 27783 }, { "epoch": 1.8825123653364049, "grad_norm": 4.783718585968018, "learning_rate": 6.299130672872888e-05, "loss": 0.7483, "step": 27784 }, { "epoch": 1.882580120604377, "grad_norm": 7.011200428009033, "learning_rate": 6.298993770963106e-05, "loss": 0.617, "step": 27785 }, { "epoch": 1.882647875872349, "grad_norm": 4.615904331207275, "learning_rate": 6.298856869053324e-05, "loss": 0.5133, "step": 27786 }, { "epoch": 1.882715631140321, "grad_norm": 7.715183258056641, "learning_rate": 6.298719967143542e-05, "loss": 0.8425, "step": 27787 }, { "epoch": 1.8827833864082932, "grad_norm": 7.79667854309082, "learning_rate": 6.298583065233761e-05, "loss": 0.5396, "step": 27788 }, { "epoch": 1.8828511416762654, "grad_norm": 5.175922393798828, "learning_rate": 6.298446163323979e-05, "loss": 0.7429, "step": 27789 }, { "epoch": 1.8829188969442374, "grad_norm": 5.8260955810546875, "learning_rate": 6.298309261414197e-05, "loss": 0.7412, "step": 27790 }, { "epoch": 1.8829866522122094, "grad_norm": 11.39370059967041, "learning_rate": 6.298172359504415e-05, "loss": 0.6245, "step": 27791 }, { "epoch": 1.8830544074801816, "grad_norm": 6.576147079467773, "learning_rate": 6.298035457594633e-05, "loss": 0.583, "step": 27792 }, { "epoch": 1.8831221627481538, "grad_norm": 7.248864650726318, "learning_rate": 6.297898555684853e-05, "loss": 0.6064, "step": 27793 }, { "epoch": 1.8831899180161258, "grad_norm": 6.464034557342529, "learning_rate": 6.29776165377507e-05, "loss": 0.6663, "step": 27794 }, { "epoch": 1.8832576732840978, "grad_norm": 4.349175453186035, "learning_rate": 6.297624751865289e-05, "loss": 0.6086, "step": 27795 }, { "epoch": 1.88332542855207, "grad_norm": 8.544511795043945, "learning_rate": 6.297487849955507e-05, "loss": 0.6652, "step": 27796 }, { "epoch": 1.8833931838200422, "grad_norm": 6.351444721221924, "learning_rate": 6.297350948045726e-05, "loss": 0.7854, "step": 27797 }, { "epoch": 1.883460939088014, "grad_norm": 6.114952087402344, "learning_rate": 6.297214046135944e-05, "loss": 0.6867, "step": 27798 }, { "epoch": 1.8835286943559861, "grad_norm": 4.511425018310547, "learning_rate": 6.297077144226162e-05, "loss": 0.5339, "step": 27799 }, { "epoch": 1.8835964496239583, "grad_norm": 6.657654762268066, "learning_rate": 6.29694024231638e-05, "loss": 0.694, "step": 27800 }, { "epoch": 1.8836642048919303, "grad_norm": 6.614530086517334, "learning_rate": 6.296803340406598e-05, "loss": 0.7122, "step": 27801 }, { "epoch": 1.8837319601599023, "grad_norm": 3.3991873264312744, "learning_rate": 6.296666438496818e-05, "loss": 0.5734, "step": 27802 }, { "epoch": 1.8837997154278745, "grad_norm": 6.7986040115356445, "learning_rate": 6.296529536587036e-05, "loss": 0.6417, "step": 27803 }, { "epoch": 1.8838674706958467, "grad_norm": 5.507370948791504, "learning_rate": 6.296392634677254e-05, "loss": 0.5712, "step": 27804 }, { "epoch": 1.8839352259638187, "grad_norm": 6.678649425506592, "learning_rate": 6.296255732767472e-05, "loss": 0.6058, "step": 27805 }, { "epoch": 1.8840029812317907, "grad_norm": 5.5613789558410645, "learning_rate": 6.296118830857691e-05, "loss": 0.845, "step": 27806 }, { "epoch": 1.8840707364997629, "grad_norm": 4.232308387756348, "learning_rate": 6.295981928947909e-05, "loss": 0.5973, "step": 27807 }, { "epoch": 1.884138491767735, "grad_norm": 5.616826057434082, "learning_rate": 6.295845027038127e-05, "loss": 0.7172, "step": 27808 }, { "epoch": 1.884206247035707, "grad_norm": 5.375556468963623, "learning_rate": 6.295708125128345e-05, "loss": 0.5752, "step": 27809 }, { "epoch": 1.884274002303679, "grad_norm": 5.572027206420898, "learning_rate": 6.295571223218563e-05, "loss": 0.6993, "step": 27810 }, { "epoch": 1.8843417575716512, "grad_norm": 7.198902606964111, "learning_rate": 6.295434321308783e-05, "loss": 0.8159, "step": 27811 }, { "epoch": 1.8844095128396234, "grad_norm": 5.242114067077637, "learning_rate": 6.295297419399001e-05, "loss": 0.8541, "step": 27812 }, { "epoch": 1.8844772681075954, "grad_norm": 6.712498664855957, "learning_rate": 6.295160517489219e-05, "loss": 0.6763, "step": 27813 }, { "epoch": 1.8845450233755674, "grad_norm": 4.084784507751465, "learning_rate": 6.295023615579437e-05, "loss": 0.634, "step": 27814 }, { "epoch": 1.8846127786435396, "grad_norm": 6.214087009429932, "learning_rate": 6.294886713669656e-05, "loss": 0.7013, "step": 27815 }, { "epoch": 1.8846805339115116, "grad_norm": 4.692789077758789, "learning_rate": 6.294749811759874e-05, "loss": 0.7202, "step": 27816 }, { "epoch": 1.8847482891794836, "grad_norm": 7.547297477722168, "learning_rate": 6.294612909850092e-05, "loss": 0.5889, "step": 27817 }, { "epoch": 1.8848160444474558, "grad_norm": 4.7694525718688965, "learning_rate": 6.29447600794031e-05, "loss": 0.5516, "step": 27818 }, { "epoch": 1.884883799715428, "grad_norm": 4.276844501495361, "learning_rate": 6.29433910603053e-05, "loss": 0.5707, "step": 27819 }, { "epoch": 1.8849515549834, "grad_norm": 9.09312629699707, "learning_rate": 6.294202204120748e-05, "loss": 0.5902, "step": 27820 }, { "epoch": 1.885019310251372, "grad_norm": 8.327112197875977, "learning_rate": 6.294065302210966e-05, "loss": 0.6392, "step": 27821 }, { "epoch": 1.8850870655193441, "grad_norm": 4.54794979095459, "learning_rate": 6.293928400301185e-05, "loss": 0.594, "step": 27822 }, { "epoch": 1.8851548207873163, "grad_norm": 5.393647193908691, "learning_rate": 6.293791498391403e-05, "loss": 0.6194, "step": 27823 }, { "epoch": 1.8852225760552883, "grad_norm": 5.772001266479492, "learning_rate": 6.293654596481621e-05, "loss": 0.5333, "step": 27824 }, { "epoch": 1.8852903313232603, "grad_norm": 5.474550724029541, "learning_rate": 6.29351769457184e-05, "loss": 0.7837, "step": 27825 }, { "epoch": 1.8853580865912325, "grad_norm": 6.464359760284424, "learning_rate": 6.293380792662059e-05, "loss": 0.6209, "step": 27826 }, { "epoch": 1.8854258418592047, "grad_norm": 6.802030563354492, "learning_rate": 6.293243890752277e-05, "loss": 0.5631, "step": 27827 }, { "epoch": 1.8854935971271767, "grad_norm": 11.750792503356934, "learning_rate": 6.293106988842495e-05, "loss": 0.7616, "step": 27828 }, { "epoch": 1.8855613523951487, "grad_norm": 5.488737106323242, "learning_rate": 6.292970086932714e-05, "loss": 0.5792, "step": 27829 }, { "epoch": 1.8856291076631209, "grad_norm": 6.087658405303955, "learning_rate": 6.292833185022932e-05, "loss": 0.6129, "step": 27830 }, { "epoch": 1.8856968629310928, "grad_norm": 5.497212886810303, "learning_rate": 6.29269628311315e-05, "loss": 0.4548, "step": 27831 }, { "epoch": 1.8857646181990648, "grad_norm": 5.263195037841797, "learning_rate": 6.292559381203368e-05, "loss": 0.4988, "step": 27832 }, { "epoch": 1.885832373467037, "grad_norm": 4.739650249481201, "learning_rate": 6.292422479293586e-05, "loss": 0.6862, "step": 27833 }, { "epoch": 1.8859001287350092, "grad_norm": 5.190148830413818, "learning_rate": 6.292285577383806e-05, "loss": 0.5897, "step": 27834 }, { "epoch": 1.8859678840029812, "grad_norm": 7.34982967376709, "learning_rate": 6.292148675474024e-05, "loss": 0.7245, "step": 27835 }, { "epoch": 1.8860356392709532, "grad_norm": 8.40492057800293, "learning_rate": 6.292011773564242e-05, "loss": 0.572, "step": 27836 }, { "epoch": 1.8861033945389254, "grad_norm": 7.786345958709717, "learning_rate": 6.29187487165446e-05, "loss": 0.6696, "step": 27837 }, { "epoch": 1.8861711498068976, "grad_norm": 6.7823076248168945, "learning_rate": 6.291737969744679e-05, "loss": 0.5655, "step": 27838 }, { "epoch": 1.8862389050748696, "grad_norm": 4.822293758392334, "learning_rate": 6.291601067834897e-05, "loss": 0.6274, "step": 27839 }, { "epoch": 1.8863066603428416, "grad_norm": 6.137720108032227, "learning_rate": 6.291464165925115e-05, "loss": 0.9212, "step": 27840 }, { "epoch": 1.8863744156108138, "grad_norm": 6.924318313598633, "learning_rate": 6.291327264015333e-05, "loss": 0.6207, "step": 27841 }, { "epoch": 1.886442170878786, "grad_norm": 5.956676959991455, "learning_rate": 6.291190362105551e-05, "loss": 0.5797, "step": 27842 }, { "epoch": 1.886509926146758, "grad_norm": 6.285329341888428, "learning_rate": 6.29105346019577e-05, "loss": 0.5935, "step": 27843 }, { "epoch": 1.88657768141473, "grad_norm": 13.063393592834473, "learning_rate": 6.290916558285989e-05, "loss": 0.8381, "step": 27844 }, { "epoch": 1.8866454366827021, "grad_norm": 5.160290241241455, "learning_rate": 6.290779656376207e-05, "loss": 0.8267, "step": 27845 }, { "epoch": 1.8867131919506743, "grad_norm": 4.445935249328613, "learning_rate": 6.290642754466425e-05, "loss": 0.6295, "step": 27846 }, { "epoch": 1.886780947218646, "grad_norm": 7.884426593780518, "learning_rate": 6.290505852556643e-05, "loss": 0.6127, "step": 27847 }, { "epoch": 1.8868487024866183, "grad_norm": 6.003480911254883, "learning_rate": 6.290368950646862e-05, "loss": 0.7157, "step": 27848 }, { "epoch": 1.8869164577545905, "grad_norm": 5.5332112312316895, "learning_rate": 6.29023204873708e-05, "loss": 0.7715, "step": 27849 }, { "epoch": 1.8869842130225625, "grad_norm": 4.3096466064453125, "learning_rate": 6.290095146827298e-05, "loss": 0.653, "step": 27850 }, { "epoch": 1.8870519682905345, "grad_norm": 6.927731990814209, "learning_rate": 6.289958244917516e-05, "loss": 0.5651, "step": 27851 }, { "epoch": 1.8871197235585067, "grad_norm": 7.153664588928223, "learning_rate": 6.289821343007736e-05, "loss": 0.5585, "step": 27852 }, { "epoch": 1.8871874788264789, "grad_norm": 6.628005504608154, "learning_rate": 6.289684441097954e-05, "loss": 0.5855, "step": 27853 }, { "epoch": 1.8872552340944508, "grad_norm": 6.215766906738281, "learning_rate": 6.289547539188172e-05, "loss": 0.7795, "step": 27854 }, { "epoch": 1.8873229893624228, "grad_norm": 4.5858964920043945, "learning_rate": 6.28941063727839e-05, "loss": 0.5637, "step": 27855 }, { "epoch": 1.887390744630395, "grad_norm": 5.295482158660889, "learning_rate": 6.289273735368608e-05, "loss": 0.6721, "step": 27856 }, { "epoch": 1.8874584998983672, "grad_norm": 5.549714088439941, "learning_rate": 6.289136833458827e-05, "loss": 0.7451, "step": 27857 }, { "epoch": 1.8875262551663392, "grad_norm": 9.592004776000977, "learning_rate": 6.288999931549045e-05, "loss": 0.5713, "step": 27858 }, { "epoch": 1.8875940104343112, "grad_norm": 5.4451704025268555, "learning_rate": 6.288863029639263e-05, "loss": 0.6771, "step": 27859 }, { "epoch": 1.8876617657022834, "grad_norm": 4.971888542175293, "learning_rate": 6.288726127729481e-05, "loss": 0.4993, "step": 27860 }, { "epoch": 1.8877295209702556, "grad_norm": 6.195004463195801, "learning_rate": 6.2885892258197e-05, "loss": 0.5517, "step": 27861 }, { "epoch": 1.8877972762382276, "grad_norm": 5.294865131378174, "learning_rate": 6.288452323909919e-05, "loss": 0.6428, "step": 27862 }, { "epoch": 1.8878650315061996, "grad_norm": 5.933304309844971, "learning_rate": 6.288315422000137e-05, "loss": 0.5436, "step": 27863 }, { "epoch": 1.8879327867741718, "grad_norm": 5.031601428985596, "learning_rate": 6.288178520090355e-05, "loss": 0.5979, "step": 27864 }, { "epoch": 1.8880005420421437, "grad_norm": 4.878922939300537, "learning_rate": 6.288041618180574e-05, "loss": 0.6054, "step": 27865 }, { "epoch": 1.8880682973101157, "grad_norm": 6.99415922164917, "learning_rate": 6.287904716270792e-05, "loss": 0.51, "step": 27866 }, { "epoch": 1.888136052578088, "grad_norm": 6.7687578201293945, "learning_rate": 6.28776781436101e-05, "loss": 0.5958, "step": 27867 }, { "epoch": 1.8882038078460601, "grad_norm": 5.958980083465576, "learning_rate": 6.28763091245123e-05, "loss": 0.5688, "step": 27868 }, { "epoch": 1.888271563114032, "grad_norm": 6.857299327850342, "learning_rate": 6.287494010541448e-05, "loss": 0.7707, "step": 27869 }, { "epoch": 1.888339318382004, "grad_norm": 6.961366176605225, "learning_rate": 6.287357108631666e-05, "loss": 0.5337, "step": 27870 }, { "epoch": 1.8884070736499763, "grad_norm": 9.765071868896484, "learning_rate": 6.287220206721885e-05, "loss": 0.5884, "step": 27871 }, { "epoch": 1.8884748289179485, "grad_norm": 5.829784870147705, "learning_rate": 6.287083304812103e-05, "loss": 0.6393, "step": 27872 }, { "epoch": 1.8885425841859205, "grad_norm": 4.859447956085205, "learning_rate": 6.286946402902321e-05, "loss": 0.6702, "step": 27873 }, { "epoch": 1.8886103394538925, "grad_norm": 6.523632049560547, "learning_rate": 6.286809500992539e-05, "loss": 0.6617, "step": 27874 }, { "epoch": 1.8886780947218647, "grad_norm": 5.760532855987549, "learning_rate": 6.286672599082759e-05, "loss": 0.6585, "step": 27875 }, { "epoch": 1.8887458499898369, "grad_norm": 8.435131072998047, "learning_rate": 6.286535697172977e-05, "loss": 0.6598, "step": 27876 }, { "epoch": 1.8888136052578088, "grad_norm": 5.874969959259033, "learning_rate": 6.286398795263195e-05, "loss": 0.6267, "step": 27877 }, { "epoch": 1.8888813605257808, "grad_norm": 9.037013053894043, "learning_rate": 6.286261893353413e-05, "loss": 0.7653, "step": 27878 }, { "epoch": 1.888949115793753, "grad_norm": 4.923863887786865, "learning_rate": 6.28612499144363e-05, "loss": 0.7022, "step": 27879 }, { "epoch": 1.889016871061725, "grad_norm": 8.456932067871094, "learning_rate": 6.28598808953385e-05, "loss": 0.8555, "step": 27880 }, { "epoch": 1.889084626329697, "grad_norm": 4.476989269256592, "learning_rate": 6.285851187624068e-05, "loss": 0.6004, "step": 27881 }, { "epoch": 1.8891523815976692, "grad_norm": 4.470795631408691, "learning_rate": 6.285714285714286e-05, "loss": 0.6607, "step": 27882 }, { "epoch": 1.8892201368656414, "grad_norm": 5.465544700622559, "learning_rate": 6.285577383804504e-05, "loss": 0.595, "step": 27883 }, { "epoch": 1.8892878921336134, "grad_norm": 6.0402512550354, "learning_rate": 6.285440481894724e-05, "loss": 0.7841, "step": 27884 }, { "epoch": 1.8893556474015853, "grad_norm": 5.8225998878479, "learning_rate": 6.285303579984942e-05, "loss": 0.5089, "step": 27885 }, { "epoch": 1.8894234026695576, "grad_norm": 5.41359281539917, "learning_rate": 6.28516667807516e-05, "loss": 0.4542, "step": 27886 }, { "epoch": 1.8894911579375298, "grad_norm": 7.687122344970703, "learning_rate": 6.285029776165378e-05, "loss": 0.7451, "step": 27887 }, { "epoch": 1.8895589132055017, "grad_norm": 5.120946884155273, "learning_rate": 6.284892874255596e-05, "loss": 0.7644, "step": 27888 }, { "epoch": 1.8896266684734737, "grad_norm": 5.045968055725098, "learning_rate": 6.284755972345815e-05, "loss": 0.6742, "step": 27889 }, { "epoch": 1.889694423741446, "grad_norm": 6.101681709289551, "learning_rate": 6.284619070436033e-05, "loss": 0.5494, "step": 27890 }, { "epoch": 1.8897621790094181, "grad_norm": 7.080737113952637, "learning_rate": 6.284482168526251e-05, "loss": 0.7508, "step": 27891 }, { "epoch": 1.88982993427739, "grad_norm": 13.909500122070312, "learning_rate": 6.284345266616469e-05, "loss": 0.6289, "step": 27892 }, { "epoch": 1.889897689545362, "grad_norm": 8.98306941986084, "learning_rate": 6.284208364706689e-05, "loss": 0.7762, "step": 27893 }, { "epoch": 1.8899654448133343, "grad_norm": 8.30830192565918, "learning_rate": 6.284071462796907e-05, "loss": 0.8536, "step": 27894 }, { "epoch": 1.8900332000813065, "grad_norm": 5.51300573348999, "learning_rate": 6.283934560887125e-05, "loss": 0.5662, "step": 27895 }, { "epoch": 1.8901009553492782, "grad_norm": 8.269031524658203, "learning_rate": 6.283797658977343e-05, "loss": 0.6531, "step": 27896 }, { "epoch": 1.8901687106172504, "grad_norm": 5.297168731689453, "learning_rate": 6.283660757067561e-05, "loss": 0.5434, "step": 27897 }, { "epoch": 1.8902364658852226, "grad_norm": 4.827119827270508, "learning_rate": 6.28352385515778e-05, "loss": 0.6099, "step": 27898 }, { "epoch": 1.8903042211531946, "grad_norm": 7.924688816070557, "learning_rate": 6.283386953247998e-05, "loss": 0.7328, "step": 27899 }, { "epoch": 1.8903719764211666, "grad_norm": 5.807513236999512, "learning_rate": 6.283250051338216e-05, "loss": 0.6546, "step": 27900 }, { "epoch": 1.8904397316891388, "grad_norm": 5.4299726486206055, "learning_rate": 6.283113149428434e-05, "loss": 0.5708, "step": 27901 }, { "epoch": 1.890507486957111, "grad_norm": 4.840429306030273, "learning_rate": 6.282976247518652e-05, "loss": 0.543, "step": 27902 }, { "epoch": 1.890575242225083, "grad_norm": 4.524851322174072, "learning_rate": 6.282839345608872e-05, "loss": 0.5309, "step": 27903 }, { "epoch": 1.890642997493055, "grad_norm": 6.648617744445801, "learning_rate": 6.28270244369909e-05, "loss": 0.6446, "step": 27904 }, { "epoch": 1.8907107527610272, "grad_norm": 5.819903373718262, "learning_rate": 6.282565541789308e-05, "loss": 0.6913, "step": 27905 }, { "epoch": 1.8907785080289994, "grad_norm": 7.003329277038574, "learning_rate": 6.282428639879526e-05, "loss": 0.5399, "step": 27906 }, { "epoch": 1.8908462632969714, "grad_norm": 5.443247318267822, "learning_rate": 6.282291737969745e-05, "loss": 0.5746, "step": 27907 }, { "epoch": 1.8909140185649433, "grad_norm": 4.132559299468994, "learning_rate": 6.282154836059963e-05, "loss": 0.6508, "step": 27908 }, { "epoch": 1.8909817738329155, "grad_norm": 4.961408615112305, "learning_rate": 6.282017934150181e-05, "loss": 0.6596, "step": 27909 }, { "epoch": 1.8910495291008877, "grad_norm": 10.42081069946289, "learning_rate": 6.281881032240399e-05, "loss": 0.6262, "step": 27910 }, { "epoch": 1.8911172843688597, "grad_norm": 9.181026458740234, "learning_rate": 6.281744130330617e-05, "loss": 0.5355, "step": 27911 }, { "epoch": 1.8911850396368317, "grad_norm": 6.443288803100586, "learning_rate": 6.281607228420837e-05, "loss": 0.6956, "step": 27912 }, { "epoch": 1.891252794904804, "grad_norm": 6.795979976654053, "learning_rate": 6.281470326511055e-05, "loss": 0.71, "step": 27913 }, { "epoch": 1.891320550172776, "grad_norm": 4.191555023193359, "learning_rate": 6.281333424601274e-05, "loss": 0.6779, "step": 27914 }, { "epoch": 1.8913883054407479, "grad_norm": 8.110291481018066, "learning_rate": 6.281196522691492e-05, "loss": 0.6528, "step": 27915 }, { "epoch": 1.89145606070872, "grad_norm": 6.099140167236328, "learning_rate": 6.28105962078171e-05, "loss": 0.6476, "step": 27916 }, { "epoch": 1.8915238159766923, "grad_norm": 5.942191123962402, "learning_rate": 6.28092271887193e-05, "loss": 0.5565, "step": 27917 }, { "epoch": 1.8915915712446643, "grad_norm": 5.103944778442383, "learning_rate": 6.280785816962148e-05, "loss": 0.5312, "step": 27918 }, { "epoch": 1.8916593265126362, "grad_norm": 6.048289775848389, "learning_rate": 6.280648915052366e-05, "loss": 0.5308, "step": 27919 }, { "epoch": 1.8917270817806084, "grad_norm": 5.136694431304932, "learning_rate": 6.280512013142584e-05, "loss": 0.6103, "step": 27920 }, { "epoch": 1.8917948370485806, "grad_norm": 6.237873077392578, "learning_rate": 6.280375111232803e-05, "loss": 0.5972, "step": 27921 }, { "epoch": 1.8918625923165526, "grad_norm": 4.408037185668945, "learning_rate": 6.280238209323021e-05, "loss": 0.5683, "step": 27922 }, { "epoch": 1.8919303475845246, "grad_norm": 9.457489967346191, "learning_rate": 6.280101307413239e-05, "loss": 0.8114, "step": 27923 }, { "epoch": 1.8919981028524968, "grad_norm": 5.306713104248047, "learning_rate": 6.279964405503457e-05, "loss": 0.6148, "step": 27924 }, { "epoch": 1.892065858120469, "grad_norm": 10.740824699401855, "learning_rate": 6.279827503593675e-05, "loss": 0.6275, "step": 27925 }, { "epoch": 1.892133613388441, "grad_norm": 5.938587188720703, "learning_rate": 6.279690601683895e-05, "loss": 0.5174, "step": 27926 }, { "epoch": 1.892201368656413, "grad_norm": 5.85345458984375, "learning_rate": 6.279553699774113e-05, "loss": 0.7031, "step": 27927 }, { "epoch": 1.8922691239243852, "grad_norm": 5.49940824508667, "learning_rate": 6.27941679786433e-05, "loss": 0.7203, "step": 27928 }, { "epoch": 1.8923368791923572, "grad_norm": 4.601334095001221, "learning_rate": 6.279279895954549e-05, "loss": 0.5695, "step": 27929 }, { "epoch": 1.8924046344603291, "grad_norm": 5.437506675720215, "learning_rate": 6.279142994044768e-05, "loss": 0.7129, "step": 27930 }, { "epoch": 1.8924723897283013, "grad_norm": 8.092564582824707, "learning_rate": 6.279006092134986e-05, "loss": 0.6854, "step": 27931 }, { "epoch": 1.8925401449962735, "grad_norm": 7.429778575897217, "learning_rate": 6.278869190225204e-05, "loss": 0.6489, "step": 27932 }, { "epoch": 1.8926079002642455, "grad_norm": 4.186244487762451, "learning_rate": 6.278732288315422e-05, "loss": 0.5523, "step": 27933 }, { "epoch": 1.8926756555322175, "grad_norm": 7.550124645233154, "learning_rate": 6.27859538640564e-05, "loss": 0.7396, "step": 27934 }, { "epoch": 1.8927434108001897, "grad_norm": 5.588962078094482, "learning_rate": 6.27845848449586e-05, "loss": 0.6227, "step": 27935 }, { "epoch": 1.892811166068162, "grad_norm": 4.912853717803955, "learning_rate": 6.278321582586078e-05, "loss": 0.5998, "step": 27936 }, { "epoch": 1.892878921336134, "grad_norm": 5.042669773101807, "learning_rate": 6.278184680676296e-05, "loss": 0.7426, "step": 27937 }, { "epoch": 1.8929466766041059, "grad_norm": 9.121674537658691, "learning_rate": 6.278047778766514e-05, "loss": 0.6514, "step": 27938 }, { "epoch": 1.893014431872078, "grad_norm": 5.514252662658691, "learning_rate": 6.277910876856733e-05, "loss": 0.6656, "step": 27939 }, { "epoch": 1.8930821871400503, "grad_norm": 8.002510070800781, "learning_rate": 6.277773974946951e-05, "loss": 0.9421, "step": 27940 }, { "epoch": 1.8931499424080223, "grad_norm": 6.288922309875488, "learning_rate": 6.277637073037169e-05, "loss": 0.5436, "step": 27941 }, { "epoch": 1.8932176976759942, "grad_norm": 7.101735591888428, "learning_rate": 6.277500171127387e-05, "loss": 0.7483, "step": 27942 }, { "epoch": 1.8932854529439664, "grad_norm": 6.643509864807129, "learning_rate": 6.277363269217605e-05, "loss": 0.8295, "step": 27943 }, { "epoch": 1.8933532082119386, "grad_norm": 5.1549835205078125, "learning_rate": 6.277226367307825e-05, "loss": 0.5482, "step": 27944 }, { "epoch": 1.8934209634799104, "grad_norm": 5.492838382720947, "learning_rate": 6.277089465398043e-05, "loss": 0.5106, "step": 27945 }, { "epoch": 1.8934887187478826, "grad_norm": 4.974836349487305, "learning_rate": 6.27695256348826e-05, "loss": 0.7815, "step": 27946 }, { "epoch": 1.8935564740158548, "grad_norm": 8.243467330932617, "learning_rate": 6.276815661578479e-05, "loss": 0.4757, "step": 27947 }, { "epoch": 1.8936242292838268, "grad_norm": 5.1588640213012695, "learning_rate": 6.276678759668698e-05, "loss": 0.5252, "step": 27948 }, { "epoch": 1.8936919845517988, "grad_norm": 9.226536750793457, "learning_rate": 6.276541857758916e-05, "loss": 0.5195, "step": 27949 }, { "epoch": 1.893759739819771, "grad_norm": 4.892991542816162, "learning_rate": 6.276404955849134e-05, "loss": 0.6993, "step": 27950 }, { "epoch": 1.8938274950877432, "grad_norm": 4.515966892242432, "learning_rate": 6.276268053939352e-05, "loss": 0.4921, "step": 27951 }, { "epoch": 1.8938952503557152, "grad_norm": 6.133669376373291, "learning_rate": 6.27613115202957e-05, "loss": 0.5697, "step": 27952 }, { "epoch": 1.8939630056236871, "grad_norm": 5.865467071533203, "learning_rate": 6.27599425011979e-05, "loss": 0.8547, "step": 27953 }, { "epoch": 1.8940307608916593, "grad_norm": 7.806417465209961, "learning_rate": 6.275857348210008e-05, "loss": 0.6773, "step": 27954 }, { "epoch": 1.8940985161596315, "grad_norm": 5.358498573303223, "learning_rate": 6.275720446300226e-05, "loss": 0.5565, "step": 27955 }, { "epoch": 1.8941662714276035, "grad_norm": 5.503627300262451, "learning_rate": 6.275583544390444e-05, "loss": 0.5413, "step": 27956 }, { "epoch": 1.8942340266955755, "grad_norm": 6.727372169494629, "learning_rate": 6.275446642480662e-05, "loss": 0.7934, "step": 27957 }, { "epoch": 1.8943017819635477, "grad_norm": 9.110444068908691, "learning_rate": 6.275309740570881e-05, "loss": 0.5672, "step": 27958 }, { "epoch": 1.89436953723152, "grad_norm": 5.821893692016602, "learning_rate": 6.275172838661099e-05, "loss": 0.6373, "step": 27959 }, { "epoch": 1.8944372924994919, "grad_norm": 12.694964408874512, "learning_rate": 6.275035936751317e-05, "loss": 0.571, "step": 27960 }, { "epoch": 1.8945050477674639, "grad_norm": 4.236441612243652, "learning_rate": 6.274899034841537e-05, "loss": 0.7169, "step": 27961 }, { "epoch": 1.894572803035436, "grad_norm": 5.195706367492676, "learning_rate": 6.274762132931755e-05, "loss": 0.6871, "step": 27962 }, { "epoch": 1.894640558303408, "grad_norm": 4.133591651916504, "learning_rate": 6.274625231021973e-05, "loss": 0.603, "step": 27963 }, { "epoch": 1.89470831357138, "grad_norm": 5.110321044921875, "learning_rate": 6.274488329112192e-05, "loss": 0.517, "step": 27964 }, { "epoch": 1.8947760688393522, "grad_norm": 6.033245086669922, "learning_rate": 6.27435142720241e-05, "loss": 0.52, "step": 27965 }, { "epoch": 1.8948438241073244, "grad_norm": 4.572634696960449, "learning_rate": 6.274214525292628e-05, "loss": 0.5718, "step": 27966 }, { "epoch": 1.8949115793752964, "grad_norm": 5.049005031585693, "learning_rate": 6.274077623382847e-05, "loss": 0.5932, "step": 27967 }, { "epoch": 1.8949793346432684, "grad_norm": 5.185237407684326, "learning_rate": 6.273940721473066e-05, "loss": 0.5069, "step": 27968 }, { "epoch": 1.8950470899112406, "grad_norm": 4.3618316650390625, "learning_rate": 6.273803819563284e-05, "loss": 0.5608, "step": 27969 }, { "epoch": 1.8951148451792128, "grad_norm": 9.441234588623047, "learning_rate": 6.273666917653502e-05, "loss": 0.7363, "step": 27970 }, { "epoch": 1.8951826004471848, "grad_norm": 5.210994243621826, "learning_rate": 6.273530015743721e-05, "loss": 0.6587, "step": 27971 }, { "epoch": 1.8952503557151568, "grad_norm": 5.425252437591553, "learning_rate": 6.273393113833939e-05, "loss": 0.783, "step": 27972 }, { "epoch": 1.895318110983129, "grad_norm": 7.4556660652160645, "learning_rate": 6.273256211924157e-05, "loss": 0.6362, "step": 27973 }, { "epoch": 1.8953858662511012, "grad_norm": 5.395413398742676, "learning_rate": 6.273119310014375e-05, "loss": 0.7213, "step": 27974 }, { "epoch": 1.8954536215190732, "grad_norm": 4.670349597930908, "learning_rate": 6.272982408104593e-05, "loss": 0.5566, "step": 27975 }, { "epoch": 1.8955213767870451, "grad_norm": 8.937593460083008, "learning_rate": 6.272845506194813e-05, "loss": 0.5278, "step": 27976 }, { "epoch": 1.8955891320550173, "grad_norm": 5.2874908447265625, "learning_rate": 6.27270860428503e-05, "loss": 0.6166, "step": 27977 }, { "epoch": 1.8956568873229893, "grad_norm": 4.7879204750061035, "learning_rate": 6.272571702375249e-05, "loss": 0.6663, "step": 27978 }, { "epoch": 1.8957246425909613, "grad_norm": 4.383210182189941, "learning_rate": 6.272434800465467e-05, "loss": 0.6098, "step": 27979 }, { "epoch": 1.8957923978589335, "grad_norm": 7.9090657234191895, "learning_rate": 6.272297898555685e-05, "loss": 0.6952, "step": 27980 }, { "epoch": 1.8958601531269057, "grad_norm": 10.131110191345215, "learning_rate": 6.272160996645904e-05, "loss": 0.6581, "step": 27981 }, { "epoch": 1.8959279083948777, "grad_norm": 5.533164024353027, "learning_rate": 6.272024094736122e-05, "loss": 0.4885, "step": 27982 }, { "epoch": 1.8959956636628497, "grad_norm": 5.269891738891602, "learning_rate": 6.27188719282634e-05, "loss": 0.4963, "step": 27983 }, { "epoch": 1.8960634189308219, "grad_norm": 4.583466529846191, "learning_rate": 6.271750290916558e-05, "loss": 0.86, "step": 27984 }, { "epoch": 1.896131174198794, "grad_norm": 6.827075004577637, "learning_rate": 6.271613389006778e-05, "loss": 0.5098, "step": 27985 }, { "epoch": 1.896198929466766, "grad_norm": 4.502198219299316, "learning_rate": 6.271476487096996e-05, "loss": 0.6442, "step": 27986 }, { "epoch": 1.896266684734738, "grad_norm": 10.249238967895508, "learning_rate": 6.271339585187214e-05, "loss": 0.6625, "step": 27987 }, { "epoch": 1.8963344400027102, "grad_norm": 4.874610424041748, "learning_rate": 6.271202683277432e-05, "loss": 0.5412, "step": 27988 }, { "epoch": 1.8964021952706824, "grad_norm": 6.348155498504639, "learning_rate": 6.27106578136765e-05, "loss": 0.6989, "step": 27989 }, { "epoch": 1.8964699505386544, "grad_norm": 4.426656246185303, "learning_rate": 6.270928879457869e-05, "loss": 0.6238, "step": 27990 }, { "epoch": 1.8965377058066264, "grad_norm": 5.9634904861450195, "learning_rate": 6.270791977548087e-05, "loss": 0.6088, "step": 27991 }, { "epoch": 1.8966054610745986, "grad_norm": 4.561583995819092, "learning_rate": 6.270655075638305e-05, "loss": 0.5882, "step": 27992 }, { "epoch": 1.8966732163425708, "grad_norm": 5.065361976623535, "learning_rate": 6.270518173728523e-05, "loss": 0.5419, "step": 27993 }, { "epoch": 1.8967409716105426, "grad_norm": 5.6459455490112305, "learning_rate": 6.270381271818743e-05, "loss": 0.6856, "step": 27994 }, { "epoch": 1.8968087268785148, "grad_norm": 6.001391410827637, "learning_rate": 6.27024436990896e-05, "loss": 0.7888, "step": 27995 }, { "epoch": 1.896876482146487, "grad_norm": 5.802665710449219, "learning_rate": 6.270107467999179e-05, "loss": 0.8807, "step": 27996 }, { "epoch": 1.896944237414459, "grad_norm": 6.880556583404541, "learning_rate": 6.269970566089397e-05, "loss": 0.6042, "step": 27997 }, { "epoch": 1.897011992682431, "grad_norm": 7.001958847045898, "learning_rate": 6.269833664179615e-05, "loss": 0.987, "step": 27998 }, { "epoch": 1.8970797479504031, "grad_norm": 4.170566082000732, "learning_rate": 6.269696762269834e-05, "loss": 0.4431, "step": 27999 }, { "epoch": 1.8971475032183753, "grad_norm": 6.863246440887451, "learning_rate": 6.269559860360052e-05, "loss": 0.7338, "step": 28000 }, { "epoch": 1.8972152584863473, "grad_norm": 6.243758678436279, "learning_rate": 6.26942295845027e-05, "loss": 0.693, "step": 28001 }, { "epoch": 1.8972830137543193, "grad_norm": 6.018113136291504, "learning_rate": 6.269286056540488e-05, "loss": 0.8508, "step": 28002 }, { "epoch": 1.8973507690222915, "grad_norm": 6.146251201629639, "learning_rate": 6.269149154630706e-05, "loss": 0.7654, "step": 28003 }, { "epoch": 1.8974185242902637, "grad_norm": 4.882858753204346, "learning_rate": 6.269012252720926e-05, "loss": 0.5273, "step": 28004 }, { "epoch": 1.8974862795582357, "grad_norm": 4.832515716552734, "learning_rate": 6.268875350811144e-05, "loss": 0.5841, "step": 28005 }, { "epoch": 1.8975540348262077, "grad_norm": 12.736294746398926, "learning_rate": 6.268738448901362e-05, "loss": 0.6313, "step": 28006 }, { "epoch": 1.8976217900941799, "grad_norm": 5.134313583374023, "learning_rate": 6.268601546991581e-05, "loss": 0.6489, "step": 28007 }, { "epoch": 1.897689545362152, "grad_norm": 5.645500183105469, "learning_rate": 6.268464645081799e-05, "loss": 0.727, "step": 28008 }, { "epoch": 1.897757300630124, "grad_norm": 5.176048278808594, "learning_rate": 6.268327743172017e-05, "loss": 0.7237, "step": 28009 }, { "epoch": 1.897825055898096, "grad_norm": 4.958619117736816, "learning_rate": 6.268190841262237e-05, "loss": 0.6693, "step": 28010 }, { "epoch": 1.8978928111660682, "grad_norm": 6.422164440155029, "learning_rate": 6.268053939352455e-05, "loss": 0.7463, "step": 28011 }, { "epoch": 1.8979605664340402, "grad_norm": 5.076028823852539, "learning_rate": 6.267917037442673e-05, "loss": 0.6854, "step": 28012 }, { "epoch": 1.8980283217020122, "grad_norm": 8.129158020019531, "learning_rate": 6.267780135532892e-05, "loss": 0.7088, "step": 28013 }, { "epoch": 1.8980960769699844, "grad_norm": 5.619378566741943, "learning_rate": 6.26764323362311e-05, "loss": 0.6318, "step": 28014 }, { "epoch": 1.8981638322379566, "grad_norm": 5.090880393981934, "learning_rate": 6.267506331713328e-05, "loss": 0.7173, "step": 28015 }, { "epoch": 1.8982315875059286, "grad_norm": 8.684544563293457, "learning_rate": 6.267369429803546e-05, "loss": 0.7112, "step": 28016 }, { "epoch": 1.8982993427739006, "grad_norm": 5.88771915435791, "learning_rate": 6.267232527893765e-05, "loss": 0.5245, "step": 28017 }, { "epoch": 1.8983670980418728, "grad_norm": 6.148416519165039, "learning_rate": 6.267095625983983e-05, "loss": 0.8831, "step": 28018 }, { "epoch": 1.898434853309845, "grad_norm": 6.767914295196533, "learning_rate": 6.266958724074202e-05, "loss": 0.6759, "step": 28019 }, { "epoch": 1.898502608577817, "grad_norm": 9.156801223754883, "learning_rate": 6.26682182216442e-05, "loss": 0.7483, "step": 28020 }, { "epoch": 1.898570363845789, "grad_norm": 6.3764801025390625, "learning_rate": 6.266684920254638e-05, "loss": 0.66, "step": 28021 }, { "epoch": 1.8986381191137611, "grad_norm": 5.421944618225098, "learning_rate": 6.266548018344857e-05, "loss": 0.5072, "step": 28022 }, { "epoch": 1.8987058743817333, "grad_norm": 4.839576244354248, "learning_rate": 6.266411116435075e-05, "loss": 0.5475, "step": 28023 }, { "epoch": 1.8987736296497053, "grad_norm": 5.244320869445801, "learning_rate": 6.266274214525293e-05, "loss": 0.644, "step": 28024 }, { "epoch": 1.8988413849176773, "grad_norm": 9.911107063293457, "learning_rate": 6.266137312615511e-05, "loss": 0.6691, "step": 28025 }, { "epoch": 1.8989091401856495, "grad_norm": 5.33414363861084, "learning_rate": 6.26600041070573e-05, "loss": 0.6678, "step": 28026 }, { "epoch": 1.8989768954536215, "grad_norm": 4.388881683349609, "learning_rate": 6.265863508795949e-05, "loss": 0.6262, "step": 28027 }, { "epoch": 1.8990446507215935, "grad_norm": 6.040522575378418, "learning_rate": 6.265726606886167e-05, "loss": 0.6029, "step": 28028 }, { "epoch": 1.8991124059895657, "grad_norm": 5.867251873016357, "learning_rate": 6.265589704976385e-05, "loss": 0.5985, "step": 28029 }, { "epoch": 1.8991801612575379, "grad_norm": 6.5245208740234375, "learning_rate": 6.265452803066603e-05, "loss": 0.7159, "step": 28030 }, { "epoch": 1.8992479165255098, "grad_norm": 9.071560859680176, "learning_rate": 6.265315901156822e-05, "loss": 0.4441, "step": 28031 }, { "epoch": 1.8993156717934818, "grad_norm": 6.216573715209961, "learning_rate": 6.26517899924704e-05, "loss": 0.7907, "step": 28032 }, { "epoch": 1.899383427061454, "grad_norm": 4.796585559844971, "learning_rate": 6.265042097337258e-05, "loss": 0.59, "step": 28033 }, { "epoch": 1.8994511823294262, "grad_norm": 4.2864508628845215, "learning_rate": 6.264905195427476e-05, "loss": 0.5824, "step": 28034 }, { "epoch": 1.8995189375973982, "grad_norm": 5.209982872009277, "learning_rate": 6.264768293517694e-05, "loss": 0.5931, "step": 28035 }, { "epoch": 1.8995866928653702, "grad_norm": 6.072529315948486, "learning_rate": 6.264631391607914e-05, "loss": 0.8438, "step": 28036 }, { "epoch": 1.8996544481333424, "grad_norm": 4.597762107849121, "learning_rate": 6.264494489698132e-05, "loss": 0.7242, "step": 28037 }, { "epoch": 1.8997222034013146, "grad_norm": 4.908017635345459, "learning_rate": 6.26435758778835e-05, "loss": 0.5572, "step": 28038 }, { "epoch": 1.8997899586692866, "grad_norm": 4.632526874542236, "learning_rate": 6.264220685878568e-05, "loss": 0.6071, "step": 28039 }, { "epoch": 1.8998577139372586, "grad_norm": 6.960393905639648, "learning_rate": 6.264083783968787e-05, "loss": 0.6, "step": 28040 }, { "epoch": 1.8999254692052308, "grad_norm": 4.907259941101074, "learning_rate": 6.263946882059005e-05, "loss": 0.6689, "step": 28041 }, { "epoch": 1.899993224473203, "grad_norm": 5.881953716278076, "learning_rate": 6.263809980149223e-05, "loss": 0.5639, "step": 28042 }, { "epoch": 1.9000609797411747, "grad_norm": 4.54290246963501, "learning_rate": 6.263673078239441e-05, "loss": 0.5972, "step": 28043 }, { "epoch": 1.900128735009147, "grad_norm": 6.13270902633667, "learning_rate": 6.263536176329659e-05, "loss": 0.7343, "step": 28044 }, { "epoch": 1.9001964902771191, "grad_norm": 6.069002628326416, "learning_rate": 6.263399274419879e-05, "loss": 0.8076, "step": 28045 }, { "epoch": 1.900264245545091, "grad_norm": 6.392848968505859, "learning_rate": 6.263262372510097e-05, "loss": 0.5425, "step": 28046 }, { "epoch": 1.900332000813063, "grad_norm": 8.148958206176758, "learning_rate": 6.263125470600315e-05, "loss": 0.6503, "step": 28047 }, { "epoch": 1.9003997560810353, "grad_norm": 9.433317184448242, "learning_rate": 6.262988568690533e-05, "loss": 0.8328, "step": 28048 }, { "epoch": 1.9004675113490075, "grad_norm": 6.73468017578125, "learning_rate": 6.262851666780752e-05, "loss": 0.6495, "step": 28049 }, { "epoch": 1.9005352666169795, "grad_norm": 6.58126163482666, "learning_rate": 6.26271476487097e-05, "loss": 0.7283, "step": 28050 }, { "epoch": 1.9006030218849514, "grad_norm": 5.302158832550049, "learning_rate": 6.262577862961188e-05, "loss": 0.463, "step": 28051 }, { "epoch": 1.9006707771529237, "grad_norm": 5.480127334594727, "learning_rate": 6.262440961051406e-05, "loss": 0.8374, "step": 28052 }, { "epoch": 1.9007385324208959, "grad_norm": 5.6172356605529785, "learning_rate": 6.262304059141626e-05, "loss": 0.6555, "step": 28053 }, { "epoch": 1.9008062876888678, "grad_norm": 6.019320011138916, "learning_rate": 6.262167157231844e-05, "loss": 0.626, "step": 28054 }, { "epoch": 1.9008740429568398, "grad_norm": 5.676902770996094, "learning_rate": 6.262030255322062e-05, "loss": 0.8235, "step": 28055 }, { "epoch": 1.900941798224812, "grad_norm": 3.9130501747131348, "learning_rate": 6.261893353412281e-05, "loss": 0.5576, "step": 28056 }, { "epoch": 1.9010095534927842, "grad_norm": 3.774235248565674, "learning_rate": 6.261756451502499e-05, "loss": 0.3967, "step": 28057 }, { "epoch": 1.9010773087607562, "grad_norm": 11.525525093078613, "learning_rate": 6.261619549592717e-05, "loss": 0.659, "step": 28058 }, { "epoch": 1.9011450640287282, "grad_norm": 6.035669326782227, "learning_rate": 6.261482647682936e-05, "loss": 0.6431, "step": 28059 }, { "epoch": 1.9012128192967004, "grad_norm": 5.007401943206787, "learning_rate": 6.261345745773154e-05, "loss": 0.6312, "step": 28060 }, { "epoch": 1.9012805745646724, "grad_norm": 7.335431098937988, "learning_rate": 6.261208843863373e-05, "loss": 0.6605, "step": 28061 }, { "epoch": 1.9013483298326443, "grad_norm": 4.612360954284668, "learning_rate": 6.26107194195359e-05, "loss": 0.7306, "step": 28062 }, { "epoch": 1.9014160851006165, "grad_norm": 10.407176971435547, "learning_rate": 6.26093504004381e-05, "loss": 0.5671, "step": 28063 }, { "epoch": 1.9014838403685888, "grad_norm": 6.377346992492676, "learning_rate": 6.260798138134028e-05, "loss": 0.7961, "step": 28064 }, { "epoch": 1.9015515956365607, "grad_norm": 7.938178539276123, "learning_rate": 6.260661236224246e-05, "loss": 0.8932, "step": 28065 }, { "epoch": 1.9016193509045327, "grad_norm": 4.821824073791504, "learning_rate": 6.260524334314464e-05, "loss": 0.4987, "step": 28066 }, { "epoch": 1.901687106172505, "grad_norm": 6.3685688972473145, "learning_rate": 6.260387432404682e-05, "loss": 0.7736, "step": 28067 }, { "epoch": 1.9017548614404771, "grad_norm": 5.149661540985107, "learning_rate": 6.260250530494901e-05, "loss": 0.6405, "step": 28068 }, { "epoch": 1.901822616708449, "grad_norm": 5.361179828643799, "learning_rate": 6.26011362858512e-05, "loss": 0.5542, "step": 28069 }, { "epoch": 1.901890371976421, "grad_norm": 6.096372604370117, "learning_rate": 6.259976726675338e-05, "loss": 0.4486, "step": 28070 }, { "epoch": 1.9019581272443933, "grad_norm": 7.56496000289917, "learning_rate": 6.259839824765556e-05, "loss": 0.8262, "step": 28071 }, { "epoch": 1.9020258825123655, "grad_norm": 5.705261707305908, "learning_rate": 6.259702922855775e-05, "loss": 0.6896, "step": 28072 }, { "epoch": 1.9020936377803375, "grad_norm": 7.310647964477539, "learning_rate": 6.259566020945993e-05, "loss": 0.8476, "step": 28073 }, { "epoch": 1.9021613930483094, "grad_norm": 6.995996475219727, "learning_rate": 6.259429119036211e-05, "loss": 0.6683, "step": 28074 }, { "epoch": 1.9022291483162816, "grad_norm": 9.082365036010742, "learning_rate": 6.259292217126429e-05, "loss": 0.5173, "step": 28075 }, { "epoch": 1.9022969035842536, "grad_norm": 5.095073223114014, "learning_rate": 6.259155315216647e-05, "loss": 0.5319, "step": 28076 }, { "epoch": 1.9023646588522256, "grad_norm": 6.049412250518799, "learning_rate": 6.259018413306866e-05, "loss": 0.8222, "step": 28077 }, { "epoch": 1.9024324141201978, "grad_norm": 6.387606143951416, "learning_rate": 6.258881511397085e-05, "loss": 0.6588, "step": 28078 }, { "epoch": 1.90250016938817, "grad_norm": 6.936084270477295, "learning_rate": 6.258744609487303e-05, "loss": 0.6258, "step": 28079 }, { "epoch": 1.902567924656142, "grad_norm": 5.685515880584717, "learning_rate": 6.25860770757752e-05, "loss": 0.6855, "step": 28080 }, { "epoch": 1.902635679924114, "grad_norm": 10.87590217590332, "learning_rate": 6.25847080566774e-05, "loss": 0.5993, "step": 28081 }, { "epoch": 1.9027034351920862, "grad_norm": 4.292232990264893, "learning_rate": 6.258333903757958e-05, "loss": 0.6185, "step": 28082 }, { "epoch": 1.9027711904600584, "grad_norm": 5.462042331695557, "learning_rate": 6.258197001848176e-05, "loss": 0.7133, "step": 28083 }, { "epoch": 1.9028389457280304, "grad_norm": 5.954684257507324, "learning_rate": 6.258060099938394e-05, "loss": 0.6175, "step": 28084 }, { "epoch": 1.9029067009960023, "grad_norm": 4.7124810218811035, "learning_rate": 6.257923198028612e-05, "loss": 0.6161, "step": 28085 }, { "epoch": 1.9029744562639745, "grad_norm": 6.080146789550781, "learning_rate": 6.257786296118831e-05, "loss": 0.4772, "step": 28086 }, { "epoch": 1.9030422115319467, "grad_norm": 5.302973747253418, "learning_rate": 6.25764939420905e-05, "loss": 0.7594, "step": 28087 }, { "epoch": 1.9031099667999187, "grad_norm": 5.512613296508789, "learning_rate": 6.257512492299268e-05, "loss": 0.8758, "step": 28088 }, { "epoch": 1.9031777220678907, "grad_norm": 7.813691139221191, "learning_rate": 6.257375590389486e-05, "loss": 0.6331, "step": 28089 }, { "epoch": 1.903245477335863, "grad_norm": 7.981358051300049, "learning_rate": 6.257238688479704e-05, "loss": 0.6579, "step": 28090 }, { "epoch": 1.9033132326038351, "grad_norm": 4.985409259796143, "learning_rate": 6.257101786569923e-05, "loss": 0.681, "step": 28091 }, { "epoch": 1.9033809878718069, "grad_norm": 6.005244731903076, "learning_rate": 6.256964884660141e-05, "loss": 0.6781, "step": 28092 }, { "epoch": 1.903448743139779, "grad_norm": 4.971264839172363, "learning_rate": 6.256827982750359e-05, "loss": 0.6519, "step": 28093 }, { "epoch": 1.9035164984077513, "grad_norm": 6.509243011474609, "learning_rate": 6.256691080840577e-05, "loss": 0.8957, "step": 28094 }, { "epoch": 1.9035842536757233, "grad_norm": 6.364065170288086, "learning_rate": 6.256554178930797e-05, "loss": 0.7702, "step": 28095 }, { "epoch": 1.9036520089436952, "grad_norm": 5.259433746337891, "learning_rate": 6.256417277021015e-05, "loss": 0.7731, "step": 28096 }, { "epoch": 1.9037197642116674, "grad_norm": 6.510261535644531, "learning_rate": 6.256280375111233e-05, "loss": 0.8533, "step": 28097 }, { "epoch": 1.9037875194796396, "grad_norm": 5.009304523468018, "learning_rate": 6.25614347320145e-05, "loss": 0.5906, "step": 28098 }, { "epoch": 1.9038552747476116, "grad_norm": 4.84100866317749, "learning_rate": 6.25600657129167e-05, "loss": 0.6118, "step": 28099 }, { "epoch": 1.9039230300155836, "grad_norm": 5.630660533905029, "learning_rate": 6.255869669381888e-05, "loss": 0.7602, "step": 28100 }, { "epoch": 1.9039907852835558, "grad_norm": 4.808835506439209, "learning_rate": 6.255732767472106e-05, "loss": 0.5488, "step": 28101 }, { "epoch": 1.904058540551528, "grad_norm": 4.897031784057617, "learning_rate": 6.255595865562325e-05, "loss": 0.5942, "step": 28102 }, { "epoch": 1.9041262958195, "grad_norm": 4.182346343994141, "learning_rate": 6.255458963652543e-05, "loss": 0.5426, "step": 28103 }, { "epoch": 1.904194051087472, "grad_norm": 5.779045104980469, "learning_rate": 6.255322061742762e-05, "loss": 0.6379, "step": 28104 }, { "epoch": 1.9042618063554442, "grad_norm": 5.738335609436035, "learning_rate": 6.255185159832981e-05, "loss": 0.6346, "step": 28105 }, { "epoch": 1.9043295616234164, "grad_norm": 6.258022308349609, "learning_rate": 6.255048257923199e-05, "loss": 0.6077, "step": 28106 }, { "epoch": 1.9043973168913884, "grad_norm": 7.2753777503967285, "learning_rate": 6.254911356013417e-05, "loss": 0.5471, "step": 28107 }, { "epoch": 1.9044650721593603, "grad_norm": 7.134427547454834, "learning_rate": 6.254774454103635e-05, "loss": 0.6805, "step": 28108 }, { "epoch": 1.9045328274273325, "grad_norm": 4.979456424713135, "learning_rate": 6.254637552193854e-05, "loss": 0.5626, "step": 28109 }, { "epoch": 1.9046005826953045, "grad_norm": 4.196211338043213, "learning_rate": 6.254500650284072e-05, "loss": 0.5358, "step": 28110 }, { "epoch": 1.9046683379632765, "grad_norm": 9.044280052185059, "learning_rate": 6.25436374837429e-05, "loss": 0.8638, "step": 28111 }, { "epoch": 1.9047360932312487, "grad_norm": 5.217176914215088, "learning_rate": 6.254226846464509e-05, "loss": 0.6578, "step": 28112 }, { "epoch": 1.904803848499221, "grad_norm": 5.99954080581665, "learning_rate": 6.254089944554727e-05, "loss": 0.6939, "step": 28113 }, { "epoch": 1.904871603767193, "grad_norm": 6.258463382720947, "learning_rate": 6.253953042644946e-05, "loss": 0.6118, "step": 28114 }, { "epoch": 1.9049393590351649, "grad_norm": 5.76107931137085, "learning_rate": 6.253816140735164e-05, "loss": 0.7104, "step": 28115 }, { "epoch": 1.905007114303137, "grad_norm": 7.432440280914307, "learning_rate": 6.253679238825382e-05, "loss": 0.5931, "step": 28116 }, { "epoch": 1.9050748695711093, "grad_norm": 5.8485188484191895, "learning_rate": 6.2535423369156e-05, "loss": 0.7041, "step": 28117 }, { "epoch": 1.9051426248390813, "grad_norm": 8.2584228515625, "learning_rate": 6.25340543500582e-05, "loss": 0.5905, "step": 28118 }, { "epoch": 1.9052103801070532, "grad_norm": 6.431135177612305, "learning_rate": 6.253268533096037e-05, "loss": 0.6383, "step": 28119 }, { "epoch": 1.9052781353750254, "grad_norm": 5.427676200866699, "learning_rate": 6.253131631186255e-05, "loss": 0.6139, "step": 28120 }, { "epoch": 1.9053458906429976, "grad_norm": 6.1826066970825195, "learning_rate": 6.252994729276474e-05, "loss": 0.7515, "step": 28121 }, { "epoch": 1.9054136459109696, "grad_norm": 8.537379264831543, "learning_rate": 6.252857827366692e-05, "loss": 0.6376, "step": 28122 }, { "epoch": 1.9054814011789416, "grad_norm": 5.005917072296143, "learning_rate": 6.252720925456911e-05, "loss": 0.6164, "step": 28123 }, { "epoch": 1.9055491564469138, "grad_norm": 5.297226428985596, "learning_rate": 6.252584023547129e-05, "loss": 0.6875, "step": 28124 }, { "epoch": 1.9056169117148858, "grad_norm": 6.836134910583496, "learning_rate": 6.252447121637347e-05, "loss": 0.896, "step": 28125 }, { "epoch": 1.9056846669828578, "grad_norm": 6.831963539123535, "learning_rate": 6.252310219727565e-05, "loss": 0.6885, "step": 28126 }, { "epoch": 1.90575242225083, "grad_norm": 5.029696464538574, "learning_rate": 6.252173317817784e-05, "loss": 0.5873, "step": 28127 }, { "epoch": 1.9058201775188022, "grad_norm": 6.471965789794922, "learning_rate": 6.252036415908002e-05, "loss": 0.9647, "step": 28128 }, { "epoch": 1.9058879327867742, "grad_norm": 6.227716445922852, "learning_rate": 6.25189951399822e-05, "loss": 0.6246, "step": 28129 }, { "epoch": 1.9059556880547461, "grad_norm": 8.503149032592773, "learning_rate": 6.251762612088439e-05, "loss": 0.783, "step": 28130 }, { "epoch": 1.9060234433227183, "grad_norm": 6.027461051940918, "learning_rate": 6.251625710178657e-05, "loss": 0.6218, "step": 28131 }, { "epoch": 1.9060911985906905, "grad_norm": 7.4793596267700195, "learning_rate": 6.251488808268876e-05, "loss": 0.576, "step": 28132 }, { "epoch": 1.9061589538586625, "grad_norm": 7.99904727935791, "learning_rate": 6.251351906359094e-05, "loss": 0.8415, "step": 28133 }, { "epoch": 1.9062267091266345, "grad_norm": 11.23747444152832, "learning_rate": 6.251215004449312e-05, "loss": 0.6726, "step": 28134 }, { "epoch": 1.9062944643946067, "grad_norm": 6.5806565284729, "learning_rate": 6.25107810253953e-05, "loss": 0.6243, "step": 28135 }, { "epoch": 1.906362219662579, "grad_norm": 5.105010032653809, "learning_rate": 6.25094120062975e-05, "loss": 0.8903, "step": 28136 }, { "epoch": 1.9064299749305509, "grad_norm": 6.285195827484131, "learning_rate": 6.250804298719967e-05, "loss": 0.7168, "step": 28137 }, { "epoch": 1.9064977301985229, "grad_norm": 5.4594645500183105, "learning_rate": 6.250667396810186e-05, "loss": 0.6202, "step": 28138 }, { "epoch": 1.906565485466495, "grad_norm": 5.800373554229736, "learning_rate": 6.250530494900404e-05, "loss": 0.7119, "step": 28139 }, { "epoch": 1.906633240734467, "grad_norm": 5.120060920715332, "learning_rate": 6.250393592990622e-05, "loss": 0.5267, "step": 28140 }, { "epoch": 1.906700996002439, "grad_norm": 7.896551132202148, "learning_rate": 6.250256691080841e-05, "loss": 0.5677, "step": 28141 }, { "epoch": 1.9067687512704112, "grad_norm": 4.746171474456787, "learning_rate": 6.250119789171059e-05, "loss": 0.6146, "step": 28142 }, { "epoch": 1.9068365065383834, "grad_norm": 5.693826675415039, "learning_rate": 6.249982887261277e-05, "loss": 0.5731, "step": 28143 }, { "epoch": 1.9069042618063554, "grad_norm": 4.578557014465332, "learning_rate": 6.249845985351495e-05, "loss": 0.731, "step": 28144 }, { "epoch": 1.9069720170743274, "grad_norm": 5.344570159912109, "learning_rate": 6.249709083441714e-05, "loss": 0.6781, "step": 28145 }, { "epoch": 1.9070397723422996, "grad_norm": 6.495667457580566, "learning_rate": 6.249572181531933e-05, "loss": 0.8527, "step": 28146 }, { "epoch": 1.9071075276102718, "grad_norm": 6.570298194885254, "learning_rate": 6.24943527962215e-05, "loss": 0.6646, "step": 28147 }, { "epoch": 1.9071752828782438, "grad_norm": 4.930169582366943, "learning_rate": 6.24929837771237e-05, "loss": 0.5916, "step": 28148 }, { "epoch": 1.9072430381462158, "grad_norm": 4.793105125427246, "learning_rate": 6.249161475802588e-05, "loss": 0.6905, "step": 28149 }, { "epoch": 1.907310793414188, "grad_norm": 9.802921295166016, "learning_rate": 6.249024573892806e-05, "loss": 0.6944, "step": 28150 }, { "epoch": 1.9073785486821602, "grad_norm": 4.744882583618164, "learning_rate": 6.248887671983025e-05, "loss": 0.6834, "step": 28151 }, { "epoch": 1.9074463039501321, "grad_norm": 4.292336940765381, "learning_rate": 6.248750770073243e-05, "loss": 0.6144, "step": 28152 }, { "epoch": 1.9075140592181041, "grad_norm": 5.810223579406738, "learning_rate": 6.248613868163461e-05, "loss": 0.6362, "step": 28153 }, { "epoch": 1.9075818144860763, "grad_norm": 4.914311408996582, "learning_rate": 6.24847696625368e-05, "loss": 0.6515, "step": 28154 }, { "epoch": 1.9076495697540485, "grad_norm": 3.768951177597046, "learning_rate": 6.248340064343899e-05, "loss": 0.6291, "step": 28155 }, { "epoch": 1.9077173250220205, "grad_norm": 5.338575839996338, "learning_rate": 6.248203162434117e-05, "loss": 0.6383, "step": 28156 }, { "epoch": 1.9077850802899925, "grad_norm": 8.074195861816406, "learning_rate": 6.248066260524335e-05, "loss": 0.6669, "step": 28157 }, { "epoch": 1.9078528355579647, "grad_norm": 4.642064094543457, "learning_rate": 6.247929358614553e-05, "loss": 0.7234, "step": 28158 }, { "epoch": 1.9079205908259367, "grad_norm": 5.436228275299072, "learning_rate": 6.247792456704772e-05, "loss": 0.6365, "step": 28159 }, { "epoch": 1.9079883460939087, "grad_norm": 5.487400531768799, "learning_rate": 6.24765555479499e-05, "loss": 0.6714, "step": 28160 }, { "epoch": 1.9080561013618809, "grad_norm": 5.965282917022705, "learning_rate": 6.247518652885208e-05, "loss": 0.6713, "step": 28161 }, { "epoch": 1.908123856629853, "grad_norm": 5.42965841293335, "learning_rate": 6.247381750975426e-05, "loss": 0.6756, "step": 28162 }, { "epoch": 1.908191611897825, "grad_norm": 8.41012191772461, "learning_rate": 6.247244849065645e-05, "loss": 0.5881, "step": 28163 }, { "epoch": 1.908259367165797, "grad_norm": 6.4519524574279785, "learning_rate": 6.247107947155864e-05, "loss": 0.5895, "step": 28164 }, { "epoch": 1.9083271224337692, "grad_norm": 4.816607475280762, "learning_rate": 6.246971045246082e-05, "loss": 0.7015, "step": 28165 }, { "epoch": 1.9083948777017414, "grad_norm": 5.888318061828613, "learning_rate": 6.2468341433363e-05, "loss": 0.6209, "step": 28166 }, { "epoch": 1.9084626329697134, "grad_norm": 4.57689094543457, "learning_rate": 6.246697241426518e-05, "loss": 0.6019, "step": 28167 }, { "epoch": 1.9085303882376854, "grad_norm": 3.429711103439331, "learning_rate": 6.246560339516736e-05, "loss": 0.4124, "step": 28168 }, { "epoch": 1.9085981435056576, "grad_norm": 6.379589080810547, "learning_rate": 6.246423437606955e-05, "loss": 0.7169, "step": 28169 }, { "epoch": 1.9086658987736298, "grad_norm": 4.7205986976623535, "learning_rate": 6.246286535697173e-05, "loss": 0.609, "step": 28170 }, { "epoch": 1.9087336540416018, "grad_norm": 5.55640172958374, "learning_rate": 6.246149633787391e-05, "loss": 0.5442, "step": 28171 }, { "epoch": 1.9088014093095738, "grad_norm": 5.387771129608154, "learning_rate": 6.24601273187761e-05, "loss": 0.6047, "step": 28172 }, { "epoch": 1.908869164577546, "grad_norm": 5.830379486083984, "learning_rate": 6.245875829967829e-05, "loss": 0.6185, "step": 28173 }, { "epoch": 1.908936919845518, "grad_norm": 11.014876365661621, "learning_rate": 6.245738928058047e-05, "loss": 0.8563, "step": 28174 }, { "epoch": 1.90900467511349, "grad_norm": 4.168486595153809, "learning_rate": 6.245602026148265e-05, "loss": 0.5609, "step": 28175 }, { "epoch": 1.9090724303814621, "grad_norm": 6.7226362228393555, "learning_rate": 6.245465124238483e-05, "loss": 0.9162, "step": 28176 }, { "epoch": 1.9091401856494343, "grad_norm": 12.65170669555664, "learning_rate": 6.245328222328701e-05, "loss": 0.5838, "step": 28177 }, { "epoch": 1.9092079409174063, "grad_norm": 6.556295394897461, "learning_rate": 6.24519132041892e-05, "loss": 0.7225, "step": 28178 }, { "epoch": 1.9092756961853783, "grad_norm": 6.1977643966674805, "learning_rate": 6.245054418509138e-05, "loss": 0.6378, "step": 28179 }, { "epoch": 1.9093434514533505, "grad_norm": 8.658784866333008, "learning_rate": 6.244917516599357e-05, "loss": 0.7056, "step": 28180 }, { "epoch": 1.9094112067213227, "grad_norm": 6.270622253417969, "learning_rate": 6.244780614689575e-05, "loss": 0.782, "step": 28181 }, { "epoch": 1.9094789619892947, "grad_norm": 4.534059524536133, "learning_rate": 6.244643712779794e-05, "loss": 0.7303, "step": 28182 }, { "epoch": 1.9095467172572667, "grad_norm": 6.620143890380859, "learning_rate": 6.244506810870012e-05, "loss": 0.7433, "step": 28183 }, { "epoch": 1.9096144725252389, "grad_norm": 4.130523681640625, "learning_rate": 6.24436990896023e-05, "loss": 0.495, "step": 28184 }, { "epoch": 1.909682227793211, "grad_norm": 7.303094863891602, "learning_rate": 6.244233007050448e-05, "loss": 0.5893, "step": 28185 }, { "epoch": 1.909749983061183, "grad_norm": 5.388350963592529, "learning_rate": 6.244096105140666e-05, "loss": 0.4662, "step": 28186 }, { "epoch": 1.909817738329155, "grad_norm": 6.202406883239746, "learning_rate": 6.243959203230885e-05, "loss": 0.6259, "step": 28187 }, { "epoch": 1.9098854935971272, "grad_norm": 6.634548187255859, "learning_rate": 6.243822301321103e-05, "loss": 0.7537, "step": 28188 }, { "epoch": 1.9099532488650992, "grad_norm": 5.020077705383301, "learning_rate": 6.243685399411322e-05, "loss": 0.8424, "step": 28189 }, { "epoch": 1.9100210041330712, "grad_norm": 7.1290812492370605, "learning_rate": 6.24354849750154e-05, "loss": 0.4857, "step": 28190 }, { "epoch": 1.9100887594010434, "grad_norm": 4.401777744293213, "learning_rate": 6.243411595591758e-05, "loss": 0.655, "step": 28191 }, { "epoch": 1.9101565146690156, "grad_norm": 5.375810146331787, "learning_rate": 6.243274693681977e-05, "loss": 0.6643, "step": 28192 }, { "epoch": 1.9102242699369876, "grad_norm": 6.33805513381958, "learning_rate": 6.243137791772195e-05, "loss": 0.4665, "step": 28193 }, { "epoch": 1.9102920252049596, "grad_norm": 6.352182388305664, "learning_rate": 6.243000889862413e-05, "loss": 0.6865, "step": 28194 }, { "epoch": 1.9103597804729318, "grad_norm": 7.041769027709961, "learning_rate": 6.242863987952632e-05, "loss": 0.7352, "step": 28195 }, { "epoch": 1.910427535740904, "grad_norm": 7.026673316955566, "learning_rate": 6.24272708604285e-05, "loss": 0.6431, "step": 28196 }, { "epoch": 1.910495291008876, "grad_norm": 6.72052526473999, "learning_rate": 6.242590184133069e-05, "loss": 0.6148, "step": 28197 }, { "epoch": 1.910563046276848, "grad_norm": 13.376420974731445, "learning_rate": 6.242453282223288e-05, "loss": 0.7559, "step": 28198 }, { "epoch": 1.9106308015448201, "grad_norm": 5.2088775634765625, "learning_rate": 6.242316380313506e-05, "loss": 0.704, "step": 28199 }, { "epoch": 1.9106985568127923, "grad_norm": 5.322445869445801, "learning_rate": 6.242179478403724e-05, "loss": 0.4905, "step": 28200 }, { "epoch": 1.9107663120807643, "grad_norm": 11.749404907226562, "learning_rate": 6.242042576493943e-05, "loss": 0.6918, "step": 28201 }, { "epoch": 1.9108340673487363, "grad_norm": 9.630813598632812, "learning_rate": 6.241905674584161e-05, "loss": 0.7982, "step": 28202 }, { "epoch": 1.9109018226167085, "grad_norm": 8.200209617614746, "learning_rate": 6.24176877267438e-05, "loss": 0.6587, "step": 28203 }, { "epoch": 1.9109695778846807, "grad_norm": 5.137231349945068, "learning_rate": 6.241631870764597e-05, "loss": 0.4075, "step": 28204 }, { "epoch": 1.9110373331526527, "grad_norm": 5.832844257354736, "learning_rate": 6.241494968854817e-05, "loss": 0.4991, "step": 28205 }, { "epoch": 1.9111050884206247, "grad_norm": 5.792083740234375, "learning_rate": 6.241358066945035e-05, "loss": 0.6265, "step": 28206 }, { "epoch": 1.9111728436885969, "grad_norm": 5.080399990081787, "learning_rate": 6.241221165035253e-05, "loss": 0.7927, "step": 28207 }, { "epoch": 1.9112405989565688, "grad_norm": 6.282182216644287, "learning_rate": 6.241084263125471e-05, "loss": 0.7125, "step": 28208 }, { "epoch": 1.9113083542245408, "grad_norm": 4.741801738739014, "learning_rate": 6.240947361215689e-05, "loss": 0.6016, "step": 28209 }, { "epoch": 1.911376109492513, "grad_norm": 5.554837226867676, "learning_rate": 6.240810459305908e-05, "loss": 0.7168, "step": 28210 }, { "epoch": 1.9114438647604852, "grad_norm": 8.11208438873291, "learning_rate": 6.240673557396126e-05, "loss": 0.6817, "step": 28211 }, { "epoch": 1.9115116200284572, "grad_norm": 7.08942985534668, "learning_rate": 6.240536655486344e-05, "loss": 0.7032, "step": 28212 }, { "epoch": 1.9115793752964292, "grad_norm": 6.4064226150512695, "learning_rate": 6.240399753576562e-05, "loss": 0.5456, "step": 28213 }, { "epoch": 1.9116471305644014, "grad_norm": 7.623965740203857, "learning_rate": 6.240262851666782e-05, "loss": 0.8369, "step": 28214 }, { "epoch": 1.9117148858323736, "grad_norm": 5.873419761657715, "learning_rate": 6.240125949757e-05, "loss": 0.5145, "step": 28215 }, { "epoch": 1.9117826411003456, "grad_norm": 4.047770023345947, "learning_rate": 6.239989047847218e-05, "loss": 0.5069, "step": 28216 }, { "epoch": 1.9118503963683176, "grad_norm": 5.599514484405518, "learning_rate": 6.239852145937436e-05, "loss": 0.817, "step": 28217 }, { "epoch": 1.9119181516362898, "grad_norm": 7.1214518547058105, "learning_rate": 6.239715244027654e-05, "loss": 0.719, "step": 28218 }, { "epoch": 1.911985906904262, "grad_norm": 5.470030307769775, "learning_rate": 6.239578342117873e-05, "loss": 0.5558, "step": 28219 }, { "epoch": 1.912053662172234, "grad_norm": 6.7009711265563965, "learning_rate": 6.239441440208091e-05, "loss": 0.6452, "step": 28220 }, { "epoch": 1.912121417440206, "grad_norm": 5.659205913543701, "learning_rate": 6.23930453829831e-05, "loss": 0.7549, "step": 28221 }, { "epoch": 1.9121891727081781, "grad_norm": 5.935034275054932, "learning_rate": 6.239167636388527e-05, "loss": 0.8765, "step": 28222 }, { "epoch": 1.91225692797615, "grad_norm": 5.545430660247803, "learning_rate": 6.239030734478746e-05, "loss": 0.8063, "step": 28223 }, { "epoch": 1.912324683244122, "grad_norm": 5.9013566970825195, "learning_rate": 6.238893832568965e-05, "loss": 0.5275, "step": 28224 }, { "epoch": 1.9123924385120943, "grad_norm": 5.254367351531982, "learning_rate": 6.238756930659183e-05, "loss": 0.7219, "step": 28225 }, { "epoch": 1.9124601937800665, "grad_norm": 4.6046366691589355, "learning_rate": 6.238620028749401e-05, "loss": 0.5669, "step": 28226 }, { "epoch": 1.9125279490480385, "grad_norm": 6.583508491516113, "learning_rate": 6.238483126839619e-05, "loss": 0.7312, "step": 28227 }, { "epoch": 1.9125957043160104, "grad_norm": 5.271289825439453, "learning_rate": 6.238346224929838e-05, "loss": 0.6552, "step": 28228 }, { "epoch": 1.9126634595839827, "grad_norm": 14.728133201599121, "learning_rate": 6.238209323020056e-05, "loss": 0.6443, "step": 28229 }, { "epoch": 1.9127312148519549, "grad_norm": 8.004919052124023, "learning_rate": 6.238072421110274e-05, "loss": 0.4892, "step": 28230 }, { "epoch": 1.9127989701199268, "grad_norm": 5.295616626739502, "learning_rate": 6.237935519200493e-05, "loss": 0.6773, "step": 28231 }, { "epoch": 1.9128667253878988, "grad_norm": 5.0610737800598145, "learning_rate": 6.23779861729071e-05, "loss": 0.9714, "step": 28232 }, { "epoch": 1.912934480655871, "grad_norm": 8.041497230529785, "learning_rate": 6.23766171538093e-05, "loss": 0.6912, "step": 28233 }, { "epoch": 1.9130022359238432, "grad_norm": 4.987978458404541, "learning_rate": 6.237524813471148e-05, "loss": 0.7912, "step": 28234 }, { "epoch": 1.9130699911918152, "grad_norm": 5.397056579589844, "learning_rate": 6.237387911561366e-05, "loss": 0.5901, "step": 28235 }, { "epoch": 1.9131377464597872, "grad_norm": 4.179184913635254, "learning_rate": 6.237251009651584e-05, "loss": 0.6988, "step": 28236 }, { "epoch": 1.9132055017277594, "grad_norm": 4.65203332901001, "learning_rate": 6.237114107741803e-05, "loss": 0.5195, "step": 28237 }, { "epoch": 1.9132732569957314, "grad_norm": 4.89298677444458, "learning_rate": 6.236977205832021e-05, "loss": 0.651, "step": 28238 }, { "epoch": 1.9133410122637033, "grad_norm": 5.547011852264404, "learning_rate": 6.23684030392224e-05, "loss": 0.503, "step": 28239 }, { "epoch": 1.9134087675316755, "grad_norm": 5.054087162017822, "learning_rate": 6.236703402012458e-05, "loss": 0.5927, "step": 28240 }, { "epoch": 1.9134765227996477, "grad_norm": 4.582016468048096, "learning_rate": 6.236566500102677e-05, "loss": 0.6774, "step": 28241 }, { "epoch": 1.9135442780676197, "grad_norm": 5.9870710372924805, "learning_rate": 6.236429598192895e-05, "loss": 0.5762, "step": 28242 }, { "epoch": 1.9136120333355917, "grad_norm": 6.593662738800049, "learning_rate": 6.236292696283113e-05, "loss": 0.7402, "step": 28243 }, { "epoch": 1.913679788603564, "grad_norm": 3.3660526275634766, "learning_rate": 6.236155794373332e-05, "loss": 0.4283, "step": 28244 }, { "epoch": 1.9137475438715361, "grad_norm": 6.092799186706543, "learning_rate": 6.23601889246355e-05, "loss": 0.6655, "step": 28245 }, { "epoch": 1.913815299139508, "grad_norm": 6.085809707641602, "learning_rate": 6.235881990553768e-05, "loss": 0.4621, "step": 28246 }, { "epoch": 1.91388305440748, "grad_norm": 4.65207052230835, "learning_rate": 6.235745088643988e-05, "loss": 0.5873, "step": 28247 }, { "epoch": 1.9139508096754523, "grad_norm": 5.324954032897949, "learning_rate": 6.235608186734206e-05, "loss": 0.5951, "step": 28248 }, { "epoch": 1.9140185649434245, "grad_norm": 5.763540267944336, "learning_rate": 6.235471284824424e-05, "loss": 0.5455, "step": 28249 }, { "epoch": 1.9140863202113965, "grad_norm": 6.7105841636657715, "learning_rate": 6.235334382914642e-05, "loss": 0.4951, "step": 28250 }, { "epoch": 1.9141540754793684, "grad_norm": 5.403067588806152, "learning_rate": 6.235197481004861e-05, "loss": 0.455, "step": 28251 }, { "epoch": 1.9142218307473406, "grad_norm": 5.926052093505859, "learning_rate": 6.23506057909508e-05, "loss": 0.6601, "step": 28252 }, { "epoch": 1.9142895860153128, "grad_norm": 7.537343978881836, "learning_rate": 6.234923677185297e-05, "loss": 0.8612, "step": 28253 }, { "epoch": 1.9143573412832848, "grad_norm": 5.686923027038574, "learning_rate": 6.234786775275515e-05, "loss": 1.0931, "step": 28254 }, { "epoch": 1.9144250965512568, "grad_norm": 8.6925630569458, "learning_rate": 6.234649873365733e-05, "loss": 0.6575, "step": 28255 }, { "epoch": 1.914492851819229, "grad_norm": 6.426208019256592, "learning_rate": 6.234512971455953e-05, "loss": 0.589, "step": 28256 }, { "epoch": 1.914560607087201, "grad_norm": 5.321257591247559, "learning_rate": 6.234376069546171e-05, "loss": 0.6472, "step": 28257 }, { "epoch": 1.914628362355173, "grad_norm": 5.475333213806152, "learning_rate": 6.234239167636389e-05, "loss": 0.5253, "step": 28258 }, { "epoch": 1.9146961176231452, "grad_norm": 7.373652935028076, "learning_rate": 6.234102265726607e-05, "loss": 0.6109, "step": 28259 }, { "epoch": 1.9147638728911174, "grad_norm": 5.130527019500732, "learning_rate": 6.233965363816826e-05, "loss": 0.6292, "step": 28260 }, { "epoch": 1.9148316281590894, "grad_norm": 7.459865093231201, "learning_rate": 6.233828461907044e-05, "loss": 0.6137, "step": 28261 }, { "epoch": 1.9148993834270613, "grad_norm": 6.567488670349121, "learning_rate": 6.233691559997262e-05, "loss": 0.6082, "step": 28262 }, { "epoch": 1.9149671386950335, "grad_norm": 5.790989398956299, "learning_rate": 6.23355465808748e-05, "loss": 0.4918, "step": 28263 }, { "epoch": 1.9150348939630057, "grad_norm": 6.416210174560547, "learning_rate": 6.233417756177698e-05, "loss": 0.7774, "step": 28264 }, { "epoch": 1.9151026492309777, "grad_norm": 5.22403621673584, "learning_rate": 6.233280854267918e-05, "loss": 0.6461, "step": 28265 }, { "epoch": 1.9151704044989497, "grad_norm": 4.517313003540039, "learning_rate": 6.233143952358136e-05, "loss": 0.6127, "step": 28266 }, { "epoch": 1.915238159766922, "grad_norm": 7.204422950744629, "learning_rate": 6.233007050448354e-05, "loss": 0.6057, "step": 28267 }, { "epoch": 1.9153059150348941, "grad_norm": 8.016009330749512, "learning_rate": 6.232870148538572e-05, "loss": 0.8231, "step": 28268 }, { "epoch": 1.915373670302866, "grad_norm": 6.618776798248291, "learning_rate": 6.232733246628791e-05, "loss": 0.6906, "step": 28269 }, { "epoch": 1.915441425570838, "grad_norm": 5.752885341644287, "learning_rate": 6.23259634471901e-05, "loss": 0.5331, "step": 28270 }, { "epoch": 1.9155091808388103, "grad_norm": 4.6555986404418945, "learning_rate": 6.232459442809227e-05, "loss": 0.6287, "step": 28271 }, { "epoch": 1.9155769361067823, "grad_norm": 4.94518518447876, "learning_rate": 6.232322540899445e-05, "loss": 0.6393, "step": 28272 }, { "epoch": 1.9156446913747542, "grad_norm": 7.24334716796875, "learning_rate": 6.232185638989663e-05, "loss": 0.7715, "step": 28273 }, { "epoch": 1.9157124466427264, "grad_norm": 6.514208793640137, "learning_rate": 6.232048737079883e-05, "loss": 0.835, "step": 28274 }, { "epoch": 1.9157802019106986, "grad_norm": 6.295428276062012, "learning_rate": 6.231911835170101e-05, "loss": 0.7749, "step": 28275 }, { "epoch": 1.9158479571786706, "grad_norm": 4.065423965454102, "learning_rate": 6.231774933260319e-05, "loss": 0.712, "step": 28276 }, { "epoch": 1.9159157124466426, "grad_norm": 6.354039192199707, "learning_rate": 6.231638031350537e-05, "loss": 0.7686, "step": 28277 }, { "epoch": 1.9159834677146148, "grad_norm": 10.755304336547852, "learning_rate": 6.231501129440755e-05, "loss": 0.6289, "step": 28278 }, { "epoch": 1.916051222982587, "grad_norm": 4.785209655761719, "learning_rate": 6.231364227530974e-05, "loss": 0.58, "step": 28279 }, { "epoch": 1.916118978250559, "grad_norm": 6.699982643127441, "learning_rate": 6.231227325621192e-05, "loss": 0.6559, "step": 28280 }, { "epoch": 1.916186733518531, "grad_norm": 7.671614646911621, "learning_rate": 6.23109042371141e-05, "loss": 0.6241, "step": 28281 }, { "epoch": 1.9162544887865032, "grad_norm": 5.376356601715088, "learning_rate": 6.230953521801629e-05, "loss": 0.4436, "step": 28282 }, { "epoch": 1.9163222440544754, "grad_norm": 5.6847405433654785, "learning_rate": 6.230816619891848e-05, "loss": 0.6701, "step": 28283 }, { "epoch": 1.9163899993224474, "grad_norm": 7.662397384643555, "learning_rate": 6.230679717982066e-05, "loss": 0.592, "step": 28284 }, { "epoch": 1.9164577545904193, "grad_norm": 4.956202030181885, "learning_rate": 6.230542816072284e-05, "loss": 0.7348, "step": 28285 }, { "epoch": 1.9165255098583915, "grad_norm": 6.086929798126221, "learning_rate": 6.230405914162502e-05, "loss": 0.5697, "step": 28286 }, { "epoch": 1.9165932651263635, "grad_norm": 4.660053730010986, "learning_rate": 6.230269012252721e-05, "loss": 0.6163, "step": 28287 }, { "epoch": 1.9166610203943355, "grad_norm": 6.749231815338135, "learning_rate": 6.23013211034294e-05, "loss": 0.6305, "step": 28288 }, { "epoch": 1.9167287756623077, "grad_norm": 4.755308151245117, "learning_rate": 6.229995208433157e-05, "loss": 0.502, "step": 28289 }, { "epoch": 1.91679653093028, "grad_norm": 6.753567695617676, "learning_rate": 6.229858306523377e-05, "loss": 0.5583, "step": 28290 }, { "epoch": 1.9168642861982519, "grad_norm": 8.098618507385254, "learning_rate": 6.229721404613595e-05, "loss": 0.592, "step": 28291 }, { "epoch": 1.9169320414662239, "grad_norm": 5.910730838775635, "learning_rate": 6.229584502703813e-05, "loss": 0.6777, "step": 28292 }, { "epoch": 1.916999796734196, "grad_norm": 4.5628204345703125, "learning_rate": 6.229447600794032e-05, "loss": 0.6705, "step": 28293 }, { "epoch": 1.9170675520021683, "grad_norm": 7.88828706741333, "learning_rate": 6.22931069888425e-05, "loss": 0.6232, "step": 28294 }, { "epoch": 1.9171353072701403, "grad_norm": 7.131038665771484, "learning_rate": 6.229173796974468e-05, "loss": 0.6202, "step": 28295 }, { "epoch": 1.9172030625381122, "grad_norm": 6.609604358673096, "learning_rate": 6.229036895064686e-05, "loss": 0.6097, "step": 28296 }, { "epoch": 1.9172708178060844, "grad_norm": 7.910763263702393, "learning_rate": 6.228899993154906e-05, "loss": 0.7789, "step": 28297 }, { "epoch": 1.9173385730740566, "grad_norm": 6.339363098144531, "learning_rate": 6.228763091245124e-05, "loss": 0.4745, "step": 28298 }, { "epoch": 1.9174063283420286, "grad_norm": 5.2845048904418945, "learning_rate": 6.228626189335342e-05, "loss": 0.6373, "step": 28299 }, { "epoch": 1.9174740836100006, "grad_norm": 5.222335338592529, "learning_rate": 6.22848928742556e-05, "loss": 0.6928, "step": 28300 }, { "epoch": 1.9175418388779728, "grad_norm": 6.8832621574401855, "learning_rate": 6.228352385515778e-05, "loss": 0.5725, "step": 28301 }, { "epoch": 1.917609594145945, "grad_norm": 5.24762487411499, "learning_rate": 6.228215483605997e-05, "loss": 0.5172, "step": 28302 }, { "epoch": 1.917677349413917, "grad_norm": 6.291989803314209, "learning_rate": 6.228078581696215e-05, "loss": 0.6913, "step": 28303 }, { "epoch": 1.917745104681889, "grad_norm": 7.523410797119141, "learning_rate": 6.227941679786433e-05, "loss": 0.561, "step": 28304 }, { "epoch": 1.9178128599498612, "grad_norm": 6.572514057159424, "learning_rate": 6.227804777876651e-05, "loss": 0.5474, "step": 28305 }, { "epoch": 1.9178806152178332, "grad_norm": 4.8998894691467285, "learning_rate": 6.227667875966871e-05, "loss": 0.7243, "step": 28306 }, { "epoch": 1.9179483704858051, "grad_norm": 7.965509414672852, "learning_rate": 6.227530974057089e-05, "loss": 0.7155, "step": 28307 }, { "epoch": 1.9180161257537773, "grad_norm": 4.83542013168335, "learning_rate": 6.227394072147307e-05, "loss": 0.7703, "step": 28308 }, { "epoch": 1.9180838810217495, "grad_norm": 4.789363384246826, "learning_rate": 6.227257170237525e-05, "loss": 0.4702, "step": 28309 }, { "epoch": 1.9181516362897215, "grad_norm": 4.59388542175293, "learning_rate": 6.227120268327743e-05, "loss": 0.5372, "step": 28310 }, { "epoch": 1.9182193915576935, "grad_norm": 6.926321506500244, "learning_rate": 6.226983366417962e-05, "loss": 0.6555, "step": 28311 }, { "epoch": 1.9182871468256657, "grad_norm": 6.62013053894043, "learning_rate": 6.22684646450818e-05, "loss": 0.6109, "step": 28312 }, { "epoch": 1.918354902093638, "grad_norm": 5.811066150665283, "learning_rate": 6.226709562598398e-05, "loss": 0.7588, "step": 28313 }, { "epoch": 1.9184226573616099, "grad_norm": 4.151942729949951, "learning_rate": 6.226572660688616e-05, "loss": 0.5746, "step": 28314 }, { "epoch": 1.9184904126295819, "grad_norm": 5.185586452484131, "learning_rate": 6.226435758778836e-05, "loss": 0.5726, "step": 28315 }, { "epoch": 1.918558167897554, "grad_norm": 5.529595851898193, "learning_rate": 6.226298856869054e-05, "loss": 0.866, "step": 28316 }, { "epoch": 1.9186259231655263, "grad_norm": 5.73392915725708, "learning_rate": 6.226161954959272e-05, "loss": 0.5977, "step": 28317 }, { "epoch": 1.9186936784334983, "grad_norm": 6.077703952789307, "learning_rate": 6.22602505304949e-05, "loss": 0.68, "step": 28318 }, { "epoch": 1.9187614337014702, "grad_norm": 4.9379167556762695, "learning_rate": 6.225888151139708e-05, "loss": 0.5862, "step": 28319 }, { "epoch": 1.9188291889694424, "grad_norm": 6.101070404052734, "learning_rate": 6.225751249229927e-05, "loss": 0.6215, "step": 28320 }, { "epoch": 1.9188969442374144, "grad_norm": 4.113830089569092, "learning_rate": 6.225614347320145e-05, "loss": 0.6458, "step": 28321 }, { "epoch": 1.9189646995053864, "grad_norm": 7.528818130493164, "learning_rate": 6.225477445410363e-05, "loss": 0.7651, "step": 28322 }, { "epoch": 1.9190324547733586, "grad_norm": 7.304696083068848, "learning_rate": 6.225340543500581e-05, "loss": 0.6152, "step": 28323 }, { "epoch": 1.9191002100413308, "grad_norm": 5.629587173461914, "learning_rate": 6.2252036415908e-05, "loss": 0.6281, "step": 28324 }, { "epoch": 1.9191679653093028, "grad_norm": 8.370487213134766, "learning_rate": 6.225066739681019e-05, "loss": 0.6463, "step": 28325 }, { "epoch": 1.9192357205772748, "grad_norm": 7.787399768829346, "learning_rate": 6.224929837771237e-05, "loss": 0.5466, "step": 28326 }, { "epoch": 1.919303475845247, "grad_norm": 6.431150436401367, "learning_rate": 6.224792935861455e-05, "loss": 0.6754, "step": 28327 }, { "epoch": 1.9193712311132192, "grad_norm": 12.079461097717285, "learning_rate": 6.224656033951673e-05, "loss": 0.5908, "step": 28328 }, { "epoch": 1.9194389863811911, "grad_norm": 6.183928489685059, "learning_rate": 6.224519132041892e-05, "loss": 0.5821, "step": 28329 }, { "epoch": 1.9195067416491631, "grad_norm": 5.663132190704346, "learning_rate": 6.22438223013211e-05, "loss": 0.6368, "step": 28330 }, { "epoch": 1.9195744969171353, "grad_norm": 4.822798252105713, "learning_rate": 6.224245328222328e-05, "loss": 0.811, "step": 28331 }, { "epoch": 1.9196422521851075, "grad_norm": 6.43221378326416, "learning_rate": 6.224108426312546e-05, "loss": 0.764, "step": 28332 }, { "epoch": 1.9197100074530795, "grad_norm": 4.907687187194824, "learning_rate": 6.223971524402766e-05, "loss": 0.6305, "step": 28333 }, { "epoch": 1.9197777627210515, "grad_norm": 5.310521602630615, "learning_rate": 6.223834622492984e-05, "loss": 0.6364, "step": 28334 }, { "epoch": 1.9198455179890237, "grad_norm": 6.2660298347473145, "learning_rate": 6.223697720583202e-05, "loss": 0.5639, "step": 28335 }, { "epoch": 1.9199132732569957, "grad_norm": 5.1752190589904785, "learning_rate": 6.223560818673421e-05, "loss": 0.6361, "step": 28336 }, { "epoch": 1.9199810285249677, "grad_norm": 3.833684206008911, "learning_rate": 6.22342391676364e-05, "loss": 0.4145, "step": 28337 }, { "epoch": 1.9200487837929399, "grad_norm": 5.313507556915283, "learning_rate": 6.223287014853857e-05, "loss": 0.7789, "step": 28338 }, { "epoch": 1.920116539060912, "grad_norm": 6.328597068786621, "learning_rate": 6.223150112944077e-05, "loss": 0.666, "step": 28339 }, { "epoch": 1.920184294328884, "grad_norm": 6.812235355377197, "learning_rate": 6.223013211034295e-05, "loss": 0.7071, "step": 28340 }, { "epoch": 1.920252049596856, "grad_norm": 5.5278639793396, "learning_rate": 6.222876309124513e-05, "loss": 0.6003, "step": 28341 }, { "epoch": 1.9203198048648282, "grad_norm": 5.520150184631348, "learning_rate": 6.222739407214731e-05, "loss": 0.6558, "step": 28342 }, { "epoch": 1.9203875601328004, "grad_norm": 4.875969409942627, "learning_rate": 6.22260250530495e-05, "loss": 0.6381, "step": 28343 }, { "epoch": 1.9204553154007724, "grad_norm": 4.728352069854736, "learning_rate": 6.222465603395168e-05, "loss": 0.5331, "step": 28344 }, { "epoch": 1.9205230706687444, "grad_norm": 6.876324653625488, "learning_rate": 6.222328701485386e-05, "loss": 0.8128, "step": 28345 }, { "epoch": 1.9205908259367166, "grad_norm": 7.1404709815979, "learning_rate": 6.222191799575604e-05, "loss": 0.5839, "step": 28346 }, { "epoch": 1.9206585812046888, "grad_norm": 5.399140357971191, "learning_rate": 6.222054897665824e-05, "loss": 0.7274, "step": 28347 }, { "epoch": 1.9207263364726608, "grad_norm": 5.289692401885986, "learning_rate": 6.221917995756042e-05, "loss": 0.6635, "step": 28348 }, { "epoch": 1.9207940917406328, "grad_norm": 5.1045756340026855, "learning_rate": 6.22178109384626e-05, "loss": 0.6614, "step": 28349 }, { "epoch": 1.920861847008605, "grad_norm": 9.461718559265137, "learning_rate": 6.221644191936478e-05, "loss": 0.5649, "step": 28350 }, { "epoch": 1.9209296022765772, "grad_norm": 6.942845821380615, "learning_rate": 6.221507290026696e-05, "loss": 0.4936, "step": 28351 }, { "epoch": 1.920997357544549, "grad_norm": 6.64108419418335, "learning_rate": 6.221370388116915e-05, "loss": 0.6832, "step": 28352 }, { "epoch": 1.9210651128125211, "grad_norm": 7.081848621368408, "learning_rate": 6.221233486207133e-05, "loss": 0.7264, "step": 28353 }, { "epoch": 1.9211328680804933, "grad_norm": 3.846511125564575, "learning_rate": 6.221096584297351e-05, "loss": 0.6487, "step": 28354 }, { "epoch": 1.9212006233484653, "grad_norm": 5.351787090301514, "learning_rate": 6.22095968238757e-05, "loss": 0.6807, "step": 28355 }, { "epoch": 1.9212683786164373, "grad_norm": 6.555635452270508, "learning_rate": 6.220822780477787e-05, "loss": 0.4994, "step": 28356 }, { "epoch": 1.9213361338844095, "grad_norm": 6.432476997375488, "learning_rate": 6.220685878568007e-05, "loss": 0.7129, "step": 28357 }, { "epoch": 1.9214038891523817, "grad_norm": 5.652860641479492, "learning_rate": 6.220548976658225e-05, "loss": 0.62, "step": 28358 }, { "epoch": 1.9214716444203537, "grad_norm": 9.564810752868652, "learning_rate": 6.220412074748443e-05, "loss": 0.7323, "step": 28359 }, { "epoch": 1.9215393996883257, "grad_norm": 8.773537635803223, "learning_rate": 6.220275172838661e-05, "loss": 0.7746, "step": 28360 }, { "epoch": 1.9216071549562979, "grad_norm": 5.790295600891113, "learning_rate": 6.22013827092888e-05, "loss": 0.6731, "step": 28361 }, { "epoch": 1.92167491022427, "grad_norm": 5.169703483581543, "learning_rate": 6.220001369019098e-05, "loss": 0.6529, "step": 28362 }, { "epoch": 1.921742665492242, "grad_norm": 8.459930419921875, "learning_rate": 6.219864467109316e-05, "loss": 0.6874, "step": 28363 }, { "epoch": 1.921810420760214, "grad_norm": 5.379633903503418, "learning_rate": 6.219727565199534e-05, "loss": 0.8499, "step": 28364 }, { "epoch": 1.9218781760281862, "grad_norm": 4.733797073364258, "learning_rate": 6.219590663289752e-05, "loss": 0.5795, "step": 28365 }, { "epoch": 1.9219459312961584, "grad_norm": 7.72140645980835, "learning_rate": 6.219453761379972e-05, "loss": 0.736, "step": 28366 }, { "epoch": 1.9220136865641304, "grad_norm": 4.898532390594482, "learning_rate": 6.21931685947019e-05, "loss": 0.636, "step": 28367 }, { "epoch": 1.9220814418321024, "grad_norm": 8.962363243103027, "learning_rate": 6.219179957560408e-05, "loss": 0.5774, "step": 28368 }, { "epoch": 1.9221491971000746, "grad_norm": 8.444782257080078, "learning_rate": 6.219043055650626e-05, "loss": 0.6625, "step": 28369 }, { "epoch": 1.9222169523680466, "grad_norm": 7.254874229431152, "learning_rate": 6.218906153740845e-05, "loss": 0.6205, "step": 28370 }, { "epoch": 1.9222847076360186, "grad_norm": 4.62468147277832, "learning_rate": 6.218769251831063e-05, "loss": 0.744, "step": 28371 }, { "epoch": 1.9223524629039908, "grad_norm": 4.281252861022949, "learning_rate": 6.218632349921281e-05, "loss": 0.6246, "step": 28372 }, { "epoch": 1.922420218171963, "grad_norm": 7.967668533325195, "learning_rate": 6.2184954480115e-05, "loss": 0.5969, "step": 28373 }, { "epoch": 1.922487973439935, "grad_norm": 5.637409210205078, "learning_rate": 6.218358546101717e-05, "loss": 0.566, "step": 28374 }, { "epoch": 1.922555728707907, "grad_norm": 5.185787677764893, "learning_rate": 6.218221644191937e-05, "loss": 0.6512, "step": 28375 }, { "epoch": 1.9226234839758791, "grad_norm": 5.771216869354248, "learning_rate": 6.218084742282155e-05, "loss": 0.8431, "step": 28376 }, { "epoch": 1.9226912392438513, "grad_norm": 6.910263538360596, "learning_rate": 6.217947840372373e-05, "loss": 0.4434, "step": 28377 }, { "epoch": 1.9227589945118233, "grad_norm": 7.867201805114746, "learning_rate": 6.217810938462591e-05, "loss": 0.9529, "step": 28378 }, { "epoch": 1.9228267497797953, "grad_norm": 5.661318778991699, "learning_rate": 6.21767403655281e-05, "loss": 0.746, "step": 28379 }, { "epoch": 1.9228945050477675, "grad_norm": 4.958561897277832, "learning_rate": 6.217537134643028e-05, "loss": 0.5047, "step": 28380 }, { "epoch": 1.9229622603157397, "grad_norm": 9.883639335632324, "learning_rate": 6.217400232733246e-05, "loss": 0.6312, "step": 28381 }, { "epoch": 1.9230300155837117, "grad_norm": 6.269084453582764, "learning_rate": 6.217263330823466e-05, "loss": 0.5894, "step": 28382 }, { "epoch": 1.9230977708516837, "grad_norm": 6.202548980712891, "learning_rate": 6.217126428913684e-05, "loss": 0.5531, "step": 28383 }, { "epoch": 1.9231655261196559, "grad_norm": 5.310699462890625, "learning_rate": 6.216989527003902e-05, "loss": 0.7384, "step": 28384 }, { "epoch": 1.9232332813876278, "grad_norm": 3.876499891281128, "learning_rate": 6.216852625094121e-05, "loss": 0.5332, "step": 28385 }, { "epoch": 1.9233010366555998, "grad_norm": 5.699923038482666, "learning_rate": 6.216715723184339e-05, "loss": 0.6171, "step": 28386 }, { "epoch": 1.923368791923572, "grad_norm": 6.497870445251465, "learning_rate": 6.216578821274557e-05, "loss": 0.7984, "step": 28387 }, { "epoch": 1.9234365471915442, "grad_norm": 5.313953399658203, "learning_rate": 6.216441919364775e-05, "loss": 0.4984, "step": 28388 }, { "epoch": 1.9235043024595162, "grad_norm": 5.88501501083374, "learning_rate": 6.216305017454995e-05, "loss": 0.6689, "step": 28389 }, { "epoch": 1.9235720577274882, "grad_norm": 4.937823295593262, "learning_rate": 6.216168115545213e-05, "loss": 0.762, "step": 28390 }, { "epoch": 1.9236398129954604, "grad_norm": 4.382574081420898, "learning_rate": 6.216031213635431e-05, "loss": 0.8321, "step": 28391 }, { "epoch": 1.9237075682634326, "grad_norm": 6.748155117034912, "learning_rate": 6.215894311725649e-05, "loss": 0.6799, "step": 28392 }, { "epoch": 1.9237753235314046, "grad_norm": 4.244483470916748, "learning_rate": 6.215757409815868e-05, "loss": 0.6766, "step": 28393 }, { "epoch": 1.9238430787993765, "grad_norm": 4.990312576293945, "learning_rate": 6.215620507906086e-05, "loss": 0.5099, "step": 28394 }, { "epoch": 1.9239108340673488, "grad_norm": 5.214026927947998, "learning_rate": 6.215483605996304e-05, "loss": 0.7437, "step": 28395 }, { "epoch": 1.923978589335321, "grad_norm": 4.402276515960693, "learning_rate": 6.215346704086522e-05, "loss": 0.7217, "step": 28396 }, { "epoch": 1.924046344603293, "grad_norm": 5.443220138549805, "learning_rate": 6.21520980217674e-05, "loss": 0.6559, "step": 28397 }, { "epoch": 1.924114099871265, "grad_norm": 5.6022419929504395, "learning_rate": 6.21507290026696e-05, "loss": 0.7986, "step": 28398 }, { "epoch": 1.9241818551392371, "grad_norm": 5.732864856719971, "learning_rate": 6.214935998357178e-05, "loss": 0.5843, "step": 28399 }, { "epoch": 1.9242496104072093, "grad_norm": 6.277917861938477, "learning_rate": 6.214799096447396e-05, "loss": 0.7361, "step": 28400 }, { "epoch": 1.924317365675181, "grad_norm": 6.9203200340271, "learning_rate": 6.214662194537614e-05, "loss": 0.5985, "step": 28401 }, { "epoch": 1.9243851209431533, "grad_norm": 11.026823043823242, "learning_rate": 6.214525292627833e-05, "loss": 0.417, "step": 28402 }, { "epoch": 1.9244528762111255, "grad_norm": 5.364033222198486, "learning_rate": 6.214388390718051e-05, "loss": 0.6262, "step": 28403 }, { "epoch": 1.9245206314790975, "grad_norm": 6.0806121826171875, "learning_rate": 6.214251488808269e-05, "loss": 0.6453, "step": 28404 }, { "epoch": 1.9245883867470694, "grad_norm": 5.099963188171387, "learning_rate": 6.214114586898487e-05, "loss": 0.689, "step": 28405 }, { "epoch": 1.9246561420150416, "grad_norm": 6.504395008087158, "learning_rate": 6.213977684988705e-05, "loss": 0.4991, "step": 28406 }, { "epoch": 1.9247238972830139, "grad_norm": 5.507768154144287, "learning_rate": 6.213840783078925e-05, "loss": 0.5854, "step": 28407 }, { "epoch": 1.9247916525509858, "grad_norm": 4.35896635055542, "learning_rate": 6.213703881169143e-05, "loss": 0.49, "step": 28408 }, { "epoch": 1.9248594078189578, "grad_norm": 6.360691070556641, "learning_rate": 6.213566979259361e-05, "loss": 0.5208, "step": 28409 }, { "epoch": 1.92492716308693, "grad_norm": 6.072938919067383, "learning_rate": 6.213430077349579e-05, "loss": 0.6175, "step": 28410 }, { "epoch": 1.9249949183549022, "grad_norm": 5.432000160217285, "learning_rate": 6.213293175439797e-05, "loss": 0.5428, "step": 28411 }, { "epoch": 1.9250626736228742, "grad_norm": 8.281515121459961, "learning_rate": 6.213156273530016e-05, "loss": 0.5802, "step": 28412 }, { "epoch": 1.9251304288908462, "grad_norm": 5.940629959106445, "learning_rate": 6.213019371620234e-05, "loss": 0.7134, "step": 28413 }, { "epoch": 1.9251981841588184, "grad_norm": 5.551223278045654, "learning_rate": 6.212882469710452e-05, "loss": 0.5487, "step": 28414 }, { "epoch": 1.9252659394267906, "grad_norm": 7.231472015380859, "learning_rate": 6.21274556780067e-05, "loss": 0.5203, "step": 28415 }, { "epoch": 1.9253336946947626, "grad_norm": 8.01622200012207, "learning_rate": 6.21260866589089e-05, "loss": 0.5683, "step": 28416 }, { "epoch": 1.9254014499627345, "grad_norm": 6.751226902008057, "learning_rate": 6.212471763981108e-05, "loss": 0.6902, "step": 28417 }, { "epoch": 1.9254692052307067, "grad_norm": 4.8643646240234375, "learning_rate": 6.212334862071326e-05, "loss": 0.5182, "step": 28418 }, { "epoch": 1.9255369604986787, "grad_norm": 5.386687278747559, "learning_rate": 6.212197960161544e-05, "loss": 0.6624, "step": 28419 }, { "epoch": 1.9256047157666507, "grad_norm": 4.824132442474365, "learning_rate": 6.212061058251762e-05, "loss": 0.6084, "step": 28420 }, { "epoch": 1.925672471034623, "grad_norm": 7.017910480499268, "learning_rate": 6.211924156341981e-05, "loss": 0.7911, "step": 28421 }, { "epoch": 1.9257402263025951, "grad_norm": 11.070582389831543, "learning_rate": 6.2117872544322e-05, "loss": 0.7902, "step": 28422 }, { "epoch": 1.925807981570567, "grad_norm": 11.75324535369873, "learning_rate": 6.211650352522417e-05, "loss": 0.6171, "step": 28423 }, { "epoch": 1.925875736838539, "grad_norm": 9.73751163482666, "learning_rate": 6.211513450612635e-05, "loss": 0.7562, "step": 28424 }, { "epoch": 1.9259434921065113, "grad_norm": 7.741608142852783, "learning_rate": 6.211376548702855e-05, "loss": 0.7265, "step": 28425 }, { "epoch": 1.9260112473744835, "grad_norm": 6.757630348205566, "learning_rate": 6.211239646793073e-05, "loss": 0.8659, "step": 28426 }, { "epoch": 1.9260790026424555, "grad_norm": 6.108337879180908, "learning_rate": 6.211102744883291e-05, "loss": 0.5452, "step": 28427 }, { "epoch": 1.9261467579104274, "grad_norm": 6.750048637390137, "learning_rate": 6.210965842973509e-05, "loss": 0.6647, "step": 28428 }, { "epoch": 1.9262145131783996, "grad_norm": 6.668140888214111, "learning_rate": 6.210828941063728e-05, "loss": 0.9092, "step": 28429 }, { "epoch": 1.9262822684463718, "grad_norm": 5.9531989097595215, "learning_rate": 6.210692039153946e-05, "loss": 0.7256, "step": 28430 }, { "epoch": 1.9263500237143438, "grad_norm": 9.292762756347656, "learning_rate": 6.210555137244166e-05, "loss": 0.8445, "step": 28431 }, { "epoch": 1.9264177789823158, "grad_norm": 8.323636054992676, "learning_rate": 6.210418235334384e-05, "loss": 0.706, "step": 28432 }, { "epoch": 1.926485534250288, "grad_norm": 5.794934272766113, "learning_rate": 6.210281333424602e-05, "loss": 0.5797, "step": 28433 }, { "epoch": 1.92655328951826, "grad_norm": 8.127436637878418, "learning_rate": 6.21014443151482e-05, "loss": 0.8369, "step": 28434 }, { "epoch": 1.926621044786232, "grad_norm": 4.921231746673584, "learning_rate": 6.210007529605039e-05, "loss": 0.4797, "step": 28435 }, { "epoch": 1.9266888000542042, "grad_norm": 5.086030960083008, "learning_rate": 6.209870627695257e-05, "loss": 0.5558, "step": 28436 }, { "epoch": 1.9267565553221764, "grad_norm": 7.503446102142334, "learning_rate": 6.209733725785475e-05, "loss": 0.7163, "step": 28437 }, { "epoch": 1.9268243105901484, "grad_norm": 5.754150390625, "learning_rate": 6.209596823875693e-05, "loss": 0.721, "step": 28438 }, { "epoch": 1.9268920658581203, "grad_norm": 6.245065689086914, "learning_rate": 6.209459921965913e-05, "loss": 0.95, "step": 28439 }, { "epoch": 1.9269598211260925, "grad_norm": 5.816915988922119, "learning_rate": 6.209323020056131e-05, "loss": 0.717, "step": 28440 }, { "epoch": 1.9270275763940647, "grad_norm": 4.763542652130127, "learning_rate": 6.209186118146349e-05, "loss": 0.5979, "step": 28441 }, { "epoch": 1.9270953316620367, "grad_norm": 5.390736103057861, "learning_rate": 6.209049216236567e-05, "loss": 0.6346, "step": 28442 }, { "epoch": 1.9271630869300087, "grad_norm": 6.556180000305176, "learning_rate": 6.208912314326785e-05, "loss": 0.4704, "step": 28443 }, { "epoch": 1.927230842197981, "grad_norm": 6.33700704574585, "learning_rate": 6.208775412417004e-05, "loss": 0.5692, "step": 28444 }, { "epoch": 1.9272985974659531, "grad_norm": 4.112368583679199, "learning_rate": 6.208638510507222e-05, "loss": 0.5593, "step": 28445 }, { "epoch": 1.927366352733925, "grad_norm": 4.96721887588501, "learning_rate": 6.20850160859744e-05, "loss": 0.7625, "step": 28446 }, { "epoch": 1.927434108001897, "grad_norm": 6.48582649230957, "learning_rate": 6.208364706687658e-05, "loss": 0.5426, "step": 28447 }, { "epoch": 1.9275018632698693, "grad_norm": 5.078929901123047, "learning_rate": 6.208227804777878e-05, "loss": 0.7287, "step": 28448 }, { "epoch": 1.9275696185378415, "grad_norm": 5.574972629547119, "learning_rate": 6.208090902868096e-05, "loss": 0.7802, "step": 28449 }, { "epoch": 1.9276373738058132, "grad_norm": 5.754945278167725, "learning_rate": 6.207954000958314e-05, "loss": 0.7155, "step": 28450 }, { "epoch": 1.9277051290737854, "grad_norm": 7.50304651260376, "learning_rate": 6.207817099048532e-05, "loss": 0.7906, "step": 28451 }, { "epoch": 1.9277728843417576, "grad_norm": 5.510854721069336, "learning_rate": 6.20768019713875e-05, "loss": 0.6915, "step": 28452 }, { "epoch": 1.9278406396097296, "grad_norm": 6.960733413696289, "learning_rate": 6.207543295228969e-05, "loss": 0.7342, "step": 28453 }, { "epoch": 1.9279083948777016, "grad_norm": 4.546530246734619, "learning_rate": 6.207406393319187e-05, "loss": 0.534, "step": 28454 }, { "epoch": 1.9279761501456738, "grad_norm": 6.371224880218506, "learning_rate": 6.207269491409405e-05, "loss": 0.6904, "step": 28455 }, { "epoch": 1.928043905413646, "grad_norm": 5.584114074707031, "learning_rate": 6.207132589499623e-05, "loss": 0.6313, "step": 28456 }, { "epoch": 1.928111660681618, "grad_norm": 5.565365314483643, "learning_rate": 6.206995687589841e-05, "loss": 0.5504, "step": 28457 }, { "epoch": 1.92817941594959, "grad_norm": 7.050447940826416, "learning_rate": 6.206858785680061e-05, "loss": 0.8484, "step": 28458 }, { "epoch": 1.9282471712175622, "grad_norm": 6.632272720336914, "learning_rate": 6.206721883770279e-05, "loss": 0.5142, "step": 28459 }, { "epoch": 1.9283149264855344, "grad_norm": 6.201747417449951, "learning_rate": 6.206584981860497e-05, "loss": 0.6743, "step": 28460 }, { "epoch": 1.9283826817535064, "grad_norm": 6.616166114807129, "learning_rate": 6.206448079950715e-05, "loss": 0.5409, "step": 28461 }, { "epoch": 1.9284504370214783, "grad_norm": 5.979218482971191, "learning_rate": 6.206311178040934e-05, "loss": 0.6618, "step": 28462 }, { "epoch": 1.9285181922894505, "grad_norm": 5.146771430969238, "learning_rate": 6.206174276131152e-05, "loss": 0.7307, "step": 28463 }, { "epoch": 1.9285859475574227, "grad_norm": 6.374832630157471, "learning_rate": 6.20603737422137e-05, "loss": 0.6169, "step": 28464 }, { "epoch": 1.9286537028253947, "grad_norm": 5.30745267868042, "learning_rate": 6.205900472311588e-05, "loss": 0.5528, "step": 28465 }, { "epoch": 1.9287214580933667, "grad_norm": 5.535842418670654, "learning_rate": 6.205763570401806e-05, "loss": 0.5735, "step": 28466 }, { "epoch": 1.928789213361339, "grad_norm": 4.684143543243408, "learning_rate": 6.205626668492026e-05, "loss": 0.5714, "step": 28467 }, { "epoch": 1.9288569686293109, "grad_norm": 6.368638038635254, "learning_rate": 6.205489766582244e-05, "loss": 0.6865, "step": 28468 }, { "epoch": 1.9289247238972829, "grad_norm": 12.237223625183105, "learning_rate": 6.205352864672462e-05, "loss": 0.642, "step": 28469 }, { "epoch": 1.928992479165255, "grad_norm": 5.972848415374756, "learning_rate": 6.20521596276268e-05, "loss": 0.7328, "step": 28470 }, { "epoch": 1.9290602344332273, "grad_norm": 4.6239776611328125, "learning_rate": 6.205079060852899e-05, "loss": 0.6115, "step": 28471 }, { "epoch": 1.9291279897011993, "grad_norm": 3.920407772064209, "learning_rate": 6.204942158943117e-05, "loss": 0.5125, "step": 28472 }, { "epoch": 1.9291957449691712, "grad_norm": 5.393649578094482, "learning_rate": 6.204805257033335e-05, "loss": 0.5774, "step": 28473 }, { "epoch": 1.9292635002371434, "grad_norm": 5.25321102142334, "learning_rate": 6.204668355123553e-05, "loss": 0.5904, "step": 28474 }, { "epoch": 1.9293312555051156, "grad_norm": 7.831811904907227, "learning_rate": 6.204531453213773e-05, "loss": 0.6949, "step": 28475 }, { "epoch": 1.9293990107730876, "grad_norm": 6.814644813537598, "learning_rate": 6.204394551303991e-05, "loss": 0.7658, "step": 28476 }, { "epoch": 1.9294667660410596, "grad_norm": 5.90346097946167, "learning_rate": 6.204257649394209e-05, "loss": 0.596, "step": 28477 }, { "epoch": 1.9295345213090318, "grad_norm": 5.404088020324707, "learning_rate": 6.204120747484428e-05, "loss": 0.5762, "step": 28478 }, { "epoch": 1.929602276577004, "grad_norm": 4.36534309387207, "learning_rate": 6.203983845574646e-05, "loss": 0.6383, "step": 28479 }, { "epoch": 1.929670031844976, "grad_norm": 5.728662967681885, "learning_rate": 6.203846943664864e-05, "loss": 0.5758, "step": 28480 }, { "epoch": 1.929737787112948, "grad_norm": 6.326351642608643, "learning_rate": 6.203710041755084e-05, "loss": 0.6358, "step": 28481 }, { "epoch": 1.9298055423809202, "grad_norm": 4.751561164855957, "learning_rate": 6.203573139845302e-05, "loss": 0.6798, "step": 28482 }, { "epoch": 1.9298732976488921, "grad_norm": 9.217057228088379, "learning_rate": 6.20343623793552e-05, "loss": 0.4917, "step": 28483 }, { "epoch": 1.9299410529168641, "grad_norm": 6.551508903503418, "learning_rate": 6.203299336025738e-05, "loss": 0.7157, "step": 28484 }, { "epoch": 1.9300088081848363, "grad_norm": 5.003602504730225, "learning_rate": 6.203162434115957e-05, "loss": 0.5762, "step": 28485 }, { "epoch": 1.9300765634528085, "grad_norm": 5.196956157684326, "learning_rate": 6.203025532206175e-05, "loss": 0.6815, "step": 28486 }, { "epoch": 1.9301443187207805, "grad_norm": 6.768782615661621, "learning_rate": 6.202888630296393e-05, "loss": 0.8213, "step": 28487 }, { "epoch": 1.9302120739887525, "grad_norm": 7.272469997406006, "learning_rate": 6.202751728386611e-05, "loss": 0.823, "step": 28488 }, { "epoch": 1.9302798292567247, "grad_norm": 7.48783016204834, "learning_rate": 6.202614826476829e-05, "loss": 0.6396, "step": 28489 }, { "epoch": 1.930347584524697, "grad_norm": 5.605549335479736, "learning_rate": 6.202477924567049e-05, "loss": 0.6027, "step": 28490 }, { "epoch": 1.9304153397926689, "grad_norm": 6.688634872436523, "learning_rate": 6.202341022657267e-05, "loss": 0.6856, "step": 28491 }, { "epoch": 1.9304830950606409, "grad_norm": 6.407228469848633, "learning_rate": 6.202204120747485e-05, "loss": 0.6578, "step": 28492 }, { "epoch": 1.930550850328613, "grad_norm": 9.50770092010498, "learning_rate": 6.202067218837703e-05, "loss": 0.6565, "step": 28493 }, { "epoch": 1.9306186055965853, "grad_norm": 4.5346269607543945, "learning_rate": 6.201930316927922e-05, "loss": 0.6491, "step": 28494 }, { "epoch": 1.9306863608645572, "grad_norm": 8.925776481628418, "learning_rate": 6.20179341501814e-05, "loss": 0.6025, "step": 28495 }, { "epoch": 1.9307541161325292, "grad_norm": 6.782399654388428, "learning_rate": 6.201656513108358e-05, "loss": 0.7245, "step": 28496 }, { "epoch": 1.9308218714005014, "grad_norm": 5.405448913574219, "learning_rate": 6.201519611198576e-05, "loss": 0.5824, "step": 28497 }, { "epoch": 1.9308896266684736, "grad_norm": 6.046790599822998, "learning_rate": 6.201382709288794e-05, "loss": 0.6123, "step": 28498 }, { "epoch": 1.9309573819364454, "grad_norm": 4.8724260330200195, "learning_rate": 6.201245807379014e-05, "loss": 0.6524, "step": 28499 }, { "epoch": 1.9310251372044176, "grad_norm": 4.150418281555176, "learning_rate": 6.201108905469232e-05, "loss": 0.5389, "step": 28500 }, { "epoch": 1.9310928924723898, "grad_norm": 7.368474006652832, "learning_rate": 6.20097200355945e-05, "loss": 0.7815, "step": 28501 }, { "epoch": 1.9311606477403618, "grad_norm": 5.407767295837402, "learning_rate": 6.200835101649668e-05, "loss": 0.7005, "step": 28502 }, { "epoch": 1.9312284030083338, "grad_norm": 5.938120365142822, "learning_rate": 6.200698199739887e-05, "loss": 0.7076, "step": 28503 }, { "epoch": 1.931296158276306, "grad_norm": 5.1715264320373535, "learning_rate": 6.200561297830105e-05, "loss": 0.6374, "step": 28504 }, { "epoch": 1.9313639135442782, "grad_norm": 7.3596110343933105, "learning_rate": 6.200424395920323e-05, "loss": 0.6586, "step": 28505 }, { "epoch": 1.9314316688122501, "grad_norm": 9.151622772216797, "learning_rate": 6.200287494010541e-05, "loss": 0.596, "step": 28506 }, { "epoch": 1.9314994240802221, "grad_norm": 5.0445380210876465, "learning_rate": 6.20015059210076e-05, "loss": 0.5935, "step": 28507 }, { "epoch": 1.9315671793481943, "grad_norm": 4.483127117156982, "learning_rate": 6.200013690190979e-05, "loss": 0.6236, "step": 28508 }, { "epoch": 1.9316349346161665, "grad_norm": 6.973853588104248, "learning_rate": 6.199876788281197e-05, "loss": 0.5802, "step": 28509 }, { "epoch": 1.9317026898841385, "grad_norm": 7.5400071144104, "learning_rate": 6.199739886371415e-05, "loss": 0.6044, "step": 28510 }, { "epoch": 1.9317704451521105, "grad_norm": 6.371807098388672, "learning_rate": 6.199602984461633e-05, "loss": 0.8609, "step": 28511 }, { "epoch": 1.9318382004200827, "grad_norm": 6.54652738571167, "learning_rate": 6.199466082551851e-05, "loss": 0.8349, "step": 28512 }, { "epoch": 1.931905955688055, "grad_norm": 5.067972660064697, "learning_rate": 6.19932918064207e-05, "loss": 0.6699, "step": 28513 }, { "epoch": 1.9319737109560269, "grad_norm": 5.8682379722595215, "learning_rate": 6.199192278732288e-05, "loss": 0.547, "step": 28514 }, { "epoch": 1.9320414662239989, "grad_norm": 6.025839328765869, "learning_rate": 6.199055376822506e-05, "loss": 0.5254, "step": 28515 }, { "epoch": 1.932109221491971, "grad_norm": 6.526335716247559, "learning_rate": 6.198918474912724e-05, "loss": 0.6487, "step": 28516 }, { "epoch": 1.932176976759943, "grad_norm": 5.089259624481201, "learning_rate": 6.198781573002944e-05, "loss": 0.5588, "step": 28517 }, { "epoch": 1.932244732027915, "grad_norm": 6.412837028503418, "learning_rate": 6.198644671093162e-05, "loss": 0.8426, "step": 28518 }, { "epoch": 1.9323124872958872, "grad_norm": 4.779003143310547, "learning_rate": 6.19850776918338e-05, "loss": 0.7693, "step": 28519 }, { "epoch": 1.9323802425638594, "grad_norm": 8.069668769836426, "learning_rate": 6.198370867273598e-05, "loss": 0.7175, "step": 28520 }, { "epoch": 1.9324479978318314, "grad_norm": 7.956181049346924, "learning_rate": 6.198233965363817e-05, "loss": 1.0498, "step": 28521 }, { "epoch": 1.9325157530998034, "grad_norm": 5.370879173278809, "learning_rate": 6.198097063454035e-05, "loss": 0.6338, "step": 28522 }, { "epoch": 1.9325835083677756, "grad_norm": 4.606190204620361, "learning_rate": 6.197960161544253e-05, "loss": 0.5655, "step": 28523 }, { "epoch": 1.9326512636357478, "grad_norm": 12.843914031982422, "learning_rate": 6.197823259634473e-05, "loss": 0.5928, "step": 28524 }, { "epoch": 1.9327190189037198, "grad_norm": 4.216327667236328, "learning_rate": 6.197686357724691e-05, "loss": 0.6361, "step": 28525 }, { "epoch": 1.9327867741716918, "grad_norm": 7.817590713500977, "learning_rate": 6.197549455814909e-05, "loss": 0.7204, "step": 28526 }, { "epoch": 1.932854529439664, "grad_norm": 5.3290324211120605, "learning_rate": 6.197412553905128e-05, "loss": 0.6765, "step": 28527 }, { "epoch": 1.9329222847076362, "grad_norm": 5.772282600402832, "learning_rate": 6.197275651995346e-05, "loss": 0.7369, "step": 28528 }, { "epoch": 1.9329900399756081, "grad_norm": 7.335721492767334, "learning_rate": 6.197138750085564e-05, "loss": 0.6611, "step": 28529 }, { "epoch": 1.9330577952435801, "grad_norm": 7.809662818908691, "learning_rate": 6.197001848175782e-05, "loss": 0.6552, "step": 28530 }, { "epoch": 1.9331255505115523, "grad_norm": 8.958946228027344, "learning_rate": 6.196864946266002e-05, "loss": 0.5424, "step": 28531 }, { "epoch": 1.9331933057795243, "grad_norm": 5.2104082107543945, "learning_rate": 6.19672804435622e-05, "loss": 0.6959, "step": 28532 }, { "epoch": 1.9332610610474963, "grad_norm": 7.923717021942139, "learning_rate": 6.196591142446438e-05, "loss": 0.5582, "step": 28533 }, { "epoch": 1.9333288163154685, "grad_norm": 4.6489996910095215, "learning_rate": 6.196454240536656e-05, "loss": 0.6027, "step": 28534 }, { "epoch": 1.9333965715834407, "grad_norm": 6.054392337799072, "learning_rate": 6.196317338626875e-05, "loss": 0.5697, "step": 28535 }, { "epoch": 1.9334643268514127, "grad_norm": 5.184953689575195, "learning_rate": 6.196180436717093e-05, "loss": 0.6298, "step": 28536 }, { "epoch": 1.9335320821193847, "grad_norm": 8.842804908752441, "learning_rate": 6.196043534807311e-05, "loss": 0.6379, "step": 28537 }, { "epoch": 1.9335998373873569, "grad_norm": 6.06003475189209, "learning_rate": 6.195906632897529e-05, "loss": 0.725, "step": 28538 }, { "epoch": 1.933667592655329, "grad_norm": 6.783237457275391, "learning_rate": 6.195769730987747e-05, "loss": 0.7485, "step": 28539 }, { "epoch": 1.933735347923301, "grad_norm": 7.08988618850708, "learning_rate": 6.195632829077967e-05, "loss": 0.602, "step": 28540 }, { "epoch": 1.933803103191273, "grad_norm": 7.574702262878418, "learning_rate": 6.195495927168185e-05, "loss": 0.648, "step": 28541 }, { "epoch": 1.9338708584592452, "grad_norm": 5.2033233642578125, "learning_rate": 6.195359025258403e-05, "loss": 0.7427, "step": 28542 }, { "epoch": 1.9339386137272174, "grad_norm": 4.715076923370361, "learning_rate": 6.195222123348621e-05, "loss": 0.6111, "step": 28543 }, { "epoch": 1.9340063689951894, "grad_norm": 6.639047622680664, "learning_rate": 6.195085221438839e-05, "loss": 0.4473, "step": 28544 }, { "epoch": 1.9340741242631614, "grad_norm": 5.392805576324463, "learning_rate": 6.194948319529058e-05, "loss": 0.6149, "step": 28545 }, { "epoch": 1.9341418795311336, "grad_norm": 9.134385108947754, "learning_rate": 6.194811417619276e-05, "loss": 0.7422, "step": 28546 }, { "epoch": 1.9342096347991058, "grad_norm": 5.084778308868408, "learning_rate": 6.194674515709494e-05, "loss": 0.6837, "step": 28547 }, { "epoch": 1.9342773900670776, "grad_norm": 9.126777648925781, "learning_rate": 6.194537613799712e-05, "loss": 0.7457, "step": 28548 }, { "epoch": 1.9343451453350498, "grad_norm": 5.352545738220215, "learning_rate": 6.194400711889932e-05, "loss": 0.7279, "step": 28549 }, { "epoch": 1.934412900603022, "grad_norm": 4.972015857696533, "learning_rate": 6.19426380998015e-05, "loss": 0.7183, "step": 28550 }, { "epoch": 1.934480655870994, "grad_norm": 5.829220771789551, "learning_rate": 6.194126908070368e-05, "loss": 0.6428, "step": 28551 }, { "epoch": 1.934548411138966, "grad_norm": 3.7478229999542236, "learning_rate": 6.193990006160586e-05, "loss": 0.5828, "step": 28552 }, { "epoch": 1.9346161664069381, "grad_norm": 7.719437599182129, "learning_rate": 6.193853104250804e-05, "loss": 0.6079, "step": 28553 }, { "epoch": 1.9346839216749103, "grad_norm": 6.2139811515808105, "learning_rate": 6.193716202341023e-05, "loss": 0.4731, "step": 28554 }, { "epoch": 1.9347516769428823, "grad_norm": 8.157742500305176, "learning_rate": 6.193579300431241e-05, "loss": 0.7648, "step": 28555 }, { "epoch": 1.9348194322108543, "grad_norm": 8.286266326904297, "learning_rate": 6.193442398521459e-05, "loss": 0.8304, "step": 28556 }, { "epoch": 1.9348871874788265, "grad_norm": 5.625251770019531, "learning_rate": 6.193305496611677e-05, "loss": 0.6148, "step": 28557 }, { "epoch": 1.9349549427467987, "grad_norm": 11.140101432800293, "learning_rate": 6.193168594701897e-05, "loss": 0.4975, "step": 28558 }, { "epoch": 1.9350226980147707, "grad_norm": 5.472982883453369, "learning_rate": 6.193031692792115e-05, "loss": 0.7242, "step": 28559 }, { "epoch": 1.9350904532827427, "grad_norm": 5.094518661499023, "learning_rate": 6.192894790882333e-05, "loss": 0.6876, "step": 28560 }, { "epoch": 1.9351582085507149, "grad_norm": 7.059485912322998, "learning_rate": 6.192757888972551e-05, "loss": 0.6748, "step": 28561 }, { "epoch": 1.935225963818687, "grad_norm": 5.413349151611328, "learning_rate": 6.192620987062769e-05, "loss": 0.4237, "step": 28562 }, { "epoch": 1.935293719086659, "grad_norm": 5.512279033660889, "learning_rate": 6.192484085152988e-05, "loss": 0.7303, "step": 28563 }, { "epoch": 1.935361474354631, "grad_norm": 5.219598293304443, "learning_rate": 6.192347183243206e-05, "loss": 0.609, "step": 28564 }, { "epoch": 1.9354292296226032, "grad_norm": 7.204946994781494, "learning_rate": 6.192210281333424e-05, "loss": 0.7239, "step": 28565 }, { "epoch": 1.9354969848905752, "grad_norm": 9.102926254272461, "learning_rate": 6.192073379423642e-05, "loss": 0.6234, "step": 28566 }, { "epoch": 1.9355647401585472, "grad_norm": 6.255657196044922, "learning_rate": 6.191936477513862e-05, "loss": 0.7606, "step": 28567 }, { "epoch": 1.9356324954265194, "grad_norm": 8.599347114562988, "learning_rate": 6.19179957560408e-05, "loss": 0.4751, "step": 28568 }, { "epoch": 1.9357002506944916, "grad_norm": 4.90819787979126, "learning_rate": 6.191662673694298e-05, "loss": 0.5664, "step": 28569 }, { "epoch": 1.9357680059624636, "grad_norm": 5.482956886291504, "learning_rate": 6.191525771784517e-05, "loss": 0.7085, "step": 28570 }, { "epoch": 1.9358357612304355, "grad_norm": 9.783164978027344, "learning_rate": 6.191388869874735e-05, "loss": 0.7288, "step": 28571 }, { "epoch": 1.9359035164984078, "grad_norm": 5.615985870361328, "learning_rate": 6.191251967964953e-05, "loss": 0.6341, "step": 28572 }, { "epoch": 1.93597127176638, "grad_norm": 4.8506550788879395, "learning_rate": 6.191115066055173e-05, "loss": 0.5521, "step": 28573 }, { "epoch": 1.936039027034352, "grad_norm": 6.287008285522461, "learning_rate": 6.19097816414539e-05, "loss": 0.641, "step": 28574 }, { "epoch": 1.936106782302324, "grad_norm": 4.707332611083984, "learning_rate": 6.190841262235609e-05, "loss": 0.6765, "step": 28575 }, { "epoch": 1.9361745375702961, "grad_norm": 7.540157794952393, "learning_rate": 6.190704360325827e-05, "loss": 0.7016, "step": 28576 }, { "epoch": 1.9362422928382683, "grad_norm": 5.525252342224121, "learning_rate": 6.190567458416046e-05, "loss": 0.5497, "step": 28577 }, { "epoch": 1.9363100481062403, "grad_norm": 6.896514415740967, "learning_rate": 6.190430556506264e-05, "loss": 0.6259, "step": 28578 }, { "epoch": 1.9363778033742123, "grad_norm": 6.355600357055664, "learning_rate": 6.190293654596482e-05, "loss": 0.5807, "step": 28579 }, { "epoch": 1.9364455586421845, "grad_norm": 8.0429048538208, "learning_rate": 6.1901567526867e-05, "loss": 0.6578, "step": 28580 }, { "epoch": 1.9365133139101565, "grad_norm": 4.7329182624816895, "learning_rate": 6.19001985077692e-05, "loss": 0.5473, "step": 28581 }, { "epoch": 1.9365810691781284, "grad_norm": 4.761880874633789, "learning_rate": 6.189882948867138e-05, "loss": 0.5078, "step": 28582 }, { "epoch": 1.9366488244461006, "grad_norm": 4.288254261016846, "learning_rate": 6.189746046957356e-05, "loss": 0.5215, "step": 28583 }, { "epoch": 1.9367165797140729, "grad_norm": 6.030455589294434, "learning_rate": 6.189609145047574e-05, "loss": 0.6715, "step": 28584 }, { "epoch": 1.9367843349820448, "grad_norm": 5.887031555175781, "learning_rate": 6.189472243137792e-05, "loss": 0.6168, "step": 28585 }, { "epoch": 1.9368520902500168, "grad_norm": 4.796930313110352, "learning_rate": 6.189335341228011e-05, "loss": 0.6936, "step": 28586 }, { "epoch": 1.936919845517989, "grad_norm": 5.600851535797119, "learning_rate": 6.189198439318229e-05, "loss": 0.6754, "step": 28587 }, { "epoch": 1.9369876007859612, "grad_norm": 11.505284309387207, "learning_rate": 6.189061537408447e-05, "loss": 0.8462, "step": 28588 }, { "epoch": 1.9370553560539332, "grad_norm": 6.869186878204346, "learning_rate": 6.188924635498665e-05, "loss": 0.5944, "step": 28589 }, { "epoch": 1.9371231113219052, "grad_norm": 7.754715919494629, "learning_rate": 6.188787733588883e-05, "loss": 0.5995, "step": 28590 }, { "epoch": 1.9371908665898774, "grad_norm": 5.203024387359619, "learning_rate": 6.188650831679103e-05, "loss": 0.5424, "step": 28591 }, { "epoch": 1.9372586218578496, "grad_norm": 5.241202354431152, "learning_rate": 6.188513929769321e-05, "loss": 0.6454, "step": 28592 }, { "epoch": 1.9373263771258216, "grad_norm": 5.361361980438232, "learning_rate": 6.188377027859539e-05, "loss": 0.6663, "step": 28593 }, { "epoch": 1.9373941323937935, "grad_norm": 7.436186790466309, "learning_rate": 6.188240125949757e-05, "loss": 0.7071, "step": 28594 }, { "epoch": 1.9374618876617657, "grad_norm": 4.972476482391357, "learning_rate": 6.188103224039976e-05, "loss": 0.6225, "step": 28595 }, { "epoch": 1.937529642929738, "grad_norm": 3.7898640632629395, "learning_rate": 6.187966322130194e-05, "loss": 0.6544, "step": 28596 }, { "epoch": 1.9375973981977097, "grad_norm": 5.32175350189209, "learning_rate": 6.187829420220412e-05, "loss": 0.624, "step": 28597 }, { "epoch": 1.937665153465682, "grad_norm": 6.299227714538574, "learning_rate": 6.18769251831063e-05, "loss": 0.6085, "step": 28598 }, { "epoch": 1.9377329087336541, "grad_norm": 8.355826377868652, "learning_rate": 6.187555616400848e-05, "loss": 0.7092, "step": 28599 }, { "epoch": 1.937800664001626, "grad_norm": 5.939084053039551, "learning_rate": 6.187418714491068e-05, "loss": 0.6101, "step": 28600 }, { "epoch": 1.937868419269598, "grad_norm": 6.251040935516357, "learning_rate": 6.187281812581286e-05, "loss": 0.6313, "step": 28601 }, { "epoch": 1.9379361745375703, "grad_norm": 7.4939398765563965, "learning_rate": 6.187144910671504e-05, "loss": 0.6913, "step": 28602 }, { "epoch": 1.9380039298055425, "grad_norm": 5.292856693267822, "learning_rate": 6.187008008761722e-05, "loss": 0.5445, "step": 28603 }, { "epoch": 1.9380716850735145, "grad_norm": 6.195893287658691, "learning_rate": 6.186871106851941e-05, "loss": 0.5729, "step": 28604 }, { "epoch": 1.9381394403414864, "grad_norm": 4.964665412902832, "learning_rate": 6.186734204942159e-05, "loss": 0.6635, "step": 28605 }, { "epoch": 1.9382071956094586, "grad_norm": 5.8960700035095215, "learning_rate": 6.186597303032377e-05, "loss": 0.607, "step": 28606 }, { "epoch": 1.9382749508774308, "grad_norm": 7.551485538482666, "learning_rate": 6.186460401122595e-05, "loss": 0.7469, "step": 28607 }, { "epoch": 1.9383427061454028, "grad_norm": 4.212287425994873, "learning_rate": 6.186323499212813e-05, "loss": 0.549, "step": 28608 }, { "epoch": 1.9384104614133748, "grad_norm": 4.636597633361816, "learning_rate": 6.186186597303033e-05, "loss": 0.5942, "step": 28609 }, { "epoch": 1.938478216681347, "grad_norm": 9.224071502685547, "learning_rate": 6.186049695393251e-05, "loss": 0.7737, "step": 28610 }, { "epoch": 1.9385459719493192, "grad_norm": 5.452713489532471, "learning_rate": 6.185912793483469e-05, "loss": 0.5048, "step": 28611 }, { "epoch": 1.9386137272172912, "grad_norm": 6.921501159667969, "learning_rate": 6.185775891573687e-05, "loss": 0.5626, "step": 28612 }, { "epoch": 1.9386814824852632, "grad_norm": 4.672121047973633, "learning_rate": 6.185638989663906e-05, "loss": 0.7294, "step": 28613 }, { "epoch": 1.9387492377532354, "grad_norm": 6.076914310455322, "learning_rate": 6.185502087754124e-05, "loss": 0.5956, "step": 28614 }, { "epoch": 1.9388169930212074, "grad_norm": 12.139627456665039, "learning_rate": 6.185365185844342e-05, "loss": 0.5159, "step": 28615 }, { "epoch": 1.9388847482891793, "grad_norm": 4.922008514404297, "learning_rate": 6.185228283934562e-05, "loss": 0.4672, "step": 28616 }, { "epoch": 1.9389525035571515, "grad_norm": 5.476095199584961, "learning_rate": 6.18509138202478e-05, "loss": 0.612, "step": 28617 }, { "epoch": 1.9390202588251237, "grad_norm": 5.230226039886475, "learning_rate": 6.184954480114998e-05, "loss": 0.5316, "step": 28618 }, { "epoch": 1.9390880140930957, "grad_norm": 5.498950481414795, "learning_rate": 6.184817578205217e-05, "loss": 0.6535, "step": 28619 }, { "epoch": 1.9391557693610677, "grad_norm": 8.583647727966309, "learning_rate": 6.184680676295435e-05, "loss": 0.6749, "step": 28620 }, { "epoch": 1.93922352462904, "grad_norm": 8.01122760772705, "learning_rate": 6.184543774385653e-05, "loss": 0.5557, "step": 28621 }, { "epoch": 1.939291279897012, "grad_norm": 6.075076103210449, "learning_rate": 6.184406872475871e-05, "loss": 0.5553, "step": 28622 }, { "epoch": 1.939359035164984, "grad_norm": 6.27225399017334, "learning_rate": 6.18426997056609e-05, "loss": 0.721, "step": 28623 }, { "epoch": 1.939426790432956, "grad_norm": 4.066678047180176, "learning_rate": 6.184133068656309e-05, "loss": 0.5518, "step": 28624 }, { "epoch": 1.9394945457009283, "grad_norm": 4.5981526374816895, "learning_rate": 6.183996166746527e-05, "loss": 0.6057, "step": 28625 }, { "epoch": 1.9395623009689005, "grad_norm": 7.319876670837402, "learning_rate": 6.183859264836745e-05, "loss": 0.8305, "step": 28626 }, { "epoch": 1.9396300562368725, "grad_norm": 10.743691444396973, "learning_rate": 6.183722362926964e-05, "loss": 0.6612, "step": 28627 }, { "epoch": 1.9396978115048444, "grad_norm": 7.262180805206299, "learning_rate": 6.183585461017182e-05, "loss": 0.5824, "step": 28628 }, { "epoch": 1.9397655667728166, "grad_norm": 4.927490711212158, "learning_rate": 6.1834485591074e-05, "loss": 0.5049, "step": 28629 }, { "epoch": 1.9398333220407886, "grad_norm": 7.486043930053711, "learning_rate": 6.183311657197618e-05, "loss": 0.5932, "step": 28630 }, { "epoch": 1.9399010773087606, "grad_norm": 7.368857383728027, "learning_rate": 6.183174755287836e-05, "loss": 0.6766, "step": 28631 }, { "epoch": 1.9399688325767328, "grad_norm": 5.343363285064697, "learning_rate": 6.183037853378056e-05, "loss": 0.6674, "step": 28632 }, { "epoch": 1.940036587844705, "grad_norm": 5.154477119445801, "learning_rate": 6.182900951468274e-05, "loss": 0.6577, "step": 28633 }, { "epoch": 1.940104343112677, "grad_norm": 5.382983207702637, "learning_rate": 6.182764049558492e-05, "loss": 0.4085, "step": 28634 }, { "epoch": 1.940172098380649, "grad_norm": 4.985278129577637, "learning_rate": 6.18262714764871e-05, "loss": 0.7261, "step": 28635 }, { "epoch": 1.9402398536486212, "grad_norm": 5.426328182220459, "learning_rate": 6.182490245738929e-05, "loss": 0.6391, "step": 28636 }, { "epoch": 1.9403076089165934, "grad_norm": 4.8267741203308105, "learning_rate": 6.182353343829147e-05, "loss": 0.4095, "step": 28637 }, { "epoch": 1.9403753641845654, "grad_norm": 4.744563102722168, "learning_rate": 6.182216441919365e-05, "loss": 0.6119, "step": 28638 }, { "epoch": 1.9404431194525373, "grad_norm": 8.817002296447754, "learning_rate": 6.182079540009583e-05, "loss": 0.4355, "step": 28639 }, { "epoch": 1.9405108747205095, "grad_norm": 4.900224208831787, "learning_rate": 6.181942638099801e-05, "loss": 0.7074, "step": 28640 }, { "epoch": 1.9405786299884817, "grad_norm": 5.2455620765686035, "learning_rate": 6.18180573619002e-05, "loss": 0.6272, "step": 28641 }, { "epoch": 1.9406463852564537, "grad_norm": 8.267704963684082, "learning_rate": 6.181668834280239e-05, "loss": 0.6647, "step": 28642 }, { "epoch": 1.9407141405244257, "grad_norm": 5.420190334320068, "learning_rate": 6.181531932370457e-05, "loss": 0.6882, "step": 28643 }, { "epoch": 1.940781895792398, "grad_norm": 7.452794075012207, "learning_rate": 6.181395030460675e-05, "loss": 0.833, "step": 28644 }, { "epoch": 1.94084965106037, "grad_norm": 5.4695940017700195, "learning_rate": 6.181258128550893e-05, "loss": 0.3673, "step": 28645 }, { "epoch": 1.9409174063283419, "grad_norm": 10.13788890838623, "learning_rate": 6.181121226641112e-05, "loss": 0.6133, "step": 28646 }, { "epoch": 1.940985161596314, "grad_norm": 3.8637263774871826, "learning_rate": 6.18098432473133e-05, "loss": 0.5386, "step": 28647 }, { "epoch": 1.9410529168642863, "grad_norm": 4.875961780548096, "learning_rate": 6.180847422821548e-05, "loss": 0.4483, "step": 28648 }, { "epoch": 1.9411206721322583, "grad_norm": 4.464960098266602, "learning_rate": 6.180710520911766e-05, "loss": 0.4816, "step": 28649 }, { "epoch": 1.9411884274002302, "grad_norm": 4.292661190032959, "learning_rate": 6.180573619001986e-05, "loss": 0.5268, "step": 28650 }, { "epoch": 1.9412561826682024, "grad_norm": 7.241438388824463, "learning_rate": 6.180436717092204e-05, "loss": 0.7284, "step": 28651 }, { "epoch": 1.9413239379361746, "grad_norm": 5.302928924560547, "learning_rate": 6.180299815182422e-05, "loss": 0.6234, "step": 28652 }, { "epoch": 1.9413916932041466, "grad_norm": 5.852281093597412, "learning_rate": 6.18016291327264e-05, "loss": 0.5666, "step": 28653 }, { "epoch": 1.9414594484721186, "grad_norm": 4.393338680267334, "learning_rate": 6.180026011362858e-05, "loss": 0.5532, "step": 28654 }, { "epoch": 1.9415272037400908, "grad_norm": 6.769193649291992, "learning_rate": 6.179889109453077e-05, "loss": 0.654, "step": 28655 }, { "epoch": 1.941594959008063, "grad_norm": 13.000207901000977, "learning_rate": 6.179752207543295e-05, "loss": 0.6202, "step": 28656 }, { "epoch": 1.941662714276035, "grad_norm": 5.196847915649414, "learning_rate": 6.179615305633513e-05, "loss": 0.5033, "step": 28657 }, { "epoch": 1.941730469544007, "grad_norm": 5.129347324371338, "learning_rate": 6.179478403723731e-05, "loss": 0.6464, "step": 28658 }, { "epoch": 1.9417982248119792, "grad_norm": 5.013476848602295, "learning_rate": 6.17934150181395e-05, "loss": 0.6631, "step": 28659 }, { "epoch": 1.9418659800799514, "grad_norm": 9.939468383789062, "learning_rate": 6.179204599904169e-05, "loss": 0.5997, "step": 28660 }, { "epoch": 1.9419337353479234, "grad_norm": 6.824064254760742, "learning_rate": 6.179067697994387e-05, "loss": 0.5813, "step": 28661 }, { "epoch": 1.9420014906158953, "grad_norm": 10.320743560791016, "learning_rate": 6.178930796084606e-05, "loss": 0.7428, "step": 28662 }, { "epoch": 1.9420692458838675, "grad_norm": 6.643125057220459, "learning_rate": 6.178793894174824e-05, "loss": 0.595, "step": 28663 }, { "epoch": 1.9421370011518395, "grad_norm": 5.816925525665283, "learning_rate": 6.178656992265042e-05, "loss": 0.7194, "step": 28664 }, { "epoch": 1.9422047564198115, "grad_norm": 6.746510028839111, "learning_rate": 6.178520090355262e-05, "loss": 0.8549, "step": 28665 }, { "epoch": 1.9422725116877837, "grad_norm": 5.960466384887695, "learning_rate": 6.17838318844548e-05, "loss": 0.8657, "step": 28666 }, { "epoch": 1.942340266955756, "grad_norm": 6.005176067352295, "learning_rate": 6.178246286535698e-05, "loss": 0.7723, "step": 28667 }, { "epoch": 1.9424080222237279, "grad_norm": 5.783576965332031, "learning_rate": 6.178109384625917e-05, "loss": 0.6517, "step": 28668 }, { "epoch": 1.9424757774916999, "grad_norm": 7.607844829559326, "learning_rate": 6.177972482716135e-05, "loss": 0.5748, "step": 28669 }, { "epoch": 1.942543532759672, "grad_norm": 6.274801254272461, "learning_rate": 6.177835580806353e-05, "loss": 0.6483, "step": 28670 }, { "epoch": 1.9426112880276443, "grad_norm": 5.110238075256348, "learning_rate": 6.177698678896571e-05, "loss": 0.7024, "step": 28671 }, { "epoch": 1.9426790432956162, "grad_norm": 4.358292102813721, "learning_rate": 6.177561776986789e-05, "loss": 0.5744, "step": 28672 }, { "epoch": 1.9427467985635882, "grad_norm": 7.277698993682861, "learning_rate": 6.177424875077009e-05, "loss": 0.6946, "step": 28673 }, { "epoch": 1.9428145538315604, "grad_norm": 5.080716609954834, "learning_rate": 6.177287973167227e-05, "loss": 0.556, "step": 28674 }, { "epoch": 1.9428823090995326, "grad_norm": 6.130206108093262, "learning_rate": 6.177151071257445e-05, "loss": 0.862, "step": 28675 }, { "epoch": 1.9429500643675046, "grad_norm": 5.5082831382751465, "learning_rate": 6.177014169347663e-05, "loss": 0.7452, "step": 28676 }, { "epoch": 1.9430178196354766, "grad_norm": 6.992527484893799, "learning_rate": 6.176877267437881e-05, "loss": 0.5492, "step": 28677 }, { "epoch": 1.9430855749034488, "grad_norm": 6.63530969619751, "learning_rate": 6.1767403655281e-05, "loss": 0.6612, "step": 28678 }, { "epoch": 1.9431533301714208, "grad_norm": 5.571232318878174, "learning_rate": 6.176603463618318e-05, "loss": 0.6846, "step": 28679 }, { "epoch": 1.9432210854393928, "grad_norm": 5.6822309494018555, "learning_rate": 6.176466561708536e-05, "loss": 0.6147, "step": 28680 }, { "epoch": 1.943288840707365, "grad_norm": 4.381528854370117, "learning_rate": 6.176329659798754e-05, "loss": 0.678, "step": 28681 }, { "epoch": 1.9433565959753372, "grad_norm": 9.351381301879883, "learning_rate": 6.176192757888974e-05, "loss": 0.7075, "step": 28682 }, { "epoch": 1.9434243512433091, "grad_norm": 4.348139762878418, "learning_rate": 6.176055855979192e-05, "loss": 0.6189, "step": 28683 }, { "epoch": 1.9434921065112811, "grad_norm": 4.689141750335693, "learning_rate": 6.17591895406941e-05, "loss": 0.6754, "step": 28684 }, { "epoch": 1.9435598617792533, "grad_norm": 5.469815254211426, "learning_rate": 6.175782052159628e-05, "loss": 0.7724, "step": 28685 }, { "epoch": 1.9436276170472255, "grad_norm": 5.192305088043213, "learning_rate": 6.175645150249846e-05, "loss": 0.6679, "step": 28686 }, { "epoch": 1.9436953723151975, "grad_norm": 4.628074645996094, "learning_rate": 6.175508248340065e-05, "loss": 0.5487, "step": 28687 }, { "epoch": 1.9437631275831695, "grad_norm": 6.580662250518799, "learning_rate": 6.175371346430283e-05, "loss": 0.658, "step": 28688 }, { "epoch": 1.9438308828511417, "grad_norm": 5.042867183685303, "learning_rate": 6.175234444520501e-05, "loss": 0.5456, "step": 28689 }, { "epoch": 1.943898638119114, "grad_norm": 5.7183837890625, "learning_rate": 6.175097542610719e-05, "loss": 0.7406, "step": 28690 }, { "epoch": 1.9439663933870859, "grad_norm": 6.7057342529296875, "learning_rate": 6.174960640700939e-05, "loss": 0.7938, "step": 28691 }, { "epoch": 1.9440341486550579, "grad_norm": 6.759729862213135, "learning_rate": 6.174823738791157e-05, "loss": 0.7271, "step": 28692 }, { "epoch": 1.94410190392303, "grad_norm": 7.484266757965088, "learning_rate": 6.174686836881375e-05, "loss": 0.4975, "step": 28693 }, { "epoch": 1.9441696591910023, "grad_norm": 8.286821365356445, "learning_rate": 6.174549934971593e-05, "loss": 0.7148, "step": 28694 }, { "epoch": 1.944237414458974, "grad_norm": 6.6494340896606445, "learning_rate": 6.174413033061811e-05, "loss": 0.5409, "step": 28695 }, { "epoch": 1.9443051697269462, "grad_norm": 5.680832386016846, "learning_rate": 6.17427613115203e-05, "loss": 0.6705, "step": 28696 }, { "epoch": 1.9443729249949184, "grad_norm": 6.213473320007324, "learning_rate": 6.174139229242248e-05, "loss": 0.6087, "step": 28697 }, { "epoch": 1.9444406802628904, "grad_norm": 5.9445295333862305, "learning_rate": 6.174002327332466e-05, "loss": 0.6205, "step": 28698 }, { "epoch": 1.9445084355308624, "grad_norm": 5.545928478240967, "learning_rate": 6.173865425422684e-05, "loss": 0.5832, "step": 28699 }, { "epoch": 1.9445761907988346, "grad_norm": 6.16463041305542, "learning_rate": 6.173728523512902e-05, "loss": 0.5765, "step": 28700 }, { "epoch": 1.9446439460668068, "grad_norm": 7.963038444519043, "learning_rate": 6.173591621603122e-05, "loss": 0.9328, "step": 28701 }, { "epoch": 1.9447117013347788, "grad_norm": 6.575962543487549, "learning_rate": 6.17345471969334e-05, "loss": 0.5746, "step": 28702 }, { "epoch": 1.9447794566027508, "grad_norm": 5.762258529663086, "learning_rate": 6.173317817783558e-05, "loss": 0.5594, "step": 28703 }, { "epoch": 1.944847211870723, "grad_norm": 5.898035049438477, "learning_rate": 6.173180915873776e-05, "loss": 0.7383, "step": 28704 }, { "epoch": 1.9449149671386952, "grad_norm": 7.248903751373291, "learning_rate": 6.173044013963995e-05, "loss": 0.5798, "step": 28705 }, { "epoch": 1.9449827224066671, "grad_norm": 6.232579708099365, "learning_rate": 6.172907112054213e-05, "loss": 0.7132, "step": 28706 }, { "epoch": 1.9450504776746391, "grad_norm": 4.272340774536133, "learning_rate": 6.172770210144431e-05, "loss": 0.6234, "step": 28707 }, { "epoch": 1.9451182329426113, "grad_norm": 8.680265426635742, "learning_rate": 6.172633308234649e-05, "loss": 0.6354, "step": 28708 }, { "epoch": 1.9451859882105835, "grad_norm": 5.183062553405762, "learning_rate": 6.172496406324869e-05, "loss": 0.6373, "step": 28709 }, { "epoch": 1.9452537434785555, "grad_norm": 7.233780384063721, "learning_rate": 6.172359504415087e-05, "loss": 0.833, "step": 28710 }, { "epoch": 1.9453214987465275, "grad_norm": 5.869729995727539, "learning_rate": 6.172222602505305e-05, "loss": 0.5923, "step": 28711 }, { "epoch": 1.9453892540144997, "grad_norm": 5.358488082885742, "learning_rate": 6.172085700595524e-05, "loss": 0.5227, "step": 28712 }, { "epoch": 1.9454570092824717, "grad_norm": 5.472938537597656, "learning_rate": 6.171948798685742e-05, "loss": 0.6871, "step": 28713 }, { "epoch": 1.9455247645504437, "grad_norm": 5.226840019226074, "learning_rate": 6.17181189677596e-05, "loss": 0.5788, "step": 28714 }, { "epoch": 1.9455925198184159, "grad_norm": 5.824084758758545, "learning_rate": 6.17167499486618e-05, "loss": 0.6836, "step": 28715 }, { "epoch": 1.945660275086388, "grad_norm": 5.0991902351379395, "learning_rate": 6.171538092956398e-05, "loss": 0.7912, "step": 28716 }, { "epoch": 1.94572803035436, "grad_norm": 4.728055953979492, "learning_rate": 6.171401191046616e-05, "loss": 0.699, "step": 28717 }, { "epoch": 1.945795785622332, "grad_norm": 4.425876140594482, "learning_rate": 6.171264289136834e-05, "loss": 0.5282, "step": 28718 }, { "epoch": 1.9458635408903042, "grad_norm": 10.244550704956055, "learning_rate": 6.171127387227053e-05, "loss": 0.7667, "step": 28719 }, { "epoch": 1.9459312961582764, "grad_norm": 5.445611476898193, "learning_rate": 6.170990485317271e-05, "loss": 0.6214, "step": 28720 }, { "epoch": 1.9459990514262484, "grad_norm": 4.944458484649658, "learning_rate": 6.170853583407489e-05, "loss": 0.6899, "step": 28721 }, { "epoch": 1.9460668066942204, "grad_norm": 6.7921271324157715, "learning_rate": 6.170716681497707e-05, "loss": 0.7199, "step": 28722 }, { "epoch": 1.9461345619621926, "grad_norm": 7.901906490325928, "learning_rate": 6.170579779587927e-05, "loss": 0.5384, "step": 28723 }, { "epoch": 1.9462023172301648, "grad_norm": 6.525637149810791, "learning_rate": 6.170442877678145e-05, "loss": 0.6278, "step": 28724 }, { "epoch": 1.9462700724981368, "grad_norm": 7.920475482940674, "learning_rate": 6.170305975768363e-05, "loss": 0.8268, "step": 28725 }, { "epoch": 1.9463378277661088, "grad_norm": 5.1515889167785645, "learning_rate": 6.17016907385858e-05, "loss": 0.597, "step": 28726 }, { "epoch": 1.946405583034081, "grad_norm": 3.3866782188415527, "learning_rate": 6.170032171948799e-05, "loss": 0.4506, "step": 28727 }, { "epoch": 1.946473338302053, "grad_norm": 5.323874473571777, "learning_rate": 6.169895270039018e-05, "loss": 0.8875, "step": 28728 }, { "epoch": 1.946541093570025, "grad_norm": 5.794893264770508, "learning_rate": 6.169758368129236e-05, "loss": 0.4875, "step": 28729 }, { "epoch": 1.9466088488379971, "grad_norm": 5.685214519500732, "learning_rate": 6.169621466219454e-05, "loss": 0.6596, "step": 28730 }, { "epoch": 1.9466766041059693, "grad_norm": 4.391304016113281, "learning_rate": 6.169484564309672e-05, "loss": 0.6301, "step": 28731 }, { "epoch": 1.9467443593739413, "grad_norm": 5.314813137054443, "learning_rate": 6.16934766239989e-05, "loss": 0.4982, "step": 28732 }, { "epoch": 1.9468121146419133, "grad_norm": 4.464449882507324, "learning_rate": 6.16921076049011e-05, "loss": 0.5192, "step": 28733 }, { "epoch": 1.9468798699098855, "grad_norm": 6.077066898345947, "learning_rate": 6.169073858580328e-05, "loss": 0.678, "step": 28734 }, { "epoch": 1.9469476251778577, "grad_norm": 6.863765716552734, "learning_rate": 6.168936956670546e-05, "loss": 0.7542, "step": 28735 }, { "epoch": 1.9470153804458297, "grad_norm": 4.0284318923950195, "learning_rate": 6.168800054760764e-05, "loss": 0.5636, "step": 28736 }, { "epoch": 1.9470831357138016, "grad_norm": 5.776848793029785, "learning_rate": 6.168663152850983e-05, "loss": 0.5683, "step": 28737 }, { "epoch": 1.9471508909817739, "grad_norm": 7.05540132522583, "learning_rate": 6.168526250941201e-05, "loss": 0.7944, "step": 28738 }, { "epoch": 1.947218646249746, "grad_norm": 5.6854352951049805, "learning_rate": 6.168389349031419e-05, "loss": 0.5459, "step": 28739 }, { "epoch": 1.947286401517718, "grad_norm": 4.437877178192139, "learning_rate": 6.168252447121637e-05, "loss": 0.6904, "step": 28740 }, { "epoch": 1.94735415678569, "grad_norm": 6.7348222732543945, "learning_rate": 6.168115545211855e-05, "loss": 0.6616, "step": 28741 }, { "epoch": 1.9474219120536622, "grad_norm": 11.454263687133789, "learning_rate": 6.167978643302075e-05, "loss": 0.6138, "step": 28742 }, { "epoch": 1.9474896673216344, "grad_norm": 4.780706405639648, "learning_rate": 6.167841741392293e-05, "loss": 0.7994, "step": 28743 }, { "epoch": 1.9475574225896062, "grad_norm": 5.796602725982666, "learning_rate": 6.16770483948251e-05, "loss": 0.5379, "step": 28744 }, { "epoch": 1.9476251778575784, "grad_norm": 5.559529781341553, "learning_rate": 6.167567937572729e-05, "loss": 0.8721, "step": 28745 }, { "epoch": 1.9476929331255506, "grad_norm": 5.04899263381958, "learning_rate": 6.167431035662948e-05, "loss": 0.7037, "step": 28746 }, { "epoch": 1.9477606883935226, "grad_norm": 6.127429962158203, "learning_rate": 6.167294133753166e-05, "loss": 0.7173, "step": 28747 }, { "epoch": 1.9478284436614945, "grad_norm": 4.973020553588867, "learning_rate": 6.167157231843384e-05, "loss": 0.5931, "step": 28748 }, { "epoch": 1.9478961989294667, "grad_norm": 5.805059909820557, "learning_rate": 6.167020329933602e-05, "loss": 0.589, "step": 28749 }, { "epoch": 1.947963954197439, "grad_norm": 5.048222541809082, "learning_rate": 6.16688342802382e-05, "loss": 0.5949, "step": 28750 }, { "epoch": 1.948031709465411, "grad_norm": 4.926900386810303, "learning_rate": 6.16674652611404e-05, "loss": 0.5669, "step": 28751 }, { "epoch": 1.948099464733383, "grad_norm": 5.65214204788208, "learning_rate": 6.166609624204258e-05, "loss": 0.8361, "step": 28752 }, { "epoch": 1.9481672200013551, "grad_norm": 5.783775806427002, "learning_rate": 6.166472722294476e-05, "loss": 0.6117, "step": 28753 }, { "epoch": 1.9482349752693273, "grad_norm": 5.0523457527160645, "learning_rate": 6.166335820384694e-05, "loss": 0.628, "step": 28754 }, { "epoch": 1.9483027305372993, "grad_norm": 6.750730514526367, "learning_rate": 6.166198918474913e-05, "loss": 0.8132, "step": 28755 }, { "epoch": 1.9483704858052713, "grad_norm": 5.872334957122803, "learning_rate": 6.166062016565131e-05, "loss": 0.5122, "step": 28756 }, { "epoch": 1.9484382410732435, "grad_norm": 8.903843879699707, "learning_rate": 6.165925114655349e-05, "loss": 0.6204, "step": 28757 }, { "epoch": 1.9485059963412157, "grad_norm": 5.024861812591553, "learning_rate": 6.165788212745569e-05, "loss": 0.6579, "step": 28758 }, { "epoch": 1.9485737516091877, "grad_norm": 6.1356425285339355, "learning_rate": 6.165651310835787e-05, "loss": 0.9644, "step": 28759 }, { "epoch": 1.9486415068771596, "grad_norm": 5.050475120544434, "learning_rate": 6.165514408926005e-05, "loss": 0.5044, "step": 28760 }, { "epoch": 1.9487092621451318, "grad_norm": 5.7536702156066895, "learning_rate": 6.165377507016224e-05, "loss": 0.6415, "step": 28761 }, { "epoch": 1.9487770174131038, "grad_norm": 5.318159103393555, "learning_rate": 6.165240605106442e-05, "loss": 0.7304, "step": 28762 }, { "epoch": 1.9488447726810758, "grad_norm": 4.914268493652344, "learning_rate": 6.16510370319666e-05, "loss": 0.5606, "step": 28763 }, { "epoch": 1.948912527949048, "grad_norm": 5.375725269317627, "learning_rate": 6.164966801286878e-05, "loss": 0.6004, "step": 28764 }, { "epoch": 1.9489802832170202, "grad_norm": 4.385342121124268, "learning_rate": 6.164829899377098e-05, "loss": 0.5192, "step": 28765 }, { "epoch": 1.9490480384849922, "grad_norm": 4.504706382751465, "learning_rate": 6.164692997467316e-05, "loss": 0.5618, "step": 28766 }, { "epoch": 1.9491157937529642, "grad_norm": 5.534895420074463, "learning_rate": 6.164556095557534e-05, "loss": 0.6846, "step": 28767 }, { "epoch": 1.9491835490209364, "grad_norm": 8.58128833770752, "learning_rate": 6.164419193647752e-05, "loss": 0.5517, "step": 28768 }, { "epoch": 1.9492513042889086, "grad_norm": 4.594401836395264, "learning_rate": 6.164282291737971e-05, "loss": 0.6352, "step": 28769 }, { "epoch": 1.9493190595568806, "grad_norm": 7.296576023101807, "learning_rate": 6.164145389828189e-05, "loss": 0.6357, "step": 28770 }, { "epoch": 1.9493868148248525, "grad_norm": 12.078683853149414, "learning_rate": 6.164008487918407e-05, "loss": 0.7985, "step": 28771 }, { "epoch": 1.9494545700928247, "grad_norm": 4.1309309005737305, "learning_rate": 6.163871586008625e-05, "loss": 0.497, "step": 28772 }, { "epoch": 1.949522325360797, "grad_norm": 5.9491987228393555, "learning_rate": 6.163734684098843e-05, "loss": 0.6334, "step": 28773 }, { "epoch": 1.949590080628769, "grad_norm": 4.421563625335693, "learning_rate": 6.163597782189063e-05, "loss": 0.6012, "step": 28774 }, { "epoch": 1.949657835896741, "grad_norm": 10.57115650177002, "learning_rate": 6.16346088027928e-05, "loss": 0.7755, "step": 28775 }, { "epoch": 1.9497255911647131, "grad_norm": 9.736289024353027, "learning_rate": 6.163323978369499e-05, "loss": 0.7272, "step": 28776 }, { "epoch": 1.949793346432685, "grad_norm": 4.990771770477295, "learning_rate": 6.163187076459717e-05, "loss": 0.734, "step": 28777 }, { "epoch": 1.949861101700657, "grad_norm": 5.690709590911865, "learning_rate": 6.163050174549935e-05, "loss": 0.6035, "step": 28778 }, { "epoch": 1.9499288569686293, "grad_norm": 4.623249053955078, "learning_rate": 6.162913272640154e-05, "loss": 0.5484, "step": 28779 }, { "epoch": 1.9499966122366015, "grad_norm": 8.26430606842041, "learning_rate": 6.162776370730372e-05, "loss": 0.5288, "step": 28780 }, { "epoch": 1.9500643675045735, "grad_norm": 4.722864627838135, "learning_rate": 6.16263946882059e-05, "loss": 0.6174, "step": 28781 }, { "epoch": 1.9501321227725454, "grad_norm": 6.520045280456543, "learning_rate": 6.162502566910808e-05, "loss": 0.7041, "step": 28782 }, { "epoch": 1.9501998780405176, "grad_norm": 4.23316764831543, "learning_rate": 6.162365665001028e-05, "loss": 0.4999, "step": 28783 }, { "epoch": 1.9502676333084898, "grad_norm": 5.521609306335449, "learning_rate": 6.162228763091246e-05, "loss": 0.7922, "step": 28784 }, { "epoch": 1.9503353885764618, "grad_norm": 10.171154975891113, "learning_rate": 6.162091861181464e-05, "loss": 0.8463, "step": 28785 }, { "epoch": 1.9504031438444338, "grad_norm": 6.21957540512085, "learning_rate": 6.161954959271682e-05, "loss": 0.7074, "step": 28786 }, { "epoch": 1.950470899112406, "grad_norm": 5.864689826965332, "learning_rate": 6.1618180573619e-05, "loss": 0.6228, "step": 28787 }, { "epoch": 1.9505386543803782, "grad_norm": 5.454946041107178, "learning_rate": 6.161681155452119e-05, "loss": 0.6474, "step": 28788 }, { "epoch": 1.9506064096483502, "grad_norm": 4.70573091506958, "learning_rate": 6.161544253542337e-05, "loss": 0.5554, "step": 28789 }, { "epoch": 1.9506741649163222, "grad_norm": 5.291991233825684, "learning_rate": 6.161407351632555e-05, "loss": 0.7816, "step": 28790 }, { "epoch": 1.9507419201842944, "grad_norm": 15.013936042785645, "learning_rate": 6.161270449722773e-05, "loss": 0.6766, "step": 28791 }, { "epoch": 1.9508096754522666, "grad_norm": 6.058167934417725, "learning_rate": 6.161133547812993e-05, "loss": 0.7848, "step": 28792 }, { "epoch": 1.9508774307202383, "grad_norm": 5.19060754776001, "learning_rate": 6.16099664590321e-05, "loss": 0.6753, "step": 28793 }, { "epoch": 1.9509451859882105, "grad_norm": 8.119346618652344, "learning_rate": 6.160859743993429e-05, "loss": 0.4678, "step": 28794 }, { "epoch": 1.9510129412561827, "grad_norm": 8.047420501708984, "learning_rate": 6.160722842083647e-05, "loss": 0.644, "step": 28795 }, { "epoch": 1.9510806965241547, "grad_norm": 5.24276065826416, "learning_rate": 6.160585940173865e-05, "loss": 0.5401, "step": 28796 }, { "epoch": 1.9511484517921267, "grad_norm": 6.268372058868408, "learning_rate": 6.160449038264084e-05, "loss": 0.6234, "step": 28797 }, { "epoch": 1.951216207060099, "grad_norm": 5.679368019104004, "learning_rate": 6.160312136354302e-05, "loss": 0.6546, "step": 28798 }, { "epoch": 1.951283962328071, "grad_norm": 5.503265857696533, "learning_rate": 6.16017523444452e-05, "loss": 0.8836, "step": 28799 }, { "epoch": 1.951351717596043, "grad_norm": 5.551029682159424, "learning_rate": 6.160038332534738e-05, "loss": 0.682, "step": 28800 }, { "epoch": 1.951419472864015, "grad_norm": 4.5591888427734375, "learning_rate": 6.159901430624958e-05, "loss": 0.4084, "step": 28801 }, { "epoch": 1.9514872281319873, "grad_norm": 6.121176242828369, "learning_rate": 6.159764528715176e-05, "loss": 0.6759, "step": 28802 }, { "epoch": 1.9515549833999595, "grad_norm": 9.853983879089355, "learning_rate": 6.159627626805394e-05, "loss": 0.5536, "step": 28803 }, { "epoch": 1.9516227386679315, "grad_norm": 8.808220863342285, "learning_rate": 6.159490724895613e-05, "loss": 0.5843, "step": 28804 }, { "epoch": 1.9516904939359034, "grad_norm": 5.519996166229248, "learning_rate": 6.159353822985831e-05, "loss": 0.5334, "step": 28805 }, { "epoch": 1.9517582492038756, "grad_norm": 6.59185266494751, "learning_rate": 6.159216921076049e-05, "loss": 0.5765, "step": 28806 }, { "epoch": 1.9518260044718478, "grad_norm": 6.0018181800842285, "learning_rate": 6.159080019166268e-05, "loss": 0.4789, "step": 28807 }, { "epoch": 1.9518937597398198, "grad_norm": 4.605164527893066, "learning_rate": 6.158943117256487e-05, "loss": 0.5751, "step": 28808 }, { "epoch": 1.9519615150077918, "grad_norm": 5.008047580718994, "learning_rate": 6.158806215346705e-05, "loss": 0.6249, "step": 28809 }, { "epoch": 1.952029270275764, "grad_norm": 5.410154342651367, "learning_rate": 6.158669313436923e-05, "loss": 0.7559, "step": 28810 }, { "epoch": 1.952097025543736, "grad_norm": 6.555610656738281, "learning_rate": 6.158532411527142e-05, "loss": 0.735, "step": 28811 }, { "epoch": 1.952164780811708, "grad_norm": 6.208712577819824, "learning_rate": 6.15839550961736e-05, "loss": 0.6611, "step": 28812 }, { "epoch": 1.9522325360796802, "grad_norm": 5.031275749206543, "learning_rate": 6.158258607707578e-05, "loss": 0.471, "step": 28813 }, { "epoch": 1.9523002913476524, "grad_norm": 4.32275915145874, "learning_rate": 6.158121705797796e-05, "loss": 0.538, "step": 28814 }, { "epoch": 1.9523680466156244, "grad_norm": 5.641719818115234, "learning_rate": 6.157984803888015e-05, "loss": 0.7153, "step": 28815 }, { "epoch": 1.9524358018835963, "grad_norm": 6.171957969665527, "learning_rate": 6.157847901978234e-05, "loss": 0.7305, "step": 28816 }, { "epoch": 1.9525035571515685, "grad_norm": 5.879449367523193, "learning_rate": 6.157711000068452e-05, "loss": 0.573, "step": 28817 }, { "epoch": 1.9525713124195407, "grad_norm": 5.888575077056885, "learning_rate": 6.15757409815867e-05, "loss": 0.5565, "step": 28818 }, { "epoch": 1.9526390676875127, "grad_norm": 4.92302942276001, "learning_rate": 6.157437196248888e-05, "loss": 0.6036, "step": 28819 }, { "epoch": 1.9527068229554847, "grad_norm": 6.25700044631958, "learning_rate": 6.157300294339107e-05, "loss": 0.6818, "step": 28820 }, { "epoch": 1.952774578223457, "grad_norm": 5.071694374084473, "learning_rate": 6.157163392429325e-05, "loss": 0.8396, "step": 28821 }, { "epoch": 1.952842333491429, "grad_norm": 5.737265586853027, "learning_rate": 6.157026490519543e-05, "loss": 0.6887, "step": 28822 }, { "epoch": 1.952910088759401, "grad_norm": 7.511000156402588, "learning_rate": 6.156889588609761e-05, "loss": 0.7232, "step": 28823 }, { "epoch": 1.952977844027373, "grad_norm": 9.723064422607422, "learning_rate": 6.15675268669998e-05, "loss": 0.6183, "step": 28824 }, { "epoch": 1.9530455992953453, "grad_norm": 6.696814060211182, "learning_rate": 6.156615784790199e-05, "loss": 0.6909, "step": 28825 }, { "epoch": 1.9531133545633172, "grad_norm": 7.046933650970459, "learning_rate": 6.156478882880417e-05, "loss": 0.5903, "step": 28826 }, { "epoch": 1.9531811098312892, "grad_norm": 5.769134044647217, "learning_rate": 6.156341980970635e-05, "loss": 0.5381, "step": 28827 }, { "epoch": 1.9532488650992614, "grad_norm": 5.938135623931885, "learning_rate": 6.156205079060853e-05, "loss": 0.5377, "step": 28828 }, { "epoch": 1.9533166203672336, "grad_norm": 4.743623733520508, "learning_rate": 6.156068177151072e-05, "loss": 0.6042, "step": 28829 }, { "epoch": 1.9533843756352056, "grad_norm": 7.382614612579346, "learning_rate": 6.15593127524129e-05, "loss": 0.8824, "step": 28830 }, { "epoch": 1.9534521309031776, "grad_norm": 6.215185165405273, "learning_rate": 6.155794373331508e-05, "loss": 0.5555, "step": 28831 }, { "epoch": 1.9535198861711498, "grad_norm": 8.129961013793945, "learning_rate": 6.155657471421726e-05, "loss": 0.6205, "step": 28832 }, { "epoch": 1.953587641439122, "grad_norm": 7.861161708831787, "learning_rate": 6.155520569511944e-05, "loss": 0.5523, "step": 28833 }, { "epoch": 1.953655396707094, "grad_norm": 6.709312915802002, "learning_rate": 6.155383667602164e-05, "loss": 0.6929, "step": 28834 }, { "epoch": 1.953723151975066, "grad_norm": 5.890100955963135, "learning_rate": 6.155246765692382e-05, "loss": 0.6185, "step": 28835 }, { "epoch": 1.9537909072430382, "grad_norm": 5.2450408935546875, "learning_rate": 6.1551098637826e-05, "loss": 0.6607, "step": 28836 }, { "epoch": 1.9538586625110104, "grad_norm": 9.77294635772705, "learning_rate": 6.154972961872818e-05, "loss": 0.6833, "step": 28837 }, { "epoch": 1.9539264177789823, "grad_norm": 6.975707054138184, "learning_rate": 6.154836059963037e-05, "loss": 0.7338, "step": 28838 }, { "epoch": 1.9539941730469543, "grad_norm": 7.068915367126465, "learning_rate": 6.154699158053255e-05, "loss": 0.6316, "step": 28839 }, { "epoch": 1.9540619283149265, "grad_norm": 5.8116912841796875, "learning_rate": 6.154562256143473e-05, "loss": 0.5802, "step": 28840 }, { "epoch": 1.9541296835828987, "grad_norm": 5.537242889404297, "learning_rate": 6.154425354233691e-05, "loss": 0.6695, "step": 28841 }, { "epoch": 1.9541974388508705, "grad_norm": 4.900106906890869, "learning_rate": 6.154288452323909e-05, "loss": 0.5653, "step": 28842 }, { "epoch": 1.9542651941188427, "grad_norm": 3.9513099193573, "learning_rate": 6.154151550414129e-05, "loss": 0.5429, "step": 28843 }, { "epoch": 1.954332949386815, "grad_norm": 4.619136333465576, "learning_rate": 6.154014648504347e-05, "loss": 0.4972, "step": 28844 }, { "epoch": 1.9544007046547869, "grad_norm": 5.607911586761475, "learning_rate": 6.153877746594565e-05, "loss": 0.6948, "step": 28845 }, { "epoch": 1.9544684599227589, "grad_norm": 4.518560409545898, "learning_rate": 6.153740844684783e-05, "loss": 0.4961, "step": 28846 }, { "epoch": 1.954536215190731, "grad_norm": 7.199248313903809, "learning_rate": 6.153603942775002e-05, "loss": 0.7773, "step": 28847 }, { "epoch": 1.9546039704587033, "grad_norm": 5.082158088684082, "learning_rate": 6.15346704086522e-05, "loss": 0.9156, "step": 28848 }, { "epoch": 1.9546717257266752, "grad_norm": 6.763563632965088, "learning_rate": 6.153330138955438e-05, "loss": 0.5101, "step": 28849 }, { "epoch": 1.9547394809946472, "grad_norm": 4.508602619171143, "learning_rate": 6.153193237045658e-05, "loss": 0.5239, "step": 28850 }, { "epoch": 1.9548072362626194, "grad_norm": 5.59226655960083, "learning_rate": 6.153056335135876e-05, "loss": 0.6363, "step": 28851 }, { "epoch": 1.9548749915305916, "grad_norm": 7.766153812408447, "learning_rate": 6.152919433226094e-05, "loss": 0.6743, "step": 28852 }, { "epoch": 1.9549427467985636, "grad_norm": 6.068765163421631, "learning_rate": 6.152782531316313e-05, "loss": 0.8143, "step": 28853 }, { "epoch": 1.9550105020665356, "grad_norm": 7.005800247192383, "learning_rate": 6.152645629406531e-05, "loss": 0.7657, "step": 28854 }, { "epoch": 1.9550782573345078, "grad_norm": 5.853997707366943, "learning_rate": 6.152508727496749e-05, "loss": 0.7449, "step": 28855 }, { "epoch": 1.95514601260248, "grad_norm": 6.580873966217041, "learning_rate": 6.152371825586968e-05, "loss": 0.7789, "step": 28856 }, { "epoch": 1.955213767870452, "grad_norm": 7.370440483093262, "learning_rate": 6.152234923677186e-05, "loss": 0.752, "step": 28857 }, { "epoch": 1.955281523138424, "grad_norm": 5.970898628234863, "learning_rate": 6.152098021767404e-05, "loss": 0.6408, "step": 28858 }, { "epoch": 1.9553492784063962, "grad_norm": 5.60892391204834, "learning_rate": 6.151961119857623e-05, "loss": 0.5949, "step": 28859 }, { "epoch": 1.9554170336743681, "grad_norm": 4.091507434844971, "learning_rate": 6.15182421794784e-05, "loss": 0.5611, "step": 28860 }, { "epoch": 1.9554847889423401, "grad_norm": 6.424842834472656, "learning_rate": 6.15168731603806e-05, "loss": 0.6861, "step": 28861 }, { "epoch": 1.9555525442103123, "grad_norm": 5.848278522491455, "learning_rate": 6.151550414128278e-05, "loss": 0.6867, "step": 28862 }, { "epoch": 1.9556202994782845, "grad_norm": 4.520167350769043, "learning_rate": 6.151413512218496e-05, "loss": 0.545, "step": 28863 }, { "epoch": 1.9556880547462565, "grad_norm": 10.690908432006836, "learning_rate": 6.151276610308714e-05, "loss": 0.5774, "step": 28864 }, { "epoch": 1.9557558100142285, "grad_norm": 5.153115272521973, "learning_rate": 6.151139708398932e-05, "loss": 0.4279, "step": 28865 }, { "epoch": 1.9558235652822007, "grad_norm": 6.3178791999816895, "learning_rate": 6.151002806489151e-05, "loss": 0.7498, "step": 28866 }, { "epoch": 1.955891320550173, "grad_norm": 5.159513473510742, "learning_rate": 6.15086590457937e-05, "loss": 0.8724, "step": 28867 }, { "epoch": 1.9559590758181449, "grad_norm": 7.239426612854004, "learning_rate": 6.150729002669588e-05, "loss": 0.7056, "step": 28868 }, { "epoch": 1.9560268310861169, "grad_norm": 5.71388578414917, "learning_rate": 6.150592100759806e-05, "loss": 0.582, "step": 28869 }, { "epoch": 1.956094586354089, "grad_norm": 4.586304664611816, "learning_rate": 6.150455198850025e-05, "loss": 0.565, "step": 28870 }, { "epoch": 1.9561623416220613, "grad_norm": 5.728758811950684, "learning_rate": 6.150318296940243e-05, "loss": 0.5688, "step": 28871 }, { "epoch": 1.9562300968900332, "grad_norm": 5.075711727142334, "learning_rate": 6.150181395030461e-05, "loss": 0.639, "step": 28872 }, { "epoch": 1.9562978521580052, "grad_norm": 6.264239311218262, "learning_rate": 6.150044493120679e-05, "loss": 0.5849, "step": 28873 }, { "epoch": 1.9563656074259774, "grad_norm": 7.786729335784912, "learning_rate": 6.149907591210897e-05, "loss": 0.6131, "step": 28874 }, { "epoch": 1.9564333626939494, "grad_norm": 4.640535831451416, "learning_rate": 6.149770689301116e-05, "loss": 0.4988, "step": 28875 }, { "epoch": 1.9565011179619214, "grad_norm": 5.136857986450195, "learning_rate": 6.149633787391335e-05, "loss": 0.558, "step": 28876 }, { "epoch": 1.9565688732298936, "grad_norm": 6.769590854644775, "learning_rate": 6.149496885481553e-05, "loss": 0.589, "step": 28877 }, { "epoch": 1.9566366284978658, "grad_norm": 10.301228523254395, "learning_rate": 6.14935998357177e-05, "loss": 0.6028, "step": 28878 }, { "epoch": 1.9567043837658378, "grad_norm": 5.710690975189209, "learning_rate": 6.14922308166199e-05, "loss": 0.5563, "step": 28879 }, { "epoch": 1.9567721390338098, "grad_norm": 8.383143424987793, "learning_rate": 6.149086179752208e-05, "loss": 0.6975, "step": 28880 }, { "epoch": 1.956839894301782, "grad_norm": 7.660165786743164, "learning_rate": 6.148949277842426e-05, "loss": 0.5819, "step": 28881 }, { "epoch": 1.9569076495697542, "grad_norm": 5.845088481903076, "learning_rate": 6.148812375932644e-05, "loss": 0.7491, "step": 28882 }, { "epoch": 1.9569754048377261, "grad_norm": 8.105628967285156, "learning_rate": 6.148675474022862e-05, "loss": 0.6899, "step": 28883 }, { "epoch": 1.9570431601056981, "grad_norm": 4.791311264038086, "learning_rate": 6.148538572113082e-05, "loss": 0.5789, "step": 28884 }, { "epoch": 1.9571109153736703, "grad_norm": 5.790649890899658, "learning_rate": 6.1484016702033e-05, "loss": 0.5591, "step": 28885 }, { "epoch": 1.9571786706416425, "grad_norm": 5.742051124572754, "learning_rate": 6.148264768293518e-05, "loss": 0.5892, "step": 28886 }, { "epoch": 1.9572464259096145, "grad_norm": 7.415877819061279, "learning_rate": 6.148127866383736e-05, "loss": 0.4857, "step": 28887 }, { "epoch": 1.9573141811775865, "grad_norm": 7.224211692810059, "learning_rate": 6.147990964473954e-05, "loss": 0.7006, "step": 28888 }, { "epoch": 1.9573819364455587, "grad_norm": 6.297126293182373, "learning_rate": 6.147854062564173e-05, "loss": 0.696, "step": 28889 }, { "epoch": 1.957449691713531, "grad_norm": 10.787469863891602, "learning_rate": 6.147717160654391e-05, "loss": 0.6664, "step": 28890 }, { "epoch": 1.9575174469815027, "grad_norm": 5.3572845458984375, "learning_rate": 6.147580258744609e-05, "loss": 0.6827, "step": 28891 }, { "epoch": 1.9575852022494749, "grad_norm": 4.461609363555908, "learning_rate": 6.147443356834827e-05, "loss": 0.6487, "step": 28892 }, { "epoch": 1.957652957517447, "grad_norm": 8.352335929870605, "learning_rate": 6.147306454925047e-05, "loss": 0.7327, "step": 28893 }, { "epoch": 1.957720712785419, "grad_norm": 7.3421101570129395, "learning_rate": 6.147169553015265e-05, "loss": 0.6379, "step": 28894 }, { "epoch": 1.957788468053391, "grad_norm": 4.859137535095215, "learning_rate": 6.147032651105483e-05, "loss": 0.747, "step": 28895 }, { "epoch": 1.9578562233213632, "grad_norm": 6.488630294799805, "learning_rate": 6.146895749195702e-05, "loss": 0.5625, "step": 28896 }, { "epoch": 1.9579239785893354, "grad_norm": 7.0655837059021, "learning_rate": 6.14675884728592e-05, "loss": 0.5255, "step": 28897 }, { "epoch": 1.9579917338573074, "grad_norm": 9.669530868530273, "learning_rate": 6.146621945376138e-05, "loss": 0.7087, "step": 28898 }, { "epoch": 1.9580594891252794, "grad_norm": 5.1551384925842285, "learning_rate": 6.146485043466357e-05, "loss": 0.6685, "step": 28899 }, { "epoch": 1.9581272443932516, "grad_norm": 5.584240913391113, "learning_rate": 6.146348141556575e-05, "loss": 0.9357, "step": 28900 }, { "epoch": 1.9581949996612238, "grad_norm": 6.648988723754883, "learning_rate": 6.146211239646794e-05, "loss": 0.611, "step": 28901 }, { "epoch": 1.9582627549291958, "grad_norm": 5.101949214935303, "learning_rate": 6.146074337737013e-05, "loss": 0.8043, "step": 28902 }, { "epoch": 1.9583305101971678, "grad_norm": 5.86761999130249, "learning_rate": 6.145937435827231e-05, "loss": 0.5707, "step": 28903 }, { "epoch": 1.95839826546514, "grad_norm": 7.598624229431152, "learning_rate": 6.145800533917449e-05, "loss": 0.7241, "step": 28904 }, { "epoch": 1.9584660207331122, "grad_norm": 7.512176036834717, "learning_rate": 6.145663632007667e-05, "loss": 0.7418, "step": 28905 }, { "epoch": 1.9585337760010841, "grad_norm": 4.843771457672119, "learning_rate": 6.145526730097885e-05, "loss": 0.4749, "step": 28906 }, { "epoch": 1.9586015312690561, "grad_norm": 6.519543647766113, "learning_rate": 6.145389828188104e-05, "loss": 0.6042, "step": 28907 }, { "epoch": 1.9586692865370283, "grad_norm": 5.248413562774658, "learning_rate": 6.145252926278322e-05, "loss": 0.5449, "step": 28908 }, { "epoch": 1.9587370418050003, "grad_norm": 6.345320701599121, "learning_rate": 6.14511602436854e-05, "loss": 0.6453, "step": 28909 }, { "epoch": 1.9588047970729723, "grad_norm": 4.284171104431152, "learning_rate": 6.144979122458759e-05, "loss": 0.5697, "step": 28910 }, { "epoch": 1.9588725523409445, "grad_norm": 5.569398880004883, "learning_rate": 6.144842220548977e-05, "loss": 0.8933, "step": 28911 }, { "epoch": 1.9589403076089167, "grad_norm": 7.190845966339111, "learning_rate": 6.144705318639196e-05, "loss": 0.856, "step": 28912 }, { "epoch": 1.9590080628768887, "grad_norm": 4.648343086242676, "learning_rate": 6.144568416729414e-05, "loss": 0.6775, "step": 28913 }, { "epoch": 1.9590758181448606, "grad_norm": 8.214643478393555, "learning_rate": 6.144431514819632e-05, "loss": 0.6365, "step": 28914 }, { "epoch": 1.9591435734128329, "grad_norm": 5.924804210662842, "learning_rate": 6.14429461290985e-05, "loss": 0.6022, "step": 28915 }, { "epoch": 1.959211328680805, "grad_norm": 7.368619441986084, "learning_rate": 6.14415771100007e-05, "loss": 0.5743, "step": 28916 }, { "epoch": 1.959279083948777, "grad_norm": 5.499876022338867, "learning_rate": 6.144020809090287e-05, "loss": 0.6408, "step": 28917 }, { "epoch": 1.959346839216749, "grad_norm": 4.913097858428955, "learning_rate": 6.143883907180506e-05, "loss": 0.5641, "step": 28918 }, { "epoch": 1.9594145944847212, "grad_norm": 4.883818626403809, "learning_rate": 6.143747005270724e-05, "loss": 0.7751, "step": 28919 }, { "epoch": 1.9594823497526934, "grad_norm": 4.84994649887085, "learning_rate": 6.143610103360942e-05, "loss": 0.7682, "step": 28920 }, { "epoch": 1.9595501050206654, "grad_norm": 6.4821953773498535, "learning_rate": 6.143473201451161e-05, "loss": 0.6161, "step": 28921 }, { "epoch": 1.9596178602886374, "grad_norm": 4.960611343383789, "learning_rate": 6.143336299541379e-05, "loss": 0.5498, "step": 28922 }, { "epoch": 1.9596856155566096, "grad_norm": 5.259833812713623, "learning_rate": 6.143199397631597e-05, "loss": 0.8027, "step": 28923 }, { "epoch": 1.9597533708245816, "grad_norm": 8.247075080871582, "learning_rate": 6.143062495721815e-05, "loss": 0.7, "step": 28924 }, { "epoch": 1.9598211260925535, "grad_norm": 6.164152145385742, "learning_rate": 6.142925593812034e-05, "loss": 0.8248, "step": 28925 }, { "epoch": 1.9598888813605257, "grad_norm": 5.319053649902344, "learning_rate": 6.142788691902252e-05, "loss": 0.7903, "step": 28926 }, { "epoch": 1.959956636628498, "grad_norm": 5.036146640777588, "learning_rate": 6.14265178999247e-05, "loss": 0.5887, "step": 28927 }, { "epoch": 1.96002439189647, "grad_norm": 4.2241291999816895, "learning_rate": 6.142514888082689e-05, "loss": 0.5153, "step": 28928 }, { "epoch": 1.960092147164442, "grad_norm": 5.472495079040527, "learning_rate": 6.142377986172907e-05, "loss": 0.5871, "step": 28929 }, { "epoch": 1.9601599024324141, "grad_norm": 5.977293014526367, "learning_rate": 6.142241084263126e-05, "loss": 0.5855, "step": 28930 }, { "epoch": 1.9602276577003863, "grad_norm": 6.423376560211182, "learning_rate": 6.142104182353344e-05, "loss": 0.6327, "step": 28931 }, { "epoch": 1.9602954129683583, "grad_norm": 9.00832748413086, "learning_rate": 6.141967280443562e-05, "loss": 0.6218, "step": 28932 }, { "epoch": 1.9603631682363303, "grad_norm": 15.605475425720215, "learning_rate": 6.14183037853378e-05, "loss": 0.8462, "step": 28933 }, { "epoch": 1.9604309235043025, "grad_norm": 9.590350151062012, "learning_rate": 6.141693476624e-05, "loss": 0.5761, "step": 28934 }, { "epoch": 1.9604986787722747, "grad_norm": 7.903284549713135, "learning_rate": 6.141556574714218e-05, "loss": 0.6433, "step": 28935 }, { "epoch": 1.9605664340402467, "grad_norm": 8.605879783630371, "learning_rate": 6.141419672804436e-05, "loss": 0.6074, "step": 28936 }, { "epoch": 1.9606341893082186, "grad_norm": 6.4242987632751465, "learning_rate": 6.141282770894654e-05, "loss": 0.5847, "step": 28937 }, { "epoch": 1.9607019445761908, "grad_norm": 4.912962436676025, "learning_rate": 6.141145868984872e-05, "loss": 0.5927, "step": 28938 }, { "epoch": 1.960769699844163, "grad_norm": 4.664330959320068, "learning_rate": 6.141008967075091e-05, "loss": 0.5497, "step": 28939 }, { "epoch": 1.9608374551121348, "grad_norm": 5.34434700012207, "learning_rate": 6.140872065165309e-05, "loss": 0.5563, "step": 28940 }, { "epoch": 1.960905210380107, "grad_norm": 8.524620056152344, "learning_rate": 6.140735163255527e-05, "loss": 0.7316, "step": 28941 }, { "epoch": 1.9609729656480792, "grad_norm": 5.5719170570373535, "learning_rate": 6.140598261345745e-05, "loss": 0.5937, "step": 28942 }, { "epoch": 1.9610407209160512, "grad_norm": 5.816039085388184, "learning_rate": 6.140461359435964e-05, "loss": 0.7157, "step": 28943 }, { "epoch": 1.9611084761840232, "grad_norm": 5.816992282867432, "learning_rate": 6.140324457526183e-05, "loss": 0.7201, "step": 28944 }, { "epoch": 1.9611762314519954, "grad_norm": 11.75878620147705, "learning_rate": 6.140187555616402e-05, "loss": 0.6708, "step": 28945 }, { "epoch": 1.9612439867199676, "grad_norm": 6.465924263000488, "learning_rate": 6.14005065370662e-05, "loss": 0.6637, "step": 28946 }, { "epoch": 1.9613117419879396, "grad_norm": 6.660533905029297, "learning_rate": 6.139913751796838e-05, "loss": 0.6131, "step": 28947 }, { "epoch": 1.9613794972559115, "grad_norm": 5.098770618438721, "learning_rate": 6.139776849887057e-05, "loss": 0.5317, "step": 28948 }, { "epoch": 1.9614472525238837, "grad_norm": 7.8518147468566895, "learning_rate": 6.139639947977275e-05, "loss": 0.6219, "step": 28949 }, { "epoch": 1.961515007791856, "grad_norm": 8.825947761535645, "learning_rate": 6.139503046067493e-05, "loss": 0.6028, "step": 28950 }, { "epoch": 1.961582763059828, "grad_norm": 8.084100723266602, "learning_rate": 6.139366144157711e-05, "loss": 0.3727, "step": 28951 }, { "epoch": 1.9616505183278, "grad_norm": 4.405707359313965, "learning_rate": 6.13922924224793e-05, "loss": 0.6116, "step": 28952 }, { "epoch": 1.961718273595772, "grad_norm": 6.748661994934082, "learning_rate": 6.139092340338149e-05, "loss": 0.8652, "step": 28953 }, { "epoch": 1.9617860288637443, "grad_norm": 4.595452785491943, "learning_rate": 6.138955438428367e-05, "loss": 0.5746, "step": 28954 }, { "epoch": 1.9618537841317163, "grad_norm": 8.71441650390625, "learning_rate": 6.138818536518585e-05, "loss": 0.6599, "step": 28955 }, { "epoch": 1.9619215393996883, "grad_norm": 6.202203273773193, "learning_rate": 6.138681634608803e-05, "loss": 0.5337, "step": 28956 }, { "epoch": 1.9619892946676605, "grad_norm": 6.207939624786377, "learning_rate": 6.138544732699022e-05, "loss": 0.7338, "step": 28957 }, { "epoch": 1.9620570499356325, "grad_norm": 8.76633358001709, "learning_rate": 6.13840783078924e-05, "loss": 0.8777, "step": 28958 }, { "epoch": 1.9621248052036044, "grad_norm": 7.134418487548828, "learning_rate": 6.138270928879458e-05, "loss": 0.8644, "step": 28959 }, { "epoch": 1.9621925604715766, "grad_norm": 10.837727546691895, "learning_rate": 6.138134026969676e-05, "loss": 0.8385, "step": 28960 }, { "epoch": 1.9622603157395488, "grad_norm": 6.070011615753174, "learning_rate": 6.137997125059895e-05, "loss": 0.7423, "step": 28961 }, { "epoch": 1.9623280710075208, "grad_norm": 4.4019293785095215, "learning_rate": 6.137860223150114e-05, "loss": 0.5595, "step": 28962 }, { "epoch": 1.9623958262754928, "grad_norm": 6.246272563934326, "learning_rate": 6.137723321240332e-05, "loss": 0.6188, "step": 28963 }, { "epoch": 1.962463581543465, "grad_norm": 6.856026649475098, "learning_rate": 6.13758641933055e-05, "loss": 0.6302, "step": 28964 }, { "epoch": 1.9625313368114372, "grad_norm": 5.587317943572998, "learning_rate": 6.137449517420768e-05, "loss": 0.5079, "step": 28965 }, { "epoch": 1.9625990920794092, "grad_norm": 6.939572334289551, "learning_rate": 6.137312615510986e-05, "loss": 0.5443, "step": 28966 }, { "epoch": 1.9626668473473812, "grad_norm": 5.5739545822143555, "learning_rate": 6.137175713601205e-05, "loss": 0.6097, "step": 28967 }, { "epoch": 1.9627346026153534, "grad_norm": 5.8647356033325195, "learning_rate": 6.137038811691423e-05, "loss": 0.7358, "step": 28968 }, { "epoch": 1.9628023578833256, "grad_norm": 6.041688919067383, "learning_rate": 6.136901909781642e-05, "loss": 0.6562, "step": 28969 }, { "epoch": 1.9628701131512976, "grad_norm": 5.463731288909912, "learning_rate": 6.13676500787186e-05, "loss": 0.6788, "step": 28970 }, { "epoch": 1.9629378684192695, "grad_norm": 5.441404342651367, "learning_rate": 6.136628105962079e-05, "loss": 0.7198, "step": 28971 }, { "epoch": 1.9630056236872417, "grad_norm": 5.100437641143799, "learning_rate": 6.136491204052297e-05, "loss": 0.743, "step": 28972 }, { "epoch": 1.9630733789552137, "grad_norm": 5.764346599578857, "learning_rate": 6.136354302142515e-05, "loss": 0.7511, "step": 28973 }, { "epoch": 1.9631411342231857, "grad_norm": 5.502767086029053, "learning_rate": 6.136217400232733e-05, "loss": 0.6393, "step": 28974 }, { "epoch": 1.963208889491158, "grad_norm": 4.156765937805176, "learning_rate": 6.136080498322951e-05, "loss": 0.611, "step": 28975 }, { "epoch": 1.96327664475913, "grad_norm": 6.125405311584473, "learning_rate": 6.13594359641317e-05, "loss": 0.8793, "step": 28976 }, { "epoch": 1.963344400027102, "grad_norm": 5.750539302825928, "learning_rate": 6.135806694503388e-05, "loss": 0.4968, "step": 28977 }, { "epoch": 1.963412155295074, "grad_norm": 7.588444232940674, "learning_rate": 6.135669792593607e-05, "loss": 0.7482, "step": 28978 }, { "epoch": 1.9634799105630463, "grad_norm": 6.540208339691162, "learning_rate": 6.135532890683825e-05, "loss": 0.6563, "step": 28979 }, { "epoch": 1.9635476658310185, "grad_norm": 6.812836647033691, "learning_rate": 6.135395988774044e-05, "loss": 0.5702, "step": 28980 }, { "epoch": 1.9636154210989905, "grad_norm": 5.895477771759033, "learning_rate": 6.135259086864262e-05, "loss": 0.532, "step": 28981 }, { "epoch": 1.9636831763669624, "grad_norm": 5.53212308883667, "learning_rate": 6.13512218495448e-05, "loss": 0.7345, "step": 28982 }, { "epoch": 1.9637509316349346, "grad_norm": 7.931548118591309, "learning_rate": 6.134985283044698e-05, "loss": 0.5142, "step": 28983 }, { "epoch": 1.9638186869029068, "grad_norm": 5.837134838104248, "learning_rate": 6.134848381134916e-05, "loss": 0.5184, "step": 28984 }, { "epoch": 1.9638864421708788, "grad_norm": 5.558528423309326, "learning_rate": 6.134711479225135e-05, "loss": 0.6575, "step": 28985 }, { "epoch": 1.9639541974388508, "grad_norm": 5.77218770980835, "learning_rate": 6.134574577315354e-05, "loss": 0.6848, "step": 28986 }, { "epoch": 1.964021952706823, "grad_norm": 5.129573822021484, "learning_rate": 6.134437675405572e-05, "loss": 0.4592, "step": 28987 }, { "epoch": 1.964089707974795, "grad_norm": 5.6337385177612305, "learning_rate": 6.13430077349579e-05, "loss": 0.7121, "step": 28988 }, { "epoch": 1.964157463242767, "grad_norm": 5.635845184326172, "learning_rate": 6.134163871586009e-05, "loss": 0.8205, "step": 28989 }, { "epoch": 1.9642252185107392, "grad_norm": 9.898343086242676, "learning_rate": 6.134026969676227e-05, "loss": 0.7368, "step": 28990 }, { "epoch": 1.9642929737787114, "grad_norm": 6.140385150909424, "learning_rate": 6.133890067766445e-05, "loss": 0.7521, "step": 28991 }, { "epoch": 1.9643607290466834, "grad_norm": 7.281193256378174, "learning_rate": 6.133753165856664e-05, "loss": 0.7039, "step": 28992 }, { "epoch": 1.9644284843146553, "grad_norm": 4.26568603515625, "learning_rate": 6.133616263946882e-05, "loss": 0.5023, "step": 28993 }, { "epoch": 1.9644962395826275, "grad_norm": 6.609010696411133, "learning_rate": 6.1334793620371e-05, "loss": 0.7873, "step": 28994 }, { "epoch": 1.9645639948505997, "grad_norm": 4.9739155769348145, "learning_rate": 6.13334246012732e-05, "loss": 0.8375, "step": 28995 }, { "epoch": 1.9646317501185717, "grad_norm": 4.602044105529785, "learning_rate": 6.133205558217538e-05, "loss": 0.7458, "step": 28996 }, { "epoch": 1.9646995053865437, "grad_norm": 6.714208126068115, "learning_rate": 6.133068656307756e-05, "loss": 0.8619, "step": 28997 }, { "epoch": 1.964767260654516, "grad_norm": 4.919519424438477, "learning_rate": 6.132931754397974e-05, "loss": 0.858, "step": 28998 }, { "epoch": 1.964835015922488, "grad_norm": 5.190576553344727, "learning_rate": 6.132794852488193e-05, "loss": 0.696, "step": 28999 }, { "epoch": 1.96490277119046, "grad_norm": 5.230053424835205, "learning_rate": 6.132657950578411e-05, "loss": 0.6002, "step": 29000 }, { "epoch": 1.964970526458432, "grad_norm": 6.920840740203857, "learning_rate": 6.13252104866863e-05, "loss": 0.4685, "step": 29001 }, { "epoch": 1.9650382817264043, "grad_norm": 5.093705654144287, "learning_rate": 6.132384146758847e-05, "loss": 0.5402, "step": 29002 }, { "epoch": 1.9651060369943765, "grad_norm": 7.337501525878906, "learning_rate": 6.132247244849067e-05, "loss": 0.7539, "step": 29003 }, { "epoch": 1.9651737922623485, "grad_norm": 9.352638244628906, "learning_rate": 6.132110342939285e-05, "loss": 0.7614, "step": 29004 }, { "epoch": 1.9652415475303204, "grad_norm": 6.304286956787109, "learning_rate": 6.131973441029503e-05, "loss": 0.5024, "step": 29005 }, { "epoch": 1.9653093027982926, "grad_norm": 7.128985404968262, "learning_rate": 6.131836539119721e-05, "loss": 0.4507, "step": 29006 }, { "epoch": 1.9653770580662646, "grad_norm": 6.443929195404053, "learning_rate": 6.131699637209939e-05, "loss": 0.7494, "step": 29007 }, { "epoch": 1.9654448133342366, "grad_norm": 5.0976457595825195, "learning_rate": 6.131562735300158e-05, "loss": 0.6719, "step": 29008 }, { "epoch": 1.9655125686022088, "grad_norm": 6.290353775024414, "learning_rate": 6.131425833390376e-05, "loss": 0.599, "step": 29009 }, { "epoch": 1.965580323870181, "grad_norm": 11.679778099060059, "learning_rate": 6.131288931480594e-05, "loss": 0.653, "step": 29010 }, { "epoch": 1.965648079138153, "grad_norm": 5.916609287261963, "learning_rate": 6.131152029570812e-05, "loss": 0.7052, "step": 29011 }, { "epoch": 1.965715834406125, "grad_norm": 4.350180149078369, "learning_rate": 6.131015127661032e-05, "loss": 0.4919, "step": 29012 }, { "epoch": 1.9657835896740972, "grad_norm": 5.160588264465332, "learning_rate": 6.13087822575125e-05, "loss": 0.529, "step": 29013 }, { "epoch": 1.9658513449420694, "grad_norm": 6.098862648010254, "learning_rate": 6.130741323841468e-05, "loss": 0.6174, "step": 29014 }, { "epoch": 1.9659191002100413, "grad_norm": 4.119729042053223, "learning_rate": 6.130604421931686e-05, "loss": 0.6265, "step": 29015 }, { "epoch": 1.9659868554780133, "grad_norm": 6.015602111816406, "learning_rate": 6.130467520021904e-05, "loss": 0.6705, "step": 29016 }, { "epoch": 1.9660546107459855, "grad_norm": 5.341603755950928, "learning_rate": 6.130330618112123e-05, "loss": 0.6988, "step": 29017 }, { "epoch": 1.9661223660139577, "grad_norm": 6.407102108001709, "learning_rate": 6.130193716202341e-05, "loss": 0.6104, "step": 29018 }, { "epoch": 1.9661901212819297, "grad_norm": 5.381917953491211, "learning_rate": 6.13005681429256e-05, "loss": 0.7288, "step": 29019 }, { "epoch": 1.9662578765499017, "grad_norm": 10.077352523803711, "learning_rate": 6.129919912382778e-05, "loss": 0.6633, "step": 29020 }, { "epoch": 1.966325631817874, "grad_norm": 5.926183700561523, "learning_rate": 6.129783010472996e-05, "loss": 0.6995, "step": 29021 }, { "epoch": 1.9663933870858459, "grad_norm": 6.568513870239258, "learning_rate": 6.129646108563215e-05, "loss": 0.691, "step": 29022 }, { "epoch": 1.9664611423538179, "grad_norm": 5.261431694030762, "learning_rate": 6.129509206653433e-05, "loss": 0.8528, "step": 29023 }, { "epoch": 1.96652889762179, "grad_norm": 5.44044303894043, "learning_rate": 6.129372304743651e-05, "loss": 0.5916, "step": 29024 }, { "epoch": 1.9665966528897623, "grad_norm": 5.334259510040283, "learning_rate": 6.129235402833869e-05, "loss": 0.5109, "step": 29025 }, { "epoch": 1.9666644081577342, "grad_norm": 8.541903495788574, "learning_rate": 6.129098500924088e-05, "loss": 0.5654, "step": 29026 }, { "epoch": 1.9667321634257062, "grad_norm": 11.885461807250977, "learning_rate": 6.128961599014306e-05, "loss": 0.6005, "step": 29027 }, { "epoch": 1.9667999186936784, "grad_norm": 7.119378089904785, "learning_rate": 6.128824697104524e-05, "loss": 0.7314, "step": 29028 }, { "epoch": 1.9668676739616506, "grad_norm": 6.65686559677124, "learning_rate": 6.128687795194743e-05, "loss": 0.7143, "step": 29029 }, { "epoch": 1.9669354292296226, "grad_norm": 5.1780619621276855, "learning_rate": 6.12855089328496e-05, "loss": 0.6241, "step": 29030 }, { "epoch": 1.9670031844975946, "grad_norm": 4.988961219787598, "learning_rate": 6.12841399137518e-05, "loss": 0.5517, "step": 29031 }, { "epoch": 1.9670709397655668, "grad_norm": 6.406604766845703, "learning_rate": 6.128277089465398e-05, "loss": 0.7962, "step": 29032 }, { "epoch": 1.967138695033539, "grad_norm": 7.278943061828613, "learning_rate": 6.128140187555616e-05, "loss": 0.6943, "step": 29033 }, { "epoch": 1.967206450301511, "grad_norm": 5.706309795379639, "learning_rate": 6.128003285645834e-05, "loss": 0.6311, "step": 29034 }, { "epoch": 1.967274205569483, "grad_norm": 10.52574348449707, "learning_rate": 6.127866383736053e-05, "loss": 0.6776, "step": 29035 }, { "epoch": 1.9673419608374552, "grad_norm": 4.665840148925781, "learning_rate": 6.127729481826271e-05, "loss": 0.6024, "step": 29036 }, { "epoch": 1.9674097161054271, "grad_norm": 5.895798206329346, "learning_rate": 6.12759257991649e-05, "loss": 0.5489, "step": 29037 }, { "epoch": 1.9674774713733991, "grad_norm": 5.836568355560303, "learning_rate": 6.127455678006709e-05, "loss": 0.5973, "step": 29038 }, { "epoch": 1.9675452266413713, "grad_norm": 5.488255500793457, "learning_rate": 6.127318776096927e-05, "loss": 0.5396, "step": 29039 }, { "epoch": 1.9676129819093435, "grad_norm": 6.366283893585205, "learning_rate": 6.127181874187145e-05, "loss": 0.6619, "step": 29040 }, { "epoch": 1.9676807371773155, "grad_norm": 9.516559600830078, "learning_rate": 6.127044972277364e-05, "loss": 0.612, "step": 29041 }, { "epoch": 1.9677484924452875, "grad_norm": 4.969685077667236, "learning_rate": 6.126908070367582e-05, "loss": 0.5369, "step": 29042 }, { "epoch": 1.9678162477132597, "grad_norm": 6.766669273376465, "learning_rate": 6.1267711684578e-05, "loss": 0.7026, "step": 29043 }, { "epoch": 1.967884002981232, "grad_norm": 6.524433612823486, "learning_rate": 6.126634266548018e-05, "loss": 0.6749, "step": 29044 }, { "epoch": 1.9679517582492039, "grad_norm": 6.742608547210693, "learning_rate": 6.126497364638238e-05, "loss": 0.4495, "step": 29045 }, { "epoch": 1.9680195135171759, "grad_norm": 5.25252103805542, "learning_rate": 6.126360462728456e-05, "loss": 0.6297, "step": 29046 }, { "epoch": 1.968087268785148, "grad_norm": 6.794873237609863, "learning_rate": 6.126223560818674e-05, "loss": 0.6282, "step": 29047 }, { "epoch": 1.9681550240531203, "grad_norm": 5.984779357910156, "learning_rate": 6.126086658908892e-05, "loss": 0.6616, "step": 29048 }, { "epoch": 1.9682227793210922, "grad_norm": 6.634092807769775, "learning_rate": 6.125949756999111e-05, "loss": 0.7879, "step": 29049 }, { "epoch": 1.9682905345890642, "grad_norm": 5.225188255310059, "learning_rate": 6.12581285508933e-05, "loss": 0.7329, "step": 29050 }, { "epoch": 1.9683582898570364, "grad_norm": 5.979741096496582, "learning_rate": 6.125675953179547e-05, "loss": 0.835, "step": 29051 }, { "epoch": 1.9684260451250086, "grad_norm": 5.8609724044799805, "learning_rate": 6.125539051269765e-05, "loss": 0.7209, "step": 29052 }, { "epoch": 1.9684938003929806, "grad_norm": 6.011023044586182, "learning_rate": 6.125402149359983e-05, "loss": 0.7732, "step": 29053 }, { "epoch": 1.9685615556609526, "grad_norm": 4.092095851898193, "learning_rate": 6.125265247450203e-05, "loss": 0.3476, "step": 29054 }, { "epoch": 1.9686293109289248, "grad_norm": 5.588472366333008, "learning_rate": 6.125128345540421e-05, "loss": 0.6109, "step": 29055 }, { "epoch": 1.9686970661968968, "grad_norm": 7.563730716705322, "learning_rate": 6.124991443630639e-05, "loss": 0.65, "step": 29056 }, { "epoch": 1.9687648214648688, "grad_norm": 5.735250949859619, "learning_rate": 6.124854541720857e-05, "loss": 0.7087, "step": 29057 }, { "epoch": 1.968832576732841, "grad_norm": 7.215830326080322, "learning_rate": 6.124717639811076e-05, "loss": 0.7439, "step": 29058 }, { "epoch": 1.9689003320008132, "grad_norm": 5.3754754066467285, "learning_rate": 6.124580737901294e-05, "loss": 0.6441, "step": 29059 }, { "epoch": 1.9689680872687851, "grad_norm": 4.666903018951416, "learning_rate": 6.124443835991512e-05, "loss": 0.562, "step": 29060 }, { "epoch": 1.9690358425367571, "grad_norm": 4.884100437164307, "learning_rate": 6.12430693408173e-05, "loss": 0.6052, "step": 29061 }, { "epoch": 1.9691035978047293, "grad_norm": 6.038975238800049, "learning_rate": 6.124170032171948e-05, "loss": 0.6276, "step": 29062 }, { "epoch": 1.9691713530727015, "grad_norm": 5.114677906036377, "learning_rate": 6.124033130262168e-05, "loss": 0.5472, "step": 29063 }, { "epoch": 1.9692391083406735, "grad_norm": 7.423213958740234, "learning_rate": 6.123896228352386e-05, "loss": 0.7639, "step": 29064 }, { "epoch": 1.9693068636086455, "grad_norm": 4.7420125007629395, "learning_rate": 6.123759326442604e-05, "loss": 0.6612, "step": 29065 }, { "epoch": 1.9693746188766177, "grad_norm": 6.4766435623168945, "learning_rate": 6.123622424532822e-05, "loss": 0.6408, "step": 29066 }, { "epoch": 1.96944237414459, "grad_norm": 5.792601585388184, "learning_rate": 6.123485522623041e-05, "loss": 0.5509, "step": 29067 }, { "epoch": 1.9695101294125619, "grad_norm": 5.055642604827881, "learning_rate": 6.12334862071326e-05, "loss": 0.6492, "step": 29068 }, { "epoch": 1.9695778846805339, "grad_norm": 5.878149509429932, "learning_rate": 6.123211718803477e-05, "loss": 0.7554, "step": 29069 }, { "epoch": 1.969645639948506, "grad_norm": 12.752946853637695, "learning_rate": 6.123074816893695e-05, "loss": 0.4406, "step": 29070 }, { "epoch": 1.969713395216478, "grad_norm": 4.163554668426514, "learning_rate": 6.122937914983914e-05, "loss": 0.7418, "step": 29071 }, { "epoch": 1.96978115048445, "grad_norm": 6.399400234222412, "learning_rate": 6.122801013074133e-05, "loss": 0.4582, "step": 29072 }, { "epoch": 1.9698489057524222, "grad_norm": 5.5069122314453125, "learning_rate": 6.122664111164351e-05, "loss": 0.5764, "step": 29073 }, { "epoch": 1.9699166610203944, "grad_norm": 6.591382026672363, "learning_rate": 6.122527209254569e-05, "loss": 0.6758, "step": 29074 }, { "epoch": 1.9699844162883664, "grad_norm": 6.420788288116455, "learning_rate": 6.122390307344787e-05, "loss": 0.55, "step": 29075 }, { "epoch": 1.9700521715563384, "grad_norm": 6.120480060577393, "learning_rate": 6.122253405435005e-05, "loss": 0.6763, "step": 29076 }, { "epoch": 1.9701199268243106, "grad_norm": 5.674555778503418, "learning_rate": 6.122116503525224e-05, "loss": 0.62, "step": 29077 }, { "epoch": 1.9701876820922828, "grad_norm": 6.375414848327637, "learning_rate": 6.121979601615442e-05, "loss": 0.81, "step": 29078 }, { "epoch": 1.9702554373602548, "grad_norm": 6.800955295562744, "learning_rate": 6.12184269970566e-05, "loss": 0.6041, "step": 29079 }, { "epoch": 1.9703231926282267, "grad_norm": 5.515408039093018, "learning_rate": 6.121705797795879e-05, "loss": 0.5926, "step": 29080 }, { "epoch": 1.970390947896199, "grad_norm": 6.310666084289551, "learning_rate": 6.121568895886098e-05, "loss": 0.5326, "step": 29081 }, { "epoch": 1.9704587031641712, "grad_norm": 5.113640308380127, "learning_rate": 6.121431993976316e-05, "loss": 0.8384, "step": 29082 }, { "epoch": 1.9705264584321431, "grad_norm": 5.180152416229248, "learning_rate": 6.121295092066534e-05, "loss": 0.636, "step": 29083 }, { "epoch": 1.9705942137001151, "grad_norm": 5.768306732177734, "learning_rate": 6.121158190156753e-05, "loss": 0.7534, "step": 29084 }, { "epoch": 1.9706619689680873, "grad_norm": 5.877782344818115, "learning_rate": 6.121021288246971e-05, "loss": 0.5127, "step": 29085 }, { "epoch": 1.9707297242360593, "grad_norm": 5.022743225097656, "learning_rate": 6.12088438633719e-05, "loss": 0.5219, "step": 29086 }, { "epoch": 1.9707974795040313, "grad_norm": 6.975640773773193, "learning_rate": 6.120747484427409e-05, "loss": 0.4574, "step": 29087 }, { "epoch": 1.9708652347720035, "grad_norm": 5.373055934906006, "learning_rate": 6.120610582517627e-05, "loss": 0.716, "step": 29088 }, { "epoch": 1.9709329900399757, "grad_norm": 6.908960342407227, "learning_rate": 6.120473680607845e-05, "loss": 0.6069, "step": 29089 }, { "epoch": 1.9710007453079477, "grad_norm": 5.559511661529541, "learning_rate": 6.120336778698064e-05, "loss": 0.742, "step": 29090 }, { "epoch": 1.9710685005759196, "grad_norm": 4.668619155883789, "learning_rate": 6.120199876788282e-05, "loss": 0.5867, "step": 29091 }, { "epoch": 1.9711362558438918, "grad_norm": 5.993563652038574, "learning_rate": 6.1200629748785e-05, "loss": 0.7798, "step": 29092 }, { "epoch": 1.971204011111864, "grad_norm": 7.855579853057861, "learning_rate": 6.119926072968718e-05, "loss": 0.604, "step": 29093 }, { "epoch": 1.971271766379836, "grad_norm": 5.290358543395996, "learning_rate": 6.119789171058936e-05, "loss": 0.6264, "step": 29094 }, { "epoch": 1.971339521647808, "grad_norm": 11.625998497009277, "learning_rate": 6.119652269149156e-05, "loss": 0.4847, "step": 29095 }, { "epoch": 1.9714072769157802, "grad_norm": 7.166466236114502, "learning_rate": 6.119515367239374e-05, "loss": 0.4949, "step": 29096 }, { "epoch": 1.9714750321837524, "grad_norm": 6.266848087310791, "learning_rate": 6.119378465329592e-05, "loss": 0.5583, "step": 29097 }, { "epoch": 1.9715427874517244, "grad_norm": 4.791106700897217, "learning_rate": 6.11924156341981e-05, "loss": 0.6493, "step": 29098 }, { "epoch": 1.9716105427196964, "grad_norm": 10.213676452636719, "learning_rate": 6.119104661510028e-05, "loss": 0.7433, "step": 29099 }, { "epoch": 1.9716782979876686, "grad_norm": 7.391441822052002, "learning_rate": 6.118967759600247e-05, "loss": 0.5539, "step": 29100 }, { "epoch": 1.9717460532556408, "grad_norm": 6.06369161605835, "learning_rate": 6.118830857690465e-05, "loss": 0.7236, "step": 29101 }, { "epoch": 1.9718138085236128, "grad_norm": 5.83076810836792, "learning_rate": 6.118693955780683e-05, "loss": 0.6266, "step": 29102 }, { "epoch": 1.9718815637915847, "grad_norm": 5.446613311767578, "learning_rate": 6.118557053870901e-05, "loss": 0.7851, "step": 29103 }, { "epoch": 1.971949319059557, "grad_norm": 4.595570087432861, "learning_rate": 6.118420151961121e-05, "loss": 0.5, "step": 29104 }, { "epoch": 1.972017074327529, "grad_norm": 5.581231117248535, "learning_rate": 6.118283250051339e-05, "loss": 0.833, "step": 29105 }, { "epoch": 1.972084829595501, "grad_norm": 4.57436466217041, "learning_rate": 6.118146348141557e-05, "loss": 0.5918, "step": 29106 }, { "epoch": 1.9721525848634731, "grad_norm": 6.772485256195068, "learning_rate": 6.118009446231775e-05, "loss": 0.5781, "step": 29107 }, { "epoch": 1.9722203401314453, "grad_norm": 5.762423992156982, "learning_rate": 6.117872544321993e-05, "loss": 0.7979, "step": 29108 }, { "epoch": 1.9722880953994173, "grad_norm": 4.854193687438965, "learning_rate": 6.117735642412212e-05, "loss": 0.5735, "step": 29109 }, { "epoch": 1.9723558506673893, "grad_norm": 5.683525085449219, "learning_rate": 6.11759874050243e-05, "loss": 0.4926, "step": 29110 }, { "epoch": 1.9724236059353615, "grad_norm": 5.574082374572754, "learning_rate": 6.117461838592648e-05, "loss": 0.6427, "step": 29111 }, { "epoch": 1.9724913612033337, "grad_norm": 4.221299171447754, "learning_rate": 6.117324936682866e-05, "loss": 0.53, "step": 29112 }, { "epoch": 1.9725591164713057, "grad_norm": 4.3060994148254395, "learning_rate": 6.117188034773086e-05, "loss": 0.5906, "step": 29113 }, { "epoch": 1.9726268717392776, "grad_norm": 3.809828519821167, "learning_rate": 6.117051132863304e-05, "loss": 0.545, "step": 29114 }, { "epoch": 1.9726946270072498, "grad_norm": 5.8677191734313965, "learning_rate": 6.116914230953522e-05, "loss": 0.7414, "step": 29115 }, { "epoch": 1.972762382275222, "grad_norm": 6.821242809295654, "learning_rate": 6.11677732904374e-05, "loss": 0.7761, "step": 29116 }, { "epoch": 1.972830137543194, "grad_norm": 6.918764591217041, "learning_rate": 6.116640427133958e-05, "loss": 0.76, "step": 29117 }, { "epoch": 1.972897892811166, "grad_norm": 4.7809319496154785, "learning_rate": 6.116503525224177e-05, "loss": 0.5887, "step": 29118 }, { "epoch": 1.9729656480791382, "grad_norm": 5.96099328994751, "learning_rate": 6.116366623314395e-05, "loss": 0.7916, "step": 29119 }, { "epoch": 1.9730334033471102, "grad_norm": 7.8772125244140625, "learning_rate": 6.116229721404613e-05, "loss": 0.5158, "step": 29120 }, { "epoch": 1.9731011586150822, "grad_norm": 5.068696975708008, "learning_rate": 6.116092819494831e-05, "loss": 0.6552, "step": 29121 }, { "epoch": 1.9731689138830544, "grad_norm": 6.544254302978516, "learning_rate": 6.115955917585051e-05, "loss": 0.7004, "step": 29122 }, { "epoch": 1.9732366691510266, "grad_norm": 5.0695600509643555, "learning_rate": 6.115819015675269e-05, "loss": 0.6308, "step": 29123 }, { "epoch": 1.9733044244189986, "grad_norm": 9.45584487915039, "learning_rate": 6.115682113765487e-05, "loss": 0.5774, "step": 29124 }, { "epoch": 1.9733721796869705, "grad_norm": 7.496036529541016, "learning_rate": 6.115545211855705e-05, "loss": 0.7171, "step": 29125 }, { "epoch": 1.9734399349549427, "grad_norm": 4.8124542236328125, "learning_rate": 6.115408309945923e-05, "loss": 0.5858, "step": 29126 }, { "epoch": 1.973507690222915, "grad_norm": 7.2567548751831055, "learning_rate": 6.115271408036142e-05, "loss": 0.5624, "step": 29127 }, { "epoch": 1.973575445490887, "grad_norm": 4.965682506561279, "learning_rate": 6.11513450612636e-05, "loss": 0.501, "step": 29128 }, { "epoch": 1.973643200758859, "grad_norm": 4.368419170379639, "learning_rate": 6.114997604216578e-05, "loss": 0.4613, "step": 29129 }, { "epoch": 1.973710956026831, "grad_norm": 5.586247444152832, "learning_rate": 6.114860702306798e-05, "loss": 0.5976, "step": 29130 }, { "epoch": 1.9737787112948033, "grad_norm": 4.455949306488037, "learning_rate": 6.114723800397016e-05, "loss": 0.6686, "step": 29131 }, { "epoch": 1.9738464665627753, "grad_norm": 8.187183380126953, "learning_rate": 6.114586898487234e-05, "loss": 0.4631, "step": 29132 }, { "epoch": 1.9739142218307473, "grad_norm": 7.660210132598877, "learning_rate": 6.114449996577453e-05, "loss": 0.4972, "step": 29133 }, { "epoch": 1.9739819770987195, "grad_norm": 5.718005657196045, "learning_rate": 6.114313094667671e-05, "loss": 0.5247, "step": 29134 }, { "epoch": 1.9740497323666915, "grad_norm": 5.195508003234863, "learning_rate": 6.11417619275789e-05, "loss": 0.5135, "step": 29135 }, { "epoch": 1.9741174876346634, "grad_norm": 4.406049728393555, "learning_rate": 6.114039290848109e-05, "loss": 0.5905, "step": 29136 }, { "epoch": 1.9741852429026356, "grad_norm": 9.305685043334961, "learning_rate": 6.113902388938327e-05, "loss": 0.7646, "step": 29137 }, { "epoch": 1.9742529981706078, "grad_norm": 10.103351593017578, "learning_rate": 6.113765487028545e-05, "loss": 0.7094, "step": 29138 }, { "epoch": 1.9743207534385798, "grad_norm": 6.384303569793701, "learning_rate": 6.113628585118763e-05, "loss": 0.6003, "step": 29139 }, { "epoch": 1.9743885087065518, "grad_norm": 6.301858425140381, "learning_rate": 6.113491683208981e-05, "loss": 0.7834, "step": 29140 }, { "epoch": 1.974456263974524, "grad_norm": 4.316931247711182, "learning_rate": 6.1133547812992e-05, "loss": 0.5206, "step": 29141 }, { "epoch": 1.9745240192424962, "grad_norm": 4.4475836753845215, "learning_rate": 6.113217879389418e-05, "loss": 0.3805, "step": 29142 }, { "epoch": 1.9745917745104682, "grad_norm": 5.714684963226318, "learning_rate": 6.113080977479636e-05, "loss": 0.4544, "step": 29143 }, { "epoch": 1.9746595297784402, "grad_norm": 7.544383525848389, "learning_rate": 6.112944075569854e-05, "loss": 0.6437, "step": 29144 }, { "epoch": 1.9747272850464124, "grad_norm": 5.403080940246582, "learning_rate": 6.112807173660074e-05, "loss": 0.7203, "step": 29145 }, { "epoch": 1.9747950403143846, "grad_norm": 9.630719184875488, "learning_rate": 6.112670271750292e-05, "loss": 0.6454, "step": 29146 }, { "epoch": 1.9748627955823566, "grad_norm": 6.431058406829834, "learning_rate": 6.11253336984051e-05, "loss": 0.6069, "step": 29147 }, { "epoch": 1.9749305508503285, "grad_norm": 7.027250289916992, "learning_rate": 6.112396467930728e-05, "loss": 0.6836, "step": 29148 }, { "epoch": 1.9749983061183007, "grad_norm": 5.682072162628174, "learning_rate": 6.112259566020946e-05, "loss": 0.5951, "step": 29149 }, { "epoch": 1.975066061386273, "grad_norm": 5.594546794891357, "learning_rate": 6.112122664111165e-05, "loss": 0.5401, "step": 29150 }, { "epoch": 1.975133816654245, "grad_norm": 7.0353593826293945, "learning_rate": 6.111985762201383e-05, "loss": 0.77, "step": 29151 }, { "epoch": 1.975201571922217, "grad_norm": 6.079371452331543, "learning_rate": 6.111848860291601e-05, "loss": 0.5504, "step": 29152 }, { "epoch": 1.975269327190189, "grad_norm": 6.796693325042725, "learning_rate": 6.11171195838182e-05, "loss": 0.8217, "step": 29153 }, { "epoch": 1.975337082458161, "grad_norm": 4.1710028648376465, "learning_rate": 6.111575056472037e-05, "loss": 0.5693, "step": 29154 }, { "epoch": 1.975404837726133, "grad_norm": 4.595174312591553, "learning_rate": 6.111438154562257e-05, "loss": 0.5511, "step": 29155 }, { "epoch": 1.9754725929941053, "grad_norm": 7.055856227874756, "learning_rate": 6.111301252652475e-05, "loss": 0.5781, "step": 29156 }, { "epoch": 1.9755403482620775, "grad_norm": 5.3448309898376465, "learning_rate": 6.111164350742693e-05, "loss": 0.6755, "step": 29157 }, { "epoch": 1.9756081035300495, "grad_norm": 5.585182189941406, "learning_rate": 6.111027448832911e-05, "loss": 0.5967, "step": 29158 }, { "epoch": 1.9756758587980214, "grad_norm": 5.3608784675598145, "learning_rate": 6.11089054692313e-05, "loss": 0.4367, "step": 29159 }, { "epoch": 1.9757436140659936, "grad_norm": 5.268752098083496, "learning_rate": 6.110753645013348e-05, "loss": 0.8278, "step": 29160 }, { "epoch": 1.9758113693339658, "grad_norm": 7.872457504272461, "learning_rate": 6.110616743103566e-05, "loss": 0.5777, "step": 29161 }, { "epoch": 1.9758791246019378, "grad_norm": 6.554051876068115, "learning_rate": 6.110479841193784e-05, "loss": 0.4972, "step": 29162 }, { "epoch": 1.9759468798699098, "grad_norm": 3.708975076675415, "learning_rate": 6.110342939284002e-05, "loss": 0.4514, "step": 29163 }, { "epoch": 1.976014635137882, "grad_norm": 10.670190811157227, "learning_rate": 6.110206037374222e-05, "loss": 0.7938, "step": 29164 }, { "epoch": 1.9760823904058542, "grad_norm": 5.527973175048828, "learning_rate": 6.11006913546444e-05, "loss": 0.37, "step": 29165 }, { "epoch": 1.9761501456738262, "grad_norm": 4.663074970245361, "learning_rate": 6.109932233554658e-05, "loss": 0.619, "step": 29166 }, { "epoch": 1.9762179009417982, "grad_norm": 8.066825866699219, "learning_rate": 6.109795331644876e-05, "loss": 0.5881, "step": 29167 }, { "epoch": 1.9762856562097704, "grad_norm": 6.406994342803955, "learning_rate": 6.109658429735095e-05, "loss": 0.5421, "step": 29168 }, { "epoch": 1.9763534114777424, "grad_norm": 6.178088665008545, "learning_rate": 6.109521527825313e-05, "loss": 0.7933, "step": 29169 }, { "epoch": 1.9764211667457143, "grad_norm": 4.954931259155273, "learning_rate": 6.109384625915531e-05, "loss": 0.5842, "step": 29170 }, { "epoch": 1.9764889220136865, "grad_norm": 6.357505798339844, "learning_rate": 6.10924772400575e-05, "loss": 0.6318, "step": 29171 }, { "epoch": 1.9765566772816587, "grad_norm": 8.971858978271484, "learning_rate": 6.109110822095967e-05, "loss": 0.604, "step": 29172 }, { "epoch": 1.9766244325496307, "grad_norm": 5.59404993057251, "learning_rate": 6.108973920186187e-05, "loss": 0.5759, "step": 29173 }, { "epoch": 1.9766921878176027, "grad_norm": 3.9462108612060547, "learning_rate": 6.108837018276405e-05, "loss": 0.703, "step": 29174 }, { "epoch": 1.976759943085575, "grad_norm": 6.318485736846924, "learning_rate": 6.108700116366623e-05, "loss": 0.7048, "step": 29175 }, { "epoch": 1.976827698353547, "grad_norm": 5.466614246368408, "learning_rate": 6.108563214456842e-05, "loss": 0.5246, "step": 29176 }, { "epoch": 1.976895453621519, "grad_norm": 5.598781585693359, "learning_rate": 6.10842631254706e-05, "loss": 0.7905, "step": 29177 }, { "epoch": 1.976963208889491, "grad_norm": 6.144973278045654, "learning_rate": 6.108289410637278e-05, "loss": 0.7029, "step": 29178 }, { "epoch": 1.9770309641574633, "grad_norm": 5.156154632568359, "learning_rate": 6.108152508727498e-05, "loss": 0.6175, "step": 29179 }, { "epoch": 1.9770987194254355, "grad_norm": 6.548519134521484, "learning_rate": 6.108015606817716e-05, "loss": 0.5728, "step": 29180 }, { "epoch": 1.9771664746934074, "grad_norm": 4.843504428863525, "learning_rate": 6.107878704907934e-05, "loss": 0.6404, "step": 29181 }, { "epoch": 1.9772342299613794, "grad_norm": 4.773132801055908, "learning_rate": 6.107741802998153e-05, "loss": 0.7727, "step": 29182 }, { "epoch": 1.9773019852293516, "grad_norm": 6.03572940826416, "learning_rate": 6.107604901088371e-05, "loss": 0.8244, "step": 29183 }, { "epoch": 1.9773697404973236, "grad_norm": 7.507187843322754, "learning_rate": 6.107467999178589e-05, "loss": 0.6138, "step": 29184 }, { "epoch": 1.9774374957652956, "grad_norm": 8.291000366210938, "learning_rate": 6.107331097268807e-05, "loss": 0.5802, "step": 29185 }, { "epoch": 1.9775052510332678, "grad_norm": 6.824193000793457, "learning_rate": 6.107194195359025e-05, "loss": 0.7046, "step": 29186 }, { "epoch": 1.97757300630124, "grad_norm": 5.519742488861084, "learning_rate": 6.107057293449245e-05, "loss": 0.6461, "step": 29187 }, { "epoch": 1.977640761569212, "grad_norm": 3.898591995239258, "learning_rate": 6.106920391539463e-05, "loss": 0.504, "step": 29188 }, { "epoch": 1.977708516837184, "grad_norm": 6.527244567871094, "learning_rate": 6.106783489629681e-05, "loss": 0.7215, "step": 29189 }, { "epoch": 1.9777762721051562, "grad_norm": 5.34447717666626, "learning_rate": 6.106646587719899e-05, "loss": 0.6549, "step": 29190 }, { "epoch": 1.9778440273731284, "grad_norm": 5.403507709503174, "learning_rate": 6.106509685810118e-05, "loss": 0.5068, "step": 29191 }, { "epoch": 1.9779117826411003, "grad_norm": 5.637250900268555, "learning_rate": 6.106372783900336e-05, "loss": 0.6938, "step": 29192 }, { "epoch": 1.9779795379090723, "grad_norm": 5.247925281524658, "learning_rate": 6.106235881990554e-05, "loss": 0.5856, "step": 29193 }, { "epoch": 1.9780472931770445, "grad_norm": 6.183668613433838, "learning_rate": 6.106098980080772e-05, "loss": 0.5885, "step": 29194 }, { "epoch": 1.9781150484450167, "grad_norm": 5.017308235168457, "learning_rate": 6.10596207817099e-05, "loss": 0.614, "step": 29195 }, { "epoch": 1.9781828037129887, "grad_norm": 7.671649932861328, "learning_rate": 6.10582517626121e-05, "loss": 0.4249, "step": 29196 }, { "epoch": 1.9782505589809607, "grad_norm": 6.845453262329102, "learning_rate": 6.105688274351428e-05, "loss": 0.7515, "step": 29197 }, { "epoch": 1.978318314248933, "grad_norm": 8.472447395324707, "learning_rate": 6.105551372441646e-05, "loss": 0.8222, "step": 29198 }, { "epoch": 1.978386069516905, "grad_norm": 5.762235641479492, "learning_rate": 6.105414470531864e-05, "loss": 0.7827, "step": 29199 }, { "epoch": 1.9784538247848769, "grad_norm": 8.070672988891602, "learning_rate": 6.105277568622083e-05, "loss": 0.7289, "step": 29200 }, { "epoch": 1.978521580052849, "grad_norm": 5.931797981262207, "learning_rate": 6.105140666712301e-05, "loss": 0.5611, "step": 29201 }, { "epoch": 1.9785893353208213, "grad_norm": 4.924221515655518, "learning_rate": 6.10500376480252e-05, "loss": 0.5137, "step": 29202 }, { "epoch": 1.9786570905887932, "grad_norm": 4.802031517028809, "learning_rate": 6.104866862892737e-05, "loss": 0.6611, "step": 29203 }, { "epoch": 1.9787248458567652, "grad_norm": 5.251140594482422, "learning_rate": 6.104729960982955e-05, "loss": 0.7273, "step": 29204 }, { "epoch": 1.9787926011247374, "grad_norm": 5.971168041229248, "learning_rate": 6.104593059073175e-05, "loss": 0.6423, "step": 29205 }, { "epoch": 1.9788603563927096, "grad_norm": 6.108429431915283, "learning_rate": 6.104456157163393e-05, "loss": 0.5271, "step": 29206 }, { "epoch": 1.9789281116606816, "grad_norm": 6.793957233428955, "learning_rate": 6.104319255253611e-05, "loss": 0.7571, "step": 29207 }, { "epoch": 1.9789958669286536, "grad_norm": 5.661201000213623, "learning_rate": 6.104182353343829e-05, "loss": 0.6512, "step": 29208 }, { "epoch": 1.9790636221966258, "grad_norm": 7.303994178771973, "learning_rate": 6.104045451434047e-05, "loss": 0.5725, "step": 29209 }, { "epoch": 1.979131377464598, "grad_norm": 5.280054092407227, "learning_rate": 6.103908549524266e-05, "loss": 0.635, "step": 29210 }, { "epoch": 1.97919913273257, "grad_norm": 5.315549373626709, "learning_rate": 6.103771647614484e-05, "loss": 0.6626, "step": 29211 }, { "epoch": 1.979266888000542, "grad_norm": 5.132471561431885, "learning_rate": 6.103634745704702e-05, "loss": 0.7256, "step": 29212 }, { "epoch": 1.9793346432685142, "grad_norm": 7.8937530517578125, "learning_rate": 6.103497843794921e-05, "loss": 0.7582, "step": 29213 }, { "epoch": 1.9794023985364864, "grad_norm": 7.379942417144775, "learning_rate": 6.10336094188514e-05, "loss": 0.6258, "step": 29214 }, { "epoch": 1.9794701538044583, "grad_norm": 5.588009357452393, "learning_rate": 6.103224039975358e-05, "loss": 0.7745, "step": 29215 }, { "epoch": 1.9795379090724303, "grad_norm": 5.918999195098877, "learning_rate": 6.1030871380655766e-05, "loss": 0.6559, "step": 29216 }, { "epoch": 1.9796056643404025, "grad_norm": 6.166866302490234, "learning_rate": 6.1029502361557946e-05, "loss": 0.6223, "step": 29217 }, { "epoch": 1.9796734196083745, "grad_norm": 4.456559658050537, "learning_rate": 6.1028133342460126e-05, "loss": 0.6503, "step": 29218 }, { "epoch": 1.9797411748763465, "grad_norm": 8.1209716796875, "learning_rate": 6.102676432336232e-05, "loss": 0.6926, "step": 29219 }, { "epoch": 1.9798089301443187, "grad_norm": 8.937030792236328, "learning_rate": 6.10253953042645e-05, "loss": 0.816, "step": 29220 }, { "epoch": 1.979876685412291, "grad_norm": 4.560615539550781, "learning_rate": 6.102402628516668e-05, "loss": 0.5007, "step": 29221 }, { "epoch": 1.9799444406802629, "grad_norm": 4.467002868652344, "learning_rate": 6.102265726606886e-05, "loss": 0.4402, "step": 29222 }, { "epoch": 1.9800121959482349, "grad_norm": 4.761358261108398, "learning_rate": 6.1021288246971055e-05, "loss": 0.5876, "step": 29223 }, { "epoch": 1.980079951216207, "grad_norm": 8.711389541625977, "learning_rate": 6.1019919227873235e-05, "loss": 0.6724, "step": 29224 }, { "epoch": 1.9801477064841793, "grad_norm": 4.563511371612549, "learning_rate": 6.1018550208775416e-05, "loss": 0.729, "step": 29225 }, { "epoch": 1.9802154617521512, "grad_norm": 5.8568830490112305, "learning_rate": 6.1017181189677596e-05, "loss": 0.5381, "step": 29226 }, { "epoch": 1.9802832170201232, "grad_norm": 5.622007846832275, "learning_rate": 6.1015812170579776e-05, "loss": 0.5929, "step": 29227 }, { "epoch": 1.9803509722880954, "grad_norm": 5.546485900878906, "learning_rate": 6.101444315148197e-05, "loss": 0.6261, "step": 29228 }, { "epoch": 1.9804187275560676, "grad_norm": 7.600739002227783, "learning_rate": 6.101307413238415e-05, "loss": 0.826, "step": 29229 }, { "epoch": 1.9804864828240396, "grad_norm": 4.219735622406006, "learning_rate": 6.101170511328633e-05, "loss": 0.5736, "step": 29230 }, { "epoch": 1.9805542380920116, "grad_norm": 7.087001800537109, "learning_rate": 6.101033609418851e-05, "loss": 0.7639, "step": 29231 }, { "epoch": 1.9806219933599838, "grad_norm": 4.191345691680908, "learning_rate": 6.100896707509069e-05, "loss": 0.5043, "step": 29232 }, { "epoch": 1.9806897486279558, "grad_norm": 4.256446361541748, "learning_rate": 6.1007598055992886e-05, "loss": 0.5711, "step": 29233 }, { "epoch": 1.9807575038959278, "grad_norm": 5.975095748901367, "learning_rate": 6.1006229036895066e-05, "loss": 0.6592, "step": 29234 }, { "epoch": 1.9808252591639, "grad_norm": 5.278697967529297, "learning_rate": 6.1004860017797246e-05, "loss": 0.5434, "step": 29235 }, { "epoch": 1.9808930144318722, "grad_norm": 7.825021743774414, "learning_rate": 6.1003490998699433e-05, "loss": 0.8694, "step": 29236 }, { "epoch": 1.9809607696998441, "grad_norm": 5.6275434494018555, "learning_rate": 6.100212197960162e-05, "loss": 0.9384, "step": 29237 }, { "epoch": 1.9810285249678161, "grad_norm": 5.599697113037109, "learning_rate": 6.10007529605038e-05, "loss": 0.6659, "step": 29238 }, { "epoch": 1.9810962802357883, "grad_norm": 7.955117702484131, "learning_rate": 6.099938394140599e-05, "loss": 0.7303, "step": 29239 }, { "epoch": 1.9811640355037605, "grad_norm": 5.457128524780273, "learning_rate": 6.099801492230817e-05, "loss": 0.6155, "step": 29240 }, { "epoch": 1.9812317907717325, "grad_norm": 6.675145149230957, "learning_rate": 6.099664590321035e-05, "loss": 0.6614, "step": 29241 }, { "epoch": 1.9812995460397045, "grad_norm": 4.7875189781188965, "learning_rate": 6.099527688411254e-05, "loss": 0.632, "step": 29242 }, { "epoch": 1.9813673013076767, "grad_norm": 5.890411853790283, "learning_rate": 6.099390786501472e-05, "loss": 0.6471, "step": 29243 }, { "epoch": 1.981435056575649, "grad_norm": 7.684047222137451, "learning_rate": 6.09925388459169e-05, "loss": 0.5744, "step": 29244 }, { "epoch": 1.9815028118436209, "grad_norm": 5.36227560043335, "learning_rate": 6.0991169826819084e-05, "loss": 0.6905, "step": 29245 }, { "epoch": 1.9815705671115929, "grad_norm": 5.775804042816162, "learning_rate": 6.098980080772128e-05, "loss": 0.5906, "step": 29246 }, { "epoch": 1.981638322379565, "grad_norm": 5.543133735656738, "learning_rate": 6.098843178862346e-05, "loss": 0.5977, "step": 29247 }, { "epoch": 1.9817060776475373, "grad_norm": 5.8936614990234375, "learning_rate": 6.098706276952564e-05, "loss": 0.6929, "step": 29248 }, { "epoch": 1.981773832915509, "grad_norm": 5.188004016876221, "learning_rate": 6.098569375042782e-05, "loss": 0.6008, "step": 29249 }, { "epoch": 1.9818415881834812, "grad_norm": 6.418683052062988, "learning_rate": 6.098432473133e-05, "loss": 0.634, "step": 29250 }, { "epoch": 1.9819093434514534, "grad_norm": 5.446265697479248, "learning_rate": 6.098295571223219e-05, "loss": 0.5886, "step": 29251 }, { "epoch": 1.9819770987194254, "grad_norm": 10.674468040466309, "learning_rate": 6.098158669313437e-05, "loss": 0.6413, "step": 29252 }, { "epoch": 1.9820448539873974, "grad_norm": 8.875579833984375, "learning_rate": 6.0980217674036553e-05, "loss": 0.8604, "step": 29253 }, { "epoch": 1.9821126092553696, "grad_norm": 7.381422996520996, "learning_rate": 6.0978848654938734e-05, "loss": 0.6703, "step": 29254 }, { "epoch": 1.9821803645233418, "grad_norm": 5.07864236831665, "learning_rate": 6.097747963584093e-05, "loss": 0.6142, "step": 29255 }, { "epoch": 1.9822481197913138, "grad_norm": 5.215816497802734, "learning_rate": 6.097611061674311e-05, "loss": 0.5586, "step": 29256 }, { "epoch": 1.9823158750592857, "grad_norm": 6.6811065673828125, "learning_rate": 6.097474159764529e-05, "loss": 0.7708, "step": 29257 }, { "epoch": 1.982383630327258, "grad_norm": 3.915214776992798, "learning_rate": 6.097337257854747e-05, "loss": 0.4863, "step": 29258 }, { "epoch": 1.9824513855952302, "grad_norm": 5.984079360961914, "learning_rate": 6.0972003559449656e-05, "loss": 0.8194, "step": 29259 }, { "epoch": 1.9825191408632021, "grad_norm": 6.563571929931641, "learning_rate": 6.097063454035184e-05, "loss": 0.7449, "step": 29260 }, { "epoch": 1.9825868961311741, "grad_norm": 7.223334789276123, "learning_rate": 6.096926552125402e-05, "loss": 0.8122, "step": 29261 }, { "epoch": 1.9826546513991463, "grad_norm": 4.725507736206055, "learning_rate": 6.096789650215621e-05, "loss": 0.5794, "step": 29262 }, { "epoch": 1.9827224066671185, "grad_norm": 7.533315658569336, "learning_rate": 6.096652748305839e-05, "loss": 0.7101, "step": 29263 }, { "epoch": 1.9827901619350905, "grad_norm": 5.472788333892822, "learning_rate": 6.096515846396057e-05, "loss": 0.8899, "step": 29264 }, { "epoch": 1.9828579172030625, "grad_norm": 10.444723129272461, "learning_rate": 6.0963789444862765e-05, "loss": 0.5685, "step": 29265 }, { "epoch": 1.9829256724710347, "grad_norm": 5.520651817321777, "learning_rate": 6.0962420425764945e-05, "loss": 0.7229, "step": 29266 }, { "epoch": 1.9829934277390067, "grad_norm": 5.322668075561523, "learning_rate": 6.0961051406667126e-05, "loss": 0.4736, "step": 29267 }, { "epoch": 1.9830611830069786, "grad_norm": 5.071753025054932, "learning_rate": 6.0959682387569306e-05, "loss": 0.4539, "step": 29268 }, { "epoch": 1.9831289382749508, "grad_norm": 4.457190036773682, "learning_rate": 6.09583133684715e-05, "loss": 0.536, "step": 29269 }, { "epoch": 1.983196693542923, "grad_norm": 5.495970726013184, "learning_rate": 6.095694434937368e-05, "loss": 0.6112, "step": 29270 }, { "epoch": 1.983264448810895, "grad_norm": 8.851449012756348, "learning_rate": 6.095557533027586e-05, "loss": 0.5997, "step": 29271 }, { "epoch": 1.983332204078867, "grad_norm": 5.388453960418701, "learning_rate": 6.095420631117804e-05, "loss": 0.4751, "step": 29272 }, { "epoch": 1.9833999593468392, "grad_norm": 5.6391472816467285, "learning_rate": 6.095283729208022e-05, "loss": 0.6066, "step": 29273 }, { "epoch": 1.9834677146148114, "grad_norm": 4.896326065063477, "learning_rate": 6.0951468272982415e-05, "loss": 0.6187, "step": 29274 }, { "epoch": 1.9835354698827834, "grad_norm": 10.501460075378418, "learning_rate": 6.0950099253884595e-05, "loss": 0.4117, "step": 29275 }, { "epoch": 1.9836032251507554, "grad_norm": 5.536748886108398, "learning_rate": 6.0948730234786776e-05, "loss": 0.5699, "step": 29276 }, { "epoch": 1.9836709804187276, "grad_norm": 11.2087984085083, "learning_rate": 6.0947361215688956e-05, "loss": 0.6298, "step": 29277 }, { "epoch": 1.9837387356866998, "grad_norm": 5.225502014160156, "learning_rate": 6.094599219659115e-05, "loss": 0.4806, "step": 29278 }, { "epoch": 1.9838064909546718, "grad_norm": 5.214675426483154, "learning_rate": 6.094462317749333e-05, "loss": 0.5792, "step": 29279 }, { "epoch": 1.9838742462226437, "grad_norm": 6.512155055999756, "learning_rate": 6.094325415839551e-05, "loss": 0.7061, "step": 29280 }, { "epoch": 1.983942001490616, "grad_norm": 6.108458518981934, "learning_rate": 6.094188513929769e-05, "loss": 0.6067, "step": 29281 }, { "epoch": 1.984009756758588, "grad_norm": 5.490647315979004, "learning_rate": 6.094051612019988e-05, "loss": 0.7807, "step": 29282 }, { "epoch": 1.98407751202656, "grad_norm": 5.583939552307129, "learning_rate": 6.0939147101102065e-05, "loss": 0.6718, "step": 29283 }, { "epoch": 1.9841452672945321, "grad_norm": 5.866552829742432, "learning_rate": 6.0937778082004246e-05, "loss": 0.7879, "step": 29284 }, { "epoch": 1.9842130225625043, "grad_norm": 6.884492874145508, "learning_rate": 6.093640906290643e-05, "loss": 0.7827, "step": 29285 }, { "epoch": 1.9842807778304763, "grad_norm": 5.867642402648926, "learning_rate": 6.093504004380861e-05, "loss": 0.6619, "step": 29286 }, { "epoch": 1.9843485330984483, "grad_norm": 6.681375503540039, "learning_rate": 6.0933671024710793e-05, "loss": 0.6566, "step": 29287 }, { "epoch": 1.9844162883664205, "grad_norm": 12.13077163696289, "learning_rate": 6.093230200561299e-05, "loss": 0.7489, "step": 29288 }, { "epoch": 1.9844840436343927, "grad_norm": 5.143853187561035, "learning_rate": 6.093093298651517e-05, "loss": 0.6509, "step": 29289 }, { "epoch": 1.9845517989023647, "grad_norm": 6.225203990936279, "learning_rate": 6.092956396741735e-05, "loss": 0.4678, "step": 29290 }, { "epoch": 1.9846195541703366, "grad_norm": 5.9019975662231445, "learning_rate": 6.092819494831953e-05, "loss": 0.6421, "step": 29291 }, { "epoch": 1.9846873094383088, "grad_norm": 8.58917236328125, "learning_rate": 6.092682592922172e-05, "loss": 0.7143, "step": 29292 }, { "epoch": 1.984755064706281, "grad_norm": 5.4592719078063965, "learning_rate": 6.09254569101239e-05, "loss": 0.6401, "step": 29293 }, { "epoch": 1.984822819974253, "grad_norm": 5.9085869789123535, "learning_rate": 6.092408789102608e-05, "loss": 0.6382, "step": 29294 }, { "epoch": 1.984890575242225, "grad_norm": 6.807305335998535, "learning_rate": 6.092271887192826e-05, "loss": 0.6466, "step": 29295 }, { "epoch": 1.9849583305101972, "grad_norm": 5.443212985992432, "learning_rate": 6.0921349852830444e-05, "loss": 0.7593, "step": 29296 }, { "epoch": 1.9850260857781694, "grad_norm": 6.156885623931885, "learning_rate": 6.091998083373264e-05, "loss": 0.6004, "step": 29297 }, { "epoch": 1.9850938410461412, "grad_norm": 8.625861167907715, "learning_rate": 6.091861181463482e-05, "loss": 0.6907, "step": 29298 }, { "epoch": 1.9851615963141134, "grad_norm": 9.0084228515625, "learning_rate": 6.0917242795537e-05, "loss": 0.6248, "step": 29299 }, { "epoch": 1.9852293515820856, "grad_norm": 5.424419403076172, "learning_rate": 6.091587377643918e-05, "loss": 0.5995, "step": 29300 }, { "epoch": 1.9852971068500576, "grad_norm": 10.027426719665527, "learning_rate": 6.091450475734137e-05, "loss": 0.4714, "step": 29301 }, { "epoch": 1.9853648621180295, "grad_norm": 5.1062188148498535, "learning_rate": 6.091313573824355e-05, "loss": 0.6059, "step": 29302 }, { "epoch": 1.9854326173860017, "grad_norm": 5.143561840057373, "learning_rate": 6.091176671914573e-05, "loss": 0.7047, "step": 29303 }, { "epoch": 1.985500372653974, "grad_norm": 6.226891994476318, "learning_rate": 6.0910397700047913e-05, "loss": 0.7069, "step": 29304 }, { "epoch": 1.985568127921946, "grad_norm": 5.539346218109131, "learning_rate": 6.09090286809501e-05, "loss": 0.7386, "step": 29305 }, { "epoch": 1.985635883189918, "grad_norm": 6.061764240264893, "learning_rate": 6.090765966185229e-05, "loss": 0.8741, "step": 29306 }, { "epoch": 1.98570363845789, "grad_norm": 6.610141277313232, "learning_rate": 6.090629064275447e-05, "loss": 0.5333, "step": 29307 }, { "epoch": 1.9857713937258623, "grad_norm": 7.0431227684021, "learning_rate": 6.0904921623656655e-05, "loss": 0.6673, "step": 29308 }, { "epoch": 1.9858391489938343, "grad_norm": 8.665759086608887, "learning_rate": 6.0903552604558835e-05, "loss": 0.4317, "step": 29309 }, { "epoch": 1.9859069042618063, "grad_norm": 4.941988468170166, "learning_rate": 6.090218358546102e-05, "loss": 0.4938, "step": 29310 }, { "epoch": 1.9859746595297785, "grad_norm": 5.111955642700195, "learning_rate": 6.090081456636321e-05, "loss": 0.6504, "step": 29311 }, { "epoch": 1.9860424147977507, "grad_norm": 5.99816370010376, "learning_rate": 6.089944554726539e-05, "loss": 0.5854, "step": 29312 }, { "epoch": 1.9861101700657227, "grad_norm": 7.296607971191406, "learning_rate": 6.089807652816757e-05, "loss": 0.5195, "step": 29313 }, { "epoch": 1.9861779253336946, "grad_norm": 6.1671142578125, "learning_rate": 6.089670750906975e-05, "loss": 0.7053, "step": 29314 }, { "epoch": 1.9862456806016668, "grad_norm": 4.67803955078125, "learning_rate": 6.0895338489971945e-05, "loss": 0.5016, "step": 29315 }, { "epoch": 1.9863134358696388, "grad_norm": 6.748191833496094, "learning_rate": 6.0893969470874125e-05, "loss": 0.7484, "step": 29316 }, { "epoch": 1.9863811911376108, "grad_norm": 5.843008995056152, "learning_rate": 6.0892600451776305e-05, "loss": 0.4773, "step": 29317 }, { "epoch": 1.986448946405583, "grad_norm": 7.7899041175842285, "learning_rate": 6.0891231432678486e-05, "loss": 0.5035, "step": 29318 }, { "epoch": 1.9865167016735552, "grad_norm": 8.380123138427734, "learning_rate": 6.0889862413580666e-05, "loss": 0.7831, "step": 29319 }, { "epoch": 1.9865844569415272, "grad_norm": 5.251950263977051, "learning_rate": 6.088849339448286e-05, "loss": 0.5802, "step": 29320 }, { "epoch": 1.9866522122094992, "grad_norm": 4.617541313171387, "learning_rate": 6.088712437538504e-05, "loss": 0.5667, "step": 29321 }, { "epoch": 1.9867199674774714, "grad_norm": 5.5353169441223145, "learning_rate": 6.088575535628722e-05, "loss": 0.7037, "step": 29322 }, { "epoch": 1.9867877227454436, "grad_norm": 5.650306701660156, "learning_rate": 6.08843863371894e-05, "loss": 0.5484, "step": 29323 }, { "epoch": 1.9868554780134156, "grad_norm": 5.2462005615234375, "learning_rate": 6.0883017318091595e-05, "loss": 0.6353, "step": 29324 }, { "epoch": 1.9869232332813875, "grad_norm": 8.117757797241211, "learning_rate": 6.0881648298993775e-05, "loss": 0.4884, "step": 29325 }, { "epoch": 1.9869909885493597, "grad_norm": 8.640921592712402, "learning_rate": 6.0880279279895955e-05, "loss": 0.5432, "step": 29326 }, { "epoch": 1.987058743817332, "grad_norm": 4.557378768920898, "learning_rate": 6.0878910260798136e-05, "loss": 0.7886, "step": 29327 }, { "epoch": 1.987126499085304, "grad_norm": 4.910999774932861, "learning_rate": 6.087754124170032e-05, "loss": 0.5803, "step": 29328 }, { "epoch": 1.987194254353276, "grad_norm": 9.217652320861816, "learning_rate": 6.087617222260251e-05, "loss": 0.9591, "step": 29329 }, { "epoch": 1.987262009621248, "grad_norm": 9.519241333007812, "learning_rate": 6.087480320350469e-05, "loss": 0.6405, "step": 29330 }, { "epoch": 1.98732976488922, "grad_norm": 8.052745819091797, "learning_rate": 6.087343418440688e-05, "loss": 0.6577, "step": 29331 }, { "epoch": 1.987397520157192, "grad_norm": 5.580687046051025, "learning_rate": 6.087206516530906e-05, "loss": 0.5855, "step": 29332 }, { "epoch": 1.9874652754251643, "grad_norm": 4.678225040435791, "learning_rate": 6.0870696146211245e-05, "loss": 0.5354, "step": 29333 }, { "epoch": 1.9875330306931365, "grad_norm": 7.269389629364014, "learning_rate": 6.086932712711343e-05, "loss": 0.7457, "step": 29334 }, { "epoch": 1.9876007859611085, "grad_norm": 7.884680271148682, "learning_rate": 6.086795810801561e-05, "loss": 0.64, "step": 29335 }, { "epoch": 1.9876685412290804, "grad_norm": 5.23255729675293, "learning_rate": 6.086658908891779e-05, "loss": 0.7396, "step": 29336 }, { "epoch": 1.9877362964970526, "grad_norm": 6.600447654724121, "learning_rate": 6.086522006981997e-05, "loss": 0.4925, "step": 29337 }, { "epoch": 1.9878040517650248, "grad_norm": 6.306134223937988, "learning_rate": 6.086385105072217e-05, "loss": 0.6893, "step": 29338 }, { "epoch": 1.9878718070329968, "grad_norm": 7.071653842926025, "learning_rate": 6.086248203162435e-05, "loss": 0.781, "step": 29339 }, { "epoch": 1.9879395623009688, "grad_norm": 4.45372200012207, "learning_rate": 6.086111301252653e-05, "loss": 0.5989, "step": 29340 }, { "epoch": 1.988007317568941, "grad_norm": 10.462879180908203, "learning_rate": 6.085974399342871e-05, "loss": 0.4985, "step": 29341 }, { "epoch": 1.9880750728369132, "grad_norm": 4.253630638122559, "learning_rate": 6.085837497433089e-05, "loss": 0.5699, "step": 29342 }, { "epoch": 1.9881428281048852, "grad_norm": 5.947275161743164, "learning_rate": 6.085700595523308e-05, "loss": 0.835, "step": 29343 }, { "epoch": 1.9882105833728572, "grad_norm": 5.026688575744629, "learning_rate": 6.085563693613526e-05, "loss": 0.7419, "step": 29344 }, { "epoch": 1.9882783386408294, "grad_norm": 5.879662990570068, "learning_rate": 6.085426791703744e-05, "loss": 0.7447, "step": 29345 }, { "epoch": 1.9883460939088016, "grad_norm": 5.576076030731201, "learning_rate": 6.085289889793962e-05, "loss": 0.6571, "step": 29346 }, { "epoch": 1.9884138491767733, "grad_norm": 5.002601146697998, "learning_rate": 6.085152987884182e-05, "loss": 0.5342, "step": 29347 }, { "epoch": 1.9884816044447455, "grad_norm": 5.968959808349609, "learning_rate": 6.0850160859744e-05, "loss": 0.7129, "step": 29348 }, { "epoch": 1.9885493597127177, "grad_norm": 6.5818681716918945, "learning_rate": 6.084879184064618e-05, "loss": 0.6763, "step": 29349 }, { "epoch": 1.9886171149806897, "grad_norm": 6.747228145599365, "learning_rate": 6.084742282154836e-05, "loss": 0.8433, "step": 29350 }, { "epoch": 1.9886848702486617, "grad_norm": 6.111175537109375, "learning_rate": 6.0846053802450545e-05, "loss": 0.7719, "step": 29351 }, { "epoch": 1.988752625516634, "grad_norm": 6.864616870880127, "learning_rate": 6.084468478335273e-05, "loss": 0.6203, "step": 29352 }, { "epoch": 1.988820380784606, "grad_norm": 4.756804943084717, "learning_rate": 6.084331576425491e-05, "loss": 0.6553, "step": 29353 }, { "epoch": 1.988888136052578, "grad_norm": 5.672853946685791, "learning_rate": 6.08419467451571e-05, "loss": 0.7022, "step": 29354 }, { "epoch": 1.98895589132055, "grad_norm": 8.924792289733887, "learning_rate": 6.084057772605928e-05, "loss": 0.5313, "step": 29355 }, { "epoch": 1.9890236465885223, "grad_norm": 4.992954254150391, "learning_rate": 6.083920870696147e-05, "loss": 0.5712, "step": 29356 }, { "epoch": 1.9890914018564945, "grad_norm": 5.769439220428467, "learning_rate": 6.0837839687863654e-05, "loss": 0.7222, "step": 29357 }, { "epoch": 1.9891591571244664, "grad_norm": 9.371283531188965, "learning_rate": 6.0836470668765835e-05, "loss": 0.6196, "step": 29358 }, { "epoch": 1.9892269123924384, "grad_norm": 4.97420597076416, "learning_rate": 6.0835101649668015e-05, "loss": 0.7648, "step": 29359 }, { "epoch": 1.9892946676604106, "grad_norm": 6.345842361450195, "learning_rate": 6.0833732630570195e-05, "loss": 0.699, "step": 29360 }, { "epoch": 1.9893624229283828, "grad_norm": 5.001991271972656, "learning_rate": 6.083236361147239e-05, "loss": 0.5697, "step": 29361 }, { "epoch": 1.9894301781963548, "grad_norm": 5.653055191040039, "learning_rate": 6.083099459237457e-05, "loss": 0.6265, "step": 29362 }, { "epoch": 1.9894979334643268, "grad_norm": 7.085868835449219, "learning_rate": 6.082962557327675e-05, "loss": 0.4482, "step": 29363 }, { "epoch": 1.989565688732299, "grad_norm": 5.462406635284424, "learning_rate": 6.082825655417893e-05, "loss": 0.5903, "step": 29364 }, { "epoch": 1.989633444000271, "grad_norm": 4.69868803024292, "learning_rate": 6.082688753508111e-05, "loss": 0.7195, "step": 29365 }, { "epoch": 1.989701199268243, "grad_norm": 7.083800792694092, "learning_rate": 6.0825518515983305e-05, "loss": 0.7987, "step": 29366 }, { "epoch": 1.9897689545362152, "grad_norm": 6.382864952087402, "learning_rate": 6.0824149496885485e-05, "loss": 0.8108, "step": 29367 }, { "epoch": 1.9898367098041874, "grad_norm": 5.505385398864746, "learning_rate": 6.0822780477787665e-05, "loss": 0.7113, "step": 29368 }, { "epoch": 1.9899044650721593, "grad_norm": 5.154223918914795, "learning_rate": 6.0821411458689846e-05, "loss": 0.5853, "step": 29369 }, { "epoch": 1.9899722203401313, "grad_norm": 5.797886848449707, "learning_rate": 6.082004243959204e-05, "loss": 0.5368, "step": 29370 }, { "epoch": 1.9900399756081035, "grad_norm": 6.80644416809082, "learning_rate": 6.081867342049422e-05, "loss": 0.6625, "step": 29371 }, { "epoch": 1.9901077308760757, "grad_norm": 5.385120391845703, "learning_rate": 6.08173044013964e-05, "loss": 0.5543, "step": 29372 }, { "epoch": 1.9901754861440477, "grad_norm": 4.737466812133789, "learning_rate": 6.081593538229858e-05, "loss": 0.5235, "step": 29373 }, { "epoch": 1.9902432414120197, "grad_norm": 8.754328727722168, "learning_rate": 6.081456636320077e-05, "loss": 0.647, "step": 29374 }, { "epoch": 1.990310996679992, "grad_norm": 8.160957336425781, "learning_rate": 6.0813197344102955e-05, "loss": 0.6809, "step": 29375 }, { "epoch": 1.990378751947964, "grad_norm": 8.9296875, "learning_rate": 6.0811828325005135e-05, "loss": 0.7064, "step": 29376 }, { "epoch": 1.990446507215936, "grad_norm": 7.88253927230835, "learning_rate": 6.081045930590732e-05, "loss": 0.8076, "step": 29377 }, { "epoch": 1.990514262483908, "grad_norm": 5.31406831741333, "learning_rate": 6.08090902868095e-05, "loss": 0.8348, "step": 29378 }, { "epoch": 1.9905820177518803, "grad_norm": 6.807018756866455, "learning_rate": 6.080772126771169e-05, "loss": 0.5522, "step": 29379 }, { "epoch": 1.9906497730198522, "grad_norm": 4.634055137634277, "learning_rate": 6.080635224861388e-05, "loss": 0.7777, "step": 29380 }, { "epoch": 1.9907175282878242, "grad_norm": 6.491459846496582, "learning_rate": 6.080498322951606e-05, "loss": 0.8025, "step": 29381 }, { "epoch": 1.9907852835557964, "grad_norm": 6.749392509460449, "learning_rate": 6.080361421041824e-05, "loss": 0.6132, "step": 29382 }, { "epoch": 1.9908530388237686, "grad_norm": 7.121930122375488, "learning_rate": 6.080224519132042e-05, "loss": 0.708, "step": 29383 }, { "epoch": 1.9909207940917406, "grad_norm": 5.439730167388916, "learning_rate": 6.080087617222261e-05, "loss": 0.7301, "step": 29384 }, { "epoch": 1.9909885493597126, "grad_norm": 6.3607025146484375, "learning_rate": 6.079950715312479e-05, "loss": 0.5789, "step": 29385 }, { "epoch": 1.9910563046276848, "grad_norm": 6.757500171661377, "learning_rate": 6.079813813402697e-05, "loss": 0.5929, "step": 29386 }, { "epoch": 1.991124059895657, "grad_norm": 5.456933975219727, "learning_rate": 6.079676911492915e-05, "loss": 0.9068, "step": 29387 }, { "epoch": 1.991191815163629, "grad_norm": 5.656543254852295, "learning_rate": 6.079540009583135e-05, "loss": 0.6714, "step": 29388 }, { "epoch": 1.991259570431601, "grad_norm": 5.5502777099609375, "learning_rate": 6.079403107673353e-05, "loss": 0.6358, "step": 29389 }, { "epoch": 1.9913273256995732, "grad_norm": 6.89990234375, "learning_rate": 6.079266205763571e-05, "loss": 0.675, "step": 29390 }, { "epoch": 1.9913950809675454, "grad_norm": 4.1465044021606445, "learning_rate": 6.079129303853789e-05, "loss": 0.5561, "step": 29391 }, { "epoch": 1.9914628362355173, "grad_norm": 5.8360595703125, "learning_rate": 6.078992401944007e-05, "loss": 0.7285, "step": 29392 }, { "epoch": 1.9915305915034893, "grad_norm": 5.9560322761535645, "learning_rate": 6.078855500034226e-05, "loss": 0.6881, "step": 29393 }, { "epoch": 1.9915983467714615, "grad_norm": 6.406257629394531, "learning_rate": 6.078718598124444e-05, "loss": 0.6743, "step": 29394 }, { "epoch": 1.9916661020394337, "grad_norm": 8.416624069213867, "learning_rate": 6.078581696214662e-05, "loss": 0.6582, "step": 29395 }, { "epoch": 1.9917338573074055, "grad_norm": 5.766854763031006, "learning_rate": 6.07844479430488e-05, "loss": 0.5185, "step": 29396 }, { "epoch": 1.9918016125753777, "grad_norm": 5.401268005371094, "learning_rate": 6.078307892395099e-05, "loss": 0.4883, "step": 29397 }, { "epoch": 1.99186936784335, "grad_norm": 6.95695686340332, "learning_rate": 6.078170990485318e-05, "loss": 0.793, "step": 29398 }, { "epoch": 1.9919371231113219, "grad_norm": 6.640393257141113, "learning_rate": 6.078034088575536e-05, "loss": 0.6764, "step": 29399 }, { "epoch": 1.9920048783792939, "grad_norm": 8.663447380065918, "learning_rate": 6.0778971866657545e-05, "loss": 0.8746, "step": 29400 }, { "epoch": 1.992072633647266, "grad_norm": 6.471493244171143, "learning_rate": 6.0777602847559725e-05, "loss": 0.7532, "step": 29401 }, { "epoch": 1.9921403889152383, "grad_norm": 5.577736854553223, "learning_rate": 6.077623382846191e-05, "loss": 0.5514, "step": 29402 }, { "epoch": 1.9922081441832102, "grad_norm": 8.605772018432617, "learning_rate": 6.07748648093641e-05, "loss": 0.6858, "step": 29403 }, { "epoch": 1.9922758994511822, "grad_norm": 4.2971601486206055, "learning_rate": 6.077349579026628e-05, "loss": 0.4911, "step": 29404 }, { "epoch": 1.9923436547191544, "grad_norm": 5.2711920738220215, "learning_rate": 6.077212677116846e-05, "loss": 0.6326, "step": 29405 }, { "epoch": 1.9924114099871266, "grad_norm": 4.5069193840026855, "learning_rate": 6.077075775207064e-05, "loss": 0.5886, "step": 29406 }, { "epoch": 1.9924791652550986, "grad_norm": 7.496578216552734, "learning_rate": 6.0769388732972834e-05, "loss": 0.7968, "step": 29407 }, { "epoch": 1.9925469205230706, "grad_norm": 3.848207473754883, "learning_rate": 6.0768019713875014e-05, "loss": 0.3802, "step": 29408 }, { "epoch": 1.9926146757910428, "grad_norm": 5.502758026123047, "learning_rate": 6.0766650694777195e-05, "loss": 0.5713, "step": 29409 }, { "epoch": 1.992682431059015, "grad_norm": 6.045882701873779, "learning_rate": 6.0765281675679375e-05, "loss": 0.7072, "step": 29410 }, { "epoch": 1.992750186326987, "grad_norm": 6.353261470794678, "learning_rate": 6.076391265658157e-05, "loss": 0.6166, "step": 29411 }, { "epoch": 1.992817941594959, "grad_norm": 4.109040260314941, "learning_rate": 6.076254363748375e-05, "loss": 0.6267, "step": 29412 }, { "epoch": 1.9928856968629312, "grad_norm": 4.570509910583496, "learning_rate": 6.076117461838593e-05, "loss": 0.7134, "step": 29413 }, { "epoch": 1.9929534521309031, "grad_norm": 5.379923343658447, "learning_rate": 6.075980559928811e-05, "loss": 0.6635, "step": 29414 }, { "epoch": 1.9930212073988751, "grad_norm": 5.416598796844482, "learning_rate": 6.075843658019029e-05, "loss": 0.6777, "step": 29415 }, { "epoch": 1.9930889626668473, "grad_norm": 5.6916937828063965, "learning_rate": 6.0757067561092484e-05, "loss": 0.6013, "step": 29416 }, { "epoch": 1.9931567179348195, "grad_norm": 5.379004001617432, "learning_rate": 6.0755698541994665e-05, "loss": 0.6345, "step": 29417 }, { "epoch": 1.9932244732027915, "grad_norm": 6.295782089233398, "learning_rate": 6.0754329522896845e-05, "loss": 0.5697, "step": 29418 }, { "epoch": 1.9932922284707635, "grad_norm": 6.223413944244385, "learning_rate": 6.0752960503799025e-05, "loss": 0.6019, "step": 29419 }, { "epoch": 1.9933599837387357, "grad_norm": 12.508454322814941, "learning_rate": 6.075159148470121e-05, "loss": 0.6164, "step": 29420 }, { "epoch": 1.9934277390067079, "grad_norm": 7.4256367683410645, "learning_rate": 6.07502224656034e-05, "loss": 0.73, "step": 29421 }, { "epoch": 1.9934954942746799, "grad_norm": 7.593201160430908, "learning_rate": 6.074885344650558e-05, "loss": 0.6345, "step": 29422 }, { "epoch": 1.9935632495426518, "grad_norm": 5.019646167755127, "learning_rate": 6.074748442740777e-05, "loss": 0.5133, "step": 29423 }, { "epoch": 1.993631004810624, "grad_norm": 5.897983074188232, "learning_rate": 6.074611540830995e-05, "loss": 0.6344, "step": 29424 }, { "epoch": 1.9936987600785963, "grad_norm": 5.429863452911377, "learning_rate": 6.0744746389212134e-05, "loss": 0.6702, "step": 29425 }, { "epoch": 1.9937665153465682, "grad_norm": 5.724225997924805, "learning_rate": 6.074337737011432e-05, "loss": 0.8, "step": 29426 }, { "epoch": 1.9938342706145402, "grad_norm": 7.967280864715576, "learning_rate": 6.07420083510165e-05, "loss": 0.8627, "step": 29427 }, { "epoch": 1.9939020258825124, "grad_norm": 4.264880657196045, "learning_rate": 6.074063933191868e-05, "loss": 0.5319, "step": 29428 }, { "epoch": 1.9939697811504844, "grad_norm": 5.2654523849487305, "learning_rate": 6.073927031282086e-05, "loss": 0.5738, "step": 29429 }, { "epoch": 1.9940375364184564, "grad_norm": 7.116591930389404, "learning_rate": 6.0737901293723056e-05, "loss": 0.9348, "step": 29430 }, { "epoch": 1.9941052916864286, "grad_norm": 5.7883172035217285, "learning_rate": 6.073653227462524e-05, "loss": 0.6519, "step": 29431 }, { "epoch": 1.9941730469544008, "grad_norm": 4.56779146194458, "learning_rate": 6.073516325552742e-05, "loss": 0.5987, "step": 29432 }, { "epoch": 1.9942408022223728, "grad_norm": 3.837368965148926, "learning_rate": 6.07337942364296e-05, "loss": 0.5853, "step": 29433 }, { "epoch": 1.9943085574903447, "grad_norm": 9.682390213012695, "learning_rate": 6.073242521733179e-05, "loss": 0.7175, "step": 29434 }, { "epoch": 1.994376312758317, "grad_norm": 4.5829243659973145, "learning_rate": 6.073105619823397e-05, "loss": 0.5814, "step": 29435 }, { "epoch": 1.9944440680262892, "grad_norm": 5.505808353424072, "learning_rate": 6.072968717913615e-05, "loss": 0.6741, "step": 29436 }, { "epoch": 1.9945118232942611, "grad_norm": 4.479388236999512, "learning_rate": 6.072831816003833e-05, "loss": 0.6255, "step": 29437 }, { "epoch": 1.9945795785622331, "grad_norm": 8.89773941040039, "learning_rate": 6.072694914094051e-05, "loss": 0.7003, "step": 29438 }, { "epoch": 1.9946473338302053, "grad_norm": 4.8071746826171875, "learning_rate": 6.072558012184271e-05, "loss": 0.4372, "step": 29439 }, { "epoch": 1.9947150890981775, "grad_norm": 5.538967609405518, "learning_rate": 6.072421110274489e-05, "loss": 0.7656, "step": 29440 }, { "epoch": 1.9947828443661495, "grad_norm": 6.0372490882873535, "learning_rate": 6.072284208364707e-05, "loss": 0.6996, "step": 29441 }, { "epoch": 1.9948505996341215, "grad_norm": 4.667264461517334, "learning_rate": 6.072147306454925e-05, "loss": 0.7133, "step": 29442 }, { "epoch": 1.9949183549020937, "grad_norm": 5.061943054199219, "learning_rate": 6.072010404545144e-05, "loss": 0.8279, "step": 29443 }, { "epoch": 1.9949861101700659, "grad_norm": 8.61410140991211, "learning_rate": 6.071873502635362e-05, "loss": 0.6026, "step": 29444 }, { "epoch": 1.9950538654380376, "grad_norm": 5.381556034088135, "learning_rate": 6.07173660072558e-05, "loss": 0.5613, "step": 29445 }, { "epoch": 1.9951216207060098, "grad_norm": 6.3068671226501465, "learning_rate": 6.071599698815799e-05, "loss": 0.6624, "step": 29446 }, { "epoch": 1.995189375973982, "grad_norm": 4.966466426849365, "learning_rate": 6.071462796906017e-05, "loss": 0.6657, "step": 29447 }, { "epoch": 1.995257131241954, "grad_norm": 6.995785236358643, "learning_rate": 6.071325894996236e-05, "loss": 0.6992, "step": 29448 }, { "epoch": 1.995324886509926, "grad_norm": 5.431858539581299, "learning_rate": 6.0711889930864544e-05, "loss": 0.8313, "step": 29449 }, { "epoch": 1.9953926417778982, "grad_norm": 4.248841285705566, "learning_rate": 6.0710520911766724e-05, "loss": 0.4806, "step": 29450 }, { "epoch": 1.9954603970458704, "grad_norm": 6.397432327270508, "learning_rate": 6.0709151892668905e-05, "loss": 0.6347, "step": 29451 }, { "epoch": 1.9955281523138424, "grad_norm": 7.363058090209961, "learning_rate": 6.0707782873571085e-05, "loss": 0.525, "step": 29452 }, { "epoch": 1.9955959075818144, "grad_norm": 4.596001625061035, "learning_rate": 6.070641385447328e-05, "loss": 0.6233, "step": 29453 }, { "epoch": 1.9956636628497866, "grad_norm": 4.657079219818115, "learning_rate": 6.070504483537546e-05, "loss": 0.6413, "step": 29454 }, { "epoch": 1.9957314181177588, "grad_norm": 10.19721508026123, "learning_rate": 6.070367581627764e-05, "loss": 0.793, "step": 29455 }, { "epoch": 1.9957991733857308, "grad_norm": 8.594694137573242, "learning_rate": 6.070230679717982e-05, "loss": 0.4314, "step": 29456 }, { "epoch": 1.9958669286537027, "grad_norm": 5.3768110275268555, "learning_rate": 6.0700937778082014e-05, "loss": 0.763, "step": 29457 }, { "epoch": 1.995934683921675, "grad_norm": 4.789430141448975, "learning_rate": 6.0699568758984194e-05, "loss": 0.639, "step": 29458 }, { "epoch": 1.9960024391896471, "grad_norm": 6.539261341094971, "learning_rate": 6.0698199739886374e-05, "loss": 0.655, "step": 29459 }, { "epoch": 1.9960701944576191, "grad_norm": 5.305748462677002, "learning_rate": 6.0696830720788555e-05, "loss": 0.6855, "step": 29460 }, { "epoch": 1.996137949725591, "grad_norm": 5.494818687438965, "learning_rate": 6.0695461701690735e-05, "loss": 0.7666, "step": 29461 }, { "epoch": 1.9962057049935633, "grad_norm": 9.304529190063477, "learning_rate": 6.069409268259293e-05, "loss": 0.7973, "step": 29462 }, { "epoch": 1.9962734602615353, "grad_norm": 10.955415725708008, "learning_rate": 6.069272366349511e-05, "loss": 0.6222, "step": 29463 }, { "epoch": 1.9963412155295073, "grad_norm": 6.213497638702393, "learning_rate": 6.069135464439729e-05, "loss": 0.6682, "step": 29464 }, { "epoch": 1.9964089707974795, "grad_norm": 8.0345458984375, "learning_rate": 6.068998562529947e-05, "loss": 0.645, "step": 29465 }, { "epoch": 1.9964767260654517, "grad_norm": 3.823809862136841, "learning_rate": 6.0688616606201664e-05, "loss": 0.4907, "step": 29466 }, { "epoch": 1.9965444813334237, "grad_norm": 6.956606388092041, "learning_rate": 6.0687247587103844e-05, "loss": 0.6844, "step": 29467 }, { "epoch": 1.9966122366013956, "grad_norm": 5.052365779876709, "learning_rate": 6.0685878568006025e-05, "loss": 0.6724, "step": 29468 }, { "epoch": 1.9966799918693678, "grad_norm": 7.330844402313232, "learning_rate": 6.0684509548908205e-05, "loss": 0.6755, "step": 29469 }, { "epoch": 1.99674774713734, "grad_norm": 4.676812648773193, "learning_rate": 6.068314052981039e-05, "loss": 0.6518, "step": 29470 }, { "epoch": 1.996815502405312, "grad_norm": 4.831790447235107, "learning_rate": 6.068177151071258e-05, "loss": 0.7714, "step": 29471 }, { "epoch": 1.996883257673284, "grad_norm": 6.003378868103027, "learning_rate": 6.068040249161476e-05, "loss": 0.5169, "step": 29472 }, { "epoch": 1.9969510129412562, "grad_norm": 6.830814838409424, "learning_rate": 6.067903347251695e-05, "loss": 0.8032, "step": 29473 }, { "epoch": 1.9970187682092284, "grad_norm": 7.508457183837891, "learning_rate": 6.067766445341913e-05, "loss": 0.9248, "step": 29474 }, { "epoch": 1.9970865234772004, "grad_norm": 5.409514427185059, "learning_rate": 6.067629543432131e-05, "loss": 0.6694, "step": 29475 }, { "epoch": 1.9971542787451724, "grad_norm": 5.681884288787842, "learning_rate": 6.06749264152235e-05, "loss": 0.7584, "step": 29476 }, { "epoch": 1.9972220340131446, "grad_norm": 4.127732276916504, "learning_rate": 6.067355739612568e-05, "loss": 0.5146, "step": 29477 }, { "epoch": 1.9972897892811166, "grad_norm": 8.106917381286621, "learning_rate": 6.067218837702786e-05, "loss": 0.7145, "step": 29478 }, { "epoch": 1.9973575445490885, "grad_norm": 5.82974910736084, "learning_rate": 6.067081935793004e-05, "loss": 0.6233, "step": 29479 }, { "epoch": 1.9974252998170607, "grad_norm": 5.071499347686768, "learning_rate": 6.0669450338832236e-05, "loss": 0.6973, "step": 29480 }, { "epoch": 1.997493055085033, "grad_norm": 5.108856678009033, "learning_rate": 6.0668081319734416e-05, "loss": 0.5789, "step": 29481 }, { "epoch": 1.997560810353005, "grad_norm": 6.39151668548584, "learning_rate": 6.06667123006366e-05, "loss": 0.7151, "step": 29482 }, { "epoch": 1.997628565620977, "grad_norm": 5.161790370941162, "learning_rate": 6.066534328153878e-05, "loss": 0.515, "step": 29483 }, { "epoch": 1.997696320888949, "grad_norm": 4.943962097167969, "learning_rate": 6.066397426244096e-05, "loss": 0.58, "step": 29484 }, { "epoch": 1.9977640761569213, "grad_norm": 6.901295185089111, "learning_rate": 6.066260524334315e-05, "loss": 0.6464, "step": 29485 }, { "epoch": 1.9978318314248933, "grad_norm": 5.377569198608398, "learning_rate": 6.066123622424533e-05, "loss": 0.6199, "step": 29486 }, { "epoch": 1.9978995866928653, "grad_norm": 6.235507011413574, "learning_rate": 6.065986720514751e-05, "loss": 0.4524, "step": 29487 }, { "epoch": 1.9979673419608375, "grad_norm": 5.418730735778809, "learning_rate": 6.065849818604969e-05, "loss": 0.5719, "step": 29488 }, { "epoch": 1.9980350972288097, "grad_norm": 5.875093460083008, "learning_rate": 6.0657129166951886e-05, "loss": 0.7331, "step": 29489 }, { "epoch": 1.9981028524967817, "grad_norm": 5.552278995513916, "learning_rate": 6.065576014785407e-05, "loss": 0.4725, "step": 29490 }, { "epoch": 1.9981706077647536, "grad_norm": 5.748478412628174, "learning_rate": 6.065439112875625e-05, "loss": 0.7747, "step": 29491 }, { "epoch": 1.9982383630327258, "grad_norm": 11.541815757751465, "learning_rate": 6.065302210965843e-05, "loss": 0.6361, "step": 29492 }, { "epoch": 1.998306118300698, "grad_norm": 12.455324172973633, "learning_rate": 6.0651653090560614e-05, "loss": 0.7121, "step": 29493 }, { "epoch": 1.9983738735686698, "grad_norm": 5.434767246246338, "learning_rate": 6.06502840714628e-05, "loss": 0.754, "step": 29494 }, { "epoch": 1.998441628836642, "grad_norm": 5.827384948730469, "learning_rate": 6.064891505236498e-05, "loss": 0.669, "step": 29495 }, { "epoch": 1.9985093841046142, "grad_norm": 7.219960689544678, "learning_rate": 6.064754603326717e-05, "loss": 0.7607, "step": 29496 }, { "epoch": 1.9985771393725862, "grad_norm": 5.809063911437988, "learning_rate": 6.064617701416935e-05, "loss": 0.7069, "step": 29497 }, { "epoch": 1.9986448946405582, "grad_norm": 5.252683162689209, "learning_rate": 6.064480799507153e-05, "loss": 0.5916, "step": 29498 }, { "epoch": 1.9987126499085304, "grad_norm": 6.503682613372803, "learning_rate": 6.0643438975973724e-05, "loss": 0.8998, "step": 29499 }, { "epoch": 1.9987804051765026, "grad_norm": 4.233732223510742, "learning_rate": 6.0642069956875904e-05, "loss": 0.4775, "step": 29500 }, { "epoch": 1.9988481604444746, "grad_norm": 5.518711566925049, "learning_rate": 6.0640700937778084e-05, "loss": 0.7294, "step": 29501 }, { "epoch": 1.9989159157124465, "grad_norm": 5.889267444610596, "learning_rate": 6.0639331918680265e-05, "loss": 1.0587, "step": 29502 }, { "epoch": 1.9989836709804187, "grad_norm": 5.110995769500732, "learning_rate": 6.063796289958246e-05, "loss": 0.6188, "step": 29503 }, { "epoch": 1.999051426248391, "grad_norm": 7.661651134490967, "learning_rate": 6.063659388048464e-05, "loss": 0.7318, "step": 29504 }, { "epoch": 1.999119181516363, "grad_norm": 6.2814040184021, "learning_rate": 6.063522486138682e-05, "loss": 0.596, "step": 29505 }, { "epoch": 1.999186936784335, "grad_norm": 9.107486724853516, "learning_rate": 6.0633855842289e-05, "loss": 0.4435, "step": 29506 }, { "epoch": 1.999254692052307, "grad_norm": 11.688543319702148, "learning_rate": 6.063248682319118e-05, "loss": 0.7047, "step": 29507 }, { "epoch": 1.9993224473202793, "grad_norm": 13.16971206665039, "learning_rate": 6.0631117804093374e-05, "loss": 0.7755, "step": 29508 }, { "epoch": 1.9993902025882513, "grad_norm": 5.9566826820373535, "learning_rate": 6.0629748784995554e-05, "loss": 0.8847, "step": 29509 }, { "epoch": 1.9994579578562233, "grad_norm": 5.143085479736328, "learning_rate": 6.0628379765897734e-05, "loss": 0.8833, "step": 29510 }, { "epoch": 1.9995257131241955, "grad_norm": 6.059861183166504, "learning_rate": 6.0627010746799915e-05, "loss": 0.7584, "step": 29511 }, { "epoch": 1.9995934683921675, "grad_norm": 6.2443695068359375, "learning_rate": 6.062564172770211e-05, "loss": 0.743, "step": 29512 }, { "epoch": 1.9995934683921675, "eval_loss": 0.6558533906936646, "eval_noise_accuracy": 0.0, "eval_runtime": 1493.0701, "eval_samples_per_second": 3.442, "eval_steps_per_second": 0.216, "eval_wer": 71.22533777276354, "step": 29512 } ], "logging_steps": 1, "max_steps": 73795, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 3689, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.16247913648128e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }