{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4998284969472457, "eval_steps": 3644, "global_step": 10932, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013720244220347122, "grad_norm": 5.03125, "learning_rate": 2.0000000000000002e-07, "loss": 0.7728, "step": 1 }, { "epoch": 0.00013720244220347122, "eval_loss": 2.1386725902557373, "eval_runtime": 117.3232, "eval_samples_per_second": 1.44, "eval_steps_per_second": 0.724, "step": 1 }, { "epoch": 0.00027440488440694244, "grad_norm": 6.5625, "learning_rate": 4.0000000000000003e-07, "loss": 1.0166, "step": 2 }, { "epoch": 0.00041160732661041366, "grad_norm": 5.5, "learning_rate": 6.000000000000001e-07, "loss": 0.9455, "step": 3 }, { "epoch": 0.0005488097688138849, "grad_norm": 6.21875, "learning_rate": 8.000000000000001e-07, "loss": 0.9975, "step": 4 }, { "epoch": 0.0006860122110173561, "grad_norm": 5.53125, "learning_rate": 1.0000000000000002e-06, "loss": 1.0104, "step": 5 }, { "epoch": 0.0008232146532208273, "grad_norm": 6.125, "learning_rate": 1.2000000000000002e-06, "loss": 0.9714, "step": 6 }, { "epoch": 0.0009604170954242985, "grad_norm": 5.78125, "learning_rate": 1.4000000000000001e-06, "loss": 1.1187, "step": 7 }, { "epoch": 0.0010976195376277698, "grad_norm": 4.9375, "learning_rate": 1.6000000000000001e-06, "loss": 0.999, "step": 8 }, { "epoch": 0.001234821979831241, "grad_norm": 5.21875, "learning_rate": 1.8000000000000001e-06, "loss": 1.0177, "step": 9 }, { "epoch": 0.0013720244220347122, "grad_norm": 4.4375, "learning_rate": 2.0000000000000003e-06, "loss": 0.8118, "step": 10 }, { "epoch": 0.0015092268642381834, "grad_norm": 4.4375, "learning_rate": 2.2e-06, "loss": 0.9933, "step": 11 }, { "epoch": 0.0016464293064416546, "grad_norm": 4.0, "learning_rate": 2.4000000000000003e-06, "loss": 0.9576, "step": 12 }, { "epoch": 0.0017836317486451259, "grad_norm": 3.703125, "learning_rate": 2.6e-06, "loss": 0.9752, "step": 13 }, { "epoch": 0.001920834190848597, "grad_norm": 3.109375, "learning_rate": 2.8000000000000003e-06, "loss": 0.8734, "step": 14 }, { "epoch": 0.0020580366330520683, "grad_norm": 3.21875, "learning_rate": 3e-06, "loss": 0.8848, "step": 15 }, { "epoch": 0.0021952390752555395, "grad_norm": 2.796875, "learning_rate": 3.2000000000000003e-06, "loss": 0.9299, "step": 16 }, { "epoch": 0.0023324415174590107, "grad_norm": 2.578125, "learning_rate": 3.4000000000000005e-06, "loss": 0.8871, "step": 17 }, { "epoch": 0.002469643959662482, "grad_norm": 2.28125, "learning_rate": 3.6000000000000003e-06, "loss": 0.6966, "step": 18 }, { "epoch": 0.002606846401865953, "grad_norm": 2.703125, "learning_rate": 3.8000000000000005e-06, "loss": 1.0016, "step": 19 }, { "epoch": 0.0027440488440694244, "grad_norm": 2.40625, "learning_rate": 4.000000000000001e-06, "loss": 0.8789, "step": 20 }, { "epoch": 0.0028812512862728956, "grad_norm": 2.453125, "learning_rate": 4.2000000000000004e-06, "loss": 0.8945, "step": 21 }, { "epoch": 0.003018453728476367, "grad_norm": 2.671875, "learning_rate": 4.4e-06, "loss": 0.9587, "step": 22 }, { "epoch": 0.003155656170679838, "grad_norm": 2.4375, "learning_rate": 4.600000000000001e-06, "loss": 0.8768, "step": 23 }, { "epoch": 0.0032928586128833093, "grad_norm": 2.453125, "learning_rate": 4.800000000000001e-06, "loss": 0.8977, "step": 24 }, { "epoch": 0.0034300610550867805, "grad_norm": 2.34375, "learning_rate": 5e-06, "loss": 0.8553, "step": 25 }, { "epoch": 0.0035672634972902517, "grad_norm": 2.03125, "learning_rate": 5.2e-06, "loss": 0.8546, "step": 26 }, { "epoch": 0.003704465939493723, "grad_norm": 2.375, "learning_rate": 5.400000000000001e-06, "loss": 0.9306, "step": 27 }, { "epoch": 0.003841668381697194, "grad_norm": 1.875, "learning_rate": 5.600000000000001e-06, "loss": 0.7251, "step": 28 }, { "epoch": 0.003978870823900665, "grad_norm": 1.984375, "learning_rate": 5.8e-06, "loss": 0.7678, "step": 29 }, { "epoch": 0.004116073266104137, "grad_norm": 1.96875, "learning_rate": 6e-06, "loss": 0.8668, "step": 30 }, { "epoch": 0.004253275708307608, "grad_norm": 1.9765625, "learning_rate": 6.200000000000001e-06, "loss": 0.9034, "step": 31 }, { "epoch": 0.004390478150511079, "grad_norm": 1.703125, "learning_rate": 6.4000000000000006e-06, "loss": 0.7841, "step": 32 }, { "epoch": 0.00452768059271455, "grad_norm": 1.90625, "learning_rate": 6.600000000000001e-06, "loss": 0.7927, "step": 33 }, { "epoch": 0.0046648830349180215, "grad_norm": 1.9140625, "learning_rate": 6.800000000000001e-06, "loss": 0.8473, "step": 34 }, { "epoch": 0.004802085477121493, "grad_norm": 1.65625, "learning_rate": 7e-06, "loss": 0.7598, "step": 35 }, { "epoch": 0.004939287919324964, "grad_norm": 1.640625, "learning_rate": 7.2000000000000005e-06, "loss": 0.7043, "step": 36 }, { "epoch": 0.005076490361528435, "grad_norm": 1.6796875, "learning_rate": 7.4e-06, "loss": 0.7576, "step": 37 }, { "epoch": 0.005213692803731906, "grad_norm": 1.5390625, "learning_rate": 7.600000000000001e-06, "loss": 0.7466, "step": 38 }, { "epoch": 0.005350895245935378, "grad_norm": 1.6171875, "learning_rate": 7.800000000000002e-06, "loss": 0.7588, "step": 39 }, { "epoch": 0.005488097688138849, "grad_norm": 1.484375, "learning_rate": 8.000000000000001e-06, "loss": 0.7456, "step": 40 }, { "epoch": 0.00562530013034232, "grad_norm": 1.609375, "learning_rate": 8.2e-06, "loss": 0.8141, "step": 41 }, { "epoch": 0.005762502572545791, "grad_norm": 1.6015625, "learning_rate": 8.400000000000001e-06, "loss": 0.8084, "step": 42 }, { "epoch": 0.0058997050147492625, "grad_norm": 1.546875, "learning_rate": 8.6e-06, "loss": 0.74, "step": 43 }, { "epoch": 0.006036907456952734, "grad_norm": 1.5703125, "learning_rate": 8.8e-06, "loss": 0.796, "step": 44 }, { "epoch": 0.006174109899156205, "grad_norm": 1.5234375, "learning_rate": 9e-06, "loss": 0.7346, "step": 45 }, { "epoch": 0.006311312341359676, "grad_norm": 1.53125, "learning_rate": 9.200000000000002e-06, "loss": 0.7728, "step": 46 }, { "epoch": 0.006448514783563147, "grad_norm": 1.5234375, "learning_rate": 9.4e-06, "loss": 0.7579, "step": 47 }, { "epoch": 0.0065857172257666186, "grad_norm": 1.390625, "learning_rate": 9.600000000000001e-06, "loss": 0.6439, "step": 48 }, { "epoch": 0.00672291966797009, "grad_norm": 1.609375, "learning_rate": 9.800000000000001e-06, "loss": 0.7373, "step": 49 }, { "epoch": 0.006860122110173561, "grad_norm": 1.515625, "learning_rate": 1e-05, "loss": 0.7634, "step": 50 }, { "epoch": 0.006997324552377032, "grad_norm": 1.6484375, "learning_rate": 1.02e-05, "loss": 0.8177, "step": 51 }, { "epoch": 0.0071345269945805034, "grad_norm": 1.2421875, "learning_rate": 1.04e-05, "loss": 0.5714, "step": 52 }, { "epoch": 0.007271729436783975, "grad_norm": 1.625, "learning_rate": 1.0600000000000002e-05, "loss": 0.8138, "step": 53 }, { "epoch": 0.007408931878987446, "grad_norm": 1.5703125, "learning_rate": 1.0800000000000002e-05, "loss": 0.7912, "step": 54 }, { "epoch": 0.007546134321190917, "grad_norm": 1.4765625, "learning_rate": 1.1000000000000001e-05, "loss": 0.6853, "step": 55 }, { "epoch": 0.007683336763394388, "grad_norm": 1.40625, "learning_rate": 1.1200000000000001e-05, "loss": 0.6824, "step": 56 }, { "epoch": 0.00782053920559786, "grad_norm": 1.578125, "learning_rate": 1.14e-05, "loss": 0.5946, "step": 57 }, { "epoch": 0.00795774164780133, "grad_norm": 1.4453125, "learning_rate": 1.16e-05, "loss": 0.7336, "step": 58 }, { "epoch": 0.008094944090004802, "grad_norm": 1.40625, "learning_rate": 1.18e-05, "loss": 0.6735, "step": 59 }, { "epoch": 0.008232146532208273, "grad_norm": 1.5625, "learning_rate": 1.2e-05, "loss": 0.8619, "step": 60 }, { "epoch": 0.008369348974411744, "grad_norm": 1.4921875, "learning_rate": 1.22e-05, "loss": 0.7084, "step": 61 }, { "epoch": 0.008506551416615216, "grad_norm": 1.578125, "learning_rate": 1.2400000000000002e-05, "loss": 0.7643, "step": 62 }, { "epoch": 0.008643753858818687, "grad_norm": 1.546875, "learning_rate": 1.2600000000000001e-05, "loss": 0.7693, "step": 63 }, { "epoch": 0.008780956301022158, "grad_norm": 1.6328125, "learning_rate": 1.2800000000000001e-05, "loss": 0.6744, "step": 64 }, { "epoch": 0.00891815874322563, "grad_norm": 1.53125, "learning_rate": 1.3000000000000001e-05, "loss": 0.6964, "step": 65 }, { "epoch": 0.0090553611854291, "grad_norm": 1.5078125, "learning_rate": 1.3200000000000002e-05, "loss": 0.6962, "step": 66 }, { "epoch": 0.009192563627632572, "grad_norm": 1.46875, "learning_rate": 1.3400000000000002e-05, "loss": 0.7291, "step": 67 }, { "epoch": 0.009329766069836043, "grad_norm": 1.4375, "learning_rate": 1.3600000000000002e-05, "loss": 0.6229, "step": 68 }, { "epoch": 0.009466968512039514, "grad_norm": 1.4296875, "learning_rate": 1.38e-05, "loss": 0.7007, "step": 69 }, { "epoch": 0.009604170954242985, "grad_norm": 1.3671875, "learning_rate": 1.4e-05, "loss": 0.6732, "step": 70 }, { "epoch": 0.009741373396446457, "grad_norm": 1.546875, "learning_rate": 1.4200000000000001e-05, "loss": 0.6837, "step": 71 }, { "epoch": 0.009878575838649928, "grad_norm": 1.4921875, "learning_rate": 1.4400000000000001e-05, "loss": 0.6904, "step": 72 }, { "epoch": 0.010015778280853399, "grad_norm": 1.625, "learning_rate": 1.46e-05, "loss": 0.7529, "step": 73 }, { "epoch": 0.01015298072305687, "grad_norm": 1.3828125, "learning_rate": 1.48e-05, "loss": 0.6368, "step": 74 }, { "epoch": 0.010290183165260341, "grad_norm": 1.40625, "learning_rate": 1.5000000000000002e-05, "loss": 0.6416, "step": 75 }, { "epoch": 0.010427385607463813, "grad_norm": 1.5390625, "learning_rate": 1.5200000000000002e-05, "loss": 0.7155, "step": 76 }, { "epoch": 0.010564588049667284, "grad_norm": 1.5546875, "learning_rate": 1.54e-05, "loss": 0.71, "step": 77 }, { "epoch": 0.010701790491870755, "grad_norm": 1.5, "learning_rate": 1.5600000000000003e-05, "loss": 0.759, "step": 78 }, { "epoch": 0.010838992934074226, "grad_norm": 1.5703125, "learning_rate": 1.58e-05, "loss": 0.7485, "step": 79 }, { "epoch": 0.010976195376277698, "grad_norm": 1.375, "learning_rate": 1.6000000000000003e-05, "loss": 0.6418, "step": 80 }, { "epoch": 0.011113397818481169, "grad_norm": 1.5234375, "learning_rate": 1.62e-05, "loss": 0.6497, "step": 81 }, { "epoch": 0.01125060026068464, "grad_norm": 1.4921875, "learning_rate": 1.64e-05, "loss": 0.6416, "step": 82 }, { "epoch": 0.011387802702888111, "grad_norm": 1.609375, "learning_rate": 1.66e-05, "loss": 0.7289, "step": 83 }, { "epoch": 0.011525005145091582, "grad_norm": 1.6328125, "learning_rate": 1.6800000000000002e-05, "loss": 0.7212, "step": 84 }, { "epoch": 0.011662207587295054, "grad_norm": 1.609375, "learning_rate": 1.7e-05, "loss": 0.7564, "step": 85 }, { "epoch": 0.011799410029498525, "grad_norm": 1.6640625, "learning_rate": 1.72e-05, "loss": 0.78, "step": 86 }, { "epoch": 0.011936612471701996, "grad_norm": 1.3828125, "learning_rate": 1.7400000000000003e-05, "loss": 0.6194, "step": 87 }, { "epoch": 0.012073814913905467, "grad_norm": 1.4140625, "learning_rate": 1.76e-05, "loss": 0.6584, "step": 88 }, { "epoch": 0.012211017356108939, "grad_norm": 1.609375, "learning_rate": 1.7800000000000002e-05, "loss": 0.6753, "step": 89 }, { "epoch": 0.01234821979831241, "grad_norm": 1.5703125, "learning_rate": 1.8e-05, "loss": 0.7334, "step": 90 }, { "epoch": 0.012485422240515881, "grad_norm": 1.703125, "learning_rate": 1.8200000000000002e-05, "loss": 0.7377, "step": 91 }, { "epoch": 0.012622624682719352, "grad_norm": 1.703125, "learning_rate": 1.8400000000000003e-05, "loss": 0.6934, "step": 92 }, { "epoch": 0.012759827124922823, "grad_norm": 1.5234375, "learning_rate": 1.86e-05, "loss": 0.6372, "step": 93 }, { "epoch": 0.012897029567126295, "grad_norm": 1.671875, "learning_rate": 1.88e-05, "loss": 0.7623, "step": 94 }, { "epoch": 0.013034232009329766, "grad_norm": 1.59375, "learning_rate": 1.9e-05, "loss": 0.7187, "step": 95 }, { "epoch": 0.013171434451533237, "grad_norm": 1.5859375, "learning_rate": 1.9200000000000003e-05, "loss": 0.7063, "step": 96 }, { "epoch": 0.013308636893736708, "grad_norm": 1.59375, "learning_rate": 1.94e-05, "loss": 0.7087, "step": 97 }, { "epoch": 0.01344583933594018, "grad_norm": 1.3125, "learning_rate": 1.9600000000000002e-05, "loss": 0.5403, "step": 98 }, { "epoch": 0.01358304177814365, "grad_norm": 1.4140625, "learning_rate": 1.98e-05, "loss": 0.5345, "step": 99 }, { "epoch": 0.013720244220347122, "grad_norm": 1.640625, "learning_rate": 2e-05, "loss": 0.6933, "step": 100 }, { "epoch": 0.013857446662550593, "grad_norm": 1.5078125, "learning_rate": 1.9999999895818093e-05, "loss": 0.6652, "step": 101 }, { "epoch": 0.013994649104754064, "grad_norm": 1.4765625, "learning_rate": 1.9999999583272365e-05, "loss": 0.7272, "step": 102 }, { "epoch": 0.014131851546957536, "grad_norm": 1.6015625, "learning_rate": 1.9999999062362823e-05, "loss": 0.683, "step": 103 }, { "epoch": 0.014269053989161007, "grad_norm": 1.546875, "learning_rate": 1.999999833308949e-05, "loss": 0.6063, "step": 104 }, { "epoch": 0.014406256431364478, "grad_norm": 1.6171875, "learning_rate": 1.9999997395452365e-05, "loss": 0.7214, "step": 105 }, { "epoch": 0.01454345887356795, "grad_norm": 1.609375, "learning_rate": 1.9999996249451478e-05, "loss": 0.6006, "step": 106 }, { "epoch": 0.01468066131577142, "grad_norm": 1.609375, "learning_rate": 1.9999994895086846e-05, "loss": 0.7652, "step": 107 }, { "epoch": 0.014817863757974892, "grad_norm": 1.53125, "learning_rate": 1.9999993332358502e-05, "loss": 0.655, "step": 108 }, { "epoch": 0.014955066200178363, "grad_norm": 1.4296875, "learning_rate": 1.9999991561266478e-05, "loss": 0.6011, "step": 109 }, { "epoch": 0.015092268642381834, "grad_norm": 1.453125, "learning_rate": 1.999998958181081e-05, "loss": 0.5914, "step": 110 }, { "epoch": 0.015229471084585305, "grad_norm": 1.6328125, "learning_rate": 1.9999987393991537e-05, "loss": 0.5925, "step": 111 }, { "epoch": 0.015366673526788777, "grad_norm": 1.390625, "learning_rate": 1.9999984997808713e-05, "loss": 0.6211, "step": 112 }, { "epoch": 0.015503875968992248, "grad_norm": 1.5625, "learning_rate": 1.9999982393262377e-05, "loss": 0.6942, "step": 113 }, { "epoch": 0.01564107841119572, "grad_norm": 1.6953125, "learning_rate": 1.999997958035259e-05, "loss": 0.73, "step": 114 }, { "epoch": 0.01577828085339919, "grad_norm": 1.4375, "learning_rate": 1.9999976559079407e-05, "loss": 0.6224, "step": 115 }, { "epoch": 0.01591548329560266, "grad_norm": 1.7109375, "learning_rate": 1.9999973329442895e-05, "loss": 0.6727, "step": 116 }, { "epoch": 0.016052685737806133, "grad_norm": 1.609375, "learning_rate": 1.999996989144312e-05, "loss": 0.7086, "step": 117 }, { "epoch": 0.016189888180009604, "grad_norm": 1.4453125, "learning_rate": 1.999996624508015e-05, "loss": 0.6272, "step": 118 }, { "epoch": 0.016327090622213075, "grad_norm": 1.4765625, "learning_rate": 1.9999962390354064e-05, "loss": 0.6951, "step": 119 }, { "epoch": 0.016464293064416546, "grad_norm": 1.6953125, "learning_rate": 1.9999958327264942e-05, "loss": 0.6178, "step": 120 }, { "epoch": 0.016601495506620018, "grad_norm": 1.625, "learning_rate": 1.999995405581287e-05, "loss": 0.6712, "step": 121 }, { "epoch": 0.01673869794882349, "grad_norm": 1.4921875, "learning_rate": 1.9999949575997934e-05, "loss": 0.6199, "step": 122 }, { "epoch": 0.01687590039102696, "grad_norm": 1.546875, "learning_rate": 1.999994488782023e-05, "loss": 0.7071, "step": 123 }, { "epoch": 0.01701310283323043, "grad_norm": 1.5234375, "learning_rate": 1.9999939991279854e-05, "loss": 0.6345, "step": 124 }, { "epoch": 0.017150305275433902, "grad_norm": 1.53125, "learning_rate": 1.999993488637691e-05, "loss": 0.6237, "step": 125 }, { "epoch": 0.017287507717637374, "grad_norm": 1.5703125, "learning_rate": 1.9999929573111502e-05, "loss": 0.7013, "step": 126 }, { "epoch": 0.017424710159840845, "grad_norm": 1.6015625, "learning_rate": 1.9999924051483744e-05, "loss": 0.6693, "step": 127 }, { "epoch": 0.017561912602044316, "grad_norm": 1.4609375, "learning_rate": 1.9999918321493744e-05, "loss": 0.6545, "step": 128 }, { "epoch": 0.017699115044247787, "grad_norm": 1.328125, "learning_rate": 1.9999912383141633e-05, "loss": 0.5797, "step": 129 }, { "epoch": 0.01783631748645126, "grad_norm": 1.375, "learning_rate": 1.9999906236427524e-05, "loss": 0.6287, "step": 130 }, { "epoch": 0.01797351992865473, "grad_norm": 1.5546875, "learning_rate": 1.9999899881351546e-05, "loss": 0.6837, "step": 131 }, { "epoch": 0.0181107223708582, "grad_norm": 1.5546875, "learning_rate": 1.9999893317913843e-05, "loss": 0.6451, "step": 132 }, { "epoch": 0.018247924813061672, "grad_norm": 1.5703125, "learning_rate": 1.999988654611453e-05, "loss": 0.6439, "step": 133 }, { "epoch": 0.018385127255265143, "grad_norm": 1.3984375, "learning_rate": 1.9999879565953772e-05, "loss": 0.5381, "step": 134 }, { "epoch": 0.018522329697468615, "grad_norm": 1.609375, "learning_rate": 1.9999872377431703e-05, "loss": 0.7208, "step": 135 }, { "epoch": 0.018659532139672086, "grad_norm": 1.640625, "learning_rate": 1.999986498054847e-05, "loss": 0.766, "step": 136 }, { "epoch": 0.018796734581875557, "grad_norm": 1.5703125, "learning_rate": 1.9999857375304233e-05, "loss": 0.7966, "step": 137 }, { "epoch": 0.01893393702407903, "grad_norm": 1.46875, "learning_rate": 1.999984956169915e-05, "loss": 0.638, "step": 138 }, { "epoch": 0.0190711394662825, "grad_norm": 1.625, "learning_rate": 1.999984153973338e-05, "loss": 0.7494, "step": 139 }, { "epoch": 0.01920834190848597, "grad_norm": 1.359375, "learning_rate": 1.999983330940709e-05, "loss": 0.6228, "step": 140 }, { "epoch": 0.019345544350689442, "grad_norm": 1.4921875, "learning_rate": 1.999982487072045e-05, "loss": 0.6124, "step": 141 }, { "epoch": 0.019482746792892913, "grad_norm": 1.4921875, "learning_rate": 1.999981622367365e-05, "loss": 0.643, "step": 142 }, { "epoch": 0.019619949235096384, "grad_norm": 1.5, "learning_rate": 1.999980736826685e-05, "loss": 0.7544, "step": 143 }, { "epoch": 0.019757151677299856, "grad_norm": 1.453125, "learning_rate": 1.999979830450025e-05, "loss": 0.6164, "step": 144 }, { "epoch": 0.019894354119503327, "grad_norm": 1.515625, "learning_rate": 1.999978903237403e-05, "loss": 0.6695, "step": 145 }, { "epoch": 0.020031556561706798, "grad_norm": 1.3359375, "learning_rate": 1.9999779551888386e-05, "loss": 0.5726, "step": 146 }, { "epoch": 0.02016875900391027, "grad_norm": 1.453125, "learning_rate": 1.999976986304352e-05, "loss": 0.6, "step": 147 }, { "epoch": 0.02030596144611374, "grad_norm": 1.546875, "learning_rate": 1.9999759965839627e-05, "loss": 0.7008, "step": 148 }, { "epoch": 0.020443163888317212, "grad_norm": 1.375, "learning_rate": 1.9999749860276912e-05, "loss": 0.5492, "step": 149 }, { "epoch": 0.020580366330520683, "grad_norm": 1.5234375, "learning_rate": 1.9999739546355595e-05, "loss": 0.7223, "step": 150 }, { "epoch": 0.020717568772724154, "grad_norm": 1.484375, "learning_rate": 1.9999729024075882e-05, "loss": 0.6619, "step": 151 }, { "epoch": 0.020854771214927625, "grad_norm": 1.6171875, "learning_rate": 1.9999718293437997e-05, "loss": 0.6918, "step": 152 }, { "epoch": 0.020991973657131097, "grad_norm": 1.4453125, "learning_rate": 1.999970735444216e-05, "loss": 0.6515, "step": 153 }, { "epoch": 0.021129176099334568, "grad_norm": 1.5390625, "learning_rate": 1.9999696207088605e-05, "loss": 0.7082, "step": 154 }, { "epoch": 0.02126637854153804, "grad_norm": 1.640625, "learning_rate": 1.9999684851377556e-05, "loss": 0.7337, "step": 155 }, { "epoch": 0.02140358098374151, "grad_norm": 1.5703125, "learning_rate": 1.9999673287309256e-05, "loss": 0.6667, "step": 156 }, { "epoch": 0.02154078342594498, "grad_norm": 1.609375, "learning_rate": 1.9999661514883943e-05, "loss": 0.6972, "step": 157 }, { "epoch": 0.021677985868148453, "grad_norm": 1.6015625, "learning_rate": 1.9999649534101864e-05, "loss": 0.7383, "step": 158 }, { "epoch": 0.021815188310351924, "grad_norm": 1.5078125, "learning_rate": 1.9999637344963267e-05, "loss": 0.6236, "step": 159 }, { "epoch": 0.021952390752555395, "grad_norm": 1.4609375, "learning_rate": 1.9999624947468405e-05, "loss": 0.6091, "step": 160 }, { "epoch": 0.022089593194758866, "grad_norm": 1.40625, "learning_rate": 1.9999612341617543e-05, "loss": 0.6303, "step": 161 }, { "epoch": 0.022226795636962338, "grad_norm": 1.375, "learning_rate": 1.9999599527410934e-05, "loss": 0.6126, "step": 162 }, { "epoch": 0.02236399807916581, "grad_norm": 1.359375, "learning_rate": 1.9999586504848854e-05, "loss": 0.6275, "step": 163 }, { "epoch": 0.02250120052136928, "grad_norm": 1.3671875, "learning_rate": 1.9999573273931567e-05, "loss": 0.5938, "step": 164 }, { "epoch": 0.02263840296357275, "grad_norm": 1.5625, "learning_rate": 1.9999559834659353e-05, "loss": 0.6412, "step": 165 }, { "epoch": 0.022775605405776223, "grad_norm": 1.3515625, "learning_rate": 1.999954618703249e-05, "loss": 0.5641, "step": 166 }, { "epoch": 0.022912807847979694, "grad_norm": 1.484375, "learning_rate": 1.9999532331051262e-05, "loss": 0.6354, "step": 167 }, { "epoch": 0.023050010290183165, "grad_norm": 1.484375, "learning_rate": 1.9999518266715964e-05, "loss": 0.6753, "step": 168 }, { "epoch": 0.023187212732386636, "grad_norm": 1.390625, "learning_rate": 1.9999503994026878e-05, "loss": 0.5715, "step": 169 }, { "epoch": 0.023324415174590107, "grad_norm": 1.4765625, "learning_rate": 1.9999489512984312e-05, "loss": 0.6365, "step": 170 }, { "epoch": 0.02346161761679358, "grad_norm": 1.5546875, "learning_rate": 1.999947482358856e-05, "loss": 0.7194, "step": 171 }, { "epoch": 0.02359882005899705, "grad_norm": 1.4765625, "learning_rate": 1.9999459925839932e-05, "loss": 0.6717, "step": 172 }, { "epoch": 0.02373602250120052, "grad_norm": 1.4609375, "learning_rate": 1.9999444819738734e-05, "loss": 0.684, "step": 173 }, { "epoch": 0.023873224943403992, "grad_norm": 1.5234375, "learning_rate": 1.999942950528529e-05, "loss": 0.6517, "step": 174 }, { "epoch": 0.024010427385607463, "grad_norm": 1.4296875, "learning_rate": 1.999941398247991e-05, "loss": 0.6234, "step": 175 }, { "epoch": 0.024147629827810935, "grad_norm": 1.5703125, "learning_rate": 1.9999398251322924e-05, "loss": 0.6853, "step": 176 }, { "epoch": 0.024284832270014406, "grad_norm": 1.4375, "learning_rate": 1.9999382311814655e-05, "loss": 0.653, "step": 177 }, { "epoch": 0.024422034712217877, "grad_norm": 1.5, "learning_rate": 1.9999366163955435e-05, "loss": 0.6577, "step": 178 }, { "epoch": 0.02455923715442135, "grad_norm": 1.4921875, "learning_rate": 1.9999349807745605e-05, "loss": 0.6371, "step": 179 }, { "epoch": 0.02469643959662482, "grad_norm": 1.4765625, "learning_rate": 1.99993332431855e-05, "loss": 0.6402, "step": 180 }, { "epoch": 0.02483364203882829, "grad_norm": 1.46875, "learning_rate": 1.9999316470275468e-05, "loss": 0.6129, "step": 181 }, { "epoch": 0.024970844481031762, "grad_norm": 1.4453125, "learning_rate": 1.9999299489015862e-05, "loss": 0.5876, "step": 182 }, { "epoch": 0.025108046923235233, "grad_norm": 1.5546875, "learning_rate": 1.9999282299407032e-05, "loss": 0.687, "step": 183 }, { "epoch": 0.025245249365438704, "grad_norm": 1.5, "learning_rate": 1.9999264901449337e-05, "loss": 0.6588, "step": 184 }, { "epoch": 0.025382451807642176, "grad_norm": 1.390625, "learning_rate": 1.9999247295143137e-05, "loss": 0.6348, "step": 185 }, { "epoch": 0.025519654249845647, "grad_norm": 1.375, "learning_rate": 1.99992294804888e-05, "loss": 0.6167, "step": 186 }, { "epoch": 0.025656856692049118, "grad_norm": 1.4296875, "learning_rate": 1.99992114574867e-05, "loss": 0.6395, "step": 187 }, { "epoch": 0.02579405913425259, "grad_norm": 1.4609375, "learning_rate": 1.999919322613721e-05, "loss": 0.6319, "step": 188 }, { "epoch": 0.02593126157645606, "grad_norm": 1.5078125, "learning_rate": 1.9999174786440712e-05, "loss": 0.6711, "step": 189 }, { "epoch": 0.026068464018659532, "grad_norm": 1.421875, "learning_rate": 1.9999156138397587e-05, "loss": 0.5974, "step": 190 }, { "epoch": 0.026205666460863003, "grad_norm": 1.4609375, "learning_rate": 1.9999137282008226e-05, "loss": 0.658, "step": 191 }, { "epoch": 0.026342868903066474, "grad_norm": 1.4140625, "learning_rate": 1.999911821727302e-05, "loss": 0.6267, "step": 192 }, { "epoch": 0.026480071345269945, "grad_norm": 1.578125, "learning_rate": 1.999909894419237e-05, "loss": 0.6751, "step": 193 }, { "epoch": 0.026617273787473417, "grad_norm": 1.359375, "learning_rate": 1.9999079462766675e-05, "loss": 0.6053, "step": 194 }, { "epoch": 0.026754476229676888, "grad_norm": 1.375, "learning_rate": 1.999905977299634e-05, "loss": 0.6513, "step": 195 }, { "epoch": 0.02689167867188036, "grad_norm": 1.5390625, "learning_rate": 1.9999039874881777e-05, "loss": 0.7633, "step": 196 }, { "epoch": 0.02702888111408383, "grad_norm": 1.25, "learning_rate": 1.99990197684234e-05, "loss": 0.4977, "step": 197 }, { "epoch": 0.0271660835562873, "grad_norm": 1.5546875, "learning_rate": 1.9998999453621627e-05, "loss": 0.677, "step": 198 }, { "epoch": 0.027303285998490773, "grad_norm": 1.40625, "learning_rate": 1.9998978930476883e-05, "loss": 0.68, "step": 199 }, { "epoch": 0.027440488440694244, "grad_norm": 1.40625, "learning_rate": 1.9998958198989592e-05, "loss": 0.625, "step": 200 }, { "epoch": 0.027577690882897715, "grad_norm": 1.4140625, "learning_rate": 1.9998937259160193e-05, "loss": 0.6376, "step": 201 }, { "epoch": 0.027714893325101186, "grad_norm": 1.40625, "learning_rate": 1.9998916110989112e-05, "loss": 0.6593, "step": 202 }, { "epoch": 0.027852095767304658, "grad_norm": 1.3671875, "learning_rate": 1.99988947544768e-05, "loss": 0.5515, "step": 203 }, { "epoch": 0.02798929820950813, "grad_norm": 1.3671875, "learning_rate": 1.99988731896237e-05, "loss": 0.5686, "step": 204 }, { "epoch": 0.0281265006517116, "grad_norm": 1.5078125, "learning_rate": 1.9998851416430255e-05, "loss": 0.6642, "step": 205 }, { "epoch": 0.02826370309391507, "grad_norm": 1.40625, "learning_rate": 1.9998829434896922e-05, "loss": 0.6171, "step": 206 }, { "epoch": 0.028400905536118543, "grad_norm": 1.421875, "learning_rate": 1.999880724502416e-05, "loss": 0.5874, "step": 207 }, { "epoch": 0.028538107978322014, "grad_norm": 1.4375, "learning_rate": 1.9998784846812436e-05, "loss": 0.6492, "step": 208 }, { "epoch": 0.028675310420525485, "grad_norm": 1.34375, "learning_rate": 1.9998762240262205e-05, "loss": 0.5767, "step": 209 }, { "epoch": 0.028812512862728956, "grad_norm": 1.515625, "learning_rate": 1.999873942537395e-05, "loss": 0.6614, "step": 210 }, { "epoch": 0.028949715304932427, "grad_norm": 1.484375, "learning_rate": 1.999871640214814e-05, "loss": 0.6335, "step": 211 }, { "epoch": 0.0290869177471359, "grad_norm": 1.4453125, "learning_rate": 1.9998693170585252e-05, "loss": 0.6463, "step": 212 }, { "epoch": 0.02922412018933937, "grad_norm": 1.375, "learning_rate": 1.999866973068578e-05, "loss": 0.6295, "step": 213 }, { "epoch": 0.02936132263154284, "grad_norm": 1.421875, "learning_rate": 1.9998646082450202e-05, "loss": 0.6556, "step": 214 }, { "epoch": 0.029498525073746312, "grad_norm": 1.4609375, "learning_rate": 1.9998622225879017e-05, "loss": 0.6434, "step": 215 }, { "epoch": 0.029635727515949783, "grad_norm": 1.4921875, "learning_rate": 1.999859816097272e-05, "loss": 0.6321, "step": 216 }, { "epoch": 0.029772929958153255, "grad_norm": 1.375, "learning_rate": 1.999857388773181e-05, "loss": 0.6363, "step": 217 }, { "epoch": 0.029910132400356726, "grad_norm": 1.4296875, "learning_rate": 1.9998549406156797e-05, "loss": 0.6136, "step": 218 }, { "epoch": 0.030047334842560197, "grad_norm": 1.3359375, "learning_rate": 1.999852471624819e-05, "loss": 0.6055, "step": 219 }, { "epoch": 0.03018453728476367, "grad_norm": 1.4140625, "learning_rate": 1.9998499818006506e-05, "loss": 0.606, "step": 220 }, { "epoch": 0.03032173972696714, "grad_norm": 1.5078125, "learning_rate": 1.9998474711432256e-05, "loss": 0.6466, "step": 221 }, { "epoch": 0.03045894216917061, "grad_norm": 1.5234375, "learning_rate": 1.999844939652597e-05, "loss": 0.7543, "step": 222 }, { "epoch": 0.030596144611374082, "grad_norm": 1.28125, "learning_rate": 1.9998423873288175e-05, "loss": 0.5769, "step": 223 }, { "epoch": 0.030733347053577553, "grad_norm": 1.3828125, "learning_rate": 1.9998398141719396e-05, "loss": 0.5887, "step": 224 }, { "epoch": 0.030870549495781024, "grad_norm": 1.4296875, "learning_rate": 1.9998372201820177e-05, "loss": 0.6869, "step": 225 }, { "epoch": 0.031007751937984496, "grad_norm": 1.40625, "learning_rate": 1.9998346053591057e-05, "loss": 0.6639, "step": 226 }, { "epoch": 0.031144954380187967, "grad_norm": 1.4375, "learning_rate": 1.9998319697032577e-05, "loss": 0.7014, "step": 227 }, { "epoch": 0.03128215682239144, "grad_norm": 1.4609375, "learning_rate": 1.9998293132145294e-05, "loss": 0.6402, "step": 228 }, { "epoch": 0.03141935926459491, "grad_norm": 1.3828125, "learning_rate": 1.999826635892975e-05, "loss": 0.617, "step": 229 }, { "epoch": 0.03155656170679838, "grad_norm": 1.3984375, "learning_rate": 1.999823937738651e-05, "loss": 0.6827, "step": 230 }, { "epoch": 0.03169376414900185, "grad_norm": 1.46875, "learning_rate": 1.999821218751614e-05, "loss": 0.6063, "step": 231 }, { "epoch": 0.03183096659120532, "grad_norm": 1.328125, "learning_rate": 1.99981847893192e-05, "loss": 0.557, "step": 232 }, { "epoch": 0.031968169033408794, "grad_norm": 1.5390625, "learning_rate": 1.999815718279626e-05, "loss": 0.7154, "step": 233 }, { "epoch": 0.032105371475612265, "grad_norm": 1.40625, "learning_rate": 1.99981293679479e-05, "loss": 0.6099, "step": 234 }, { "epoch": 0.03224257391781574, "grad_norm": 1.5234375, "learning_rate": 1.99981013447747e-05, "loss": 0.7428, "step": 235 }, { "epoch": 0.03237977636001921, "grad_norm": 1.3203125, "learning_rate": 1.9998073113277237e-05, "loss": 0.5762, "step": 236 }, { "epoch": 0.03251697880222268, "grad_norm": 1.5078125, "learning_rate": 1.9998044673456104e-05, "loss": 0.6503, "step": 237 }, { "epoch": 0.03265418124442615, "grad_norm": 1.53125, "learning_rate": 1.9998016025311897e-05, "loss": 0.6595, "step": 238 }, { "epoch": 0.03279138368662962, "grad_norm": 1.6875, "learning_rate": 1.9997987168845205e-05, "loss": 0.6118, "step": 239 }, { "epoch": 0.03292858612883309, "grad_norm": 1.28125, "learning_rate": 1.9997958104056635e-05, "loss": 0.5685, "step": 240 }, { "epoch": 0.033065788571036564, "grad_norm": 1.40625, "learning_rate": 1.9997928830946792e-05, "loss": 0.5966, "step": 241 }, { "epoch": 0.033202991013240035, "grad_norm": 1.5234375, "learning_rate": 1.9997899349516285e-05, "loss": 0.6441, "step": 242 }, { "epoch": 0.033340193455443506, "grad_norm": 1.5390625, "learning_rate": 1.999786965976573e-05, "loss": 0.7842, "step": 243 }, { "epoch": 0.03347739589764698, "grad_norm": 1.3515625, "learning_rate": 1.999783976169574e-05, "loss": 0.5971, "step": 244 }, { "epoch": 0.03361459833985045, "grad_norm": 1.4609375, "learning_rate": 1.9997809655306943e-05, "loss": 0.618, "step": 245 }, { "epoch": 0.03375180078205392, "grad_norm": 1.4453125, "learning_rate": 1.9997779340599964e-05, "loss": 0.5651, "step": 246 }, { "epoch": 0.03388900322425739, "grad_norm": 1.484375, "learning_rate": 1.999774881757544e-05, "loss": 0.6896, "step": 247 }, { "epoch": 0.03402620566646086, "grad_norm": 1.4453125, "learning_rate": 1.9997718086234e-05, "loss": 0.6399, "step": 248 }, { "epoch": 0.034163408108664334, "grad_norm": 1.4765625, "learning_rate": 1.9997687146576288e-05, "loss": 0.6788, "step": 249 }, { "epoch": 0.034300610550867805, "grad_norm": 1.40625, "learning_rate": 1.999765599860295e-05, "loss": 0.6152, "step": 250 }, { "epoch": 0.034437812993071276, "grad_norm": 1.5703125, "learning_rate": 1.9997624642314627e-05, "loss": 0.7219, "step": 251 }, { "epoch": 0.03457501543527475, "grad_norm": 1.484375, "learning_rate": 1.9997593077711984e-05, "loss": 0.6389, "step": 252 }, { "epoch": 0.03471221787747822, "grad_norm": 1.3515625, "learning_rate": 1.9997561304795668e-05, "loss": 0.6269, "step": 253 }, { "epoch": 0.03484942031968169, "grad_norm": 1.2578125, "learning_rate": 1.999752932356635e-05, "loss": 0.5951, "step": 254 }, { "epoch": 0.03498662276188516, "grad_norm": 1.453125, "learning_rate": 1.9997497134024693e-05, "loss": 0.6744, "step": 255 }, { "epoch": 0.03512382520408863, "grad_norm": 1.46875, "learning_rate": 1.9997464736171365e-05, "loss": 0.6259, "step": 256 }, { "epoch": 0.035261027646292104, "grad_norm": 1.296875, "learning_rate": 1.9997432130007042e-05, "loss": 0.6308, "step": 257 }, { "epoch": 0.035398230088495575, "grad_norm": 1.4375, "learning_rate": 1.9997399315532404e-05, "loss": 0.5295, "step": 258 }, { "epoch": 0.035535432530699046, "grad_norm": 1.4453125, "learning_rate": 1.999736629274814e-05, "loss": 0.6629, "step": 259 }, { "epoch": 0.03567263497290252, "grad_norm": 1.4609375, "learning_rate": 1.9997333061654928e-05, "loss": 0.6426, "step": 260 }, { "epoch": 0.03580983741510599, "grad_norm": 1.3828125, "learning_rate": 1.9997299622253466e-05, "loss": 0.6255, "step": 261 }, { "epoch": 0.03594703985730946, "grad_norm": 1.421875, "learning_rate": 1.9997265974544454e-05, "loss": 0.5951, "step": 262 }, { "epoch": 0.03608424229951293, "grad_norm": 1.4453125, "learning_rate": 1.9997232118528586e-05, "loss": 0.5859, "step": 263 }, { "epoch": 0.0362214447417164, "grad_norm": 1.4296875, "learning_rate": 1.999719805420657e-05, "loss": 0.6174, "step": 264 }, { "epoch": 0.03635864718391987, "grad_norm": 1.296875, "learning_rate": 1.9997163781579124e-05, "loss": 0.58, "step": 265 }, { "epoch": 0.036495849626123344, "grad_norm": 1.2890625, "learning_rate": 1.999712930064695e-05, "loss": 0.5531, "step": 266 }, { "epoch": 0.036633052068326816, "grad_norm": 1.4140625, "learning_rate": 1.999709461141077e-05, "loss": 0.6075, "step": 267 }, { "epoch": 0.03677025451053029, "grad_norm": 1.578125, "learning_rate": 1.999705971387131e-05, "loss": 0.7026, "step": 268 }, { "epoch": 0.03690745695273376, "grad_norm": 1.515625, "learning_rate": 1.9997024608029293e-05, "loss": 0.6398, "step": 269 }, { "epoch": 0.03704465939493723, "grad_norm": 1.4375, "learning_rate": 1.999698929388546e-05, "loss": 0.6721, "step": 270 }, { "epoch": 0.0371818618371407, "grad_norm": 1.5234375, "learning_rate": 1.9996953771440532e-05, "loss": 0.6244, "step": 271 }, { "epoch": 0.03731906427934417, "grad_norm": 1.3671875, "learning_rate": 1.999691804069526e-05, "loss": 0.5696, "step": 272 }, { "epoch": 0.03745626672154764, "grad_norm": 1.3984375, "learning_rate": 1.9996882101650383e-05, "loss": 0.5785, "step": 273 }, { "epoch": 0.037593469163751114, "grad_norm": 1.4296875, "learning_rate": 1.9996845954306652e-05, "loss": 0.6429, "step": 274 }, { "epoch": 0.037730671605954585, "grad_norm": 1.4609375, "learning_rate": 1.999680959866482e-05, "loss": 0.6668, "step": 275 }, { "epoch": 0.03786787404815806, "grad_norm": 1.3125, "learning_rate": 1.9996773034725648e-05, "loss": 0.6418, "step": 276 }, { "epoch": 0.03800507649036153, "grad_norm": 1.421875, "learning_rate": 1.9996736262489892e-05, "loss": 0.6583, "step": 277 }, { "epoch": 0.038142278932565, "grad_norm": 1.484375, "learning_rate": 1.9996699281958323e-05, "loss": 0.6915, "step": 278 }, { "epoch": 0.03827948137476847, "grad_norm": 1.390625, "learning_rate": 1.999666209313171e-05, "loss": 0.5695, "step": 279 }, { "epoch": 0.03841668381697194, "grad_norm": 1.375, "learning_rate": 1.9996624696010825e-05, "loss": 0.5647, "step": 280 }, { "epoch": 0.03855388625917541, "grad_norm": 1.3828125, "learning_rate": 1.9996587090596447e-05, "loss": 0.6259, "step": 281 }, { "epoch": 0.038691088701378884, "grad_norm": 1.3984375, "learning_rate": 1.9996549276889366e-05, "loss": 0.6425, "step": 282 }, { "epoch": 0.038828291143582355, "grad_norm": 1.359375, "learning_rate": 1.9996511254890365e-05, "loss": 0.5763, "step": 283 }, { "epoch": 0.038965493585785826, "grad_norm": 1.3203125, "learning_rate": 1.999647302460024e-05, "loss": 0.6143, "step": 284 }, { "epoch": 0.0391026960279893, "grad_norm": 1.3125, "learning_rate": 1.999643458601978e-05, "loss": 0.5852, "step": 285 }, { "epoch": 0.03923989847019277, "grad_norm": 1.5390625, "learning_rate": 1.9996395939149793e-05, "loss": 0.6341, "step": 286 }, { "epoch": 0.03937710091239624, "grad_norm": 1.4921875, "learning_rate": 1.9996357083991082e-05, "loss": 0.6323, "step": 287 }, { "epoch": 0.03951430335459971, "grad_norm": 1.5, "learning_rate": 1.9996318020544455e-05, "loss": 0.6272, "step": 288 }, { "epoch": 0.03965150579680318, "grad_norm": 1.3984375, "learning_rate": 1.9996278748810726e-05, "loss": 0.6585, "step": 289 }, { "epoch": 0.039788708239006654, "grad_norm": 1.1875, "learning_rate": 1.9996239268790717e-05, "loss": 0.4989, "step": 290 }, { "epoch": 0.039925910681210125, "grad_norm": 1.4375, "learning_rate": 1.999619958048525e-05, "loss": 0.6437, "step": 291 }, { "epoch": 0.040063113123413596, "grad_norm": 1.5078125, "learning_rate": 1.9996159683895148e-05, "loss": 0.6515, "step": 292 }, { "epoch": 0.04020031556561707, "grad_norm": 1.4296875, "learning_rate": 1.999611957902125e-05, "loss": 0.6578, "step": 293 }, { "epoch": 0.04033751800782054, "grad_norm": 1.4765625, "learning_rate": 1.9996079265864376e-05, "loss": 0.6505, "step": 294 }, { "epoch": 0.04047472045002401, "grad_norm": 1.34375, "learning_rate": 1.9996038744425385e-05, "loss": 0.5177, "step": 295 }, { "epoch": 0.04061192289222748, "grad_norm": 1.4609375, "learning_rate": 1.999599801470511e-05, "loss": 0.6601, "step": 296 }, { "epoch": 0.04074912533443095, "grad_norm": 1.40625, "learning_rate": 1.99959570767044e-05, "loss": 0.5885, "step": 297 }, { "epoch": 0.040886327776634424, "grad_norm": 1.453125, "learning_rate": 1.9995915930424114e-05, "loss": 0.6404, "step": 298 }, { "epoch": 0.041023530218837895, "grad_norm": 1.4453125, "learning_rate": 1.9995874575865102e-05, "loss": 0.6181, "step": 299 }, { "epoch": 0.041160732661041366, "grad_norm": 1.4921875, "learning_rate": 1.9995833013028234e-05, "loss": 0.6948, "step": 300 }, { "epoch": 0.04129793510324484, "grad_norm": 1.5234375, "learning_rate": 1.9995791241914366e-05, "loss": 0.6596, "step": 301 }, { "epoch": 0.04143513754544831, "grad_norm": 1.390625, "learning_rate": 1.9995749262524377e-05, "loss": 0.6338, "step": 302 }, { "epoch": 0.04157233998765178, "grad_norm": 1.40625, "learning_rate": 1.9995707074859134e-05, "loss": 0.6271, "step": 303 }, { "epoch": 0.04170954242985525, "grad_norm": 1.3984375, "learning_rate": 1.9995664678919527e-05, "loss": 0.6304, "step": 304 }, { "epoch": 0.04184674487205872, "grad_norm": 1.40625, "learning_rate": 1.9995622074706428e-05, "loss": 0.6497, "step": 305 }, { "epoch": 0.04198394731426219, "grad_norm": 1.484375, "learning_rate": 1.999557926222073e-05, "loss": 0.6682, "step": 306 }, { "epoch": 0.042121149756465665, "grad_norm": 1.3125, "learning_rate": 1.9995536241463327e-05, "loss": 0.5353, "step": 307 }, { "epoch": 0.042258352198669136, "grad_norm": 1.265625, "learning_rate": 1.999549301243511e-05, "loss": 0.5954, "step": 308 }, { "epoch": 0.04239555464087261, "grad_norm": 1.3671875, "learning_rate": 1.9995449575136986e-05, "loss": 0.6509, "step": 309 }, { "epoch": 0.04253275708307608, "grad_norm": 1.453125, "learning_rate": 1.9995405929569852e-05, "loss": 0.6658, "step": 310 }, { "epoch": 0.04266995952527955, "grad_norm": 1.4765625, "learning_rate": 1.9995362075734626e-05, "loss": 0.6364, "step": 311 }, { "epoch": 0.04280716196748302, "grad_norm": 1.5859375, "learning_rate": 1.9995318013632217e-05, "loss": 0.7956, "step": 312 }, { "epoch": 0.04294436440968649, "grad_norm": 1.3984375, "learning_rate": 1.9995273743263545e-05, "loss": 0.6666, "step": 313 }, { "epoch": 0.04308156685188996, "grad_norm": 1.328125, "learning_rate": 1.999522926462953e-05, "loss": 0.6682, "step": 314 }, { "epoch": 0.043218769294093434, "grad_norm": 1.359375, "learning_rate": 1.9995184577731098e-05, "loss": 0.5966, "step": 315 }, { "epoch": 0.043355971736296905, "grad_norm": 1.4921875, "learning_rate": 1.9995139682569186e-05, "loss": 0.5612, "step": 316 }, { "epoch": 0.04349317417850038, "grad_norm": 1.3515625, "learning_rate": 1.9995094579144725e-05, "loss": 0.5534, "step": 317 }, { "epoch": 0.04363037662070385, "grad_norm": 1.5234375, "learning_rate": 1.9995049267458654e-05, "loss": 0.6876, "step": 318 }, { "epoch": 0.04376757906290732, "grad_norm": 1.546875, "learning_rate": 1.9995003747511918e-05, "loss": 0.6634, "step": 319 }, { "epoch": 0.04390478150511079, "grad_norm": 1.453125, "learning_rate": 1.999495801930547e-05, "loss": 0.635, "step": 320 }, { "epoch": 0.04404198394731426, "grad_norm": 1.4609375, "learning_rate": 1.9994912082840256e-05, "loss": 0.6406, "step": 321 }, { "epoch": 0.04417918638951773, "grad_norm": 1.6640625, "learning_rate": 1.9994865938117237e-05, "loss": 0.685, "step": 322 }, { "epoch": 0.044316388831721204, "grad_norm": 1.3828125, "learning_rate": 1.9994819585137376e-05, "loss": 0.6281, "step": 323 }, { "epoch": 0.044453591273924675, "grad_norm": 1.4140625, "learning_rate": 1.9994773023901633e-05, "loss": 0.6222, "step": 324 }, { "epoch": 0.044590793716128146, "grad_norm": 1.3515625, "learning_rate": 1.9994726254410982e-05, "loss": 0.631, "step": 325 }, { "epoch": 0.04472799615833162, "grad_norm": 1.4609375, "learning_rate": 1.9994679276666398e-05, "loss": 0.6592, "step": 326 }, { "epoch": 0.04486519860053509, "grad_norm": 1.40625, "learning_rate": 1.999463209066886e-05, "loss": 0.6422, "step": 327 }, { "epoch": 0.04500240104273856, "grad_norm": 1.3125, "learning_rate": 1.999458469641935e-05, "loss": 0.5442, "step": 328 }, { "epoch": 0.04513960348494203, "grad_norm": 1.5, "learning_rate": 1.999453709391885e-05, "loss": 0.6134, "step": 329 }, { "epoch": 0.0452768059271455, "grad_norm": 1.3671875, "learning_rate": 1.9994489283168366e-05, "loss": 0.589, "step": 330 }, { "epoch": 0.045414008369348974, "grad_norm": 1.4765625, "learning_rate": 1.9994441264168883e-05, "loss": 0.6859, "step": 331 }, { "epoch": 0.045551210811552445, "grad_norm": 1.484375, "learning_rate": 1.99943930369214e-05, "loss": 0.6486, "step": 332 }, { "epoch": 0.045688413253755916, "grad_norm": 1.4140625, "learning_rate": 1.9994344601426928e-05, "loss": 0.576, "step": 333 }, { "epoch": 0.04582561569595939, "grad_norm": 1.4609375, "learning_rate": 1.999429595768648e-05, "loss": 0.6525, "step": 334 }, { "epoch": 0.04596281813816286, "grad_norm": 1.4453125, "learning_rate": 1.999424710570106e-05, "loss": 0.6441, "step": 335 }, { "epoch": 0.04610002058036633, "grad_norm": 1.3515625, "learning_rate": 1.9994198045471687e-05, "loss": 0.6097, "step": 336 }, { "epoch": 0.0462372230225698, "grad_norm": 1.4453125, "learning_rate": 1.999414877699939e-05, "loss": 0.5991, "step": 337 }, { "epoch": 0.04637442546477327, "grad_norm": 1.3359375, "learning_rate": 1.999409930028519e-05, "loss": 0.573, "step": 338 }, { "epoch": 0.046511627906976744, "grad_norm": 1.46875, "learning_rate": 1.999404961533012e-05, "loss": 0.6967, "step": 339 }, { "epoch": 0.046648830349180215, "grad_norm": 1.4296875, "learning_rate": 1.9993999722135214e-05, "loss": 0.6289, "step": 340 }, { "epoch": 0.046786032791383686, "grad_norm": 1.4296875, "learning_rate": 1.9993949620701515e-05, "loss": 0.6484, "step": 341 }, { "epoch": 0.04692323523358716, "grad_norm": 1.46875, "learning_rate": 1.9993899311030064e-05, "loss": 0.6296, "step": 342 }, { "epoch": 0.04706043767579063, "grad_norm": 1.3984375, "learning_rate": 1.9993848793121905e-05, "loss": 0.6677, "step": 343 }, { "epoch": 0.0471976401179941, "grad_norm": 1.3125, "learning_rate": 1.99937980669781e-05, "loss": 0.5996, "step": 344 }, { "epoch": 0.04733484256019757, "grad_norm": 1.484375, "learning_rate": 1.9993747132599698e-05, "loss": 0.6187, "step": 345 }, { "epoch": 0.04747204500240104, "grad_norm": 1.5625, "learning_rate": 1.9993695989987766e-05, "loss": 0.7488, "step": 346 }, { "epoch": 0.04760924744460451, "grad_norm": 1.6015625, "learning_rate": 1.999364463914337e-05, "loss": 0.6873, "step": 347 }, { "epoch": 0.047746449886807985, "grad_norm": 1.4453125, "learning_rate": 1.999359308006757e-05, "loss": 0.6571, "step": 348 }, { "epoch": 0.047883652329011456, "grad_norm": 1.421875, "learning_rate": 1.999354131276145e-05, "loss": 0.6342, "step": 349 }, { "epoch": 0.04802085477121493, "grad_norm": 1.46875, "learning_rate": 1.9993489337226087e-05, "loss": 0.6637, "step": 350 }, { "epoch": 0.0481580572134184, "grad_norm": 1.3359375, "learning_rate": 1.999343715346256e-05, "loss": 0.5827, "step": 351 }, { "epoch": 0.04829525965562187, "grad_norm": 1.2734375, "learning_rate": 1.9993384761471964e-05, "loss": 0.5206, "step": 352 }, { "epoch": 0.04843246209782534, "grad_norm": 1.453125, "learning_rate": 1.9993332161255386e-05, "loss": 0.7444, "step": 353 }, { "epoch": 0.04856966454002881, "grad_norm": 1.359375, "learning_rate": 1.9993279352813922e-05, "loss": 0.5542, "step": 354 }, { "epoch": 0.04870686698223228, "grad_norm": 1.359375, "learning_rate": 1.999322633614867e-05, "loss": 0.6162, "step": 355 }, { "epoch": 0.048844069424435754, "grad_norm": 1.390625, "learning_rate": 1.9993173111260735e-05, "loss": 0.5725, "step": 356 }, { "epoch": 0.048981271866639225, "grad_norm": 1.546875, "learning_rate": 1.9993119678151232e-05, "loss": 0.6567, "step": 357 }, { "epoch": 0.0491184743088427, "grad_norm": 1.40625, "learning_rate": 1.9993066036821268e-05, "loss": 0.6119, "step": 358 }, { "epoch": 0.04925567675104617, "grad_norm": 1.40625, "learning_rate": 1.9993012187271964e-05, "loss": 0.6189, "step": 359 }, { "epoch": 0.04939287919324964, "grad_norm": 1.3671875, "learning_rate": 1.999295812950444e-05, "loss": 0.6239, "step": 360 }, { "epoch": 0.04953008163545311, "grad_norm": 1.5234375, "learning_rate": 1.9992903863519825e-05, "loss": 0.6119, "step": 361 }, { "epoch": 0.04966728407765658, "grad_norm": 1.53125, "learning_rate": 1.9992849389319246e-05, "loss": 0.6404, "step": 362 }, { "epoch": 0.04980448651986005, "grad_norm": 1.9375, "learning_rate": 1.999279470690384e-05, "loss": 0.5341, "step": 363 }, { "epoch": 0.049941688962063524, "grad_norm": 1.359375, "learning_rate": 1.999273981627475e-05, "loss": 0.6083, "step": 364 }, { "epoch": 0.050078891404266995, "grad_norm": 1.4140625, "learning_rate": 1.9992684717433114e-05, "loss": 0.5757, "step": 365 }, { "epoch": 0.050216093846470466, "grad_norm": 1.2890625, "learning_rate": 1.9992629410380085e-05, "loss": 0.5575, "step": 366 }, { "epoch": 0.05035329628867394, "grad_norm": 1.390625, "learning_rate": 1.999257389511681e-05, "loss": 0.6272, "step": 367 }, { "epoch": 0.05049049873087741, "grad_norm": 1.3671875, "learning_rate": 1.999251817164445e-05, "loss": 0.6308, "step": 368 }, { "epoch": 0.05062770117308088, "grad_norm": 1.515625, "learning_rate": 1.9992462239964164e-05, "loss": 0.6134, "step": 369 }, { "epoch": 0.05076490361528435, "grad_norm": 1.3125, "learning_rate": 1.9992406100077117e-05, "loss": 0.6205, "step": 370 }, { "epoch": 0.05090210605748782, "grad_norm": 1.4609375, "learning_rate": 1.9992349751984482e-05, "loss": 0.6931, "step": 371 }, { "epoch": 0.051039308499691294, "grad_norm": 1.4765625, "learning_rate": 1.999229319568743e-05, "loss": 0.6152, "step": 372 }, { "epoch": 0.051176510941894765, "grad_norm": 1.3515625, "learning_rate": 1.9992236431187136e-05, "loss": 0.6358, "step": 373 }, { "epoch": 0.051313713384098236, "grad_norm": 1.4453125, "learning_rate": 1.9992179458484793e-05, "loss": 0.5815, "step": 374 }, { "epoch": 0.05145091582630171, "grad_norm": 1.421875, "learning_rate": 1.999212227758158e-05, "loss": 0.5815, "step": 375 }, { "epoch": 0.05158811826850518, "grad_norm": 1.453125, "learning_rate": 1.999206488847869e-05, "loss": 0.7972, "step": 376 }, { "epoch": 0.05172532071070865, "grad_norm": 1.234375, "learning_rate": 1.999200729117732e-05, "loss": 0.5441, "step": 377 }, { "epoch": 0.05186252315291212, "grad_norm": 1.375, "learning_rate": 1.9991949485678668e-05, "loss": 0.6223, "step": 378 }, { "epoch": 0.05199972559511559, "grad_norm": 1.1953125, "learning_rate": 1.9991891471983942e-05, "loss": 0.5011, "step": 379 }, { "epoch": 0.052136928037319064, "grad_norm": 1.4140625, "learning_rate": 1.999183325009435e-05, "loss": 0.6838, "step": 380 }, { "epoch": 0.052274130479522535, "grad_norm": 1.546875, "learning_rate": 1.99917748200111e-05, "loss": 0.7237, "step": 381 }, { "epoch": 0.052411332921726006, "grad_norm": 1.421875, "learning_rate": 1.9991716181735416e-05, "loss": 0.7143, "step": 382 }, { "epoch": 0.05254853536392948, "grad_norm": 1.4375, "learning_rate": 1.9991657335268515e-05, "loss": 0.6187, "step": 383 }, { "epoch": 0.05268573780613295, "grad_norm": 1.4609375, "learning_rate": 1.9991598280611625e-05, "loss": 0.6425, "step": 384 }, { "epoch": 0.05282294024833642, "grad_norm": 1.375, "learning_rate": 1.999153901776598e-05, "loss": 0.67, "step": 385 }, { "epoch": 0.05296014269053989, "grad_norm": 1.3515625, "learning_rate": 1.999147954673281e-05, "loss": 0.5994, "step": 386 }, { "epoch": 0.05309734513274336, "grad_norm": 1.3828125, "learning_rate": 1.999141986751336e-05, "loss": 0.6846, "step": 387 }, { "epoch": 0.05323454757494683, "grad_norm": 1.3828125, "learning_rate": 1.999135998010886e-05, "loss": 0.6166, "step": 388 }, { "epoch": 0.053371750017150305, "grad_norm": 1.484375, "learning_rate": 1.9991299884520572e-05, "loss": 0.6156, "step": 389 }, { "epoch": 0.053508952459353776, "grad_norm": 1.2890625, "learning_rate": 1.9991239580749745e-05, "loss": 0.6482, "step": 390 }, { "epoch": 0.05364615490155725, "grad_norm": 1.421875, "learning_rate": 1.999117906879763e-05, "loss": 0.6405, "step": 391 }, { "epoch": 0.05378335734376072, "grad_norm": 1.2890625, "learning_rate": 1.9991118348665495e-05, "loss": 0.6093, "step": 392 }, { "epoch": 0.05392055978596419, "grad_norm": 1.390625, "learning_rate": 1.99910574203546e-05, "loss": 0.6152, "step": 393 }, { "epoch": 0.05405776222816766, "grad_norm": 1.4921875, "learning_rate": 1.9990996283866217e-05, "loss": 0.7032, "step": 394 }, { "epoch": 0.05419496467037113, "grad_norm": 1.3515625, "learning_rate": 1.999093493920162e-05, "loss": 0.5894, "step": 395 }, { "epoch": 0.0543321671125746, "grad_norm": 1.4140625, "learning_rate": 1.9990873386362086e-05, "loss": 0.623, "step": 396 }, { "epoch": 0.054469369554778074, "grad_norm": 1.5390625, "learning_rate": 1.99908116253489e-05, "loss": 0.6906, "step": 397 }, { "epoch": 0.054606571996981546, "grad_norm": 1.4140625, "learning_rate": 1.999074965616334e-05, "loss": 0.6093, "step": 398 }, { "epoch": 0.05474377443918502, "grad_norm": 1.3359375, "learning_rate": 1.999068747880671e-05, "loss": 0.661, "step": 399 }, { "epoch": 0.05488097688138849, "grad_norm": 1.4140625, "learning_rate": 1.99906250932803e-05, "loss": 0.5944, "step": 400 }, { "epoch": 0.05501817932359196, "grad_norm": 1.4140625, "learning_rate": 1.9990562499585407e-05, "loss": 0.6458, "step": 401 }, { "epoch": 0.05515538176579543, "grad_norm": 1.2578125, "learning_rate": 1.9990499697723338e-05, "loss": 0.5416, "step": 402 }, { "epoch": 0.0552925842079989, "grad_norm": 1.34375, "learning_rate": 1.9990436687695404e-05, "loss": 0.6028, "step": 403 }, { "epoch": 0.05542978665020237, "grad_norm": 1.359375, "learning_rate": 1.999037346950291e-05, "loss": 0.6129, "step": 404 }, { "epoch": 0.055566989092405844, "grad_norm": 1.578125, "learning_rate": 1.9990310043147182e-05, "loss": 0.6888, "step": 405 }, { "epoch": 0.055704191534609315, "grad_norm": 1.3515625, "learning_rate": 1.9990246408629537e-05, "loss": 0.6778, "step": 406 }, { "epoch": 0.055841393976812786, "grad_norm": 1.3359375, "learning_rate": 1.9990182565951303e-05, "loss": 0.6227, "step": 407 }, { "epoch": 0.05597859641901626, "grad_norm": 1.328125, "learning_rate": 1.9990118515113806e-05, "loss": 0.5523, "step": 408 }, { "epoch": 0.05611579886121973, "grad_norm": 1.421875, "learning_rate": 1.9990054256118387e-05, "loss": 0.6316, "step": 409 }, { "epoch": 0.0562530013034232, "grad_norm": 1.3359375, "learning_rate": 1.998998978896638e-05, "loss": 0.5587, "step": 410 }, { "epoch": 0.05639020374562667, "grad_norm": 1.3359375, "learning_rate": 1.998992511365913e-05, "loss": 0.5901, "step": 411 }, { "epoch": 0.05652740618783014, "grad_norm": 1.296875, "learning_rate": 1.9989860230197985e-05, "loss": 0.5822, "step": 412 }, { "epoch": 0.056664608630033614, "grad_norm": 1.515625, "learning_rate": 1.9989795138584296e-05, "loss": 0.6448, "step": 413 }, { "epoch": 0.056801811072237085, "grad_norm": 1.453125, "learning_rate": 1.998972983881942e-05, "loss": 0.5236, "step": 414 }, { "epoch": 0.056939013514440556, "grad_norm": 1.40625, "learning_rate": 1.9989664330904718e-05, "loss": 0.6571, "step": 415 }, { "epoch": 0.05707621595664403, "grad_norm": 1.359375, "learning_rate": 1.9989598614841555e-05, "loss": 0.5982, "step": 416 }, { "epoch": 0.0572134183988475, "grad_norm": 1.4296875, "learning_rate": 1.99895326906313e-05, "loss": 0.5563, "step": 417 }, { "epoch": 0.05735062084105097, "grad_norm": 1.3125, "learning_rate": 1.9989466558275322e-05, "loss": 0.5679, "step": 418 }, { "epoch": 0.05748782328325444, "grad_norm": 1.40625, "learning_rate": 1.9989400217775005e-05, "loss": 0.6549, "step": 419 }, { "epoch": 0.05762502572545791, "grad_norm": 1.4140625, "learning_rate": 1.998933366913173e-05, "loss": 0.6779, "step": 420 }, { "epoch": 0.057762228167661384, "grad_norm": 1.265625, "learning_rate": 1.9989266912346884e-05, "loss": 0.5491, "step": 421 }, { "epoch": 0.057899430609864855, "grad_norm": 1.328125, "learning_rate": 1.9989199947421858e-05, "loss": 0.6211, "step": 422 }, { "epoch": 0.058036633052068326, "grad_norm": 1.421875, "learning_rate": 1.9989132774358044e-05, "loss": 0.6443, "step": 423 }, { "epoch": 0.0581738354942718, "grad_norm": 1.3359375, "learning_rate": 1.9989065393156847e-05, "loss": 0.4985, "step": 424 }, { "epoch": 0.05831103793647527, "grad_norm": 1.28125, "learning_rate": 1.9988997803819664e-05, "loss": 0.6099, "step": 425 }, { "epoch": 0.05844824037867874, "grad_norm": 1.4140625, "learning_rate": 1.998893000634791e-05, "loss": 0.6339, "step": 426 }, { "epoch": 0.05858544282088221, "grad_norm": 1.3046875, "learning_rate": 1.9988862000742994e-05, "loss": 0.5118, "step": 427 }, { "epoch": 0.05872264526308568, "grad_norm": 1.3046875, "learning_rate": 1.9988793787006333e-05, "loss": 0.5613, "step": 428 }, { "epoch": 0.05885984770528915, "grad_norm": 1.421875, "learning_rate": 1.998872536513935e-05, "loss": 0.5894, "step": 429 }, { "epoch": 0.058997050147492625, "grad_norm": 1.453125, "learning_rate": 1.998865673514347e-05, "loss": 0.6096, "step": 430 }, { "epoch": 0.059134252589696096, "grad_norm": 1.2578125, "learning_rate": 1.9988587897020124e-05, "loss": 0.5964, "step": 431 }, { "epoch": 0.05927145503189957, "grad_norm": 1.3125, "learning_rate": 1.9988518850770747e-05, "loss": 0.6378, "step": 432 }, { "epoch": 0.05940865747410304, "grad_norm": 1.3828125, "learning_rate": 1.9988449596396773e-05, "loss": 0.6424, "step": 433 }, { "epoch": 0.05954585991630651, "grad_norm": 1.4375, "learning_rate": 1.9988380133899648e-05, "loss": 0.6724, "step": 434 }, { "epoch": 0.05968306235850998, "grad_norm": 1.3125, "learning_rate": 1.998831046328082e-05, "loss": 0.5906, "step": 435 }, { "epoch": 0.05982026480071345, "grad_norm": 1.3515625, "learning_rate": 1.998824058454174e-05, "loss": 0.6653, "step": 436 }, { "epoch": 0.05995746724291692, "grad_norm": 1.3203125, "learning_rate": 1.9988170497683862e-05, "loss": 0.5692, "step": 437 }, { "epoch": 0.060094669685120394, "grad_norm": 1.4375, "learning_rate": 1.9988100202708648e-05, "loss": 0.6374, "step": 438 }, { "epoch": 0.060231872127323866, "grad_norm": 1.3359375, "learning_rate": 1.9988029699617568e-05, "loss": 0.5221, "step": 439 }, { "epoch": 0.06036907456952734, "grad_norm": 1.3984375, "learning_rate": 1.998795898841208e-05, "loss": 0.582, "step": 440 }, { "epoch": 0.06050627701173081, "grad_norm": 1.359375, "learning_rate": 1.998788806909367e-05, "loss": 0.5092, "step": 441 }, { "epoch": 0.06064347945393428, "grad_norm": 1.546875, "learning_rate": 1.9987816941663806e-05, "loss": 0.7017, "step": 442 }, { "epoch": 0.06078068189613775, "grad_norm": 1.3515625, "learning_rate": 1.998774560612397e-05, "loss": 0.6216, "step": 443 }, { "epoch": 0.06091788433834122, "grad_norm": 1.296875, "learning_rate": 1.998767406247566e-05, "loss": 0.5369, "step": 444 }, { "epoch": 0.06105508678054469, "grad_norm": 1.4765625, "learning_rate": 1.9987602310720352e-05, "loss": 0.6185, "step": 445 }, { "epoch": 0.061192289222748164, "grad_norm": 1.2421875, "learning_rate": 1.9987530350859546e-05, "loss": 0.577, "step": 446 }, { "epoch": 0.061329491664951635, "grad_norm": 1.3359375, "learning_rate": 1.9987458182894745e-05, "loss": 0.5421, "step": 447 }, { "epoch": 0.061466694107155107, "grad_norm": 1.375, "learning_rate": 1.998738580682745e-05, "loss": 0.5401, "step": 448 }, { "epoch": 0.06160389654935858, "grad_norm": 1.4453125, "learning_rate": 1.998731322265917e-05, "loss": 0.6799, "step": 449 }, { "epoch": 0.06174109899156205, "grad_norm": 1.484375, "learning_rate": 1.998724043039142e-05, "loss": 0.6888, "step": 450 }, { "epoch": 0.06187830143376552, "grad_norm": 1.4140625, "learning_rate": 1.998716743002571e-05, "loss": 0.6609, "step": 451 }, { "epoch": 0.06201550387596899, "grad_norm": 1.4609375, "learning_rate": 1.9987094221563565e-05, "loss": 0.6386, "step": 452 }, { "epoch": 0.06215270631817246, "grad_norm": 1.265625, "learning_rate": 1.9987020805006508e-05, "loss": 0.5273, "step": 453 }, { "epoch": 0.062289908760375934, "grad_norm": 1.4140625, "learning_rate": 1.9986947180356075e-05, "loss": 0.6219, "step": 454 }, { "epoch": 0.062427111202579405, "grad_norm": 1.4296875, "learning_rate": 1.9986873347613796e-05, "loss": 0.646, "step": 455 }, { "epoch": 0.06256431364478288, "grad_norm": 1.421875, "learning_rate": 1.9986799306781205e-05, "loss": 0.6487, "step": 456 }, { "epoch": 0.06270151608698635, "grad_norm": 1.375, "learning_rate": 1.9986725057859853e-05, "loss": 0.5674, "step": 457 }, { "epoch": 0.06283871852918982, "grad_norm": 1.3359375, "learning_rate": 1.998665060085128e-05, "loss": 0.5261, "step": 458 }, { "epoch": 0.06297592097139329, "grad_norm": 1.4609375, "learning_rate": 1.998657593575704e-05, "loss": 0.6431, "step": 459 }, { "epoch": 0.06311312341359676, "grad_norm": 1.40625, "learning_rate": 1.9986501062578694e-05, "loss": 0.5625, "step": 460 }, { "epoch": 0.06325032585580023, "grad_norm": 1.3359375, "learning_rate": 1.9986425981317794e-05, "loss": 0.5621, "step": 461 }, { "epoch": 0.0633875282980037, "grad_norm": 1.40625, "learning_rate": 1.9986350691975908e-05, "loss": 0.6481, "step": 462 }, { "epoch": 0.06352473074020717, "grad_norm": 1.53125, "learning_rate": 1.9986275194554605e-05, "loss": 0.5938, "step": 463 }, { "epoch": 0.06366193318241065, "grad_norm": 1.3984375, "learning_rate": 1.9986199489055457e-05, "loss": 0.628, "step": 464 }, { "epoch": 0.06379913562461412, "grad_norm": 1.4765625, "learning_rate": 1.9986123575480044e-05, "loss": 0.6217, "step": 465 }, { "epoch": 0.06393633806681759, "grad_norm": 1.328125, "learning_rate": 1.9986047453829943e-05, "loss": 0.5745, "step": 466 }, { "epoch": 0.06407354050902106, "grad_norm": 1.53125, "learning_rate": 1.9985971124106744e-05, "loss": 0.701, "step": 467 }, { "epoch": 0.06421074295122453, "grad_norm": 1.3046875, "learning_rate": 1.9985894586312037e-05, "loss": 0.5759, "step": 468 }, { "epoch": 0.064347945393428, "grad_norm": 1.4140625, "learning_rate": 1.9985817840447416e-05, "loss": 0.6558, "step": 469 }, { "epoch": 0.06448514783563147, "grad_norm": 1.5234375, "learning_rate": 1.998574088651448e-05, "loss": 0.6988, "step": 470 }, { "epoch": 0.06462235027783494, "grad_norm": 1.2578125, "learning_rate": 1.9985663724514835e-05, "loss": 0.5298, "step": 471 }, { "epoch": 0.06475955272003842, "grad_norm": 1.375, "learning_rate": 1.9985586354450084e-05, "loss": 0.6394, "step": 472 }, { "epoch": 0.06489675516224189, "grad_norm": 1.375, "learning_rate": 1.9985508776321843e-05, "loss": 0.6599, "step": 473 }, { "epoch": 0.06503395760444536, "grad_norm": 1.4140625, "learning_rate": 1.9985430990131726e-05, "loss": 0.6154, "step": 474 }, { "epoch": 0.06517116004664883, "grad_norm": 1.2109375, "learning_rate": 1.9985352995881355e-05, "loss": 0.5265, "step": 475 }, { "epoch": 0.0653083624888523, "grad_norm": 1.453125, "learning_rate": 1.998527479357235e-05, "loss": 0.6291, "step": 476 }, { "epoch": 0.06544556493105577, "grad_norm": 1.2265625, "learning_rate": 1.998519638320635e-05, "loss": 0.467, "step": 477 }, { "epoch": 0.06558276737325924, "grad_norm": 1.390625, "learning_rate": 1.9985117764784987e-05, "loss": 0.6537, "step": 478 }, { "epoch": 0.06571996981546271, "grad_norm": 1.5, "learning_rate": 1.9985038938309892e-05, "loss": 0.692, "step": 479 }, { "epoch": 0.06585717225766619, "grad_norm": 1.3359375, "learning_rate": 1.9984959903782712e-05, "loss": 0.5917, "step": 480 }, { "epoch": 0.06599437469986966, "grad_norm": 1.3671875, "learning_rate": 1.9984880661205097e-05, "loss": 0.6446, "step": 481 }, { "epoch": 0.06613157714207313, "grad_norm": 1.5078125, "learning_rate": 1.998480121057869e-05, "loss": 0.7062, "step": 482 }, { "epoch": 0.0662687795842766, "grad_norm": 1.28125, "learning_rate": 1.9984721551905157e-05, "loss": 0.5593, "step": 483 }, { "epoch": 0.06640598202648007, "grad_norm": 1.1875, "learning_rate": 1.9984641685186144e-05, "loss": 0.4965, "step": 484 }, { "epoch": 0.06654318446868354, "grad_norm": 1.34375, "learning_rate": 1.998456161042333e-05, "loss": 0.5644, "step": 485 }, { "epoch": 0.06668038691088701, "grad_norm": 1.390625, "learning_rate": 1.9984481327618374e-05, "loss": 0.6113, "step": 486 }, { "epoch": 0.06681758935309048, "grad_norm": 1.21875, "learning_rate": 1.998440083677295e-05, "loss": 0.5699, "step": 487 }, { "epoch": 0.06695479179529396, "grad_norm": 1.3515625, "learning_rate": 1.998432013788874e-05, "loss": 0.5832, "step": 488 }, { "epoch": 0.06709199423749743, "grad_norm": 1.296875, "learning_rate": 1.998423923096742e-05, "loss": 0.5222, "step": 489 }, { "epoch": 0.0672291966797009, "grad_norm": 1.3671875, "learning_rate": 1.9984158116010675e-05, "loss": 0.622, "step": 490 }, { "epoch": 0.06736639912190437, "grad_norm": 1.3984375, "learning_rate": 1.9984076793020202e-05, "loss": 0.6364, "step": 491 }, { "epoch": 0.06750360156410784, "grad_norm": 1.40625, "learning_rate": 1.998399526199769e-05, "loss": 0.5696, "step": 492 }, { "epoch": 0.06764080400631131, "grad_norm": 1.34375, "learning_rate": 1.998391352294484e-05, "loss": 0.5951, "step": 493 }, { "epoch": 0.06777800644851478, "grad_norm": 1.328125, "learning_rate": 1.9983831575863353e-05, "loss": 0.5425, "step": 494 }, { "epoch": 0.06791520889071825, "grad_norm": 1.3125, "learning_rate": 1.998374942075494e-05, "loss": 0.623, "step": 495 }, { "epoch": 0.06805241133292173, "grad_norm": 1.234375, "learning_rate": 1.9983667057621306e-05, "loss": 0.5208, "step": 496 }, { "epoch": 0.0681896137751252, "grad_norm": 1.3515625, "learning_rate": 1.9983584486464177e-05, "loss": 0.6024, "step": 497 }, { "epoch": 0.06832681621732867, "grad_norm": 1.359375, "learning_rate": 1.9983501707285266e-05, "loss": 0.5725, "step": 498 }, { "epoch": 0.06846401865953214, "grad_norm": 1.390625, "learning_rate": 1.99834187200863e-05, "loss": 0.5762, "step": 499 }, { "epoch": 0.06860122110173561, "grad_norm": 1.3515625, "learning_rate": 1.9983335524869005e-05, "loss": 0.5361, "step": 500 }, { "epoch": 0.06873842354393908, "grad_norm": 1.4375, "learning_rate": 1.998325212163512e-05, "loss": 0.5634, "step": 501 }, { "epoch": 0.06887562598614255, "grad_norm": 1.25, "learning_rate": 1.9983168510386385e-05, "loss": 0.5015, "step": 502 }, { "epoch": 0.06901282842834602, "grad_norm": 1.625, "learning_rate": 1.9983084691124534e-05, "loss": 0.5524, "step": 503 }, { "epoch": 0.0691500308705495, "grad_norm": 1.8515625, "learning_rate": 1.9983000663851313e-05, "loss": 0.6061, "step": 504 }, { "epoch": 0.06928723331275297, "grad_norm": 1.4375, "learning_rate": 1.9982916428568485e-05, "loss": 0.5565, "step": 505 }, { "epoch": 0.06942443575495644, "grad_norm": 1.40625, "learning_rate": 1.998283198527779e-05, "loss": 0.5798, "step": 506 }, { "epoch": 0.06956163819715991, "grad_norm": 1.4296875, "learning_rate": 1.9982747333981e-05, "loss": 0.5549, "step": 507 }, { "epoch": 0.06969884063936338, "grad_norm": 1.296875, "learning_rate": 1.998266247467987e-05, "loss": 0.6264, "step": 508 }, { "epoch": 0.06983604308156685, "grad_norm": 1.3125, "learning_rate": 1.9982577407376174e-05, "loss": 0.5309, "step": 509 }, { "epoch": 0.06997324552377032, "grad_norm": 1.546875, "learning_rate": 1.998249213207168e-05, "loss": 0.6711, "step": 510 }, { "epoch": 0.0701104479659738, "grad_norm": 1.3671875, "learning_rate": 1.998240664876817e-05, "loss": 0.653, "step": 511 }, { "epoch": 0.07024765040817726, "grad_norm": 1.3671875, "learning_rate": 1.998232095746742e-05, "loss": 0.558, "step": 512 }, { "epoch": 0.07038485285038074, "grad_norm": 1.5390625, "learning_rate": 1.9982235058171216e-05, "loss": 0.6782, "step": 513 }, { "epoch": 0.07052205529258421, "grad_norm": 1.203125, "learning_rate": 1.9982148950881353e-05, "loss": 0.4785, "step": 514 }, { "epoch": 0.07065925773478768, "grad_norm": 1.34375, "learning_rate": 1.998206263559962e-05, "loss": 0.6292, "step": 515 }, { "epoch": 0.07079646017699115, "grad_norm": 1.46875, "learning_rate": 1.9981976112327818e-05, "loss": 0.6621, "step": 516 }, { "epoch": 0.07093366261919462, "grad_norm": 1.4453125, "learning_rate": 1.998188938106775e-05, "loss": 0.6264, "step": 517 }, { "epoch": 0.07107086506139809, "grad_norm": 1.25, "learning_rate": 1.998180244182122e-05, "loss": 0.556, "step": 518 }, { "epoch": 0.07120806750360156, "grad_norm": 1.328125, "learning_rate": 1.998171529459004e-05, "loss": 0.6087, "step": 519 }, { "epoch": 0.07134526994580503, "grad_norm": 1.359375, "learning_rate": 1.998162793937603e-05, "loss": 0.6037, "step": 520 }, { "epoch": 0.0714824723880085, "grad_norm": 1.34375, "learning_rate": 1.9981540376181008e-05, "loss": 0.6145, "step": 521 }, { "epoch": 0.07161967483021198, "grad_norm": 1.3125, "learning_rate": 1.9981452605006795e-05, "loss": 0.577, "step": 522 }, { "epoch": 0.07175687727241545, "grad_norm": 1.40625, "learning_rate": 1.9981364625855226e-05, "loss": 0.6345, "step": 523 }, { "epoch": 0.07189407971461892, "grad_norm": 1.3984375, "learning_rate": 1.998127643872813e-05, "loss": 0.6758, "step": 524 }, { "epoch": 0.07203128215682239, "grad_norm": 1.3515625, "learning_rate": 1.9981188043627345e-05, "loss": 0.5886, "step": 525 }, { "epoch": 0.07216848459902586, "grad_norm": 1.390625, "learning_rate": 1.9981099440554716e-05, "loss": 0.6565, "step": 526 }, { "epoch": 0.07230568704122933, "grad_norm": 1.3046875, "learning_rate": 1.9981010629512086e-05, "loss": 0.5651, "step": 527 }, { "epoch": 0.0724428894834328, "grad_norm": 1.40625, "learning_rate": 1.9980921610501305e-05, "loss": 0.5895, "step": 528 }, { "epoch": 0.07258009192563628, "grad_norm": 1.3515625, "learning_rate": 1.9980832383524227e-05, "loss": 0.6253, "step": 529 }, { "epoch": 0.07271729436783975, "grad_norm": 1.53125, "learning_rate": 1.9980742948582716e-05, "loss": 0.6193, "step": 530 }, { "epoch": 0.07285449681004322, "grad_norm": 1.3671875, "learning_rate": 1.998065330567863e-05, "loss": 0.6054, "step": 531 }, { "epoch": 0.07299169925224669, "grad_norm": 1.4296875, "learning_rate": 1.998056345481384e-05, "loss": 0.71, "step": 532 }, { "epoch": 0.07312890169445016, "grad_norm": 1.375, "learning_rate": 1.998047339599022e-05, "loss": 0.608, "step": 533 }, { "epoch": 0.07326610413665363, "grad_norm": 1.359375, "learning_rate": 1.9980383129209648e-05, "loss": 0.6567, "step": 534 }, { "epoch": 0.0734033065788571, "grad_norm": 1.296875, "learning_rate": 1.998029265447399e-05, "loss": 0.6055, "step": 535 }, { "epoch": 0.07354050902106057, "grad_norm": 1.4140625, "learning_rate": 1.998020197178515e-05, "loss": 0.6739, "step": 536 }, { "epoch": 0.07367771146326405, "grad_norm": 1.34375, "learning_rate": 1.998011108114501e-05, "loss": 0.6262, "step": 537 }, { "epoch": 0.07381491390546752, "grad_norm": 1.4296875, "learning_rate": 1.9980019982555458e-05, "loss": 0.6058, "step": 538 }, { "epoch": 0.07395211634767099, "grad_norm": 1.3828125, "learning_rate": 1.9979928676018403e-05, "loss": 0.5714, "step": 539 }, { "epoch": 0.07408931878987446, "grad_norm": 1.3671875, "learning_rate": 1.9979837161535742e-05, "loss": 0.6004, "step": 540 }, { "epoch": 0.07422652123207793, "grad_norm": 1.4296875, "learning_rate": 1.9979745439109375e-05, "loss": 0.6502, "step": 541 }, { "epoch": 0.0743637236742814, "grad_norm": 1.2890625, "learning_rate": 1.9979653508741225e-05, "loss": 0.5198, "step": 542 }, { "epoch": 0.07450092611648487, "grad_norm": 1.40625, "learning_rate": 1.9979561370433204e-05, "loss": 0.6239, "step": 543 }, { "epoch": 0.07463812855868834, "grad_norm": 1.3203125, "learning_rate": 1.9979469024187232e-05, "loss": 0.5729, "step": 544 }, { "epoch": 0.07477533100089181, "grad_norm": 1.3515625, "learning_rate": 1.9979376470005225e-05, "loss": 0.5773, "step": 545 }, { "epoch": 0.07491253344309529, "grad_norm": 1.3125, "learning_rate": 1.9979283707889124e-05, "loss": 0.5506, "step": 546 }, { "epoch": 0.07504973588529876, "grad_norm": 1.375, "learning_rate": 1.9979190737840852e-05, "loss": 0.6346, "step": 547 }, { "epoch": 0.07518693832750223, "grad_norm": 1.3828125, "learning_rate": 1.9979097559862352e-05, "loss": 0.5563, "step": 548 }, { "epoch": 0.0753241407697057, "grad_norm": 1.328125, "learning_rate": 1.9979004173955564e-05, "loss": 0.6164, "step": 549 }, { "epoch": 0.07546134321190917, "grad_norm": 1.3046875, "learning_rate": 1.9978910580122434e-05, "loss": 0.5879, "step": 550 }, { "epoch": 0.07559854565411264, "grad_norm": 1.421875, "learning_rate": 1.9978816778364907e-05, "loss": 0.6232, "step": 551 }, { "epoch": 0.07573574809631611, "grad_norm": 1.359375, "learning_rate": 1.9978722768684947e-05, "loss": 0.5963, "step": 552 }, { "epoch": 0.07587295053851958, "grad_norm": 1.3671875, "learning_rate": 1.9978628551084506e-05, "loss": 0.6201, "step": 553 }, { "epoch": 0.07601015298072306, "grad_norm": 1.2421875, "learning_rate": 1.9978534125565548e-05, "loss": 0.5468, "step": 554 }, { "epoch": 0.07614735542292653, "grad_norm": 1.328125, "learning_rate": 1.9978439492130044e-05, "loss": 0.5836, "step": 555 }, { "epoch": 0.07628455786513, "grad_norm": 1.2265625, "learning_rate": 1.9978344650779965e-05, "loss": 0.5451, "step": 556 }, { "epoch": 0.07642176030733347, "grad_norm": 1.3671875, "learning_rate": 1.997824960151728e-05, "loss": 0.6823, "step": 557 }, { "epoch": 0.07655896274953694, "grad_norm": 1.375, "learning_rate": 1.9978154344343978e-05, "loss": 0.6716, "step": 558 }, { "epoch": 0.07669616519174041, "grad_norm": 1.2734375, "learning_rate": 1.997805887926204e-05, "loss": 0.5246, "step": 559 }, { "epoch": 0.07683336763394388, "grad_norm": 1.421875, "learning_rate": 1.9977963206273454e-05, "loss": 0.6345, "step": 560 }, { "epoch": 0.07697057007614735, "grad_norm": 1.3125, "learning_rate": 1.997786732538022e-05, "loss": 0.5315, "step": 561 }, { "epoch": 0.07710777251835083, "grad_norm": 1.4921875, "learning_rate": 1.9977771236584326e-05, "loss": 0.6927, "step": 562 }, { "epoch": 0.0772449749605543, "grad_norm": 1.40625, "learning_rate": 1.9977674939887785e-05, "loss": 0.6176, "step": 563 }, { "epoch": 0.07738217740275777, "grad_norm": 1.375, "learning_rate": 1.9977578435292596e-05, "loss": 0.6241, "step": 564 }, { "epoch": 0.07751937984496124, "grad_norm": 1.234375, "learning_rate": 1.9977481722800767e-05, "loss": 0.528, "step": 565 }, { "epoch": 0.07765658228716471, "grad_norm": 1.71875, "learning_rate": 1.9977384802414324e-05, "loss": 0.6594, "step": 566 }, { "epoch": 0.07779378472936818, "grad_norm": 1.4609375, "learning_rate": 1.9977287674135275e-05, "loss": 0.6588, "step": 567 }, { "epoch": 0.07793098717157165, "grad_norm": 1.3828125, "learning_rate": 1.9977190337965653e-05, "loss": 0.6504, "step": 568 }, { "epoch": 0.07806818961377512, "grad_norm": 1.421875, "learning_rate": 1.997709279390748e-05, "loss": 0.6946, "step": 569 }, { "epoch": 0.0782053920559786, "grad_norm": 1.421875, "learning_rate": 1.9976995041962793e-05, "loss": 0.662, "step": 570 }, { "epoch": 0.07834259449818207, "grad_norm": 1.4140625, "learning_rate": 1.9976897082133627e-05, "loss": 0.5794, "step": 571 }, { "epoch": 0.07847979694038554, "grad_norm": 1.453125, "learning_rate": 1.9976798914422017e-05, "loss": 0.5881, "step": 572 }, { "epoch": 0.07861699938258901, "grad_norm": 1.390625, "learning_rate": 1.997670053883002e-05, "loss": 0.6463, "step": 573 }, { "epoch": 0.07875420182479248, "grad_norm": 1.390625, "learning_rate": 1.9976601955359676e-05, "loss": 0.5309, "step": 574 }, { "epoch": 0.07889140426699595, "grad_norm": 1.296875, "learning_rate": 1.9976503164013042e-05, "loss": 0.5445, "step": 575 }, { "epoch": 0.07902860670919942, "grad_norm": 1.328125, "learning_rate": 1.997640416479218e-05, "loss": 0.6031, "step": 576 }, { "epoch": 0.0791658091514029, "grad_norm": 1.359375, "learning_rate": 1.9976304957699152e-05, "loss": 0.5715, "step": 577 }, { "epoch": 0.07930301159360637, "grad_norm": 1.390625, "learning_rate": 1.9976205542736023e-05, "loss": 0.5859, "step": 578 }, { "epoch": 0.07944021403580984, "grad_norm": 1.28125, "learning_rate": 1.997610591990486e-05, "loss": 0.585, "step": 579 }, { "epoch": 0.07957741647801331, "grad_norm": 1.265625, "learning_rate": 1.9976006089207748e-05, "loss": 0.5211, "step": 580 }, { "epoch": 0.07971461892021678, "grad_norm": 1.5546875, "learning_rate": 1.997590605064676e-05, "loss": 0.7025, "step": 581 }, { "epoch": 0.07985182136242025, "grad_norm": 1.2734375, "learning_rate": 1.9975805804223983e-05, "loss": 0.5408, "step": 582 }, { "epoch": 0.07998902380462372, "grad_norm": 1.1796875, "learning_rate": 1.997570534994151e-05, "loss": 0.4968, "step": 583 }, { "epoch": 0.08012622624682719, "grad_norm": 1.390625, "learning_rate": 1.9975604687801426e-05, "loss": 0.5789, "step": 584 }, { "epoch": 0.08026342868903066, "grad_norm": 1.40625, "learning_rate": 1.9975503817805832e-05, "loss": 0.6513, "step": 585 }, { "epoch": 0.08040063113123413, "grad_norm": 1.703125, "learning_rate": 1.997540273995683e-05, "loss": 0.5323, "step": 586 }, { "epoch": 0.0805378335734376, "grad_norm": 1.4140625, "learning_rate": 1.997530145425653e-05, "loss": 0.663, "step": 587 }, { "epoch": 0.08067503601564108, "grad_norm": 1.390625, "learning_rate": 1.9975199960707038e-05, "loss": 0.6007, "step": 588 }, { "epoch": 0.08081223845784455, "grad_norm": 1.3671875, "learning_rate": 1.9975098259310466e-05, "loss": 0.5942, "step": 589 }, { "epoch": 0.08094944090004802, "grad_norm": 1.671875, "learning_rate": 1.997499635006894e-05, "loss": 0.6164, "step": 590 }, { "epoch": 0.08108664334225149, "grad_norm": 1.375, "learning_rate": 1.9974894232984575e-05, "loss": 0.5907, "step": 591 }, { "epoch": 0.08122384578445496, "grad_norm": 1.3359375, "learning_rate": 1.997479190805951e-05, "loss": 0.6226, "step": 592 }, { "epoch": 0.08136104822665843, "grad_norm": 1.296875, "learning_rate": 1.9974689375295867e-05, "loss": 0.581, "step": 593 }, { "epoch": 0.0814982506688619, "grad_norm": 1.3125, "learning_rate": 1.9974586634695786e-05, "loss": 0.5425, "step": 594 }, { "epoch": 0.08163545311106538, "grad_norm": 1.296875, "learning_rate": 1.997448368626141e-05, "loss": 0.5488, "step": 595 }, { "epoch": 0.08177265555326885, "grad_norm": 1.2890625, "learning_rate": 1.997438052999488e-05, "loss": 0.5621, "step": 596 }, { "epoch": 0.08190985799547232, "grad_norm": 1.390625, "learning_rate": 1.997427716589835e-05, "loss": 0.5944, "step": 597 }, { "epoch": 0.08204706043767579, "grad_norm": 1.40625, "learning_rate": 1.997417359397397e-05, "loss": 0.6178, "step": 598 }, { "epoch": 0.08218426287987926, "grad_norm": 1.5703125, "learning_rate": 1.9974069814223896e-05, "loss": 0.6616, "step": 599 }, { "epoch": 0.08232146532208273, "grad_norm": 1.34375, "learning_rate": 1.99739658266503e-05, "loss": 0.6294, "step": 600 }, { "epoch": 0.0824586677642862, "grad_norm": 1.4921875, "learning_rate": 1.9973861631255338e-05, "loss": 0.671, "step": 601 }, { "epoch": 0.08259587020648967, "grad_norm": 1.4375, "learning_rate": 1.9973757228041186e-05, "loss": 0.711, "step": 602 }, { "epoch": 0.08273307264869315, "grad_norm": 1.4765625, "learning_rate": 1.9973652617010017e-05, "loss": 0.6402, "step": 603 }, { "epoch": 0.08287027509089662, "grad_norm": 1.28125, "learning_rate": 1.9973547798164015e-05, "loss": 0.581, "step": 604 }, { "epoch": 0.08300747753310009, "grad_norm": 1.359375, "learning_rate": 1.997344277150536e-05, "loss": 0.6652, "step": 605 }, { "epoch": 0.08314467997530356, "grad_norm": 1.375, "learning_rate": 1.9973337537036244e-05, "loss": 0.614, "step": 606 }, { "epoch": 0.08328188241750703, "grad_norm": 1.421875, "learning_rate": 1.9973232094758854e-05, "loss": 0.6123, "step": 607 }, { "epoch": 0.0834190848597105, "grad_norm": 1.359375, "learning_rate": 1.9973126444675395e-05, "loss": 0.6254, "step": 608 }, { "epoch": 0.08355628730191397, "grad_norm": 1.4296875, "learning_rate": 1.9973020586788062e-05, "loss": 0.643, "step": 609 }, { "epoch": 0.08369348974411744, "grad_norm": 1.4296875, "learning_rate": 1.997291452109906e-05, "loss": 0.5816, "step": 610 }, { "epoch": 0.08383069218632092, "grad_norm": 1.3515625, "learning_rate": 1.9972808247610605e-05, "loss": 0.5663, "step": 611 }, { "epoch": 0.08396789462852439, "grad_norm": 1.2890625, "learning_rate": 1.997270176632491e-05, "loss": 0.5875, "step": 612 }, { "epoch": 0.08410509707072786, "grad_norm": 1.1484375, "learning_rate": 1.997259507724419e-05, "loss": 0.5284, "step": 613 }, { "epoch": 0.08424229951293133, "grad_norm": 1.359375, "learning_rate": 1.9972488180370668e-05, "loss": 0.6354, "step": 614 }, { "epoch": 0.0843795019551348, "grad_norm": 1.3515625, "learning_rate": 1.997238107570657e-05, "loss": 0.6217, "step": 615 }, { "epoch": 0.08451670439733827, "grad_norm": 1.390625, "learning_rate": 1.9972273763254136e-05, "loss": 0.6434, "step": 616 }, { "epoch": 0.08465390683954174, "grad_norm": 1.3125, "learning_rate": 1.9972166243015598e-05, "loss": 0.5813, "step": 617 }, { "epoch": 0.08479110928174521, "grad_norm": 1.2109375, "learning_rate": 1.9972058514993193e-05, "loss": 0.5671, "step": 618 }, { "epoch": 0.08492831172394869, "grad_norm": 1.34375, "learning_rate": 1.997195057918917e-05, "loss": 0.5776, "step": 619 }, { "epoch": 0.08506551416615216, "grad_norm": 1.3046875, "learning_rate": 1.9971842435605774e-05, "loss": 0.6061, "step": 620 }, { "epoch": 0.08520271660835563, "grad_norm": 1.3515625, "learning_rate": 1.9971734084245257e-05, "loss": 0.6238, "step": 621 }, { "epoch": 0.0853399190505591, "grad_norm": 1.25, "learning_rate": 1.9971625525109882e-05, "loss": 0.4886, "step": 622 }, { "epoch": 0.08547712149276257, "grad_norm": 1.453125, "learning_rate": 1.9971516758201912e-05, "loss": 0.6342, "step": 623 }, { "epoch": 0.08561432393496604, "grad_norm": 1.3046875, "learning_rate": 1.9971407783523605e-05, "loss": 0.5658, "step": 624 }, { "epoch": 0.08575152637716951, "grad_norm": 1.265625, "learning_rate": 1.997129860107724e-05, "loss": 0.4906, "step": 625 }, { "epoch": 0.08588872881937298, "grad_norm": 1.265625, "learning_rate": 1.9971189210865087e-05, "loss": 0.5751, "step": 626 }, { "epoch": 0.08602593126157645, "grad_norm": 1.40625, "learning_rate": 1.9971079612889427e-05, "loss": 0.613, "step": 627 }, { "epoch": 0.08616313370377993, "grad_norm": 1.3046875, "learning_rate": 1.9970969807152543e-05, "loss": 0.5764, "step": 628 }, { "epoch": 0.0863003361459834, "grad_norm": 1.390625, "learning_rate": 1.9970859793656726e-05, "loss": 0.6292, "step": 629 }, { "epoch": 0.08643753858818687, "grad_norm": 1.3828125, "learning_rate": 1.9970749572404265e-05, "loss": 0.6133, "step": 630 }, { "epoch": 0.08657474103039034, "grad_norm": 1.3203125, "learning_rate": 1.9970639143397455e-05, "loss": 0.5793, "step": 631 }, { "epoch": 0.08671194347259381, "grad_norm": 1.53125, "learning_rate": 1.9970528506638603e-05, "loss": 0.6873, "step": 632 }, { "epoch": 0.08684914591479728, "grad_norm": 1.3515625, "learning_rate": 1.997041766213001e-05, "loss": 0.6002, "step": 633 }, { "epoch": 0.08698634835700075, "grad_norm": 1.3828125, "learning_rate": 1.9970306609873985e-05, "loss": 0.6421, "step": 634 }, { "epoch": 0.08712355079920422, "grad_norm": 1.3828125, "learning_rate": 1.9970195349872844e-05, "loss": 0.5914, "step": 635 }, { "epoch": 0.0872607532414077, "grad_norm": 1.265625, "learning_rate": 1.99700838821289e-05, "loss": 0.5866, "step": 636 }, { "epoch": 0.08739795568361117, "grad_norm": 1.46875, "learning_rate": 1.9969972206644486e-05, "loss": 0.602, "step": 637 }, { "epoch": 0.08753515812581464, "grad_norm": 1.3125, "learning_rate": 1.996986032342192e-05, "loss": 0.5198, "step": 638 }, { "epoch": 0.08767236056801811, "grad_norm": 1.390625, "learning_rate": 1.9969748232463538e-05, "loss": 0.6649, "step": 639 }, { "epoch": 0.08780956301022158, "grad_norm": 1.34375, "learning_rate": 1.996963593377167e-05, "loss": 0.5408, "step": 640 }, { "epoch": 0.08794676545242505, "grad_norm": 1.40625, "learning_rate": 1.9969523427348666e-05, "loss": 0.6436, "step": 641 }, { "epoch": 0.08808396789462852, "grad_norm": 1.3359375, "learning_rate": 1.996941071319686e-05, "loss": 0.6146, "step": 642 }, { "epoch": 0.088221170336832, "grad_norm": 1.5078125, "learning_rate": 1.9969297791318604e-05, "loss": 0.6579, "step": 643 }, { "epoch": 0.08835837277903547, "grad_norm": 1.5078125, "learning_rate": 1.996918466171625e-05, "loss": 0.5855, "step": 644 }, { "epoch": 0.08849557522123894, "grad_norm": 1.375, "learning_rate": 1.996907132439216e-05, "loss": 0.6111, "step": 645 }, { "epoch": 0.08863277766344241, "grad_norm": 1.3671875, "learning_rate": 1.996895777934869e-05, "loss": 0.6067, "step": 646 }, { "epoch": 0.08876998010564588, "grad_norm": 1.359375, "learning_rate": 1.9968844026588206e-05, "loss": 0.5753, "step": 647 }, { "epoch": 0.08890718254784935, "grad_norm": 1.2578125, "learning_rate": 1.9968730066113086e-05, "loss": 0.5237, "step": 648 }, { "epoch": 0.08904438499005282, "grad_norm": 1.3046875, "learning_rate": 1.9968615897925694e-05, "loss": 0.5714, "step": 649 }, { "epoch": 0.08918158743225629, "grad_norm": 1.28125, "learning_rate": 1.9968501522028414e-05, "loss": 0.5826, "step": 650 }, { "epoch": 0.08931878987445976, "grad_norm": 1.4921875, "learning_rate": 1.996838693842363e-05, "loss": 0.5971, "step": 651 }, { "epoch": 0.08945599231666324, "grad_norm": 1.5, "learning_rate": 1.9968272147113726e-05, "loss": 0.6558, "step": 652 }, { "epoch": 0.0895931947588667, "grad_norm": 1.34375, "learning_rate": 1.9968157148101097e-05, "loss": 0.6211, "step": 653 }, { "epoch": 0.08973039720107018, "grad_norm": 1.3515625, "learning_rate": 1.996804194138814e-05, "loss": 0.5779, "step": 654 }, { "epoch": 0.08986759964327365, "grad_norm": 1.1796875, "learning_rate": 1.996792652697725e-05, "loss": 0.4716, "step": 655 }, { "epoch": 0.09000480208547712, "grad_norm": 1.40625, "learning_rate": 1.996781090487084e-05, "loss": 0.6368, "step": 656 }, { "epoch": 0.09014200452768059, "grad_norm": 1.3046875, "learning_rate": 1.9967695075071312e-05, "loss": 0.5774, "step": 657 }, { "epoch": 0.09027920696988406, "grad_norm": 1.484375, "learning_rate": 1.996757903758108e-05, "loss": 0.6434, "step": 658 }, { "epoch": 0.09041640941208753, "grad_norm": 1.1484375, "learning_rate": 1.996746279240257e-05, "loss": 0.4638, "step": 659 }, { "epoch": 0.090553611854291, "grad_norm": 1.3203125, "learning_rate": 1.9967346339538195e-05, "loss": 0.6208, "step": 660 }, { "epoch": 0.09069081429649448, "grad_norm": 1.3046875, "learning_rate": 1.996722967899038e-05, "loss": 0.6014, "step": 661 }, { "epoch": 0.09082801673869795, "grad_norm": 1.421875, "learning_rate": 1.9967112810761567e-05, "loss": 0.6756, "step": 662 }, { "epoch": 0.09096521918090142, "grad_norm": 1.3515625, "learning_rate": 1.996699573485418e-05, "loss": 0.5885, "step": 663 }, { "epoch": 0.09110242162310489, "grad_norm": 1.171875, "learning_rate": 1.9966878451270664e-05, "loss": 0.5491, "step": 664 }, { "epoch": 0.09123962406530836, "grad_norm": 1.28125, "learning_rate": 1.9966760960013462e-05, "loss": 0.5688, "step": 665 }, { "epoch": 0.09137682650751183, "grad_norm": 1.3515625, "learning_rate": 1.996664326108502e-05, "loss": 0.5915, "step": 666 }, { "epoch": 0.0915140289497153, "grad_norm": 1.328125, "learning_rate": 1.9966525354487792e-05, "loss": 0.6238, "step": 667 }, { "epoch": 0.09165123139191877, "grad_norm": 1.3359375, "learning_rate": 1.996640724022424e-05, "loss": 0.5816, "step": 668 }, { "epoch": 0.09178843383412225, "grad_norm": 1.3828125, "learning_rate": 1.9966288918296817e-05, "loss": 0.6552, "step": 669 }, { "epoch": 0.09192563627632572, "grad_norm": 1.296875, "learning_rate": 1.9966170388707987e-05, "loss": 0.5695, "step": 670 }, { "epoch": 0.09206283871852919, "grad_norm": 1.390625, "learning_rate": 1.996605165146023e-05, "loss": 0.621, "step": 671 }, { "epoch": 0.09220004116073266, "grad_norm": 1.25, "learning_rate": 1.996593270655601e-05, "loss": 0.5362, "step": 672 }, { "epoch": 0.09233724360293613, "grad_norm": 1.53125, "learning_rate": 1.9965813553997813e-05, "loss": 0.6477, "step": 673 }, { "epoch": 0.0924744460451396, "grad_norm": 1.4453125, "learning_rate": 1.996569419378812e-05, "loss": 0.6394, "step": 674 }, { "epoch": 0.09261164848734307, "grad_norm": 1.3828125, "learning_rate": 1.9965574625929412e-05, "loss": 0.591, "step": 675 }, { "epoch": 0.09274885092954654, "grad_norm": 1.3984375, "learning_rate": 1.9965454850424184e-05, "loss": 0.5431, "step": 676 }, { "epoch": 0.09288605337175002, "grad_norm": 1.3984375, "learning_rate": 1.9965334867274934e-05, "loss": 0.6144, "step": 677 }, { "epoch": 0.09302325581395349, "grad_norm": 1.296875, "learning_rate": 1.996521467648416e-05, "loss": 0.5769, "step": 678 }, { "epoch": 0.09316045825615696, "grad_norm": 1.3828125, "learning_rate": 1.9965094278054367e-05, "loss": 0.6152, "step": 679 }, { "epoch": 0.09329766069836043, "grad_norm": 1.4609375, "learning_rate": 1.996497367198806e-05, "loss": 0.5895, "step": 680 }, { "epoch": 0.0934348631405639, "grad_norm": 1.2734375, "learning_rate": 1.996485285828776e-05, "loss": 0.5556, "step": 681 }, { "epoch": 0.09357206558276737, "grad_norm": 1.328125, "learning_rate": 1.996473183695598e-05, "loss": 0.6067, "step": 682 }, { "epoch": 0.09370926802497084, "grad_norm": 1.1875, "learning_rate": 1.9964610607995238e-05, "loss": 0.5094, "step": 683 }, { "epoch": 0.09384647046717431, "grad_norm": 1.4609375, "learning_rate": 1.9964489171408062e-05, "loss": 0.6331, "step": 684 }, { "epoch": 0.09398367290937779, "grad_norm": 1.515625, "learning_rate": 1.9964367527196987e-05, "loss": 0.667, "step": 685 }, { "epoch": 0.09412087535158126, "grad_norm": 1.3515625, "learning_rate": 1.9964245675364543e-05, "loss": 0.6063, "step": 686 }, { "epoch": 0.09425807779378473, "grad_norm": 1.453125, "learning_rate": 1.9964123615913272e-05, "loss": 0.6123, "step": 687 }, { "epoch": 0.0943952802359882, "grad_norm": 1.5, "learning_rate": 1.996400134884571e-05, "loss": 0.5946, "step": 688 }, { "epoch": 0.09453248267819167, "grad_norm": 1.5, "learning_rate": 1.996387887416442e-05, "loss": 0.6697, "step": 689 }, { "epoch": 0.09466968512039514, "grad_norm": 1.4453125, "learning_rate": 1.9963756191871933e-05, "loss": 0.7151, "step": 690 }, { "epoch": 0.09480688756259861, "grad_norm": 1.46875, "learning_rate": 1.996363330197082e-05, "loss": 0.6648, "step": 691 }, { "epoch": 0.09494409000480208, "grad_norm": 1.390625, "learning_rate": 1.996351020446364e-05, "loss": 0.6104, "step": 692 }, { "epoch": 0.09508129244700556, "grad_norm": 1.375, "learning_rate": 1.9963386899352954e-05, "loss": 0.6046, "step": 693 }, { "epoch": 0.09521849488920903, "grad_norm": 1.359375, "learning_rate": 1.9963263386641333e-05, "loss": 0.5915, "step": 694 }, { "epoch": 0.0953556973314125, "grad_norm": 1.328125, "learning_rate": 1.996313966633135e-05, "loss": 0.5448, "step": 695 }, { "epoch": 0.09549289977361597, "grad_norm": 1.40625, "learning_rate": 1.9963015738425583e-05, "loss": 0.6784, "step": 696 }, { "epoch": 0.09563010221581944, "grad_norm": 1.34375, "learning_rate": 1.9962891602926614e-05, "loss": 0.5887, "step": 697 }, { "epoch": 0.09576730465802291, "grad_norm": 1.4140625, "learning_rate": 1.9962767259837034e-05, "loss": 0.5377, "step": 698 }, { "epoch": 0.09590450710022638, "grad_norm": 1.2578125, "learning_rate": 1.996264270915943e-05, "loss": 0.5316, "step": 699 }, { "epoch": 0.09604170954242985, "grad_norm": 1.34375, "learning_rate": 1.9962517950896392e-05, "loss": 0.5832, "step": 700 }, { "epoch": 0.09617891198463333, "grad_norm": 1.3046875, "learning_rate": 1.9962392985050524e-05, "loss": 0.4989, "step": 701 }, { "epoch": 0.0963161144268368, "grad_norm": 1.2890625, "learning_rate": 1.9962267811624436e-05, "loss": 0.5045, "step": 702 }, { "epoch": 0.09645331686904027, "grad_norm": 1.375, "learning_rate": 1.9962142430620727e-05, "loss": 0.6251, "step": 703 }, { "epoch": 0.09659051931124374, "grad_norm": 1.4375, "learning_rate": 1.9962016842042016e-05, "loss": 0.6743, "step": 704 }, { "epoch": 0.09672772175344721, "grad_norm": 1.375, "learning_rate": 1.9961891045890915e-05, "loss": 0.5741, "step": 705 }, { "epoch": 0.09686492419565068, "grad_norm": 1.390625, "learning_rate": 1.9961765042170047e-05, "loss": 0.6185, "step": 706 }, { "epoch": 0.09700212663785415, "grad_norm": 1.4375, "learning_rate": 1.9961638830882037e-05, "loss": 0.6558, "step": 707 }, { "epoch": 0.09713932908005762, "grad_norm": 1.46875, "learning_rate": 1.996151241202952e-05, "loss": 0.6556, "step": 708 }, { "epoch": 0.0972765315222611, "grad_norm": 1.375, "learning_rate": 1.996138578561512e-05, "loss": 0.5534, "step": 709 }, { "epoch": 0.09741373396446457, "grad_norm": 1.3828125, "learning_rate": 1.996125895164148e-05, "loss": 0.6439, "step": 710 }, { "epoch": 0.09755093640666804, "grad_norm": 1.5546875, "learning_rate": 1.9961131910111246e-05, "loss": 0.6894, "step": 711 }, { "epoch": 0.09768813884887151, "grad_norm": 1.234375, "learning_rate": 1.9961004661027065e-05, "loss": 0.4645, "step": 712 }, { "epoch": 0.09782534129107498, "grad_norm": 1.53125, "learning_rate": 1.9960877204391586e-05, "loss": 0.6096, "step": 713 }, { "epoch": 0.09796254373327845, "grad_norm": 1.296875, "learning_rate": 1.996074954020746e-05, "loss": 0.5887, "step": 714 }, { "epoch": 0.09809974617548192, "grad_norm": 1.3671875, "learning_rate": 1.9960621668477356e-05, "loss": 0.5744, "step": 715 }, { "epoch": 0.0982369486176854, "grad_norm": 1.4609375, "learning_rate": 1.9960493589203932e-05, "loss": 0.6564, "step": 716 }, { "epoch": 0.09837415105988886, "grad_norm": 1.4140625, "learning_rate": 1.9960365302389858e-05, "loss": 0.6716, "step": 717 }, { "epoch": 0.09851135350209234, "grad_norm": 1.375, "learning_rate": 1.996023680803781e-05, "loss": 0.5991, "step": 718 }, { "epoch": 0.09864855594429581, "grad_norm": 1.265625, "learning_rate": 1.9960108106150464e-05, "loss": 0.5302, "step": 719 }, { "epoch": 0.09878575838649928, "grad_norm": 1.5234375, "learning_rate": 1.9959979196730505e-05, "loss": 0.6072, "step": 720 }, { "epoch": 0.09892296082870275, "grad_norm": 1.375, "learning_rate": 1.995985007978061e-05, "loss": 0.5553, "step": 721 }, { "epoch": 0.09906016327090622, "grad_norm": 1.3984375, "learning_rate": 1.9959720755303473e-05, "loss": 0.5637, "step": 722 }, { "epoch": 0.09919736571310969, "grad_norm": 1.3125, "learning_rate": 1.995959122330179e-05, "loss": 0.6372, "step": 723 }, { "epoch": 0.09933456815531316, "grad_norm": 1.3671875, "learning_rate": 1.9959461483778268e-05, "loss": 0.6239, "step": 724 }, { "epoch": 0.09947177059751663, "grad_norm": 1.390625, "learning_rate": 1.9959331536735592e-05, "loss": 0.6337, "step": 725 }, { "epoch": 0.0996089730397201, "grad_norm": 1.2109375, "learning_rate": 1.9959201382176485e-05, "loss": 0.4852, "step": 726 }, { "epoch": 0.09974617548192358, "grad_norm": 1.4140625, "learning_rate": 1.9959071020103657e-05, "loss": 0.6071, "step": 727 }, { "epoch": 0.09988337792412705, "grad_norm": 1.328125, "learning_rate": 1.9958940450519815e-05, "loss": 0.5834, "step": 728 }, { "epoch": 0.10002058036633052, "grad_norm": 1.4609375, "learning_rate": 1.9958809673427687e-05, "loss": 0.5863, "step": 729 }, { "epoch": 0.10015778280853399, "grad_norm": 1.3828125, "learning_rate": 1.995867868883e-05, "loss": 0.5723, "step": 730 }, { "epoch": 0.10029498525073746, "grad_norm": 1.328125, "learning_rate": 1.995854749672948e-05, "loss": 0.6504, "step": 731 }, { "epoch": 0.10043218769294093, "grad_norm": 1.453125, "learning_rate": 1.9958416097128857e-05, "loss": 0.6653, "step": 732 }, { "epoch": 0.1005693901351444, "grad_norm": 1.4375, "learning_rate": 1.9958284490030875e-05, "loss": 0.6318, "step": 733 }, { "epoch": 0.10070659257734788, "grad_norm": 1.390625, "learning_rate": 1.9958152675438272e-05, "loss": 0.646, "step": 734 }, { "epoch": 0.10084379501955135, "grad_norm": 1.375, "learning_rate": 1.9958020653353798e-05, "loss": 0.6041, "step": 735 }, { "epoch": 0.10098099746175482, "grad_norm": 1.2265625, "learning_rate": 1.99578884237802e-05, "loss": 0.527, "step": 736 }, { "epoch": 0.10111819990395829, "grad_norm": 1.3671875, "learning_rate": 1.9957755986720238e-05, "loss": 0.5778, "step": 737 }, { "epoch": 0.10125540234616176, "grad_norm": 1.2890625, "learning_rate": 1.9957623342176665e-05, "loss": 0.6147, "step": 738 }, { "epoch": 0.10139260478836523, "grad_norm": 1.265625, "learning_rate": 1.995749049015225e-05, "loss": 0.5722, "step": 739 }, { "epoch": 0.1015298072305687, "grad_norm": 1.234375, "learning_rate": 1.995735743064976e-05, "loss": 0.4949, "step": 740 }, { "epoch": 0.10166700967277217, "grad_norm": 1.3046875, "learning_rate": 1.995722416367197e-05, "loss": 0.5752, "step": 741 }, { "epoch": 0.10180421211497565, "grad_norm": 1.4375, "learning_rate": 1.995709068922165e-05, "loss": 0.5653, "step": 742 }, { "epoch": 0.10194141455717912, "grad_norm": 1.375, "learning_rate": 1.9956957007301588e-05, "loss": 0.5757, "step": 743 }, { "epoch": 0.10207861699938259, "grad_norm": 1.34375, "learning_rate": 1.9956823117914567e-05, "loss": 0.7197, "step": 744 }, { "epoch": 0.10221581944158606, "grad_norm": 1.453125, "learning_rate": 1.995668902106337e-05, "loss": 0.6016, "step": 745 }, { "epoch": 0.10235302188378953, "grad_norm": 1.2890625, "learning_rate": 1.9956554716750805e-05, "loss": 0.554, "step": 746 }, { "epoch": 0.102490224325993, "grad_norm": 1.3828125, "learning_rate": 1.9956420204979658e-05, "loss": 0.6487, "step": 747 }, { "epoch": 0.10262742676819647, "grad_norm": 1.359375, "learning_rate": 1.995628548575274e-05, "loss": 0.554, "step": 748 }, { "epoch": 0.10276462921039994, "grad_norm": 1.359375, "learning_rate": 1.9956150559072852e-05, "loss": 0.605, "step": 749 }, { "epoch": 0.10290183165260341, "grad_norm": 1.234375, "learning_rate": 1.995601542494281e-05, "loss": 0.5852, "step": 750 }, { "epoch": 0.10303903409480689, "grad_norm": 1.4765625, "learning_rate": 1.995588008336543e-05, "loss": 0.5907, "step": 751 }, { "epoch": 0.10317623653701036, "grad_norm": 1.2265625, "learning_rate": 1.9955744534343525e-05, "loss": 0.4949, "step": 752 }, { "epoch": 0.10331343897921383, "grad_norm": 1.5546875, "learning_rate": 1.9955608777879925e-05, "loss": 0.7241, "step": 753 }, { "epoch": 0.1034506414214173, "grad_norm": 1.359375, "learning_rate": 1.995547281397746e-05, "loss": 0.5493, "step": 754 }, { "epoch": 0.10358784386362077, "grad_norm": 1.390625, "learning_rate": 1.995533664263896e-05, "loss": 0.6049, "step": 755 }, { "epoch": 0.10372504630582424, "grad_norm": 1.375, "learning_rate": 1.995520026386726e-05, "loss": 0.6144, "step": 756 }, { "epoch": 0.10386224874802771, "grad_norm": 1.21875, "learning_rate": 1.995506367766521e-05, "loss": 0.5227, "step": 757 }, { "epoch": 0.10399945119023118, "grad_norm": 1.296875, "learning_rate": 1.9954926884035648e-05, "loss": 0.5974, "step": 758 }, { "epoch": 0.10413665363243466, "grad_norm": 1.15625, "learning_rate": 1.9954789882981426e-05, "loss": 0.3836, "step": 759 }, { "epoch": 0.10427385607463813, "grad_norm": 1.2734375, "learning_rate": 1.99546526745054e-05, "loss": 0.5958, "step": 760 }, { "epoch": 0.1044110585168416, "grad_norm": 1.25, "learning_rate": 1.9954515258610432e-05, "loss": 0.6211, "step": 761 }, { "epoch": 0.10454826095904507, "grad_norm": 1.3984375, "learning_rate": 1.995437763529938e-05, "loss": 0.6235, "step": 762 }, { "epoch": 0.10468546340124854, "grad_norm": 1.3359375, "learning_rate": 1.9954239804575112e-05, "loss": 0.6238, "step": 763 }, { "epoch": 0.10482266584345201, "grad_norm": 1.3203125, "learning_rate": 1.9954101766440502e-05, "loss": 0.5013, "step": 764 }, { "epoch": 0.10495986828565548, "grad_norm": 1.2734375, "learning_rate": 1.9953963520898423e-05, "loss": 0.5784, "step": 765 }, { "epoch": 0.10509707072785895, "grad_norm": 1.390625, "learning_rate": 1.9953825067951758e-05, "loss": 0.532, "step": 766 }, { "epoch": 0.10523427317006243, "grad_norm": 1.3828125, "learning_rate": 1.9953686407603398e-05, "loss": 0.576, "step": 767 }, { "epoch": 0.1053714756122659, "grad_norm": 1.421875, "learning_rate": 1.9953547539856222e-05, "loss": 0.581, "step": 768 }, { "epoch": 0.10550867805446937, "grad_norm": 1.4296875, "learning_rate": 1.995340846471313e-05, "loss": 0.6084, "step": 769 }, { "epoch": 0.10564588049667284, "grad_norm": 1.421875, "learning_rate": 1.9953269182177013e-05, "loss": 0.675, "step": 770 }, { "epoch": 0.10578308293887631, "grad_norm": 1.3515625, "learning_rate": 1.9953129692250778e-05, "loss": 0.5866, "step": 771 }, { "epoch": 0.10592028538107978, "grad_norm": 1.359375, "learning_rate": 1.9952989994937334e-05, "loss": 0.5811, "step": 772 }, { "epoch": 0.10605748782328325, "grad_norm": 1.3515625, "learning_rate": 1.9952850090239587e-05, "loss": 0.6014, "step": 773 }, { "epoch": 0.10619469026548672, "grad_norm": 1.46875, "learning_rate": 1.9952709978160455e-05, "loss": 0.5951, "step": 774 }, { "epoch": 0.1063318927076902, "grad_norm": 1.359375, "learning_rate": 1.9952569658702857e-05, "loss": 0.5705, "step": 775 }, { "epoch": 0.10646909514989367, "grad_norm": 1.40625, "learning_rate": 1.9952429131869713e-05, "loss": 0.6254, "step": 776 }, { "epoch": 0.10660629759209714, "grad_norm": 1.28125, "learning_rate": 1.9952288397663957e-05, "loss": 0.5808, "step": 777 }, { "epoch": 0.10674350003430061, "grad_norm": 1.21875, "learning_rate": 1.9952147456088516e-05, "loss": 0.4828, "step": 778 }, { "epoch": 0.10688070247650408, "grad_norm": 1.3203125, "learning_rate": 1.995200630714633e-05, "loss": 0.5841, "step": 779 }, { "epoch": 0.10701790491870755, "grad_norm": 1.4140625, "learning_rate": 1.995186495084034e-05, "loss": 0.6039, "step": 780 }, { "epoch": 0.10715510736091102, "grad_norm": 1.1953125, "learning_rate": 1.9951723387173492e-05, "loss": 0.5136, "step": 781 }, { "epoch": 0.1072923098031145, "grad_norm": 1.4765625, "learning_rate": 1.9951581616148732e-05, "loss": 0.6967, "step": 782 }, { "epoch": 0.10742951224531797, "grad_norm": 1.375, "learning_rate": 1.9951439637769016e-05, "loss": 0.5451, "step": 783 }, { "epoch": 0.10756671468752144, "grad_norm": 1.28125, "learning_rate": 1.9951297452037305e-05, "loss": 0.5303, "step": 784 }, { "epoch": 0.10770391712972491, "grad_norm": 1.21875, "learning_rate": 1.9951155058956556e-05, "loss": 0.5317, "step": 785 }, { "epoch": 0.10784111957192838, "grad_norm": 1.2734375, "learning_rate": 1.995101245852974e-05, "loss": 0.6057, "step": 786 }, { "epoch": 0.10797832201413185, "grad_norm": 1.359375, "learning_rate": 1.995086965075983e-05, "loss": 0.5456, "step": 787 }, { "epoch": 0.10811552445633532, "grad_norm": 1.375, "learning_rate": 1.9950726635649797e-05, "loss": 0.6526, "step": 788 }, { "epoch": 0.10825272689853879, "grad_norm": 1.328125, "learning_rate": 1.9950583413202623e-05, "loss": 0.5738, "step": 789 }, { "epoch": 0.10838992934074226, "grad_norm": 1.296875, "learning_rate": 1.9950439983421296e-05, "loss": 0.5642, "step": 790 }, { "epoch": 0.10852713178294573, "grad_norm": 1.265625, "learning_rate": 1.9950296346308794e-05, "loss": 0.464, "step": 791 }, { "epoch": 0.1086643342251492, "grad_norm": 1.2578125, "learning_rate": 1.9950152501868122e-05, "loss": 0.6178, "step": 792 }, { "epoch": 0.10880153666735268, "grad_norm": 1.328125, "learning_rate": 1.995000845010227e-05, "loss": 0.5893, "step": 793 }, { "epoch": 0.10893873910955615, "grad_norm": 1.5078125, "learning_rate": 1.9949864191014242e-05, "loss": 0.7211, "step": 794 }, { "epoch": 0.10907594155175962, "grad_norm": 1.3046875, "learning_rate": 1.9949719724607042e-05, "loss": 0.5637, "step": 795 }, { "epoch": 0.10921314399396309, "grad_norm": 1.3203125, "learning_rate": 1.994957505088368e-05, "loss": 0.509, "step": 796 }, { "epoch": 0.10935034643616656, "grad_norm": 1.2578125, "learning_rate": 1.9949430169847177e-05, "loss": 0.5683, "step": 797 }, { "epoch": 0.10948754887837003, "grad_norm": 1.3046875, "learning_rate": 1.9949285081500542e-05, "loss": 0.582, "step": 798 }, { "epoch": 0.1096247513205735, "grad_norm": 1.2265625, "learning_rate": 1.9949139785846806e-05, "loss": 0.5588, "step": 799 }, { "epoch": 0.10976195376277698, "grad_norm": 1.28125, "learning_rate": 1.994899428288899e-05, "loss": 0.5738, "step": 800 }, { "epoch": 0.10989915620498045, "grad_norm": 1.296875, "learning_rate": 1.994884857263013e-05, "loss": 0.579, "step": 801 }, { "epoch": 0.11003635864718392, "grad_norm": 1.3046875, "learning_rate": 1.994870265507326e-05, "loss": 0.5886, "step": 802 }, { "epoch": 0.11017356108938739, "grad_norm": 1.203125, "learning_rate": 1.9948556530221426e-05, "loss": 0.529, "step": 803 }, { "epoch": 0.11031076353159086, "grad_norm": 1.328125, "learning_rate": 1.9948410198077663e-05, "loss": 0.6159, "step": 804 }, { "epoch": 0.11044796597379433, "grad_norm": 1.3203125, "learning_rate": 1.994826365864503e-05, "loss": 0.5951, "step": 805 }, { "epoch": 0.1105851684159978, "grad_norm": 1.3046875, "learning_rate": 1.994811691192657e-05, "loss": 0.6231, "step": 806 }, { "epoch": 0.11072237085820127, "grad_norm": 1.3125, "learning_rate": 1.994796995792535e-05, "loss": 0.645, "step": 807 }, { "epoch": 0.11085957330040475, "grad_norm": 1.296875, "learning_rate": 1.9947822796644427e-05, "loss": 0.6059, "step": 808 }, { "epoch": 0.11099677574260822, "grad_norm": 1.2578125, "learning_rate": 1.994767542808687e-05, "loss": 0.548, "step": 809 }, { "epoch": 0.11113397818481169, "grad_norm": 1.3984375, "learning_rate": 1.9947527852255744e-05, "loss": 0.6335, "step": 810 }, { "epoch": 0.11127118062701516, "grad_norm": 1.34375, "learning_rate": 1.994738006915413e-05, "loss": 0.5279, "step": 811 }, { "epoch": 0.11140838306921863, "grad_norm": 1.5859375, "learning_rate": 1.9947232078785108e-05, "loss": 0.671, "step": 812 }, { "epoch": 0.1115455855114221, "grad_norm": 1.2421875, "learning_rate": 1.9947083881151757e-05, "loss": 0.5655, "step": 813 }, { "epoch": 0.11168278795362557, "grad_norm": 1.40625, "learning_rate": 1.994693547625717e-05, "loss": 0.5623, "step": 814 }, { "epoch": 0.11181999039582904, "grad_norm": 1.34375, "learning_rate": 1.9946786864104427e-05, "loss": 0.5962, "step": 815 }, { "epoch": 0.11195719283803252, "grad_norm": 1.5078125, "learning_rate": 1.994663804469664e-05, "loss": 0.6434, "step": 816 }, { "epoch": 0.11209439528023599, "grad_norm": 1.375, "learning_rate": 1.9946489018036905e-05, "loss": 0.6385, "step": 817 }, { "epoch": 0.11223159772243946, "grad_norm": 1.2421875, "learning_rate": 1.9946339784128322e-05, "loss": 0.5484, "step": 818 }, { "epoch": 0.11236880016464293, "grad_norm": 1.296875, "learning_rate": 1.9946190342974006e-05, "loss": 0.6032, "step": 819 }, { "epoch": 0.1125060026068464, "grad_norm": 1.4140625, "learning_rate": 1.994604069457707e-05, "loss": 0.5846, "step": 820 }, { "epoch": 0.11264320504904987, "grad_norm": 1.3828125, "learning_rate": 1.994589083894063e-05, "loss": 0.5796, "step": 821 }, { "epoch": 0.11278040749125334, "grad_norm": 1.28125, "learning_rate": 1.994574077606781e-05, "loss": 0.5579, "step": 822 }, { "epoch": 0.11291760993345681, "grad_norm": 1.3203125, "learning_rate": 1.9945590505961735e-05, "loss": 0.6421, "step": 823 }, { "epoch": 0.11305481237566029, "grad_norm": 1.3359375, "learning_rate": 1.9945440028625536e-05, "loss": 0.5738, "step": 824 }, { "epoch": 0.11319201481786376, "grad_norm": 1.21875, "learning_rate": 1.994528934406235e-05, "loss": 0.5046, "step": 825 }, { "epoch": 0.11332921726006723, "grad_norm": 1.328125, "learning_rate": 1.994513845227532e-05, "loss": 0.5913, "step": 826 }, { "epoch": 0.1134664197022707, "grad_norm": 1.25, "learning_rate": 1.9944987353267583e-05, "loss": 0.5315, "step": 827 }, { "epoch": 0.11360362214447417, "grad_norm": 1.421875, "learning_rate": 1.9944836047042293e-05, "loss": 0.5983, "step": 828 }, { "epoch": 0.11374082458667764, "grad_norm": 1.3828125, "learning_rate": 1.99446845336026e-05, "loss": 0.6184, "step": 829 }, { "epoch": 0.11387802702888111, "grad_norm": 1.34375, "learning_rate": 1.994453281295166e-05, "loss": 0.6386, "step": 830 }, { "epoch": 0.11401522947108458, "grad_norm": 1.34375, "learning_rate": 1.994438088509264e-05, "loss": 0.6114, "step": 831 }, { "epoch": 0.11415243191328805, "grad_norm": 1.2109375, "learning_rate": 1.9944228750028696e-05, "loss": 0.5375, "step": 832 }, { "epoch": 0.11428963435549153, "grad_norm": 1.3671875, "learning_rate": 1.9944076407763008e-05, "loss": 0.6197, "step": 833 }, { "epoch": 0.114426836797695, "grad_norm": 1.3984375, "learning_rate": 1.9943923858298743e-05, "loss": 0.6451, "step": 834 }, { "epoch": 0.11456403923989847, "grad_norm": 1.25, "learning_rate": 1.9943771101639083e-05, "loss": 0.4896, "step": 835 }, { "epoch": 0.11470124168210194, "grad_norm": 1.28125, "learning_rate": 1.994361813778721e-05, "loss": 0.5935, "step": 836 }, { "epoch": 0.11483844412430541, "grad_norm": 1.171875, "learning_rate": 1.994346496674631e-05, "loss": 0.4839, "step": 837 }, { "epoch": 0.11497564656650888, "grad_norm": 1.3984375, "learning_rate": 1.994331158851958e-05, "loss": 0.5849, "step": 838 }, { "epoch": 0.11511284900871235, "grad_norm": 1.3359375, "learning_rate": 1.9943158003110204e-05, "loss": 0.6081, "step": 839 }, { "epoch": 0.11525005145091582, "grad_norm": 1.34375, "learning_rate": 1.99430042105214e-05, "loss": 0.6195, "step": 840 }, { "epoch": 0.1153872538931193, "grad_norm": 1.3828125, "learning_rate": 1.9942850210756358e-05, "loss": 0.6524, "step": 841 }, { "epoch": 0.11552445633532277, "grad_norm": 1.5078125, "learning_rate": 1.994269600381829e-05, "loss": 0.6513, "step": 842 }, { "epoch": 0.11566165877752624, "grad_norm": 1.3125, "learning_rate": 1.994254158971041e-05, "loss": 0.5391, "step": 843 }, { "epoch": 0.11579886121972971, "grad_norm": 1.34375, "learning_rate": 1.9942386968435936e-05, "loss": 0.5817, "step": 844 }, { "epoch": 0.11593606366193318, "grad_norm": 1.4765625, "learning_rate": 1.994223213999809e-05, "loss": 0.5964, "step": 845 }, { "epoch": 0.11607326610413665, "grad_norm": 1.265625, "learning_rate": 1.9942077104400098e-05, "loss": 0.5445, "step": 846 }, { "epoch": 0.11621046854634012, "grad_norm": 1.3125, "learning_rate": 1.994192186164519e-05, "loss": 0.56, "step": 847 }, { "epoch": 0.1163476709885436, "grad_norm": 1.4453125, "learning_rate": 1.99417664117366e-05, "loss": 0.6588, "step": 848 }, { "epoch": 0.11648487343074707, "grad_norm": 1.3359375, "learning_rate": 1.9941610754677567e-05, "loss": 0.6417, "step": 849 }, { "epoch": 0.11662207587295054, "grad_norm": 1.3125, "learning_rate": 1.9941454890471337e-05, "loss": 0.5911, "step": 850 }, { "epoch": 0.11675927831515401, "grad_norm": 1.359375, "learning_rate": 1.9941298819121155e-05, "loss": 0.5177, "step": 851 }, { "epoch": 0.11689648075735748, "grad_norm": 1.4765625, "learning_rate": 1.994114254063027e-05, "loss": 0.6211, "step": 852 }, { "epoch": 0.11703368319956095, "grad_norm": 1.3515625, "learning_rate": 1.9940986055001948e-05, "loss": 0.5992, "step": 853 }, { "epoch": 0.11717088564176442, "grad_norm": 1.375, "learning_rate": 1.994082936223944e-05, "loss": 0.6593, "step": 854 }, { "epoch": 0.11730808808396789, "grad_norm": 1.453125, "learning_rate": 1.9940672462346013e-05, "loss": 0.6533, "step": 855 }, { "epoch": 0.11744529052617136, "grad_norm": 1.2578125, "learning_rate": 1.994051535532494e-05, "loss": 0.5598, "step": 856 }, { "epoch": 0.11758249296837484, "grad_norm": 1.3828125, "learning_rate": 1.994035804117949e-05, "loss": 0.6055, "step": 857 }, { "epoch": 0.1177196954105783, "grad_norm": 1.3984375, "learning_rate": 1.9940200519912946e-05, "loss": 0.6008, "step": 858 }, { "epoch": 0.11785689785278178, "grad_norm": 1.375, "learning_rate": 1.9940042791528583e-05, "loss": 0.5941, "step": 859 }, { "epoch": 0.11799410029498525, "grad_norm": 1.390625, "learning_rate": 1.9939884856029696e-05, "loss": 0.6038, "step": 860 }, { "epoch": 0.11813130273718872, "grad_norm": 1.3046875, "learning_rate": 1.993972671341957e-05, "loss": 0.5772, "step": 861 }, { "epoch": 0.11826850517939219, "grad_norm": 1.3671875, "learning_rate": 1.99395683637015e-05, "loss": 0.5638, "step": 862 }, { "epoch": 0.11840570762159566, "grad_norm": 1.40625, "learning_rate": 1.993940980687879e-05, "loss": 0.6376, "step": 863 }, { "epoch": 0.11854291006379913, "grad_norm": 1.2890625, "learning_rate": 1.9939251042954738e-05, "loss": 0.4738, "step": 864 }, { "epoch": 0.1186801125060026, "grad_norm": 1.28125, "learning_rate": 1.9939092071932655e-05, "loss": 0.5426, "step": 865 }, { "epoch": 0.11881731494820608, "grad_norm": 1.265625, "learning_rate": 1.9938932893815855e-05, "loss": 0.5193, "step": 866 }, { "epoch": 0.11895451739040955, "grad_norm": 1.2578125, "learning_rate": 1.993877350860765e-05, "loss": 0.5579, "step": 867 }, { "epoch": 0.11909171983261302, "grad_norm": 1.4453125, "learning_rate": 1.9938613916311367e-05, "loss": 0.6131, "step": 868 }, { "epoch": 0.11922892227481649, "grad_norm": 1.2734375, "learning_rate": 1.9938454116930327e-05, "loss": 0.4531, "step": 869 }, { "epoch": 0.11936612471701996, "grad_norm": 1.3359375, "learning_rate": 1.993829411046786e-05, "loss": 0.5773, "step": 870 }, { "epoch": 0.11950332715922343, "grad_norm": 1.2890625, "learning_rate": 1.9938133896927303e-05, "loss": 0.6279, "step": 871 }, { "epoch": 0.1196405296014269, "grad_norm": 1.265625, "learning_rate": 1.993797347631199e-05, "loss": 0.5614, "step": 872 }, { "epoch": 0.11977773204363037, "grad_norm": 1.2734375, "learning_rate": 1.9937812848625268e-05, "loss": 0.5944, "step": 873 }, { "epoch": 0.11991493448583385, "grad_norm": 1.3515625, "learning_rate": 1.993765201387048e-05, "loss": 0.6063, "step": 874 }, { "epoch": 0.12005213692803732, "grad_norm": 1.2734375, "learning_rate": 1.9937490972050978e-05, "loss": 0.5948, "step": 875 }, { "epoch": 0.12018933937024079, "grad_norm": 1.328125, "learning_rate": 1.993732972317012e-05, "loss": 0.5958, "step": 876 }, { "epoch": 0.12032654181244426, "grad_norm": 1.234375, "learning_rate": 1.993716826723126e-05, "loss": 0.5751, "step": 877 }, { "epoch": 0.12046374425464773, "grad_norm": 1.2421875, "learning_rate": 1.9937006604237772e-05, "loss": 0.5121, "step": 878 }, { "epoch": 0.1206009466968512, "grad_norm": 1.390625, "learning_rate": 1.9936844734193016e-05, "loss": 0.589, "step": 879 }, { "epoch": 0.12073814913905467, "grad_norm": 1.3671875, "learning_rate": 1.9936682657100367e-05, "loss": 0.662, "step": 880 }, { "epoch": 0.12087535158125814, "grad_norm": 1.265625, "learning_rate": 1.9936520372963205e-05, "loss": 0.5412, "step": 881 }, { "epoch": 0.12101255402346162, "grad_norm": 1.28125, "learning_rate": 1.993635788178491e-05, "loss": 0.5873, "step": 882 }, { "epoch": 0.12114975646566509, "grad_norm": 1.390625, "learning_rate": 1.9936195183568862e-05, "loss": 0.6127, "step": 883 }, { "epoch": 0.12128695890786856, "grad_norm": 1.28125, "learning_rate": 1.993603227831846e-05, "loss": 0.5283, "step": 884 }, { "epoch": 0.12142416135007203, "grad_norm": 1.3203125, "learning_rate": 1.993586916603709e-05, "loss": 0.5583, "step": 885 }, { "epoch": 0.1215613637922755, "grad_norm": 1.3203125, "learning_rate": 1.993570584672816e-05, "loss": 0.5892, "step": 886 }, { "epoch": 0.12169856623447897, "grad_norm": 1.2734375, "learning_rate": 1.9935542320395065e-05, "loss": 0.5923, "step": 887 }, { "epoch": 0.12183576867668244, "grad_norm": 1.2890625, "learning_rate": 1.9935378587041218e-05, "loss": 0.5548, "step": 888 }, { "epoch": 0.12197297111888591, "grad_norm": 1.3515625, "learning_rate": 1.9935214646670024e-05, "loss": 0.5792, "step": 889 }, { "epoch": 0.12211017356108939, "grad_norm": 1.3828125, "learning_rate": 1.9935050499284904e-05, "loss": 0.6265, "step": 890 }, { "epoch": 0.12224737600329286, "grad_norm": 1.2578125, "learning_rate": 1.9934886144889278e-05, "loss": 0.545, "step": 891 }, { "epoch": 0.12238457844549633, "grad_norm": 1.6015625, "learning_rate": 1.993472158348657e-05, "loss": 0.7007, "step": 892 }, { "epoch": 0.1225217808876998, "grad_norm": 1.375, "learning_rate": 1.9934556815080205e-05, "loss": 0.636, "step": 893 }, { "epoch": 0.12265898332990327, "grad_norm": 1.3203125, "learning_rate": 1.9934391839673622e-05, "loss": 0.591, "step": 894 }, { "epoch": 0.12279618577210674, "grad_norm": 1.484375, "learning_rate": 1.9934226657270256e-05, "loss": 0.6286, "step": 895 }, { "epoch": 0.12293338821431021, "grad_norm": 1.3671875, "learning_rate": 1.993406126787355e-05, "loss": 0.5896, "step": 896 }, { "epoch": 0.12307059065651368, "grad_norm": 1.3359375, "learning_rate": 1.9933895671486943e-05, "loss": 0.5729, "step": 897 }, { "epoch": 0.12320779309871716, "grad_norm": 1.3984375, "learning_rate": 1.99337298681139e-05, "loss": 0.6626, "step": 898 }, { "epoch": 0.12334499554092063, "grad_norm": 1.3515625, "learning_rate": 1.9933563857757862e-05, "loss": 0.5488, "step": 899 }, { "epoch": 0.1234821979831241, "grad_norm": 1.2578125, "learning_rate": 1.9933397640422295e-05, "loss": 0.4904, "step": 900 }, { "epoch": 0.12361940042532757, "grad_norm": 1.375, "learning_rate": 1.993323121611066e-05, "loss": 0.6393, "step": 901 }, { "epoch": 0.12375660286753104, "grad_norm": 1.375, "learning_rate": 1.9933064584826426e-05, "loss": 0.5846, "step": 902 }, { "epoch": 0.12389380530973451, "grad_norm": 1.359375, "learning_rate": 1.993289774657306e-05, "loss": 0.588, "step": 903 }, { "epoch": 0.12403100775193798, "grad_norm": 1.34375, "learning_rate": 1.9932730701354046e-05, "loss": 0.5454, "step": 904 }, { "epoch": 0.12416821019414145, "grad_norm": 1.375, "learning_rate": 1.9932563449172863e-05, "loss": 0.5527, "step": 905 }, { "epoch": 0.12430541263634493, "grad_norm": 1.234375, "learning_rate": 1.9932395990032992e-05, "loss": 0.4871, "step": 906 }, { "epoch": 0.1244426150785484, "grad_norm": 1.2578125, "learning_rate": 1.9932228323937924e-05, "loss": 0.5287, "step": 907 }, { "epoch": 0.12457981752075187, "grad_norm": 1.265625, "learning_rate": 1.9932060450891154e-05, "loss": 0.5234, "step": 908 }, { "epoch": 0.12471701996295534, "grad_norm": 1.375, "learning_rate": 1.9931892370896174e-05, "loss": 0.5401, "step": 909 }, { "epoch": 0.12485422240515881, "grad_norm": 1.359375, "learning_rate": 1.99317240839565e-05, "loss": 0.5014, "step": 910 }, { "epoch": 0.12499142484736228, "grad_norm": 1.375, "learning_rate": 1.993155559007562e-05, "loss": 0.593, "step": 911 }, { "epoch": 0.12512862728956575, "grad_norm": 1.3203125, "learning_rate": 1.993138688925706e-05, "loss": 0.5818, "step": 912 }, { "epoch": 0.12526582973176922, "grad_norm": 1.2421875, "learning_rate": 1.993121798150433e-05, "loss": 0.5365, "step": 913 }, { "epoch": 0.1254030321739727, "grad_norm": 1.328125, "learning_rate": 1.9931048866820942e-05, "loss": 0.5999, "step": 914 }, { "epoch": 0.12554023461617617, "grad_norm": 1.1640625, "learning_rate": 1.9930879545210434e-05, "loss": 0.461, "step": 915 }, { "epoch": 0.12567743705837964, "grad_norm": 1.2578125, "learning_rate": 1.9930710016676323e-05, "loss": 0.5859, "step": 916 }, { "epoch": 0.1258146395005831, "grad_norm": 1.1640625, "learning_rate": 1.9930540281222146e-05, "loss": 0.5079, "step": 917 }, { "epoch": 0.12595184194278658, "grad_norm": 1.3671875, "learning_rate": 1.993037033885144e-05, "loss": 0.6281, "step": 918 }, { "epoch": 0.12608904438499005, "grad_norm": 1.2578125, "learning_rate": 1.993020018956774e-05, "loss": 0.5248, "step": 919 }, { "epoch": 0.12622624682719352, "grad_norm": 1.359375, "learning_rate": 1.99300298333746e-05, "loss": 0.6196, "step": 920 }, { "epoch": 0.126363449269397, "grad_norm": 1.3671875, "learning_rate": 1.9929859270275564e-05, "loss": 0.5754, "step": 921 }, { "epoch": 0.12650065171160046, "grad_norm": 1.40625, "learning_rate": 1.9929688500274186e-05, "loss": 0.5636, "step": 922 }, { "epoch": 0.12663785415380394, "grad_norm": 1.2578125, "learning_rate": 1.9929517523374027e-05, "loss": 0.5574, "step": 923 }, { "epoch": 0.1267750565960074, "grad_norm": 1.3046875, "learning_rate": 1.992934633957865e-05, "loss": 0.5471, "step": 924 }, { "epoch": 0.12691225903821088, "grad_norm": 1.3046875, "learning_rate": 1.9929174948891618e-05, "loss": 0.5494, "step": 925 }, { "epoch": 0.12704946148041435, "grad_norm": 1.3359375, "learning_rate": 1.9929003351316503e-05, "loss": 0.5681, "step": 926 }, { "epoch": 0.12718666392261782, "grad_norm": 1.3984375, "learning_rate": 1.9928831546856884e-05, "loss": 0.574, "step": 927 }, { "epoch": 0.1273238663648213, "grad_norm": 1.2890625, "learning_rate": 1.9928659535516336e-05, "loss": 0.5933, "step": 928 }, { "epoch": 0.12746106880702476, "grad_norm": 1.28125, "learning_rate": 1.992848731729845e-05, "loss": 0.5224, "step": 929 }, { "epoch": 0.12759827124922823, "grad_norm": 1.3203125, "learning_rate": 1.9928314892206805e-05, "loss": 0.5838, "step": 930 }, { "epoch": 0.1277354736914317, "grad_norm": 1.296875, "learning_rate": 1.9928142260245e-05, "loss": 0.5239, "step": 931 }, { "epoch": 0.12787267613363518, "grad_norm": 1.3046875, "learning_rate": 1.992796942141663e-05, "loss": 0.596, "step": 932 }, { "epoch": 0.12800987857583865, "grad_norm": 1.3203125, "learning_rate": 1.9927796375725297e-05, "loss": 0.5866, "step": 933 }, { "epoch": 0.12814708101804212, "grad_norm": 1.3828125, "learning_rate": 1.9927623123174608e-05, "loss": 0.5675, "step": 934 }, { "epoch": 0.1282842834602456, "grad_norm": 1.3515625, "learning_rate": 1.9927449663768173e-05, "loss": 0.6137, "step": 935 }, { "epoch": 0.12842148590244906, "grad_norm": 1.4609375, "learning_rate": 1.9927275997509604e-05, "loss": 0.625, "step": 936 }, { "epoch": 0.12855868834465253, "grad_norm": 1.4375, "learning_rate": 1.992710212440252e-05, "loss": 0.6748, "step": 937 }, { "epoch": 0.128695890786856, "grad_norm": 1.390625, "learning_rate": 1.992692804445054e-05, "loss": 0.5526, "step": 938 }, { "epoch": 0.12883309322905948, "grad_norm": 1.203125, "learning_rate": 1.9926753757657302e-05, "loss": 0.5482, "step": 939 }, { "epoch": 0.12897029567126295, "grad_norm": 1.4375, "learning_rate": 1.992657926402643e-05, "loss": 0.6099, "step": 940 }, { "epoch": 0.12910749811346642, "grad_norm": 1.4921875, "learning_rate": 1.9926404563561557e-05, "loss": 0.6486, "step": 941 }, { "epoch": 0.1292447005556699, "grad_norm": 1.25, "learning_rate": 1.992622965626633e-05, "loss": 0.5381, "step": 942 }, { "epoch": 0.12938190299787336, "grad_norm": 1.3359375, "learning_rate": 1.992605454214439e-05, "loss": 0.6498, "step": 943 }, { "epoch": 0.12951910544007683, "grad_norm": 1.265625, "learning_rate": 1.9925879221199386e-05, "loss": 0.5121, "step": 944 }, { "epoch": 0.1296563078822803, "grad_norm": 1.2734375, "learning_rate": 1.992570369343497e-05, "loss": 0.5635, "step": 945 }, { "epoch": 0.12979351032448377, "grad_norm": 1.1640625, "learning_rate": 1.9925527958854803e-05, "loss": 0.562, "step": 946 }, { "epoch": 0.12993071276668725, "grad_norm": 1.21875, "learning_rate": 1.9925352017462542e-05, "loss": 0.5461, "step": 947 }, { "epoch": 0.13006791520889072, "grad_norm": 1.4296875, "learning_rate": 1.992517586926186e-05, "loss": 0.6019, "step": 948 }, { "epoch": 0.1302051176510942, "grad_norm": 1.40625, "learning_rate": 1.9924999514256417e-05, "loss": 0.6417, "step": 949 }, { "epoch": 0.13034232009329766, "grad_norm": 1.3359375, "learning_rate": 1.9924822952449894e-05, "loss": 0.612, "step": 950 }, { "epoch": 0.13047952253550113, "grad_norm": 1.3671875, "learning_rate": 1.9924646183845972e-05, "loss": 0.648, "step": 951 }, { "epoch": 0.1306167249777046, "grad_norm": 1.28125, "learning_rate": 1.9924469208448327e-05, "loss": 0.5136, "step": 952 }, { "epoch": 0.13075392741990807, "grad_norm": 1.3359375, "learning_rate": 1.9924292026260654e-05, "loss": 0.5719, "step": 953 }, { "epoch": 0.13089112986211154, "grad_norm": 1.1953125, "learning_rate": 1.9924114637286637e-05, "loss": 0.5426, "step": 954 }, { "epoch": 0.13102833230431501, "grad_norm": 1.359375, "learning_rate": 1.992393704152998e-05, "loss": 0.5989, "step": 955 }, { "epoch": 0.1311655347465185, "grad_norm": 1.3203125, "learning_rate": 1.9923759238994383e-05, "loss": 0.518, "step": 956 }, { "epoch": 0.13130273718872196, "grad_norm": 1.484375, "learning_rate": 1.9923581229683546e-05, "loss": 0.6424, "step": 957 }, { "epoch": 0.13143993963092543, "grad_norm": 1.3125, "learning_rate": 1.992340301360118e-05, "loss": 0.5384, "step": 958 }, { "epoch": 0.1315771420731289, "grad_norm": 1.3203125, "learning_rate": 1.9923224590750994e-05, "loss": 0.6066, "step": 959 }, { "epoch": 0.13171434451533237, "grad_norm": 1.3125, "learning_rate": 1.9923045961136717e-05, "loss": 0.4991, "step": 960 }, { "epoch": 0.13185154695753584, "grad_norm": 1.4140625, "learning_rate": 1.992286712476206e-05, "loss": 0.544, "step": 961 }, { "epoch": 0.1319887493997393, "grad_norm": 1.28125, "learning_rate": 1.9922688081630756e-05, "loss": 0.5899, "step": 962 }, { "epoch": 0.13212595184194278, "grad_norm": 1.1484375, "learning_rate": 1.992250883174653e-05, "loss": 0.5029, "step": 963 }, { "epoch": 0.13226315428414626, "grad_norm": 1.3125, "learning_rate": 1.992232937511312e-05, "loss": 0.5831, "step": 964 }, { "epoch": 0.13240035672634973, "grad_norm": 1.234375, "learning_rate": 1.9922149711734268e-05, "loss": 0.4979, "step": 965 }, { "epoch": 0.1325375591685532, "grad_norm": 1.3046875, "learning_rate": 1.9921969841613714e-05, "loss": 0.5268, "step": 966 }, { "epoch": 0.13267476161075667, "grad_norm": 1.296875, "learning_rate": 1.9921789764755205e-05, "loss": 0.523, "step": 967 }, { "epoch": 0.13281196405296014, "grad_norm": 1.2578125, "learning_rate": 1.9921609481162494e-05, "loss": 0.5882, "step": 968 }, { "epoch": 0.1329491664951636, "grad_norm": 1.40625, "learning_rate": 1.992142899083934e-05, "loss": 0.6612, "step": 969 }, { "epoch": 0.13308636893736708, "grad_norm": 1.265625, "learning_rate": 1.99212482937895e-05, "loss": 0.532, "step": 970 }, { "epoch": 0.13322357137957055, "grad_norm": 1.1796875, "learning_rate": 1.9921067390016745e-05, "loss": 0.4995, "step": 971 }, { "epoch": 0.13336077382177403, "grad_norm": 1.1875, "learning_rate": 1.992088627952484e-05, "loss": 0.4953, "step": 972 }, { "epoch": 0.1334979762639775, "grad_norm": 1.3203125, "learning_rate": 1.9920704962317555e-05, "loss": 0.6231, "step": 973 }, { "epoch": 0.13363517870618097, "grad_norm": 1.3203125, "learning_rate": 1.9920523438398673e-05, "loss": 0.5876, "step": 974 }, { "epoch": 0.13377238114838444, "grad_norm": 1.234375, "learning_rate": 1.9920341707771978e-05, "loss": 0.4959, "step": 975 }, { "epoch": 0.1339095835905879, "grad_norm": 1.2265625, "learning_rate": 1.992015977044125e-05, "loss": 0.5544, "step": 976 }, { "epoch": 0.13404678603279138, "grad_norm": 1.1640625, "learning_rate": 1.9919977626410286e-05, "loss": 0.4532, "step": 977 }, { "epoch": 0.13418398847499485, "grad_norm": 1.265625, "learning_rate": 1.991979527568288e-05, "loss": 0.5445, "step": 978 }, { "epoch": 0.13432119091719832, "grad_norm": 1.265625, "learning_rate": 1.991961271826283e-05, "loss": 0.5599, "step": 979 }, { "epoch": 0.1344583933594018, "grad_norm": 1.296875, "learning_rate": 1.991942995415394e-05, "loss": 0.541, "step": 980 }, { "epoch": 0.13459559580160527, "grad_norm": 1.2265625, "learning_rate": 1.991924698336002e-05, "loss": 0.5228, "step": 981 }, { "epoch": 0.13473279824380874, "grad_norm": 1.3046875, "learning_rate": 1.991906380588488e-05, "loss": 0.5952, "step": 982 }, { "epoch": 0.1348700006860122, "grad_norm": 1.3046875, "learning_rate": 1.9918880421732336e-05, "loss": 0.6087, "step": 983 }, { "epoch": 0.13500720312821568, "grad_norm": 1.3359375, "learning_rate": 1.991869683090621e-05, "loss": 0.6256, "step": 984 }, { "epoch": 0.13514440557041915, "grad_norm": 1.3046875, "learning_rate": 1.9918513033410335e-05, "loss": 0.51, "step": 985 }, { "epoch": 0.13528160801262262, "grad_norm": 1.3046875, "learning_rate": 1.991832902924853e-05, "loss": 0.5413, "step": 986 }, { "epoch": 0.1354188104548261, "grad_norm": 1.21875, "learning_rate": 1.991814481842463e-05, "loss": 0.518, "step": 987 }, { "epoch": 0.13555601289702957, "grad_norm": 1.3828125, "learning_rate": 1.991796040094248e-05, "loss": 0.6332, "step": 988 }, { "epoch": 0.13569321533923304, "grad_norm": 1.328125, "learning_rate": 1.9917775776805916e-05, "loss": 0.552, "step": 989 }, { "epoch": 0.1358304177814365, "grad_norm": 1.296875, "learning_rate": 1.991759094601879e-05, "loss": 0.6036, "step": 990 }, { "epoch": 0.13596762022363998, "grad_norm": 1.1953125, "learning_rate": 1.991740590858495e-05, "loss": 0.4955, "step": 991 }, { "epoch": 0.13610482266584345, "grad_norm": 1.46875, "learning_rate": 1.991722066450825e-05, "loss": 0.5984, "step": 992 }, { "epoch": 0.13624202510804692, "grad_norm": 1.6796875, "learning_rate": 1.9917035213792558e-05, "loss": 0.6559, "step": 993 }, { "epoch": 0.1363792275502504, "grad_norm": 1.296875, "learning_rate": 1.9916849556441726e-05, "loss": 0.5802, "step": 994 }, { "epoch": 0.13651642999245386, "grad_norm": 1.3125, "learning_rate": 1.9916663692459633e-05, "loss": 0.6078, "step": 995 }, { "epoch": 0.13665363243465734, "grad_norm": 1.4296875, "learning_rate": 1.9916477621850148e-05, "loss": 0.7136, "step": 996 }, { "epoch": 0.1367908348768608, "grad_norm": 1.3125, "learning_rate": 1.9916291344617143e-05, "loss": 0.6125, "step": 997 }, { "epoch": 0.13692803731906428, "grad_norm": 1.28125, "learning_rate": 1.9916104860764508e-05, "loss": 0.5954, "step": 998 }, { "epoch": 0.13706523976126775, "grad_norm": 1.3515625, "learning_rate": 1.9915918170296123e-05, "loss": 0.6153, "step": 999 }, { "epoch": 0.13720244220347122, "grad_norm": 1.2578125, "learning_rate": 1.991573127321588e-05, "loss": 0.5108, "step": 1000 }, { "epoch": 0.1373396446456747, "grad_norm": 1.2265625, "learning_rate": 1.991554416952767e-05, "loss": 0.5389, "step": 1001 }, { "epoch": 0.13747684708787816, "grad_norm": 1.1640625, "learning_rate": 1.9915356859235397e-05, "loss": 0.4862, "step": 1002 }, { "epoch": 0.13761404953008163, "grad_norm": 1.2890625, "learning_rate": 1.9915169342342962e-05, "loss": 0.55, "step": 1003 }, { "epoch": 0.1377512519722851, "grad_norm": 1.28125, "learning_rate": 1.991498161885427e-05, "loss": 0.572, "step": 1004 }, { "epoch": 0.13788845441448858, "grad_norm": 1.2734375, "learning_rate": 1.9914793688773235e-05, "loss": 0.5111, "step": 1005 }, { "epoch": 0.13802565685669205, "grad_norm": 1.3359375, "learning_rate": 1.991460555210377e-05, "loss": 0.5784, "step": 1006 }, { "epoch": 0.13816285929889552, "grad_norm": 1.3046875, "learning_rate": 1.9914417208849797e-05, "loss": 0.5246, "step": 1007 }, { "epoch": 0.138300061741099, "grad_norm": 1.2421875, "learning_rate": 1.991422865901524e-05, "loss": 0.5461, "step": 1008 }, { "epoch": 0.13843726418330246, "grad_norm": 1.3046875, "learning_rate": 1.9914039902604027e-05, "loss": 0.5142, "step": 1009 }, { "epoch": 0.13857446662550593, "grad_norm": 1.2578125, "learning_rate": 1.9913850939620095e-05, "loss": 0.5816, "step": 1010 }, { "epoch": 0.1387116690677094, "grad_norm": 1.3515625, "learning_rate": 1.9913661770067375e-05, "loss": 0.5829, "step": 1011 }, { "epoch": 0.13884887150991287, "grad_norm": 1.3828125, "learning_rate": 1.991347239394981e-05, "loss": 0.6128, "step": 1012 }, { "epoch": 0.13898607395211635, "grad_norm": 1.390625, "learning_rate": 1.9913282811271354e-05, "loss": 0.6288, "step": 1013 }, { "epoch": 0.13912327639431982, "grad_norm": 1.3359375, "learning_rate": 1.9913093022035947e-05, "loss": 0.6378, "step": 1014 }, { "epoch": 0.1392604788365233, "grad_norm": 1.4140625, "learning_rate": 1.9912903026247544e-05, "loss": 0.6542, "step": 1015 }, { "epoch": 0.13939768127872676, "grad_norm": 1.203125, "learning_rate": 1.9912712823910113e-05, "loss": 0.4564, "step": 1016 }, { "epoch": 0.13953488372093023, "grad_norm": 1.3671875, "learning_rate": 1.9912522415027607e-05, "loss": 0.5976, "step": 1017 }, { "epoch": 0.1396720861631337, "grad_norm": 1.265625, "learning_rate": 1.9912331799604002e-05, "loss": 0.5637, "step": 1018 }, { "epoch": 0.13980928860533717, "grad_norm": 1.2734375, "learning_rate": 1.991214097764326e-05, "loss": 0.5594, "step": 1019 }, { "epoch": 0.13994649104754064, "grad_norm": 1.5, "learning_rate": 1.9911949949149368e-05, "loss": 0.5366, "step": 1020 }, { "epoch": 0.14008369348974412, "grad_norm": 1.4375, "learning_rate": 1.9911758714126298e-05, "loss": 0.638, "step": 1021 }, { "epoch": 0.1402208959319476, "grad_norm": 1.484375, "learning_rate": 1.9911567272578036e-05, "loss": 0.6017, "step": 1022 }, { "epoch": 0.14035809837415106, "grad_norm": 1.328125, "learning_rate": 1.9911375624508575e-05, "loss": 0.5994, "step": 1023 }, { "epoch": 0.14049530081635453, "grad_norm": 1.453125, "learning_rate": 1.9911183769921904e-05, "loss": 0.6708, "step": 1024 }, { "epoch": 0.140632503258558, "grad_norm": 1.171875, "learning_rate": 1.9910991708822024e-05, "loss": 0.4774, "step": 1025 }, { "epoch": 0.14076970570076147, "grad_norm": 1.1875, "learning_rate": 1.9910799441212933e-05, "loss": 0.5624, "step": 1026 }, { "epoch": 0.14090690814296494, "grad_norm": 1.484375, "learning_rate": 1.9910606967098643e-05, "loss": 0.6718, "step": 1027 }, { "epoch": 0.14104411058516841, "grad_norm": 1.3671875, "learning_rate": 1.9910414286483158e-05, "loss": 0.5772, "step": 1028 }, { "epoch": 0.14118131302737189, "grad_norm": 1.234375, "learning_rate": 1.9910221399370494e-05, "loss": 0.4738, "step": 1029 }, { "epoch": 0.14131851546957536, "grad_norm": 1.375, "learning_rate": 1.991002830576467e-05, "loss": 0.6495, "step": 1030 }, { "epoch": 0.14145571791177883, "grad_norm": 1.4296875, "learning_rate": 1.990983500566972e-05, "loss": 0.5496, "step": 1031 }, { "epoch": 0.1415929203539823, "grad_norm": 1.3671875, "learning_rate": 1.9909641499089653e-05, "loss": 0.5391, "step": 1032 }, { "epoch": 0.14173012279618577, "grad_norm": 1.3828125, "learning_rate": 1.9909447786028514e-05, "loss": 0.5264, "step": 1033 }, { "epoch": 0.14186732523838924, "grad_norm": 1.125, "learning_rate": 1.990925386649034e-05, "loss": 0.5095, "step": 1034 }, { "epoch": 0.1420045276805927, "grad_norm": 1.40625, "learning_rate": 1.990905974047916e-05, "loss": 0.6394, "step": 1035 }, { "epoch": 0.14214173012279618, "grad_norm": 1.34375, "learning_rate": 1.9908865407999033e-05, "loss": 0.5455, "step": 1036 }, { "epoch": 0.14227893256499966, "grad_norm": 1.3671875, "learning_rate": 1.9908670869053997e-05, "loss": 0.6198, "step": 1037 }, { "epoch": 0.14241613500720313, "grad_norm": 1.328125, "learning_rate": 1.9908476123648113e-05, "loss": 0.5559, "step": 1038 }, { "epoch": 0.1425533374494066, "grad_norm": 1.265625, "learning_rate": 1.9908281171785436e-05, "loss": 0.5836, "step": 1039 }, { "epoch": 0.14269053989161007, "grad_norm": 1.3046875, "learning_rate": 1.9908086013470025e-05, "loss": 0.5565, "step": 1040 }, { "epoch": 0.14282774233381354, "grad_norm": 1.3125, "learning_rate": 1.9907890648705948e-05, "loss": 0.5264, "step": 1041 }, { "epoch": 0.142964944776017, "grad_norm": 1.5, "learning_rate": 1.990769507749728e-05, "loss": 0.5937, "step": 1042 }, { "epoch": 0.14310214721822048, "grad_norm": 1.2890625, "learning_rate": 1.9907499299848094e-05, "loss": 0.568, "step": 1043 }, { "epoch": 0.14323934966042395, "grad_norm": 1.390625, "learning_rate": 1.9907303315762468e-05, "loss": 0.653, "step": 1044 }, { "epoch": 0.14337655210262742, "grad_norm": 1.2890625, "learning_rate": 1.9907107125244484e-05, "loss": 0.6316, "step": 1045 }, { "epoch": 0.1435137545448309, "grad_norm": 1.2890625, "learning_rate": 1.9906910728298232e-05, "loss": 0.6034, "step": 1046 }, { "epoch": 0.14365095698703437, "grad_norm": 1.203125, "learning_rate": 1.9906714124927804e-05, "loss": 0.5299, "step": 1047 }, { "epoch": 0.14378815942923784, "grad_norm": 1.5, "learning_rate": 1.9906517315137293e-05, "loss": 0.6052, "step": 1048 }, { "epoch": 0.1439253618714413, "grad_norm": 1.3671875, "learning_rate": 1.990632029893081e-05, "loss": 0.6188, "step": 1049 }, { "epoch": 0.14406256431364478, "grad_norm": 1.3828125, "learning_rate": 1.9906123076312453e-05, "loss": 0.598, "step": 1050 }, { "epoch": 0.14419976675584825, "grad_norm": 1.4921875, "learning_rate": 1.990592564728633e-05, "loss": 0.6427, "step": 1051 }, { "epoch": 0.14433696919805172, "grad_norm": 1.40625, "learning_rate": 1.9905728011856552e-05, "loss": 0.6356, "step": 1052 }, { "epoch": 0.1444741716402552, "grad_norm": 1.4609375, "learning_rate": 1.9905530170027244e-05, "loss": 0.5867, "step": 1053 }, { "epoch": 0.14461137408245867, "grad_norm": 1.3515625, "learning_rate": 1.990533212180253e-05, "loss": 0.6082, "step": 1054 }, { "epoch": 0.14474857652466214, "grad_norm": 1.390625, "learning_rate": 1.9905133867186528e-05, "loss": 0.6282, "step": 1055 }, { "epoch": 0.1448857789668656, "grad_norm": 1.390625, "learning_rate": 1.9904935406183376e-05, "loss": 0.6245, "step": 1056 }, { "epoch": 0.14502298140906908, "grad_norm": 1.2890625, "learning_rate": 1.9904736738797205e-05, "loss": 0.6162, "step": 1057 }, { "epoch": 0.14516018385127255, "grad_norm": 1.328125, "learning_rate": 1.9904537865032157e-05, "loss": 0.5939, "step": 1058 }, { "epoch": 0.14529738629347602, "grad_norm": 1.2109375, "learning_rate": 1.9904338784892373e-05, "loss": 0.4978, "step": 1059 }, { "epoch": 0.1454345887356795, "grad_norm": 1.4296875, "learning_rate": 1.9904139498382004e-05, "loss": 0.5936, "step": 1060 }, { "epoch": 0.14557179117788296, "grad_norm": 1.3125, "learning_rate": 1.9903940005505203e-05, "loss": 0.5224, "step": 1061 }, { "epoch": 0.14570899362008644, "grad_norm": 1.3359375, "learning_rate": 1.990374030626612e-05, "loss": 0.5832, "step": 1062 }, { "epoch": 0.1458461960622899, "grad_norm": 1.3671875, "learning_rate": 1.9903540400668928e-05, "loss": 0.555, "step": 1063 }, { "epoch": 0.14598339850449338, "grad_norm": 1.453125, "learning_rate": 1.9903340288717783e-05, "loss": 0.6289, "step": 1064 }, { "epoch": 0.14612060094669685, "grad_norm": 1.328125, "learning_rate": 1.9903139970416855e-05, "loss": 0.5454, "step": 1065 }, { "epoch": 0.14625780338890032, "grad_norm": 1.3984375, "learning_rate": 1.9902939445770322e-05, "loss": 0.577, "step": 1066 }, { "epoch": 0.1463950058311038, "grad_norm": 1.3125, "learning_rate": 1.9902738714782358e-05, "loss": 0.5746, "step": 1067 }, { "epoch": 0.14653220827330726, "grad_norm": 1.3828125, "learning_rate": 1.990253777745715e-05, "loss": 0.5503, "step": 1068 }, { "epoch": 0.14666941071551073, "grad_norm": 1.390625, "learning_rate": 1.990233663379888e-05, "loss": 0.6091, "step": 1069 }, { "epoch": 0.1468066131577142, "grad_norm": 5.15625, "learning_rate": 1.9902135283811744e-05, "loss": 0.6732, "step": 1070 }, { "epoch": 0.14694381559991768, "grad_norm": 1.3046875, "learning_rate": 1.9901933727499932e-05, "loss": 0.5939, "step": 1071 }, { "epoch": 0.14708101804212115, "grad_norm": 1.3125, "learning_rate": 1.990173196486765e-05, "loss": 0.5352, "step": 1072 }, { "epoch": 0.14721822048432462, "grad_norm": 1.2421875, "learning_rate": 1.9901529995919095e-05, "loss": 0.5508, "step": 1073 }, { "epoch": 0.1473554229265281, "grad_norm": 1.390625, "learning_rate": 1.9901327820658482e-05, "loss": 0.6097, "step": 1074 }, { "epoch": 0.14749262536873156, "grad_norm": 1.421875, "learning_rate": 1.990112543909002e-05, "loss": 0.6494, "step": 1075 }, { "epoch": 0.14762982781093503, "grad_norm": 1.4296875, "learning_rate": 1.990092285121793e-05, "loss": 0.6558, "step": 1076 }, { "epoch": 0.1477670302531385, "grad_norm": 3.671875, "learning_rate": 1.9900720057046424e-05, "loss": 0.598, "step": 1077 }, { "epoch": 0.14790423269534198, "grad_norm": 1.296875, "learning_rate": 1.9900517056579734e-05, "loss": 0.5616, "step": 1078 }, { "epoch": 0.14804143513754545, "grad_norm": 1.421875, "learning_rate": 1.990031384982209e-05, "loss": 0.5771, "step": 1079 }, { "epoch": 0.14817863757974892, "grad_norm": 1.2734375, "learning_rate": 1.9900110436777727e-05, "loss": 0.4808, "step": 1080 }, { "epoch": 0.1483158400219524, "grad_norm": 1.203125, "learning_rate": 1.989990681745088e-05, "loss": 0.5422, "step": 1081 }, { "epoch": 0.14845304246415586, "grad_norm": 1.2578125, "learning_rate": 1.989970299184579e-05, "loss": 0.5324, "step": 1082 }, { "epoch": 0.14859024490635933, "grad_norm": 1.4140625, "learning_rate": 1.989949895996671e-05, "loss": 0.5959, "step": 1083 }, { "epoch": 0.1487274473485628, "grad_norm": 1.34375, "learning_rate": 1.9899294721817887e-05, "loss": 0.5535, "step": 1084 }, { "epoch": 0.14886464979076627, "grad_norm": 1.2890625, "learning_rate": 1.989909027740358e-05, "loss": 0.5109, "step": 1085 }, { "epoch": 0.14900185223296974, "grad_norm": 1.2265625, "learning_rate": 1.9898885626728045e-05, "loss": 0.5005, "step": 1086 }, { "epoch": 0.14913905467517322, "grad_norm": 1.296875, "learning_rate": 1.989868076979555e-05, "loss": 0.5175, "step": 1087 }, { "epoch": 0.1492762571173767, "grad_norm": 1.359375, "learning_rate": 1.989847570661036e-05, "loss": 0.5881, "step": 1088 }, { "epoch": 0.14941345955958016, "grad_norm": 1.5546875, "learning_rate": 1.989827043717675e-05, "loss": 0.658, "step": 1089 }, { "epoch": 0.14955066200178363, "grad_norm": 1.234375, "learning_rate": 1.9898064961498998e-05, "loss": 0.4847, "step": 1090 }, { "epoch": 0.1496878644439871, "grad_norm": 1.3515625, "learning_rate": 1.989785927958138e-05, "loss": 0.638, "step": 1091 }, { "epoch": 0.14982506688619057, "grad_norm": 1.4375, "learning_rate": 1.9897653391428188e-05, "loss": 0.5347, "step": 1092 }, { "epoch": 0.14996226932839404, "grad_norm": 1.3046875, "learning_rate": 1.989744729704371e-05, "loss": 0.5632, "step": 1093 }, { "epoch": 0.15009947177059751, "grad_norm": 1.2734375, "learning_rate": 1.9897240996432238e-05, "loss": 0.5536, "step": 1094 }, { "epoch": 0.15023667421280099, "grad_norm": 1.3125, "learning_rate": 1.9897034489598073e-05, "loss": 0.5557, "step": 1095 }, { "epoch": 0.15037387665500446, "grad_norm": 1.2578125, "learning_rate": 1.9896827776545514e-05, "loss": 0.576, "step": 1096 }, { "epoch": 0.15051107909720793, "grad_norm": 1.28125, "learning_rate": 1.9896620857278875e-05, "loss": 0.6374, "step": 1097 }, { "epoch": 0.1506482815394114, "grad_norm": 1.2421875, "learning_rate": 1.9896413731802463e-05, "loss": 0.563, "step": 1098 }, { "epoch": 0.15078548398161487, "grad_norm": 1.25, "learning_rate": 1.9896206400120594e-05, "loss": 0.5426, "step": 1099 }, { "epoch": 0.15092268642381834, "grad_norm": 1.3046875, "learning_rate": 1.9895998862237587e-05, "loss": 0.5687, "step": 1100 }, { "epoch": 0.1510598888660218, "grad_norm": 1.3125, "learning_rate": 1.9895791118157768e-05, "loss": 0.5737, "step": 1101 }, { "epoch": 0.15119709130822528, "grad_norm": 1.1484375, "learning_rate": 1.9895583167885467e-05, "loss": 0.5017, "step": 1102 }, { "epoch": 0.15133429375042876, "grad_norm": 1.2109375, "learning_rate": 1.9895375011425015e-05, "loss": 0.4704, "step": 1103 }, { "epoch": 0.15147149619263223, "grad_norm": 1.390625, "learning_rate": 1.989516664878075e-05, "loss": 0.5725, "step": 1104 }, { "epoch": 0.1516086986348357, "grad_norm": 1.1796875, "learning_rate": 1.9894958079957014e-05, "loss": 0.4769, "step": 1105 }, { "epoch": 0.15174590107703917, "grad_norm": 1.3125, "learning_rate": 1.989474930495815e-05, "loss": 0.5688, "step": 1106 }, { "epoch": 0.15188310351924264, "grad_norm": 1.2890625, "learning_rate": 1.989454032378851e-05, "loss": 0.5548, "step": 1107 }, { "epoch": 0.1520203059614461, "grad_norm": 1.265625, "learning_rate": 1.989433113645245e-05, "loss": 0.5787, "step": 1108 }, { "epoch": 0.15215750840364958, "grad_norm": 1.2890625, "learning_rate": 1.9894121742954323e-05, "loss": 0.5638, "step": 1109 }, { "epoch": 0.15229471084585305, "grad_norm": 1.2421875, "learning_rate": 1.98939121432985e-05, "loss": 0.5439, "step": 1110 }, { "epoch": 0.15243191328805653, "grad_norm": 1.1953125, "learning_rate": 1.9893702337489345e-05, "loss": 0.5371, "step": 1111 }, { "epoch": 0.15256911573026, "grad_norm": 1.421875, "learning_rate": 1.9893492325531228e-05, "loss": 0.6843, "step": 1112 }, { "epoch": 0.15270631817246347, "grad_norm": 1.28125, "learning_rate": 1.9893282107428527e-05, "loss": 0.5327, "step": 1113 }, { "epoch": 0.15284352061466694, "grad_norm": 1.21875, "learning_rate": 1.9893071683185616e-05, "loss": 0.5586, "step": 1114 }, { "epoch": 0.1529807230568704, "grad_norm": 1.34375, "learning_rate": 1.989286105280689e-05, "loss": 0.61, "step": 1115 }, { "epoch": 0.15311792549907388, "grad_norm": 1.3671875, "learning_rate": 1.989265021629673e-05, "loss": 0.6014, "step": 1116 }, { "epoch": 0.15325512794127735, "grad_norm": 1.265625, "learning_rate": 1.989243917365953e-05, "loss": 0.4998, "step": 1117 }, { "epoch": 0.15339233038348082, "grad_norm": 1.421875, "learning_rate": 1.9892227924899693e-05, "loss": 0.6446, "step": 1118 }, { "epoch": 0.1535295328256843, "grad_norm": 1.40625, "learning_rate": 1.9892016470021617e-05, "loss": 0.5363, "step": 1119 }, { "epoch": 0.15366673526788777, "grad_norm": 1.3671875, "learning_rate": 1.9891804809029702e-05, "loss": 0.6081, "step": 1120 }, { "epoch": 0.15380393771009124, "grad_norm": 1.3515625, "learning_rate": 1.989159294192837e-05, "loss": 0.5661, "step": 1121 }, { "epoch": 0.1539411401522947, "grad_norm": 1.2734375, "learning_rate": 1.9891380868722025e-05, "loss": 0.5692, "step": 1122 }, { "epoch": 0.15407834259449818, "grad_norm": 1.3828125, "learning_rate": 1.989116858941509e-05, "loss": 0.5595, "step": 1123 }, { "epoch": 0.15421554503670165, "grad_norm": 1.28125, "learning_rate": 1.989095610401199e-05, "loss": 0.607, "step": 1124 }, { "epoch": 0.15435274747890512, "grad_norm": 1.2109375, "learning_rate": 1.9890743412517148e-05, "loss": 0.5432, "step": 1125 }, { "epoch": 0.1544899499211086, "grad_norm": 1.296875, "learning_rate": 1.9890530514935e-05, "loss": 0.6018, "step": 1126 }, { "epoch": 0.15462715236331206, "grad_norm": 1.203125, "learning_rate": 1.9890317411269978e-05, "loss": 0.4706, "step": 1127 }, { "epoch": 0.15476435480551554, "grad_norm": 1.328125, "learning_rate": 1.989010410152653e-05, "loss": 0.5967, "step": 1128 }, { "epoch": 0.154901557247719, "grad_norm": 1.3671875, "learning_rate": 1.9889890585709094e-05, "loss": 0.6015, "step": 1129 }, { "epoch": 0.15503875968992248, "grad_norm": 1.296875, "learning_rate": 1.9889676863822117e-05, "loss": 0.6059, "step": 1130 }, { "epoch": 0.15517596213212595, "grad_norm": 1.3125, "learning_rate": 1.9889462935870057e-05, "loss": 0.5543, "step": 1131 }, { "epoch": 0.15531316457432942, "grad_norm": 1.3671875, "learning_rate": 1.9889248801857367e-05, "loss": 0.6201, "step": 1132 }, { "epoch": 0.1554503670165329, "grad_norm": 1.3359375, "learning_rate": 1.9889034461788517e-05, "loss": 0.603, "step": 1133 }, { "epoch": 0.15558756945873636, "grad_norm": 1.296875, "learning_rate": 1.9888819915667966e-05, "loss": 0.6107, "step": 1134 }, { "epoch": 0.15572477190093983, "grad_norm": 1.5078125, "learning_rate": 1.9888605163500183e-05, "loss": 0.6472, "step": 1135 }, { "epoch": 0.1558619743431433, "grad_norm": 1.25, "learning_rate": 1.988839020528965e-05, "loss": 0.5241, "step": 1136 }, { "epoch": 0.15599917678534678, "grad_norm": 1.234375, "learning_rate": 1.9888175041040835e-05, "loss": 0.5492, "step": 1137 }, { "epoch": 0.15613637922755025, "grad_norm": 1.1875, "learning_rate": 1.9887959670758233e-05, "loss": 0.5281, "step": 1138 }, { "epoch": 0.15627358166975372, "grad_norm": 1.3515625, "learning_rate": 1.9887744094446328e-05, "loss": 0.6195, "step": 1139 }, { "epoch": 0.1564107841119572, "grad_norm": 1.34375, "learning_rate": 1.9887528312109607e-05, "loss": 0.5756, "step": 1140 }, { "epoch": 0.15654798655416066, "grad_norm": 1.296875, "learning_rate": 1.988731232375257e-05, "loss": 0.5684, "step": 1141 }, { "epoch": 0.15668518899636413, "grad_norm": 1.203125, "learning_rate": 1.9887096129379716e-05, "loss": 0.4759, "step": 1142 }, { "epoch": 0.1568223914385676, "grad_norm": 1.4140625, "learning_rate": 1.9886879728995555e-05, "loss": 0.6613, "step": 1143 }, { "epoch": 0.15695959388077108, "grad_norm": 1.3359375, "learning_rate": 1.988666312260459e-05, "loss": 0.5895, "step": 1144 }, { "epoch": 0.15709679632297455, "grad_norm": 1.375, "learning_rate": 1.9886446310211332e-05, "loss": 0.6145, "step": 1145 }, { "epoch": 0.15723399876517802, "grad_norm": 1.328125, "learning_rate": 1.9886229291820305e-05, "loss": 0.5877, "step": 1146 }, { "epoch": 0.1573712012073815, "grad_norm": 1.3125, "learning_rate": 1.9886012067436025e-05, "loss": 0.6091, "step": 1147 }, { "epoch": 0.15750840364958496, "grad_norm": 1.3203125, "learning_rate": 1.9885794637063026e-05, "loss": 0.513, "step": 1148 }, { "epoch": 0.15764560609178843, "grad_norm": 1.4140625, "learning_rate": 1.988557700070583e-05, "loss": 0.5998, "step": 1149 }, { "epoch": 0.1577828085339919, "grad_norm": 1.3203125, "learning_rate": 1.9885359158368977e-05, "loss": 0.6599, "step": 1150 }, { "epoch": 0.15792001097619537, "grad_norm": 1.2734375, "learning_rate": 1.9885141110057006e-05, "loss": 0.5464, "step": 1151 }, { "epoch": 0.15805721341839885, "grad_norm": 1.34375, "learning_rate": 1.988492285577446e-05, "loss": 0.6196, "step": 1152 }, { "epoch": 0.15819441586060232, "grad_norm": 1.484375, "learning_rate": 1.9884704395525884e-05, "loss": 0.6599, "step": 1153 }, { "epoch": 0.1583316183028058, "grad_norm": 1.3828125, "learning_rate": 1.9884485729315833e-05, "loss": 0.5768, "step": 1154 }, { "epoch": 0.15846882074500926, "grad_norm": 1.3671875, "learning_rate": 1.988426685714886e-05, "loss": 0.544, "step": 1155 }, { "epoch": 0.15860602318721273, "grad_norm": 1.2890625, "learning_rate": 1.988404777902953e-05, "loss": 0.6529, "step": 1156 }, { "epoch": 0.1587432256294162, "grad_norm": 1.25, "learning_rate": 1.98838284949624e-05, "loss": 0.5014, "step": 1157 }, { "epoch": 0.15888042807161967, "grad_norm": 1.25, "learning_rate": 1.9883609004952048e-05, "loss": 0.5314, "step": 1158 }, { "epoch": 0.15901763051382314, "grad_norm": 1.34375, "learning_rate": 1.9883389309003044e-05, "loss": 0.5715, "step": 1159 }, { "epoch": 0.15915483295602662, "grad_norm": 1.3046875, "learning_rate": 1.9883169407119964e-05, "loss": 0.5264, "step": 1160 }, { "epoch": 0.1592920353982301, "grad_norm": 1.3515625, "learning_rate": 1.988294929930739e-05, "loss": 0.6733, "step": 1161 }, { "epoch": 0.15942923784043356, "grad_norm": 1.2109375, "learning_rate": 1.988272898556991e-05, "loss": 0.4977, "step": 1162 }, { "epoch": 0.15956644028263703, "grad_norm": 1.2890625, "learning_rate": 1.9882508465912116e-05, "loss": 0.5704, "step": 1163 }, { "epoch": 0.1597036427248405, "grad_norm": 1.265625, "learning_rate": 1.98822877403386e-05, "loss": 0.5175, "step": 1164 }, { "epoch": 0.15984084516704397, "grad_norm": 1.3125, "learning_rate": 1.988206680885396e-05, "loss": 0.5468, "step": 1165 }, { "epoch": 0.15997804760924744, "grad_norm": 1.3359375, "learning_rate": 1.9881845671462804e-05, "loss": 0.5744, "step": 1166 }, { "epoch": 0.1601152500514509, "grad_norm": 1.3828125, "learning_rate": 1.9881624328169734e-05, "loss": 0.631, "step": 1167 }, { "epoch": 0.16025245249365438, "grad_norm": 1.2421875, "learning_rate": 1.9881402778979367e-05, "loss": 0.4838, "step": 1168 }, { "epoch": 0.16038965493585786, "grad_norm": 1.3671875, "learning_rate": 1.9881181023896317e-05, "loss": 0.5877, "step": 1169 }, { "epoch": 0.16052685737806133, "grad_norm": 1.265625, "learning_rate": 1.9880959062925206e-05, "loss": 0.5345, "step": 1170 }, { "epoch": 0.1606640598202648, "grad_norm": 1.421875, "learning_rate": 1.988073689607066e-05, "loss": 0.6584, "step": 1171 }, { "epoch": 0.16080126226246827, "grad_norm": 1.4140625, "learning_rate": 1.9880514523337303e-05, "loss": 0.5368, "step": 1172 }, { "epoch": 0.16093846470467174, "grad_norm": 1.375, "learning_rate": 1.988029194472977e-05, "loss": 0.6038, "step": 1173 }, { "epoch": 0.1610756671468752, "grad_norm": 1.2265625, "learning_rate": 1.9880069160252703e-05, "loss": 0.5583, "step": 1174 }, { "epoch": 0.16121286958907868, "grad_norm": 1.328125, "learning_rate": 1.9879846169910738e-05, "loss": 0.5507, "step": 1175 }, { "epoch": 0.16135007203128215, "grad_norm": 1.2109375, "learning_rate": 1.9879622973708526e-05, "loss": 0.4883, "step": 1176 }, { "epoch": 0.16148727447348563, "grad_norm": 1.2109375, "learning_rate": 1.9879399571650717e-05, "loss": 0.5128, "step": 1177 }, { "epoch": 0.1616244769156891, "grad_norm": 1.2890625, "learning_rate": 1.9879175963741964e-05, "loss": 0.5645, "step": 1178 }, { "epoch": 0.16176167935789257, "grad_norm": 1.4609375, "learning_rate": 1.987895214998693e-05, "loss": 0.6296, "step": 1179 }, { "epoch": 0.16189888180009604, "grad_norm": 1.375, "learning_rate": 1.9878728130390266e-05, "loss": 0.6226, "step": 1180 }, { "epoch": 0.1620360842422995, "grad_norm": 1.21875, "learning_rate": 1.987850390495666e-05, "loss": 0.5928, "step": 1181 }, { "epoch": 0.16217328668450298, "grad_norm": 1.2890625, "learning_rate": 1.987827947369077e-05, "loss": 0.59, "step": 1182 }, { "epoch": 0.16231048912670645, "grad_norm": 1.3203125, "learning_rate": 1.9878054836597274e-05, "loss": 0.565, "step": 1183 }, { "epoch": 0.16244769156890992, "grad_norm": 1.25, "learning_rate": 1.9877829993680856e-05, "loss": 0.5652, "step": 1184 }, { "epoch": 0.1625848940111134, "grad_norm": 1.296875, "learning_rate": 1.9877604944946197e-05, "loss": 0.5283, "step": 1185 }, { "epoch": 0.16272209645331687, "grad_norm": 1.2890625, "learning_rate": 1.9877379690397995e-05, "loss": 0.6157, "step": 1186 }, { "epoch": 0.16285929889552034, "grad_norm": 1.296875, "learning_rate": 1.987715423004093e-05, "loss": 0.5231, "step": 1187 }, { "epoch": 0.1629965013377238, "grad_norm": 1.234375, "learning_rate": 1.987692856387971e-05, "loss": 0.4849, "step": 1188 }, { "epoch": 0.16313370377992728, "grad_norm": 1.3359375, "learning_rate": 1.9876702691919034e-05, "loss": 0.4888, "step": 1189 }, { "epoch": 0.16327090622213075, "grad_norm": 1.3046875, "learning_rate": 1.9876476614163603e-05, "loss": 0.5078, "step": 1190 }, { "epoch": 0.16340810866433422, "grad_norm": 1.3359375, "learning_rate": 1.987625033061814e-05, "loss": 0.5883, "step": 1191 }, { "epoch": 0.1635453111065377, "grad_norm": 1.3828125, "learning_rate": 1.9876023841287347e-05, "loss": 0.5962, "step": 1192 }, { "epoch": 0.16368251354874117, "grad_norm": 1.15625, "learning_rate": 1.9875797146175954e-05, "loss": 0.4851, "step": 1193 }, { "epoch": 0.16381971599094464, "grad_norm": 1.2578125, "learning_rate": 1.9875570245288674e-05, "loss": 0.5079, "step": 1194 }, { "epoch": 0.1639569184331481, "grad_norm": 1.2890625, "learning_rate": 1.9875343138630244e-05, "loss": 0.5524, "step": 1195 }, { "epoch": 0.16409412087535158, "grad_norm": 1.4453125, "learning_rate": 1.987511582620539e-05, "loss": 0.6655, "step": 1196 }, { "epoch": 0.16423132331755505, "grad_norm": 1.3515625, "learning_rate": 1.9874888308018855e-05, "loss": 0.6289, "step": 1197 }, { "epoch": 0.16436852575975852, "grad_norm": 1.3203125, "learning_rate": 1.9874660584075372e-05, "loss": 0.5439, "step": 1198 }, { "epoch": 0.164505728201962, "grad_norm": 1.453125, "learning_rate": 1.987443265437969e-05, "loss": 0.7194, "step": 1199 }, { "epoch": 0.16464293064416546, "grad_norm": 1.359375, "learning_rate": 1.9874204518936556e-05, "loss": 0.6525, "step": 1200 }, { "epoch": 0.16478013308636894, "grad_norm": 1.2578125, "learning_rate": 1.9873976177750725e-05, "loss": 0.5726, "step": 1201 }, { "epoch": 0.1649173355285724, "grad_norm": 1.2421875, "learning_rate": 1.987374763082696e-05, "loss": 0.4827, "step": 1202 }, { "epoch": 0.16505453797077588, "grad_norm": 1.1796875, "learning_rate": 1.9873518878170012e-05, "loss": 0.5447, "step": 1203 }, { "epoch": 0.16519174041297935, "grad_norm": 1.328125, "learning_rate": 1.9873289919784656e-05, "loss": 0.548, "step": 1204 }, { "epoch": 0.16532894285518282, "grad_norm": 1.2109375, "learning_rate": 1.9873060755675658e-05, "loss": 0.5008, "step": 1205 }, { "epoch": 0.1654661452973863, "grad_norm": 1.34375, "learning_rate": 1.9872831385847796e-05, "loss": 0.5809, "step": 1206 }, { "epoch": 0.16560334773958976, "grad_norm": 1.28125, "learning_rate": 1.987260181030585e-05, "loss": 0.5946, "step": 1207 }, { "epoch": 0.16574055018179323, "grad_norm": 1.3359375, "learning_rate": 1.9872372029054602e-05, "loss": 0.599, "step": 1208 }, { "epoch": 0.1658777526239967, "grad_norm": 1.265625, "learning_rate": 1.9872142042098836e-05, "loss": 0.4971, "step": 1209 }, { "epoch": 0.16601495506620018, "grad_norm": 1.2421875, "learning_rate": 1.9871911849443348e-05, "loss": 0.5391, "step": 1210 }, { "epoch": 0.16615215750840365, "grad_norm": 1.3125, "learning_rate": 1.9871681451092935e-05, "loss": 0.6271, "step": 1211 }, { "epoch": 0.16628935995060712, "grad_norm": 1.15625, "learning_rate": 1.98714508470524e-05, "loss": 0.506, "step": 1212 }, { "epoch": 0.1664265623928106, "grad_norm": 1.375, "learning_rate": 1.9871220037326538e-05, "loss": 0.6227, "step": 1213 }, { "epoch": 0.16656376483501406, "grad_norm": 1.328125, "learning_rate": 1.987098902192017e-05, "loss": 0.6172, "step": 1214 }, { "epoch": 0.16670096727721753, "grad_norm": 1.234375, "learning_rate": 1.9870757800838103e-05, "loss": 0.5769, "step": 1215 }, { "epoch": 0.166838169719421, "grad_norm": 1.234375, "learning_rate": 1.987052637408516e-05, "loss": 0.5811, "step": 1216 }, { "epoch": 0.16697537216162447, "grad_norm": 1.140625, "learning_rate": 1.987029474166615e-05, "loss": 0.5187, "step": 1217 }, { "epoch": 0.16711257460382795, "grad_norm": 1.2890625, "learning_rate": 1.9870062903585916e-05, "loss": 0.6049, "step": 1218 }, { "epoch": 0.16724977704603142, "grad_norm": 1.2265625, "learning_rate": 1.9869830859849277e-05, "loss": 0.6028, "step": 1219 }, { "epoch": 0.1673869794882349, "grad_norm": 1.296875, "learning_rate": 1.9869598610461075e-05, "loss": 0.5703, "step": 1220 }, { "epoch": 0.16752418193043836, "grad_norm": 1.3125, "learning_rate": 1.9869366155426145e-05, "loss": 0.5429, "step": 1221 }, { "epoch": 0.16766138437264183, "grad_norm": 1.21875, "learning_rate": 1.9869133494749335e-05, "loss": 0.5095, "step": 1222 }, { "epoch": 0.1677985868148453, "grad_norm": 1.2734375, "learning_rate": 1.9868900628435487e-05, "loss": 0.5681, "step": 1223 }, { "epoch": 0.16793578925704877, "grad_norm": 1.21875, "learning_rate": 1.9868667556489457e-05, "loss": 0.5261, "step": 1224 }, { "epoch": 0.16807299169925224, "grad_norm": 1.3515625, "learning_rate": 1.98684342789161e-05, "loss": 0.6654, "step": 1225 }, { "epoch": 0.16821019414145572, "grad_norm": 1.328125, "learning_rate": 1.9868200795720275e-05, "loss": 0.5893, "step": 1226 }, { "epoch": 0.1683473965836592, "grad_norm": 1.1484375, "learning_rate": 1.986796710690685e-05, "loss": 0.4721, "step": 1227 }, { "epoch": 0.16848459902586266, "grad_norm": 1.3203125, "learning_rate": 1.9867733212480698e-05, "loss": 0.5674, "step": 1228 }, { "epoch": 0.16862180146806613, "grad_norm": 1.3046875, "learning_rate": 1.9867499112446683e-05, "loss": 0.5477, "step": 1229 }, { "epoch": 0.1687590039102696, "grad_norm": 1.3671875, "learning_rate": 1.986726480680969e-05, "loss": 0.6335, "step": 1230 }, { "epoch": 0.16889620635247307, "grad_norm": 1.328125, "learning_rate": 1.9867030295574597e-05, "loss": 0.5851, "step": 1231 }, { "epoch": 0.16903340879467654, "grad_norm": 1.203125, "learning_rate": 1.986679557874629e-05, "loss": 0.4982, "step": 1232 }, { "epoch": 0.16917061123688001, "grad_norm": 1.125, "learning_rate": 1.9866560656329664e-05, "loss": 0.4657, "step": 1233 }, { "epoch": 0.16930781367908349, "grad_norm": 1.2890625, "learning_rate": 1.986632552832961e-05, "loss": 0.5659, "step": 1234 }, { "epoch": 0.16944501612128696, "grad_norm": 1.1640625, "learning_rate": 1.986609019475103e-05, "loss": 0.4745, "step": 1235 }, { "epoch": 0.16958221856349043, "grad_norm": 1.34375, "learning_rate": 1.9865854655598825e-05, "loss": 0.6649, "step": 1236 }, { "epoch": 0.1697194210056939, "grad_norm": 1.2265625, "learning_rate": 1.9865618910877904e-05, "loss": 0.5465, "step": 1237 }, { "epoch": 0.16985662344789737, "grad_norm": 1.1875, "learning_rate": 1.986538296059318e-05, "loss": 0.5488, "step": 1238 }, { "epoch": 0.16999382589010084, "grad_norm": 1.296875, "learning_rate": 1.9865146804749567e-05, "loss": 0.5927, "step": 1239 }, { "epoch": 0.1701310283323043, "grad_norm": 1.2578125, "learning_rate": 1.9864910443351987e-05, "loss": 0.5338, "step": 1240 }, { "epoch": 0.17026823077450778, "grad_norm": 1.203125, "learning_rate": 1.9864673876405366e-05, "loss": 0.5042, "step": 1241 }, { "epoch": 0.17040543321671126, "grad_norm": 1.2109375, "learning_rate": 1.9864437103914632e-05, "loss": 0.5029, "step": 1242 }, { "epoch": 0.17054263565891473, "grad_norm": 1.2578125, "learning_rate": 1.986420012588472e-05, "loss": 0.5775, "step": 1243 }, { "epoch": 0.1706798381011182, "grad_norm": 1.5234375, "learning_rate": 1.9863962942320563e-05, "loss": 0.6027, "step": 1244 }, { "epoch": 0.17081704054332167, "grad_norm": 1.3203125, "learning_rate": 1.986372555322711e-05, "loss": 0.609, "step": 1245 }, { "epoch": 0.17095424298552514, "grad_norm": 1.4609375, "learning_rate": 1.9863487958609302e-05, "loss": 0.5922, "step": 1246 }, { "epoch": 0.1710914454277286, "grad_norm": 1.2265625, "learning_rate": 1.9863250158472095e-05, "loss": 0.5279, "step": 1247 }, { "epoch": 0.17122864786993208, "grad_norm": 1.2109375, "learning_rate": 1.9863012152820436e-05, "loss": 0.548, "step": 1248 }, { "epoch": 0.17136585031213555, "grad_norm": 1.28125, "learning_rate": 1.986277394165929e-05, "loss": 0.6105, "step": 1249 }, { "epoch": 0.17150305275433902, "grad_norm": 1.3515625, "learning_rate": 1.9862535524993617e-05, "loss": 0.5188, "step": 1250 }, { "epoch": 0.1716402551965425, "grad_norm": 1.2421875, "learning_rate": 1.986229690282839e-05, "loss": 0.5308, "step": 1251 }, { "epoch": 0.17177745763874597, "grad_norm": 1.375, "learning_rate": 1.9862058075168573e-05, "loss": 0.5599, "step": 1252 }, { "epoch": 0.17191466008094944, "grad_norm": 1.4140625, "learning_rate": 1.9861819042019152e-05, "loss": 0.5473, "step": 1253 }, { "epoch": 0.1720518625231529, "grad_norm": 1.3046875, "learning_rate": 1.9861579803385098e-05, "loss": 0.5672, "step": 1254 }, { "epoch": 0.17218906496535638, "grad_norm": 1.4296875, "learning_rate": 1.9861340359271405e-05, "loss": 0.6653, "step": 1255 }, { "epoch": 0.17232626740755985, "grad_norm": 1.3671875, "learning_rate": 1.9861100709683054e-05, "loss": 0.5926, "step": 1256 }, { "epoch": 0.17246346984976332, "grad_norm": 1.3125, "learning_rate": 1.986086085462504e-05, "loss": 0.6029, "step": 1257 }, { "epoch": 0.1726006722919668, "grad_norm": 1.25, "learning_rate": 1.986062079410237e-05, "loss": 0.5387, "step": 1258 }, { "epoch": 0.17273787473417027, "grad_norm": 1.3515625, "learning_rate": 1.9860380528120034e-05, "loss": 0.579, "step": 1259 }, { "epoch": 0.17287507717637374, "grad_norm": 1.3828125, "learning_rate": 1.9860140056683044e-05, "loss": 0.4991, "step": 1260 }, { "epoch": 0.1730122796185772, "grad_norm": 1.390625, "learning_rate": 1.9859899379796407e-05, "loss": 0.6251, "step": 1261 }, { "epoch": 0.17314948206078068, "grad_norm": 1.2734375, "learning_rate": 1.9859658497465144e-05, "loss": 0.5457, "step": 1262 }, { "epoch": 0.17328668450298415, "grad_norm": 1.4375, "learning_rate": 1.985941740969427e-05, "loss": 0.5808, "step": 1263 }, { "epoch": 0.17342388694518762, "grad_norm": 1.21875, "learning_rate": 1.9859176116488808e-05, "loss": 0.4712, "step": 1264 }, { "epoch": 0.1735610893873911, "grad_norm": 1.3828125, "learning_rate": 1.9858934617853787e-05, "loss": 0.6258, "step": 1265 }, { "epoch": 0.17369829182959456, "grad_norm": 1.21875, "learning_rate": 1.9858692913794236e-05, "loss": 0.5609, "step": 1266 }, { "epoch": 0.17383549427179804, "grad_norm": 1.375, "learning_rate": 1.9858451004315198e-05, "loss": 0.6124, "step": 1267 }, { "epoch": 0.1739726967140015, "grad_norm": 1.3203125, "learning_rate": 1.9858208889421706e-05, "loss": 0.5739, "step": 1268 }, { "epoch": 0.17410989915620498, "grad_norm": 1.3515625, "learning_rate": 1.985796656911881e-05, "loss": 0.6377, "step": 1269 }, { "epoch": 0.17424710159840845, "grad_norm": 1.375, "learning_rate": 1.9857724043411557e-05, "loss": 0.6072, "step": 1270 }, { "epoch": 0.17438430404061192, "grad_norm": 1.28125, "learning_rate": 1.9857481312305e-05, "loss": 0.5991, "step": 1271 }, { "epoch": 0.1745215064828154, "grad_norm": 1.3359375, "learning_rate": 1.9857238375804196e-05, "loss": 0.6073, "step": 1272 }, { "epoch": 0.17465870892501886, "grad_norm": 1.078125, "learning_rate": 1.985699523391421e-05, "loss": 0.4626, "step": 1273 }, { "epoch": 0.17479591136722233, "grad_norm": 1.234375, "learning_rate": 1.9856751886640105e-05, "loss": 0.5351, "step": 1274 }, { "epoch": 0.1749331138094258, "grad_norm": 1.25, "learning_rate": 1.9856508333986952e-05, "loss": 0.558, "step": 1275 }, { "epoch": 0.17507031625162928, "grad_norm": 1.34375, "learning_rate": 1.985626457595983e-05, "loss": 0.6131, "step": 1276 }, { "epoch": 0.17520751869383275, "grad_norm": 1.2890625, "learning_rate": 1.985602061256381e-05, "loss": 0.5616, "step": 1277 }, { "epoch": 0.17534472113603622, "grad_norm": 1.2890625, "learning_rate": 1.9855776443803985e-05, "loss": 0.5321, "step": 1278 }, { "epoch": 0.1754819235782397, "grad_norm": 1.234375, "learning_rate": 1.9855532069685432e-05, "loss": 0.5229, "step": 1279 }, { "epoch": 0.17561912602044316, "grad_norm": 1.203125, "learning_rate": 1.9855287490213255e-05, "loss": 0.461, "step": 1280 }, { "epoch": 0.17575632846264663, "grad_norm": 1.46875, "learning_rate": 1.985504270539254e-05, "loss": 0.6011, "step": 1281 }, { "epoch": 0.1758935309048501, "grad_norm": 1.3046875, "learning_rate": 1.985479771522839e-05, "loss": 0.5481, "step": 1282 }, { "epoch": 0.17603073334705358, "grad_norm": 1.328125, "learning_rate": 1.9854552519725908e-05, "loss": 0.6486, "step": 1283 }, { "epoch": 0.17616793578925705, "grad_norm": 1.328125, "learning_rate": 1.985430711889021e-05, "loss": 0.6145, "step": 1284 }, { "epoch": 0.17630513823146052, "grad_norm": 1.296875, "learning_rate": 1.9854061512726404e-05, "loss": 0.5878, "step": 1285 }, { "epoch": 0.176442340673664, "grad_norm": 1.359375, "learning_rate": 1.9853815701239607e-05, "loss": 0.6323, "step": 1286 }, { "epoch": 0.17657954311586746, "grad_norm": 1.1796875, "learning_rate": 1.9853569684434945e-05, "loss": 0.4753, "step": 1287 }, { "epoch": 0.17671674555807093, "grad_norm": 1.2890625, "learning_rate": 1.9853323462317536e-05, "loss": 0.612, "step": 1288 }, { "epoch": 0.1768539480002744, "grad_norm": 1.265625, "learning_rate": 1.985307703489252e-05, "loss": 0.6412, "step": 1289 }, { "epoch": 0.17699115044247787, "grad_norm": 1.4921875, "learning_rate": 1.9852830402165026e-05, "loss": 0.5727, "step": 1290 }, { "epoch": 0.17712835288468134, "grad_norm": 1.4140625, "learning_rate": 1.9852583564140196e-05, "loss": 0.6018, "step": 1291 }, { "epoch": 0.17726555532688482, "grad_norm": 1.359375, "learning_rate": 1.985233652082317e-05, "loss": 0.582, "step": 1292 }, { "epoch": 0.1774027577690883, "grad_norm": 1.2578125, "learning_rate": 1.98520892722191e-05, "loss": 0.5847, "step": 1293 }, { "epoch": 0.17753996021129176, "grad_norm": 1.3203125, "learning_rate": 1.9851841818333127e-05, "loss": 0.6316, "step": 1294 }, { "epoch": 0.17767716265349523, "grad_norm": 1.359375, "learning_rate": 1.985159415917042e-05, "loss": 0.632, "step": 1295 }, { "epoch": 0.1778143650956987, "grad_norm": 1.234375, "learning_rate": 1.985134629473614e-05, "loss": 0.563, "step": 1296 }, { "epoch": 0.17795156753790217, "grad_norm": 1.390625, "learning_rate": 1.9851098225035436e-05, "loss": 0.6076, "step": 1297 }, { "epoch": 0.17808876998010564, "grad_norm": 1.3515625, "learning_rate": 1.9850849950073493e-05, "loss": 0.5431, "step": 1298 }, { "epoch": 0.17822597242230911, "grad_norm": 1.359375, "learning_rate": 1.9850601469855473e-05, "loss": 0.5858, "step": 1299 }, { "epoch": 0.17836317486451259, "grad_norm": 1.296875, "learning_rate": 1.985035278438656e-05, "loss": 0.6244, "step": 1300 }, { "epoch": 0.17850037730671606, "grad_norm": 1.296875, "learning_rate": 1.9850103893671936e-05, "loss": 0.5958, "step": 1301 }, { "epoch": 0.17863757974891953, "grad_norm": 1.296875, "learning_rate": 1.984985479771678e-05, "loss": 0.5632, "step": 1302 }, { "epoch": 0.178774782191123, "grad_norm": 1.34375, "learning_rate": 1.9849605496526293e-05, "loss": 0.5877, "step": 1303 }, { "epoch": 0.17891198463332647, "grad_norm": 1.2265625, "learning_rate": 1.984935599010566e-05, "loss": 0.5364, "step": 1304 }, { "epoch": 0.17904918707552994, "grad_norm": 1.2421875, "learning_rate": 1.9849106278460085e-05, "loss": 0.5883, "step": 1305 }, { "epoch": 0.1791863895177334, "grad_norm": 1.2421875, "learning_rate": 1.984885636159477e-05, "loss": 0.5833, "step": 1306 }, { "epoch": 0.17932359195993688, "grad_norm": 1.25, "learning_rate": 1.984860623951492e-05, "loss": 0.5426, "step": 1307 }, { "epoch": 0.17946079440214036, "grad_norm": 1.28125, "learning_rate": 1.984835591222575e-05, "loss": 0.6062, "step": 1308 }, { "epoch": 0.17959799684434383, "grad_norm": 1.28125, "learning_rate": 1.9848105379732474e-05, "loss": 0.5759, "step": 1309 }, { "epoch": 0.1797351992865473, "grad_norm": 1.296875, "learning_rate": 1.9847854642040313e-05, "loss": 0.5714, "step": 1310 }, { "epoch": 0.17987240172875077, "grad_norm": 1.34375, "learning_rate": 1.984760369915449e-05, "loss": 0.4833, "step": 1311 }, { "epoch": 0.18000960417095424, "grad_norm": 1.25, "learning_rate": 1.9847352551080233e-05, "loss": 0.5374, "step": 1312 }, { "epoch": 0.1801468066131577, "grad_norm": 1.265625, "learning_rate": 1.984710119782278e-05, "loss": 0.6159, "step": 1313 }, { "epoch": 0.18028400905536118, "grad_norm": 1.203125, "learning_rate": 1.9846849639387366e-05, "loss": 0.5674, "step": 1314 }, { "epoch": 0.18042121149756465, "grad_norm": 1.21875, "learning_rate": 1.9846597875779232e-05, "loss": 0.4865, "step": 1315 }, { "epoch": 0.18055841393976813, "grad_norm": 1.40625, "learning_rate": 1.9846345907003622e-05, "loss": 0.5938, "step": 1316 }, { "epoch": 0.1806956163819716, "grad_norm": 1.2890625, "learning_rate": 1.9846093733065786e-05, "loss": 0.5194, "step": 1317 }, { "epoch": 0.18083281882417507, "grad_norm": 1.3359375, "learning_rate": 1.9845841353970984e-05, "loss": 0.6033, "step": 1318 }, { "epoch": 0.18097002126637854, "grad_norm": 1.4140625, "learning_rate": 1.9845588769724466e-05, "loss": 0.6616, "step": 1319 }, { "epoch": 0.181107223708582, "grad_norm": 1.3671875, "learning_rate": 1.9845335980331505e-05, "loss": 0.6336, "step": 1320 }, { "epoch": 0.18124442615078548, "grad_norm": 1.2890625, "learning_rate": 1.984508298579736e-05, "loss": 0.5934, "step": 1321 }, { "epoch": 0.18138162859298895, "grad_norm": 1.421875, "learning_rate": 1.9844829786127304e-05, "loss": 0.629, "step": 1322 }, { "epoch": 0.18151883103519242, "grad_norm": 1.3203125, "learning_rate": 1.9844576381326617e-05, "loss": 0.592, "step": 1323 }, { "epoch": 0.1816560334773959, "grad_norm": 1.4140625, "learning_rate": 1.9844322771400575e-05, "loss": 0.6237, "step": 1324 }, { "epoch": 0.18179323591959937, "grad_norm": 1.265625, "learning_rate": 1.9844068956354463e-05, "loss": 0.5427, "step": 1325 }, { "epoch": 0.18193043836180284, "grad_norm": 1.203125, "learning_rate": 1.984381493619357e-05, "loss": 0.5431, "step": 1326 }, { "epoch": 0.1820676408040063, "grad_norm": 1.28125, "learning_rate": 1.9843560710923194e-05, "loss": 0.5885, "step": 1327 }, { "epoch": 0.18220484324620978, "grad_norm": 1.21875, "learning_rate": 1.984330628054862e-05, "loss": 0.534, "step": 1328 }, { "epoch": 0.18234204568841325, "grad_norm": 1.2421875, "learning_rate": 1.9843051645075162e-05, "loss": 0.5098, "step": 1329 }, { "epoch": 0.18247924813061672, "grad_norm": 1.3515625, "learning_rate": 1.9842796804508117e-05, "loss": 0.5864, "step": 1330 }, { "epoch": 0.1826164505728202, "grad_norm": 1.2265625, "learning_rate": 1.9842541758852796e-05, "loss": 0.5109, "step": 1331 }, { "epoch": 0.18275365301502366, "grad_norm": 1.671875, "learning_rate": 1.9842286508114523e-05, "loss": 0.57, "step": 1332 }, { "epoch": 0.18289085545722714, "grad_norm": 1.203125, "learning_rate": 1.98420310522986e-05, "loss": 0.554, "step": 1333 }, { "epoch": 0.1830280578994306, "grad_norm": 1.2578125, "learning_rate": 1.9841775391410366e-05, "loss": 0.5832, "step": 1334 }, { "epoch": 0.18316526034163408, "grad_norm": 1.1953125, "learning_rate": 1.9841519525455135e-05, "loss": 0.496, "step": 1335 }, { "epoch": 0.18330246278383755, "grad_norm": 1.21875, "learning_rate": 1.984126345443825e-05, "loss": 0.5524, "step": 1336 }, { "epoch": 0.18343966522604102, "grad_norm": 1.34375, "learning_rate": 1.9841007178365035e-05, "loss": 0.6118, "step": 1337 }, { "epoch": 0.1835768676682445, "grad_norm": 1.2109375, "learning_rate": 1.9840750697240837e-05, "loss": 0.5426, "step": 1338 }, { "epoch": 0.18371407011044796, "grad_norm": 1.3671875, "learning_rate": 1.9840494011071e-05, "loss": 0.621, "step": 1339 }, { "epoch": 0.18385127255265143, "grad_norm": 1.46875, "learning_rate": 1.9840237119860872e-05, "loss": 0.6322, "step": 1340 }, { "epoch": 0.1839884749948549, "grad_norm": 1.359375, "learning_rate": 1.9839980023615802e-05, "loss": 0.5664, "step": 1341 }, { "epoch": 0.18412567743705838, "grad_norm": 1.203125, "learning_rate": 1.9839722722341152e-05, "loss": 0.4877, "step": 1342 }, { "epoch": 0.18426287987926185, "grad_norm": 1.296875, "learning_rate": 1.983946521604228e-05, "loss": 0.6348, "step": 1343 }, { "epoch": 0.18440008232146532, "grad_norm": 1.3984375, "learning_rate": 1.9839207504724555e-05, "loss": 0.5536, "step": 1344 }, { "epoch": 0.1845372847636688, "grad_norm": 1.234375, "learning_rate": 1.983894958839334e-05, "loss": 0.4965, "step": 1345 }, { "epoch": 0.18467448720587226, "grad_norm": 1.3671875, "learning_rate": 1.9838691467054014e-05, "loss": 0.5568, "step": 1346 }, { "epoch": 0.18481168964807573, "grad_norm": 1.28125, "learning_rate": 1.9838433140711956e-05, "loss": 0.591, "step": 1347 }, { "epoch": 0.1849488920902792, "grad_norm": 1.1796875, "learning_rate": 1.9838174609372546e-05, "loss": 0.4718, "step": 1348 }, { "epoch": 0.18508609453248268, "grad_norm": 1.28125, "learning_rate": 1.983791587304117e-05, "loss": 0.558, "step": 1349 }, { "epoch": 0.18522329697468615, "grad_norm": 1.328125, "learning_rate": 1.9837656931723226e-05, "loss": 0.5354, "step": 1350 }, { "epoch": 0.18536049941688962, "grad_norm": 1.2421875, "learning_rate": 1.9837397785424103e-05, "loss": 0.5311, "step": 1351 }, { "epoch": 0.1854977018590931, "grad_norm": 1.4921875, "learning_rate": 1.98371384341492e-05, "loss": 0.6, "step": 1352 }, { "epoch": 0.18563490430129656, "grad_norm": 1.3828125, "learning_rate": 1.9836878877903927e-05, "loss": 0.606, "step": 1353 }, { "epoch": 0.18577210674350003, "grad_norm": 1.1953125, "learning_rate": 1.9836619116693684e-05, "loss": 0.5109, "step": 1354 }, { "epoch": 0.1859093091857035, "grad_norm": 1.296875, "learning_rate": 1.983635915052389e-05, "loss": 0.6164, "step": 1355 }, { "epoch": 0.18604651162790697, "grad_norm": 1.3359375, "learning_rate": 1.9836098979399957e-05, "loss": 0.5948, "step": 1356 }, { "epoch": 0.18618371407011045, "grad_norm": 1.2890625, "learning_rate": 1.9835838603327312e-05, "loss": 0.5717, "step": 1357 }, { "epoch": 0.18632091651231392, "grad_norm": 1.328125, "learning_rate": 1.9835578022311376e-05, "loss": 0.5946, "step": 1358 }, { "epoch": 0.1864581189545174, "grad_norm": 1.2265625, "learning_rate": 1.9835317236357577e-05, "loss": 0.4995, "step": 1359 }, { "epoch": 0.18659532139672086, "grad_norm": 1.3671875, "learning_rate": 1.9835056245471357e-05, "loss": 0.5642, "step": 1360 }, { "epoch": 0.18673252383892433, "grad_norm": 1.2734375, "learning_rate": 1.9834795049658145e-05, "loss": 0.5415, "step": 1361 }, { "epoch": 0.1868697262811278, "grad_norm": 1.28125, "learning_rate": 1.9834533648923388e-05, "loss": 0.541, "step": 1362 }, { "epoch": 0.18700692872333127, "grad_norm": 1.390625, "learning_rate": 1.983427204327253e-05, "loss": 0.5135, "step": 1363 }, { "epoch": 0.18714413116553474, "grad_norm": 1.3828125, "learning_rate": 1.9834010232711024e-05, "loss": 0.6213, "step": 1364 }, { "epoch": 0.18728133360773822, "grad_norm": 1.3515625, "learning_rate": 1.983374821724432e-05, "loss": 0.6629, "step": 1365 }, { "epoch": 0.1874185360499417, "grad_norm": 1.1875, "learning_rate": 1.9833485996877888e-05, "loss": 0.4736, "step": 1366 }, { "epoch": 0.18755573849214516, "grad_norm": 1.1953125, "learning_rate": 1.9833223571617184e-05, "loss": 0.5401, "step": 1367 }, { "epoch": 0.18769294093434863, "grad_norm": 1.3203125, "learning_rate": 1.9832960941467677e-05, "loss": 0.6259, "step": 1368 }, { "epoch": 0.1878301433765521, "grad_norm": 1.2421875, "learning_rate": 1.9832698106434842e-05, "loss": 0.5048, "step": 1369 }, { "epoch": 0.18796734581875557, "grad_norm": 1.2578125, "learning_rate": 1.983243506652415e-05, "loss": 0.5312, "step": 1370 }, { "epoch": 0.18810454826095904, "grad_norm": 1.3359375, "learning_rate": 1.983217182174109e-05, "loss": 0.6163, "step": 1371 }, { "epoch": 0.1882417507031625, "grad_norm": 1.4921875, "learning_rate": 1.983190837209114e-05, "loss": 0.4988, "step": 1372 }, { "epoch": 0.18837895314536598, "grad_norm": 1.3125, "learning_rate": 1.983164471757979e-05, "loss": 0.532, "step": 1373 }, { "epoch": 0.18851615558756946, "grad_norm": 1.296875, "learning_rate": 1.9831380858212535e-05, "loss": 0.5868, "step": 1374 }, { "epoch": 0.18865335802977293, "grad_norm": 1.234375, "learning_rate": 1.983111679399488e-05, "loss": 0.5483, "step": 1375 }, { "epoch": 0.1887905604719764, "grad_norm": 1.2578125, "learning_rate": 1.9830852524932313e-05, "loss": 0.5991, "step": 1376 }, { "epoch": 0.18892776291417987, "grad_norm": 1.3359375, "learning_rate": 1.983058805103035e-05, "loss": 0.5967, "step": 1377 }, { "epoch": 0.18906496535638334, "grad_norm": 1.34375, "learning_rate": 1.98303233722945e-05, "loss": 0.619, "step": 1378 }, { "epoch": 0.1892021677985868, "grad_norm": 1.3125, "learning_rate": 1.9830058488730276e-05, "loss": 0.6151, "step": 1379 }, { "epoch": 0.18933937024079028, "grad_norm": 1.1875, "learning_rate": 1.98297934003432e-05, "loss": 0.4886, "step": 1380 }, { "epoch": 0.18947657268299375, "grad_norm": 1.09375, "learning_rate": 1.9829528107138792e-05, "loss": 0.4658, "step": 1381 }, { "epoch": 0.18961377512519723, "grad_norm": 1.25, "learning_rate": 1.9829262609122585e-05, "loss": 0.5269, "step": 1382 }, { "epoch": 0.1897509775674007, "grad_norm": 1.359375, "learning_rate": 1.9828996906300105e-05, "loss": 0.5825, "step": 1383 }, { "epoch": 0.18988818000960417, "grad_norm": 1.296875, "learning_rate": 1.9828730998676894e-05, "loss": 0.6055, "step": 1384 }, { "epoch": 0.19002538245180764, "grad_norm": 1.3359375, "learning_rate": 1.9828464886258487e-05, "loss": 0.59, "step": 1385 }, { "epoch": 0.1901625848940111, "grad_norm": 1.203125, "learning_rate": 1.982819856905043e-05, "loss": 0.501, "step": 1386 }, { "epoch": 0.19029978733621458, "grad_norm": 1.3828125, "learning_rate": 1.982793204705828e-05, "loss": 0.7112, "step": 1387 }, { "epoch": 0.19043698977841805, "grad_norm": 1.34375, "learning_rate": 1.982766532028758e-05, "loss": 0.5328, "step": 1388 }, { "epoch": 0.19057419222062152, "grad_norm": 1.359375, "learning_rate": 1.982739838874389e-05, "loss": 0.6475, "step": 1389 }, { "epoch": 0.190711394662825, "grad_norm": 1.3046875, "learning_rate": 1.9827131252432776e-05, "loss": 0.5979, "step": 1390 }, { "epoch": 0.19084859710502847, "grad_norm": 1.25, "learning_rate": 1.98268639113598e-05, "loss": 0.605, "step": 1391 }, { "epoch": 0.19098579954723194, "grad_norm": 1.28125, "learning_rate": 1.9826596365530536e-05, "loss": 0.6398, "step": 1392 }, { "epoch": 0.1911230019894354, "grad_norm": 1.3359375, "learning_rate": 1.9826328614950555e-05, "loss": 0.5446, "step": 1393 }, { "epoch": 0.19126020443163888, "grad_norm": 1.0859375, "learning_rate": 1.9826060659625438e-05, "loss": 0.45, "step": 1394 }, { "epoch": 0.19139740687384235, "grad_norm": 1.421875, "learning_rate": 1.9825792499560768e-05, "loss": 0.5924, "step": 1395 }, { "epoch": 0.19153460931604582, "grad_norm": 1.2890625, "learning_rate": 1.982552413476213e-05, "loss": 0.524, "step": 1396 }, { "epoch": 0.1916718117582493, "grad_norm": 1.3125, "learning_rate": 1.9825255565235123e-05, "loss": 0.5491, "step": 1397 }, { "epoch": 0.19180901420045277, "grad_norm": 1.2578125, "learning_rate": 1.9824986790985335e-05, "loss": 0.5784, "step": 1398 }, { "epoch": 0.19194621664265624, "grad_norm": 1.453125, "learning_rate": 1.982471781201837e-05, "loss": 0.5994, "step": 1399 }, { "epoch": 0.1920834190848597, "grad_norm": 1.21875, "learning_rate": 1.9824448628339832e-05, "loss": 0.4862, "step": 1400 }, { "epoch": 0.19222062152706318, "grad_norm": 1.2734375, "learning_rate": 1.982417923995533e-05, "loss": 0.5573, "step": 1401 }, { "epoch": 0.19235782396926665, "grad_norm": 1.3984375, "learning_rate": 1.9823909646870476e-05, "loss": 0.6254, "step": 1402 }, { "epoch": 0.19249502641147012, "grad_norm": 1.25, "learning_rate": 1.982363984909089e-05, "loss": 0.5046, "step": 1403 }, { "epoch": 0.1926322288536736, "grad_norm": 1.40625, "learning_rate": 1.982336984662219e-05, "loss": 0.6088, "step": 1404 }, { "epoch": 0.19276943129587706, "grad_norm": 1.453125, "learning_rate": 1.9823099639470004e-05, "loss": 0.6174, "step": 1405 }, { "epoch": 0.19290663373808054, "grad_norm": 1.3125, "learning_rate": 1.982282922763996e-05, "loss": 0.5952, "step": 1406 }, { "epoch": 0.193043836180284, "grad_norm": 1.3671875, "learning_rate": 1.9822558611137698e-05, "loss": 0.6308, "step": 1407 }, { "epoch": 0.19318103862248748, "grad_norm": 1.3046875, "learning_rate": 1.9822287789968852e-05, "loss": 0.5578, "step": 1408 }, { "epoch": 0.19331824106469095, "grad_norm": 1.3046875, "learning_rate": 1.9822016764139065e-05, "loss": 0.5399, "step": 1409 }, { "epoch": 0.19345544350689442, "grad_norm": 1.265625, "learning_rate": 1.9821745533653986e-05, "loss": 0.5578, "step": 1410 }, { "epoch": 0.1935926459490979, "grad_norm": 1.296875, "learning_rate": 1.982147409851926e-05, "loss": 0.6227, "step": 1411 }, { "epoch": 0.19372984839130136, "grad_norm": 1.2578125, "learning_rate": 1.9821202458740557e-05, "loss": 0.5699, "step": 1412 }, { "epoch": 0.19386705083350483, "grad_norm": 1.234375, "learning_rate": 1.9820930614323524e-05, "loss": 0.5756, "step": 1413 }, { "epoch": 0.1940042532757083, "grad_norm": 1.34375, "learning_rate": 1.982065856527383e-05, "loss": 0.5543, "step": 1414 }, { "epoch": 0.19414145571791178, "grad_norm": 1.484375, "learning_rate": 1.9820386311597142e-05, "loss": 0.66, "step": 1415 }, { "epoch": 0.19427865816011525, "grad_norm": 1.2578125, "learning_rate": 1.9820113853299133e-05, "loss": 0.5506, "step": 1416 }, { "epoch": 0.19441586060231872, "grad_norm": 1.2421875, "learning_rate": 1.9819841190385483e-05, "loss": 0.5522, "step": 1417 }, { "epoch": 0.1945530630445222, "grad_norm": 1.203125, "learning_rate": 1.981956832286187e-05, "loss": 0.4236, "step": 1418 }, { "epoch": 0.19469026548672566, "grad_norm": 1.3671875, "learning_rate": 1.981929525073398e-05, "loss": 0.618, "step": 1419 }, { "epoch": 0.19482746792892913, "grad_norm": 1.296875, "learning_rate": 1.9819021974007504e-05, "loss": 0.6218, "step": 1420 }, { "epoch": 0.1949646703711326, "grad_norm": 1.2109375, "learning_rate": 1.9818748492688137e-05, "loss": 0.5256, "step": 1421 }, { "epoch": 0.19510187281333607, "grad_norm": 1.25, "learning_rate": 1.9818474806781575e-05, "loss": 0.5936, "step": 1422 }, { "epoch": 0.19523907525553955, "grad_norm": 1.2265625, "learning_rate": 1.9818200916293523e-05, "loss": 0.5729, "step": 1423 }, { "epoch": 0.19537627769774302, "grad_norm": 1.2421875, "learning_rate": 1.9817926821229684e-05, "loss": 0.5775, "step": 1424 }, { "epoch": 0.1955134801399465, "grad_norm": 1.296875, "learning_rate": 1.981765252159577e-05, "loss": 0.5863, "step": 1425 }, { "epoch": 0.19565068258214996, "grad_norm": 1.203125, "learning_rate": 1.9817378017397505e-05, "loss": 0.4828, "step": 1426 }, { "epoch": 0.19578788502435343, "grad_norm": 1.2734375, "learning_rate": 1.9817103308640597e-05, "loss": 0.5497, "step": 1427 }, { "epoch": 0.1959250874665569, "grad_norm": 1.375, "learning_rate": 1.9816828395330778e-05, "loss": 0.6035, "step": 1428 }, { "epoch": 0.19606228990876037, "grad_norm": 1.2734375, "learning_rate": 1.9816553277473768e-05, "loss": 0.5075, "step": 1429 }, { "epoch": 0.19619949235096384, "grad_norm": 1.359375, "learning_rate": 1.9816277955075306e-05, "loss": 0.5437, "step": 1430 }, { "epoch": 0.19633669479316732, "grad_norm": 1.34375, "learning_rate": 1.9816002428141128e-05, "loss": 0.5927, "step": 1431 }, { "epoch": 0.1964738972353708, "grad_norm": 1.390625, "learning_rate": 1.9815726696676974e-05, "loss": 0.5597, "step": 1432 }, { "epoch": 0.19661109967757426, "grad_norm": 1.25, "learning_rate": 1.9815450760688587e-05, "loss": 0.549, "step": 1433 }, { "epoch": 0.19674830211977773, "grad_norm": 1.28125, "learning_rate": 1.981517462018172e-05, "loss": 0.6047, "step": 1434 }, { "epoch": 0.1968855045619812, "grad_norm": 1.390625, "learning_rate": 1.981489827516213e-05, "loss": 0.5783, "step": 1435 }, { "epoch": 0.19702270700418467, "grad_norm": 1.375, "learning_rate": 1.9814621725635564e-05, "loss": 0.6199, "step": 1436 }, { "epoch": 0.19715990944638814, "grad_norm": 1.203125, "learning_rate": 1.9814344971607795e-05, "loss": 0.5547, "step": 1437 }, { "epoch": 0.19729711188859161, "grad_norm": 1.2734375, "learning_rate": 1.9814068013084582e-05, "loss": 0.5942, "step": 1438 }, { "epoch": 0.19743431433079509, "grad_norm": 1.2109375, "learning_rate": 1.9813790850071704e-05, "loss": 0.5696, "step": 1439 }, { "epoch": 0.19757151677299856, "grad_norm": 1.28125, "learning_rate": 1.9813513482574926e-05, "loss": 0.6033, "step": 1440 }, { "epoch": 0.19770871921520203, "grad_norm": 1.203125, "learning_rate": 1.9813235910600033e-05, "loss": 0.5771, "step": 1441 }, { "epoch": 0.1978459216574055, "grad_norm": 1.1953125, "learning_rate": 1.9812958134152814e-05, "loss": 0.4786, "step": 1442 }, { "epoch": 0.19798312409960897, "grad_norm": 1.21875, "learning_rate": 1.9812680153239044e-05, "loss": 0.5057, "step": 1443 }, { "epoch": 0.19812032654181244, "grad_norm": 1.3046875, "learning_rate": 1.981240196786453e-05, "loss": 0.628, "step": 1444 }, { "epoch": 0.1982575289840159, "grad_norm": 1.3359375, "learning_rate": 1.9812123578035056e-05, "loss": 0.5914, "step": 1445 }, { "epoch": 0.19839473142621938, "grad_norm": 1.125, "learning_rate": 1.9811844983756428e-05, "loss": 0.497, "step": 1446 }, { "epoch": 0.19853193386842286, "grad_norm": 1.2109375, "learning_rate": 1.981156618503445e-05, "loss": 0.5157, "step": 1447 }, { "epoch": 0.19866913631062633, "grad_norm": 1.171875, "learning_rate": 1.981128718187493e-05, "loss": 0.5125, "step": 1448 }, { "epoch": 0.1988063387528298, "grad_norm": 1.2890625, "learning_rate": 1.9811007974283686e-05, "loss": 0.5166, "step": 1449 }, { "epoch": 0.19894354119503327, "grad_norm": 1.171875, "learning_rate": 1.981072856226653e-05, "loss": 0.5558, "step": 1450 }, { "epoch": 0.19908074363723674, "grad_norm": 1.203125, "learning_rate": 1.9810448945829285e-05, "loss": 0.5066, "step": 1451 }, { "epoch": 0.1992179460794402, "grad_norm": 1.2578125, "learning_rate": 1.981016912497778e-05, "loss": 0.5562, "step": 1452 }, { "epoch": 0.19935514852164368, "grad_norm": 1.3203125, "learning_rate": 1.9809889099717843e-05, "loss": 0.5954, "step": 1453 }, { "epoch": 0.19949235096384715, "grad_norm": 1.2734375, "learning_rate": 1.9809608870055312e-05, "loss": 0.5361, "step": 1454 }, { "epoch": 0.19962955340605062, "grad_norm": 1.40625, "learning_rate": 1.9809328435996024e-05, "loss": 0.6548, "step": 1455 }, { "epoch": 0.1997667558482541, "grad_norm": 1.296875, "learning_rate": 1.9809047797545818e-05, "loss": 0.6245, "step": 1456 }, { "epoch": 0.19990395829045757, "grad_norm": 1.28125, "learning_rate": 1.980876695471055e-05, "loss": 0.5589, "step": 1457 }, { "epoch": 0.20004116073266104, "grad_norm": 1.25, "learning_rate": 1.980848590749606e-05, "loss": 0.5604, "step": 1458 }, { "epoch": 0.2001783631748645, "grad_norm": 1.359375, "learning_rate": 1.980820465590822e-05, "loss": 0.5701, "step": 1459 }, { "epoch": 0.20031556561706798, "grad_norm": 1.125, "learning_rate": 1.9807923199952878e-05, "loss": 0.5108, "step": 1460 }, { "epoch": 0.20045276805927145, "grad_norm": 1.328125, "learning_rate": 1.98076415396359e-05, "loss": 0.6269, "step": 1461 }, { "epoch": 0.20058997050147492, "grad_norm": 1.2109375, "learning_rate": 1.9807359674963158e-05, "loss": 0.4909, "step": 1462 }, { "epoch": 0.2007271729436784, "grad_norm": 1.140625, "learning_rate": 1.9807077605940522e-05, "loss": 0.4503, "step": 1463 }, { "epoch": 0.20086437538588187, "grad_norm": 1.25, "learning_rate": 1.9806795332573874e-05, "loss": 0.5483, "step": 1464 }, { "epoch": 0.20100157782808534, "grad_norm": 1.453125, "learning_rate": 1.980651285486909e-05, "loss": 0.6187, "step": 1465 }, { "epoch": 0.2011387802702888, "grad_norm": 1.1953125, "learning_rate": 1.980623017283206e-05, "loss": 0.5484, "step": 1466 }, { "epoch": 0.20127598271249228, "grad_norm": 1.2578125, "learning_rate": 1.9805947286468675e-05, "loss": 0.6168, "step": 1467 }, { "epoch": 0.20141318515469575, "grad_norm": 1.203125, "learning_rate": 1.9805664195784822e-05, "loss": 0.5529, "step": 1468 }, { "epoch": 0.20155038759689922, "grad_norm": 1.3203125, "learning_rate": 1.9805380900786407e-05, "loss": 0.5769, "step": 1469 }, { "epoch": 0.2016875900391027, "grad_norm": 1.25, "learning_rate": 1.980509740147933e-05, "loss": 0.4699, "step": 1470 }, { "epoch": 0.20182479248130616, "grad_norm": 1.3203125, "learning_rate": 1.98048136978695e-05, "loss": 0.6369, "step": 1471 }, { "epoch": 0.20196199492350964, "grad_norm": 1.203125, "learning_rate": 1.980452978996282e-05, "loss": 0.5196, "step": 1472 }, { "epoch": 0.2020991973657131, "grad_norm": 1.40625, "learning_rate": 1.980424567776522e-05, "loss": 0.6453, "step": 1473 }, { "epoch": 0.20223639980791658, "grad_norm": 1.1328125, "learning_rate": 1.9803961361282607e-05, "loss": 0.4736, "step": 1474 }, { "epoch": 0.20237360225012005, "grad_norm": 1.1953125, "learning_rate": 1.9803676840520913e-05, "loss": 0.5222, "step": 1475 }, { "epoch": 0.20251080469232352, "grad_norm": 1.375, "learning_rate": 1.9803392115486063e-05, "loss": 0.5955, "step": 1476 }, { "epoch": 0.202648007134527, "grad_norm": 1.1953125, "learning_rate": 1.980310718618399e-05, "loss": 0.5165, "step": 1477 }, { "epoch": 0.20278520957673046, "grad_norm": 1.46875, "learning_rate": 1.9802822052620634e-05, "loss": 0.6765, "step": 1478 }, { "epoch": 0.20292241201893393, "grad_norm": 1.390625, "learning_rate": 1.980253671480193e-05, "loss": 0.5049, "step": 1479 }, { "epoch": 0.2030596144611374, "grad_norm": 1.34375, "learning_rate": 1.9802251172733827e-05, "loss": 0.5063, "step": 1480 }, { "epoch": 0.20319681690334088, "grad_norm": 1.3203125, "learning_rate": 1.9801965426422276e-05, "loss": 0.5968, "step": 1481 }, { "epoch": 0.20333401934554435, "grad_norm": 1.3125, "learning_rate": 1.9801679475873226e-05, "loss": 0.5167, "step": 1482 }, { "epoch": 0.20347122178774782, "grad_norm": 1.3203125, "learning_rate": 1.9801393321092644e-05, "loss": 0.5587, "step": 1483 }, { "epoch": 0.2036084242299513, "grad_norm": 1.1796875, "learning_rate": 1.9801106962086485e-05, "loss": 0.4282, "step": 1484 }, { "epoch": 0.20374562667215476, "grad_norm": 1.28125, "learning_rate": 1.9800820398860717e-05, "loss": 0.5557, "step": 1485 }, { "epoch": 0.20388282911435823, "grad_norm": 1.2890625, "learning_rate": 1.9800533631421312e-05, "loss": 0.5552, "step": 1486 }, { "epoch": 0.2040200315565617, "grad_norm": 1.2734375, "learning_rate": 1.9800246659774244e-05, "loss": 0.5636, "step": 1487 }, { "epoch": 0.20415723399876518, "grad_norm": 1.3828125, "learning_rate": 1.9799959483925493e-05, "loss": 0.6266, "step": 1488 }, { "epoch": 0.20429443644096865, "grad_norm": 1.328125, "learning_rate": 1.9799672103881042e-05, "loss": 0.5395, "step": 1489 }, { "epoch": 0.20443163888317212, "grad_norm": 1.2890625, "learning_rate": 1.979938451964688e-05, "loss": 0.5503, "step": 1490 }, { "epoch": 0.2045688413253756, "grad_norm": 1.3203125, "learning_rate": 1.9799096731229002e-05, "loss": 0.619, "step": 1491 }, { "epoch": 0.20470604376757906, "grad_norm": 1.390625, "learning_rate": 1.97988087386334e-05, "loss": 0.5948, "step": 1492 }, { "epoch": 0.20484324620978253, "grad_norm": 1.375, "learning_rate": 1.9798520541866077e-05, "loss": 0.6711, "step": 1493 }, { "epoch": 0.204980448651986, "grad_norm": 1.296875, "learning_rate": 1.9798232140933035e-05, "loss": 0.5962, "step": 1494 }, { "epoch": 0.20511765109418947, "grad_norm": 1.2734375, "learning_rate": 1.979794353584029e-05, "loss": 0.5429, "step": 1495 }, { "epoch": 0.20525485353639294, "grad_norm": 1.328125, "learning_rate": 1.9797654726593847e-05, "loss": 0.5978, "step": 1496 }, { "epoch": 0.20539205597859642, "grad_norm": 1.4140625, "learning_rate": 1.9797365713199728e-05, "loss": 0.6816, "step": 1497 }, { "epoch": 0.2055292584207999, "grad_norm": 1.28125, "learning_rate": 1.9797076495663957e-05, "loss": 0.5807, "step": 1498 }, { "epoch": 0.20566646086300336, "grad_norm": 1.25, "learning_rate": 1.9796787073992557e-05, "loss": 0.6013, "step": 1499 }, { "epoch": 0.20580366330520683, "grad_norm": 1.3828125, "learning_rate": 1.9796497448191558e-05, "loss": 0.5968, "step": 1500 }, { "epoch": 0.2059408657474103, "grad_norm": 1.265625, "learning_rate": 1.9796207618266998e-05, "loss": 0.5462, "step": 1501 }, { "epoch": 0.20607806818961377, "grad_norm": 1.3515625, "learning_rate": 1.9795917584224913e-05, "loss": 0.5773, "step": 1502 }, { "epoch": 0.20621527063181724, "grad_norm": 1.2734375, "learning_rate": 1.9795627346071348e-05, "loss": 0.5489, "step": 1503 }, { "epoch": 0.20635247307402071, "grad_norm": 1.375, "learning_rate": 1.979533690381235e-05, "loss": 0.5944, "step": 1504 }, { "epoch": 0.20648967551622419, "grad_norm": 1.328125, "learning_rate": 1.9795046257453972e-05, "loss": 0.6184, "step": 1505 }, { "epoch": 0.20662687795842766, "grad_norm": 1.3515625, "learning_rate": 1.9794755407002268e-05, "loss": 0.5708, "step": 1506 }, { "epoch": 0.20676408040063113, "grad_norm": 1.2578125, "learning_rate": 1.9794464352463298e-05, "loss": 0.5803, "step": 1507 }, { "epoch": 0.2069012828428346, "grad_norm": 1.2578125, "learning_rate": 1.9794173093843126e-05, "loss": 0.575, "step": 1508 }, { "epoch": 0.20703848528503807, "grad_norm": 1.1953125, "learning_rate": 1.979388163114783e-05, "loss": 0.526, "step": 1509 }, { "epoch": 0.20717568772724154, "grad_norm": 1.265625, "learning_rate": 1.979358996438347e-05, "loss": 0.5715, "step": 1510 }, { "epoch": 0.207312890169445, "grad_norm": 1.2734375, "learning_rate": 1.9793298093556128e-05, "loss": 0.5776, "step": 1511 }, { "epoch": 0.20745009261164848, "grad_norm": 1.390625, "learning_rate": 1.9793006018671884e-05, "loss": 0.5843, "step": 1512 }, { "epoch": 0.20758729505385196, "grad_norm": 1.2578125, "learning_rate": 1.979271373973683e-05, "loss": 0.5092, "step": 1513 }, { "epoch": 0.20772449749605543, "grad_norm": 1.328125, "learning_rate": 1.9792421256757053e-05, "loss": 0.5621, "step": 1514 }, { "epoch": 0.2078616999382589, "grad_norm": 1.2421875, "learning_rate": 1.9792128569738646e-05, "loss": 0.5381, "step": 1515 }, { "epoch": 0.20799890238046237, "grad_norm": 1.1953125, "learning_rate": 1.9791835678687705e-05, "loss": 0.4927, "step": 1516 }, { "epoch": 0.20813610482266584, "grad_norm": 1.1953125, "learning_rate": 1.979154258361034e-05, "loss": 0.5145, "step": 1517 }, { "epoch": 0.2082733072648693, "grad_norm": 1.2890625, "learning_rate": 1.979124928451265e-05, "loss": 0.5329, "step": 1518 }, { "epoch": 0.20841050970707278, "grad_norm": 1.3828125, "learning_rate": 1.979095578140075e-05, "loss": 0.6585, "step": 1519 }, { "epoch": 0.20854771214927625, "grad_norm": 1.4765625, "learning_rate": 1.9790662074280754e-05, "loss": 0.5936, "step": 1520 }, { "epoch": 0.20868491459147973, "grad_norm": 1.234375, "learning_rate": 1.9790368163158788e-05, "loss": 0.5617, "step": 1521 }, { "epoch": 0.2088221170336832, "grad_norm": 1.25, "learning_rate": 1.979007404804097e-05, "loss": 0.5596, "step": 1522 }, { "epoch": 0.20895931947588667, "grad_norm": 1.3671875, "learning_rate": 1.9789779728933428e-05, "loss": 0.5789, "step": 1523 }, { "epoch": 0.20909652191809014, "grad_norm": 1.21875, "learning_rate": 1.9789485205842294e-05, "loss": 0.5201, "step": 1524 }, { "epoch": 0.2092337243602936, "grad_norm": 1.6875, "learning_rate": 1.978919047877371e-05, "loss": 0.4834, "step": 1525 }, { "epoch": 0.20937092680249708, "grad_norm": 1.2265625, "learning_rate": 1.9788895547733817e-05, "loss": 0.5077, "step": 1526 }, { "epoch": 0.20950812924470055, "grad_norm": 1.34375, "learning_rate": 1.9788600412728752e-05, "loss": 0.5767, "step": 1527 }, { "epoch": 0.20964533168690402, "grad_norm": 1.28125, "learning_rate": 1.9788305073764676e-05, "loss": 0.5639, "step": 1528 }, { "epoch": 0.2097825341291075, "grad_norm": 1.4453125, "learning_rate": 1.978800953084773e-05, "loss": 0.5608, "step": 1529 }, { "epoch": 0.20991973657131097, "grad_norm": 1.4296875, "learning_rate": 1.9787713783984083e-05, "loss": 0.5425, "step": 1530 }, { "epoch": 0.21005693901351444, "grad_norm": 1.3359375, "learning_rate": 1.9787417833179892e-05, "loss": 0.6189, "step": 1531 }, { "epoch": 0.2101941414557179, "grad_norm": 1.25, "learning_rate": 1.9787121678441322e-05, "loss": 0.5294, "step": 1532 }, { "epoch": 0.21033134389792138, "grad_norm": 1.328125, "learning_rate": 1.9786825319774548e-05, "loss": 0.5619, "step": 1533 }, { "epoch": 0.21046854634012485, "grad_norm": 1.2109375, "learning_rate": 1.9786528757185744e-05, "loss": 0.5158, "step": 1534 }, { "epoch": 0.21060574878232832, "grad_norm": 1.2734375, "learning_rate": 1.978623199068109e-05, "loss": 0.5077, "step": 1535 }, { "epoch": 0.2107429512245318, "grad_norm": 1.2265625, "learning_rate": 1.978593502026677e-05, "loss": 0.508, "step": 1536 }, { "epoch": 0.21088015366673526, "grad_norm": 1.296875, "learning_rate": 1.9785637845948966e-05, "loss": 0.6063, "step": 1537 }, { "epoch": 0.21101735610893874, "grad_norm": 1.2109375, "learning_rate": 1.9785340467733875e-05, "loss": 0.5167, "step": 1538 }, { "epoch": 0.2111545585511422, "grad_norm": 1.2265625, "learning_rate": 1.978504288562769e-05, "loss": 0.5332, "step": 1539 }, { "epoch": 0.21129176099334568, "grad_norm": 1.3046875, "learning_rate": 1.978474509963662e-05, "loss": 0.62, "step": 1540 }, { "epoch": 0.21142896343554915, "grad_norm": 1.2890625, "learning_rate": 1.978444710976686e-05, "loss": 0.6307, "step": 1541 }, { "epoch": 0.21156616587775262, "grad_norm": 1.1953125, "learning_rate": 1.978414891602462e-05, "loss": 0.5017, "step": 1542 }, { "epoch": 0.2117033683199561, "grad_norm": 1.25, "learning_rate": 1.978385051841612e-05, "loss": 0.5356, "step": 1543 }, { "epoch": 0.21184057076215956, "grad_norm": 1.28125, "learning_rate": 1.9783551916947573e-05, "loss": 0.5289, "step": 1544 }, { "epoch": 0.21197777320436303, "grad_norm": 1.421875, "learning_rate": 1.97832531116252e-05, "loss": 0.6884, "step": 1545 }, { "epoch": 0.2121149756465665, "grad_norm": 1.09375, "learning_rate": 1.9782954102455226e-05, "loss": 0.4806, "step": 1546 }, { "epoch": 0.21225217808876998, "grad_norm": 1.3671875, "learning_rate": 1.9782654889443887e-05, "loss": 0.5773, "step": 1547 }, { "epoch": 0.21238938053097345, "grad_norm": 1.28125, "learning_rate": 1.9782355472597414e-05, "loss": 0.5532, "step": 1548 }, { "epoch": 0.21252658297317692, "grad_norm": 1.1796875, "learning_rate": 1.9782055851922047e-05, "loss": 0.512, "step": 1549 }, { "epoch": 0.2126637854153804, "grad_norm": 1.28125, "learning_rate": 1.9781756027424027e-05, "loss": 0.5932, "step": 1550 }, { "epoch": 0.21280098785758386, "grad_norm": 1.3671875, "learning_rate": 1.97814559991096e-05, "loss": 0.6314, "step": 1551 }, { "epoch": 0.21293819029978733, "grad_norm": 1.3046875, "learning_rate": 1.978115576698502e-05, "loss": 0.5661, "step": 1552 }, { "epoch": 0.2130753927419908, "grad_norm": 1.3046875, "learning_rate": 1.9780855331056545e-05, "loss": 0.5651, "step": 1553 }, { "epoch": 0.21321259518419428, "grad_norm": 1.4140625, "learning_rate": 1.9780554691330432e-05, "loss": 0.6409, "step": 1554 }, { "epoch": 0.21334979762639775, "grad_norm": 1.1484375, "learning_rate": 1.9780253847812942e-05, "loss": 0.5147, "step": 1555 }, { "epoch": 0.21348700006860122, "grad_norm": 1.1484375, "learning_rate": 1.977995280051035e-05, "loss": 0.4666, "step": 1556 }, { "epoch": 0.2136242025108047, "grad_norm": 1.28125, "learning_rate": 1.9779651549428926e-05, "loss": 0.5726, "step": 1557 }, { "epoch": 0.21376140495300816, "grad_norm": 1.203125, "learning_rate": 1.9779350094574944e-05, "loss": 0.5295, "step": 1558 }, { "epoch": 0.21389860739521163, "grad_norm": 1.1953125, "learning_rate": 1.9779048435954695e-05, "loss": 0.5387, "step": 1559 }, { "epoch": 0.2140358098374151, "grad_norm": 1.34375, "learning_rate": 1.977874657357445e-05, "loss": 0.6111, "step": 1560 }, { "epoch": 0.21417301227961857, "grad_norm": 1.328125, "learning_rate": 1.977844450744051e-05, "loss": 0.6218, "step": 1561 }, { "epoch": 0.21431021472182205, "grad_norm": 1.3125, "learning_rate": 1.9778142237559163e-05, "loss": 0.5918, "step": 1562 }, { "epoch": 0.21444741716402552, "grad_norm": 1.421875, "learning_rate": 1.977783976393671e-05, "loss": 0.6206, "step": 1563 }, { "epoch": 0.214584619606229, "grad_norm": 1.2578125, "learning_rate": 1.9777537086579452e-05, "loss": 0.6101, "step": 1564 }, { "epoch": 0.21472182204843246, "grad_norm": 1.25, "learning_rate": 1.9777234205493697e-05, "loss": 0.5002, "step": 1565 }, { "epoch": 0.21485902449063593, "grad_norm": 1.2421875, "learning_rate": 1.9776931120685758e-05, "loss": 0.6055, "step": 1566 }, { "epoch": 0.2149962269328394, "grad_norm": 1.296875, "learning_rate": 1.977662783216195e-05, "loss": 0.5893, "step": 1567 }, { "epoch": 0.21513342937504287, "grad_norm": 1.25, "learning_rate": 1.9776324339928584e-05, "loss": 0.5822, "step": 1568 }, { "epoch": 0.21527063181724634, "grad_norm": 1.1953125, "learning_rate": 1.977602064399199e-05, "loss": 0.5376, "step": 1569 }, { "epoch": 0.21540783425944982, "grad_norm": 1.328125, "learning_rate": 1.97757167443585e-05, "loss": 0.604, "step": 1570 }, { "epoch": 0.2155450367016533, "grad_norm": 1.2421875, "learning_rate": 1.9775412641034443e-05, "loss": 0.5529, "step": 1571 }, { "epoch": 0.21568223914385676, "grad_norm": 1.2578125, "learning_rate": 1.9775108334026147e-05, "loss": 0.589, "step": 1572 }, { "epoch": 0.21581944158606023, "grad_norm": 1.3125, "learning_rate": 1.9774803823339966e-05, "loss": 0.6141, "step": 1573 }, { "epoch": 0.2159566440282637, "grad_norm": 1.2734375, "learning_rate": 1.9774499108982236e-05, "loss": 0.539, "step": 1574 }, { "epoch": 0.21609384647046717, "grad_norm": 1.421875, "learning_rate": 1.9774194190959314e-05, "loss": 0.6172, "step": 1575 }, { "epoch": 0.21623104891267064, "grad_norm": 1.2890625, "learning_rate": 1.9773889069277544e-05, "loss": 0.5673, "step": 1576 }, { "epoch": 0.2163682513548741, "grad_norm": 1.203125, "learning_rate": 1.9773583743943288e-05, "loss": 0.5634, "step": 1577 }, { "epoch": 0.21650545379707758, "grad_norm": 1.1796875, "learning_rate": 1.9773278214962907e-05, "loss": 0.5564, "step": 1578 }, { "epoch": 0.21664265623928106, "grad_norm": 1.2890625, "learning_rate": 1.9772972482342768e-05, "loss": 0.5532, "step": 1579 }, { "epoch": 0.21677985868148453, "grad_norm": 1.296875, "learning_rate": 1.9772666546089248e-05, "loss": 0.5594, "step": 1580 }, { "epoch": 0.216917061123688, "grad_norm": 1.2109375, "learning_rate": 1.977236040620871e-05, "loss": 0.5413, "step": 1581 }, { "epoch": 0.21705426356589147, "grad_norm": 1.15625, "learning_rate": 1.9772054062707537e-05, "loss": 0.5093, "step": 1582 }, { "epoch": 0.21719146600809494, "grad_norm": 1.2265625, "learning_rate": 1.9771747515592114e-05, "loss": 0.5154, "step": 1583 }, { "epoch": 0.2173286684502984, "grad_norm": 1.2890625, "learning_rate": 1.9771440764868828e-05, "loss": 0.6065, "step": 1584 }, { "epoch": 0.21746587089250188, "grad_norm": 1.3515625, "learning_rate": 1.977113381054407e-05, "loss": 0.6228, "step": 1585 }, { "epoch": 0.21760307333470535, "grad_norm": 1.2421875, "learning_rate": 1.9770826652624234e-05, "loss": 0.5359, "step": 1586 }, { "epoch": 0.21774027577690883, "grad_norm": 1.296875, "learning_rate": 1.9770519291115722e-05, "loss": 0.6177, "step": 1587 }, { "epoch": 0.2178774782191123, "grad_norm": 1.3203125, "learning_rate": 1.977021172602494e-05, "loss": 0.6001, "step": 1588 }, { "epoch": 0.21801468066131577, "grad_norm": 1.328125, "learning_rate": 1.976990395735829e-05, "loss": 0.636, "step": 1589 }, { "epoch": 0.21815188310351924, "grad_norm": 1.328125, "learning_rate": 1.9769595985122195e-05, "loss": 0.5882, "step": 1590 }, { "epoch": 0.2182890855457227, "grad_norm": 1.2109375, "learning_rate": 1.9769287809323066e-05, "loss": 0.5702, "step": 1591 }, { "epoch": 0.21842628798792618, "grad_norm": 1.3203125, "learning_rate": 1.976897942996732e-05, "loss": 0.6021, "step": 1592 }, { "epoch": 0.21856349043012965, "grad_norm": 1.296875, "learning_rate": 1.976867084706139e-05, "loss": 0.5866, "step": 1593 }, { "epoch": 0.21870069287233312, "grad_norm": 1.28125, "learning_rate": 1.9768362060611704e-05, "loss": 0.5964, "step": 1594 }, { "epoch": 0.2188378953145366, "grad_norm": 1.234375, "learning_rate": 1.9768053070624695e-05, "loss": 0.5272, "step": 1595 }, { "epoch": 0.21897509775674007, "grad_norm": 1.328125, "learning_rate": 1.97677438771068e-05, "loss": 0.5548, "step": 1596 }, { "epoch": 0.21911230019894354, "grad_norm": 1.3828125, "learning_rate": 1.9767434480064464e-05, "loss": 0.5832, "step": 1597 }, { "epoch": 0.219249502641147, "grad_norm": 1.234375, "learning_rate": 1.9767124879504126e-05, "loss": 0.5676, "step": 1598 }, { "epoch": 0.21938670508335048, "grad_norm": 1.1640625, "learning_rate": 1.976681507543225e-05, "loss": 0.5821, "step": 1599 }, { "epoch": 0.21952390752555395, "grad_norm": 1.1640625, "learning_rate": 1.9766505067855282e-05, "loss": 0.4992, "step": 1600 }, { "epoch": 0.21966110996775742, "grad_norm": 1.3671875, "learning_rate": 1.9766194856779685e-05, "loss": 0.6111, "step": 1601 }, { "epoch": 0.2197983124099609, "grad_norm": 1.3203125, "learning_rate": 1.9765884442211923e-05, "loss": 0.5845, "step": 1602 }, { "epoch": 0.21993551485216437, "grad_norm": 1.2734375, "learning_rate": 1.9765573824158462e-05, "loss": 0.5981, "step": 1603 }, { "epoch": 0.22007271729436784, "grad_norm": 1.2265625, "learning_rate": 1.9765263002625776e-05, "loss": 0.5335, "step": 1604 }, { "epoch": 0.2202099197365713, "grad_norm": 1.2734375, "learning_rate": 1.9764951977620336e-05, "loss": 0.4878, "step": 1605 }, { "epoch": 0.22034712217877478, "grad_norm": 1.2421875, "learning_rate": 1.9764640749148627e-05, "loss": 0.4961, "step": 1606 }, { "epoch": 0.22048432462097825, "grad_norm": 1.34375, "learning_rate": 1.9764329317217135e-05, "loss": 0.6353, "step": 1607 }, { "epoch": 0.22062152706318172, "grad_norm": 1.3046875, "learning_rate": 1.976401768183235e-05, "loss": 0.5738, "step": 1608 }, { "epoch": 0.2207587295053852, "grad_norm": 1.2734375, "learning_rate": 1.9763705843000758e-05, "loss": 0.573, "step": 1609 }, { "epoch": 0.22089593194758866, "grad_norm": 1.21875, "learning_rate": 1.9763393800728867e-05, "loss": 0.5028, "step": 1610 }, { "epoch": 0.22103313438979214, "grad_norm": 1.171875, "learning_rate": 1.976308155502317e-05, "loss": 0.5503, "step": 1611 }, { "epoch": 0.2211703368319956, "grad_norm": 1.34375, "learning_rate": 1.9762769105890176e-05, "loss": 0.6157, "step": 1612 }, { "epoch": 0.22130753927419908, "grad_norm": 1.28125, "learning_rate": 1.97624564533364e-05, "loss": 0.5362, "step": 1613 }, { "epoch": 0.22144474171640255, "grad_norm": 1.3515625, "learning_rate": 1.976214359736835e-05, "loss": 0.6569, "step": 1614 }, { "epoch": 0.22158194415860602, "grad_norm": 1.28125, "learning_rate": 1.9761830537992543e-05, "loss": 0.5477, "step": 1615 }, { "epoch": 0.2217191466008095, "grad_norm": 1.2578125, "learning_rate": 1.9761517275215513e-05, "loss": 0.5444, "step": 1616 }, { "epoch": 0.22185634904301296, "grad_norm": 1.2734375, "learning_rate": 1.976120380904378e-05, "loss": 0.5553, "step": 1617 }, { "epoch": 0.22199355148521643, "grad_norm": 1.296875, "learning_rate": 1.9760890139483872e-05, "loss": 0.5388, "step": 1618 }, { "epoch": 0.2221307539274199, "grad_norm": 1.328125, "learning_rate": 1.9760576266542332e-05, "loss": 0.5668, "step": 1619 }, { "epoch": 0.22226795636962338, "grad_norm": 1.3203125, "learning_rate": 1.976026219022569e-05, "loss": 0.5818, "step": 1620 }, { "epoch": 0.22240515881182685, "grad_norm": 1.2421875, "learning_rate": 1.9759947910540505e-05, "loss": 0.562, "step": 1621 }, { "epoch": 0.22254236125403032, "grad_norm": 1.265625, "learning_rate": 1.9759633427493314e-05, "loss": 0.5579, "step": 1622 }, { "epoch": 0.2226795636962338, "grad_norm": 1.171875, "learning_rate": 1.9759318741090672e-05, "loss": 0.4855, "step": 1623 }, { "epoch": 0.22281676613843726, "grad_norm": 1.1328125, "learning_rate": 1.975900385133914e-05, "loss": 0.4888, "step": 1624 }, { "epoch": 0.22295396858064073, "grad_norm": 1.3046875, "learning_rate": 1.9758688758245275e-05, "loss": 0.6616, "step": 1625 }, { "epoch": 0.2230911710228442, "grad_norm": 1.3671875, "learning_rate": 1.9758373461815644e-05, "loss": 0.5678, "step": 1626 }, { "epoch": 0.22322837346504767, "grad_norm": 1.34375, "learning_rate": 1.9758057962056815e-05, "loss": 0.6469, "step": 1627 }, { "epoch": 0.22336557590725115, "grad_norm": 1.2421875, "learning_rate": 1.9757742258975364e-05, "loss": 0.5363, "step": 1628 }, { "epoch": 0.22350277834945462, "grad_norm": 1.25, "learning_rate": 1.9757426352577867e-05, "loss": 0.5737, "step": 1629 }, { "epoch": 0.2236399807916581, "grad_norm": 1.0390625, "learning_rate": 1.9757110242870907e-05, "loss": 0.4164, "step": 1630 }, { "epoch": 0.22377718323386156, "grad_norm": 1.171875, "learning_rate": 1.9756793929861075e-05, "loss": 0.52, "step": 1631 }, { "epoch": 0.22391438567606503, "grad_norm": 1.2421875, "learning_rate": 1.9756477413554953e-05, "loss": 0.5247, "step": 1632 }, { "epoch": 0.2240515881182685, "grad_norm": 1.1640625, "learning_rate": 1.9756160693959145e-05, "loss": 0.5029, "step": 1633 }, { "epoch": 0.22418879056047197, "grad_norm": 1.2421875, "learning_rate": 1.9755843771080244e-05, "loss": 0.6366, "step": 1634 }, { "epoch": 0.22432599300267544, "grad_norm": 1.3125, "learning_rate": 1.9755526644924857e-05, "loss": 0.6106, "step": 1635 }, { "epoch": 0.22446319544487892, "grad_norm": 1.2265625, "learning_rate": 1.9755209315499588e-05, "loss": 0.5224, "step": 1636 }, { "epoch": 0.2246003978870824, "grad_norm": 1.25, "learning_rate": 1.9754891782811053e-05, "loss": 0.564, "step": 1637 }, { "epoch": 0.22473760032928586, "grad_norm": 1.34375, "learning_rate": 1.9754574046865867e-05, "loss": 0.5883, "step": 1638 }, { "epoch": 0.22487480277148933, "grad_norm": 1.2890625, "learning_rate": 1.9754256107670652e-05, "loss": 0.5778, "step": 1639 }, { "epoch": 0.2250120052136928, "grad_norm": 1.234375, "learning_rate": 1.975393796523203e-05, "loss": 0.5268, "step": 1640 }, { "epoch": 0.22514920765589627, "grad_norm": 1.28125, "learning_rate": 1.9753619619556627e-05, "loss": 0.5817, "step": 1641 }, { "epoch": 0.22528641009809974, "grad_norm": 1.3359375, "learning_rate": 1.9753301070651085e-05, "loss": 0.6711, "step": 1642 }, { "epoch": 0.22542361254030321, "grad_norm": 1.2421875, "learning_rate": 1.9752982318522033e-05, "loss": 0.5875, "step": 1643 }, { "epoch": 0.22556081498250669, "grad_norm": 1.3671875, "learning_rate": 1.975266336317612e-05, "loss": 0.6276, "step": 1644 }, { "epoch": 0.22569801742471016, "grad_norm": 1.2421875, "learning_rate": 1.9752344204619986e-05, "loss": 0.5351, "step": 1645 }, { "epoch": 0.22583521986691363, "grad_norm": 1.2421875, "learning_rate": 1.9752024842860282e-05, "loss": 0.592, "step": 1646 }, { "epoch": 0.2259724223091171, "grad_norm": 1.3125, "learning_rate": 1.9751705277903666e-05, "loss": 0.6134, "step": 1647 }, { "epoch": 0.22610962475132057, "grad_norm": 1.234375, "learning_rate": 1.9751385509756792e-05, "loss": 0.5474, "step": 1648 }, { "epoch": 0.22624682719352404, "grad_norm": 1.296875, "learning_rate": 1.9751065538426328e-05, "loss": 0.5508, "step": 1649 }, { "epoch": 0.2263840296357275, "grad_norm": 1.3046875, "learning_rate": 1.9750745363918934e-05, "loss": 0.6368, "step": 1650 }, { "epoch": 0.22652123207793098, "grad_norm": 1.2265625, "learning_rate": 1.9750424986241286e-05, "loss": 0.5392, "step": 1651 }, { "epoch": 0.22665843452013446, "grad_norm": 1.3359375, "learning_rate": 1.9750104405400055e-05, "loss": 0.5826, "step": 1652 }, { "epoch": 0.22679563696233793, "grad_norm": 1.3125, "learning_rate": 1.9749783621401932e-05, "loss": 0.5953, "step": 1653 }, { "epoch": 0.2269328394045414, "grad_norm": 1.2578125, "learning_rate": 1.974946263425359e-05, "loss": 0.5882, "step": 1654 }, { "epoch": 0.22707004184674487, "grad_norm": 1.1953125, "learning_rate": 1.974914144396172e-05, "loss": 0.4802, "step": 1655 }, { "epoch": 0.22720724428894834, "grad_norm": 1.2109375, "learning_rate": 1.9748820050533014e-05, "loss": 0.5772, "step": 1656 }, { "epoch": 0.2273444467311518, "grad_norm": 1.2890625, "learning_rate": 1.9748498453974173e-05, "loss": 0.5798, "step": 1657 }, { "epoch": 0.22748164917335528, "grad_norm": 1.3046875, "learning_rate": 1.974817665429189e-05, "loss": 0.5783, "step": 1658 }, { "epoch": 0.22761885161555875, "grad_norm": 1.2109375, "learning_rate": 1.974785465149288e-05, "loss": 0.4989, "step": 1659 }, { "epoch": 0.22775605405776223, "grad_norm": 1.1875, "learning_rate": 1.9747532445583845e-05, "loss": 0.533, "step": 1660 }, { "epoch": 0.2278932564999657, "grad_norm": 1.2734375, "learning_rate": 1.97472100365715e-05, "loss": 0.6225, "step": 1661 }, { "epoch": 0.22803045894216917, "grad_norm": 1.265625, "learning_rate": 1.9746887424462562e-05, "loss": 0.5682, "step": 1662 }, { "epoch": 0.22816766138437264, "grad_norm": 1.140625, "learning_rate": 1.9746564609263758e-05, "loss": 0.4685, "step": 1663 }, { "epoch": 0.2283048638265761, "grad_norm": 1.1328125, "learning_rate": 1.974624159098181e-05, "loss": 0.5085, "step": 1664 }, { "epoch": 0.22844206626877958, "grad_norm": 1.203125, "learning_rate": 1.974591836962345e-05, "loss": 0.4946, "step": 1665 }, { "epoch": 0.22857926871098305, "grad_norm": 1.328125, "learning_rate": 1.974559494519541e-05, "loss": 0.5661, "step": 1666 }, { "epoch": 0.22871647115318652, "grad_norm": 1.25, "learning_rate": 1.9745271317704433e-05, "loss": 0.5803, "step": 1667 }, { "epoch": 0.22885367359539, "grad_norm": 1.2578125, "learning_rate": 1.974494748715726e-05, "loss": 0.5539, "step": 1668 }, { "epoch": 0.22899087603759347, "grad_norm": 1.3125, "learning_rate": 1.9744623453560638e-05, "loss": 0.6038, "step": 1669 }, { "epoch": 0.22912807847979694, "grad_norm": 1.390625, "learning_rate": 1.974429921692132e-05, "loss": 0.5568, "step": 1670 }, { "epoch": 0.2292652809220004, "grad_norm": 1.2265625, "learning_rate": 1.9743974777246064e-05, "loss": 0.4933, "step": 1671 }, { "epoch": 0.22940248336420388, "grad_norm": 1.265625, "learning_rate": 1.9743650134541624e-05, "loss": 0.5113, "step": 1672 }, { "epoch": 0.22953968580640735, "grad_norm": 1.2421875, "learning_rate": 1.974332528881477e-05, "loss": 0.4978, "step": 1673 }, { "epoch": 0.22967688824861082, "grad_norm": 1.28125, "learning_rate": 1.974300024007227e-05, "loss": 0.5941, "step": 1674 }, { "epoch": 0.2298140906908143, "grad_norm": 1.2890625, "learning_rate": 1.9742674988320894e-05, "loss": 0.5672, "step": 1675 }, { "epoch": 0.22995129313301776, "grad_norm": 1.3046875, "learning_rate": 1.974234953356742e-05, "loss": 0.5844, "step": 1676 }, { "epoch": 0.23008849557522124, "grad_norm": 1.2421875, "learning_rate": 1.974202387581863e-05, "loss": 0.4682, "step": 1677 }, { "epoch": 0.2302256980174247, "grad_norm": 1.2421875, "learning_rate": 1.974169801508131e-05, "loss": 0.5361, "step": 1678 }, { "epoch": 0.23036290045962818, "grad_norm": 1.2890625, "learning_rate": 1.974137195136225e-05, "loss": 0.5273, "step": 1679 }, { "epoch": 0.23050010290183165, "grad_norm": 1.2421875, "learning_rate": 1.974104568466824e-05, "loss": 0.5357, "step": 1680 }, { "epoch": 0.23063730534403512, "grad_norm": 1.296875, "learning_rate": 1.9740719215006084e-05, "loss": 0.6331, "step": 1681 }, { "epoch": 0.2307745077862386, "grad_norm": 1.140625, "learning_rate": 1.9740392542382582e-05, "loss": 0.4694, "step": 1682 }, { "epoch": 0.23091171022844206, "grad_norm": 1.3046875, "learning_rate": 1.974006566680454e-05, "loss": 0.5518, "step": 1683 }, { "epoch": 0.23104891267064553, "grad_norm": 1.3671875, "learning_rate": 1.973973858827877e-05, "loss": 0.5426, "step": 1684 }, { "epoch": 0.231186115112849, "grad_norm": 1.15625, "learning_rate": 1.9739411306812087e-05, "loss": 0.4637, "step": 1685 }, { "epoch": 0.23132331755505248, "grad_norm": 1.1953125, "learning_rate": 1.9739083822411308e-05, "loss": 0.487, "step": 1686 }, { "epoch": 0.23146051999725595, "grad_norm": 1.21875, "learning_rate": 1.973875613508326e-05, "loss": 0.4329, "step": 1687 }, { "epoch": 0.23159772243945942, "grad_norm": 1.2421875, "learning_rate": 1.9738428244834766e-05, "loss": 0.5499, "step": 1688 }, { "epoch": 0.2317349248816629, "grad_norm": 1.1953125, "learning_rate": 1.9738100151672667e-05, "loss": 0.5503, "step": 1689 }, { "epoch": 0.23187212732386636, "grad_norm": 1.109375, "learning_rate": 1.973777185560379e-05, "loss": 0.4693, "step": 1690 }, { "epoch": 0.23200932976606983, "grad_norm": 1.359375, "learning_rate": 1.973744335663498e-05, "loss": 0.5538, "step": 1691 }, { "epoch": 0.2321465322082733, "grad_norm": 1.21875, "learning_rate": 1.973711465477308e-05, "loss": 0.529, "step": 1692 }, { "epoch": 0.23228373465047678, "grad_norm": 1.1953125, "learning_rate": 1.973678575002494e-05, "loss": 0.562, "step": 1693 }, { "epoch": 0.23242093709268025, "grad_norm": 1.1796875, "learning_rate": 1.9736456642397413e-05, "loss": 0.5266, "step": 1694 }, { "epoch": 0.23255813953488372, "grad_norm": 1.2265625, "learning_rate": 1.9736127331897358e-05, "loss": 0.5408, "step": 1695 }, { "epoch": 0.2326953419770872, "grad_norm": 1.2890625, "learning_rate": 1.9735797818531634e-05, "loss": 0.5747, "step": 1696 }, { "epoch": 0.23283254441929066, "grad_norm": 1.3046875, "learning_rate": 1.9735468102307106e-05, "loss": 0.5558, "step": 1697 }, { "epoch": 0.23296974686149413, "grad_norm": 1.234375, "learning_rate": 1.9735138183230646e-05, "loss": 0.5474, "step": 1698 }, { "epoch": 0.2331069493036976, "grad_norm": 1.1484375, "learning_rate": 1.973480806130913e-05, "loss": 0.5, "step": 1699 }, { "epoch": 0.23324415174590107, "grad_norm": 1.1953125, "learning_rate": 1.9734477736549435e-05, "loss": 0.5196, "step": 1700 }, { "epoch": 0.23338135418810455, "grad_norm": 1.2265625, "learning_rate": 1.9734147208958444e-05, "loss": 0.5459, "step": 1701 }, { "epoch": 0.23351855663030802, "grad_norm": 1.328125, "learning_rate": 1.9733816478543045e-05, "loss": 0.6165, "step": 1702 }, { "epoch": 0.2336557590725115, "grad_norm": 1.2734375, "learning_rate": 1.9733485545310124e-05, "loss": 0.5607, "step": 1703 }, { "epoch": 0.23379296151471496, "grad_norm": 1.2578125, "learning_rate": 1.9733154409266584e-05, "loss": 0.5582, "step": 1704 }, { "epoch": 0.23393016395691843, "grad_norm": 1.203125, "learning_rate": 1.973282307041932e-05, "loss": 0.5233, "step": 1705 }, { "epoch": 0.2340673663991219, "grad_norm": 1.1875, "learning_rate": 1.973249152877524e-05, "loss": 0.527, "step": 1706 }, { "epoch": 0.23420456884132537, "grad_norm": 1.125, "learning_rate": 1.9732159784341242e-05, "loss": 0.4612, "step": 1707 }, { "epoch": 0.23434177128352884, "grad_norm": 1.25, "learning_rate": 1.9731827837124248e-05, "loss": 0.5677, "step": 1708 }, { "epoch": 0.23447897372573231, "grad_norm": 1.40625, "learning_rate": 1.9731495687131174e-05, "loss": 0.6234, "step": 1709 }, { "epoch": 0.23461617616793579, "grad_norm": 1.3203125, "learning_rate": 1.9731163334368933e-05, "loss": 0.6269, "step": 1710 }, { "epoch": 0.23475337861013926, "grad_norm": 1.15625, "learning_rate": 1.9730830778844464e-05, "loss": 0.4939, "step": 1711 }, { "epoch": 0.23489058105234273, "grad_norm": 1.1484375, "learning_rate": 1.9730498020564684e-05, "loss": 0.5227, "step": 1712 }, { "epoch": 0.2350277834945462, "grad_norm": 1.2890625, "learning_rate": 1.973016505953653e-05, "loss": 0.5435, "step": 1713 }, { "epoch": 0.23516498593674967, "grad_norm": 1.4375, "learning_rate": 1.972983189576694e-05, "loss": 0.56, "step": 1714 }, { "epoch": 0.23530218837895314, "grad_norm": 1.28125, "learning_rate": 1.972949852926286e-05, "loss": 0.5478, "step": 1715 }, { "epoch": 0.2354393908211566, "grad_norm": 1.2421875, "learning_rate": 1.9729164960031227e-05, "loss": 0.5698, "step": 1716 }, { "epoch": 0.23557659326336008, "grad_norm": 1.1796875, "learning_rate": 1.9728831188079002e-05, "loss": 0.5177, "step": 1717 }, { "epoch": 0.23571379570556356, "grad_norm": 1.2109375, "learning_rate": 1.972849721341313e-05, "loss": 0.5387, "step": 1718 }, { "epoch": 0.23585099814776703, "grad_norm": 1.3515625, "learning_rate": 1.9728163036040575e-05, "loss": 0.5263, "step": 1719 }, { "epoch": 0.2359882005899705, "grad_norm": 1.25, "learning_rate": 1.97278286559683e-05, "loss": 0.5591, "step": 1720 }, { "epoch": 0.23612540303217397, "grad_norm": 1.4296875, "learning_rate": 1.972749407320327e-05, "loss": 0.621, "step": 1721 }, { "epoch": 0.23626260547437744, "grad_norm": 1.3359375, "learning_rate": 1.9727159287752462e-05, "loss": 0.5645, "step": 1722 }, { "epoch": 0.2363998079165809, "grad_norm": 1.1796875, "learning_rate": 1.9726824299622843e-05, "loss": 0.4914, "step": 1723 }, { "epoch": 0.23653701035878438, "grad_norm": 1.265625, "learning_rate": 1.97264891088214e-05, "loss": 0.5664, "step": 1724 }, { "epoch": 0.23667421280098785, "grad_norm": 1.4140625, "learning_rate": 1.9726153715355115e-05, "loss": 0.6383, "step": 1725 }, { "epoch": 0.23681141524319133, "grad_norm": 1.21875, "learning_rate": 1.9725818119230976e-05, "loss": 0.5096, "step": 1726 }, { "epoch": 0.2369486176853948, "grad_norm": 1.1953125, "learning_rate": 1.9725482320455973e-05, "loss": 0.5242, "step": 1727 }, { "epoch": 0.23708582012759827, "grad_norm": 1.2421875, "learning_rate": 1.972514631903711e-05, "loss": 0.5653, "step": 1728 }, { "epoch": 0.23722302256980174, "grad_norm": 1.390625, "learning_rate": 1.9724810114981377e-05, "loss": 0.6371, "step": 1729 }, { "epoch": 0.2373602250120052, "grad_norm": 1.171875, "learning_rate": 1.9724473708295795e-05, "loss": 0.5006, "step": 1730 }, { "epoch": 0.23749742745420868, "grad_norm": 1.234375, "learning_rate": 1.9724137098987358e-05, "loss": 0.5407, "step": 1731 }, { "epoch": 0.23763462989641215, "grad_norm": 1.296875, "learning_rate": 1.9723800287063087e-05, "loss": 0.5565, "step": 1732 }, { "epoch": 0.23777183233861562, "grad_norm": 1.28125, "learning_rate": 1.972346327253e-05, "loss": 0.604, "step": 1733 }, { "epoch": 0.2379090347808191, "grad_norm": 1.1640625, "learning_rate": 1.972312605539512e-05, "loss": 0.5233, "step": 1734 }, { "epoch": 0.23804623722302257, "grad_norm": 1.28125, "learning_rate": 1.9722788635665468e-05, "loss": 0.6113, "step": 1735 }, { "epoch": 0.23818343966522604, "grad_norm": 1.21875, "learning_rate": 1.972245101334808e-05, "loss": 0.5345, "step": 1736 }, { "epoch": 0.2383206421074295, "grad_norm": 1.328125, "learning_rate": 1.972211318844999e-05, "loss": 0.6099, "step": 1737 }, { "epoch": 0.23845784454963298, "grad_norm": 1.2734375, "learning_rate": 1.9721775160978235e-05, "loss": 0.5653, "step": 1738 }, { "epoch": 0.23859504699183645, "grad_norm": 1.2109375, "learning_rate": 1.972143693093986e-05, "loss": 0.5401, "step": 1739 }, { "epoch": 0.23873224943403992, "grad_norm": 1.1796875, "learning_rate": 1.972109849834191e-05, "loss": 0.5135, "step": 1740 }, { "epoch": 0.2388694518762434, "grad_norm": 1.265625, "learning_rate": 1.972075986319144e-05, "loss": 0.5885, "step": 1741 }, { "epoch": 0.23900665431844687, "grad_norm": 1.2421875, "learning_rate": 1.9720421025495505e-05, "loss": 0.4825, "step": 1742 }, { "epoch": 0.23914385676065034, "grad_norm": 1.3984375, "learning_rate": 1.9720081985261164e-05, "loss": 0.5726, "step": 1743 }, { "epoch": 0.2392810592028538, "grad_norm": 1.15625, "learning_rate": 1.971974274249548e-05, "loss": 0.4853, "step": 1744 }, { "epoch": 0.23941826164505728, "grad_norm": 1.2265625, "learning_rate": 1.9719403297205522e-05, "loss": 0.4922, "step": 1745 }, { "epoch": 0.23955546408726075, "grad_norm": 1.328125, "learning_rate": 1.9719063649398373e-05, "loss": 0.591, "step": 1746 }, { "epoch": 0.23969266652946422, "grad_norm": 1.2734375, "learning_rate": 1.9718723799081095e-05, "loss": 0.5715, "step": 1747 }, { "epoch": 0.2398298689716677, "grad_norm": 1.34375, "learning_rate": 1.9718383746260778e-05, "loss": 0.6124, "step": 1748 }, { "epoch": 0.23996707141387116, "grad_norm": 1.2265625, "learning_rate": 1.9718043490944502e-05, "loss": 0.5314, "step": 1749 }, { "epoch": 0.24010427385607463, "grad_norm": 1.3671875, "learning_rate": 1.9717703033139365e-05, "loss": 0.6013, "step": 1750 }, { "epoch": 0.2402414762982781, "grad_norm": 1.1796875, "learning_rate": 1.971736237285245e-05, "loss": 0.4756, "step": 1751 }, { "epoch": 0.24037867874048158, "grad_norm": 1.1484375, "learning_rate": 1.9717021510090863e-05, "loss": 0.4793, "step": 1752 }, { "epoch": 0.24051588118268505, "grad_norm": 1.4140625, "learning_rate": 1.9716680444861706e-05, "loss": 0.6043, "step": 1753 }, { "epoch": 0.24065308362488852, "grad_norm": 1.1484375, "learning_rate": 1.9716339177172082e-05, "loss": 0.464, "step": 1754 }, { "epoch": 0.240790286067092, "grad_norm": 1.171875, "learning_rate": 1.9715997707029105e-05, "loss": 0.4825, "step": 1755 }, { "epoch": 0.24092748850929546, "grad_norm": 1.265625, "learning_rate": 1.971565603443989e-05, "loss": 0.5278, "step": 1756 }, { "epoch": 0.24106469095149893, "grad_norm": 1.328125, "learning_rate": 1.9715314159411552e-05, "loss": 0.5756, "step": 1757 }, { "epoch": 0.2412018933937024, "grad_norm": 1.203125, "learning_rate": 1.971497208195122e-05, "loss": 0.4984, "step": 1758 }, { "epoch": 0.24133909583590588, "grad_norm": 1.2421875, "learning_rate": 1.9714629802066016e-05, "loss": 0.5315, "step": 1759 }, { "epoch": 0.24147629827810935, "grad_norm": 1.328125, "learning_rate": 1.9714287319763076e-05, "loss": 0.5834, "step": 1760 }, { "epoch": 0.24161350072031282, "grad_norm": 1.3984375, "learning_rate": 1.9713944635049533e-05, "loss": 0.5985, "step": 1761 }, { "epoch": 0.2417507031625163, "grad_norm": 1.2890625, "learning_rate": 1.9713601747932532e-05, "loss": 0.5927, "step": 1762 }, { "epoch": 0.24188790560471976, "grad_norm": 1.1953125, "learning_rate": 1.9713258658419213e-05, "loss": 0.5322, "step": 1763 }, { "epoch": 0.24202510804692323, "grad_norm": 1.328125, "learning_rate": 1.971291536651673e-05, "loss": 0.631, "step": 1764 }, { "epoch": 0.2421623104891267, "grad_norm": 1.3515625, "learning_rate": 1.971257187223223e-05, "loss": 0.6478, "step": 1765 }, { "epoch": 0.24229951293133017, "grad_norm": 1.171875, "learning_rate": 1.9712228175572868e-05, "loss": 0.5405, "step": 1766 }, { "epoch": 0.24243671537353365, "grad_norm": 1.234375, "learning_rate": 1.9711884276545818e-05, "loss": 0.5094, "step": 1767 }, { "epoch": 0.24257391781573712, "grad_norm": 1.1796875, "learning_rate": 1.9711540175158233e-05, "loss": 0.5298, "step": 1768 }, { "epoch": 0.2427111202579406, "grad_norm": 1.3359375, "learning_rate": 1.971119587141729e-05, "loss": 0.6107, "step": 1769 }, { "epoch": 0.24284832270014406, "grad_norm": 1.3203125, "learning_rate": 1.9710851365330156e-05, "loss": 0.5861, "step": 1770 }, { "epoch": 0.24298552514234753, "grad_norm": 1.1953125, "learning_rate": 1.9710506656904015e-05, "loss": 0.511, "step": 1771 }, { "epoch": 0.243122727584551, "grad_norm": 1.3984375, "learning_rate": 1.971016174614605e-05, "loss": 0.6262, "step": 1772 }, { "epoch": 0.24325993002675447, "grad_norm": 1.2265625, "learning_rate": 1.970981663306345e-05, "loss": 0.565, "step": 1773 }, { "epoch": 0.24339713246895794, "grad_norm": 1.234375, "learning_rate": 1.9709471317663395e-05, "loss": 0.5812, "step": 1774 }, { "epoch": 0.24353433491116142, "grad_norm": 1.3984375, "learning_rate": 1.970912579995309e-05, "loss": 0.5815, "step": 1775 }, { "epoch": 0.2436715373533649, "grad_norm": 1.375, "learning_rate": 1.9708780079939733e-05, "loss": 0.6456, "step": 1776 }, { "epoch": 0.24380873979556836, "grad_norm": 1.296875, "learning_rate": 1.970843415763052e-05, "loss": 0.6054, "step": 1777 }, { "epoch": 0.24394594223777183, "grad_norm": 1.3203125, "learning_rate": 1.970808803303267e-05, "loss": 0.5902, "step": 1778 }, { "epoch": 0.2440831446799753, "grad_norm": 1.140625, "learning_rate": 1.9707741706153387e-05, "loss": 0.4109, "step": 1779 }, { "epoch": 0.24422034712217877, "grad_norm": 1.3515625, "learning_rate": 1.970739517699989e-05, "loss": 0.5441, "step": 1780 }, { "epoch": 0.24435754956438224, "grad_norm": 1.3125, "learning_rate": 1.97070484455794e-05, "loss": 0.6167, "step": 1781 }, { "epoch": 0.2444947520065857, "grad_norm": 1.234375, "learning_rate": 1.9706701511899142e-05, "loss": 0.4839, "step": 1782 }, { "epoch": 0.24463195444878919, "grad_norm": 1.28125, "learning_rate": 1.970635437596634e-05, "loss": 0.5873, "step": 1783 }, { "epoch": 0.24476915689099266, "grad_norm": 1.3125, "learning_rate": 1.9706007037788234e-05, "loss": 0.5805, "step": 1784 }, { "epoch": 0.24490635933319613, "grad_norm": 1.25, "learning_rate": 1.9705659497372056e-05, "loss": 0.5727, "step": 1785 }, { "epoch": 0.2450435617753996, "grad_norm": 1.1796875, "learning_rate": 1.970531175472505e-05, "loss": 0.5432, "step": 1786 }, { "epoch": 0.24518076421760307, "grad_norm": 1.2890625, "learning_rate": 1.9704963809854463e-05, "loss": 0.5769, "step": 1787 }, { "epoch": 0.24531796665980654, "grad_norm": 1.2109375, "learning_rate": 1.9704615662767542e-05, "loss": 0.506, "step": 1788 }, { "epoch": 0.24545516910201, "grad_norm": 1.2265625, "learning_rate": 1.9704267313471542e-05, "loss": 0.5702, "step": 1789 }, { "epoch": 0.24559237154421348, "grad_norm": 1.1640625, "learning_rate": 1.970391876197372e-05, "loss": 0.4528, "step": 1790 }, { "epoch": 0.24572957398641695, "grad_norm": 1.484375, "learning_rate": 1.9703570008281343e-05, "loss": 0.5817, "step": 1791 }, { "epoch": 0.24586677642862043, "grad_norm": 1.3125, "learning_rate": 1.9703221052401672e-05, "loss": 0.5658, "step": 1792 }, { "epoch": 0.2460039788708239, "grad_norm": 1.3046875, "learning_rate": 1.9702871894341985e-05, "loss": 0.6009, "step": 1793 }, { "epoch": 0.24614118131302737, "grad_norm": 1.3125, "learning_rate": 1.970252253410955e-05, "loss": 0.5792, "step": 1794 }, { "epoch": 0.24627838375523084, "grad_norm": 1.28125, "learning_rate": 1.970217297171165e-05, "loss": 0.5135, "step": 1795 }, { "epoch": 0.2464155861974343, "grad_norm": 1.2421875, "learning_rate": 1.970182320715557e-05, "loss": 0.546, "step": 1796 }, { "epoch": 0.24655278863963778, "grad_norm": 1.2734375, "learning_rate": 1.970147324044859e-05, "loss": 0.5836, "step": 1797 }, { "epoch": 0.24668999108184125, "grad_norm": 1.4140625, "learning_rate": 1.970112307159801e-05, "loss": 0.627, "step": 1798 }, { "epoch": 0.24682719352404472, "grad_norm": 1.3046875, "learning_rate": 1.9700772700611132e-05, "loss": 0.5324, "step": 1799 }, { "epoch": 0.2469643959662482, "grad_norm": 1.25, "learning_rate": 1.970042212749524e-05, "loss": 0.5705, "step": 1800 }, { "epoch": 0.24710159840845167, "grad_norm": 1.125, "learning_rate": 1.970007135225765e-05, "loss": 0.4775, "step": 1801 }, { "epoch": 0.24723880085065514, "grad_norm": 1.2734375, "learning_rate": 1.9699720374905668e-05, "loss": 0.5734, "step": 1802 }, { "epoch": 0.2473760032928586, "grad_norm": 1.3046875, "learning_rate": 1.9699369195446607e-05, "loss": 0.596, "step": 1803 }, { "epoch": 0.24751320573506208, "grad_norm": 1.3984375, "learning_rate": 1.9699017813887784e-05, "loss": 0.625, "step": 1804 }, { "epoch": 0.24765040817726555, "grad_norm": 1.3046875, "learning_rate": 1.9698666230236523e-05, "loss": 0.5587, "step": 1805 }, { "epoch": 0.24778761061946902, "grad_norm": 1.2578125, "learning_rate": 1.969831444450015e-05, "loss": 0.5909, "step": 1806 }, { "epoch": 0.2479248130616725, "grad_norm": 1.296875, "learning_rate": 1.969796245668599e-05, "loss": 0.6435, "step": 1807 }, { "epoch": 0.24806201550387597, "grad_norm": 1.25, "learning_rate": 1.969761026680138e-05, "loss": 0.5846, "step": 1808 }, { "epoch": 0.24819921794607944, "grad_norm": 1.125, "learning_rate": 1.9697257874853654e-05, "loss": 0.4653, "step": 1809 }, { "epoch": 0.2483364203882829, "grad_norm": 1.3359375, "learning_rate": 1.9696905280850164e-05, "loss": 0.6306, "step": 1810 }, { "epoch": 0.24847362283048638, "grad_norm": 1.2578125, "learning_rate": 1.9696552484798253e-05, "loss": 0.5369, "step": 1811 }, { "epoch": 0.24861082527268985, "grad_norm": 1.3125, "learning_rate": 1.9696199486705267e-05, "loss": 0.6018, "step": 1812 }, { "epoch": 0.24874802771489332, "grad_norm": 1.171875, "learning_rate": 1.969584628657857e-05, "loss": 0.5171, "step": 1813 }, { "epoch": 0.2488852301570968, "grad_norm": 1.2578125, "learning_rate": 1.969549288442551e-05, "loss": 0.4893, "step": 1814 }, { "epoch": 0.24902243259930026, "grad_norm": 1.2734375, "learning_rate": 1.969513928025346e-05, "loss": 0.5943, "step": 1815 }, { "epoch": 0.24915963504150374, "grad_norm": 1.265625, "learning_rate": 1.9694785474069784e-05, "loss": 0.5451, "step": 1816 }, { "epoch": 0.2492968374837072, "grad_norm": 1.2421875, "learning_rate": 1.9694431465881854e-05, "loss": 0.5899, "step": 1817 }, { "epoch": 0.24943403992591068, "grad_norm": 1.3515625, "learning_rate": 1.969407725569705e-05, "loss": 0.5665, "step": 1818 }, { "epoch": 0.24957124236811415, "grad_norm": 1.25, "learning_rate": 1.9693722843522746e-05, "loss": 0.5616, "step": 1819 }, { "epoch": 0.24970844481031762, "grad_norm": 1.203125, "learning_rate": 1.9693368229366333e-05, "loss": 0.5039, "step": 1820 }, { "epoch": 0.2498456472525211, "grad_norm": 1.296875, "learning_rate": 1.9693013413235196e-05, "loss": 0.588, "step": 1821 }, { "epoch": 0.24998284969472456, "grad_norm": 1.3515625, "learning_rate": 1.969265839513673e-05, "loss": 0.6569, "step": 1822 }, { "epoch": 0.25012005213692806, "grad_norm": 1.21875, "learning_rate": 1.969230317507833e-05, "loss": 0.6087, "step": 1823 }, { "epoch": 0.2502572545791315, "grad_norm": 1.265625, "learning_rate": 1.9691947753067397e-05, "loss": 0.5518, "step": 1824 }, { "epoch": 0.250394457021335, "grad_norm": 1.2734375, "learning_rate": 1.9691592129111343e-05, "loss": 0.54, "step": 1825 }, { "epoch": 0.25053165946353845, "grad_norm": 1.2890625, "learning_rate": 1.9691236303217572e-05, "loss": 0.5737, "step": 1826 }, { "epoch": 0.25066886190574195, "grad_norm": 1.25, "learning_rate": 1.9690880275393498e-05, "loss": 0.5382, "step": 1827 }, { "epoch": 0.2508060643479454, "grad_norm": 1.140625, "learning_rate": 1.9690524045646544e-05, "loss": 0.4979, "step": 1828 }, { "epoch": 0.2509432667901489, "grad_norm": 1.1796875, "learning_rate": 1.9690167613984127e-05, "loss": 0.4919, "step": 1829 }, { "epoch": 0.25108046923235233, "grad_norm": 1.21875, "learning_rate": 1.9689810980413677e-05, "loss": 0.5591, "step": 1830 }, { "epoch": 0.25121767167455583, "grad_norm": 1.2578125, "learning_rate": 1.9689454144942624e-05, "loss": 0.5396, "step": 1831 }, { "epoch": 0.2513548741167593, "grad_norm": 1.359375, "learning_rate": 1.9689097107578405e-05, "loss": 0.6191, "step": 1832 }, { "epoch": 0.2514920765589628, "grad_norm": 1.25, "learning_rate": 1.9688739868328457e-05, "loss": 0.4732, "step": 1833 }, { "epoch": 0.2516292790011662, "grad_norm": 1.2421875, "learning_rate": 1.9688382427200223e-05, "loss": 0.4998, "step": 1834 }, { "epoch": 0.2517664814433697, "grad_norm": 1.1640625, "learning_rate": 1.9688024784201155e-05, "loss": 0.5078, "step": 1835 }, { "epoch": 0.25190368388557316, "grad_norm": 1.234375, "learning_rate": 1.9687666939338696e-05, "loss": 0.5199, "step": 1836 }, { "epoch": 0.25204088632777666, "grad_norm": 1.28125, "learning_rate": 1.9687308892620314e-05, "loss": 0.5943, "step": 1837 }, { "epoch": 0.2521780887699801, "grad_norm": 1.3125, "learning_rate": 1.968695064405346e-05, "loss": 0.5648, "step": 1838 }, { "epoch": 0.2523152912121836, "grad_norm": 1.1875, "learning_rate": 1.9686592193645606e-05, "loss": 0.445, "step": 1839 }, { "epoch": 0.25245249365438704, "grad_norm": 1.3359375, "learning_rate": 1.9686233541404217e-05, "loss": 0.5346, "step": 1840 }, { "epoch": 0.25258969609659054, "grad_norm": 1.3515625, "learning_rate": 1.9685874687336763e-05, "loss": 0.589, "step": 1841 }, { "epoch": 0.252726898538794, "grad_norm": 1.2578125, "learning_rate": 1.9685515631450727e-05, "loss": 0.5438, "step": 1842 }, { "epoch": 0.2528641009809975, "grad_norm": 1.3828125, "learning_rate": 1.9685156373753585e-05, "loss": 0.5501, "step": 1843 }, { "epoch": 0.25300130342320093, "grad_norm": 1.234375, "learning_rate": 1.968479691425283e-05, "loss": 0.5149, "step": 1844 }, { "epoch": 0.25313850586540443, "grad_norm": 1.3046875, "learning_rate": 1.9684437252955943e-05, "loss": 0.5806, "step": 1845 }, { "epoch": 0.25327570830760787, "grad_norm": 1.2734375, "learning_rate": 1.9684077389870425e-05, "loss": 0.5787, "step": 1846 }, { "epoch": 0.25341291074981137, "grad_norm": 1.296875, "learning_rate": 1.968371732500377e-05, "loss": 0.5168, "step": 1847 }, { "epoch": 0.2535501131920148, "grad_norm": 1.3203125, "learning_rate": 1.9683357058363478e-05, "loss": 0.5468, "step": 1848 }, { "epoch": 0.2536873156342183, "grad_norm": 1.390625, "learning_rate": 1.9682996589957066e-05, "loss": 0.5832, "step": 1849 }, { "epoch": 0.25382451807642176, "grad_norm": 1.3359375, "learning_rate": 1.9682635919792037e-05, "loss": 0.5622, "step": 1850 }, { "epoch": 0.25396172051862526, "grad_norm": 1.234375, "learning_rate": 1.9682275047875906e-05, "loss": 0.5441, "step": 1851 }, { "epoch": 0.2540989229608287, "grad_norm": 1.2734375, "learning_rate": 1.9681913974216192e-05, "loss": 0.5057, "step": 1852 }, { "epoch": 0.2542361254030322, "grad_norm": 1.3046875, "learning_rate": 1.9681552698820427e-05, "loss": 0.5441, "step": 1853 }, { "epoch": 0.25437332784523564, "grad_norm": 1.25, "learning_rate": 1.9681191221696125e-05, "loss": 0.5604, "step": 1854 }, { "epoch": 0.25451053028743914, "grad_norm": 1.296875, "learning_rate": 1.968082954285083e-05, "loss": 0.516, "step": 1855 }, { "epoch": 0.2546477327296426, "grad_norm": 1.1953125, "learning_rate": 1.9680467662292072e-05, "loss": 0.5171, "step": 1856 }, { "epoch": 0.2547849351718461, "grad_norm": 1.1875, "learning_rate": 1.9680105580027388e-05, "loss": 0.5149, "step": 1857 }, { "epoch": 0.2549221376140495, "grad_norm": 1.3203125, "learning_rate": 1.9679743296064332e-05, "loss": 0.5679, "step": 1858 }, { "epoch": 0.255059340056253, "grad_norm": 1.2734375, "learning_rate": 1.9679380810410443e-05, "loss": 0.5282, "step": 1859 }, { "epoch": 0.25519654249845647, "grad_norm": 1.3515625, "learning_rate": 1.9679018123073282e-05, "loss": 0.5905, "step": 1860 }, { "epoch": 0.25533374494065997, "grad_norm": 1.21875, "learning_rate": 1.9678655234060396e-05, "loss": 0.5486, "step": 1861 }, { "epoch": 0.2554709473828634, "grad_norm": 1.40625, "learning_rate": 1.967829214337936e-05, "loss": 0.668, "step": 1862 }, { "epoch": 0.2556081498250669, "grad_norm": 1.2734375, "learning_rate": 1.967792885103773e-05, "loss": 0.5312, "step": 1863 }, { "epoch": 0.25574535226727035, "grad_norm": 1.21875, "learning_rate": 1.9677565357043077e-05, "loss": 0.5672, "step": 1864 }, { "epoch": 0.25588255470947385, "grad_norm": 1.1328125, "learning_rate": 1.9677201661402974e-05, "loss": 0.4653, "step": 1865 }, { "epoch": 0.2560197571516773, "grad_norm": 1.40625, "learning_rate": 1.9676837764125002e-05, "loss": 0.6123, "step": 1866 }, { "epoch": 0.2561569595938808, "grad_norm": 1.296875, "learning_rate": 1.9676473665216746e-05, "loss": 0.5781, "step": 1867 }, { "epoch": 0.25629416203608424, "grad_norm": 1.3828125, "learning_rate": 1.9676109364685784e-05, "loss": 0.5618, "step": 1868 }, { "epoch": 0.25643136447828774, "grad_norm": 1.15625, "learning_rate": 1.967574486253971e-05, "loss": 0.4992, "step": 1869 }, { "epoch": 0.2565685669204912, "grad_norm": 1.1953125, "learning_rate": 1.9675380158786124e-05, "loss": 0.5471, "step": 1870 }, { "epoch": 0.2567057693626947, "grad_norm": 1.2265625, "learning_rate": 1.9675015253432622e-05, "loss": 0.6101, "step": 1871 }, { "epoch": 0.2568429718048981, "grad_norm": 1.265625, "learning_rate": 1.9674650146486806e-05, "loss": 0.5445, "step": 1872 }, { "epoch": 0.2569801742471016, "grad_norm": 1.296875, "learning_rate": 1.967428483795628e-05, "loss": 0.558, "step": 1873 }, { "epoch": 0.25711737668930507, "grad_norm": 1.3046875, "learning_rate": 1.9673919327848666e-05, "loss": 0.579, "step": 1874 }, { "epoch": 0.25725457913150857, "grad_norm": 1.25, "learning_rate": 1.967355361617157e-05, "loss": 0.61, "step": 1875 }, { "epoch": 0.257391781573712, "grad_norm": 1.1484375, "learning_rate": 1.9673187702932613e-05, "loss": 0.4734, "step": 1876 }, { "epoch": 0.2575289840159155, "grad_norm": 1.2109375, "learning_rate": 1.9672821588139427e-05, "loss": 0.535, "step": 1877 }, { "epoch": 0.25766618645811895, "grad_norm": 1.3359375, "learning_rate": 1.9672455271799634e-05, "loss": 0.595, "step": 1878 }, { "epoch": 0.25780338890032245, "grad_norm": 1.3359375, "learning_rate": 1.967208875392087e-05, "loss": 0.5646, "step": 1879 }, { "epoch": 0.2579405913425259, "grad_norm": 1.2109375, "learning_rate": 1.967172203451077e-05, "loss": 0.5332, "step": 1880 }, { "epoch": 0.2580777937847294, "grad_norm": 1.2578125, "learning_rate": 1.9671355113576974e-05, "loss": 0.5455, "step": 1881 }, { "epoch": 0.25821499622693284, "grad_norm": 1.3515625, "learning_rate": 1.967098799112713e-05, "loss": 0.5403, "step": 1882 }, { "epoch": 0.25835219866913633, "grad_norm": 1.359375, "learning_rate": 1.9670620667168885e-05, "loss": 0.5996, "step": 1883 }, { "epoch": 0.2584894011113398, "grad_norm": 1.3359375, "learning_rate": 1.9670253141709895e-05, "loss": 0.5485, "step": 1884 }, { "epoch": 0.2586266035535433, "grad_norm": 1.4765625, "learning_rate": 1.9669885414757816e-05, "loss": 0.5825, "step": 1885 }, { "epoch": 0.2587638059957467, "grad_norm": 1.3203125, "learning_rate": 1.966951748632031e-05, "loss": 0.5666, "step": 1886 }, { "epoch": 0.2589010084379502, "grad_norm": 1.2109375, "learning_rate": 1.966914935640505e-05, "loss": 0.5309, "step": 1887 }, { "epoch": 0.25903821088015366, "grad_norm": 1.3359375, "learning_rate": 1.9668781025019695e-05, "loss": 0.5499, "step": 1888 }, { "epoch": 0.25917541332235716, "grad_norm": 1.3203125, "learning_rate": 1.9668412492171925e-05, "loss": 0.5673, "step": 1889 }, { "epoch": 0.2593126157645606, "grad_norm": 1.2421875, "learning_rate": 1.966804375786942e-05, "loss": 0.5125, "step": 1890 }, { "epoch": 0.2594498182067641, "grad_norm": 1.25, "learning_rate": 1.9667674822119865e-05, "loss": 0.4482, "step": 1891 }, { "epoch": 0.25958702064896755, "grad_norm": 1.25, "learning_rate": 1.9667305684930944e-05, "loss": 0.5369, "step": 1892 }, { "epoch": 0.25972422309117105, "grad_norm": 1.359375, "learning_rate": 1.9666936346310347e-05, "loss": 0.5447, "step": 1893 }, { "epoch": 0.2598614255333745, "grad_norm": 1.3125, "learning_rate": 1.9666566806265776e-05, "loss": 0.5689, "step": 1894 }, { "epoch": 0.259998627975578, "grad_norm": 1.1875, "learning_rate": 1.9666197064804922e-05, "loss": 0.5268, "step": 1895 }, { "epoch": 0.26013583041778143, "grad_norm": 1.1640625, "learning_rate": 1.9665827121935493e-05, "loss": 0.5245, "step": 1896 }, { "epoch": 0.26027303285998493, "grad_norm": 1.3671875, "learning_rate": 1.9665456977665204e-05, "loss": 0.5155, "step": 1897 }, { "epoch": 0.2604102353021884, "grad_norm": 1.65625, "learning_rate": 1.966508663200176e-05, "loss": 0.508, "step": 1898 }, { "epoch": 0.2605474377443919, "grad_norm": 1.65625, "learning_rate": 1.9664716084952877e-05, "loss": 0.6057, "step": 1899 }, { "epoch": 0.2606846401865953, "grad_norm": 1.171875, "learning_rate": 1.9664345336526275e-05, "loss": 0.4861, "step": 1900 }, { "epoch": 0.2608218426287988, "grad_norm": 1.359375, "learning_rate": 1.966397438672969e-05, "loss": 0.6342, "step": 1901 }, { "epoch": 0.26095904507100226, "grad_norm": 1.2421875, "learning_rate": 1.9663603235570838e-05, "loss": 0.5823, "step": 1902 }, { "epoch": 0.26109624751320576, "grad_norm": 1.2890625, "learning_rate": 1.9663231883057458e-05, "loss": 0.5432, "step": 1903 }, { "epoch": 0.2612334499554092, "grad_norm": 1.3515625, "learning_rate": 1.966286032919729e-05, "loss": 0.5145, "step": 1904 }, { "epoch": 0.2613706523976127, "grad_norm": 1.40625, "learning_rate": 1.966248857399807e-05, "loss": 0.6516, "step": 1905 }, { "epoch": 0.26150785483981615, "grad_norm": 1.2265625, "learning_rate": 1.966211661746755e-05, "loss": 0.5317, "step": 1906 }, { "epoch": 0.26164505728201964, "grad_norm": 1.265625, "learning_rate": 1.9661744459613476e-05, "loss": 0.5874, "step": 1907 }, { "epoch": 0.2617822597242231, "grad_norm": 1.1640625, "learning_rate": 1.9661372100443604e-05, "loss": 0.4803, "step": 1908 }, { "epoch": 0.2619194621664266, "grad_norm": 1.125, "learning_rate": 1.9660999539965693e-05, "loss": 0.443, "step": 1909 }, { "epoch": 0.26205666460863003, "grad_norm": 1.2421875, "learning_rate": 1.9660626778187506e-05, "loss": 0.5747, "step": 1910 }, { "epoch": 0.26219386705083353, "grad_norm": 1.3359375, "learning_rate": 1.966025381511681e-05, "loss": 0.6122, "step": 1911 }, { "epoch": 0.262331069493037, "grad_norm": 1.34375, "learning_rate": 1.9659880650761373e-05, "loss": 0.5914, "step": 1912 }, { "epoch": 0.26246827193524047, "grad_norm": 1.4921875, "learning_rate": 1.9659507285128976e-05, "loss": 0.6132, "step": 1913 }, { "epoch": 0.2626054743774439, "grad_norm": 1.28125, "learning_rate": 1.9659133718227395e-05, "loss": 0.5507, "step": 1914 }, { "epoch": 0.2627426768196474, "grad_norm": 1.2109375, "learning_rate": 1.9658759950064416e-05, "loss": 0.4729, "step": 1915 }, { "epoch": 0.26287987926185086, "grad_norm": 1.1796875, "learning_rate": 1.965838598064782e-05, "loss": 0.558, "step": 1916 }, { "epoch": 0.26301708170405436, "grad_norm": 1.390625, "learning_rate": 1.9658011809985407e-05, "loss": 0.6942, "step": 1917 }, { "epoch": 0.2631542841462578, "grad_norm": 1.3828125, "learning_rate": 1.9657637438084974e-05, "loss": 0.5886, "step": 1918 }, { "epoch": 0.2632914865884613, "grad_norm": 1.2578125, "learning_rate": 1.9657262864954316e-05, "loss": 0.5928, "step": 1919 }, { "epoch": 0.26342868903066474, "grad_norm": 1.15625, "learning_rate": 1.965688809060124e-05, "loss": 0.4932, "step": 1920 }, { "epoch": 0.26356589147286824, "grad_norm": 1.171875, "learning_rate": 1.9656513115033555e-05, "loss": 0.5165, "step": 1921 }, { "epoch": 0.2637030939150717, "grad_norm": 1.3203125, "learning_rate": 1.9656137938259074e-05, "loss": 0.5836, "step": 1922 }, { "epoch": 0.2638402963572752, "grad_norm": 1.390625, "learning_rate": 1.9655762560285615e-05, "loss": 0.6425, "step": 1923 }, { "epoch": 0.2639774987994786, "grad_norm": 1.234375, "learning_rate": 1.9655386981121e-05, "loss": 0.494, "step": 1924 }, { "epoch": 0.2641147012416821, "grad_norm": 1.2109375, "learning_rate": 1.9655011200773054e-05, "loss": 0.5332, "step": 1925 }, { "epoch": 0.26425190368388557, "grad_norm": 1.25, "learning_rate": 1.9654635219249605e-05, "loss": 0.5577, "step": 1926 }, { "epoch": 0.26438910612608907, "grad_norm": 1.109375, "learning_rate": 1.965425903655849e-05, "loss": 0.5301, "step": 1927 }, { "epoch": 0.2645263085682925, "grad_norm": 1.3125, "learning_rate": 1.965388265270755e-05, "loss": 0.5821, "step": 1928 }, { "epoch": 0.264663511010496, "grad_norm": 1.390625, "learning_rate": 1.9653506067704615e-05, "loss": 0.636, "step": 1929 }, { "epoch": 0.26480071345269945, "grad_norm": 1.28125, "learning_rate": 1.9653129281557546e-05, "loss": 0.5557, "step": 1930 }, { "epoch": 0.26493791589490295, "grad_norm": 1.328125, "learning_rate": 1.9652752294274185e-05, "loss": 0.6352, "step": 1931 }, { "epoch": 0.2650751183371064, "grad_norm": 1.234375, "learning_rate": 1.9652375105862395e-05, "loss": 0.5811, "step": 1932 }, { "epoch": 0.2652123207793099, "grad_norm": 1.2109375, "learning_rate": 1.9651997716330027e-05, "loss": 0.4756, "step": 1933 }, { "epoch": 0.26534952322151334, "grad_norm": 1.3046875, "learning_rate": 1.9651620125684946e-05, "loss": 0.5397, "step": 1934 }, { "epoch": 0.26548672566371684, "grad_norm": 1.2578125, "learning_rate": 1.9651242333935025e-05, "loss": 0.5847, "step": 1935 }, { "epoch": 0.2656239281059203, "grad_norm": 1.234375, "learning_rate": 1.9650864341088133e-05, "loss": 0.5962, "step": 1936 }, { "epoch": 0.2657611305481238, "grad_norm": 1.265625, "learning_rate": 1.9650486147152143e-05, "loss": 0.5845, "step": 1937 }, { "epoch": 0.2658983329903272, "grad_norm": 1.3125, "learning_rate": 1.9650107752134936e-05, "loss": 0.5369, "step": 1938 }, { "epoch": 0.2660355354325307, "grad_norm": 1.28125, "learning_rate": 1.9649729156044403e-05, "loss": 0.5424, "step": 1939 }, { "epoch": 0.26617273787473417, "grad_norm": 1.2890625, "learning_rate": 1.9649350358888424e-05, "loss": 0.5511, "step": 1940 }, { "epoch": 0.26630994031693767, "grad_norm": 1.15625, "learning_rate": 1.9648971360674894e-05, "loss": 0.5345, "step": 1941 }, { "epoch": 0.2664471427591411, "grad_norm": 1.2421875, "learning_rate": 1.964859216141171e-05, "loss": 0.4779, "step": 1942 }, { "epoch": 0.2665843452013446, "grad_norm": 1.2578125, "learning_rate": 1.964821276110678e-05, "loss": 0.5443, "step": 1943 }, { "epoch": 0.26672154764354805, "grad_norm": 1.1171875, "learning_rate": 1.9647833159768e-05, "loss": 0.5158, "step": 1944 }, { "epoch": 0.26685875008575155, "grad_norm": 1.3359375, "learning_rate": 1.9647453357403285e-05, "loss": 0.5539, "step": 1945 }, { "epoch": 0.266995952527955, "grad_norm": 1.1875, "learning_rate": 1.9647073354020544e-05, "loss": 0.5113, "step": 1946 }, { "epoch": 0.2671331549701585, "grad_norm": 1.296875, "learning_rate": 1.96466931496277e-05, "loss": 0.5485, "step": 1947 }, { "epoch": 0.26727035741236194, "grad_norm": 1.3828125, "learning_rate": 1.9646312744232673e-05, "loss": 0.5377, "step": 1948 }, { "epoch": 0.26740755985456544, "grad_norm": 1.25, "learning_rate": 1.9645932137843386e-05, "loss": 0.5148, "step": 1949 }, { "epoch": 0.2675447622967689, "grad_norm": 1.28125, "learning_rate": 1.9645551330467776e-05, "loss": 0.4488, "step": 1950 }, { "epoch": 0.2676819647389724, "grad_norm": 1.3671875, "learning_rate": 1.9645170322113773e-05, "loss": 0.6369, "step": 1951 }, { "epoch": 0.2678191671811758, "grad_norm": 1.1953125, "learning_rate": 1.9644789112789317e-05, "loss": 0.5148, "step": 1952 }, { "epoch": 0.2679563696233793, "grad_norm": 1.2421875, "learning_rate": 1.964440770250235e-05, "loss": 0.5563, "step": 1953 }, { "epoch": 0.26809357206558276, "grad_norm": 1.21875, "learning_rate": 1.9644026091260825e-05, "loss": 0.5397, "step": 1954 }, { "epoch": 0.26823077450778626, "grad_norm": 1.25, "learning_rate": 1.9643644279072684e-05, "loss": 0.5676, "step": 1955 }, { "epoch": 0.2683679769499897, "grad_norm": 1.296875, "learning_rate": 1.9643262265945887e-05, "loss": 0.577, "step": 1956 }, { "epoch": 0.2685051793921932, "grad_norm": 1.28125, "learning_rate": 1.9642880051888395e-05, "loss": 0.5109, "step": 1957 }, { "epoch": 0.26864238183439665, "grad_norm": 1.3515625, "learning_rate": 1.964249763690817e-05, "loss": 0.573, "step": 1958 }, { "epoch": 0.26877958427660015, "grad_norm": 1.2265625, "learning_rate": 1.9642115021013187e-05, "loss": 0.5273, "step": 1959 }, { "epoch": 0.2689167867188036, "grad_norm": 1.3359375, "learning_rate": 1.9641732204211407e-05, "loss": 0.581, "step": 1960 }, { "epoch": 0.2690539891610071, "grad_norm": 1.21875, "learning_rate": 1.9641349186510812e-05, "loss": 0.4805, "step": 1961 }, { "epoch": 0.26919119160321053, "grad_norm": 1.3046875, "learning_rate": 1.9640965967919384e-05, "loss": 0.5895, "step": 1962 }, { "epoch": 0.26932839404541403, "grad_norm": 1.265625, "learning_rate": 1.9640582548445106e-05, "loss": 0.554, "step": 1963 }, { "epoch": 0.2694655964876175, "grad_norm": 1.203125, "learning_rate": 1.964019892809597e-05, "loss": 0.5004, "step": 1964 }, { "epoch": 0.269602798929821, "grad_norm": 1.1171875, "learning_rate": 1.963981510687996e-05, "loss": 0.4031, "step": 1965 }, { "epoch": 0.2697400013720244, "grad_norm": 1.3203125, "learning_rate": 1.9639431084805086e-05, "loss": 0.5127, "step": 1966 }, { "epoch": 0.2698772038142279, "grad_norm": 1.234375, "learning_rate": 1.9639046861879346e-05, "loss": 0.5588, "step": 1967 }, { "epoch": 0.27001440625643136, "grad_norm": 1.3359375, "learning_rate": 1.963866243811074e-05, "loss": 0.6375, "step": 1968 }, { "epoch": 0.27015160869863486, "grad_norm": 1.265625, "learning_rate": 1.9638277813507283e-05, "loss": 0.4959, "step": 1969 }, { "epoch": 0.2702888111408383, "grad_norm": 1.3828125, "learning_rate": 1.9637892988076986e-05, "loss": 0.5302, "step": 1970 }, { "epoch": 0.2704260135830418, "grad_norm": 1.296875, "learning_rate": 1.9637507961827872e-05, "loss": 0.514, "step": 1971 }, { "epoch": 0.27056321602524525, "grad_norm": 1.3046875, "learning_rate": 1.963712273476796e-05, "loss": 0.5801, "step": 1972 }, { "epoch": 0.27070041846744874, "grad_norm": 1.265625, "learning_rate": 1.9636737306905277e-05, "loss": 0.5489, "step": 1973 }, { "epoch": 0.2708376209096522, "grad_norm": 1.25, "learning_rate": 1.963635167824786e-05, "loss": 0.5545, "step": 1974 }, { "epoch": 0.2709748233518557, "grad_norm": 1.2578125, "learning_rate": 1.963596584880373e-05, "loss": 0.5676, "step": 1975 }, { "epoch": 0.27111202579405913, "grad_norm": 1.1484375, "learning_rate": 1.963557981858094e-05, "loss": 0.532, "step": 1976 }, { "epoch": 0.27124922823626263, "grad_norm": 1.1640625, "learning_rate": 1.963519358758753e-05, "loss": 0.5259, "step": 1977 }, { "epoch": 0.2713864306784661, "grad_norm": 1.2890625, "learning_rate": 1.963480715583154e-05, "loss": 0.5384, "step": 1978 }, { "epoch": 0.27152363312066957, "grad_norm": 1.1328125, "learning_rate": 1.9634420523321035e-05, "loss": 0.5228, "step": 1979 }, { "epoch": 0.271660835562873, "grad_norm": 1.2578125, "learning_rate": 1.963403369006406e-05, "loss": 0.4976, "step": 1980 }, { "epoch": 0.2717980380050765, "grad_norm": 1.359375, "learning_rate": 1.963364665606868e-05, "loss": 0.6626, "step": 1981 }, { "epoch": 0.27193524044727996, "grad_norm": 1.2109375, "learning_rate": 1.9633259421342955e-05, "loss": 0.548, "step": 1982 }, { "epoch": 0.27207244288948346, "grad_norm": 1.234375, "learning_rate": 1.963287198589496e-05, "loss": 0.4949, "step": 1983 }, { "epoch": 0.2722096453316869, "grad_norm": 1.2421875, "learning_rate": 1.9632484349732767e-05, "loss": 0.5594, "step": 1984 }, { "epoch": 0.2723468477738904, "grad_norm": 1.15625, "learning_rate": 1.963209651286445e-05, "loss": 0.4858, "step": 1985 }, { "epoch": 0.27248405021609384, "grad_norm": 1.3125, "learning_rate": 1.9631708475298087e-05, "loss": 0.5773, "step": 1986 }, { "epoch": 0.27262125265829734, "grad_norm": 1.4765625, "learning_rate": 1.963132023704177e-05, "loss": 0.6363, "step": 1987 }, { "epoch": 0.2727584551005008, "grad_norm": 1.125, "learning_rate": 1.9630931798103588e-05, "loss": 0.4363, "step": 1988 }, { "epoch": 0.2728956575427043, "grad_norm": 1.140625, "learning_rate": 1.963054315849163e-05, "loss": 0.4881, "step": 1989 }, { "epoch": 0.2730328599849077, "grad_norm": 1.2265625, "learning_rate": 1.9630154318213996e-05, "loss": 0.5512, "step": 1990 }, { "epoch": 0.2731700624271112, "grad_norm": 1.28125, "learning_rate": 1.9629765277278787e-05, "loss": 0.5568, "step": 1991 }, { "epoch": 0.27330726486931467, "grad_norm": 1.328125, "learning_rate": 1.962937603569411e-05, "loss": 0.6186, "step": 1992 }, { "epoch": 0.27344446731151817, "grad_norm": 1.1953125, "learning_rate": 1.962898659346808e-05, "loss": 0.4996, "step": 1993 }, { "epoch": 0.2735816697537216, "grad_norm": 1.25, "learning_rate": 1.9628596950608806e-05, "loss": 0.5979, "step": 1994 }, { "epoch": 0.2737188721959251, "grad_norm": 1.2265625, "learning_rate": 1.9628207107124408e-05, "loss": 0.4953, "step": 1995 }, { "epoch": 0.27385607463812855, "grad_norm": 1.2265625, "learning_rate": 1.962781706302301e-05, "loss": 0.5267, "step": 1996 }, { "epoch": 0.27399327708033205, "grad_norm": 1.1953125, "learning_rate": 1.9627426818312736e-05, "loss": 0.545, "step": 1997 }, { "epoch": 0.2741304795225355, "grad_norm": 1.25, "learning_rate": 1.962703637300172e-05, "loss": 0.5833, "step": 1998 }, { "epoch": 0.274267681964739, "grad_norm": 1.1484375, "learning_rate": 1.9626645727098096e-05, "loss": 0.5003, "step": 1999 }, { "epoch": 0.27440488440694244, "grad_norm": 1.1796875, "learning_rate": 1.9626254880610007e-05, "loss": 0.5334, "step": 2000 }, { "epoch": 0.27454208684914594, "grad_norm": 1.1484375, "learning_rate": 1.9625863833545594e-05, "loss": 0.47, "step": 2001 }, { "epoch": 0.2746792892913494, "grad_norm": 1.2109375, "learning_rate": 1.9625472585913002e-05, "loss": 0.5621, "step": 2002 }, { "epoch": 0.2748164917335529, "grad_norm": 1.234375, "learning_rate": 1.9625081137720392e-05, "loss": 0.5497, "step": 2003 }, { "epoch": 0.2749536941757563, "grad_norm": 1.3046875, "learning_rate": 1.9624689488975914e-05, "loss": 0.5902, "step": 2004 }, { "epoch": 0.2750908966179598, "grad_norm": 1.2421875, "learning_rate": 1.9624297639687725e-05, "loss": 0.5841, "step": 2005 }, { "epoch": 0.27522809906016327, "grad_norm": 1.265625, "learning_rate": 1.9623905589863997e-05, "loss": 0.5902, "step": 2006 }, { "epoch": 0.27536530150236677, "grad_norm": 1.3671875, "learning_rate": 1.9623513339512894e-05, "loss": 0.5345, "step": 2007 }, { "epoch": 0.2755025039445702, "grad_norm": 1.3125, "learning_rate": 1.9623120888642594e-05, "loss": 0.5955, "step": 2008 }, { "epoch": 0.2756397063867737, "grad_norm": 1.328125, "learning_rate": 1.962272823726127e-05, "loss": 0.574, "step": 2009 }, { "epoch": 0.27577690882897715, "grad_norm": 1.2265625, "learning_rate": 1.9622335385377107e-05, "loss": 0.5282, "step": 2010 }, { "epoch": 0.27591411127118065, "grad_norm": 1.2578125, "learning_rate": 1.962194233299829e-05, "loss": 0.5284, "step": 2011 }, { "epoch": 0.2760513137133841, "grad_norm": 1.2265625, "learning_rate": 1.9621549080133002e-05, "loss": 0.5334, "step": 2012 }, { "epoch": 0.2761885161555876, "grad_norm": 1.34375, "learning_rate": 1.9621155626789444e-05, "loss": 0.5284, "step": 2013 }, { "epoch": 0.27632571859779104, "grad_norm": 1.296875, "learning_rate": 1.9620761972975813e-05, "loss": 0.5822, "step": 2014 }, { "epoch": 0.27646292103999454, "grad_norm": 1.1953125, "learning_rate": 1.9620368118700312e-05, "loss": 0.5156, "step": 2015 }, { "epoch": 0.276600123482198, "grad_norm": 1.296875, "learning_rate": 1.9619974063971144e-05, "loss": 0.6374, "step": 2016 }, { "epoch": 0.2767373259244015, "grad_norm": 1.3203125, "learning_rate": 1.961957980879652e-05, "loss": 0.5997, "step": 2017 }, { "epoch": 0.2768745283666049, "grad_norm": 1.2734375, "learning_rate": 1.9619185353184664e-05, "loss": 0.5649, "step": 2018 }, { "epoch": 0.2770117308088084, "grad_norm": 1.25, "learning_rate": 1.961879069714378e-05, "loss": 0.5623, "step": 2019 }, { "epoch": 0.27714893325101186, "grad_norm": 1.2890625, "learning_rate": 1.9618395840682106e-05, "loss": 0.5702, "step": 2020 }, { "epoch": 0.27728613569321536, "grad_norm": 1.2265625, "learning_rate": 1.9618000783807858e-05, "loss": 0.5498, "step": 2021 }, { "epoch": 0.2774233381354188, "grad_norm": 1.2109375, "learning_rate": 1.9617605526529274e-05, "loss": 0.5035, "step": 2022 }, { "epoch": 0.2775605405776223, "grad_norm": 1.203125, "learning_rate": 1.9617210068854583e-05, "loss": 0.5531, "step": 2023 }, { "epoch": 0.27769774301982575, "grad_norm": 1.1875, "learning_rate": 1.9616814410792036e-05, "loss": 0.4568, "step": 2024 }, { "epoch": 0.27783494546202925, "grad_norm": 1.1796875, "learning_rate": 1.9616418552349866e-05, "loss": 0.5142, "step": 2025 }, { "epoch": 0.2779721479042327, "grad_norm": 1.21875, "learning_rate": 1.9616022493536327e-05, "loss": 0.5348, "step": 2026 }, { "epoch": 0.2781093503464362, "grad_norm": 1.2734375, "learning_rate": 1.961562623435967e-05, "loss": 0.5602, "step": 2027 }, { "epoch": 0.27824655278863963, "grad_norm": 1.3125, "learning_rate": 1.9615229774828156e-05, "loss": 0.5485, "step": 2028 }, { "epoch": 0.27838375523084313, "grad_norm": 1.296875, "learning_rate": 1.961483311495004e-05, "loss": 0.5979, "step": 2029 }, { "epoch": 0.2785209576730466, "grad_norm": 1.1640625, "learning_rate": 1.9614436254733587e-05, "loss": 0.5199, "step": 2030 }, { "epoch": 0.2786581601152501, "grad_norm": 1.3515625, "learning_rate": 1.9614039194187066e-05, "loss": 0.5918, "step": 2031 }, { "epoch": 0.2787953625574535, "grad_norm": 1.296875, "learning_rate": 1.9613641933318757e-05, "loss": 0.6394, "step": 2032 }, { "epoch": 0.278932564999657, "grad_norm": 1.3984375, "learning_rate": 1.9613244472136928e-05, "loss": 0.6398, "step": 2033 }, { "epoch": 0.27906976744186046, "grad_norm": 1.125, "learning_rate": 1.961284681064987e-05, "loss": 0.5259, "step": 2034 }, { "epoch": 0.27920696988406396, "grad_norm": 1.3359375, "learning_rate": 1.9612448948865857e-05, "loss": 0.5979, "step": 2035 }, { "epoch": 0.2793441723262674, "grad_norm": 1.4765625, "learning_rate": 1.9612050886793194e-05, "loss": 0.5952, "step": 2036 }, { "epoch": 0.2794813747684709, "grad_norm": 1.3515625, "learning_rate": 1.9611652624440162e-05, "loss": 0.5915, "step": 2037 }, { "epoch": 0.27961857721067435, "grad_norm": 1.2265625, "learning_rate": 1.9611254161815064e-05, "loss": 0.5225, "step": 2038 }, { "epoch": 0.27975577965287785, "grad_norm": 1.2109375, "learning_rate": 1.9610855498926207e-05, "loss": 0.5638, "step": 2039 }, { "epoch": 0.2798929820950813, "grad_norm": 1.171875, "learning_rate": 1.961045663578189e-05, "loss": 0.4823, "step": 2040 }, { "epoch": 0.2800301845372848, "grad_norm": 1.34375, "learning_rate": 1.9610057572390432e-05, "loss": 0.5229, "step": 2041 }, { "epoch": 0.28016738697948823, "grad_norm": 1.28125, "learning_rate": 1.9609658308760143e-05, "loss": 0.5351, "step": 2042 }, { "epoch": 0.28030458942169173, "grad_norm": 1.2421875, "learning_rate": 1.960925884489934e-05, "loss": 0.5699, "step": 2043 }, { "epoch": 0.2804417918638952, "grad_norm": 1.28125, "learning_rate": 1.960885918081635e-05, "loss": 0.5114, "step": 2044 }, { "epoch": 0.2805789943060987, "grad_norm": 1.203125, "learning_rate": 1.96084593165195e-05, "loss": 0.5684, "step": 2045 }, { "epoch": 0.2807161967483021, "grad_norm": 1.4140625, "learning_rate": 1.960805925201712e-05, "loss": 0.6651, "step": 2046 }, { "epoch": 0.2808533991905056, "grad_norm": 1.34375, "learning_rate": 1.960765898731755e-05, "loss": 0.6148, "step": 2047 }, { "epoch": 0.28099060163270906, "grad_norm": 1.2578125, "learning_rate": 1.960725852242913e-05, "loss": 0.5824, "step": 2048 }, { "epoch": 0.28112780407491256, "grad_norm": 1.2421875, "learning_rate": 1.9606857857360196e-05, "loss": 0.5078, "step": 2049 }, { "epoch": 0.281265006517116, "grad_norm": 1.28125, "learning_rate": 1.9606456992119106e-05, "loss": 0.5854, "step": 2050 }, { "epoch": 0.2814022089593195, "grad_norm": 1.2578125, "learning_rate": 1.960605592671421e-05, "loss": 0.5791, "step": 2051 }, { "epoch": 0.28153941140152294, "grad_norm": 1.28125, "learning_rate": 1.9605654661153863e-05, "loss": 0.5017, "step": 2052 }, { "epoch": 0.28167661384372644, "grad_norm": 1.3125, "learning_rate": 1.9605253195446424e-05, "loss": 0.5598, "step": 2053 }, { "epoch": 0.2818138162859299, "grad_norm": 1.203125, "learning_rate": 1.960485152960026e-05, "loss": 0.4778, "step": 2054 }, { "epoch": 0.2819510187281334, "grad_norm": 1.265625, "learning_rate": 1.960444966362374e-05, "loss": 0.4895, "step": 2055 }, { "epoch": 0.28208822117033683, "grad_norm": 1.1796875, "learning_rate": 1.9604047597525243e-05, "loss": 0.5225, "step": 2056 }, { "epoch": 0.2822254236125403, "grad_norm": 1.203125, "learning_rate": 1.960364533131314e-05, "loss": 0.4855, "step": 2057 }, { "epoch": 0.28236262605474377, "grad_norm": 1.3359375, "learning_rate": 1.9603242864995813e-05, "loss": 0.5378, "step": 2058 }, { "epoch": 0.28249982849694727, "grad_norm": 1.296875, "learning_rate": 1.960284019858165e-05, "loss": 0.5472, "step": 2059 }, { "epoch": 0.2826370309391507, "grad_norm": 1.3046875, "learning_rate": 1.9602437332079042e-05, "loss": 0.5396, "step": 2060 }, { "epoch": 0.2827742333813542, "grad_norm": 1.1796875, "learning_rate": 1.960203426549638e-05, "loss": 0.512, "step": 2061 }, { "epoch": 0.28291143582355766, "grad_norm": 1.3671875, "learning_rate": 1.9601630998842066e-05, "loss": 0.5872, "step": 2062 }, { "epoch": 0.28304863826576115, "grad_norm": 1.2578125, "learning_rate": 1.96012275321245e-05, "loss": 0.575, "step": 2063 }, { "epoch": 0.2831858407079646, "grad_norm": 1.265625, "learning_rate": 1.960082386535209e-05, "loss": 0.5294, "step": 2064 }, { "epoch": 0.2833230431501681, "grad_norm": 1.2890625, "learning_rate": 1.9600419998533248e-05, "loss": 0.6142, "step": 2065 }, { "epoch": 0.28346024559237154, "grad_norm": 1.21875, "learning_rate": 1.9600015931676387e-05, "loss": 0.5961, "step": 2066 }, { "epoch": 0.28359744803457504, "grad_norm": 1.9140625, "learning_rate": 1.9599611664789927e-05, "loss": 0.5356, "step": 2067 }, { "epoch": 0.2837346504767785, "grad_norm": 1.2578125, "learning_rate": 1.959920719788229e-05, "loss": 0.544, "step": 2068 }, { "epoch": 0.283871852918982, "grad_norm": 1.1875, "learning_rate": 1.959880253096191e-05, "loss": 0.5261, "step": 2069 }, { "epoch": 0.2840090553611854, "grad_norm": 1.28125, "learning_rate": 1.9598397664037212e-05, "loss": 0.6126, "step": 2070 }, { "epoch": 0.2841462578033889, "grad_norm": 1.2265625, "learning_rate": 1.9597992597116634e-05, "loss": 0.5187, "step": 2071 }, { "epoch": 0.28428346024559237, "grad_norm": 1.3046875, "learning_rate": 1.9597587330208613e-05, "loss": 0.5084, "step": 2072 }, { "epoch": 0.28442066268779587, "grad_norm": 1.28125, "learning_rate": 1.9597181863321598e-05, "loss": 0.5754, "step": 2073 }, { "epoch": 0.2845578651299993, "grad_norm": 1.2265625, "learning_rate": 1.9596776196464037e-05, "loss": 0.5224, "step": 2074 }, { "epoch": 0.2846950675722028, "grad_norm": 1.21875, "learning_rate": 1.959637032964438e-05, "loss": 0.5319, "step": 2075 }, { "epoch": 0.28483227001440625, "grad_norm": 1.1015625, "learning_rate": 1.9595964262871086e-05, "loss": 0.4772, "step": 2076 }, { "epoch": 0.28496947245660975, "grad_norm": 1.390625, "learning_rate": 1.9595557996152618e-05, "loss": 0.6138, "step": 2077 }, { "epoch": 0.2851066748988132, "grad_norm": 1.265625, "learning_rate": 1.9595151529497437e-05, "loss": 0.5822, "step": 2078 }, { "epoch": 0.2852438773410167, "grad_norm": 1.234375, "learning_rate": 1.9594744862914013e-05, "loss": 0.5323, "step": 2079 }, { "epoch": 0.28538107978322014, "grad_norm": 1.296875, "learning_rate": 1.959433799641082e-05, "loss": 0.589, "step": 2080 }, { "epoch": 0.28551828222542364, "grad_norm": 1.453125, "learning_rate": 1.9593930929996336e-05, "loss": 0.6378, "step": 2081 }, { "epoch": 0.2856554846676271, "grad_norm": 1.34375, "learning_rate": 1.9593523663679043e-05, "loss": 0.6002, "step": 2082 }, { "epoch": 0.2857926871098306, "grad_norm": 1.1875, "learning_rate": 1.9593116197467427e-05, "loss": 0.4972, "step": 2083 }, { "epoch": 0.285929889552034, "grad_norm": 1.2578125, "learning_rate": 1.9592708531369977e-05, "loss": 0.5394, "step": 2084 }, { "epoch": 0.2860670919942375, "grad_norm": 1.25, "learning_rate": 1.9592300665395187e-05, "loss": 0.5572, "step": 2085 }, { "epoch": 0.28620429443644096, "grad_norm": 1.1640625, "learning_rate": 1.9591892599551556e-05, "loss": 0.5418, "step": 2086 }, { "epoch": 0.28634149687864446, "grad_norm": 1.2734375, "learning_rate": 1.9591484333847587e-05, "loss": 0.5795, "step": 2087 }, { "epoch": 0.2864786993208479, "grad_norm": 1.2109375, "learning_rate": 1.9591075868291788e-05, "loss": 0.552, "step": 2088 }, { "epoch": 0.2866159017630514, "grad_norm": 1.171875, "learning_rate": 1.959066720289267e-05, "loss": 0.5146, "step": 2089 }, { "epoch": 0.28675310420525485, "grad_norm": 1.3203125, "learning_rate": 1.9590258337658747e-05, "loss": 0.5618, "step": 2090 }, { "epoch": 0.28689030664745835, "grad_norm": 1.203125, "learning_rate": 1.9589849272598537e-05, "loss": 0.5016, "step": 2091 }, { "epoch": 0.2870275090896618, "grad_norm": 1.3203125, "learning_rate": 1.958944000772056e-05, "loss": 0.5669, "step": 2092 }, { "epoch": 0.2871647115318653, "grad_norm": 1.203125, "learning_rate": 1.9589030543033355e-05, "loss": 0.5407, "step": 2093 }, { "epoch": 0.28730191397406873, "grad_norm": 1.2265625, "learning_rate": 1.9588620878545445e-05, "loss": 0.5422, "step": 2094 }, { "epoch": 0.28743911641627223, "grad_norm": 1.21875, "learning_rate": 1.9588211014265366e-05, "loss": 0.5734, "step": 2095 }, { "epoch": 0.2875763188584757, "grad_norm": 1.4765625, "learning_rate": 1.958780095020166e-05, "loss": 0.6731, "step": 2096 }, { "epoch": 0.2877135213006792, "grad_norm": 1.2578125, "learning_rate": 1.9587390686362873e-05, "loss": 0.6034, "step": 2097 }, { "epoch": 0.2878507237428826, "grad_norm": 1.2890625, "learning_rate": 1.9586980222757552e-05, "loss": 0.6032, "step": 2098 }, { "epoch": 0.2879879261850861, "grad_norm": 1.2265625, "learning_rate": 1.9586569559394248e-05, "loss": 0.5696, "step": 2099 }, { "epoch": 0.28812512862728956, "grad_norm": 1.3046875, "learning_rate": 1.9586158696281517e-05, "loss": 0.6441, "step": 2100 }, { "epoch": 0.28826233106949306, "grad_norm": 1.1953125, "learning_rate": 1.9585747633427928e-05, "loss": 0.5041, "step": 2101 }, { "epoch": 0.2883995335116965, "grad_norm": 1.3515625, "learning_rate": 1.9585336370842035e-05, "loss": 0.6588, "step": 2102 }, { "epoch": 0.2885367359539, "grad_norm": 1.3203125, "learning_rate": 1.9584924908532413e-05, "loss": 0.5919, "step": 2103 }, { "epoch": 0.28867393839610345, "grad_norm": 1.1875, "learning_rate": 1.9584513246507636e-05, "loss": 0.5429, "step": 2104 }, { "epoch": 0.28881114083830695, "grad_norm": 1.2578125, "learning_rate": 1.9584101384776274e-05, "loss": 0.5673, "step": 2105 }, { "epoch": 0.2889483432805104, "grad_norm": 1.1640625, "learning_rate": 1.9583689323346923e-05, "loss": 0.5088, "step": 2106 }, { "epoch": 0.2890855457227139, "grad_norm": 1.171875, "learning_rate": 1.9583277062228156e-05, "loss": 0.4901, "step": 2107 }, { "epoch": 0.28922274816491733, "grad_norm": 1.25, "learning_rate": 1.9582864601428568e-05, "loss": 0.592, "step": 2108 }, { "epoch": 0.28935995060712083, "grad_norm": 1.3046875, "learning_rate": 1.958245194095675e-05, "loss": 0.5359, "step": 2109 }, { "epoch": 0.2894971530493243, "grad_norm": 1.203125, "learning_rate": 1.9582039080821312e-05, "loss": 0.5382, "step": 2110 }, { "epoch": 0.2896343554915278, "grad_norm": 1.1796875, "learning_rate": 1.958162602103084e-05, "loss": 0.5217, "step": 2111 }, { "epoch": 0.2897715579337312, "grad_norm": 1.265625, "learning_rate": 1.958121276159395e-05, "loss": 0.6059, "step": 2112 }, { "epoch": 0.2899087603759347, "grad_norm": 1.1171875, "learning_rate": 1.958079930251925e-05, "loss": 0.474, "step": 2113 }, { "epoch": 0.29004596281813816, "grad_norm": 1.296875, "learning_rate": 1.958038564381536e-05, "loss": 0.5772, "step": 2114 }, { "epoch": 0.29018316526034166, "grad_norm": 1.25, "learning_rate": 1.957997178549089e-05, "loss": 0.4678, "step": 2115 }, { "epoch": 0.2903203677025451, "grad_norm": 1.21875, "learning_rate": 1.9579557727554475e-05, "loss": 0.5311, "step": 2116 }, { "epoch": 0.2904575701447486, "grad_norm": 1.2890625, "learning_rate": 1.9579143470014735e-05, "loss": 0.5779, "step": 2117 }, { "epoch": 0.29059477258695204, "grad_norm": 1.296875, "learning_rate": 1.95787290128803e-05, "loss": 0.5157, "step": 2118 }, { "epoch": 0.29073197502915554, "grad_norm": 1.25, "learning_rate": 1.9578314356159807e-05, "loss": 0.5045, "step": 2119 }, { "epoch": 0.290869177471359, "grad_norm": 1.203125, "learning_rate": 1.9577899499861903e-05, "loss": 0.5473, "step": 2120 }, { "epoch": 0.2910063799135625, "grad_norm": 1.328125, "learning_rate": 1.9577484443995223e-05, "loss": 0.5839, "step": 2121 }, { "epoch": 0.29114358235576593, "grad_norm": 1.375, "learning_rate": 1.9577069188568417e-05, "loss": 0.5585, "step": 2122 }, { "epoch": 0.2912807847979694, "grad_norm": 1.265625, "learning_rate": 1.9576653733590146e-05, "loss": 0.5562, "step": 2123 }, { "epoch": 0.29141798724017287, "grad_norm": 1.2109375, "learning_rate": 1.9576238079069053e-05, "loss": 0.4913, "step": 2124 }, { "epoch": 0.29155518968237637, "grad_norm": 1.2265625, "learning_rate": 1.957582222501381e-05, "loss": 0.4979, "step": 2125 }, { "epoch": 0.2916923921245798, "grad_norm": 1.34375, "learning_rate": 1.9575406171433077e-05, "loss": 0.5129, "step": 2126 }, { "epoch": 0.2918295945667833, "grad_norm": 1.328125, "learning_rate": 1.957498991833552e-05, "loss": 0.5851, "step": 2127 }, { "epoch": 0.29196679700898676, "grad_norm": 1.1171875, "learning_rate": 1.9574573465729818e-05, "loss": 0.4268, "step": 2128 }, { "epoch": 0.29210399945119025, "grad_norm": 1.21875, "learning_rate": 1.9574156813624646e-05, "loss": 0.5446, "step": 2129 }, { "epoch": 0.2922412018933937, "grad_norm": 1.34375, "learning_rate": 1.9573739962028683e-05, "loss": 0.5912, "step": 2130 }, { "epoch": 0.2923784043355972, "grad_norm": 1.3046875, "learning_rate": 1.9573322910950624e-05, "loss": 0.5463, "step": 2131 }, { "epoch": 0.29251560677780064, "grad_norm": 1.375, "learning_rate": 1.9572905660399145e-05, "loss": 0.5698, "step": 2132 }, { "epoch": 0.29265280922000414, "grad_norm": 1.28125, "learning_rate": 1.957248821038295e-05, "loss": 0.5747, "step": 2133 }, { "epoch": 0.2927900116622076, "grad_norm": 1.2109375, "learning_rate": 1.9572070560910737e-05, "loss": 0.488, "step": 2134 }, { "epoch": 0.2929272141044111, "grad_norm": 1.03125, "learning_rate": 1.9571652711991202e-05, "loss": 0.3822, "step": 2135 }, { "epoch": 0.2930644165466145, "grad_norm": 1.1328125, "learning_rate": 1.9571234663633058e-05, "loss": 0.4869, "step": 2136 }, { "epoch": 0.293201618988818, "grad_norm": 1.21875, "learning_rate": 1.9570816415845008e-05, "loss": 0.5116, "step": 2137 }, { "epoch": 0.29333882143102147, "grad_norm": 1.21875, "learning_rate": 1.9570397968635775e-05, "loss": 0.5371, "step": 2138 }, { "epoch": 0.29347602387322497, "grad_norm": 1.2109375, "learning_rate": 1.9569979322014076e-05, "loss": 0.4964, "step": 2139 }, { "epoch": 0.2936132263154284, "grad_norm": 1.125, "learning_rate": 1.956956047598863e-05, "loss": 0.5044, "step": 2140 }, { "epoch": 0.2937504287576319, "grad_norm": 1.265625, "learning_rate": 1.9569141430568166e-05, "loss": 0.5682, "step": 2141 }, { "epoch": 0.29388763119983535, "grad_norm": 1.265625, "learning_rate": 1.9568722185761418e-05, "loss": 0.572, "step": 2142 }, { "epoch": 0.29402483364203885, "grad_norm": 1.390625, "learning_rate": 1.9568302741577117e-05, "loss": 0.579, "step": 2143 }, { "epoch": 0.2941620360842423, "grad_norm": 1.3046875, "learning_rate": 1.956788309802401e-05, "loss": 0.5263, "step": 2144 }, { "epoch": 0.2942992385264458, "grad_norm": 1.203125, "learning_rate": 1.956746325511083e-05, "loss": 0.5513, "step": 2145 }, { "epoch": 0.29443644096864924, "grad_norm": 1.1484375, "learning_rate": 1.9567043212846334e-05, "loss": 0.4708, "step": 2146 }, { "epoch": 0.29457364341085274, "grad_norm": 1.2421875, "learning_rate": 1.956662297123927e-05, "loss": 0.5634, "step": 2147 }, { "epoch": 0.2947108458530562, "grad_norm": 1.1328125, "learning_rate": 1.95662025302984e-05, "loss": 0.4839, "step": 2148 }, { "epoch": 0.2948480482952597, "grad_norm": 1.2109375, "learning_rate": 1.956578189003248e-05, "loss": 0.4868, "step": 2149 }, { "epoch": 0.2949852507374631, "grad_norm": 1.4765625, "learning_rate": 1.956536105045027e-05, "loss": 0.6556, "step": 2150 }, { "epoch": 0.2951224531796666, "grad_norm": 1.125, "learning_rate": 1.9564940011560545e-05, "loss": 0.4577, "step": 2151 }, { "epoch": 0.29525965562187007, "grad_norm": 1.3515625, "learning_rate": 1.9564518773372077e-05, "loss": 0.6043, "step": 2152 }, { "epoch": 0.29539685806407356, "grad_norm": 1.234375, "learning_rate": 1.9564097335893642e-05, "loss": 0.5083, "step": 2153 }, { "epoch": 0.295534060506277, "grad_norm": 1.2109375, "learning_rate": 1.956367569913402e-05, "loss": 0.5902, "step": 2154 }, { "epoch": 0.2956712629484805, "grad_norm": 1.2734375, "learning_rate": 1.9563253863102004e-05, "loss": 0.5294, "step": 2155 }, { "epoch": 0.29580846539068395, "grad_norm": 1.2265625, "learning_rate": 1.9562831827806373e-05, "loss": 0.5441, "step": 2156 }, { "epoch": 0.29594566783288745, "grad_norm": 1.296875, "learning_rate": 1.9562409593255926e-05, "loss": 0.5643, "step": 2157 }, { "epoch": 0.2960828702750909, "grad_norm": 1.3046875, "learning_rate": 1.9561987159459463e-05, "loss": 0.6039, "step": 2158 }, { "epoch": 0.2962200727172944, "grad_norm": 1.2734375, "learning_rate": 1.9561564526425777e-05, "loss": 0.6453, "step": 2159 }, { "epoch": 0.29635727515949783, "grad_norm": 1.234375, "learning_rate": 1.9561141694163687e-05, "loss": 0.5315, "step": 2160 }, { "epoch": 0.29649447760170133, "grad_norm": 1.296875, "learning_rate": 1.9560718662681993e-05, "loss": 0.5747, "step": 2161 }, { "epoch": 0.2966316800439048, "grad_norm": 1.1796875, "learning_rate": 1.9560295431989513e-05, "loss": 0.5205, "step": 2162 }, { "epoch": 0.2967688824861083, "grad_norm": 1.296875, "learning_rate": 1.9559872002095067e-05, "loss": 0.6125, "step": 2163 }, { "epoch": 0.2969060849283117, "grad_norm": 1.203125, "learning_rate": 1.9559448373007478e-05, "loss": 0.5281, "step": 2164 }, { "epoch": 0.2970432873705152, "grad_norm": 1.2890625, "learning_rate": 1.9559024544735568e-05, "loss": 0.6088, "step": 2165 }, { "epoch": 0.29718048981271866, "grad_norm": 1.2421875, "learning_rate": 1.9558600517288174e-05, "loss": 0.5571, "step": 2166 }, { "epoch": 0.29731769225492216, "grad_norm": 1.2109375, "learning_rate": 1.9558176290674128e-05, "loss": 0.5594, "step": 2167 }, { "epoch": 0.2974548946971256, "grad_norm": 1.1796875, "learning_rate": 1.9557751864902266e-05, "loss": 0.5285, "step": 2168 }, { "epoch": 0.2975920971393291, "grad_norm": 1.265625, "learning_rate": 1.955732723998144e-05, "loss": 0.6261, "step": 2169 }, { "epoch": 0.29772929958153255, "grad_norm": 1.1328125, "learning_rate": 1.9556902415920493e-05, "loss": 0.4698, "step": 2170 }, { "epoch": 0.29786650202373605, "grad_norm": 1.296875, "learning_rate": 1.9556477392728275e-05, "loss": 0.5964, "step": 2171 }, { "epoch": 0.2980037044659395, "grad_norm": 1.265625, "learning_rate": 1.9556052170413643e-05, "loss": 0.5451, "step": 2172 }, { "epoch": 0.298140906908143, "grad_norm": 1.3828125, "learning_rate": 1.9555626748985462e-05, "loss": 0.6266, "step": 2173 }, { "epoch": 0.29827810935034643, "grad_norm": 1.3515625, "learning_rate": 1.955520112845259e-05, "loss": 0.6087, "step": 2174 }, { "epoch": 0.29841531179254993, "grad_norm": 1.3203125, "learning_rate": 1.9554775308823896e-05, "loss": 0.5707, "step": 2175 }, { "epoch": 0.2985525142347534, "grad_norm": 1.15625, "learning_rate": 1.9554349290108253e-05, "loss": 0.4967, "step": 2176 }, { "epoch": 0.2986897166769569, "grad_norm": 1.3125, "learning_rate": 1.9553923072314542e-05, "loss": 0.628, "step": 2177 }, { "epoch": 0.2988269191191603, "grad_norm": 1.21875, "learning_rate": 1.955349665545164e-05, "loss": 0.4999, "step": 2178 }, { "epoch": 0.2989641215613638, "grad_norm": 1.3046875, "learning_rate": 1.955307003952843e-05, "loss": 0.5342, "step": 2179 }, { "epoch": 0.29910132400356726, "grad_norm": 1.1875, "learning_rate": 1.9552643224553805e-05, "loss": 0.4035, "step": 2180 }, { "epoch": 0.29923852644577076, "grad_norm": 1.3125, "learning_rate": 1.955221621053666e-05, "loss": 0.4752, "step": 2181 }, { "epoch": 0.2993757288879742, "grad_norm": 1.1875, "learning_rate": 1.955178899748589e-05, "loss": 0.539, "step": 2182 }, { "epoch": 0.2995129313301777, "grad_norm": 1.1875, "learning_rate": 1.955136158541039e-05, "loss": 0.5093, "step": 2183 }, { "epoch": 0.29965013377238114, "grad_norm": 1.2109375, "learning_rate": 1.9550933974319075e-05, "loss": 0.5346, "step": 2184 }, { "epoch": 0.29978733621458464, "grad_norm": 1.1953125, "learning_rate": 1.955050616422085e-05, "loss": 0.5168, "step": 2185 }, { "epoch": 0.2999245386567881, "grad_norm": 1.1796875, "learning_rate": 1.9550078155124632e-05, "loss": 0.5038, "step": 2186 }, { "epoch": 0.3000617410989916, "grad_norm": 1.171875, "learning_rate": 1.954964994703934e-05, "loss": 0.5225, "step": 2187 }, { "epoch": 0.30019894354119503, "grad_norm": 1.1484375, "learning_rate": 1.954922153997389e-05, "loss": 0.4709, "step": 2188 }, { "epoch": 0.30033614598339853, "grad_norm": 1.1796875, "learning_rate": 1.9548792933937216e-05, "loss": 0.5019, "step": 2189 }, { "epoch": 0.30047334842560197, "grad_norm": 1.2578125, "learning_rate": 1.9548364128938244e-05, "loss": 0.5816, "step": 2190 }, { "epoch": 0.30061055086780547, "grad_norm": 1.203125, "learning_rate": 1.9547935124985907e-05, "loss": 0.4969, "step": 2191 }, { "epoch": 0.3007477533100089, "grad_norm": 1.1796875, "learning_rate": 1.9547505922089152e-05, "loss": 0.5035, "step": 2192 }, { "epoch": 0.3008849557522124, "grad_norm": 1.1953125, "learning_rate": 1.9547076520256912e-05, "loss": 0.5351, "step": 2193 }, { "epoch": 0.30102215819441586, "grad_norm": 1.125, "learning_rate": 1.9546646919498143e-05, "loss": 0.4644, "step": 2194 }, { "epoch": 0.30115936063661936, "grad_norm": 1.2734375, "learning_rate": 1.9546217119821788e-05, "loss": 0.6454, "step": 2195 }, { "epoch": 0.3012965630788228, "grad_norm": 1.3125, "learning_rate": 1.954578712123681e-05, "loss": 0.6554, "step": 2196 }, { "epoch": 0.3014337655210263, "grad_norm": 1.1953125, "learning_rate": 1.9545356923752164e-05, "loss": 0.6034, "step": 2197 }, { "epoch": 0.30157096796322974, "grad_norm": 1.2578125, "learning_rate": 1.9544926527376815e-05, "loss": 0.5079, "step": 2198 }, { "epoch": 0.30170817040543324, "grad_norm": 1.203125, "learning_rate": 1.9544495932119733e-05, "loss": 0.5544, "step": 2199 }, { "epoch": 0.3018453728476367, "grad_norm": 1.1640625, "learning_rate": 1.9544065137989886e-05, "loss": 0.4623, "step": 2200 }, { "epoch": 0.3019825752898402, "grad_norm": 1.203125, "learning_rate": 1.9543634144996252e-05, "loss": 0.5629, "step": 2201 }, { "epoch": 0.3021197777320436, "grad_norm": 1.109375, "learning_rate": 1.9543202953147814e-05, "loss": 0.474, "step": 2202 }, { "epoch": 0.3022569801742471, "grad_norm": 1.1484375, "learning_rate": 1.9542771562453553e-05, "loss": 0.4739, "step": 2203 }, { "epoch": 0.30239418261645057, "grad_norm": 1.1953125, "learning_rate": 1.9542339972922458e-05, "loss": 0.5029, "step": 2204 }, { "epoch": 0.30253138505865407, "grad_norm": 1.2109375, "learning_rate": 1.9541908184563524e-05, "loss": 0.5199, "step": 2205 }, { "epoch": 0.3026685875008575, "grad_norm": 1.390625, "learning_rate": 1.954147619738574e-05, "loss": 0.5943, "step": 2206 }, { "epoch": 0.302805789943061, "grad_norm": 1.1640625, "learning_rate": 1.954104401139812e-05, "loss": 0.4935, "step": 2207 }, { "epoch": 0.30294299238526445, "grad_norm": 1.296875, "learning_rate": 1.9540611626609666e-05, "loss": 0.5108, "step": 2208 }, { "epoch": 0.30308019482746795, "grad_norm": 1.328125, "learning_rate": 1.9540179043029374e-05, "loss": 0.5683, "step": 2209 }, { "epoch": 0.3032173972696714, "grad_norm": 1.265625, "learning_rate": 1.9539746260666276e-05, "loss": 0.5029, "step": 2210 }, { "epoch": 0.3033545997118749, "grad_norm": 1.2265625, "learning_rate": 1.9539313279529377e-05, "loss": 0.5682, "step": 2211 }, { "epoch": 0.30349180215407834, "grad_norm": 1.203125, "learning_rate": 1.9538880099627702e-05, "loss": 0.5145, "step": 2212 }, { "epoch": 0.30362900459628184, "grad_norm": 1.125, "learning_rate": 1.9538446720970282e-05, "loss": 0.4384, "step": 2213 }, { "epoch": 0.3037662070384853, "grad_norm": 1.2734375, "learning_rate": 1.953801314356614e-05, "loss": 0.5809, "step": 2214 }, { "epoch": 0.3039034094806888, "grad_norm": 1.2734375, "learning_rate": 1.9537579367424312e-05, "loss": 0.5551, "step": 2215 }, { "epoch": 0.3040406119228922, "grad_norm": 1.25, "learning_rate": 1.9537145392553838e-05, "loss": 0.6141, "step": 2216 }, { "epoch": 0.3041778143650957, "grad_norm": 1.2109375, "learning_rate": 1.953671121896376e-05, "loss": 0.5351, "step": 2217 }, { "epoch": 0.30431501680729917, "grad_norm": 1.1796875, "learning_rate": 1.9536276846663125e-05, "loss": 0.5376, "step": 2218 }, { "epoch": 0.30445221924950266, "grad_norm": 1.2734375, "learning_rate": 1.9535842275660983e-05, "loss": 0.5498, "step": 2219 }, { "epoch": 0.3045894216917061, "grad_norm": 1.234375, "learning_rate": 1.9535407505966385e-05, "loss": 0.4999, "step": 2220 }, { "epoch": 0.3047266241339096, "grad_norm": 1.328125, "learning_rate": 1.9534972537588398e-05, "loss": 0.6124, "step": 2221 }, { "epoch": 0.30486382657611305, "grad_norm": 1.1328125, "learning_rate": 1.9534537370536083e-05, "loss": 0.4231, "step": 2222 }, { "epoch": 0.30500102901831655, "grad_norm": 1.2578125, "learning_rate": 1.95341020048185e-05, "loss": 0.5825, "step": 2223 }, { "epoch": 0.30513823146052, "grad_norm": 1.359375, "learning_rate": 1.953366644044473e-05, "loss": 0.5683, "step": 2224 }, { "epoch": 0.3052754339027235, "grad_norm": 1.296875, "learning_rate": 1.9533230677423846e-05, "loss": 0.5522, "step": 2225 }, { "epoch": 0.30541263634492694, "grad_norm": 1.1953125, "learning_rate": 1.9532794715764925e-05, "loss": 0.5224, "step": 2226 }, { "epoch": 0.30554983878713043, "grad_norm": 1.3203125, "learning_rate": 1.953235855547705e-05, "loss": 0.5552, "step": 2227 }, { "epoch": 0.3056870412293339, "grad_norm": 1.2421875, "learning_rate": 1.9531922196569312e-05, "loss": 0.5576, "step": 2228 }, { "epoch": 0.3058242436715374, "grad_norm": 1.3203125, "learning_rate": 1.9531485639050804e-05, "loss": 0.541, "step": 2229 }, { "epoch": 0.3059614461137408, "grad_norm": 1.328125, "learning_rate": 1.9531048882930618e-05, "loss": 0.5708, "step": 2230 }, { "epoch": 0.3060986485559443, "grad_norm": 1.1015625, "learning_rate": 1.9530611928217856e-05, "loss": 0.458, "step": 2231 }, { "epoch": 0.30623585099814776, "grad_norm": 1.1796875, "learning_rate": 1.9530174774921624e-05, "loss": 0.4747, "step": 2232 }, { "epoch": 0.30637305344035126, "grad_norm": 1.0390625, "learning_rate": 1.9529737423051034e-05, "loss": 0.4314, "step": 2233 }, { "epoch": 0.3065102558825547, "grad_norm": 1.3515625, "learning_rate": 1.952929987261519e-05, "loss": 0.571, "step": 2234 }, { "epoch": 0.3066474583247582, "grad_norm": 1.1953125, "learning_rate": 1.952886212362322e-05, "loss": 0.5343, "step": 2235 }, { "epoch": 0.30678466076696165, "grad_norm": 1.25, "learning_rate": 1.9528424176084235e-05, "loss": 0.5976, "step": 2236 }, { "epoch": 0.30692186320916515, "grad_norm": 1.2265625, "learning_rate": 1.9527986030007364e-05, "loss": 0.5562, "step": 2237 }, { "epoch": 0.3070590656513686, "grad_norm": 1.21875, "learning_rate": 1.9527547685401738e-05, "loss": 0.5016, "step": 2238 }, { "epoch": 0.3071962680935721, "grad_norm": 1.28125, "learning_rate": 1.952710914227649e-05, "loss": 0.6114, "step": 2239 }, { "epoch": 0.30733347053577553, "grad_norm": 1.21875, "learning_rate": 1.9526670400640755e-05, "loss": 0.5389, "step": 2240 }, { "epoch": 0.30747067297797903, "grad_norm": 1.140625, "learning_rate": 1.9526231460503676e-05, "loss": 0.5137, "step": 2241 }, { "epoch": 0.3076078754201825, "grad_norm": 1.2578125, "learning_rate": 1.95257923218744e-05, "loss": 0.5739, "step": 2242 }, { "epoch": 0.307745077862386, "grad_norm": 1.234375, "learning_rate": 1.952535298476208e-05, "loss": 0.5388, "step": 2243 }, { "epoch": 0.3078822803045894, "grad_norm": 1.2109375, "learning_rate": 1.9524913449175865e-05, "loss": 0.5479, "step": 2244 }, { "epoch": 0.3080194827467929, "grad_norm": 1.2265625, "learning_rate": 1.9524473715124915e-05, "loss": 0.5552, "step": 2245 }, { "epoch": 0.30815668518899636, "grad_norm": 1.2109375, "learning_rate": 1.9524033782618394e-05, "loss": 0.5361, "step": 2246 }, { "epoch": 0.30829388763119986, "grad_norm": 1.2578125, "learning_rate": 1.9523593651665463e-05, "loss": 0.576, "step": 2247 }, { "epoch": 0.3084310900734033, "grad_norm": 1.3671875, "learning_rate": 1.9523153322275302e-05, "loss": 0.5646, "step": 2248 }, { "epoch": 0.3085682925156068, "grad_norm": 1.21875, "learning_rate": 1.952271279445708e-05, "loss": 0.5126, "step": 2249 }, { "epoch": 0.30870549495781024, "grad_norm": 1.328125, "learning_rate": 1.952227206821997e-05, "loss": 0.5749, "step": 2250 }, { "epoch": 0.30884269740001374, "grad_norm": 1.15625, "learning_rate": 1.952183114357317e-05, "loss": 0.4517, "step": 2251 }, { "epoch": 0.3089798998422172, "grad_norm": 1.25, "learning_rate": 1.9521390020525856e-05, "loss": 0.5776, "step": 2252 }, { "epoch": 0.3091171022844207, "grad_norm": 1.390625, "learning_rate": 1.9520948699087224e-05, "loss": 0.5861, "step": 2253 }, { "epoch": 0.30925430472662413, "grad_norm": 1.3671875, "learning_rate": 1.9520507179266466e-05, "loss": 0.595, "step": 2254 }, { "epoch": 0.30939150716882763, "grad_norm": 1.1953125, "learning_rate": 1.9520065461072787e-05, "loss": 0.4916, "step": 2255 }, { "epoch": 0.30952870961103107, "grad_norm": 1.2734375, "learning_rate": 1.9519623544515386e-05, "loss": 0.5798, "step": 2256 }, { "epoch": 0.30966591205323457, "grad_norm": 1.1875, "learning_rate": 1.9519181429603472e-05, "loss": 0.4866, "step": 2257 }, { "epoch": 0.309803114495438, "grad_norm": 1.2421875, "learning_rate": 1.951873911634626e-05, "loss": 0.5825, "step": 2258 }, { "epoch": 0.3099403169376415, "grad_norm": 1.140625, "learning_rate": 1.951829660475296e-05, "loss": 0.4801, "step": 2259 }, { "epoch": 0.31007751937984496, "grad_norm": 1.234375, "learning_rate": 1.9517853894832802e-05, "loss": 0.585, "step": 2260 }, { "epoch": 0.31021472182204846, "grad_norm": 1.296875, "learning_rate": 1.9517410986595e-05, "loss": 0.529, "step": 2261 }, { "epoch": 0.3103519242642519, "grad_norm": 1.21875, "learning_rate": 1.951696788004879e-05, "loss": 0.5408, "step": 2262 }, { "epoch": 0.3104891267064554, "grad_norm": 1.2578125, "learning_rate": 1.9516524575203402e-05, "loss": 0.5258, "step": 2263 }, { "epoch": 0.31062632914865884, "grad_norm": 1.1875, "learning_rate": 1.951608107206807e-05, "loss": 0.4973, "step": 2264 }, { "epoch": 0.31076353159086234, "grad_norm": 1.3203125, "learning_rate": 1.951563737065204e-05, "loss": 0.5455, "step": 2265 }, { "epoch": 0.3109007340330658, "grad_norm": 1.2265625, "learning_rate": 1.9515193470964556e-05, "loss": 0.5801, "step": 2266 }, { "epoch": 0.3110379364752693, "grad_norm": 1.2578125, "learning_rate": 1.9514749373014864e-05, "loss": 0.5165, "step": 2267 }, { "epoch": 0.3111751389174727, "grad_norm": 1.2421875, "learning_rate": 1.9514305076812225e-05, "loss": 0.4957, "step": 2268 }, { "epoch": 0.3113123413596762, "grad_norm": 1.1796875, "learning_rate": 1.9513860582365885e-05, "loss": 0.5242, "step": 2269 }, { "epoch": 0.31144954380187967, "grad_norm": 1.2734375, "learning_rate": 1.9513415889685115e-05, "loss": 0.5227, "step": 2270 }, { "epoch": 0.31158674624408317, "grad_norm": 1.34375, "learning_rate": 1.9512970998779177e-05, "loss": 0.6442, "step": 2271 }, { "epoch": 0.3117239486862866, "grad_norm": 1.3046875, "learning_rate": 1.951252590965734e-05, "loss": 0.5433, "step": 2272 }, { "epoch": 0.3118611511284901, "grad_norm": 1.2109375, "learning_rate": 1.9512080622328884e-05, "loss": 0.5552, "step": 2273 }, { "epoch": 0.31199835357069355, "grad_norm": 1.2734375, "learning_rate": 1.951163513680308e-05, "loss": 0.5061, "step": 2274 }, { "epoch": 0.31213555601289705, "grad_norm": 1.3203125, "learning_rate": 1.9511189453089213e-05, "loss": 0.621, "step": 2275 }, { "epoch": 0.3122727584551005, "grad_norm": 1.28125, "learning_rate": 1.951074357119657e-05, "loss": 0.5778, "step": 2276 }, { "epoch": 0.312409960897304, "grad_norm": 1.1953125, "learning_rate": 1.9510297491134443e-05, "loss": 0.5341, "step": 2277 }, { "epoch": 0.31254716333950744, "grad_norm": 1.2421875, "learning_rate": 1.9509851212912123e-05, "loss": 0.5671, "step": 2278 }, { "epoch": 0.31268436578171094, "grad_norm": 1.1640625, "learning_rate": 1.950940473653891e-05, "loss": 0.5517, "step": 2279 }, { "epoch": 0.3128215682239144, "grad_norm": 1.1484375, "learning_rate": 1.9508958062024107e-05, "loss": 0.5305, "step": 2280 }, { "epoch": 0.3129587706661179, "grad_norm": 1.2734375, "learning_rate": 1.9508511189377026e-05, "loss": 0.5824, "step": 2281 }, { "epoch": 0.3130959731083213, "grad_norm": 1.2109375, "learning_rate": 1.950806411860697e-05, "loss": 0.5089, "step": 2282 }, { "epoch": 0.3132331755505248, "grad_norm": 1.296875, "learning_rate": 1.9507616849723263e-05, "loss": 0.5429, "step": 2283 }, { "epoch": 0.31337037799272827, "grad_norm": 1.3671875, "learning_rate": 1.9507169382735212e-05, "loss": 0.6476, "step": 2284 }, { "epoch": 0.31350758043493177, "grad_norm": 1.2734375, "learning_rate": 1.9506721717652156e-05, "loss": 0.5365, "step": 2285 }, { "epoch": 0.3136447828771352, "grad_norm": 1.1796875, "learning_rate": 1.950627385448341e-05, "loss": 0.4958, "step": 2286 }, { "epoch": 0.3137819853193387, "grad_norm": 1.3515625, "learning_rate": 1.9505825793238312e-05, "loss": 0.5702, "step": 2287 }, { "epoch": 0.31391918776154215, "grad_norm": 1.328125, "learning_rate": 1.9505377533926197e-05, "loss": 0.6289, "step": 2288 }, { "epoch": 0.31405639020374565, "grad_norm": 1.2109375, "learning_rate": 1.9504929076556406e-05, "loss": 0.5712, "step": 2289 }, { "epoch": 0.3141935926459491, "grad_norm": 1.1796875, "learning_rate": 1.9504480421138284e-05, "loss": 0.5289, "step": 2290 }, { "epoch": 0.3143307950881526, "grad_norm": 1.15625, "learning_rate": 1.9504031567681173e-05, "loss": 0.4681, "step": 2291 }, { "epoch": 0.31446799753035604, "grad_norm": 1.125, "learning_rate": 1.9503582516194433e-05, "loss": 0.4497, "step": 2292 }, { "epoch": 0.31460519997255953, "grad_norm": 1.3359375, "learning_rate": 1.950313326668742e-05, "loss": 0.5563, "step": 2293 }, { "epoch": 0.314742402414763, "grad_norm": 1.2265625, "learning_rate": 1.950268381916949e-05, "loss": 0.579, "step": 2294 }, { "epoch": 0.3148796048569665, "grad_norm": 1.296875, "learning_rate": 1.9502234173650005e-05, "loss": 0.5254, "step": 2295 }, { "epoch": 0.3150168072991699, "grad_norm": 1.25, "learning_rate": 1.9501784330138345e-05, "loss": 0.5059, "step": 2296 }, { "epoch": 0.3151540097413734, "grad_norm": 1.3203125, "learning_rate": 1.9501334288643877e-05, "loss": 0.6298, "step": 2297 }, { "epoch": 0.31529121218357686, "grad_norm": 1.203125, "learning_rate": 1.9500884049175976e-05, "loss": 0.5115, "step": 2298 }, { "epoch": 0.31542841462578036, "grad_norm": 1.2734375, "learning_rate": 1.950043361174403e-05, "loss": 0.5665, "step": 2299 }, { "epoch": 0.3155656170679838, "grad_norm": 1.296875, "learning_rate": 1.9499982976357416e-05, "loss": 0.5906, "step": 2300 }, { "epoch": 0.3157028195101873, "grad_norm": 1.2578125, "learning_rate": 1.949953214302553e-05, "loss": 0.6225, "step": 2301 }, { "epoch": 0.31584002195239075, "grad_norm": 1.1875, "learning_rate": 1.9499081111757763e-05, "loss": 0.4916, "step": 2302 }, { "epoch": 0.31597722439459425, "grad_norm": 1.2578125, "learning_rate": 1.9498629882563516e-05, "loss": 0.5414, "step": 2303 }, { "epoch": 0.3161144268367977, "grad_norm": 1.296875, "learning_rate": 1.949817845545219e-05, "loss": 0.5454, "step": 2304 }, { "epoch": 0.3162516292790012, "grad_norm": 1.3125, "learning_rate": 1.9497726830433184e-05, "loss": 0.6106, "step": 2305 }, { "epoch": 0.31638883172120463, "grad_norm": 1.21875, "learning_rate": 1.9497275007515915e-05, "loss": 0.5838, "step": 2306 }, { "epoch": 0.31652603416340813, "grad_norm": 1.2734375, "learning_rate": 1.94968229867098e-05, "loss": 0.5746, "step": 2307 }, { "epoch": 0.3166632366056116, "grad_norm": 1.234375, "learning_rate": 1.949637076802425e-05, "loss": 0.5495, "step": 2308 }, { "epoch": 0.3168004390478151, "grad_norm": 1.28125, "learning_rate": 1.9495918351468695e-05, "loss": 0.5868, "step": 2309 }, { "epoch": 0.3169376414900185, "grad_norm": 1.1953125, "learning_rate": 1.9495465737052557e-05, "loss": 0.5459, "step": 2310 }, { "epoch": 0.317074843932222, "grad_norm": 1.25, "learning_rate": 1.9495012924785268e-05, "loss": 0.5864, "step": 2311 }, { "epoch": 0.31721204637442546, "grad_norm": 1.1875, "learning_rate": 1.9494559914676262e-05, "loss": 0.5278, "step": 2312 }, { "epoch": 0.31734924881662896, "grad_norm": 1.2421875, "learning_rate": 1.949410670673498e-05, "loss": 0.6006, "step": 2313 }, { "epoch": 0.3174864512588324, "grad_norm": 1.234375, "learning_rate": 1.949365330097086e-05, "loss": 0.5431, "step": 2314 }, { "epoch": 0.3176236537010359, "grad_norm": 1.3828125, "learning_rate": 1.949319969739336e-05, "loss": 0.6322, "step": 2315 }, { "epoch": 0.31776085614323935, "grad_norm": 1.25, "learning_rate": 1.9492745896011923e-05, "loss": 0.5598, "step": 2316 }, { "epoch": 0.31789805858544284, "grad_norm": 1.2578125, "learning_rate": 1.9492291896836003e-05, "loss": 0.5207, "step": 2317 }, { "epoch": 0.3180352610276463, "grad_norm": 1.140625, "learning_rate": 1.949183769987507e-05, "loss": 0.5553, "step": 2318 }, { "epoch": 0.3181724634698498, "grad_norm": 1.1875, "learning_rate": 1.9491383305138576e-05, "loss": 0.5316, "step": 2319 }, { "epoch": 0.31830966591205323, "grad_norm": 1.2578125, "learning_rate": 1.9490928712635995e-05, "loss": 0.5926, "step": 2320 }, { "epoch": 0.31844686835425673, "grad_norm": 1.4375, "learning_rate": 1.94904739223768e-05, "loss": 0.6055, "step": 2321 }, { "epoch": 0.3185840707964602, "grad_norm": 1.3125, "learning_rate": 1.9490018934370464e-05, "loss": 0.6556, "step": 2322 }, { "epoch": 0.31872127323866367, "grad_norm": 1.390625, "learning_rate": 1.9489563748626468e-05, "loss": 0.6139, "step": 2323 }, { "epoch": 0.3188584756808671, "grad_norm": 1.3125, "learning_rate": 1.9489108365154296e-05, "loss": 0.4969, "step": 2324 }, { "epoch": 0.3189956781230706, "grad_norm": 1.1796875, "learning_rate": 1.948865278396344e-05, "loss": 0.5188, "step": 2325 }, { "epoch": 0.31913288056527406, "grad_norm": 1.2890625, "learning_rate": 1.948819700506339e-05, "loss": 0.5438, "step": 2326 }, { "epoch": 0.31927008300747756, "grad_norm": 1.265625, "learning_rate": 1.9487741028463643e-05, "loss": 0.5506, "step": 2327 }, { "epoch": 0.319407285449681, "grad_norm": 1.0390625, "learning_rate": 1.9487284854173698e-05, "loss": 0.4004, "step": 2328 }, { "epoch": 0.3195444878918845, "grad_norm": 1.171875, "learning_rate": 1.9486828482203064e-05, "loss": 0.4778, "step": 2329 }, { "epoch": 0.31968169033408794, "grad_norm": 1.296875, "learning_rate": 1.9486371912561247e-05, "loss": 0.5897, "step": 2330 }, { "epoch": 0.31981889277629144, "grad_norm": 1.125, "learning_rate": 1.9485915145257762e-05, "loss": 0.5352, "step": 2331 }, { "epoch": 0.3199560952184949, "grad_norm": 1.328125, "learning_rate": 1.9485458180302127e-05, "loss": 0.541, "step": 2332 }, { "epoch": 0.3200932976606984, "grad_norm": 1.2421875, "learning_rate": 1.948500101770386e-05, "loss": 0.5578, "step": 2333 }, { "epoch": 0.3202305001029018, "grad_norm": 1.2890625, "learning_rate": 1.948454365747249e-05, "loss": 0.5281, "step": 2334 }, { "epoch": 0.3203677025451053, "grad_norm": 1.2265625, "learning_rate": 1.9484086099617543e-05, "loss": 0.5034, "step": 2335 }, { "epoch": 0.32050490498730877, "grad_norm": 1.3125, "learning_rate": 1.948362834414856e-05, "loss": 0.61, "step": 2336 }, { "epoch": 0.32064210742951227, "grad_norm": 1.296875, "learning_rate": 1.948317039107507e-05, "loss": 0.5581, "step": 2337 }, { "epoch": 0.3207793098717157, "grad_norm": 1.296875, "learning_rate": 1.9482712240406624e-05, "loss": 0.573, "step": 2338 }, { "epoch": 0.3209165123139192, "grad_norm": 1.2578125, "learning_rate": 1.948225389215276e-05, "loss": 0.5106, "step": 2339 }, { "epoch": 0.32105371475612265, "grad_norm": 1.2421875, "learning_rate": 1.9481795346323034e-05, "loss": 0.5685, "step": 2340 }, { "epoch": 0.32119091719832615, "grad_norm": 1.28125, "learning_rate": 1.9481336602926998e-05, "loss": 0.4654, "step": 2341 }, { "epoch": 0.3213281196405296, "grad_norm": 1.296875, "learning_rate": 1.9480877661974208e-05, "loss": 0.5627, "step": 2342 }, { "epoch": 0.3214653220827331, "grad_norm": 1.234375, "learning_rate": 1.9480418523474235e-05, "loss": 0.5632, "step": 2343 }, { "epoch": 0.32160252452493654, "grad_norm": 1.1328125, "learning_rate": 1.9479959187436635e-05, "loss": 0.4634, "step": 2344 }, { "epoch": 0.32173972696714004, "grad_norm": 1.390625, "learning_rate": 1.947949965387099e-05, "loss": 0.5838, "step": 2345 }, { "epoch": 0.3218769294093435, "grad_norm": 1.3359375, "learning_rate": 1.9479039922786866e-05, "loss": 0.6734, "step": 2346 }, { "epoch": 0.322014131851547, "grad_norm": 1.15625, "learning_rate": 1.9478579994193845e-05, "loss": 0.5631, "step": 2347 }, { "epoch": 0.3221513342937504, "grad_norm": 1.28125, "learning_rate": 1.9478119868101514e-05, "loss": 0.5432, "step": 2348 }, { "epoch": 0.3222885367359539, "grad_norm": 1.1875, "learning_rate": 1.9477659544519453e-05, "loss": 0.5179, "step": 2349 }, { "epoch": 0.32242573917815737, "grad_norm": 1.234375, "learning_rate": 1.947719902345726e-05, "loss": 0.5129, "step": 2350 }, { "epoch": 0.32256294162036087, "grad_norm": 1.15625, "learning_rate": 1.9476738304924526e-05, "loss": 0.5512, "step": 2351 }, { "epoch": 0.3227001440625643, "grad_norm": 1.25, "learning_rate": 1.9476277388930857e-05, "loss": 0.4893, "step": 2352 }, { "epoch": 0.3228373465047678, "grad_norm": 1.125, "learning_rate": 1.947581627548585e-05, "loss": 0.4986, "step": 2353 }, { "epoch": 0.32297454894697125, "grad_norm": 1.3671875, "learning_rate": 1.947535496459912e-05, "loss": 0.6521, "step": 2354 }, { "epoch": 0.32311175138917475, "grad_norm": 1.28125, "learning_rate": 1.9474893456280267e-05, "loss": 0.5477, "step": 2355 }, { "epoch": 0.3232489538313782, "grad_norm": 1.296875, "learning_rate": 1.947443175053892e-05, "loss": 0.5527, "step": 2356 }, { "epoch": 0.3233861562735817, "grad_norm": 1.265625, "learning_rate": 1.947396984738469e-05, "loss": 0.5631, "step": 2357 }, { "epoch": 0.32352335871578514, "grad_norm": 1.2890625, "learning_rate": 1.947350774682721e-05, "loss": 0.6186, "step": 2358 }, { "epoch": 0.32366056115798864, "grad_norm": 1.3046875, "learning_rate": 1.9473045448876103e-05, "loss": 0.5439, "step": 2359 }, { "epoch": 0.3237977636001921, "grad_norm": 1.34375, "learning_rate": 1.9472582953541003e-05, "loss": 0.6501, "step": 2360 }, { "epoch": 0.3239349660423956, "grad_norm": 1.2890625, "learning_rate": 1.9472120260831545e-05, "loss": 0.5481, "step": 2361 }, { "epoch": 0.324072168484599, "grad_norm": 1.3125, "learning_rate": 1.947165737075737e-05, "loss": 0.6014, "step": 2362 }, { "epoch": 0.3242093709268025, "grad_norm": 1.2109375, "learning_rate": 1.9471194283328124e-05, "loss": 0.553, "step": 2363 }, { "epoch": 0.32434657336900596, "grad_norm": 1.21875, "learning_rate": 1.947073099855346e-05, "loss": 0.545, "step": 2364 }, { "epoch": 0.32448377581120946, "grad_norm": 1.203125, "learning_rate": 1.9470267516443027e-05, "loss": 0.5596, "step": 2365 }, { "epoch": 0.3246209782534129, "grad_norm": 1.265625, "learning_rate": 1.946980383700648e-05, "loss": 0.5174, "step": 2366 }, { "epoch": 0.3247581806956164, "grad_norm": 1.171875, "learning_rate": 1.9469339960253483e-05, "loss": 0.5012, "step": 2367 }, { "epoch": 0.32489538313781985, "grad_norm": 1.390625, "learning_rate": 1.9468875886193704e-05, "loss": 0.6556, "step": 2368 }, { "epoch": 0.32503258558002335, "grad_norm": 1.21875, "learning_rate": 1.946841161483681e-05, "loss": 0.5243, "step": 2369 }, { "epoch": 0.3251697880222268, "grad_norm": 1.2265625, "learning_rate": 1.946794714619247e-05, "loss": 0.5757, "step": 2370 }, { "epoch": 0.3253069904644303, "grad_norm": 1.234375, "learning_rate": 1.946748248027037e-05, "loss": 0.5498, "step": 2371 }, { "epoch": 0.32544419290663373, "grad_norm": 1.4296875, "learning_rate": 1.9467017617080194e-05, "loss": 0.6469, "step": 2372 }, { "epoch": 0.32558139534883723, "grad_norm": 1.2265625, "learning_rate": 1.9466552556631618e-05, "loss": 0.4552, "step": 2373 }, { "epoch": 0.3257185977910407, "grad_norm": 1.34375, "learning_rate": 1.9466087298934337e-05, "loss": 0.6228, "step": 2374 }, { "epoch": 0.3258558002332442, "grad_norm": 1.265625, "learning_rate": 1.9465621843998045e-05, "loss": 0.5418, "step": 2375 }, { "epoch": 0.3259930026754476, "grad_norm": 1.1953125, "learning_rate": 1.9465156191832445e-05, "loss": 0.537, "step": 2376 }, { "epoch": 0.3261302051176511, "grad_norm": 1.140625, "learning_rate": 1.946469034244723e-05, "loss": 0.4875, "step": 2377 }, { "epoch": 0.32626740755985456, "grad_norm": 1.1796875, "learning_rate": 1.9464224295852113e-05, "loss": 0.5148, "step": 2378 }, { "epoch": 0.32640461000205806, "grad_norm": 1.078125, "learning_rate": 1.9463758052056805e-05, "loss": 0.469, "step": 2379 }, { "epoch": 0.3265418124442615, "grad_norm": 1.3125, "learning_rate": 1.946329161107102e-05, "loss": 0.5436, "step": 2380 }, { "epoch": 0.326679014886465, "grad_norm": 1.2421875, "learning_rate": 1.9462824972904474e-05, "loss": 0.5291, "step": 2381 }, { "epoch": 0.32681621732866845, "grad_norm": 1.1875, "learning_rate": 1.9462358137566896e-05, "loss": 0.5091, "step": 2382 }, { "epoch": 0.32695341977087194, "grad_norm": 1.2578125, "learning_rate": 1.9461891105068006e-05, "loss": 0.5787, "step": 2383 }, { "epoch": 0.3270906222130754, "grad_norm": 1.1796875, "learning_rate": 1.946142387541754e-05, "loss": 0.4548, "step": 2384 }, { "epoch": 0.3272278246552789, "grad_norm": 1.3515625, "learning_rate": 1.9460956448625232e-05, "loss": 0.5456, "step": 2385 }, { "epoch": 0.32736502709748233, "grad_norm": 1.4375, "learning_rate": 1.946048882470082e-05, "loss": 0.6155, "step": 2386 }, { "epoch": 0.32750222953968583, "grad_norm": 1.328125, "learning_rate": 1.946002100365405e-05, "loss": 0.5801, "step": 2387 }, { "epoch": 0.3276394319818893, "grad_norm": 1.109375, "learning_rate": 1.9459552985494673e-05, "loss": 0.4847, "step": 2388 }, { "epoch": 0.32777663442409277, "grad_norm": 1.1015625, "learning_rate": 1.9459084770232434e-05, "loss": 0.4654, "step": 2389 }, { "epoch": 0.3279138368662962, "grad_norm": 1.3203125, "learning_rate": 1.945861635787709e-05, "loss": 0.5372, "step": 2390 }, { "epoch": 0.3280510393084997, "grad_norm": 1.1953125, "learning_rate": 1.94581477484384e-05, "loss": 0.5638, "step": 2391 }, { "epoch": 0.32818824175070316, "grad_norm": 1.2109375, "learning_rate": 1.9457678941926138e-05, "loss": 0.5756, "step": 2392 }, { "epoch": 0.32832544419290666, "grad_norm": 1.2109375, "learning_rate": 1.945720993835006e-05, "loss": 0.5466, "step": 2393 }, { "epoch": 0.3284626466351101, "grad_norm": 1.203125, "learning_rate": 1.945674073771995e-05, "loss": 0.5656, "step": 2394 }, { "epoch": 0.3285998490773136, "grad_norm": 1.3515625, "learning_rate": 1.945627134004557e-05, "loss": 0.5274, "step": 2395 }, { "epoch": 0.32873705151951704, "grad_norm": 1.1328125, "learning_rate": 1.9455801745336712e-05, "loss": 0.4653, "step": 2396 }, { "epoch": 0.32887425396172054, "grad_norm": 1.1953125, "learning_rate": 1.9455331953603158e-05, "loss": 0.5441, "step": 2397 }, { "epoch": 0.329011456403924, "grad_norm": 1.1484375, "learning_rate": 1.9454861964854694e-05, "loss": 0.5431, "step": 2398 }, { "epoch": 0.3291486588461275, "grad_norm": 1.1328125, "learning_rate": 1.9454391779101115e-05, "loss": 0.5037, "step": 2399 }, { "epoch": 0.3292858612883309, "grad_norm": 1.3359375, "learning_rate": 1.9453921396352216e-05, "loss": 0.6173, "step": 2400 }, { "epoch": 0.3294230637305344, "grad_norm": 1.28125, "learning_rate": 1.9453450816617803e-05, "loss": 0.5525, "step": 2401 }, { "epoch": 0.32956026617273787, "grad_norm": 1.234375, "learning_rate": 1.9452980039907676e-05, "loss": 0.5898, "step": 2402 }, { "epoch": 0.32969746861494137, "grad_norm": 1.3359375, "learning_rate": 1.945250906623165e-05, "loss": 0.5981, "step": 2403 }, { "epoch": 0.3298346710571448, "grad_norm": 1.2109375, "learning_rate": 1.945203789559953e-05, "loss": 0.5302, "step": 2404 }, { "epoch": 0.3299718734993483, "grad_norm": 1.359375, "learning_rate": 1.9451566528021138e-05, "loss": 0.5515, "step": 2405 }, { "epoch": 0.33010907594155176, "grad_norm": 1.3125, "learning_rate": 1.9451094963506297e-05, "loss": 0.5757, "step": 2406 }, { "epoch": 0.33024627838375525, "grad_norm": 1.2421875, "learning_rate": 1.9450623202064832e-05, "loss": 0.5074, "step": 2407 }, { "epoch": 0.3303834808259587, "grad_norm": 1.1953125, "learning_rate": 1.9450151243706573e-05, "loss": 0.4738, "step": 2408 }, { "epoch": 0.3305206832681622, "grad_norm": 1.3046875, "learning_rate": 1.944967908844135e-05, "loss": 0.5553, "step": 2409 }, { "epoch": 0.33065788571036564, "grad_norm": 1.25, "learning_rate": 1.9449206736279005e-05, "loss": 0.5494, "step": 2410 }, { "epoch": 0.33079508815256914, "grad_norm": 1.2578125, "learning_rate": 1.944873418722938e-05, "loss": 0.6035, "step": 2411 }, { "epoch": 0.3309322905947726, "grad_norm": 1.3359375, "learning_rate": 1.9448261441302317e-05, "loss": 0.5618, "step": 2412 }, { "epoch": 0.3310694930369761, "grad_norm": 1.1875, "learning_rate": 1.9447788498507676e-05, "loss": 0.4228, "step": 2413 }, { "epoch": 0.3312066954791795, "grad_norm": 1.3359375, "learning_rate": 1.94473153588553e-05, "loss": 0.5779, "step": 2414 }, { "epoch": 0.331343897921383, "grad_norm": 1.234375, "learning_rate": 1.9446842022355056e-05, "loss": 0.587, "step": 2415 }, { "epoch": 0.33148110036358647, "grad_norm": 1.1875, "learning_rate": 1.9446368489016803e-05, "loss": 0.5668, "step": 2416 }, { "epoch": 0.33161830280578997, "grad_norm": 1.3125, "learning_rate": 1.9445894758850407e-05, "loss": 0.5777, "step": 2417 }, { "epoch": 0.3317555052479934, "grad_norm": 1.2734375, "learning_rate": 1.9445420831865738e-05, "loss": 0.5897, "step": 2418 }, { "epoch": 0.3318927076901969, "grad_norm": 1.2421875, "learning_rate": 1.9444946708072672e-05, "loss": 0.5252, "step": 2419 }, { "epoch": 0.33202991013240035, "grad_norm": 1.21875, "learning_rate": 1.9444472387481093e-05, "loss": 0.551, "step": 2420 }, { "epoch": 0.33216711257460385, "grad_norm": 1.203125, "learning_rate": 1.9443997870100877e-05, "loss": 0.5408, "step": 2421 }, { "epoch": 0.3323043150168073, "grad_norm": 1.2265625, "learning_rate": 1.9443523155941917e-05, "loss": 0.567, "step": 2422 }, { "epoch": 0.3324415174590108, "grad_norm": 1.2109375, "learning_rate": 1.9443048245014097e-05, "loss": 0.5352, "step": 2423 }, { "epoch": 0.33257871990121424, "grad_norm": 1.3125, "learning_rate": 1.944257313732732e-05, "loss": 0.5631, "step": 2424 }, { "epoch": 0.33271592234341774, "grad_norm": 1.234375, "learning_rate": 1.944209783289148e-05, "loss": 0.5193, "step": 2425 }, { "epoch": 0.3328531247856212, "grad_norm": 1.1171875, "learning_rate": 1.9441622331716487e-05, "loss": 0.5024, "step": 2426 }, { "epoch": 0.3329903272278247, "grad_norm": 1.296875, "learning_rate": 1.9441146633812242e-05, "loss": 0.6042, "step": 2427 }, { "epoch": 0.3331275296700281, "grad_norm": 1.1953125, "learning_rate": 1.944067073918866e-05, "loss": 0.4875, "step": 2428 }, { "epoch": 0.3332647321122316, "grad_norm": 1.2421875, "learning_rate": 1.9440194647855658e-05, "loss": 0.5637, "step": 2429 }, { "epoch": 0.33340193455443506, "grad_norm": 1.2265625, "learning_rate": 1.9439718359823152e-05, "loss": 0.5403, "step": 2430 }, { "epoch": 0.33353913699663856, "grad_norm": 1.2109375, "learning_rate": 1.943924187510107e-05, "loss": 0.4918, "step": 2431 }, { "epoch": 0.333676339438842, "grad_norm": 1.171875, "learning_rate": 1.9438765193699337e-05, "loss": 0.54, "step": 2432 }, { "epoch": 0.3338135418810455, "grad_norm": 1.28125, "learning_rate": 1.943828831562789e-05, "loss": 0.574, "step": 2433 }, { "epoch": 0.33395074432324895, "grad_norm": 1.234375, "learning_rate": 1.943781124089666e-05, "loss": 0.4972, "step": 2434 }, { "epoch": 0.33408794676545245, "grad_norm": 1.1953125, "learning_rate": 1.9437333969515593e-05, "loss": 0.5054, "step": 2435 }, { "epoch": 0.3342251492076559, "grad_norm": 1.28125, "learning_rate": 1.9436856501494628e-05, "loss": 0.5682, "step": 2436 }, { "epoch": 0.3343623516498594, "grad_norm": 1.234375, "learning_rate": 1.9436378836843716e-05, "loss": 0.6259, "step": 2437 }, { "epoch": 0.33449955409206283, "grad_norm": 1.1484375, "learning_rate": 1.943590097557281e-05, "loss": 0.4728, "step": 2438 }, { "epoch": 0.33463675653426633, "grad_norm": 1.09375, "learning_rate": 1.943542291769187e-05, "loss": 0.4348, "step": 2439 }, { "epoch": 0.3347739589764698, "grad_norm": 1.359375, "learning_rate": 1.9434944663210853e-05, "loss": 0.5507, "step": 2440 }, { "epoch": 0.3349111614186733, "grad_norm": 1.21875, "learning_rate": 1.9434466212139726e-05, "loss": 0.5364, "step": 2441 }, { "epoch": 0.3350483638608767, "grad_norm": 1.1171875, "learning_rate": 1.9433987564488453e-05, "loss": 0.4355, "step": 2442 }, { "epoch": 0.3351855663030802, "grad_norm": 1.2265625, "learning_rate": 1.9433508720267018e-05, "loss": 0.5152, "step": 2443 }, { "epoch": 0.33532276874528366, "grad_norm": 1.296875, "learning_rate": 1.943302967948539e-05, "loss": 0.504, "step": 2444 }, { "epoch": 0.33545997118748716, "grad_norm": 1.1953125, "learning_rate": 1.9432550442153547e-05, "loss": 0.5212, "step": 2445 }, { "epoch": 0.3355971736296906, "grad_norm": 1.171875, "learning_rate": 1.9432071008281486e-05, "loss": 0.4682, "step": 2446 }, { "epoch": 0.3357343760718941, "grad_norm": 1.15625, "learning_rate": 1.943159137787919e-05, "loss": 0.4445, "step": 2447 }, { "epoch": 0.33587157851409755, "grad_norm": 1.1328125, "learning_rate": 1.943111155095665e-05, "loss": 0.4807, "step": 2448 }, { "epoch": 0.33600878095630105, "grad_norm": 1.28125, "learning_rate": 1.9430631527523874e-05, "loss": 0.5654, "step": 2449 }, { "epoch": 0.3361459833985045, "grad_norm": 1.2109375, "learning_rate": 1.9430151307590848e-05, "loss": 0.5544, "step": 2450 }, { "epoch": 0.336283185840708, "grad_norm": 1.15625, "learning_rate": 1.9429670891167595e-05, "loss": 0.5364, "step": 2451 }, { "epoch": 0.33642038828291143, "grad_norm": 1.2265625, "learning_rate": 1.9429190278264116e-05, "loss": 0.5294, "step": 2452 }, { "epoch": 0.33655759072511493, "grad_norm": 1.1640625, "learning_rate": 1.9428709468890426e-05, "loss": 0.5285, "step": 2453 }, { "epoch": 0.3366947931673184, "grad_norm": 1.2109375, "learning_rate": 1.9428228463056546e-05, "loss": 0.4967, "step": 2454 }, { "epoch": 0.3368319956095219, "grad_norm": 1.2265625, "learning_rate": 1.9427747260772492e-05, "loss": 0.5274, "step": 2455 }, { "epoch": 0.3369691980517253, "grad_norm": 1.1875, "learning_rate": 1.9427265862048297e-05, "loss": 0.5614, "step": 2456 }, { "epoch": 0.3371064004939288, "grad_norm": 1.25, "learning_rate": 1.942678426689399e-05, "loss": 0.5191, "step": 2457 }, { "epoch": 0.33724360293613226, "grad_norm": 1.2265625, "learning_rate": 1.942630247531961e-05, "loss": 0.5022, "step": 2458 }, { "epoch": 0.33738080537833576, "grad_norm": 1.234375, "learning_rate": 1.9425820487335183e-05, "loss": 0.5614, "step": 2459 }, { "epoch": 0.3375180078205392, "grad_norm": 1.3828125, "learning_rate": 1.9425338302950765e-05, "loss": 0.607, "step": 2460 }, { "epoch": 0.3376552102627427, "grad_norm": 1.28125, "learning_rate": 1.9424855922176396e-05, "loss": 0.535, "step": 2461 }, { "epoch": 0.33779241270494614, "grad_norm": 1.1015625, "learning_rate": 1.9424373345022126e-05, "loss": 0.4896, "step": 2462 }, { "epoch": 0.33792961514714964, "grad_norm": 1.2890625, "learning_rate": 1.942389057149802e-05, "loss": 0.532, "step": 2463 }, { "epoch": 0.3380668175893531, "grad_norm": 1.15625, "learning_rate": 1.9423407601614126e-05, "loss": 0.4858, "step": 2464 }, { "epoch": 0.3382040200315566, "grad_norm": 1.3359375, "learning_rate": 1.9422924435380514e-05, "loss": 0.588, "step": 2465 }, { "epoch": 0.33834122247376003, "grad_norm": 1.28125, "learning_rate": 1.9422441072807247e-05, "loss": 0.4875, "step": 2466 }, { "epoch": 0.3384784249159635, "grad_norm": 1.1484375, "learning_rate": 1.94219575139044e-05, "loss": 0.5186, "step": 2467 }, { "epoch": 0.33861562735816697, "grad_norm": 1.2265625, "learning_rate": 1.9421473758682047e-05, "loss": 0.5242, "step": 2468 }, { "epoch": 0.33875282980037047, "grad_norm": 1.4140625, "learning_rate": 1.9420989807150268e-05, "loss": 0.6116, "step": 2469 }, { "epoch": 0.3388900322425739, "grad_norm": 1.265625, "learning_rate": 1.942050565931915e-05, "loss": 0.5342, "step": 2470 }, { "epoch": 0.3390272346847774, "grad_norm": 1.3125, "learning_rate": 1.9420021315198772e-05, "loss": 0.6939, "step": 2471 }, { "epoch": 0.33916443712698086, "grad_norm": 1.265625, "learning_rate": 1.9419536774799236e-05, "loss": 0.4977, "step": 2472 }, { "epoch": 0.33930163956918435, "grad_norm": 1.1640625, "learning_rate": 1.9419052038130632e-05, "loss": 0.4666, "step": 2473 }, { "epoch": 0.3394388420113878, "grad_norm": 1.265625, "learning_rate": 1.9418567105203065e-05, "loss": 0.4895, "step": 2474 }, { "epoch": 0.3395760444535913, "grad_norm": 1.234375, "learning_rate": 1.9418081976026634e-05, "loss": 0.5229, "step": 2475 }, { "epoch": 0.33971324689579474, "grad_norm": 1.15625, "learning_rate": 1.941759665061145e-05, "loss": 0.5281, "step": 2476 }, { "epoch": 0.33985044933799824, "grad_norm": 1.40625, "learning_rate": 1.941711112896762e-05, "loss": 0.6314, "step": 2477 }, { "epoch": 0.3399876517802017, "grad_norm": 1.3359375, "learning_rate": 1.941662541110527e-05, "loss": 0.609, "step": 2478 }, { "epoch": 0.3401248542224052, "grad_norm": 1.21875, "learning_rate": 1.9416139497034517e-05, "loss": 0.4964, "step": 2479 }, { "epoch": 0.3402620566646086, "grad_norm": 1.2109375, "learning_rate": 1.9415653386765485e-05, "loss": 0.5315, "step": 2480 }, { "epoch": 0.3403992591068121, "grad_norm": 1.296875, "learning_rate": 1.94151670803083e-05, "loss": 0.5045, "step": 2481 }, { "epoch": 0.34053646154901557, "grad_norm": 1.296875, "learning_rate": 1.9414680577673103e-05, "loss": 0.5933, "step": 2482 }, { "epoch": 0.34067366399121907, "grad_norm": 1.28125, "learning_rate": 1.941419387887002e-05, "loss": 0.5696, "step": 2483 }, { "epoch": 0.3408108664334225, "grad_norm": 1.2890625, "learning_rate": 1.94137069839092e-05, "loss": 0.5755, "step": 2484 }, { "epoch": 0.340948068875626, "grad_norm": 1.21875, "learning_rate": 1.9413219892800785e-05, "loss": 0.5288, "step": 2485 }, { "epoch": 0.34108527131782945, "grad_norm": 1.296875, "learning_rate": 1.9412732605554922e-05, "loss": 0.6084, "step": 2486 }, { "epoch": 0.34122247376003295, "grad_norm": 1.109375, "learning_rate": 1.941224512218177e-05, "loss": 0.4055, "step": 2487 }, { "epoch": 0.3413596762022364, "grad_norm": 1.1640625, "learning_rate": 1.9411757442691482e-05, "loss": 0.5294, "step": 2488 }, { "epoch": 0.3414968786444399, "grad_norm": 1.2734375, "learning_rate": 1.9411269567094223e-05, "loss": 0.607, "step": 2489 }, { "epoch": 0.34163408108664334, "grad_norm": 1.1484375, "learning_rate": 1.9410781495400154e-05, "loss": 0.5395, "step": 2490 }, { "epoch": 0.34177128352884684, "grad_norm": 1.2890625, "learning_rate": 1.9410293227619447e-05, "loss": 0.506, "step": 2491 }, { "epoch": 0.3419084859710503, "grad_norm": 1.3203125, "learning_rate": 1.9409804763762276e-05, "loss": 0.534, "step": 2492 }, { "epoch": 0.3420456884132538, "grad_norm": 1.15625, "learning_rate": 1.9409316103838824e-05, "loss": 0.4262, "step": 2493 }, { "epoch": 0.3421828908554572, "grad_norm": 1.296875, "learning_rate": 1.9408827247859262e-05, "loss": 0.5805, "step": 2494 }, { "epoch": 0.3423200932976607, "grad_norm": 1.28125, "learning_rate": 1.9408338195833783e-05, "loss": 0.5382, "step": 2495 }, { "epoch": 0.34245729573986416, "grad_norm": 1.34375, "learning_rate": 1.9407848947772577e-05, "loss": 0.5748, "step": 2496 }, { "epoch": 0.34259449818206766, "grad_norm": 1.2578125, "learning_rate": 1.9407359503685836e-05, "loss": 0.5349, "step": 2497 }, { "epoch": 0.3427317006242711, "grad_norm": 1.0859375, "learning_rate": 1.9406869863583758e-05, "loss": 0.4923, "step": 2498 }, { "epoch": 0.3428689030664746, "grad_norm": 1.265625, "learning_rate": 1.9406380027476546e-05, "loss": 0.5098, "step": 2499 }, { "epoch": 0.34300610550867805, "grad_norm": 1.265625, "learning_rate": 1.940588999537441e-05, "loss": 0.5229, "step": 2500 }, { "epoch": 0.34314330795088155, "grad_norm": 1.21875, "learning_rate": 1.9405399767287556e-05, "loss": 0.5262, "step": 2501 }, { "epoch": 0.343280510393085, "grad_norm": 1.3515625, "learning_rate": 1.94049093432262e-05, "loss": 0.518, "step": 2502 }, { "epoch": 0.3434177128352885, "grad_norm": 1.1953125, "learning_rate": 1.9404418723200558e-05, "loss": 0.4603, "step": 2503 }, { "epoch": 0.34355491527749193, "grad_norm": 1.1875, "learning_rate": 1.9403927907220858e-05, "loss": 0.5056, "step": 2504 }, { "epoch": 0.34369211771969543, "grad_norm": 1.296875, "learning_rate": 1.940343689529732e-05, "loss": 0.5925, "step": 2505 }, { "epoch": 0.3438293201618989, "grad_norm": 1.3984375, "learning_rate": 1.9402945687440183e-05, "loss": 0.5236, "step": 2506 }, { "epoch": 0.3439665226041024, "grad_norm": 1.15625, "learning_rate": 1.940245428365968e-05, "loss": 0.5171, "step": 2507 }, { "epoch": 0.3441037250463058, "grad_norm": 1.25, "learning_rate": 1.9401962683966045e-05, "loss": 0.5771, "step": 2508 }, { "epoch": 0.3442409274885093, "grad_norm": 1.1953125, "learning_rate": 1.9401470888369526e-05, "loss": 0.4763, "step": 2509 }, { "epoch": 0.34437812993071276, "grad_norm": 1.1796875, "learning_rate": 1.9400978896880368e-05, "loss": 0.5324, "step": 2510 }, { "epoch": 0.34451533237291626, "grad_norm": 1.2734375, "learning_rate": 1.940048670950882e-05, "loss": 0.5651, "step": 2511 }, { "epoch": 0.3446525348151197, "grad_norm": 1.3203125, "learning_rate": 1.9399994326265142e-05, "loss": 0.5224, "step": 2512 }, { "epoch": 0.3447897372573232, "grad_norm": 1.1953125, "learning_rate": 1.9399501747159596e-05, "loss": 0.5276, "step": 2513 }, { "epoch": 0.34492693969952665, "grad_norm": 1.234375, "learning_rate": 1.939900897220244e-05, "loss": 0.5736, "step": 2514 }, { "epoch": 0.34506414214173015, "grad_norm": 1.2578125, "learning_rate": 1.9398516001403935e-05, "loss": 0.5121, "step": 2515 }, { "epoch": 0.3452013445839336, "grad_norm": 1.1953125, "learning_rate": 1.9398022834774368e-05, "loss": 0.5195, "step": 2516 }, { "epoch": 0.3453385470261371, "grad_norm": 1.3359375, "learning_rate": 1.9397529472324007e-05, "loss": 0.5341, "step": 2517 }, { "epoch": 0.34547574946834053, "grad_norm": 1.15625, "learning_rate": 1.939703591406313e-05, "loss": 0.5003, "step": 2518 }, { "epoch": 0.34561295191054403, "grad_norm": 1.2265625, "learning_rate": 1.9396542160002025e-05, "loss": 0.5472, "step": 2519 }, { "epoch": 0.3457501543527475, "grad_norm": 1.3359375, "learning_rate": 1.9396048210150978e-05, "loss": 0.5704, "step": 2520 }, { "epoch": 0.345887356794951, "grad_norm": 1.140625, "learning_rate": 1.939555406452028e-05, "loss": 0.4666, "step": 2521 }, { "epoch": 0.3460245592371544, "grad_norm": 1.296875, "learning_rate": 1.939505972312023e-05, "loss": 0.4906, "step": 2522 }, { "epoch": 0.3461617616793579, "grad_norm": 1.359375, "learning_rate": 1.9394565185961127e-05, "loss": 0.5934, "step": 2523 }, { "epoch": 0.34629896412156136, "grad_norm": 1.2578125, "learning_rate": 1.9394070453053277e-05, "loss": 0.6086, "step": 2524 }, { "epoch": 0.34643616656376486, "grad_norm": 1.265625, "learning_rate": 1.9393575524406986e-05, "loss": 0.6226, "step": 2525 }, { "epoch": 0.3465733690059683, "grad_norm": 1.15625, "learning_rate": 1.9393080400032567e-05, "loss": 0.4449, "step": 2526 }, { "epoch": 0.3467105714481718, "grad_norm": 1.2265625, "learning_rate": 1.9392585079940337e-05, "loss": 0.5809, "step": 2527 }, { "epoch": 0.34684777389037524, "grad_norm": 1.2734375, "learning_rate": 1.9392089564140616e-05, "loss": 0.5561, "step": 2528 }, { "epoch": 0.34698497633257874, "grad_norm": 1.203125, "learning_rate": 1.939159385264373e-05, "loss": 0.5685, "step": 2529 }, { "epoch": 0.3471221787747822, "grad_norm": 1.3046875, "learning_rate": 1.9391097945460005e-05, "loss": 0.6034, "step": 2530 }, { "epoch": 0.3472593812169857, "grad_norm": 1.1640625, "learning_rate": 1.939060184259978e-05, "loss": 0.5245, "step": 2531 }, { "epoch": 0.34739658365918913, "grad_norm": 1.2890625, "learning_rate": 1.9390105544073385e-05, "loss": 0.557, "step": 2532 }, { "epoch": 0.3475337861013926, "grad_norm": 1.078125, "learning_rate": 1.938960904989117e-05, "loss": 0.4674, "step": 2533 }, { "epoch": 0.34767098854359607, "grad_norm": 1.1953125, "learning_rate": 1.9389112360063467e-05, "loss": 0.5532, "step": 2534 }, { "epoch": 0.34780819098579957, "grad_norm": 1.15625, "learning_rate": 1.9388615474600635e-05, "loss": 0.5154, "step": 2535 }, { "epoch": 0.347945393428003, "grad_norm": 1.2734375, "learning_rate": 1.9388118393513026e-05, "loss": 0.5627, "step": 2536 }, { "epoch": 0.3480825958702065, "grad_norm": 1.0703125, "learning_rate": 1.9387621116810995e-05, "loss": 0.5127, "step": 2537 }, { "epoch": 0.34821979831240996, "grad_norm": 1.25, "learning_rate": 1.9387123644504905e-05, "loss": 0.5212, "step": 2538 }, { "epoch": 0.34835700075461346, "grad_norm": 1.1953125, "learning_rate": 1.9386625976605122e-05, "loss": 0.5201, "step": 2539 }, { "epoch": 0.3484942031968169, "grad_norm": 1.2890625, "learning_rate": 1.9386128113122013e-05, "loss": 0.5808, "step": 2540 }, { "epoch": 0.3486314056390204, "grad_norm": 1.328125, "learning_rate": 1.9385630054065955e-05, "loss": 0.6369, "step": 2541 }, { "epoch": 0.34876860808122384, "grad_norm": 1.3359375, "learning_rate": 1.9385131799447324e-05, "loss": 0.5238, "step": 2542 }, { "epoch": 0.34890581052342734, "grad_norm": 1.28125, "learning_rate": 1.9384633349276503e-05, "loss": 0.5591, "step": 2543 }, { "epoch": 0.3490430129656308, "grad_norm": 1.3203125, "learning_rate": 1.9384134703563875e-05, "loss": 0.5753, "step": 2544 }, { "epoch": 0.3491802154078343, "grad_norm": 1.1796875, "learning_rate": 1.9383635862319833e-05, "loss": 0.5227, "step": 2545 }, { "epoch": 0.3493174178500377, "grad_norm": 1.1015625, "learning_rate": 1.938313682555477e-05, "loss": 0.4576, "step": 2546 }, { "epoch": 0.3494546202922412, "grad_norm": 1.1796875, "learning_rate": 1.9382637593279082e-05, "loss": 0.5272, "step": 2547 }, { "epoch": 0.34959182273444467, "grad_norm": 1.375, "learning_rate": 1.9382138165503176e-05, "loss": 0.6142, "step": 2548 }, { "epoch": 0.34972902517664817, "grad_norm": 1.296875, "learning_rate": 1.9381638542237457e-05, "loss": 0.5275, "step": 2549 }, { "epoch": 0.3498662276188516, "grad_norm": 1.2421875, "learning_rate": 1.938113872349233e-05, "loss": 0.5526, "step": 2550 }, { "epoch": 0.3500034300610551, "grad_norm": 1.2109375, "learning_rate": 1.9380638709278213e-05, "loss": 0.5049, "step": 2551 }, { "epoch": 0.35014063250325855, "grad_norm": 1.375, "learning_rate": 1.9380138499605526e-05, "loss": 0.569, "step": 2552 }, { "epoch": 0.35027783494546205, "grad_norm": 1.1796875, "learning_rate": 1.9379638094484693e-05, "loss": 0.5201, "step": 2553 }, { "epoch": 0.3504150373876655, "grad_norm": 1.3046875, "learning_rate": 1.9379137493926135e-05, "loss": 0.5767, "step": 2554 }, { "epoch": 0.350552239829869, "grad_norm": 1.234375, "learning_rate": 1.9378636697940284e-05, "loss": 0.581, "step": 2555 }, { "epoch": 0.35068944227207244, "grad_norm": 1.21875, "learning_rate": 1.937813570653758e-05, "loss": 0.567, "step": 2556 }, { "epoch": 0.35082664471427594, "grad_norm": 1.2578125, "learning_rate": 1.9377634519728458e-05, "loss": 0.564, "step": 2557 }, { "epoch": 0.3509638471564794, "grad_norm": 1.2578125, "learning_rate": 1.937713313752336e-05, "loss": 0.543, "step": 2558 }, { "epoch": 0.3511010495986829, "grad_norm": 1.2109375, "learning_rate": 1.9376631559932734e-05, "loss": 0.5644, "step": 2559 }, { "epoch": 0.3512382520408863, "grad_norm": 1.171875, "learning_rate": 1.937612978696703e-05, "loss": 0.5112, "step": 2560 }, { "epoch": 0.3513754544830898, "grad_norm": 1.1796875, "learning_rate": 1.9375627818636708e-05, "loss": 0.4889, "step": 2561 }, { "epoch": 0.35151265692529327, "grad_norm": 1.21875, "learning_rate": 1.9375125654952223e-05, "loss": 0.4977, "step": 2562 }, { "epoch": 0.35164985936749676, "grad_norm": 1.2578125, "learning_rate": 1.9374623295924037e-05, "loss": 0.5325, "step": 2563 }, { "epoch": 0.3517870618097002, "grad_norm": 1.3828125, "learning_rate": 1.937412074156262e-05, "loss": 0.5683, "step": 2564 }, { "epoch": 0.3519242642519037, "grad_norm": 1.296875, "learning_rate": 1.937361799187844e-05, "loss": 0.5641, "step": 2565 }, { "epoch": 0.35206146669410715, "grad_norm": 1.2109375, "learning_rate": 1.937311504688198e-05, "loss": 0.5173, "step": 2566 }, { "epoch": 0.35219866913631065, "grad_norm": 1.234375, "learning_rate": 1.9372611906583712e-05, "loss": 0.5501, "step": 2567 }, { "epoch": 0.3523358715785141, "grad_norm": 1.21875, "learning_rate": 1.9372108570994124e-05, "loss": 0.5301, "step": 2568 }, { "epoch": 0.3524730740207176, "grad_norm": 1.203125, "learning_rate": 1.93716050401237e-05, "loss": 0.5467, "step": 2569 }, { "epoch": 0.35261027646292104, "grad_norm": 1.0625, "learning_rate": 1.9371101313982934e-05, "loss": 0.4333, "step": 2570 }, { "epoch": 0.35274747890512453, "grad_norm": 1.2265625, "learning_rate": 1.937059739258232e-05, "loss": 0.5655, "step": 2571 }, { "epoch": 0.352884681347328, "grad_norm": 1.2109375, "learning_rate": 1.9370093275932364e-05, "loss": 0.5105, "step": 2572 }, { "epoch": 0.3530218837895315, "grad_norm": 1.2421875, "learning_rate": 1.9369588964043565e-05, "loss": 0.5512, "step": 2573 }, { "epoch": 0.3531590862317349, "grad_norm": 1.2421875, "learning_rate": 1.9369084456926428e-05, "loss": 0.5381, "step": 2574 }, { "epoch": 0.3532962886739384, "grad_norm": 1.28125, "learning_rate": 1.936857975459147e-05, "loss": 0.5766, "step": 2575 }, { "epoch": 0.35343349111614186, "grad_norm": 1.2734375, "learning_rate": 1.9368074857049204e-05, "loss": 0.5916, "step": 2576 }, { "epoch": 0.35357069355834536, "grad_norm": 1.21875, "learning_rate": 1.9367569764310157e-05, "loss": 0.5492, "step": 2577 }, { "epoch": 0.3537078960005488, "grad_norm": 1.140625, "learning_rate": 1.9367064476384843e-05, "loss": 0.4529, "step": 2578 }, { "epoch": 0.3538450984427523, "grad_norm": 1.125, "learning_rate": 1.93665589932838e-05, "loss": 0.489, "step": 2579 }, { "epoch": 0.35398230088495575, "grad_norm": 1.2421875, "learning_rate": 1.9366053315017552e-05, "loss": 0.5689, "step": 2580 }, { "epoch": 0.35411950332715925, "grad_norm": 1.265625, "learning_rate": 1.936554744159664e-05, "loss": 0.5731, "step": 2581 }, { "epoch": 0.3542567057693627, "grad_norm": 1.1953125, "learning_rate": 1.9365041373031606e-05, "loss": 0.5093, "step": 2582 }, { "epoch": 0.3543939082115662, "grad_norm": 1.265625, "learning_rate": 1.9364535109332993e-05, "loss": 0.5795, "step": 2583 }, { "epoch": 0.35453111065376963, "grad_norm": 1.328125, "learning_rate": 1.9364028650511352e-05, "loss": 0.5793, "step": 2584 }, { "epoch": 0.35466831309597313, "grad_norm": 1.2578125, "learning_rate": 1.9363521996577227e-05, "loss": 0.5274, "step": 2585 }, { "epoch": 0.3548055155381766, "grad_norm": 1.25, "learning_rate": 1.9363015147541188e-05, "loss": 0.5738, "step": 2586 }, { "epoch": 0.3549427179803801, "grad_norm": 1.2265625, "learning_rate": 1.9362508103413783e-05, "loss": 0.5281, "step": 2587 }, { "epoch": 0.3550799204225835, "grad_norm": 1.328125, "learning_rate": 1.9362000864205587e-05, "loss": 0.6002, "step": 2588 }, { "epoch": 0.355217122864787, "grad_norm": 1.2421875, "learning_rate": 1.9361493429927163e-05, "loss": 0.5485, "step": 2589 }, { "epoch": 0.35535432530699046, "grad_norm": 1.2265625, "learning_rate": 1.9360985800589086e-05, "loss": 0.5189, "step": 2590 }, { "epoch": 0.35549152774919396, "grad_norm": 1.3203125, "learning_rate": 1.9360477976201934e-05, "loss": 0.5466, "step": 2591 }, { "epoch": 0.3556287301913974, "grad_norm": 1.1875, "learning_rate": 1.9359969956776287e-05, "loss": 0.5075, "step": 2592 }, { "epoch": 0.3557659326336009, "grad_norm": 1.28125, "learning_rate": 1.935946174232273e-05, "loss": 0.6105, "step": 2593 }, { "epoch": 0.35590313507580434, "grad_norm": 1.171875, "learning_rate": 1.9358953332851856e-05, "loss": 0.5193, "step": 2594 }, { "epoch": 0.35604033751800784, "grad_norm": 1.1875, "learning_rate": 1.9358444728374254e-05, "loss": 0.5208, "step": 2595 }, { "epoch": 0.3561775399602113, "grad_norm": 1.203125, "learning_rate": 1.9357935928900524e-05, "loss": 0.5585, "step": 2596 }, { "epoch": 0.3563147424024148, "grad_norm": 1.234375, "learning_rate": 1.9357426934441265e-05, "loss": 0.5251, "step": 2597 }, { "epoch": 0.35645194484461823, "grad_norm": 1.125, "learning_rate": 1.9356917745007084e-05, "loss": 0.496, "step": 2598 }, { "epoch": 0.35658914728682173, "grad_norm": 1.1875, "learning_rate": 1.9356408360608595e-05, "loss": 0.5191, "step": 2599 }, { "epoch": 0.35672634972902517, "grad_norm": 1.34375, "learning_rate": 1.93558987812564e-05, "loss": 0.6201, "step": 2600 }, { "epoch": 0.35686355217122867, "grad_norm": 1.2421875, "learning_rate": 1.935538900696113e-05, "loss": 0.5229, "step": 2601 }, { "epoch": 0.3570007546134321, "grad_norm": 1.2578125, "learning_rate": 1.93548790377334e-05, "loss": 0.5024, "step": 2602 }, { "epoch": 0.3571379570556356, "grad_norm": 1.1875, "learning_rate": 1.935436887358384e-05, "loss": 0.4933, "step": 2603 }, { "epoch": 0.35727515949783906, "grad_norm": 1.1484375, "learning_rate": 1.9353858514523072e-05, "loss": 0.5079, "step": 2604 }, { "epoch": 0.35741236194004256, "grad_norm": 1.296875, "learning_rate": 1.9353347960561737e-05, "loss": 0.6008, "step": 2605 }, { "epoch": 0.357549564382246, "grad_norm": 1.2265625, "learning_rate": 1.935283721171047e-05, "loss": 0.6402, "step": 2606 }, { "epoch": 0.3576867668244495, "grad_norm": 1.296875, "learning_rate": 1.935232626797992e-05, "loss": 0.5093, "step": 2607 }, { "epoch": 0.35782396926665294, "grad_norm": 1.2890625, "learning_rate": 1.9351815129380723e-05, "loss": 0.5959, "step": 2608 }, { "epoch": 0.35796117170885644, "grad_norm": 1.15625, "learning_rate": 1.9351303795923532e-05, "loss": 0.5071, "step": 2609 }, { "epoch": 0.3580983741510599, "grad_norm": 1.1875, "learning_rate": 1.9350792267619005e-05, "loss": 0.4926, "step": 2610 }, { "epoch": 0.3582355765932634, "grad_norm": 1.3046875, "learning_rate": 1.9350280544477802e-05, "loss": 0.5496, "step": 2611 }, { "epoch": 0.3583727790354668, "grad_norm": 1.2265625, "learning_rate": 1.9349768626510577e-05, "loss": 0.5642, "step": 2612 }, { "epoch": 0.3585099814776703, "grad_norm": 1.2421875, "learning_rate": 1.9349256513728004e-05, "loss": 0.5526, "step": 2613 }, { "epoch": 0.35864718391987377, "grad_norm": 1.4296875, "learning_rate": 1.9348744206140752e-05, "loss": 0.6373, "step": 2614 }, { "epoch": 0.35878438636207727, "grad_norm": 1.40625, "learning_rate": 1.9348231703759496e-05, "loss": 0.5494, "step": 2615 }, { "epoch": 0.3589215888042807, "grad_norm": 1.3671875, "learning_rate": 1.934771900659491e-05, "loss": 0.7986, "step": 2616 }, { "epoch": 0.3590587912464842, "grad_norm": 1.25, "learning_rate": 1.9347206114657683e-05, "loss": 0.5412, "step": 2617 }, { "epoch": 0.35919599368868765, "grad_norm": 1.3984375, "learning_rate": 1.9346693027958497e-05, "loss": 0.5828, "step": 2618 }, { "epoch": 0.35933319613089115, "grad_norm": 1.296875, "learning_rate": 1.9346179746508043e-05, "loss": 0.5576, "step": 2619 }, { "epoch": 0.3594703985730946, "grad_norm": 1.4140625, "learning_rate": 1.9345666270317027e-05, "loss": 0.6598, "step": 2620 }, { "epoch": 0.3596076010152981, "grad_norm": 1.34375, "learning_rate": 1.9345152599396127e-05, "loss": 0.6156, "step": 2621 }, { "epoch": 0.35974480345750154, "grad_norm": 1.140625, "learning_rate": 1.934463873375607e-05, "loss": 0.5296, "step": 2622 }, { "epoch": 0.35988200589970504, "grad_norm": 1.1640625, "learning_rate": 1.934412467340754e-05, "loss": 0.5629, "step": 2623 }, { "epoch": 0.3600192083419085, "grad_norm": 1.2578125, "learning_rate": 1.934361041836126e-05, "loss": 0.5432, "step": 2624 }, { "epoch": 0.360156410784112, "grad_norm": 1.2265625, "learning_rate": 1.9343095968627953e-05, "loss": 0.5852, "step": 2625 }, { "epoch": 0.3602936132263154, "grad_norm": 0.96875, "learning_rate": 1.9342581324218325e-05, "loss": 0.3652, "step": 2626 }, { "epoch": 0.3604308156685189, "grad_norm": 1.2265625, "learning_rate": 1.9342066485143104e-05, "loss": 0.5776, "step": 2627 }, { "epoch": 0.36056801811072237, "grad_norm": 1.125, "learning_rate": 1.9341551451413013e-05, "loss": 0.4942, "step": 2628 }, { "epoch": 0.36070522055292586, "grad_norm": 1.1484375, "learning_rate": 1.9341036223038796e-05, "loss": 0.4865, "step": 2629 }, { "epoch": 0.3608424229951293, "grad_norm": 1.2578125, "learning_rate": 1.9340520800031174e-05, "loss": 0.5632, "step": 2630 }, { "epoch": 0.3609796254373328, "grad_norm": 1.28125, "learning_rate": 1.9340005182400896e-05, "loss": 0.5011, "step": 2631 }, { "epoch": 0.36111682787953625, "grad_norm": 1.3125, "learning_rate": 1.93394893701587e-05, "loss": 0.5922, "step": 2632 }, { "epoch": 0.36125403032173975, "grad_norm": 1.3046875, "learning_rate": 1.933897336331534e-05, "loss": 0.5665, "step": 2633 }, { "epoch": 0.3613912327639432, "grad_norm": 1.234375, "learning_rate": 1.933845716188156e-05, "loss": 0.5899, "step": 2634 }, { "epoch": 0.3615284352061467, "grad_norm": 1.1796875, "learning_rate": 1.9337940765868122e-05, "loss": 0.5341, "step": 2635 }, { "epoch": 0.36166563764835014, "grad_norm": 1.1484375, "learning_rate": 1.9337424175285782e-05, "loss": 0.5009, "step": 2636 }, { "epoch": 0.36180284009055363, "grad_norm": 1.1640625, "learning_rate": 1.9336907390145304e-05, "loss": 0.5848, "step": 2637 }, { "epoch": 0.3619400425327571, "grad_norm": 1.1796875, "learning_rate": 1.933639041045746e-05, "loss": 0.5048, "step": 2638 }, { "epoch": 0.3620772449749606, "grad_norm": 1.234375, "learning_rate": 1.933587323623302e-05, "loss": 0.6021, "step": 2639 }, { "epoch": 0.362214447417164, "grad_norm": 1.25, "learning_rate": 1.933535586748276e-05, "loss": 0.5607, "step": 2640 }, { "epoch": 0.3623516498593675, "grad_norm": 1.0390625, "learning_rate": 1.9334838304217457e-05, "loss": 0.4096, "step": 2641 }, { "epoch": 0.36248885230157096, "grad_norm": 1.21875, "learning_rate": 1.9334320546447897e-05, "loss": 0.5119, "step": 2642 }, { "epoch": 0.36262605474377446, "grad_norm": 1.1484375, "learning_rate": 1.9333802594184868e-05, "loss": 0.5386, "step": 2643 }, { "epoch": 0.3627632571859779, "grad_norm": 1.2734375, "learning_rate": 1.9333284447439164e-05, "loss": 0.5718, "step": 2644 }, { "epoch": 0.3629004596281814, "grad_norm": 1.3125, "learning_rate": 1.933276610622158e-05, "loss": 0.6035, "step": 2645 }, { "epoch": 0.36303766207038485, "grad_norm": 1.171875, "learning_rate": 1.9332247570542917e-05, "loss": 0.4875, "step": 2646 }, { "epoch": 0.36317486451258835, "grad_norm": 1.390625, "learning_rate": 1.9331728840413978e-05, "loss": 0.5718, "step": 2647 }, { "epoch": 0.3633120669547918, "grad_norm": 1.1953125, "learning_rate": 1.9331209915845575e-05, "loss": 0.51, "step": 2648 }, { "epoch": 0.3634492693969953, "grad_norm": 1.2578125, "learning_rate": 1.9330690796848516e-05, "loss": 0.6179, "step": 2649 }, { "epoch": 0.36358647183919873, "grad_norm": 1.140625, "learning_rate": 1.933017148343362e-05, "loss": 0.4605, "step": 2650 }, { "epoch": 0.36372367428140223, "grad_norm": 1.2109375, "learning_rate": 1.9329651975611707e-05, "loss": 0.5562, "step": 2651 }, { "epoch": 0.3638608767236057, "grad_norm": 1.171875, "learning_rate": 1.9329132273393604e-05, "loss": 0.5061, "step": 2652 }, { "epoch": 0.3639980791658092, "grad_norm": 1.1640625, "learning_rate": 1.9328612376790134e-05, "loss": 0.5163, "step": 2653 }, { "epoch": 0.3641352816080126, "grad_norm": 1.3046875, "learning_rate": 1.9328092285812136e-05, "loss": 0.5888, "step": 2654 }, { "epoch": 0.3642724840502161, "grad_norm": 1.1796875, "learning_rate": 1.9327572000470443e-05, "loss": 0.5074, "step": 2655 }, { "epoch": 0.36440968649241956, "grad_norm": 1.09375, "learning_rate": 1.9327051520775896e-05, "loss": 0.4706, "step": 2656 }, { "epoch": 0.36454688893462306, "grad_norm": 1.1171875, "learning_rate": 1.9326530846739343e-05, "loss": 0.5082, "step": 2657 }, { "epoch": 0.3646840913768265, "grad_norm": 1.1875, "learning_rate": 1.932600997837163e-05, "loss": 0.5504, "step": 2658 }, { "epoch": 0.36482129381903, "grad_norm": 1.21875, "learning_rate": 1.932548891568361e-05, "loss": 0.5311, "step": 2659 }, { "epoch": 0.36495849626123344, "grad_norm": 1.09375, "learning_rate": 1.932496765868614e-05, "loss": 0.488, "step": 2660 }, { "epoch": 0.36509569870343694, "grad_norm": 1.1875, "learning_rate": 1.9324446207390088e-05, "loss": 0.5231, "step": 2661 }, { "epoch": 0.3652329011456404, "grad_norm": 1.2578125, "learning_rate": 1.9323924561806307e-05, "loss": 0.527, "step": 2662 }, { "epoch": 0.3653701035878439, "grad_norm": 1.1171875, "learning_rate": 1.9323402721945674e-05, "loss": 0.4927, "step": 2663 }, { "epoch": 0.36550730603004733, "grad_norm": 1.3671875, "learning_rate": 1.932288068781906e-05, "loss": 0.6319, "step": 2664 }, { "epoch": 0.36564450847225083, "grad_norm": 1.3046875, "learning_rate": 1.9322358459437343e-05, "loss": 0.5292, "step": 2665 }, { "epoch": 0.36578171091445427, "grad_norm": 1.3046875, "learning_rate": 1.9321836036811405e-05, "loss": 0.5789, "step": 2666 }, { "epoch": 0.36591891335665777, "grad_norm": 1.2578125, "learning_rate": 1.932131341995213e-05, "loss": 0.6023, "step": 2667 }, { "epoch": 0.3660561157988612, "grad_norm": 1.21875, "learning_rate": 1.9320790608870412e-05, "loss": 0.5579, "step": 2668 }, { "epoch": 0.3661933182410647, "grad_norm": 1.2265625, "learning_rate": 1.932026760357714e-05, "loss": 0.5251, "step": 2669 }, { "epoch": 0.36633052068326816, "grad_norm": 1.125, "learning_rate": 1.9319744404083208e-05, "loss": 0.495, "step": 2670 }, { "epoch": 0.36646772312547166, "grad_norm": 1.09375, "learning_rate": 1.9319221010399525e-05, "loss": 0.473, "step": 2671 }, { "epoch": 0.3666049255676751, "grad_norm": 1.2421875, "learning_rate": 1.9318697422536992e-05, "loss": 0.5347, "step": 2672 }, { "epoch": 0.3667421280098786, "grad_norm": 1.203125, "learning_rate": 1.931817364050652e-05, "loss": 0.5705, "step": 2673 }, { "epoch": 0.36687933045208204, "grad_norm": 1.1953125, "learning_rate": 1.9317649664319023e-05, "loss": 0.5337, "step": 2674 }, { "epoch": 0.36701653289428554, "grad_norm": 1.140625, "learning_rate": 1.931712549398542e-05, "loss": 0.4495, "step": 2675 }, { "epoch": 0.367153735336489, "grad_norm": 1.1875, "learning_rate": 1.9316601129516627e-05, "loss": 0.5376, "step": 2676 }, { "epoch": 0.3672909377786925, "grad_norm": 1.1953125, "learning_rate": 1.931607657092358e-05, "loss": 0.4948, "step": 2677 }, { "epoch": 0.3674281402208959, "grad_norm": 1.28125, "learning_rate": 1.93155518182172e-05, "loss": 0.6024, "step": 2678 }, { "epoch": 0.3675653426630994, "grad_norm": 1.265625, "learning_rate": 1.9315026871408426e-05, "loss": 0.5686, "step": 2679 }, { "epoch": 0.36770254510530287, "grad_norm": 1.21875, "learning_rate": 1.931450173050819e-05, "loss": 0.5229, "step": 2680 }, { "epoch": 0.36783974754750637, "grad_norm": 1.28125, "learning_rate": 1.931397639552744e-05, "loss": 0.5511, "step": 2681 }, { "epoch": 0.3679769499897098, "grad_norm": 1.28125, "learning_rate": 1.931345086647712e-05, "loss": 0.5318, "step": 2682 }, { "epoch": 0.3681141524319133, "grad_norm": 1.359375, "learning_rate": 1.9312925143368185e-05, "loss": 0.6072, "step": 2683 }, { "epoch": 0.36825135487411675, "grad_norm": 1.25, "learning_rate": 1.931239922621158e-05, "loss": 0.4805, "step": 2684 }, { "epoch": 0.36838855731632025, "grad_norm": 1.40625, "learning_rate": 1.931187311501827e-05, "loss": 0.5757, "step": 2685 }, { "epoch": 0.3685257597585237, "grad_norm": 1.28125, "learning_rate": 1.931134680979921e-05, "loss": 0.5587, "step": 2686 }, { "epoch": 0.3686629622007272, "grad_norm": 1.234375, "learning_rate": 1.931082031056538e-05, "loss": 0.5744, "step": 2687 }, { "epoch": 0.36880016464293064, "grad_norm": 1.296875, "learning_rate": 1.9310293617327734e-05, "loss": 0.4884, "step": 2688 }, { "epoch": 0.36893736708513414, "grad_norm": 1.1953125, "learning_rate": 1.9309766730097257e-05, "loss": 0.4676, "step": 2689 }, { "epoch": 0.3690745695273376, "grad_norm": 1.1953125, "learning_rate": 1.9309239648884922e-05, "loss": 0.4916, "step": 2690 }, { "epoch": 0.3692117719695411, "grad_norm": 1.375, "learning_rate": 1.9308712373701717e-05, "loss": 0.6662, "step": 2691 }, { "epoch": 0.3693489744117445, "grad_norm": 1.390625, "learning_rate": 1.9308184904558623e-05, "loss": 0.6097, "step": 2692 }, { "epoch": 0.369486176853948, "grad_norm": 1.28125, "learning_rate": 1.9307657241466634e-05, "loss": 0.5953, "step": 2693 }, { "epoch": 0.36962337929615147, "grad_norm": 1.1953125, "learning_rate": 1.930712938443674e-05, "loss": 0.533, "step": 2694 }, { "epoch": 0.36976058173835497, "grad_norm": 1.2578125, "learning_rate": 1.930660133347995e-05, "loss": 0.5399, "step": 2695 }, { "epoch": 0.3698977841805584, "grad_norm": 1.2265625, "learning_rate": 1.9306073088607257e-05, "loss": 0.5554, "step": 2696 }, { "epoch": 0.3700349866227619, "grad_norm": 1.3359375, "learning_rate": 1.9305544649829668e-05, "loss": 0.6073, "step": 2697 }, { "epoch": 0.37017218906496535, "grad_norm": 1.375, "learning_rate": 1.93050160171582e-05, "loss": 0.5545, "step": 2698 }, { "epoch": 0.37030939150716885, "grad_norm": 1.2890625, "learning_rate": 1.9304487190603862e-05, "loss": 0.5598, "step": 2699 }, { "epoch": 0.3704465939493723, "grad_norm": 1.1328125, "learning_rate": 1.9303958170177676e-05, "loss": 0.4086, "step": 2700 }, { "epoch": 0.3705837963915758, "grad_norm": 1.171875, "learning_rate": 1.9303428955890663e-05, "loss": 0.5356, "step": 2701 }, { "epoch": 0.37072099883377924, "grad_norm": 1.3125, "learning_rate": 1.930289954775385e-05, "loss": 0.5624, "step": 2702 }, { "epoch": 0.37085820127598274, "grad_norm": 1.234375, "learning_rate": 1.930236994577827e-05, "loss": 0.5053, "step": 2703 }, { "epoch": 0.3709954037181862, "grad_norm": 1.4609375, "learning_rate": 1.9301840149974954e-05, "loss": 0.6657, "step": 2704 }, { "epoch": 0.3711326061603897, "grad_norm": 1.265625, "learning_rate": 1.9301310160354946e-05, "loss": 0.4869, "step": 2705 }, { "epoch": 0.3712698086025931, "grad_norm": 1.2421875, "learning_rate": 1.9300779976929288e-05, "loss": 0.5775, "step": 2706 }, { "epoch": 0.3714070110447966, "grad_norm": 1.234375, "learning_rate": 1.9300249599709023e-05, "loss": 0.5321, "step": 2707 }, { "epoch": 0.37154421348700006, "grad_norm": 1.21875, "learning_rate": 1.9299719028705207e-05, "loss": 0.4868, "step": 2708 }, { "epoch": 0.37168141592920356, "grad_norm": 1.2265625, "learning_rate": 1.929918826392889e-05, "loss": 0.5485, "step": 2709 }, { "epoch": 0.371818618371407, "grad_norm": 1.25, "learning_rate": 1.929865730539114e-05, "loss": 0.5959, "step": 2710 }, { "epoch": 0.3719558208136105, "grad_norm": 1.203125, "learning_rate": 1.9298126153103013e-05, "loss": 0.5255, "step": 2711 }, { "epoch": 0.37209302325581395, "grad_norm": 1.1953125, "learning_rate": 1.9297594807075574e-05, "loss": 0.5274, "step": 2712 }, { "epoch": 0.37223022569801745, "grad_norm": 1.078125, "learning_rate": 1.92970632673199e-05, "loss": 0.4563, "step": 2713 }, { "epoch": 0.3723674281402209, "grad_norm": 1.140625, "learning_rate": 1.9296531533847064e-05, "loss": 0.5017, "step": 2714 }, { "epoch": 0.3725046305824244, "grad_norm": 1.2421875, "learning_rate": 1.9295999606668148e-05, "loss": 0.5456, "step": 2715 }, { "epoch": 0.37264183302462783, "grad_norm": 1.1875, "learning_rate": 1.9295467485794234e-05, "loss": 0.5603, "step": 2716 }, { "epoch": 0.37277903546683133, "grad_norm": 1.1015625, "learning_rate": 1.9294935171236407e-05, "loss": 0.4995, "step": 2717 }, { "epoch": 0.3729162379090348, "grad_norm": 1.3046875, "learning_rate": 1.929440266300576e-05, "loss": 0.6243, "step": 2718 }, { "epoch": 0.3730534403512383, "grad_norm": 1.2265625, "learning_rate": 1.9293869961113388e-05, "loss": 0.5656, "step": 2719 }, { "epoch": 0.3731906427934417, "grad_norm": 1.2421875, "learning_rate": 1.9293337065570394e-05, "loss": 0.5534, "step": 2720 }, { "epoch": 0.3733278452356452, "grad_norm": 1.328125, "learning_rate": 1.929280397638788e-05, "loss": 0.5305, "step": 2721 }, { "epoch": 0.37346504767784866, "grad_norm": 1.0859375, "learning_rate": 1.929227069357695e-05, "loss": 0.4737, "step": 2722 }, { "epoch": 0.37360225012005216, "grad_norm": 1.375, "learning_rate": 1.9291737217148722e-05, "loss": 0.6676, "step": 2723 }, { "epoch": 0.3737394525622556, "grad_norm": 1.3125, "learning_rate": 1.9291203547114306e-05, "loss": 0.5446, "step": 2724 }, { "epoch": 0.3738766550044591, "grad_norm": 1.1875, "learning_rate": 1.929066968348482e-05, "loss": 0.5167, "step": 2725 }, { "epoch": 0.37401385744666255, "grad_norm": 1.3046875, "learning_rate": 1.92901356262714e-05, "loss": 0.5728, "step": 2726 }, { "epoch": 0.37415105988886604, "grad_norm": 1.28125, "learning_rate": 1.928960137548516e-05, "loss": 0.6282, "step": 2727 }, { "epoch": 0.3742882623310695, "grad_norm": 1.1015625, "learning_rate": 1.928906693113724e-05, "loss": 0.5004, "step": 2728 }, { "epoch": 0.374425464773273, "grad_norm": 1.203125, "learning_rate": 1.9288532293238772e-05, "loss": 0.5481, "step": 2729 }, { "epoch": 0.37456266721547643, "grad_norm": 1.375, "learning_rate": 1.9287997461800897e-05, "loss": 0.5953, "step": 2730 }, { "epoch": 0.37469986965767993, "grad_norm": 1.2578125, "learning_rate": 1.928746243683476e-05, "loss": 0.4912, "step": 2731 }, { "epoch": 0.3748370720998834, "grad_norm": 1.2421875, "learning_rate": 1.9286927218351508e-05, "loss": 0.5929, "step": 2732 }, { "epoch": 0.37497427454208687, "grad_norm": 1.1796875, "learning_rate": 1.9286391806362292e-05, "loss": 0.4715, "step": 2733 }, { "epoch": 0.3751114769842903, "grad_norm": 1.3984375, "learning_rate": 1.928585620087827e-05, "loss": 0.6462, "step": 2734 }, { "epoch": 0.3752486794264938, "grad_norm": 1.2109375, "learning_rate": 1.9285320401910602e-05, "loss": 0.4759, "step": 2735 }, { "epoch": 0.37538588186869726, "grad_norm": 1.2421875, "learning_rate": 1.9284784409470453e-05, "loss": 0.5322, "step": 2736 }, { "epoch": 0.37552308431090076, "grad_norm": 1.265625, "learning_rate": 1.9284248223568985e-05, "loss": 0.5668, "step": 2737 }, { "epoch": 0.3756602867531042, "grad_norm": 1.2109375, "learning_rate": 1.928371184421738e-05, "loss": 0.5129, "step": 2738 }, { "epoch": 0.3757974891953077, "grad_norm": 1.15625, "learning_rate": 1.9283175271426806e-05, "loss": 0.4796, "step": 2739 }, { "epoch": 0.37593469163751114, "grad_norm": 1.21875, "learning_rate": 1.9282638505208446e-05, "loss": 0.4921, "step": 2740 }, { "epoch": 0.37607189407971464, "grad_norm": 1.265625, "learning_rate": 1.9282101545573483e-05, "loss": 0.5381, "step": 2741 }, { "epoch": 0.3762090965219181, "grad_norm": 1.375, "learning_rate": 1.928156439253311e-05, "loss": 0.5969, "step": 2742 }, { "epoch": 0.3763462989641216, "grad_norm": 1.234375, "learning_rate": 1.9281027046098516e-05, "loss": 0.5222, "step": 2743 }, { "epoch": 0.376483501406325, "grad_norm": 1.1953125, "learning_rate": 1.9280489506280896e-05, "loss": 0.5047, "step": 2744 }, { "epoch": 0.3766207038485285, "grad_norm": 1.2890625, "learning_rate": 1.927995177309145e-05, "loss": 0.5123, "step": 2745 }, { "epoch": 0.37675790629073197, "grad_norm": 1.2421875, "learning_rate": 1.9279413846541386e-05, "loss": 0.5208, "step": 2746 }, { "epoch": 0.37689510873293547, "grad_norm": 1.2109375, "learning_rate": 1.9278875726641912e-05, "loss": 0.535, "step": 2747 }, { "epoch": 0.3770323111751389, "grad_norm": 1.1875, "learning_rate": 1.9278337413404238e-05, "loss": 0.5199, "step": 2748 }, { "epoch": 0.3771695136173424, "grad_norm": 1.1953125, "learning_rate": 1.9277798906839584e-05, "loss": 0.5178, "step": 2749 }, { "epoch": 0.37730671605954585, "grad_norm": 1.1484375, "learning_rate": 1.9277260206959165e-05, "loss": 0.4242, "step": 2750 }, { "epoch": 0.37744391850174935, "grad_norm": 1.2421875, "learning_rate": 1.927672131377421e-05, "loss": 0.5462, "step": 2751 }, { "epoch": 0.3775811209439528, "grad_norm": 1.2890625, "learning_rate": 1.9276182227295943e-05, "loss": 0.5696, "step": 2752 }, { "epoch": 0.3777183233861563, "grad_norm": 1.21875, "learning_rate": 1.9275642947535606e-05, "loss": 0.5813, "step": 2753 }, { "epoch": 0.37785552582835974, "grad_norm": 1.25, "learning_rate": 1.9275103474504428e-05, "loss": 0.5447, "step": 2754 }, { "epoch": 0.37799272827056324, "grad_norm": 1.2421875, "learning_rate": 1.9274563808213646e-05, "loss": 0.5099, "step": 2755 }, { "epoch": 0.3781299307127667, "grad_norm": 1.1640625, "learning_rate": 1.9274023948674517e-05, "loss": 0.4849, "step": 2756 }, { "epoch": 0.3782671331549702, "grad_norm": 1.2421875, "learning_rate": 1.9273483895898276e-05, "loss": 0.5551, "step": 2757 }, { "epoch": 0.3784043355971736, "grad_norm": 1.21875, "learning_rate": 1.9272943649896184e-05, "loss": 0.5666, "step": 2758 }, { "epoch": 0.3785415380393771, "grad_norm": 1.25, "learning_rate": 1.92724032106795e-05, "loss": 0.5684, "step": 2759 }, { "epoch": 0.37867874048158057, "grad_norm": 1.2734375, "learning_rate": 1.927186257825948e-05, "loss": 0.5769, "step": 2760 }, { "epoch": 0.37881594292378407, "grad_norm": 1.2109375, "learning_rate": 1.927132175264739e-05, "loss": 0.5223, "step": 2761 }, { "epoch": 0.3789531453659875, "grad_norm": 1.234375, "learning_rate": 1.9270780733854496e-05, "loss": 0.5663, "step": 2762 }, { "epoch": 0.379090347808191, "grad_norm": 1.2265625, "learning_rate": 1.9270239521892073e-05, "loss": 0.5905, "step": 2763 }, { "epoch": 0.37922755025039445, "grad_norm": 1.28125, "learning_rate": 1.92696981167714e-05, "loss": 0.6243, "step": 2764 }, { "epoch": 0.37936475269259795, "grad_norm": 1.2421875, "learning_rate": 1.9269156518503756e-05, "loss": 0.5719, "step": 2765 }, { "epoch": 0.3795019551348014, "grad_norm": 1.15625, "learning_rate": 1.926861472710043e-05, "loss": 0.481, "step": 2766 }, { "epoch": 0.3796391575770049, "grad_norm": 1.2421875, "learning_rate": 1.9268072742572703e-05, "loss": 0.5168, "step": 2767 }, { "epoch": 0.37977636001920834, "grad_norm": 1.15625, "learning_rate": 1.9267530564931874e-05, "loss": 0.4735, "step": 2768 }, { "epoch": 0.37991356246141184, "grad_norm": 1.1640625, "learning_rate": 1.9266988194189236e-05, "loss": 0.4725, "step": 2769 }, { "epoch": 0.3800507649036153, "grad_norm": 1.1796875, "learning_rate": 1.9266445630356094e-05, "loss": 0.5059, "step": 2770 }, { "epoch": 0.3801879673458188, "grad_norm": 1.1015625, "learning_rate": 1.926590287344375e-05, "loss": 0.4395, "step": 2771 }, { "epoch": 0.3803251697880222, "grad_norm": 1.296875, "learning_rate": 1.9265359923463517e-05, "loss": 0.4734, "step": 2772 }, { "epoch": 0.3804623722302257, "grad_norm": 1.234375, "learning_rate": 1.9264816780426704e-05, "loss": 0.5285, "step": 2773 }, { "epoch": 0.38059957467242916, "grad_norm": 1.0703125, "learning_rate": 1.926427344434463e-05, "loss": 0.4697, "step": 2774 }, { "epoch": 0.38073677711463266, "grad_norm": 1.140625, "learning_rate": 1.926372991522862e-05, "loss": 0.5037, "step": 2775 }, { "epoch": 0.3808739795568361, "grad_norm": 1.28125, "learning_rate": 1.926318619308999e-05, "loss": 0.4941, "step": 2776 }, { "epoch": 0.3810111819990396, "grad_norm": 1.3203125, "learning_rate": 1.9262642277940074e-05, "loss": 0.5619, "step": 2777 }, { "epoch": 0.38114838444124305, "grad_norm": 1.171875, "learning_rate": 1.9262098169790205e-05, "loss": 0.5454, "step": 2778 }, { "epoch": 0.38128558688344655, "grad_norm": 1.1953125, "learning_rate": 1.9261553868651725e-05, "loss": 0.477, "step": 2779 }, { "epoch": 0.38142278932565, "grad_norm": 1.28125, "learning_rate": 1.926100937453597e-05, "loss": 0.5982, "step": 2780 }, { "epoch": 0.3815599917678535, "grad_norm": 1.3125, "learning_rate": 1.9260464687454282e-05, "loss": 0.5347, "step": 2781 }, { "epoch": 0.38169719421005693, "grad_norm": 1.140625, "learning_rate": 1.925991980741802e-05, "loss": 0.464, "step": 2782 }, { "epoch": 0.38183439665226043, "grad_norm": 1.2109375, "learning_rate": 1.925937473443853e-05, "loss": 0.494, "step": 2783 }, { "epoch": 0.3819715990944639, "grad_norm": 1.375, "learning_rate": 1.925882946852717e-05, "loss": 0.6286, "step": 2784 }, { "epoch": 0.3821088015366674, "grad_norm": 1.265625, "learning_rate": 1.9258284009695303e-05, "loss": 0.5907, "step": 2785 }, { "epoch": 0.3822460039788708, "grad_norm": 1.296875, "learning_rate": 1.9257738357954295e-05, "loss": 0.5485, "step": 2786 }, { "epoch": 0.3823832064210743, "grad_norm": 1.2734375, "learning_rate": 1.9257192513315512e-05, "loss": 0.6222, "step": 2787 }, { "epoch": 0.38252040886327776, "grad_norm": 1.2265625, "learning_rate": 1.925664647579033e-05, "loss": 0.562, "step": 2788 }, { "epoch": 0.38265761130548126, "grad_norm": 1.171875, "learning_rate": 1.925610024539013e-05, "loss": 0.4963, "step": 2789 }, { "epoch": 0.3827948137476847, "grad_norm": 1.1953125, "learning_rate": 1.9255553822126286e-05, "loss": 0.5105, "step": 2790 }, { "epoch": 0.3829320161898882, "grad_norm": 1.234375, "learning_rate": 1.925500720601019e-05, "loss": 0.5701, "step": 2791 }, { "epoch": 0.38306921863209165, "grad_norm": 1.203125, "learning_rate": 1.9254460397053227e-05, "loss": 0.5402, "step": 2792 }, { "epoch": 0.38320642107429514, "grad_norm": 1.2265625, "learning_rate": 1.9253913395266794e-05, "loss": 0.5687, "step": 2793 }, { "epoch": 0.3833436235164986, "grad_norm": 1.125, "learning_rate": 1.9253366200662286e-05, "loss": 0.46, "step": 2794 }, { "epoch": 0.3834808259587021, "grad_norm": 1.2890625, "learning_rate": 1.9252818813251104e-05, "loss": 0.5474, "step": 2795 }, { "epoch": 0.38361802840090553, "grad_norm": 1.171875, "learning_rate": 1.925227123304466e-05, "loss": 0.4841, "step": 2796 }, { "epoch": 0.38375523084310903, "grad_norm": 1.203125, "learning_rate": 1.925172346005435e-05, "loss": 0.4592, "step": 2797 }, { "epoch": 0.3838924332853125, "grad_norm": 1.1796875, "learning_rate": 1.9251175494291604e-05, "loss": 0.5109, "step": 2798 }, { "epoch": 0.38402963572751597, "grad_norm": 1.1640625, "learning_rate": 1.925062733576783e-05, "loss": 0.5186, "step": 2799 }, { "epoch": 0.3841668381697194, "grad_norm": 1.15625, "learning_rate": 1.925007898449445e-05, "loss": 0.4857, "step": 2800 }, { "epoch": 0.3843040406119229, "grad_norm": 1.421875, "learning_rate": 1.924953044048289e-05, "loss": 0.5841, "step": 2801 }, { "epoch": 0.38444124305412636, "grad_norm": 1.28125, "learning_rate": 1.924898170374458e-05, "loss": 0.541, "step": 2802 }, { "epoch": 0.38457844549632986, "grad_norm": 1.265625, "learning_rate": 1.9248432774290958e-05, "loss": 0.6067, "step": 2803 }, { "epoch": 0.3847156479385333, "grad_norm": 1.2109375, "learning_rate": 1.9247883652133457e-05, "loss": 0.5441, "step": 2804 }, { "epoch": 0.3848528503807368, "grad_norm": 1.203125, "learning_rate": 1.924733433728352e-05, "loss": 0.5739, "step": 2805 }, { "epoch": 0.38499005282294024, "grad_norm": 1.2109375, "learning_rate": 1.924678482975259e-05, "loss": 0.4886, "step": 2806 }, { "epoch": 0.38512725526514374, "grad_norm": 1.1875, "learning_rate": 1.9246235129552122e-05, "loss": 0.5628, "step": 2807 }, { "epoch": 0.3852644577073472, "grad_norm": 1.3359375, "learning_rate": 1.924568523669357e-05, "loss": 0.6201, "step": 2808 }, { "epoch": 0.3854016601495507, "grad_norm": 1.171875, "learning_rate": 1.924513515118838e-05, "loss": 0.5009, "step": 2809 }, { "epoch": 0.38553886259175413, "grad_norm": 1.1796875, "learning_rate": 1.9244584873048028e-05, "loss": 0.5078, "step": 2810 }, { "epoch": 0.3856760650339576, "grad_norm": 1.3203125, "learning_rate": 1.9244034402283974e-05, "loss": 0.5723, "step": 2811 }, { "epoch": 0.38581326747616107, "grad_norm": 1.265625, "learning_rate": 1.924348373890769e-05, "loss": 0.5577, "step": 2812 }, { "epoch": 0.38595046991836457, "grad_norm": 1.34375, "learning_rate": 1.9242932882930647e-05, "loss": 0.6097, "step": 2813 }, { "epoch": 0.386087672360568, "grad_norm": 1.078125, "learning_rate": 1.9242381834364325e-05, "loss": 0.4022, "step": 2814 }, { "epoch": 0.3862248748027715, "grad_norm": 1.1953125, "learning_rate": 1.92418305932202e-05, "loss": 0.5278, "step": 2815 }, { "epoch": 0.38636207724497496, "grad_norm": 1.1875, "learning_rate": 1.924127915950977e-05, "loss": 0.527, "step": 2816 }, { "epoch": 0.38649927968717845, "grad_norm": 1.203125, "learning_rate": 1.924072753324451e-05, "loss": 0.5411, "step": 2817 }, { "epoch": 0.3866364821293819, "grad_norm": 1.2734375, "learning_rate": 1.9240175714435927e-05, "loss": 0.537, "step": 2818 }, { "epoch": 0.3867736845715854, "grad_norm": 1.1953125, "learning_rate": 1.923962370309551e-05, "loss": 0.5277, "step": 2819 }, { "epoch": 0.38691088701378884, "grad_norm": 1.28125, "learning_rate": 1.923907149923477e-05, "loss": 0.5968, "step": 2820 }, { "epoch": 0.38704808945599234, "grad_norm": 1.3203125, "learning_rate": 1.92385191028652e-05, "loss": 0.5611, "step": 2821 }, { "epoch": 0.3871852918981958, "grad_norm": 1.234375, "learning_rate": 1.9237966513998323e-05, "loss": 0.5122, "step": 2822 }, { "epoch": 0.3873224943403993, "grad_norm": 1.3046875, "learning_rate": 1.9237413732645646e-05, "loss": 0.5812, "step": 2823 }, { "epoch": 0.3874596967826027, "grad_norm": 1.2578125, "learning_rate": 1.9236860758818687e-05, "loss": 0.5805, "step": 2824 }, { "epoch": 0.3875968992248062, "grad_norm": 1.2109375, "learning_rate": 1.9236307592528966e-05, "loss": 0.5327, "step": 2825 }, { "epoch": 0.38773410166700967, "grad_norm": 1.1953125, "learning_rate": 1.9235754233788016e-05, "loss": 0.5334, "step": 2826 }, { "epoch": 0.38787130410921317, "grad_norm": 1.1484375, "learning_rate": 1.9235200682607364e-05, "loss": 0.5191, "step": 2827 }, { "epoch": 0.3880085065514166, "grad_norm": 1.296875, "learning_rate": 1.923464693899854e-05, "loss": 0.525, "step": 2828 }, { "epoch": 0.3881457089936201, "grad_norm": 1.1796875, "learning_rate": 1.9234093002973086e-05, "loss": 0.4115, "step": 2829 }, { "epoch": 0.38828291143582355, "grad_norm": 1.171875, "learning_rate": 1.923353887454254e-05, "loss": 0.5082, "step": 2830 }, { "epoch": 0.38842011387802705, "grad_norm": 1.2265625, "learning_rate": 1.9232984553718458e-05, "loss": 0.4886, "step": 2831 }, { "epoch": 0.3885573163202305, "grad_norm": 1.234375, "learning_rate": 1.9232430040512377e-05, "loss": 0.5036, "step": 2832 }, { "epoch": 0.388694518762434, "grad_norm": 1.2578125, "learning_rate": 1.9231875334935858e-05, "loss": 0.6137, "step": 2833 }, { "epoch": 0.38883172120463744, "grad_norm": 1.3125, "learning_rate": 1.9231320437000464e-05, "loss": 0.6344, "step": 2834 }, { "epoch": 0.38896892364684094, "grad_norm": 1.1953125, "learning_rate": 1.9230765346717744e-05, "loss": 0.5503, "step": 2835 }, { "epoch": 0.3891061260890444, "grad_norm": 1.2421875, "learning_rate": 1.9230210064099278e-05, "loss": 0.5437, "step": 2836 }, { "epoch": 0.3892433285312479, "grad_norm": 1.171875, "learning_rate": 1.9229654589156623e-05, "loss": 0.5099, "step": 2837 }, { "epoch": 0.3893805309734513, "grad_norm": 1.03125, "learning_rate": 1.9229098921901363e-05, "loss": 0.4151, "step": 2838 }, { "epoch": 0.3895177334156548, "grad_norm": 1.28125, "learning_rate": 1.9228543062345072e-05, "loss": 0.5849, "step": 2839 }, { "epoch": 0.38965493585785826, "grad_norm": 1.2109375, "learning_rate": 1.9227987010499333e-05, "loss": 0.5272, "step": 2840 }, { "epoch": 0.38979213830006176, "grad_norm": 1.1328125, "learning_rate": 1.9227430766375733e-05, "loss": 0.4731, "step": 2841 }, { "epoch": 0.3899293407422652, "grad_norm": 1.0703125, "learning_rate": 1.9226874329985858e-05, "loss": 0.4607, "step": 2842 }, { "epoch": 0.3900665431844687, "grad_norm": 1.203125, "learning_rate": 1.9226317701341307e-05, "loss": 0.5107, "step": 2843 }, { "epoch": 0.39020374562667215, "grad_norm": 1.375, "learning_rate": 1.9225760880453678e-05, "loss": 0.61, "step": 2844 }, { "epoch": 0.39034094806887565, "grad_norm": 1.2890625, "learning_rate": 1.922520386733457e-05, "loss": 0.6311, "step": 2845 }, { "epoch": 0.3904781505110791, "grad_norm": 1.171875, "learning_rate": 1.922464666199559e-05, "loss": 0.4531, "step": 2846 }, { "epoch": 0.3906153529532826, "grad_norm": 1.1328125, "learning_rate": 1.922408926444835e-05, "loss": 0.4544, "step": 2847 }, { "epoch": 0.39075255539548603, "grad_norm": 1.2734375, "learning_rate": 1.922353167470446e-05, "loss": 0.6497, "step": 2848 }, { "epoch": 0.39088975783768953, "grad_norm": 1.2734375, "learning_rate": 1.9222973892775544e-05, "loss": 0.5049, "step": 2849 }, { "epoch": 0.391026960279893, "grad_norm": 1.2578125, "learning_rate": 1.9222415918673217e-05, "loss": 0.5549, "step": 2850 }, { "epoch": 0.3911641627220965, "grad_norm": 1.2421875, "learning_rate": 1.9221857752409113e-05, "loss": 0.5847, "step": 2851 }, { "epoch": 0.3913013651642999, "grad_norm": 1.3359375, "learning_rate": 1.922129939399486e-05, "loss": 0.6237, "step": 2852 }, { "epoch": 0.3914385676065034, "grad_norm": 1.28125, "learning_rate": 1.9220740843442086e-05, "loss": 0.5853, "step": 2853 }, { "epoch": 0.39157577004870686, "grad_norm": 0.99609375, "learning_rate": 1.9220182100762436e-05, "loss": 0.352, "step": 2854 }, { "epoch": 0.39171297249091036, "grad_norm": 1.2578125, "learning_rate": 1.921962316596755e-05, "loss": 0.4685, "step": 2855 }, { "epoch": 0.3918501749331138, "grad_norm": 1.3046875, "learning_rate": 1.9219064039069075e-05, "loss": 0.5537, "step": 2856 }, { "epoch": 0.3919873773753173, "grad_norm": 1.28125, "learning_rate": 1.921850472007866e-05, "loss": 0.5977, "step": 2857 }, { "epoch": 0.39212457981752075, "grad_norm": 1.328125, "learning_rate": 1.921794520900796e-05, "loss": 0.6118, "step": 2858 }, { "epoch": 0.39226178225972425, "grad_norm": 1.1484375, "learning_rate": 1.9217385505868627e-05, "loss": 0.5255, "step": 2859 }, { "epoch": 0.3923989847019277, "grad_norm": 1.15625, "learning_rate": 1.9216825610672334e-05, "loss": 0.4796, "step": 2860 }, { "epoch": 0.3925361871441312, "grad_norm": 1.171875, "learning_rate": 1.9216265523430744e-05, "loss": 0.5486, "step": 2861 }, { "epoch": 0.39267338958633463, "grad_norm": 1.3125, "learning_rate": 1.921570524415552e-05, "loss": 0.6042, "step": 2862 }, { "epoch": 0.39281059202853813, "grad_norm": 1.265625, "learning_rate": 1.9215144772858345e-05, "loss": 0.4459, "step": 2863 }, { "epoch": 0.3929477944707416, "grad_norm": 1.21875, "learning_rate": 1.9214584109550895e-05, "loss": 0.5119, "step": 2864 }, { "epoch": 0.3930849969129451, "grad_norm": 1.2890625, "learning_rate": 1.9214023254244846e-05, "loss": 0.5846, "step": 2865 }, { "epoch": 0.3932221993551485, "grad_norm": 1.265625, "learning_rate": 1.921346220695189e-05, "loss": 0.6058, "step": 2866 }, { "epoch": 0.393359401797352, "grad_norm": 1.2890625, "learning_rate": 1.921290096768372e-05, "loss": 0.5894, "step": 2867 }, { "epoch": 0.39349660423955546, "grad_norm": 1.2265625, "learning_rate": 1.9212339536452026e-05, "loss": 0.4999, "step": 2868 }, { "epoch": 0.39363380668175896, "grad_norm": 1.34375, "learning_rate": 1.9211777913268503e-05, "loss": 0.716, "step": 2869 }, { "epoch": 0.3937710091239624, "grad_norm": 1.21875, "learning_rate": 1.921121609814486e-05, "loss": 0.5131, "step": 2870 }, { "epoch": 0.3939082115661659, "grad_norm": 1.125, "learning_rate": 1.92106540910928e-05, "loss": 0.477, "step": 2871 }, { "epoch": 0.39404541400836934, "grad_norm": 1.1796875, "learning_rate": 1.921009189212403e-05, "loss": 0.5525, "step": 2872 }, { "epoch": 0.39418261645057284, "grad_norm": 1.234375, "learning_rate": 1.9209529501250272e-05, "loss": 0.5424, "step": 2873 }, { "epoch": 0.3943198188927763, "grad_norm": 1.3125, "learning_rate": 1.9208966918483235e-05, "loss": 0.6435, "step": 2874 }, { "epoch": 0.3944570213349798, "grad_norm": 1.2109375, "learning_rate": 1.9208404143834646e-05, "loss": 0.5501, "step": 2875 }, { "epoch": 0.39459422377718323, "grad_norm": 1.359375, "learning_rate": 1.9207841177316235e-05, "loss": 0.6459, "step": 2876 }, { "epoch": 0.3947314262193867, "grad_norm": 1.1640625, "learning_rate": 1.9207278018939726e-05, "loss": 0.5211, "step": 2877 }, { "epoch": 0.39486862866159017, "grad_norm": 1.2109375, "learning_rate": 1.9206714668716854e-05, "loss": 0.5413, "step": 2878 }, { "epoch": 0.39500583110379367, "grad_norm": 1.28125, "learning_rate": 1.920615112665936e-05, "loss": 0.5454, "step": 2879 }, { "epoch": 0.3951430335459971, "grad_norm": 1.265625, "learning_rate": 1.9205587392778985e-05, "loss": 0.5414, "step": 2880 }, { "epoch": 0.3952802359882006, "grad_norm": 1.328125, "learning_rate": 1.9205023467087475e-05, "loss": 0.5649, "step": 2881 }, { "epoch": 0.39541743843040406, "grad_norm": 1.1796875, "learning_rate": 1.920445934959658e-05, "loss": 0.5663, "step": 2882 }, { "epoch": 0.39555464087260755, "grad_norm": 1.1640625, "learning_rate": 1.920389504031805e-05, "loss": 0.5621, "step": 2883 }, { "epoch": 0.395691843314811, "grad_norm": 1.28125, "learning_rate": 1.9203330539263653e-05, "loss": 0.5661, "step": 2884 }, { "epoch": 0.3958290457570145, "grad_norm": 1.390625, "learning_rate": 1.9202765846445143e-05, "loss": 0.6107, "step": 2885 }, { "epoch": 0.39596624819921794, "grad_norm": 1.2421875, "learning_rate": 1.9202200961874288e-05, "loss": 0.5465, "step": 2886 }, { "epoch": 0.39610345064142144, "grad_norm": 1.2265625, "learning_rate": 1.920163588556286e-05, "loss": 0.5643, "step": 2887 }, { "epoch": 0.3962406530836249, "grad_norm": 1.1953125, "learning_rate": 1.9201070617522634e-05, "loss": 0.5147, "step": 2888 }, { "epoch": 0.3963778555258284, "grad_norm": 1.171875, "learning_rate": 1.9200505157765383e-05, "loss": 0.4667, "step": 2889 }, { "epoch": 0.3965150579680318, "grad_norm": 1.3046875, "learning_rate": 1.9199939506302894e-05, "loss": 0.525, "step": 2890 }, { "epoch": 0.3966522604102353, "grad_norm": 1.25, "learning_rate": 1.919937366314695e-05, "loss": 0.5307, "step": 2891 }, { "epoch": 0.39678946285243877, "grad_norm": 1.359375, "learning_rate": 1.9198807628309344e-05, "loss": 0.5992, "step": 2892 }, { "epoch": 0.39692666529464227, "grad_norm": 1.2734375, "learning_rate": 1.919824140180187e-05, "loss": 0.5102, "step": 2893 }, { "epoch": 0.3970638677368457, "grad_norm": 1.1953125, "learning_rate": 1.9197674983636322e-05, "loss": 0.5409, "step": 2894 }, { "epoch": 0.3972010701790492, "grad_norm": 1.234375, "learning_rate": 1.9197108373824505e-05, "loss": 0.5401, "step": 2895 }, { "epoch": 0.39733827262125265, "grad_norm": 1.328125, "learning_rate": 1.9196541572378225e-05, "loss": 0.5688, "step": 2896 }, { "epoch": 0.39747547506345615, "grad_norm": 1.109375, "learning_rate": 1.9195974579309295e-05, "loss": 0.4735, "step": 2897 }, { "epoch": 0.3976126775056596, "grad_norm": 1.1796875, "learning_rate": 1.9195407394629518e-05, "loss": 0.5461, "step": 2898 }, { "epoch": 0.3977498799478631, "grad_norm": 1.234375, "learning_rate": 1.919484001835073e-05, "loss": 0.6086, "step": 2899 }, { "epoch": 0.39788708239006654, "grad_norm": 1.2734375, "learning_rate": 1.919427245048474e-05, "loss": 0.6465, "step": 2900 }, { "epoch": 0.39802428483227004, "grad_norm": 1.1953125, "learning_rate": 1.9193704691043376e-05, "loss": 0.5363, "step": 2901 }, { "epoch": 0.3981614872744735, "grad_norm": 1.1953125, "learning_rate": 1.919313674003847e-05, "loss": 0.5137, "step": 2902 }, { "epoch": 0.398298689716677, "grad_norm": 1.2890625, "learning_rate": 1.9192568597481854e-05, "loss": 0.5817, "step": 2903 }, { "epoch": 0.3984358921588804, "grad_norm": 1.3046875, "learning_rate": 1.9192000263385368e-05, "loss": 0.6248, "step": 2904 }, { "epoch": 0.3985730946010839, "grad_norm": 1.3046875, "learning_rate": 1.9191431737760854e-05, "loss": 0.6045, "step": 2905 }, { "epoch": 0.39871029704328736, "grad_norm": 1.2421875, "learning_rate": 1.9190863020620155e-05, "loss": 0.547, "step": 2906 }, { "epoch": 0.39884749948549086, "grad_norm": 1.171875, "learning_rate": 1.9190294111975128e-05, "loss": 0.5099, "step": 2907 }, { "epoch": 0.3989847019276943, "grad_norm": 1.3828125, "learning_rate": 1.918972501183762e-05, "loss": 0.579, "step": 2908 }, { "epoch": 0.3991219043698978, "grad_norm": 1.2578125, "learning_rate": 1.9189155720219494e-05, "loss": 0.6362, "step": 2909 }, { "epoch": 0.39925910681210125, "grad_norm": 1.1953125, "learning_rate": 1.9188586237132606e-05, "loss": 0.5325, "step": 2910 }, { "epoch": 0.39939630925430475, "grad_norm": 1.2578125, "learning_rate": 1.9188016562588828e-05, "loss": 0.5034, "step": 2911 }, { "epoch": 0.3995335116965082, "grad_norm": 1.1875, "learning_rate": 1.9187446696600026e-05, "loss": 0.4969, "step": 2912 }, { "epoch": 0.3996707141387117, "grad_norm": 1.1796875, "learning_rate": 1.9186876639178077e-05, "loss": 0.569, "step": 2913 }, { "epoch": 0.39980791658091513, "grad_norm": 1.1875, "learning_rate": 1.9186306390334855e-05, "loss": 0.5462, "step": 2914 }, { "epoch": 0.39994511902311863, "grad_norm": 1.28125, "learning_rate": 1.9185735950082244e-05, "loss": 0.5745, "step": 2915 }, { "epoch": 0.4000823214653221, "grad_norm": 1.2109375, "learning_rate": 1.9185165318432135e-05, "loss": 0.5235, "step": 2916 }, { "epoch": 0.4002195239075256, "grad_norm": 1.3125, "learning_rate": 1.9184594495396407e-05, "loss": 0.6203, "step": 2917 }, { "epoch": 0.400356726349729, "grad_norm": 1.2421875, "learning_rate": 1.9184023480986962e-05, "loss": 0.5097, "step": 2918 }, { "epoch": 0.4004939287919325, "grad_norm": 1.203125, "learning_rate": 1.9183452275215698e-05, "loss": 0.569, "step": 2919 }, { "epoch": 0.40063113123413596, "grad_norm": 1.2578125, "learning_rate": 1.918288087809451e-05, "loss": 0.4921, "step": 2920 }, { "epoch": 0.40076833367633946, "grad_norm": 1.3203125, "learning_rate": 1.9182309289635316e-05, "loss": 0.6149, "step": 2921 }, { "epoch": 0.4009055361185429, "grad_norm": 1.28125, "learning_rate": 1.9181737509850013e-05, "loss": 0.5387, "step": 2922 }, { "epoch": 0.4010427385607464, "grad_norm": 1.2109375, "learning_rate": 1.9181165538750518e-05, "loss": 0.5503, "step": 2923 }, { "epoch": 0.40117994100294985, "grad_norm": 1.140625, "learning_rate": 1.9180593376348754e-05, "loss": 0.5545, "step": 2924 }, { "epoch": 0.40131714344515335, "grad_norm": 1.1640625, "learning_rate": 1.918002102265664e-05, "loss": 0.4811, "step": 2925 }, { "epoch": 0.4014543458873568, "grad_norm": 1.2109375, "learning_rate": 1.91794484776861e-05, "loss": 0.5635, "step": 2926 }, { "epoch": 0.4015915483295603, "grad_norm": 1.2734375, "learning_rate": 1.917887574144907e-05, "loss": 0.5503, "step": 2927 }, { "epoch": 0.40172875077176373, "grad_norm": 1.3203125, "learning_rate": 1.9178302813957474e-05, "loss": 0.535, "step": 2928 }, { "epoch": 0.40186595321396723, "grad_norm": 1.1484375, "learning_rate": 1.9177729695223254e-05, "loss": 0.5082, "step": 2929 }, { "epoch": 0.4020031556561707, "grad_norm": 1.203125, "learning_rate": 1.9177156385258357e-05, "loss": 0.5394, "step": 2930 }, { "epoch": 0.4021403580983742, "grad_norm": 1.1328125, "learning_rate": 1.917658288407472e-05, "loss": 0.5005, "step": 2931 }, { "epoch": 0.4022775605405776, "grad_norm": 1.2890625, "learning_rate": 1.9176009191684297e-05, "loss": 0.5413, "step": 2932 }, { "epoch": 0.4024147629827811, "grad_norm": 1.1953125, "learning_rate": 1.9175435308099044e-05, "loss": 0.5248, "step": 2933 }, { "epoch": 0.40255196542498456, "grad_norm": 1.25, "learning_rate": 1.9174861233330917e-05, "loss": 0.5655, "step": 2934 }, { "epoch": 0.40268916786718806, "grad_norm": 1.1796875, "learning_rate": 1.9174286967391876e-05, "loss": 0.5188, "step": 2935 }, { "epoch": 0.4028263703093915, "grad_norm": 1.25, "learning_rate": 1.9173712510293886e-05, "loss": 0.589, "step": 2936 }, { "epoch": 0.402963572751595, "grad_norm": 1.109375, "learning_rate": 1.917313786204892e-05, "loss": 0.5133, "step": 2937 }, { "epoch": 0.40310077519379844, "grad_norm": 1.1796875, "learning_rate": 1.9172563022668945e-05, "loss": 0.5226, "step": 2938 }, { "epoch": 0.40323797763600194, "grad_norm": 1.3203125, "learning_rate": 1.9171987992165947e-05, "loss": 0.6028, "step": 2939 }, { "epoch": 0.4033751800782054, "grad_norm": 1.25, "learning_rate": 1.9171412770551903e-05, "loss": 0.5195, "step": 2940 }, { "epoch": 0.4035123825204089, "grad_norm": 1.21875, "learning_rate": 1.91708373578388e-05, "loss": 0.4896, "step": 2941 }, { "epoch": 0.40364958496261233, "grad_norm": 1.25, "learning_rate": 1.9170261754038628e-05, "loss": 0.4908, "step": 2942 }, { "epoch": 0.40378678740481583, "grad_norm": 1.15625, "learning_rate": 1.9169685959163378e-05, "loss": 0.4738, "step": 2943 }, { "epoch": 0.40392398984701927, "grad_norm": 1.3515625, "learning_rate": 1.9169109973225048e-05, "loss": 0.6146, "step": 2944 }, { "epoch": 0.40406119228922277, "grad_norm": 1.3046875, "learning_rate": 1.9168533796235644e-05, "loss": 0.5818, "step": 2945 }, { "epoch": 0.4041983947314262, "grad_norm": 1.2421875, "learning_rate": 1.9167957428207165e-05, "loss": 0.5312, "step": 2946 }, { "epoch": 0.4043355971736297, "grad_norm": 1.21875, "learning_rate": 1.9167380869151624e-05, "loss": 0.5187, "step": 2947 }, { "epoch": 0.40447279961583316, "grad_norm": 1.1484375, "learning_rate": 1.9166804119081033e-05, "loss": 0.5037, "step": 2948 }, { "epoch": 0.40461000205803666, "grad_norm": 1.3046875, "learning_rate": 1.916622717800741e-05, "loss": 0.5542, "step": 2949 }, { "epoch": 0.4047472045002401, "grad_norm": 1.265625, "learning_rate": 1.9165650045942774e-05, "loss": 0.5443, "step": 2950 }, { "epoch": 0.4048844069424436, "grad_norm": 1.28125, "learning_rate": 1.9165072722899155e-05, "loss": 0.5755, "step": 2951 }, { "epoch": 0.40502160938464704, "grad_norm": 1.234375, "learning_rate": 1.916449520888858e-05, "loss": 0.6015, "step": 2952 }, { "epoch": 0.40515881182685054, "grad_norm": 1.203125, "learning_rate": 1.9163917503923083e-05, "loss": 0.5511, "step": 2953 }, { "epoch": 0.405296014269054, "grad_norm": 1.2578125, "learning_rate": 1.91633396080147e-05, "loss": 0.5202, "step": 2954 }, { "epoch": 0.4054332167112575, "grad_norm": 1.15625, "learning_rate": 1.916276152117547e-05, "loss": 0.4801, "step": 2955 }, { "epoch": 0.4055704191534609, "grad_norm": 1.265625, "learning_rate": 1.9162183243417446e-05, "loss": 0.5498, "step": 2956 }, { "epoch": 0.4057076215956644, "grad_norm": 1.203125, "learning_rate": 1.9161604774752672e-05, "loss": 0.4837, "step": 2957 }, { "epoch": 0.40584482403786787, "grad_norm": 1.2890625, "learning_rate": 1.9161026115193197e-05, "loss": 0.5172, "step": 2958 }, { "epoch": 0.40598202648007137, "grad_norm": 1.125, "learning_rate": 1.9160447264751087e-05, "loss": 0.4348, "step": 2959 }, { "epoch": 0.4061192289222748, "grad_norm": 1.3359375, "learning_rate": 1.9159868223438396e-05, "loss": 0.5681, "step": 2960 }, { "epoch": 0.4062564313644783, "grad_norm": 1.296875, "learning_rate": 1.9159288991267197e-05, "loss": 0.5691, "step": 2961 }, { "epoch": 0.40639363380668175, "grad_norm": 1.2734375, "learning_rate": 1.9158709568249547e-05, "loss": 0.5408, "step": 2962 }, { "epoch": 0.40653083624888525, "grad_norm": 1.25, "learning_rate": 1.915812995439753e-05, "loss": 0.575, "step": 2963 }, { "epoch": 0.4066680386910887, "grad_norm": 1.1640625, "learning_rate": 1.9157550149723215e-05, "loss": 0.4632, "step": 2964 }, { "epoch": 0.4068052411332922, "grad_norm": 1.15625, "learning_rate": 1.9156970154238694e-05, "loss": 0.5065, "step": 2965 }, { "epoch": 0.40694244357549564, "grad_norm": 1.328125, "learning_rate": 1.915638996795604e-05, "loss": 0.5631, "step": 2966 }, { "epoch": 0.40707964601769914, "grad_norm": 1.1796875, "learning_rate": 1.915580959088735e-05, "loss": 0.4651, "step": 2967 }, { "epoch": 0.4072168484599026, "grad_norm": 1.28125, "learning_rate": 1.9155229023044712e-05, "loss": 0.635, "step": 2968 }, { "epoch": 0.4073540509021061, "grad_norm": 1.2109375, "learning_rate": 1.9154648264440226e-05, "loss": 0.5681, "step": 2969 }, { "epoch": 0.4074912533443095, "grad_norm": 1.21875, "learning_rate": 1.9154067315085994e-05, "loss": 0.5623, "step": 2970 }, { "epoch": 0.407628455786513, "grad_norm": 1.1953125, "learning_rate": 1.9153486174994117e-05, "loss": 0.5124, "step": 2971 }, { "epoch": 0.40776565822871647, "grad_norm": 1.1171875, "learning_rate": 1.9152904844176707e-05, "loss": 0.4552, "step": 2972 }, { "epoch": 0.40790286067091996, "grad_norm": 1.1484375, "learning_rate": 1.915232332264587e-05, "loss": 0.511, "step": 2973 }, { "epoch": 0.4080400631131234, "grad_norm": 1.296875, "learning_rate": 1.9151741610413738e-05, "loss": 0.532, "step": 2974 }, { "epoch": 0.4081772655553269, "grad_norm": 1.25, "learning_rate": 1.9151159707492416e-05, "loss": 0.5827, "step": 2975 }, { "epoch": 0.40831446799753035, "grad_norm": 1.1875, "learning_rate": 1.9150577613894033e-05, "loss": 0.5636, "step": 2976 }, { "epoch": 0.40845167043973385, "grad_norm": 1.3046875, "learning_rate": 1.9149995329630723e-05, "loss": 0.5809, "step": 2977 }, { "epoch": 0.4085888728819373, "grad_norm": 1.1796875, "learning_rate": 1.9149412854714614e-05, "loss": 0.4541, "step": 2978 }, { "epoch": 0.4087260753241408, "grad_norm": 1.3515625, "learning_rate": 1.9148830189157847e-05, "loss": 0.5762, "step": 2979 }, { "epoch": 0.40886327776634424, "grad_norm": 1.15625, "learning_rate": 1.9148247332972557e-05, "loss": 0.5155, "step": 2980 }, { "epoch": 0.40900048020854773, "grad_norm": 1.1796875, "learning_rate": 1.9147664286170892e-05, "loss": 0.4586, "step": 2981 }, { "epoch": 0.4091376826507512, "grad_norm": 1.3046875, "learning_rate": 1.9147081048765e-05, "loss": 0.6147, "step": 2982 }, { "epoch": 0.4092748850929547, "grad_norm": 1.3203125, "learning_rate": 1.9146497620767034e-05, "loss": 0.5833, "step": 2983 }, { "epoch": 0.4094120875351581, "grad_norm": 1.3125, "learning_rate": 1.9145914002189147e-05, "loss": 0.47, "step": 2984 }, { "epoch": 0.4095492899773616, "grad_norm": 1.1953125, "learning_rate": 1.9145330193043508e-05, "loss": 0.4824, "step": 2985 }, { "epoch": 0.40968649241956506, "grad_norm": 1.265625, "learning_rate": 1.914474619334227e-05, "loss": 0.5473, "step": 2986 }, { "epoch": 0.40982369486176856, "grad_norm": 1.28125, "learning_rate": 1.914416200309761e-05, "loss": 0.5916, "step": 2987 }, { "epoch": 0.409960897303972, "grad_norm": 1.25, "learning_rate": 1.9143577622321698e-05, "loss": 0.5655, "step": 2988 }, { "epoch": 0.4100980997461755, "grad_norm": 1.2265625, "learning_rate": 1.9142993051026712e-05, "loss": 0.5078, "step": 2989 }, { "epoch": 0.41023530218837895, "grad_norm": 1.21875, "learning_rate": 1.9142408289224824e-05, "loss": 0.5126, "step": 2990 }, { "epoch": 0.41037250463058245, "grad_norm": 1.2734375, "learning_rate": 1.9141823336928232e-05, "loss": 0.6025, "step": 2991 }, { "epoch": 0.4105097070727859, "grad_norm": 1.2421875, "learning_rate": 1.9141238194149113e-05, "loss": 0.542, "step": 2992 }, { "epoch": 0.4106469095149894, "grad_norm": 1.2109375, "learning_rate": 1.9140652860899666e-05, "loss": 0.5487, "step": 2993 }, { "epoch": 0.41078411195719283, "grad_norm": 1.28125, "learning_rate": 1.914006733719208e-05, "loss": 0.5155, "step": 2994 }, { "epoch": 0.41092131439939633, "grad_norm": 1.1328125, "learning_rate": 1.9139481623038564e-05, "loss": 0.4218, "step": 2995 }, { "epoch": 0.4110585168415998, "grad_norm": 1.21875, "learning_rate": 1.9138895718451317e-05, "loss": 0.5114, "step": 2996 }, { "epoch": 0.4111957192838033, "grad_norm": 1.2578125, "learning_rate": 1.9138309623442545e-05, "loss": 0.5718, "step": 2997 }, { "epoch": 0.4113329217260067, "grad_norm": 1.2265625, "learning_rate": 1.9137723338024467e-05, "loss": 0.5451, "step": 2998 }, { "epoch": 0.4114701241682102, "grad_norm": 1.2734375, "learning_rate": 1.9137136862209293e-05, "loss": 0.5277, "step": 2999 }, { "epoch": 0.41160732661041366, "grad_norm": 1.2109375, "learning_rate": 1.9136550196009247e-05, "loss": 0.5377, "step": 3000 }, { "epoch": 0.41174452905261716, "grad_norm": 1.234375, "learning_rate": 1.913596333943655e-05, "loss": 0.5225, "step": 3001 }, { "epoch": 0.4118817314948206, "grad_norm": 1.2109375, "learning_rate": 1.9135376292503432e-05, "loss": 0.5773, "step": 3002 }, { "epoch": 0.4120189339370241, "grad_norm": 1.203125, "learning_rate": 1.9134789055222125e-05, "loss": 0.502, "step": 3003 }, { "epoch": 0.41215613637922754, "grad_norm": 1.0859375, "learning_rate": 1.913420162760486e-05, "loss": 0.4469, "step": 3004 }, { "epoch": 0.41229333882143104, "grad_norm": 1.1953125, "learning_rate": 1.9133614009663888e-05, "loss": 0.5669, "step": 3005 }, { "epoch": 0.4124305412636345, "grad_norm": 1.2421875, "learning_rate": 1.913302620141144e-05, "loss": 0.5774, "step": 3006 }, { "epoch": 0.412567743705838, "grad_norm": 1.1484375, "learning_rate": 1.913243820285977e-05, "loss": 0.42, "step": 3007 }, { "epoch": 0.41270494614804143, "grad_norm": 1.1796875, "learning_rate": 1.913185001402113e-05, "loss": 0.5207, "step": 3008 }, { "epoch": 0.41284214859024493, "grad_norm": 1.359375, "learning_rate": 1.913126163490778e-05, "loss": 0.5995, "step": 3009 }, { "epoch": 0.41297935103244837, "grad_norm": 1.2734375, "learning_rate": 1.9130673065531967e-05, "loss": 0.5618, "step": 3010 }, { "epoch": 0.41311655347465187, "grad_norm": 1.2578125, "learning_rate": 1.9130084305905967e-05, "loss": 0.5095, "step": 3011 }, { "epoch": 0.4132537559168553, "grad_norm": 1.265625, "learning_rate": 1.9129495356042043e-05, "loss": 0.5523, "step": 3012 }, { "epoch": 0.4133909583590588, "grad_norm": 1.1484375, "learning_rate": 1.9128906215952465e-05, "loss": 0.4321, "step": 3013 }, { "epoch": 0.41352816080126226, "grad_norm": 1.25, "learning_rate": 1.9128316885649513e-05, "loss": 0.5013, "step": 3014 }, { "epoch": 0.41366536324346576, "grad_norm": 1.3046875, "learning_rate": 1.912772736514546e-05, "loss": 0.5964, "step": 3015 }, { "epoch": 0.4138025656856692, "grad_norm": 1.1796875, "learning_rate": 1.9127137654452593e-05, "loss": 0.4827, "step": 3016 }, { "epoch": 0.4139397681278727, "grad_norm": 1.2890625, "learning_rate": 1.9126547753583203e-05, "loss": 0.5146, "step": 3017 }, { "epoch": 0.41407697057007614, "grad_norm": 1.3515625, "learning_rate": 1.9125957662549573e-05, "loss": 0.5652, "step": 3018 }, { "epoch": 0.41421417301227964, "grad_norm": 1.171875, "learning_rate": 1.912536738136401e-05, "loss": 0.46, "step": 3019 }, { "epoch": 0.4143513754544831, "grad_norm": 1.234375, "learning_rate": 1.91247769100388e-05, "loss": 0.5396, "step": 3020 }, { "epoch": 0.4144885778966866, "grad_norm": 1.1953125, "learning_rate": 1.9124186248586258e-05, "loss": 0.5024, "step": 3021 }, { "epoch": 0.41462578033889, "grad_norm": 1.375, "learning_rate": 1.9123595397018685e-05, "loss": 0.622, "step": 3022 }, { "epoch": 0.4147629827810935, "grad_norm": 1.1328125, "learning_rate": 1.912300435534839e-05, "loss": 0.4831, "step": 3023 }, { "epoch": 0.41490018522329697, "grad_norm": 1.2421875, "learning_rate": 1.9122413123587695e-05, "loss": 0.5706, "step": 3024 }, { "epoch": 0.41503738766550047, "grad_norm": 1.3671875, "learning_rate": 1.9121821701748915e-05, "loss": 0.6766, "step": 3025 }, { "epoch": 0.4151745901077039, "grad_norm": 1.2734375, "learning_rate": 1.9121230089844376e-05, "loss": 0.5942, "step": 3026 }, { "epoch": 0.4153117925499074, "grad_norm": 1.3671875, "learning_rate": 1.91206382878864e-05, "loss": 0.5884, "step": 3027 }, { "epoch": 0.41544899499211085, "grad_norm": 1.1640625, "learning_rate": 1.912004629588732e-05, "loss": 0.5149, "step": 3028 }, { "epoch": 0.41558619743431435, "grad_norm": 1.28125, "learning_rate": 1.9119454113859476e-05, "loss": 0.5714, "step": 3029 }, { "epoch": 0.4157233998765178, "grad_norm": 1.1953125, "learning_rate": 1.9118861741815196e-05, "loss": 0.5304, "step": 3030 }, { "epoch": 0.4158606023187213, "grad_norm": 1.15625, "learning_rate": 1.9118269179766837e-05, "loss": 0.5077, "step": 3031 }, { "epoch": 0.41599780476092474, "grad_norm": 1.265625, "learning_rate": 1.9117676427726735e-05, "loss": 0.4969, "step": 3032 }, { "epoch": 0.41613500720312824, "grad_norm": 1.171875, "learning_rate": 1.9117083485707242e-05, "loss": 0.5192, "step": 3033 }, { "epoch": 0.4162722096453317, "grad_norm": 1.21875, "learning_rate": 1.9116490353720717e-05, "loss": 0.5755, "step": 3034 }, { "epoch": 0.4164094120875352, "grad_norm": 1.1171875, "learning_rate": 1.911589703177952e-05, "loss": 0.4714, "step": 3035 }, { "epoch": 0.4165466145297386, "grad_norm": 1.1171875, "learning_rate": 1.9115303519896007e-05, "loss": 0.477, "step": 3036 }, { "epoch": 0.4166838169719421, "grad_norm": 1.2109375, "learning_rate": 1.911470981808255e-05, "loss": 0.542, "step": 3037 }, { "epoch": 0.41682101941414557, "grad_norm": 1.1796875, "learning_rate": 1.9114115926351516e-05, "loss": 0.4867, "step": 3038 }, { "epoch": 0.41695822185634906, "grad_norm": 1.2421875, "learning_rate": 1.9113521844715282e-05, "loss": 0.6079, "step": 3039 }, { "epoch": 0.4170954242985525, "grad_norm": 1.125, "learning_rate": 1.9112927573186225e-05, "loss": 0.4795, "step": 3040 }, { "epoch": 0.417232626740756, "grad_norm": 1.1953125, "learning_rate": 1.9112333111776735e-05, "loss": 0.5414, "step": 3041 }, { "epoch": 0.41736982918295945, "grad_norm": 1.140625, "learning_rate": 1.911173846049918e-05, "loss": 0.4985, "step": 3042 }, { "epoch": 0.41750703162516295, "grad_norm": 1.265625, "learning_rate": 1.911114361936597e-05, "loss": 0.5736, "step": 3043 }, { "epoch": 0.4176442340673664, "grad_norm": 1.2421875, "learning_rate": 1.911054858838949e-05, "loss": 0.5439, "step": 3044 }, { "epoch": 0.4177814365095699, "grad_norm": 1.1875, "learning_rate": 1.9109953367582138e-05, "loss": 0.5108, "step": 3045 }, { "epoch": 0.41791863895177334, "grad_norm": 1.2421875, "learning_rate": 1.910935795695632e-05, "loss": 0.5346, "step": 3046 }, { "epoch": 0.41805584139397683, "grad_norm": 1.1640625, "learning_rate": 1.910876235652444e-05, "loss": 0.5378, "step": 3047 }, { "epoch": 0.4181930438361803, "grad_norm": 1.1796875, "learning_rate": 1.9108166566298908e-05, "loss": 0.5708, "step": 3048 }, { "epoch": 0.4183302462783838, "grad_norm": 1.109375, "learning_rate": 1.9107570586292135e-05, "loss": 0.4718, "step": 3049 }, { "epoch": 0.4184674487205872, "grad_norm": 1.1328125, "learning_rate": 1.9106974416516547e-05, "loss": 0.5008, "step": 3050 }, { "epoch": 0.4186046511627907, "grad_norm": 1.25, "learning_rate": 1.910637805698456e-05, "loss": 0.5381, "step": 3051 }, { "epoch": 0.41874185360499416, "grad_norm": 1.203125, "learning_rate": 1.91057815077086e-05, "loss": 0.5328, "step": 3052 }, { "epoch": 0.41887905604719766, "grad_norm": 1.2421875, "learning_rate": 1.91051847687011e-05, "loss": 0.5593, "step": 3053 }, { "epoch": 0.4190162584894011, "grad_norm": 1.1484375, "learning_rate": 1.910458783997449e-05, "loss": 0.548, "step": 3054 }, { "epoch": 0.4191534609316046, "grad_norm": 1.1328125, "learning_rate": 1.9103990721541216e-05, "loss": 0.4527, "step": 3055 }, { "epoch": 0.41929066337380805, "grad_norm": 1.21875, "learning_rate": 1.910339341341371e-05, "loss": 0.5675, "step": 3056 }, { "epoch": 0.41942786581601155, "grad_norm": 1.1484375, "learning_rate": 1.910279591560442e-05, "loss": 0.4721, "step": 3057 }, { "epoch": 0.419565068258215, "grad_norm": 1.3515625, "learning_rate": 1.91021982281258e-05, "loss": 0.584, "step": 3058 }, { "epoch": 0.4197022707004185, "grad_norm": 1.2265625, "learning_rate": 1.9101600350990298e-05, "loss": 0.5034, "step": 3059 }, { "epoch": 0.41983947314262193, "grad_norm": 1.1875, "learning_rate": 1.9101002284210377e-05, "loss": 0.5361, "step": 3060 }, { "epoch": 0.41997667558482543, "grad_norm": 1.25, "learning_rate": 1.9100404027798495e-05, "loss": 0.5305, "step": 3061 }, { "epoch": 0.4201138780270289, "grad_norm": 1.2265625, "learning_rate": 1.909980558176712e-05, "loss": 0.4948, "step": 3062 }, { "epoch": 0.4202510804692324, "grad_norm": 1.171875, "learning_rate": 1.9099206946128722e-05, "loss": 0.4938, "step": 3063 }, { "epoch": 0.4203882829114358, "grad_norm": 1.265625, "learning_rate": 1.9098608120895768e-05, "loss": 0.5651, "step": 3064 }, { "epoch": 0.4205254853536393, "grad_norm": 1.2421875, "learning_rate": 1.9098009106080743e-05, "loss": 0.514, "step": 3065 }, { "epoch": 0.42066268779584276, "grad_norm": 1.28125, "learning_rate": 1.9097409901696126e-05, "loss": 0.5987, "step": 3066 }, { "epoch": 0.42079989023804626, "grad_norm": 1.25, "learning_rate": 1.90968105077544e-05, "loss": 0.5262, "step": 3067 }, { "epoch": 0.4209370926802497, "grad_norm": 1.3046875, "learning_rate": 1.9096210924268053e-05, "loss": 0.6277, "step": 3068 }, { "epoch": 0.4210742951224532, "grad_norm": 1.1875, "learning_rate": 1.9095611151249582e-05, "loss": 0.487, "step": 3069 }, { "epoch": 0.42121149756465665, "grad_norm": 1.2890625, "learning_rate": 1.9095011188711483e-05, "loss": 0.4942, "step": 3070 }, { "epoch": 0.42134870000686014, "grad_norm": 1.1015625, "learning_rate": 1.909441103666626e-05, "loss": 0.4336, "step": 3071 }, { "epoch": 0.4214859024490636, "grad_norm": 1.1953125, "learning_rate": 1.909381069512641e-05, "loss": 0.5461, "step": 3072 }, { "epoch": 0.4216231048912671, "grad_norm": 1.2890625, "learning_rate": 1.909321016410445e-05, "loss": 0.525, "step": 3073 }, { "epoch": 0.42176030733347053, "grad_norm": 1.1796875, "learning_rate": 1.909260944361289e-05, "loss": 0.504, "step": 3074 }, { "epoch": 0.42189750977567403, "grad_norm": 1.2734375, "learning_rate": 1.9092008533664246e-05, "loss": 0.4819, "step": 3075 }, { "epoch": 0.4220347122178775, "grad_norm": 1.296875, "learning_rate": 1.9091407434271038e-05, "loss": 0.6332, "step": 3076 }, { "epoch": 0.42217191466008097, "grad_norm": 1.0859375, "learning_rate": 1.9090806145445793e-05, "loss": 0.4657, "step": 3077 }, { "epoch": 0.4223091171022844, "grad_norm": 1.328125, "learning_rate": 1.909020466720104e-05, "loss": 0.5888, "step": 3078 }, { "epoch": 0.4224463195444879, "grad_norm": 1.2578125, "learning_rate": 1.9089602999549306e-05, "loss": 0.5271, "step": 3079 }, { "epoch": 0.42258352198669136, "grad_norm": 1.21875, "learning_rate": 1.9089001142503138e-05, "loss": 0.5046, "step": 3080 }, { "epoch": 0.42272072442889486, "grad_norm": 1.3125, "learning_rate": 1.9088399096075067e-05, "loss": 0.549, "step": 3081 }, { "epoch": 0.4228579268710983, "grad_norm": 1.3046875, "learning_rate": 1.9087796860277642e-05, "loss": 0.5494, "step": 3082 }, { "epoch": 0.4229951293133018, "grad_norm": 1.296875, "learning_rate": 1.908719443512341e-05, "loss": 0.5263, "step": 3083 }, { "epoch": 0.42313233175550524, "grad_norm": 1.3359375, "learning_rate": 1.9086591820624923e-05, "loss": 0.5682, "step": 3084 }, { "epoch": 0.42326953419770874, "grad_norm": 1.2421875, "learning_rate": 1.908598901679474e-05, "loss": 0.4633, "step": 3085 }, { "epoch": 0.4234067366399122, "grad_norm": 1.09375, "learning_rate": 1.908538602364542e-05, "loss": 0.4494, "step": 3086 }, { "epoch": 0.4235439390821157, "grad_norm": 1.2421875, "learning_rate": 1.9084782841189522e-05, "loss": 0.591, "step": 3087 }, { "epoch": 0.4236811415243191, "grad_norm": 1.21875, "learning_rate": 1.908417946943962e-05, "loss": 0.5333, "step": 3088 }, { "epoch": 0.4238183439665226, "grad_norm": 1.2890625, "learning_rate": 1.908357590840829e-05, "loss": 0.5611, "step": 3089 }, { "epoch": 0.42395554640872607, "grad_norm": 1.2421875, "learning_rate": 1.90829721581081e-05, "loss": 0.5165, "step": 3090 }, { "epoch": 0.42409274885092957, "grad_norm": 1.3515625, "learning_rate": 1.908236821855163e-05, "loss": 0.6026, "step": 3091 }, { "epoch": 0.424229951293133, "grad_norm": 1.3046875, "learning_rate": 1.9081764089751468e-05, "loss": 0.5336, "step": 3092 }, { "epoch": 0.4243671537353365, "grad_norm": 1.1796875, "learning_rate": 1.90811597717202e-05, "loss": 0.5106, "step": 3093 }, { "epoch": 0.42450435617753995, "grad_norm": 1.2109375, "learning_rate": 1.908055526447042e-05, "loss": 0.542, "step": 3094 }, { "epoch": 0.42464155861974345, "grad_norm": 1.1796875, "learning_rate": 1.907995056801472e-05, "loss": 0.5162, "step": 3095 }, { "epoch": 0.4247787610619469, "grad_norm": 1.28125, "learning_rate": 1.9079345682365706e-05, "loss": 0.5445, "step": 3096 }, { "epoch": 0.4249159635041504, "grad_norm": 1.2578125, "learning_rate": 1.907874060753597e-05, "loss": 0.556, "step": 3097 }, { "epoch": 0.42505316594635384, "grad_norm": 1.1640625, "learning_rate": 1.9078135343538134e-05, "loss": 0.5367, "step": 3098 }, { "epoch": 0.42519036838855734, "grad_norm": 1.2109375, "learning_rate": 1.90775298903848e-05, "loss": 0.5669, "step": 3099 }, { "epoch": 0.4253275708307608, "grad_norm": 1.234375, "learning_rate": 1.9076924248088584e-05, "loss": 0.5412, "step": 3100 }, { "epoch": 0.4254647732729643, "grad_norm": 1.28125, "learning_rate": 1.907631841666211e-05, "loss": 0.5622, "step": 3101 }, { "epoch": 0.4256019757151677, "grad_norm": 1.21875, "learning_rate": 1.9075712396117997e-05, "loss": 0.5271, "step": 3102 }, { "epoch": 0.4257391781573712, "grad_norm": 1.2421875, "learning_rate": 1.9075106186468875e-05, "loss": 0.5571, "step": 3103 }, { "epoch": 0.42587638059957467, "grad_norm": 1.1328125, "learning_rate": 1.9074499787727372e-05, "loss": 0.4736, "step": 3104 }, { "epoch": 0.42601358304177817, "grad_norm": 1.25, "learning_rate": 1.9073893199906126e-05, "loss": 0.5736, "step": 3105 }, { "epoch": 0.4261507854839816, "grad_norm": 1.2265625, "learning_rate": 1.907328642301778e-05, "loss": 0.491, "step": 3106 }, { "epoch": 0.4262879879261851, "grad_norm": 1.234375, "learning_rate": 1.9072679457074964e-05, "loss": 0.505, "step": 3107 }, { "epoch": 0.42642519036838855, "grad_norm": 1.140625, "learning_rate": 1.907207230209034e-05, "loss": 0.4585, "step": 3108 }, { "epoch": 0.42656239281059205, "grad_norm": 1.296875, "learning_rate": 1.907146495807655e-05, "loss": 0.6143, "step": 3109 }, { "epoch": 0.4266995952527955, "grad_norm": 1.28125, "learning_rate": 1.9070857425046252e-05, "loss": 0.5489, "step": 3110 }, { "epoch": 0.426836797694999, "grad_norm": 1.234375, "learning_rate": 1.90702497030121e-05, "loss": 0.542, "step": 3111 }, { "epoch": 0.42697400013720244, "grad_norm": 1.234375, "learning_rate": 1.906964179198677e-05, "loss": 0.5632, "step": 3112 }, { "epoch": 0.42711120257940594, "grad_norm": 1.21875, "learning_rate": 1.906903369198291e-05, "loss": 0.5275, "step": 3113 }, { "epoch": 0.4272484050216094, "grad_norm": 1.3125, "learning_rate": 1.9068425403013205e-05, "loss": 0.5996, "step": 3114 }, { "epoch": 0.4273856074638129, "grad_norm": 1.1484375, "learning_rate": 1.906781692509032e-05, "loss": 0.4739, "step": 3115 }, { "epoch": 0.4275228099060163, "grad_norm": 1.1796875, "learning_rate": 1.9067208258226943e-05, "loss": 0.535, "step": 3116 }, { "epoch": 0.4276600123482198, "grad_norm": 1.1875, "learning_rate": 1.9066599402435747e-05, "loss": 0.5064, "step": 3117 }, { "epoch": 0.42779721479042326, "grad_norm": 1.296875, "learning_rate": 1.9065990357729426e-05, "loss": 0.5584, "step": 3118 }, { "epoch": 0.42793441723262676, "grad_norm": 1.28125, "learning_rate": 1.9065381124120664e-05, "loss": 0.6012, "step": 3119 }, { "epoch": 0.4280716196748302, "grad_norm": 1.1953125, "learning_rate": 1.9064771701622162e-05, "loss": 0.5383, "step": 3120 }, { "epoch": 0.4282088221170337, "grad_norm": 1.1875, "learning_rate": 1.9064162090246613e-05, "loss": 0.5732, "step": 3121 }, { "epoch": 0.42834602455923715, "grad_norm": 1.1875, "learning_rate": 1.906355229000672e-05, "loss": 0.5078, "step": 3122 }, { "epoch": 0.42848322700144065, "grad_norm": 1.21875, "learning_rate": 1.906294230091519e-05, "loss": 0.5463, "step": 3123 }, { "epoch": 0.4286204294436441, "grad_norm": 1.3984375, "learning_rate": 1.906233212298473e-05, "loss": 0.6091, "step": 3124 }, { "epoch": 0.4287576318858476, "grad_norm": 1.2109375, "learning_rate": 1.906172175622806e-05, "loss": 0.5526, "step": 3125 }, { "epoch": 0.42889483432805103, "grad_norm": 1.3359375, "learning_rate": 1.906111120065789e-05, "loss": 0.6183, "step": 3126 }, { "epoch": 0.42903203677025453, "grad_norm": 1.2578125, "learning_rate": 1.9060500456286953e-05, "loss": 0.5701, "step": 3127 }, { "epoch": 0.429169239212458, "grad_norm": 1.1484375, "learning_rate": 1.9059889523127962e-05, "loss": 0.4778, "step": 3128 }, { "epoch": 0.4293064416546615, "grad_norm": 1.1796875, "learning_rate": 1.9059278401193654e-05, "loss": 0.4957, "step": 3129 }, { "epoch": 0.4294436440968649, "grad_norm": 1.1484375, "learning_rate": 1.905866709049676e-05, "loss": 0.498, "step": 3130 }, { "epoch": 0.4295808465390684, "grad_norm": 1.203125, "learning_rate": 1.905805559105002e-05, "loss": 0.5186, "step": 3131 }, { "epoch": 0.42971804898127186, "grad_norm": 1.3125, "learning_rate": 1.905744390286617e-05, "loss": 0.5528, "step": 3132 }, { "epoch": 0.42985525142347536, "grad_norm": 1.2109375, "learning_rate": 1.9056832025957963e-05, "loss": 0.5688, "step": 3133 }, { "epoch": 0.4299924538656788, "grad_norm": 1.2890625, "learning_rate": 1.9056219960338143e-05, "loss": 0.5879, "step": 3134 }, { "epoch": 0.4301296563078823, "grad_norm": 1.3203125, "learning_rate": 1.9055607706019465e-05, "loss": 0.6203, "step": 3135 }, { "epoch": 0.43026685875008575, "grad_norm": 1.2734375, "learning_rate": 1.9054995263014687e-05, "loss": 0.5698, "step": 3136 }, { "epoch": 0.43040406119228924, "grad_norm": 1.1171875, "learning_rate": 1.905438263133657e-05, "loss": 0.4683, "step": 3137 }, { "epoch": 0.4305412636344927, "grad_norm": 1.546875, "learning_rate": 1.9053769810997874e-05, "loss": 0.5712, "step": 3138 }, { "epoch": 0.4306784660766962, "grad_norm": 1.25, "learning_rate": 1.9053156802011373e-05, "loss": 0.5631, "step": 3139 }, { "epoch": 0.43081566851889963, "grad_norm": 1.359375, "learning_rate": 1.905254360438984e-05, "loss": 0.5745, "step": 3140 }, { "epoch": 0.43095287096110313, "grad_norm": 1.203125, "learning_rate": 1.905193021814605e-05, "loss": 0.5265, "step": 3141 }, { "epoch": 0.4310900734033066, "grad_norm": 1.265625, "learning_rate": 1.9051316643292787e-05, "loss": 0.6081, "step": 3142 }, { "epoch": 0.43122727584551007, "grad_norm": 1.2109375, "learning_rate": 1.9050702879842832e-05, "loss": 0.5647, "step": 3143 }, { "epoch": 0.4313644782877135, "grad_norm": 1.3203125, "learning_rate": 1.9050088927808973e-05, "loss": 0.5545, "step": 3144 }, { "epoch": 0.431501680729917, "grad_norm": 1.078125, "learning_rate": 1.904947478720401e-05, "loss": 0.4162, "step": 3145 }, { "epoch": 0.43163888317212046, "grad_norm": 1.28125, "learning_rate": 1.9048860458040724e-05, "loss": 0.5702, "step": 3146 }, { "epoch": 0.43177608561432396, "grad_norm": 1.40625, "learning_rate": 1.904824594033193e-05, "loss": 0.576, "step": 3147 }, { "epoch": 0.4319132880565274, "grad_norm": 1.265625, "learning_rate": 1.9047631234090432e-05, "loss": 0.5624, "step": 3148 }, { "epoch": 0.4320504904987309, "grad_norm": 1.265625, "learning_rate": 1.9047016339329028e-05, "loss": 0.5916, "step": 3149 }, { "epoch": 0.43218769294093434, "grad_norm": 1.1953125, "learning_rate": 1.9046401256060537e-05, "loss": 0.543, "step": 3150 }, { "epoch": 0.43232489538313784, "grad_norm": 1.3046875, "learning_rate": 1.9045785984297773e-05, "loss": 0.6327, "step": 3151 }, { "epoch": 0.4324620978253413, "grad_norm": 1.1796875, "learning_rate": 1.9045170524053562e-05, "loss": 0.518, "step": 3152 }, { "epoch": 0.4325993002675448, "grad_norm": 1.2265625, "learning_rate": 1.904455487534072e-05, "loss": 0.5294, "step": 3153 }, { "epoch": 0.4327365027097482, "grad_norm": 1.1484375, "learning_rate": 1.9043939038172074e-05, "loss": 0.4534, "step": 3154 }, { "epoch": 0.4328737051519517, "grad_norm": 1.1796875, "learning_rate": 1.9043323012560463e-05, "loss": 0.5793, "step": 3155 }, { "epoch": 0.43301090759415517, "grad_norm": 1.28125, "learning_rate": 1.904270679851872e-05, "loss": 0.5053, "step": 3156 }, { "epoch": 0.43314811003635867, "grad_norm": 1.296875, "learning_rate": 1.904209039605968e-05, "loss": 0.5226, "step": 3157 }, { "epoch": 0.4332853124785621, "grad_norm": 1.3125, "learning_rate": 1.90414738051962e-05, "loss": 0.5786, "step": 3158 }, { "epoch": 0.4334225149207656, "grad_norm": 1.2265625, "learning_rate": 1.904085702594111e-05, "loss": 0.5404, "step": 3159 }, { "epoch": 0.43355971736296905, "grad_norm": 1.1484375, "learning_rate": 1.9040240058307272e-05, "loss": 0.4943, "step": 3160 }, { "epoch": 0.43369691980517255, "grad_norm": 1.1328125, "learning_rate": 1.9039622902307542e-05, "loss": 0.4233, "step": 3161 }, { "epoch": 0.433834122247376, "grad_norm": 1.1875, "learning_rate": 1.9039005557954774e-05, "loss": 0.5198, "step": 3162 }, { "epoch": 0.4339713246895795, "grad_norm": 1.25, "learning_rate": 1.9038388025261834e-05, "loss": 0.5556, "step": 3163 }, { "epoch": 0.43410852713178294, "grad_norm": 1.0859375, "learning_rate": 1.9037770304241587e-05, "loss": 0.475, "step": 3164 }, { "epoch": 0.43424572957398644, "grad_norm": 1.3046875, "learning_rate": 1.9037152394906906e-05, "loss": 0.5893, "step": 3165 }, { "epoch": 0.4343829320161899, "grad_norm": 1.25, "learning_rate": 1.9036534297270668e-05, "loss": 0.5636, "step": 3166 }, { "epoch": 0.4345201344583934, "grad_norm": 1.25, "learning_rate": 1.9035916011345748e-05, "loss": 0.5128, "step": 3167 }, { "epoch": 0.4346573369005968, "grad_norm": 1.109375, "learning_rate": 1.903529753714503e-05, "loss": 0.4921, "step": 3168 }, { "epoch": 0.4347945393428003, "grad_norm": 1.1015625, "learning_rate": 1.9034678874681406e-05, "loss": 0.4822, "step": 3169 }, { "epoch": 0.43493174178500377, "grad_norm": 1.296875, "learning_rate": 1.9034060023967758e-05, "loss": 0.6072, "step": 3170 }, { "epoch": 0.43506894422720727, "grad_norm": 1.2265625, "learning_rate": 1.9033440985016985e-05, "loss": 0.5094, "step": 3171 }, { "epoch": 0.4352061466694107, "grad_norm": 1.234375, "learning_rate": 1.9032821757841983e-05, "loss": 0.5146, "step": 3172 }, { "epoch": 0.4353433491116142, "grad_norm": 1.1875, "learning_rate": 1.903220234245566e-05, "loss": 0.5462, "step": 3173 }, { "epoch": 0.43548055155381765, "grad_norm": 1.265625, "learning_rate": 1.903158273887092e-05, "loss": 0.5041, "step": 3174 }, { "epoch": 0.43561775399602115, "grad_norm": 1.171875, "learning_rate": 1.903096294710067e-05, "loss": 0.4562, "step": 3175 }, { "epoch": 0.4357549564382246, "grad_norm": 1.1484375, "learning_rate": 1.9030342967157826e-05, "loss": 0.4919, "step": 3176 }, { "epoch": 0.4358921588804281, "grad_norm": 1.171875, "learning_rate": 1.902972279905531e-05, "loss": 0.5532, "step": 3177 }, { "epoch": 0.43602936132263154, "grad_norm": 1.21875, "learning_rate": 1.902910244280604e-05, "loss": 0.5312, "step": 3178 }, { "epoch": 0.43616656376483504, "grad_norm": 1.1015625, "learning_rate": 1.9028481898422938e-05, "loss": 0.4906, "step": 3179 }, { "epoch": 0.4363037662070385, "grad_norm": 1.1484375, "learning_rate": 1.9027861165918943e-05, "loss": 0.5203, "step": 3180 }, { "epoch": 0.436440968649242, "grad_norm": 1.1640625, "learning_rate": 1.902724024530698e-05, "loss": 0.4626, "step": 3181 }, { "epoch": 0.4365781710914454, "grad_norm": 1.34375, "learning_rate": 1.9026619136599994e-05, "loss": 0.6573, "step": 3182 }, { "epoch": 0.4367153735336489, "grad_norm": 1.3359375, "learning_rate": 1.9025997839810926e-05, "loss": 0.5365, "step": 3183 }, { "epoch": 0.43685257597585236, "grad_norm": 1.21875, "learning_rate": 1.9025376354952718e-05, "loss": 0.5008, "step": 3184 }, { "epoch": 0.43698977841805586, "grad_norm": 1.1953125, "learning_rate": 1.902475468203832e-05, "loss": 0.5852, "step": 3185 }, { "epoch": 0.4371269808602593, "grad_norm": 1.2578125, "learning_rate": 1.902413282108069e-05, "loss": 0.6073, "step": 3186 }, { "epoch": 0.4372641833024628, "grad_norm": 1.21875, "learning_rate": 1.9023510772092774e-05, "loss": 0.5211, "step": 3187 }, { "epoch": 0.43740138574466625, "grad_norm": 1.2109375, "learning_rate": 1.9022888535087546e-05, "loss": 0.5532, "step": 3188 }, { "epoch": 0.43753858818686975, "grad_norm": 1.3671875, "learning_rate": 1.9022266110077965e-05, "loss": 0.6171, "step": 3189 }, { "epoch": 0.4376757906290732, "grad_norm": 1.1640625, "learning_rate": 1.9021643497077e-05, "loss": 0.5345, "step": 3190 }, { "epoch": 0.4378129930712767, "grad_norm": 1.3984375, "learning_rate": 1.9021020696097627e-05, "loss": 0.6006, "step": 3191 }, { "epoch": 0.43795019551348013, "grad_norm": 1.3203125, "learning_rate": 1.9020397707152822e-05, "loss": 0.5162, "step": 3192 }, { "epoch": 0.43808739795568363, "grad_norm": 1.1484375, "learning_rate": 1.9019774530255564e-05, "loss": 0.4695, "step": 3193 }, { "epoch": 0.4382246003978871, "grad_norm": 1.2578125, "learning_rate": 1.9019151165418838e-05, "loss": 0.5478, "step": 3194 }, { "epoch": 0.4383618028400906, "grad_norm": 1.234375, "learning_rate": 1.9018527612655632e-05, "loss": 0.55, "step": 3195 }, { "epoch": 0.438499005282294, "grad_norm": 1.265625, "learning_rate": 1.9017903871978943e-05, "loss": 0.5251, "step": 3196 }, { "epoch": 0.4386362077244975, "grad_norm": 1.2109375, "learning_rate": 1.9017279943401764e-05, "loss": 0.4951, "step": 3197 }, { "epoch": 0.43877341016670096, "grad_norm": 1.1328125, "learning_rate": 1.901665582693709e-05, "loss": 0.4309, "step": 3198 }, { "epoch": 0.43891061260890446, "grad_norm": 1.25, "learning_rate": 1.9016031522597936e-05, "loss": 0.5601, "step": 3199 }, { "epoch": 0.4390478150511079, "grad_norm": 1.203125, "learning_rate": 1.9015407030397305e-05, "loss": 0.5268, "step": 3200 }, { "epoch": 0.4391850174933114, "grad_norm": 1.3203125, "learning_rate": 1.901478235034821e-05, "loss": 0.5213, "step": 3201 }, { "epoch": 0.43932221993551485, "grad_norm": 1.2421875, "learning_rate": 1.901415748246367e-05, "loss": 0.5581, "step": 3202 }, { "epoch": 0.43945942237771835, "grad_norm": 1.21875, "learning_rate": 1.9013532426756694e-05, "loss": 0.524, "step": 3203 }, { "epoch": 0.4395966248199218, "grad_norm": 1.375, "learning_rate": 1.901290718324032e-05, "loss": 0.5221, "step": 3204 }, { "epoch": 0.4397338272621253, "grad_norm": 1.125, "learning_rate": 1.9012281751927565e-05, "loss": 0.5315, "step": 3205 }, { "epoch": 0.43987102970432873, "grad_norm": 1.1875, "learning_rate": 1.9011656132831465e-05, "loss": 0.4971, "step": 3206 }, { "epoch": 0.44000823214653223, "grad_norm": 1.2578125, "learning_rate": 1.9011030325965058e-05, "loss": 0.5709, "step": 3207 }, { "epoch": 0.4401454345887357, "grad_norm": 1.21875, "learning_rate": 1.901040433134138e-05, "loss": 0.5518, "step": 3208 }, { "epoch": 0.4402826370309392, "grad_norm": 1.265625, "learning_rate": 1.9009778148973477e-05, "loss": 0.522, "step": 3209 }, { "epoch": 0.4404198394731426, "grad_norm": 1.1171875, "learning_rate": 1.9009151778874395e-05, "loss": 0.4918, "step": 3210 }, { "epoch": 0.4405570419153461, "grad_norm": 1.2890625, "learning_rate": 1.9008525221057182e-05, "loss": 0.5551, "step": 3211 }, { "epoch": 0.44069424435754956, "grad_norm": 1.2734375, "learning_rate": 1.9007898475534904e-05, "loss": 0.5118, "step": 3212 }, { "epoch": 0.44083144679975306, "grad_norm": 1.234375, "learning_rate": 1.9007271542320604e-05, "loss": 0.5427, "step": 3213 }, { "epoch": 0.4409686492419565, "grad_norm": 1.2734375, "learning_rate": 1.9006644421427357e-05, "loss": 0.5878, "step": 3214 }, { "epoch": 0.44110585168416, "grad_norm": 1.2265625, "learning_rate": 1.9006017112868228e-05, "loss": 0.5078, "step": 3215 }, { "epoch": 0.44124305412636344, "grad_norm": 1.125, "learning_rate": 1.9005389616656285e-05, "loss": 0.422, "step": 3216 }, { "epoch": 0.44138025656856694, "grad_norm": 1.296875, "learning_rate": 1.9004761932804606e-05, "loss": 0.5498, "step": 3217 }, { "epoch": 0.4415174590107704, "grad_norm": 1.21875, "learning_rate": 1.9004134061326266e-05, "loss": 0.548, "step": 3218 }, { "epoch": 0.4416546614529739, "grad_norm": 1.1796875, "learning_rate": 1.9003506002234353e-05, "loss": 0.466, "step": 3219 }, { "epoch": 0.44179186389517733, "grad_norm": 1.34375, "learning_rate": 1.9002877755541945e-05, "loss": 0.5218, "step": 3220 }, { "epoch": 0.4419290663373808, "grad_norm": 1.2578125, "learning_rate": 1.9002249321262138e-05, "loss": 0.5092, "step": 3221 }, { "epoch": 0.44206626877958427, "grad_norm": 1.15625, "learning_rate": 1.900162069940803e-05, "loss": 0.4491, "step": 3222 }, { "epoch": 0.44220347122178777, "grad_norm": 1.203125, "learning_rate": 1.9000991889992712e-05, "loss": 0.4759, "step": 3223 }, { "epoch": 0.4423406736639912, "grad_norm": 1.3203125, "learning_rate": 1.9000362893029287e-05, "loss": 0.5919, "step": 3224 }, { "epoch": 0.4424778761061947, "grad_norm": 1.5546875, "learning_rate": 1.8999733708530864e-05, "loss": 0.5883, "step": 3225 }, { "epoch": 0.44261507854839816, "grad_norm": 1.5390625, "learning_rate": 1.8999104336510554e-05, "loss": 0.5851, "step": 3226 }, { "epoch": 0.44275228099060165, "grad_norm": 1.046875, "learning_rate": 1.8998474776981466e-05, "loss": 0.3942, "step": 3227 }, { "epoch": 0.4428894834328051, "grad_norm": 1.2890625, "learning_rate": 1.8997845029956722e-05, "loss": 0.5543, "step": 3228 }, { "epoch": 0.4430266858750086, "grad_norm": 1.2734375, "learning_rate": 1.8997215095449442e-05, "loss": 0.5903, "step": 3229 }, { "epoch": 0.44316388831721204, "grad_norm": 1.1796875, "learning_rate": 1.899658497347275e-05, "loss": 0.5408, "step": 3230 }, { "epoch": 0.44330109075941554, "grad_norm": 1.359375, "learning_rate": 1.899595466403978e-05, "loss": 0.5176, "step": 3231 }, { "epoch": 0.443438293201619, "grad_norm": 1.3125, "learning_rate": 1.899532416716366e-05, "loss": 0.531, "step": 3232 }, { "epoch": 0.4435754956438225, "grad_norm": 1.234375, "learning_rate": 1.8994693482857532e-05, "loss": 0.507, "step": 3233 }, { "epoch": 0.4437126980860259, "grad_norm": 1.1953125, "learning_rate": 1.899406261113453e-05, "loss": 0.5173, "step": 3234 }, { "epoch": 0.4438499005282294, "grad_norm": 1.34375, "learning_rate": 1.899343155200781e-05, "loss": 0.5731, "step": 3235 }, { "epoch": 0.44398710297043287, "grad_norm": 1.28125, "learning_rate": 1.8992800305490512e-05, "loss": 0.592, "step": 3236 }, { "epoch": 0.44412430541263637, "grad_norm": 1.2890625, "learning_rate": 1.899216887159579e-05, "loss": 0.5373, "step": 3237 }, { "epoch": 0.4442615078548398, "grad_norm": 1.234375, "learning_rate": 1.899153725033681e-05, "loss": 0.5071, "step": 3238 }, { "epoch": 0.4443987102970433, "grad_norm": 1.40625, "learning_rate": 1.8990905441726718e-05, "loss": 0.6791, "step": 3239 }, { "epoch": 0.44453591273924675, "grad_norm": 1.375, "learning_rate": 1.8990273445778688e-05, "loss": 0.6133, "step": 3240 }, { "epoch": 0.44467311518145025, "grad_norm": 1.3046875, "learning_rate": 1.898964126250589e-05, "loss": 0.5505, "step": 3241 }, { "epoch": 0.4448103176236537, "grad_norm": 1.375, "learning_rate": 1.8989008891921488e-05, "loss": 0.5231, "step": 3242 }, { "epoch": 0.4449475200658572, "grad_norm": 1.328125, "learning_rate": 1.8988376334038668e-05, "loss": 0.5494, "step": 3243 }, { "epoch": 0.44508472250806064, "grad_norm": 1.28125, "learning_rate": 1.8987743588870602e-05, "loss": 0.5265, "step": 3244 }, { "epoch": 0.44522192495026414, "grad_norm": 1.2109375, "learning_rate": 1.8987110656430477e-05, "loss": 0.5085, "step": 3245 }, { "epoch": 0.4453591273924676, "grad_norm": 1.28125, "learning_rate": 1.8986477536731486e-05, "loss": 0.5332, "step": 3246 }, { "epoch": 0.4454963298346711, "grad_norm": 1.28125, "learning_rate": 1.8985844229786812e-05, "loss": 0.5587, "step": 3247 }, { "epoch": 0.4456335322768745, "grad_norm": 1.1796875, "learning_rate": 1.8985210735609656e-05, "loss": 0.5626, "step": 3248 }, { "epoch": 0.445770734719078, "grad_norm": 1.328125, "learning_rate": 1.8984577054213217e-05, "loss": 0.5935, "step": 3249 }, { "epoch": 0.44590793716128146, "grad_norm": 1.28125, "learning_rate": 1.89839431856107e-05, "loss": 0.559, "step": 3250 }, { "epoch": 0.44604513960348496, "grad_norm": 1.25, "learning_rate": 1.898330912981531e-05, "loss": 0.4588, "step": 3251 }, { "epoch": 0.4461823420456884, "grad_norm": 1.15625, "learning_rate": 1.898267488684026e-05, "loss": 0.4856, "step": 3252 }, { "epoch": 0.4463195444878919, "grad_norm": 1.28125, "learning_rate": 1.8982040456698767e-05, "loss": 0.5234, "step": 3253 }, { "epoch": 0.44645674693009535, "grad_norm": 1.484375, "learning_rate": 1.8981405839404045e-05, "loss": 0.68, "step": 3254 }, { "epoch": 0.44659394937229885, "grad_norm": 1.25, "learning_rate": 1.8980771034969322e-05, "loss": 0.5069, "step": 3255 }, { "epoch": 0.4467311518145023, "grad_norm": 1.234375, "learning_rate": 1.8980136043407823e-05, "loss": 0.4895, "step": 3256 }, { "epoch": 0.4468683542567058, "grad_norm": 1.28125, "learning_rate": 1.897950086473278e-05, "loss": 0.5685, "step": 3257 }, { "epoch": 0.44700555669890923, "grad_norm": 1.2109375, "learning_rate": 1.897886549895743e-05, "loss": 0.5251, "step": 3258 }, { "epoch": 0.44714275914111273, "grad_norm": 1.1796875, "learning_rate": 1.8978229946095002e-05, "loss": 0.5463, "step": 3259 }, { "epoch": 0.4472799615833162, "grad_norm": 1.2265625, "learning_rate": 1.897759420615875e-05, "loss": 0.5248, "step": 3260 }, { "epoch": 0.4474171640255197, "grad_norm": 1.2109375, "learning_rate": 1.8976958279161913e-05, "loss": 0.5974, "step": 3261 }, { "epoch": 0.4475543664677231, "grad_norm": 1.234375, "learning_rate": 1.897632216511775e-05, "loss": 0.5611, "step": 3262 }, { "epoch": 0.4476915689099266, "grad_norm": 1.0390625, "learning_rate": 1.8975685864039506e-05, "loss": 0.3716, "step": 3263 }, { "epoch": 0.44782877135213006, "grad_norm": 1.0859375, "learning_rate": 1.8975049375940443e-05, "loss": 0.4775, "step": 3264 }, { "epoch": 0.44796597379433356, "grad_norm": 1.1640625, "learning_rate": 1.8974412700833822e-05, "loss": 0.5046, "step": 3265 }, { "epoch": 0.448103176236537, "grad_norm": 1.296875, "learning_rate": 1.897377583873291e-05, "loss": 0.5936, "step": 3266 }, { "epoch": 0.4482403786787405, "grad_norm": 1.140625, "learning_rate": 1.897313878965098e-05, "loss": 0.4825, "step": 3267 }, { "epoch": 0.44837758112094395, "grad_norm": 1.3125, "learning_rate": 1.89725015536013e-05, "loss": 0.5823, "step": 3268 }, { "epoch": 0.44851478356314745, "grad_norm": 1.2421875, "learning_rate": 1.8971864130597155e-05, "loss": 0.5257, "step": 3269 }, { "epoch": 0.4486519860053509, "grad_norm": 1.2109375, "learning_rate": 1.8971226520651817e-05, "loss": 0.5465, "step": 3270 }, { "epoch": 0.4487891884475544, "grad_norm": 1.265625, "learning_rate": 1.897058872377858e-05, "loss": 0.5273, "step": 3271 }, { "epoch": 0.44892639088975783, "grad_norm": 1.3515625, "learning_rate": 1.8969950739990727e-05, "loss": 0.647, "step": 3272 }, { "epoch": 0.44906359333196133, "grad_norm": 1.140625, "learning_rate": 1.8969312569301555e-05, "loss": 0.5187, "step": 3273 }, { "epoch": 0.4492007957741648, "grad_norm": 1.2578125, "learning_rate": 1.896867421172436e-05, "loss": 0.5532, "step": 3274 }, { "epoch": 0.4493379982163683, "grad_norm": 1.25, "learning_rate": 1.896803566727244e-05, "loss": 0.5252, "step": 3275 }, { "epoch": 0.4494752006585717, "grad_norm": 1.2109375, "learning_rate": 1.896739693595911e-05, "loss": 0.5371, "step": 3276 }, { "epoch": 0.4496124031007752, "grad_norm": 1.140625, "learning_rate": 1.8966758017797668e-05, "loss": 0.4894, "step": 3277 }, { "epoch": 0.44974960554297866, "grad_norm": 1.2421875, "learning_rate": 1.896611891280143e-05, "loss": 0.5402, "step": 3278 }, { "epoch": 0.44988680798518216, "grad_norm": 1.1796875, "learning_rate": 1.896547962098372e-05, "loss": 0.5541, "step": 3279 }, { "epoch": 0.4500240104273856, "grad_norm": 1.265625, "learning_rate": 1.8964840142357847e-05, "loss": 0.555, "step": 3280 }, { "epoch": 0.4501612128695891, "grad_norm": 1.2890625, "learning_rate": 1.896420047693714e-05, "loss": 0.6535, "step": 3281 }, { "epoch": 0.45029841531179254, "grad_norm": 1.3203125, "learning_rate": 1.896356062473493e-05, "loss": 0.6016, "step": 3282 }, { "epoch": 0.45043561775399604, "grad_norm": 1.2109375, "learning_rate": 1.896292058576455e-05, "loss": 0.56, "step": 3283 }, { "epoch": 0.4505728201961995, "grad_norm": 1.2421875, "learning_rate": 1.8962280360039328e-05, "loss": 0.5068, "step": 3284 }, { "epoch": 0.450710022638403, "grad_norm": 1.265625, "learning_rate": 1.896163994757261e-05, "loss": 0.4916, "step": 3285 }, { "epoch": 0.45084722508060643, "grad_norm": 1.1953125, "learning_rate": 1.896099934837774e-05, "loss": 0.4902, "step": 3286 }, { "epoch": 0.4509844275228099, "grad_norm": 1.328125, "learning_rate": 1.8960358562468065e-05, "loss": 0.5254, "step": 3287 }, { "epoch": 0.45112162996501337, "grad_norm": 1.171875, "learning_rate": 1.8959717589856933e-05, "loss": 0.5324, "step": 3288 }, { "epoch": 0.45125883240721687, "grad_norm": 1.1640625, "learning_rate": 1.8959076430557707e-05, "loss": 0.5011, "step": 3289 }, { "epoch": 0.4513960348494203, "grad_norm": 1.2265625, "learning_rate": 1.8958435084583745e-05, "loss": 0.5063, "step": 3290 }, { "epoch": 0.4515332372916238, "grad_norm": 1.2578125, "learning_rate": 1.89577935519484e-05, "loss": 0.5195, "step": 3291 }, { "epoch": 0.45167043973382726, "grad_norm": 1.1328125, "learning_rate": 1.8957151832665054e-05, "loss": 0.5259, "step": 3292 }, { "epoch": 0.45180764217603075, "grad_norm": 1.25, "learning_rate": 1.8956509926747067e-05, "loss": 0.5761, "step": 3293 }, { "epoch": 0.4519448446182342, "grad_norm": 1.2578125, "learning_rate": 1.895586783420782e-05, "loss": 0.4919, "step": 3294 }, { "epoch": 0.4520820470604377, "grad_norm": 1.25, "learning_rate": 1.895522555506069e-05, "loss": 0.4867, "step": 3295 }, { "epoch": 0.45221924950264114, "grad_norm": 1.2890625, "learning_rate": 1.895458308931906e-05, "loss": 0.5355, "step": 3296 }, { "epoch": 0.45235645194484464, "grad_norm": 1.2421875, "learning_rate": 1.8953940436996313e-05, "loss": 0.5607, "step": 3297 }, { "epoch": 0.4524936543870481, "grad_norm": 1.25, "learning_rate": 1.8953297598105846e-05, "loss": 0.473, "step": 3298 }, { "epoch": 0.4526308568292516, "grad_norm": 1.15625, "learning_rate": 1.8952654572661052e-05, "loss": 0.4634, "step": 3299 }, { "epoch": 0.452768059271455, "grad_norm": 1.1875, "learning_rate": 1.8952011360675324e-05, "loss": 0.5016, "step": 3300 }, { "epoch": 0.4529052617136585, "grad_norm": 1.0859375, "learning_rate": 1.895136796216207e-05, "loss": 0.4766, "step": 3301 }, { "epoch": 0.45304246415586197, "grad_norm": 1.2265625, "learning_rate": 1.8950724377134695e-05, "loss": 0.5265, "step": 3302 }, { "epoch": 0.45317966659806547, "grad_norm": 1.1484375, "learning_rate": 1.8950080605606604e-05, "loss": 0.4973, "step": 3303 }, { "epoch": 0.4533168690402689, "grad_norm": 1.2109375, "learning_rate": 1.8949436647591217e-05, "loss": 0.5182, "step": 3304 }, { "epoch": 0.4534540714824724, "grad_norm": 1.1640625, "learning_rate": 1.894879250310195e-05, "loss": 0.4403, "step": 3305 }, { "epoch": 0.45359127392467585, "grad_norm": 1.0859375, "learning_rate": 1.8948148172152227e-05, "loss": 0.4179, "step": 3306 }, { "epoch": 0.45372847636687935, "grad_norm": 1.28125, "learning_rate": 1.8947503654755465e-05, "loss": 0.5347, "step": 3307 }, { "epoch": 0.4538656788090828, "grad_norm": 1.265625, "learning_rate": 1.8946858950925104e-05, "loss": 0.5586, "step": 3308 }, { "epoch": 0.4540028812512863, "grad_norm": 1.234375, "learning_rate": 1.894621406067457e-05, "loss": 0.4928, "step": 3309 }, { "epoch": 0.45414008369348974, "grad_norm": 1.1953125, "learning_rate": 1.8945568984017307e-05, "loss": 0.4838, "step": 3310 }, { "epoch": 0.45427728613569324, "grad_norm": 1.34375, "learning_rate": 1.8944923720966745e-05, "loss": 0.5526, "step": 3311 }, { "epoch": 0.4544144885778967, "grad_norm": 1.21875, "learning_rate": 1.894427827153634e-05, "loss": 0.5252, "step": 3312 }, { "epoch": 0.4545516910201002, "grad_norm": 1.171875, "learning_rate": 1.894363263573954e-05, "loss": 0.4584, "step": 3313 }, { "epoch": 0.4546888934623036, "grad_norm": 1.1953125, "learning_rate": 1.894298681358979e-05, "loss": 0.4699, "step": 3314 }, { "epoch": 0.4548260959045071, "grad_norm": 1.3046875, "learning_rate": 1.8942340805100547e-05, "loss": 0.5528, "step": 3315 }, { "epoch": 0.45496329834671057, "grad_norm": 1.5546875, "learning_rate": 1.8941694610285276e-05, "loss": 0.5488, "step": 3316 }, { "epoch": 0.45510050078891406, "grad_norm": 1.203125, "learning_rate": 1.8941048229157447e-05, "loss": 0.5843, "step": 3317 }, { "epoch": 0.4552377032311175, "grad_norm": 1.1484375, "learning_rate": 1.894040166173052e-05, "loss": 0.4709, "step": 3318 }, { "epoch": 0.455374905673321, "grad_norm": 1.203125, "learning_rate": 1.8939754908017963e-05, "loss": 0.5273, "step": 3319 }, { "epoch": 0.45551210811552445, "grad_norm": 1.1328125, "learning_rate": 1.8939107968033266e-05, "loss": 0.4258, "step": 3320 }, { "epoch": 0.45564931055772795, "grad_norm": 1.15625, "learning_rate": 1.8938460841789897e-05, "loss": 0.459, "step": 3321 }, { "epoch": 0.4557865129999314, "grad_norm": 1.234375, "learning_rate": 1.8937813529301343e-05, "loss": 0.5348, "step": 3322 }, { "epoch": 0.4559237154421349, "grad_norm": 1.265625, "learning_rate": 1.8937166030581097e-05, "loss": 0.5456, "step": 3323 }, { "epoch": 0.45606091788433833, "grad_norm": 1.3203125, "learning_rate": 1.893651834564264e-05, "loss": 0.567, "step": 3324 }, { "epoch": 0.45619812032654183, "grad_norm": 1.3046875, "learning_rate": 1.893587047449948e-05, "loss": 0.5278, "step": 3325 }, { "epoch": 0.4563353227687453, "grad_norm": 1.2734375, "learning_rate": 1.8935222417165104e-05, "loss": 0.5861, "step": 3326 }, { "epoch": 0.4564725252109488, "grad_norm": 1.109375, "learning_rate": 1.8934574173653022e-05, "loss": 0.4519, "step": 3327 }, { "epoch": 0.4566097276531522, "grad_norm": 1.1796875, "learning_rate": 1.893392574397674e-05, "loss": 0.5727, "step": 3328 }, { "epoch": 0.4567469300953557, "grad_norm": 1.3515625, "learning_rate": 1.8933277128149767e-05, "loss": 0.5796, "step": 3329 }, { "epoch": 0.45688413253755916, "grad_norm": 1.15625, "learning_rate": 1.8932628326185624e-05, "loss": 0.4581, "step": 3330 }, { "epoch": 0.45702133497976266, "grad_norm": 1.2109375, "learning_rate": 1.8931979338097823e-05, "loss": 0.5079, "step": 3331 }, { "epoch": 0.4571585374219661, "grad_norm": 1.1171875, "learning_rate": 1.8931330163899888e-05, "loss": 0.4583, "step": 3332 }, { "epoch": 0.4572957398641696, "grad_norm": 1.2109375, "learning_rate": 1.8930680803605345e-05, "loss": 0.5371, "step": 3333 }, { "epoch": 0.45743294230637305, "grad_norm": 1.265625, "learning_rate": 1.893003125722773e-05, "loss": 0.5706, "step": 3334 }, { "epoch": 0.45757014474857655, "grad_norm": 1.1875, "learning_rate": 1.8929381524780567e-05, "loss": 0.5471, "step": 3335 }, { "epoch": 0.45770734719078, "grad_norm": 1.125, "learning_rate": 1.8928731606277404e-05, "loss": 0.4949, "step": 3336 }, { "epoch": 0.4578445496329835, "grad_norm": 1.21875, "learning_rate": 1.8928081501731778e-05, "loss": 0.5196, "step": 3337 }, { "epoch": 0.45798175207518693, "grad_norm": 1.1875, "learning_rate": 1.8927431211157236e-05, "loss": 0.5745, "step": 3338 }, { "epoch": 0.45811895451739043, "grad_norm": 1.140625, "learning_rate": 1.8926780734567326e-05, "loss": 0.4881, "step": 3339 }, { "epoch": 0.4582561569595939, "grad_norm": 1.1875, "learning_rate": 1.8926130071975605e-05, "loss": 0.5367, "step": 3340 }, { "epoch": 0.4583933594017974, "grad_norm": 1.2265625, "learning_rate": 1.8925479223395625e-05, "loss": 0.6043, "step": 3341 }, { "epoch": 0.4585305618440008, "grad_norm": 1.1796875, "learning_rate": 1.8924828188840953e-05, "loss": 0.477, "step": 3342 }, { "epoch": 0.4586677642862043, "grad_norm": 1.1953125, "learning_rate": 1.892417696832515e-05, "loss": 0.52, "step": 3343 }, { "epoch": 0.45880496672840776, "grad_norm": 1.1640625, "learning_rate": 1.892352556186179e-05, "loss": 0.5039, "step": 3344 }, { "epoch": 0.45894216917061126, "grad_norm": 1.3203125, "learning_rate": 1.8922873969464442e-05, "loss": 0.6055, "step": 3345 }, { "epoch": 0.4590793716128147, "grad_norm": 1.15625, "learning_rate": 1.8922222191146683e-05, "loss": 0.4983, "step": 3346 }, { "epoch": 0.4592165740550182, "grad_norm": 1.171875, "learning_rate": 1.892157022692209e-05, "loss": 0.5616, "step": 3347 }, { "epoch": 0.45935377649722164, "grad_norm": 1.015625, "learning_rate": 1.892091807680426e-05, "loss": 0.4016, "step": 3348 }, { "epoch": 0.45949097893942514, "grad_norm": 1.2890625, "learning_rate": 1.892026574080677e-05, "loss": 0.607, "step": 3349 }, { "epoch": 0.4596281813816286, "grad_norm": 1.234375, "learning_rate": 1.8919613218943217e-05, "loss": 0.5541, "step": 3350 }, { "epoch": 0.4597653838238321, "grad_norm": 1.234375, "learning_rate": 1.8918960511227193e-05, "loss": 0.5448, "step": 3351 }, { "epoch": 0.45990258626603553, "grad_norm": 1.2578125, "learning_rate": 1.8918307617672306e-05, "loss": 0.5516, "step": 3352 }, { "epoch": 0.46003978870823903, "grad_norm": 1.359375, "learning_rate": 1.891765453829215e-05, "loss": 0.592, "step": 3353 }, { "epoch": 0.46017699115044247, "grad_norm": 1.2109375, "learning_rate": 1.891700127310034e-05, "loss": 0.5191, "step": 3354 }, { "epoch": 0.46031419359264597, "grad_norm": 1.375, "learning_rate": 1.8916347822110482e-05, "loss": 0.6511, "step": 3355 }, { "epoch": 0.4604513960348494, "grad_norm": 1.265625, "learning_rate": 1.89156941853362e-05, "loss": 0.5612, "step": 3356 }, { "epoch": 0.4605885984770529, "grad_norm": 1.2578125, "learning_rate": 1.891504036279111e-05, "loss": 0.5646, "step": 3357 }, { "epoch": 0.46072580091925636, "grad_norm": 1.1875, "learning_rate": 1.8914386354488826e-05, "loss": 0.477, "step": 3358 }, { "epoch": 0.46086300336145986, "grad_norm": 1.1875, "learning_rate": 1.8913732160442988e-05, "loss": 0.5416, "step": 3359 }, { "epoch": 0.4610002058036633, "grad_norm": 1.1875, "learning_rate": 1.8913077780667222e-05, "loss": 0.5109, "step": 3360 }, { "epoch": 0.4611374082458668, "grad_norm": 1.1875, "learning_rate": 1.8912423215175163e-05, "loss": 0.4864, "step": 3361 }, { "epoch": 0.46127461068807024, "grad_norm": 1.2421875, "learning_rate": 1.891176846398045e-05, "loss": 0.536, "step": 3362 }, { "epoch": 0.46141181313027374, "grad_norm": 1.21875, "learning_rate": 1.8911113527096722e-05, "loss": 0.5634, "step": 3363 }, { "epoch": 0.4615490155724772, "grad_norm": 1.3515625, "learning_rate": 1.8910458404537633e-05, "loss": 0.6284, "step": 3364 }, { "epoch": 0.4616862180146807, "grad_norm": 1.1796875, "learning_rate": 1.8909803096316826e-05, "loss": 0.509, "step": 3365 }, { "epoch": 0.4618234204568841, "grad_norm": 1.2109375, "learning_rate": 1.890914760244796e-05, "loss": 0.5244, "step": 3366 }, { "epoch": 0.4619606228990876, "grad_norm": 1.3125, "learning_rate": 1.8908491922944693e-05, "loss": 0.5678, "step": 3367 }, { "epoch": 0.46209782534129107, "grad_norm": 1.1484375, "learning_rate": 1.8907836057820682e-05, "loss": 0.4961, "step": 3368 }, { "epoch": 0.46223502778349457, "grad_norm": 1.2421875, "learning_rate": 1.8907180007089597e-05, "loss": 0.5521, "step": 3369 }, { "epoch": 0.462372230225698, "grad_norm": 1.1953125, "learning_rate": 1.8906523770765107e-05, "loss": 0.5267, "step": 3370 }, { "epoch": 0.4625094326679015, "grad_norm": 1.1875, "learning_rate": 1.8905867348860888e-05, "loss": 0.5434, "step": 3371 }, { "epoch": 0.46264663511010495, "grad_norm": 1.203125, "learning_rate": 1.8905210741390612e-05, "loss": 0.5394, "step": 3372 }, { "epoch": 0.46278383755230845, "grad_norm": 1.3203125, "learning_rate": 1.890455394836797e-05, "loss": 0.6014, "step": 3373 }, { "epoch": 0.4629210399945119, "grad_norm": 1.1171875, "learning_rate": 1.8903896969806635e-05, "loss": 0.4341, "step": 3374 }, { "epoch": 0.4630582424367154, "grad_norm": 1.109375, "learning_rate": 1.8903239805720303e-05, "loss": 0.4413, "step": 3375 }, { "epoch": 0.46319544487891884, "grad_norm": 1.234375, "learning_rate": 1.8902582456122666e-05, "loss": 0.5055, "step": 3376 }, { "epoch": 0.46333264732112234, "grad_norm": 1.1796875, "learning_rate": 1.8901924921027423e-05, "loss": 0.5102, "step": 3377 }, { "epoch": 0.4634698497633258, "grad_norm": 1.296875, "learning_rate": 1.8901267200448268e-05, "loss": 0.5304, "step": 3378 }, { "epoch": 0.4636070522055293, "grad_norm": 1.171875, "learning_rate": 1.890060929439891e-05, "loss": 0.5265, "step": 3379 }, { "epoch": 0.4637442546477327, "grad_norm": 1.2421875, "learning_rate": 1.889995120289306e-05, "loss": 0.4744, "step": 3380 }, { "epoch": 0.4638814570899362, "grad_norm": 1.21875, "learning_rate": 1.8899292925944422e-05, "loss": 0.5403, "step": 3381 }, { "epoch": 0.46401865953213967, "grad_norm": 1.2109375, "learning_rate": 1.8898634463566724e-05, "loss": 0.5215, "step": 3382 }, { "epoch": 0.46415586197434316, "grad_norm": 1.171875, "learning_rate": 1.8897975815773673e-05, "loss": 0.4912, "step": 3383 }, { "epoch": 0.4642930644165466, "grad_norm": 1.2109375, "learning_rate": 1.8897316982579005e-05, "loss": 0.5783, "step": 3384 }, { "epoch": 0.4644302668587501, "grad_norm": 1.3046875, "learning_rate": 1.8896657963996438e-05, "loss": 0.5273, "step": 3385 }, { "epoch": 0.46456746930095355, "grad_norm": 1.1484375, "learning_rate": 1.889599876003971e-05, "loss": 0.5141, "step": 3386 }, { "epoch": 0.46470467174315705, "grad_norm": 1.1484375, "learning_rate": 1.8895339370722552e-05, "loss": 0.4937, "step": 3387 }, { "epoch": 0.4648418741853605, "grad_norm": 1.1171875, "learning_rate": 1.8894679796058704e-05, "loss": 0.4929, "step": 3388 }, { "epoch": 0.464979076627564, "grad_norm": 1.2890625, "learning_rate": 1.8894020036061913e-05, "loss": 0.5971, "step": 3389 }, { "epoch": 0.46511627906976744, "grad_norm": 1.3359375, "learning_rate": 1.8893360090745924e-05, "loss": 0.5656, "step": 3390 }, { "epoch": 0.46525348151197093, "grad_norm": 1.296875, "learning_rate": 1.8892699960124484e-05, "loss": 0.6278, "step": 3391 }, { "epoch": 0.4653906839541744, "grad_norm": 1.2578125, "learning_rate": 1.8892039644211353e-05, "loss": 0.5535, "step": 3392 }, { "epoch": 0.4655278863963779, "grad_norm": 1.125, "learning_rate": 1.8891379143020286e-05, "loss": 0.4923, "step": 3393 }, { "epoch": 0.4656650888385813, "grad_norm": 1.15625, "learning_rate": 1.889071845656505e-05, "loss": 0.5442, "step": 3394 }, { "epoch": 0.4658022912807848, "grad_norm": 1.0859375, "learning_rate": 1.8890057584859404e-05, "loss": 0.4436, "step": 3395 }, { "epoch": 0.46593949372298826, "grad_norm": 1.140625, "learning_rate": 1.8889396527917125e-05, "loss": 0.4657, "step": 3396 }, { "epoch": 0.46607669616519176, "grad_norm": 1.078125, "learning_rate": 1.8888735285751986e-05, "loss": 0.4237, "step": 3397 }, { "epoch": 0.4662138986073952, "grad_norm": 1.171875, "learning_rate": 1.888807385837776e-05, "loss": 0.5254, "step": 3398 }, { "epoch": 0.4663511010495987, "grad_norm": 1.0703125, "learning_rate": 1.8887412245808234e-05, "loss": 0.4028, "step": 3399 }, { "epoch": 0.46648830349180215, "grad_norm": 1.296875, "learning_rate": 1.8886750448057193e-05, "loss": 0.5669, "step": 3400 }, { "epoch": 0.46662550593400565, "grad_norm": 1.1953125, "learning_rate": 1.8886088465138425e-05, "loss": 0.5152, "step": 3401 }, { "epoch": 0.4667627083762091, "grad_norm": 1.3203125, "learning_rate": 1.8885426297065722e-05, "loss": 0.5841, "step": 3402 }, { "epoch": 0.4668999108184126, "grad_norm": 1.2578125, "learning_rate": 1.888476394385288e-05, "loss": 0.5628, "step": 3403 }, { "epoch": 0.46703711326061603, "grad_norm": 1.1875, "learning_rate": 1.8884101405513707e-05, "loss": 0.5098, "step": 3404 }, { "epoch": 0.46717431570281953, "grad_norm": 1.171875, "learning_rate": 1.8883438682062002e-05, "loss": 0.4509, "step": 3405 }, { "epoch": 0.467311518145023, "grad_norm": 1.2265625, "learning_rate": 1.8882775773511578e-05, "loss": 0.5656, "step": 3406 }, { "epoch": 0.4674487205872265, "grad_norm": 1.2109375, "learning_rate": 1.888211267987624e-05, "loss": 0.4544, "step": 3407 }, { "epoch": 0.4675859230294299, "grad_norm": 1.0390625, "learning_rate": 1.8881449401169812e-05, "loss": 0.4375, "step": 3408 }, { "epoch": 0.4677231254716334, "grad_norm": 1.1796875, "learning_rate": 1.8880785937406114e-05, "loss": 0.5569, "step": 3409 }, { "epoch": 0.46786032791383686, "grad_norm": 1.2265625, "learning_rate": 1.8880122288598968e-05, "loss": 0.5477, "step": 3410 }, { "epoch": 0.46799753035604036, "grad_norm": 1.3046875, "learning_rate": 1.88794584547622e-05, "loss": 0.5795, "step": 3411 }, { "epoch": 0.4681347327982438, "grad_norm": 1.1640625, "learning_rate": 1.8878794435909643e-05, "loss": 0.4725, "step": 3412 }, { "epoch": 0.4682719352404473, "grad_norm": 1.171875, "learning_rate": 1.8878130232055135e-05, "loss": 0.5292, "step": 3413 }, { "epoch": 0.46840913768265074, "grad_norm": 1.34375, "learning_rate": 1.8877465843212518e-05, "loss": 0.5879, "step": 3414 }, { "epoch": 0.46854634012485424, "grad_norm": 1.234375, "learning_rate": 1.8876801269395625e-05, "loss": 0.5367, "step": 3415 }, { "epoch": 0.4686835425670577, "grad_norm": 1.34375, "learning_rate": 1.8876136510618317e-05, "loss": 0.6149, "step": 3416 }, { "epoch": 0.4688207450092612, "grad_norm": 1.2421875, "learning_rate": 1.8875471566894435e-05, "loss": 0.5649, "step": 3417 }, { "epoch": 0.46895794745146463, "grad_norm": 1.203125, "learning_rate": 1.8874806438237836e-05, "loss": 0.5325, "step": 3418 }, { "epoch": 0.46909514989366813, "grad_norm": 1.3125, "learning_rate": 1.887414112466238e-05, "loss": 0.5921, "step": 3419 }, { "epoch": 0.46923235233587157, "grad_norm": 1.1875, "learning_rate": 1.8873475626181933e-05, "loss": 0.551, "step": 3420 }, { "epoch": 0.46936955477807507, "grad_norm": 1.2265625, "learning_rate": 1.887280994281036e-05, "loss": 0.5737, "step": 3421 }, { "epoch": 0.4695067572202785, "grad_norm": 1.1796875, "learning_rate": 1.8872144074561524e-05, "loss": 0.519, "step": 3422 }, { "epoch": 0.469643959662482, "grad_norm": 1.1640625, "learning_rate": 1.8871478021449307e-05, "loss": 0.4991, "step": 3423 }, { "epoch": 0.46978116210468546, "grad_norm": 1.2578125, "learning_rate": 1.887081178348759e-05, "loss": 0.5704, "step": 3424 }, { "epoch": 0.46991836454688896, "grad_norm": 1.1328125, "learning_rate": 1.8870145360690245e-05, "loss": 0.479, "step": 3425 }, { "epoch": 0.4700555669890924, "grad_norm": 1.34375, "learning_rate": 1.8869478753071165e-05, "loss": 0.6565, "step": 3426 }, { "epoch": 0.4701927694312959, "grad_norm": 1.203125, "learning_rate": 1.8868811960644238e-05, "loss": 0.5053, "step": 3427 }, { "epoch": 0.47032997187349934, "grad_norm": 1.2421875, "learning_rate": 1.8868144983423355e-05, "loss": 0.5661, "step": 3428 }, { "epoch": 0.47046717431570284, "grad_norm": 1.2421875, "learning_rate": 1.886747782142242e-05, "loss": 0.531, "step": 3429 }, { "epoch": 0.4706043767579063, "grad_norm": 1.1953125, "learning_rate": 1.8866810474655327e-05, "loss": 0.491, "step": 3430 }, { "epoch": 0.4707415792001098, "grad_norm": 1.2578125, "learning_rate": 1.8866142943135984e-05, "loss": 0.5252, "step": 3431 }, { "epoch": 0.4708787816423132, "grad_norm": 1.1796875, "learning_rate": 1.8865475226878303e-05, "loss": 0.539, "step": 3432 }, { "epoch": 0.4710159840845167, "grad_norm": 1.2421875, "learning_rate": 1.886480732589619e-05, "loss": 0.6021, "step": 3433 }, { "epoch": 0.47115318652672017, "grad_norm": 1.078125, "learning_rate": 1.886413924020357e-05, "loss": 0.4493, "step": 3434 }, { "epoch": 0.47129038896892367, "grad_norm": 1.28125, "learning_rate": 1.8863470969814355e-05, "loss": 0.5295, "step": 3435 }, { "epoch": 0.4714275914111271, "grad_norm": 1.234375, "learning_rate": 1.8862802514742475e-05, "loss": 0.5485, "step": 3436 }, { "epoch": 0.4715647938533306, "grad_norm": 1.1953125, "learning_rate": 1.8862133875001857e-05, "loss": 0.5136, "step": 3437 }, { "epoch": 0.47170199629553405, "grad_norm": 1.359375, "learning_rate": 1.8861465050606434e-05, "loss": 0.6081, "step": 3438 }, { "epoch": 0.47183919873773755, "grad_norm": 1.21875, "learning_rate": 1.8860796041570134e-05, "loss": 0.5542, "step": 3439 }, { "epoch": 0.471976401179941, "grad_norm": 1.234375, "learning_rate": 1.886012684790691e-05, "loss": 0.4869, "step": 3440 }, { "epoch": 0.4721136036221445, "grad_norm": 1.46875, "learning_rate": 1.88594574696307e-05, "loss": 0.5906, "step": 3441 }, { "epoch": 0.47225080606434794, "grad_norm": 1.4140625, "learning_rate": 1.8858787906755442e-05, "loss": 0.569, "step": 3442 }, { "epoch": 0.47238800850655144, "grad_norm": 1.3359375, "learning_rate": 1.8858118159295104e-05, "loss": 0.6204, "step": 3443 }, { "epoch": 0.4725252109487549, "grad_norm": 1.28125, "learning_rate": 1.8857448227263627e-05, "loss": 0.4545, "step": 3444 }, { "epoch": 0.4726624133909584, "grad_norm": 1.140625, "learning_rate": 1.8856778110674977e-05, "loss": 0.5267, "step": 3445 }, { "epoch": 0.4727996158331618, "grad_norm": 1.265625, "learning_rate": 1.8856107809543113e-05, "loss": 0.5435, "step": 3446 }, { "epoch": 0.4729368182753653, "grad_norm": 1.1796875, "learning_rate": 1.8855437323882008e-05, "loss": 0.5318, "step": 3447 }, { "epoch": 0.47307402071756877, "grad_norm": 1.2265625, "learning_rate": 1.8854766653705624e-05, "loss": 0.5531, "step": 3448 }, { "epoch": 0.47321122315977227, "grad_norm": 1.171875, "learning_rate": 1.8854095799027944e-05, "loss": 0.4823, "step": 3449 }, { "epoch": 0.4733484256019757, "grad_norm": 1.140625, "learning_rate": 1.885342475986294e-05, "loss": 0.5091, "step": 3450 }, { "epoch": 0.4734856280441792, "grad_norm": 1.1875, "learning_rate": 1.8852753536224597e-05, "loss": 0.5726, "step": 3451 }, { "epoch": 0.47362283048638265, "grad_norm": 1.203125, "learning_rate": 1.88520821281269e-05, "loss": 0.5465, "step": 3452 }, { "epoch": 0.47376003292858615, "grad_norm": 1.21875, "learning_rate": 1.8851410535583836e-05, "loss": 0.5174, "step": 3453 }, { "epoch": 0.4738972353707896, "grad_norm": 1.234375, "learning_rate": 1.8850738758609405e-05, "loss": 0.532, "step": 3454 }, { "epoch": 0.4740344378129931, "grad_norm": 1.0234375, "learning_rate": 1.8850066797217596e-05, "loss": 0.418, "step": 3455 }, { "epoch": 0.47417164025519654, "grad_norm": 1.2734375, "learning_rate": 1.8849394651422418e-05, "loss": 0.5459, "step": 3456 }, { "epoch": 0.47430884269740003, "grad_norm": 1.2265625, "learning_rate": 1.8848722321237874e-05, "loss": 0.5419, "step": 3457 }, { "epoch": 0.4744460451396035, "grad_norm": 1.1953125, "learning_rate": 1.8848049806677967e-05, "loss": 0.4886, "step": 3458 }, { "epoch": 0.474583247581807, "grad_norm": 1.265625, "learning_rate": 1.884737710775672e-05, "loss": 0.5793, "step": 3459 }, { "epoch": 0.4747204500240104, "grad_norm": 1.28125, "learning_rate": 1.884670422448814e-05, "loss": 0.5512, "step": 3460 }, { "epoch": 0.4748576524662139, "grad_norm": 1.2265625, "learning_rate": 1.8846031156886257e-05, "loss": 0.5238, "step": 3461 }, { "epoch": 0.47499485490841736, "grad_norm": 1.1640625, "learning_rate": 1.8845357904965085e-05, "loss": 0.512, "step": 3462 }, { "epoch": 0.47513205735062086, "grad_norm": 1.2734375, "learning_rate": 1.884468446873866e-05, "loss": 0.5637, "step": 3463 }, { "epoch": 0.4752692597928243, "grad_norm": 1.3125, "learning_rate": 1.8844010848221008e-05, "loss": 0.5247, "step": 3464 }, { "epoch": 0.4754064622350278, "grad_norm": 1.234375, "learning_rate": 1.884333704342617e-05, "loss": 0.5096, "step": 3465 }, { "epoch": 0.47554366467723125, "grad_norm": 1.296875, "learning_rate": 1.8842663054368182e-05, "loss": 0.5567, "step": 3466 }, { "epoch": 0.47568086711943475, "grad_norm": 1.2578125, "learning_rate": 1.884198888106109e-05, "loss": 0.5158, "step": 3467 }, { "epoch": 0.4758180695616382, "grad_norm": 1.3203125, "learning_rate": 1.8841314523518942e-05, "loss": 0.5509, "step": 3468 }, { "epoch": 0.4759552720038417, "grad_norm": 1.21875, "learning_rate": 1.8840639981755788e-05, "loss": 0.5026, "step": 3469 }, { "epoch": 0.47609247444604513, "grad_norm": 1.234375, "learning_rate": 1.8839965255785678e-05, "loss": 0.5537, "step": 3470 }, { "epoch": 0.47622967688824863, "grad_norm": 1.1875, "learning_rate": 1.8839290345622677e-05, "loss": 0.5375, "step": 3471 }, { "epoch": 0.4763668793304521, "grad_norm": 1.546875, "learning_rate": 1.883861525128085e-05, "loss": 0.5176, "step": 3472 }, { "epoch": 0.4765040817726556, "grad_norm": 1.28125, "learning_rate": 1.8837939972774254e-05, "loss": 0.5935, "step": 3473 }, { "epoch": 0.476641284214859, "grad_norm": 1.140625, "learning_rate": 1.8837264510116968e-05, "loss": 0.4983, "step": 3474 }, { "epoch": 0.4767784866570625, "grad_norm": 1.5234375, "learning_rate": 1.883658886332306e-05, "loss": 0.6259, "step": 3475 }, { "epoch": 0.47691568909926596, "grad_norm": 1.1796875, "learning_rate": 1.8835913032406616e-05, "loss": 0.522, "step": 3476 }, { "epoch": 0.47705289154146946, "grad_norm": 1.2734375, "learning_rate": 1.883523701738171e-05, "loss": 0.4603, "step": 3477 }, { "epoch": 0.4771900939836729, "grad_norm": 1.1796875, "learning_rate": 1.883456081826243e-05, "loss": 0.5098, "step": 3478 }, { "epoch": 0.4773272964258764, "grad_norm": 1.234375, "learning_rate": 1.883388443506287e-05, "loss": 0.466, "step": 3479 }, { "epoch": 0.47746449886807985, "grad_norm": 1.328125, "learning_rate": 1.8833207867797117e-05, "loss": 0.5878, "step": 3480 }, { "epoch": 0.47760170131028334, "grad_norm": 1.328125, "learning_rate": 1.883253111647927e-05, "loss": 0.5303, "step": 3481 }, { "epoch": 0.4777389037524868, "grad_norm": 1.1484375, "learning_rate": 1.8831854181123433e-05, "loss": 0.5601, "step": 3482 }, { "epoch": 0.4778761061946903, "grad_norm": 1.21875, "learning_rate": 1.8831177061743705e-05, "loss": 0.5459, "step": 3483 }, { "epoch": 0.47801330863689373, "grad_norm": 1.0546875, "learning_rate": 1.8830499758354204e-05, "loss": 0.4545, "step": 3484 }, { "epoch": 0.47815051107909723, "grad_norm": 1.234375, "learning_rate": 1.8829822270969032e-05, "loss": 0.5485, "step": 3485 }, { "epoch": 0.4782877135213007, "grad_norm": 1.140625, "learning_rate": 1.8829144599602317e-05, "loss": 0.4691, "step": 3486 }, { "epoch": 0.47842491596350417, "grad_norm": 1.09375, "learning_rate": 1.8828466744268167e-05, "loss": 0.4699, "step": 3487 }, { "epoch": 0.4785621184057076, "grad_norm": 1.2109375, "learning_rate": 1.8827788704980714e-05, "loss": 0.5696, "step": 3488 }, { "epoch": 0.4786993208479111, "grad_norm": 1.171875, "learning_rate": 1.8827110481754084e-05, "loss": 0.5059, "step": 3489 }, { "epoch": 0.47883652329011456, "grad_norm": 1.265625, "learning_rate": 1.8826432074602404e-05, "loss": 0.6002, "step": 3490 }, { "epoch": 0.47897372573231806, "grad_norm": 1.1953125, "learning_rate": 1.882575348353982e-05, "loss": 0.5324, "step": 3491 }, { "epoch": 0.4791109281745215, "grad_norm": 1.2734375, "learning_rate": 1.8825074708580465e-05, "loss": 0.5273, "step": 3492 }, { "epoch": 0.479248130616725, "grad_norm": 1.171875, "learning_rate": 1.882439574973848e-05, "loss": 0.5085, "step": 3493 }, { "epoch": 0.47938533305892844, "grad_norm": 1.2265625, "learning_rate": 1.8823716607028013e-05, "loss": 0.5016, "step": 3494 }, { "epoch": 0.47952253550113194, "grad_norm": 1.1875, "learning_rate": 1.8823037280463217e-05, "loss": 0.4707, "step": 3495 }, { "epoch": 0.4796597379433354, "grad_norm": 1.2109375, "learning_rate": 1.882235777005825e-05, "loss": 0.5607, "step": 3496 }, { "epoch": 0.4797969403855389, "grad_norm": 1.171875, "learning_rate": 1.8821678075827263e-05, "loss": 0.5187, "step": 3497 }, { "epoch": 0.4799341428277423, "grad_norm": 1.203125, "learning_rate": 1.8820998197784422e-05, "loss": 0.5011, "step": 3498 }, { "epoch": 0.4800713452699458, "grad_norm": 1.3203125, "learning_rate": 1.8820318135943895e-05, "loss": 0.5887, "step": 3499 }, { "epoch": 0.48020854771214927, "grad_norm": 1.203125, "learning_rate": 1.881963789031985e-05, "loss": 0.5749, "step": 3500 }, { "epoch": 0.48034575015435277, "grad_norm": 1.140625, "learning_rate": 1.8818957460926463e-05, "loss": 0.4601, "step": 3501 }, { "epoch": 0.4804829525965562, "grad_norm": 1.265625, "learning_rate": 1.8818276847777906e-05, "loss": 0.5973, "step": 3502 }, { "epoch": 0.4806201550387597, "grad_norm": 1.2578125, "learning_rate": 1.8817596050888364e-05, "loss": 0.61, "step": 3503 }, { "epoch": 0.48075735748096315, "grad_norm": 1.234375, "learning_rate": 1.8816915070272026e-05, "loss": 0.5748, "step": 3504 }, { "epoch": 0.48089455992316665, "grad_norm": 1.1875, "learning_rate": 1.8816233905943074e-05, "loss": 0.5158, "step": 3505 }, { "epoch": 0.4810317623653701, "grad_norm": 1.1640625, "learning_rate": 1.881555255791571e-05, "loss": 0.4772, "step": 3506 }, { "epoch": 0.4811689648075736, "grad_norm": 1.125, "learning_rate": 1.8814871026204122e-05, "loss": 0.4961, "step": 3507 }, { "epoch": 0.48130616724977704, "grad_norm": 1.2265625, "learning_rate": 1.8814189310822514e-05, "loss": 0.5223, "step": 3508 }, { "epoch": 0.48144336969198054, "grad_norm": 1.109375, "learning_rate": 1.8813507411785092e-05, "loss": 0.4732, "step": 3509 }, { "epoch": 0.481580572134184, "grad_norm": 1.1484375, "learning_rate": 1.8812825329106065e-05, "loss": 0.5415, "step": 3510 }, { "epoch": 0.4817177745763875, "grad_norm": 1.234375, "learning_rate": 1.8812143062799643e-05, "loss": 0.5639, "step": 3511 }, { "epoch": 0.4818549770185909, "grad_norm": 1.2265625, "learning_rate": 1.881146061288004e-05, "loss": 0.5392, "step": 3512 }, { "epoch": 0.4819921794607944, "grad_norm": 1.1875, "learning_rate": 1.881077797936148e-05, "loss": 0.5155, "step": 3513 }, { "epoch": 0.48212938190299787, "grad_norm": 1.203125, "learning_rate": 1.8810095162258178e-05, "loss": 0.555, "step": 3514 }, { "epoch": 0.48226658434520137, "grad_norm": 1.203125, "learning_rate": 1.8809412161584375e-05, "loss": 0.5077, "step": 3515 }, { "epoch": 0.4824037867874048, "grad_norm": 1.1875, "learning_rate": 1.8808728977354294e-05, "loss": 0.5027, "step": 3516 }, { "epoch": 0.4825409892296083, "grad_norm": 1.1328125, "learning_rate": 1.880804560958217e-05, "loss": 0.4835, "step": 3517 }, { "epoch": 0.48267819167181175, "grad_norm": 1.203125, "learning_rate": 1.8807362058282245e-05, "loss": 0.4944, "step": 3518 }, { "epoch": 0.48281539411401525, "grad_norm": 1.25, "learning_rate": 1.8806678323468757e-05, "loss": 0.5283, "step": 3519 }, { "epoch": 0.4829525965562187, "grad_norm": 1.234375, "learning_rate": 1.880599440515596e-05, "loss": 0.5392, "step": 3520 }, { "epoch": 0.4830897989984222, "grad_norm": 1.28125, "learning_rate": 1.88053103033581e-05, "loss": 0.5708, "step": 3521 }, { "epoch": 0.48322700144062564, "grad_norm": 1.1875, "learning_rate": 1.8804626018089426e-05, "loss": 0.5331, "step": 3522 }, { "epoch": 0.48336420388282914, "grad_norm": 1.1640625, "learning_rate": 1.8803941549364208e-05, "loss": 0.4847, "step": 3523 }, { "epoch": 0.4835014063250326, "grad_norm": 1.234375, "learning_rate": 1.8803256897196694e-05, "loss": 0.5368, "step": 3524 }, { "epoch": 0.4836386087672361, "grad_norm": 1.328125, "learning_rate": 1.880257206160116e-05, "loss": 0.6261, "step": 3525 }, { "epoch": 0.4837758112094395, "grad_norm": 1.265625, "learning_rate": 1.8801887042591874e-05, "loss": 0.5797, "step": 3526 }, { "epoch": 0.483913013651643, "grad_norm": 1.140625, "learning_rate": 1.8801201840183105e-05, "loss": 0.4986, "step": 3527 }, { "epoch": 0.48405021609384646, "grad_norm": 1.25, "learning_rate": 1.8800516454389135e-05, "loss": 0.5388, "step": 3528 }, { "epoch": 0.48418741853604996, "grad_norm": 1.1953125, "learning_rate": 1.879983088522424e-05, "loss": 0.5596, "step": 3529 }, { "epoch": 0.4843246209782534, "grad_norm": 1.1328125, "learning_rate": 1.879914513270271e-05, "loss": 0.4949, "step": 3530 }, { "epoch": 0.4844618234204569, "grad_norm": 1.3125, "learning_rate": 1.879845919683883e-05, "loss": 0.6565, "step": 3531 }, { "epoch": 0.48459902586266035, "grad_norm": 1.2109375, "learning_rate": 1.879777307764689e-05, "loss": 0.527, "step": 3532 }, { "epoch": 0.48473622830486385, "grad_norm": 1.1875, "learning_rate": 1.8797086775141197e-05, "loss": 0.4988, "step": 3533 }, { "epoch": 0.4848734307470673, "grad_norm": 1.21875, "learning_rate": 1.8796400289336038e-05, "loss": 0.5408, "step": 3534 }, { "epoch": 0.4850106331892708, "grad_norm": 1.3984375, "learning_rate": 1.8795713620245726e-05, "loss": 0.602, "step": 3535 }, { "epoch": 0.48514783563147423, "grad_norm": 1.3984375, "learning_rate": 1.8795026767884562e-05, "loss": 0.6147, "step": 3536 }, { "epoch": 0.48528503807367773, "grad_norm": 1.28125, "learning_rate": 1.879433973226686e-05, "loss": 0.5334, "step": 3537 }, { "epoch": 0.4854222405158812, "grad_norm": 1.265625, "learning_rate": 1.8793652513406942e-05, "loss": 0.5655, "step": 3538 }, { "epoch": 0.4855594429580847, "grad_norm": 1.328125, "learning_rate": 1.879296511131912e-05, "loss": 0.6867, "step": 3539 }, { "epoch": 0.4856966454002881, "grad_norm": 1.15625, "learning_rate": 1.8792277526017716e-05, "loss": 0.4389, "step": 3540 }, { "epoch": 0.4858338478424916, "grad_norm": 1.140625, "learning_rate": 1.879158975751706e-05, "loss": 0.4789, "step": 3541 }, { "epoch": 0.48597105028469506, "grad_norm": 1.078125, "learning_rate": 1.8790901805831482e-05, "loss": 0.4363, "step": 3542 }, { "epoch": 0.48610825272689856, "grad_norm": 1.2265625, "learning_rate": 1.8790213670975313e-05, "loss": 0.5241, "step": 3543 }, { "epoch": 0.486245455169102, "grad_norm": 1.3671875, "learning_rate": 1.8789525352962897e-05, "loss": 0.6584, "step": 3544 }, { "epoch": 0.4863826576113055, "grad_norm": 1.34375, "learning_rate": 1.8788836851808574e-05, "loss": 0.5716, "step": 3545 }, { "epoch": 0.48651986005350895, "grad_norm": 1.1796875, "learning_rate": 1.878814816752669e-05, "loss": 0.5327, "step": 3546 }, { "epoch": 0.48665706249571244, "grad_norm": 1.2109375, "learning_rate": 1.878745930013159e-05, "loss": 0.5079, "step": 3547 }, { "epoch": 0.4867942649379159, "grad_norm": 1.328125, "learning_rate": 1.8786770249637637e-05, "loss": 0.6362, "step": 3548 }, { "epoch": 0.4869314673801194, "grad_norm": 1.1875, "learning_rate": 1.878608101605918e-05, "loss": 0.5113, "step": 3549 }, { "epoch": 0.48706866982232283, "grad_norm": 1.2109375, "learning_rate": 1.8785391599410583e-05, "loss": 0.5563, "step": 3550 }, { "epoch": 0.48720587226452633, "grad_norm": 1.265625, "learning_rate": 1.878470199970621e-05, "loss": 0.5763, "step": 3551 }, { "epoch": 0.4873430747067298, "grad_norm": 1.203125, "learning_rate": 1.8784012216960433e-05, "loss": 0.482, "step": 3552 }, { "epoch": 0.48748027714893327, "grad_norm": 1.21875, "learning_rate": 1.8783322251187618e-05, "loss": 0.4746, "step": 3553 }, { "epoch": 0.4876174795911367, "grad_norm": 1.1875, "learning_rate": 1.8782632102402153e-05, "loss": 0.5644, "step": 3554 }, { "epoch": 0.4877546820333402, "grad_norm": 1.15625, "learning_rate": 1.8781941770618405e-05, "loss": 0.4496, "step": 3555 }, { "epoch": 0.48789188447554366, "grad_norm": 1.5, "learning_rate": 1.8781251255850766e-05, "loss": 0.6083, "step": 3556 }, { "epoch": 0.48802908691774716, "grad_norm": 1.109375, "learning_rate": 1.878056055811362e-05, "loss": 0.4927, "step": 3557 }, { "epoch": 0.4881662893599506, "grad_norm": 1.1953125, "learning_rate": 1.877986967742136e-05, "loss": 0.4724, "step": 3558 }, { "epoch": 0.4883034918021541, "grad_norm": 1.1953125, "learning_rate": 1.8779178613788383e-05, "loss": 0.5522, "step": 3559 }, { "epoch": 0.48844069424435754, "grad_norm": 1.203125, "learning_rate": 1.877848736722909e-05, "loss": 0.5992, "step": 3560 }, { "epoch": 0.48857789668656104, "grad_norm": 1.171875, "learning_rate": 1.8777795937757875e-05, "loss": 0.4454, "step": 3561 }, { "epoch": 0.4887150991287645, "grad_norm": 1.171875, "learning_rate": 1.8777104325389155e-05, "loss": 0.5043, "step": 3562 }, { "epoch": 0.488852301570968, "grad_norm": 1.234375, "learning_rate": 1.8776412530137337e-05, "loss": 0.568, "step": 3563 }, { "epoch": 0.4889895040131714, "grad_norm": 1.2734375, "learning_rate": 1.8775720552016835e-05, "loss": 0.5549, "step": 3564 }, { "epoch": 0.4891267064553749, "grad_norm": 1.21875, "learning_rate": 1.8775028391042066e-05, "loss": 0.454, "step": 3565 }, { "epoch": 0.48926390889757837, "grad_norm": 1.1953125, "learning_rate": 1.8774336047227456e-05, "loss": 0.5114, "step": 3566 }, { "epoch": 0.48940111133978187, "grad_norm": 1.25, "learning_rate": 1.877364352058743e-05, "loss": 0.5101, "step": 3567 }, { "epoch": 0.4895383137819853, "grad_norm": 1.1171875, "learning_rate": 1.877295081113641e-05, "loss": 0.4771, "step": 3568 }, { "epoch": 0.4896755162241888, "grad_norm": 1.203125, "learning_rate": 1.877225791888884e-05, "loss": 0.5948, "step": 3569 }, { "epoch": 0.48981271866639225, "grad_norm": 1.203125, "learning_rate": 1.8771564843859152e-05, "loss": 0.4895, "step": 3570 }, { "epoch": 0.48994992110859575, "grad_norm": 1.1640625, "learning_rate": 1.877087158606179e-05, "loss": 0.5265, "step": 3571 }, { "epoch": 0.4900871235507992, "grad_norm": 1.15625, "learning_rate": 1.8770178145511196e-05, "loss": 0.5392, "step": 3572 }, { "epoch": 0.4902243259930027, "grad_norm": 1.234375, "learning_rate": 1.8769484522221823e-05, "loss": 0.5246, "step": 3573 }, { "epoch": 0.49036152843520614, "grad_norm": 1.1484375, "learning_rate": 1.8768790716208117e-05, "loss": 0.4619, "step": 3574 }, { "epoch": 0.49049873087740964, "grad_norm": 1.1328125, "learning_rate": 1.8768096727484543e-05, "loss": 0.4374, "step": 3575 }, { "epoch": 0.4906359333196131, "grad_norm": 1.1484375, "learning_rate": 1.876740255606555e-05, "loss": 0.4664, "step": 3576 }, { "epoch": 0.4907731357618166, "grad_norm": 1.2265625, "learning_rate": 1.8766708201965614e-05, "loss": 0.5277, "step": 3577 }, { "epoch": 0.49091033820402, "grad_norm": 1.203125, "learning_rate": 1.8766013665199194e-05, "loss": 0.5462, "step": 3578 }, { "epoch": 0.4910475406462235, "grad_norm": 1.171875, "learning_rate": 1.8765318945780767e-05, "loss": 0.5291, "step": 3579 }, { "epoch": 0.49118474308842697, "grad_norm": 1.2109375, "learning_rate": 1.8764624043724805e-05, "loss": 0.4777, "step": 3580 }, { "epoch": 0.49132194553063047, "grad_norm": 1.2265625, "learning_rate": 1.8763928959045792e-05, "loss": 0.621, "step": 3581 }, { "epoch": 0.4914591479728339, "grad_norm": 1.1953125, "learning_rate": 1.8763233691758205e-05, "loss": 0.4905, "step": 3582 }, { "epoch": 0.4915963504150374, "grad_norm": 1.21875, "learning_rate": 1.8762538241876532e-05, "loss": 0.4783, "step": 3583 }, { "epoch": 0.49173355285724085, "grad_norm": 1.2109375, "learning_rate": 1.8761842609415268e-05, "loss": 0.5736, "step": 3584 }, { "epoch": 0.49187075529944435, "grad_norm": 1.296875, "learning_rate": 1.8761146794388906e-05, "loss": 0.5552, "step": 3585 }, { "epoch": 0.4920079577416478, "grad_norm": 1.2734375, "learning_rate": 1.8760450796811938e-05, "loss": 0.5752, "step": 3586 }, { "epoch": 0.4921451601838513, "grad_norm": 1.265625, "learning_rate": 1.8759754616698872e-05, "loss": 0.5491, "step": 3587 }, { "epoch": 0.49228236262605474, "grad_norm": 1.3359375, "learning_rate": 1.8759058254064215e-05, "loss": 0.6857, "step": 3588 }, { "epoch": 0.49241956506825824, "grad_norm": 1.125, "learning_rate": 1.8758361708922477e-05, "loss": 0.4345, "step": 3589 }, { "epoch": 0.4925567675104617, "grad_norm": 1.2578125, "learning_rate": 1.8757664981288164e-05, "loss": 0.5691, "step": 3590 }, { "epoch": 0.4926939699526652, "grad_norm": 1.1171875, "learning_rate": 1.8756968071175802e-05, "loss": 0.4541, "step": 3591 }, { "epoch": 0.4928311723948686, "grad_norm": 1.1171875, "learning_rate": 1.8756270978599905e-05, "loss": 0.5025, "step": 3592 }, { "epoch": 0.4929683748370721, "grad_norm": 1.2109375, "learning_rate": 1.8755573703575005e-05, "loss": 0.5656, "step": 3593 }, { "epoch": 0.49310557727927556, "grad_norm": 1.140625, "learning_rate": 1.8754876246115624e-05, "loss": 0.4903, "step": 3594 }, { "epoch": 0.49324277972147906, "grad_norm": 1.1796875, "learning_rate": 1.87541786062363e-05, "loss": 0.519, "step": 3595 }, { "epoch": 0.4933799821636825, "grad_norm": 1.21875, "learning_rate": 1.8753480783951566e-05, "loss": 0.5672, "step": 3596 }, { "epoch": 0.493517184605886, "grad_norm": 1.2109375, "learning_rate": 1.875278277927596e-05, "loss": 0.5336, "step": 3597 }, { "epoch": 0.49365438704808945, "grad_norm": 1.1015625, "learning_rate": 1.875208459222403e-05, "loss": 0.5075, "step": 3598 }, { "epoch": 0.49379158949029295, "grad_norm": 1.2890625, "learning_rate": 1.8751386222810322e-05, "loss": 0.561, "step": 3599 }, { "epoch": 0.4939287919324964, "grad_norm": 1.1875, "learning_rate": 1.875068767104939e-05, "loss": 0.5332, "step": 3600 }, { "epoch": 0.4940659943746999, "grad_norm": 1.2109375, "learning_rate": 1.8749988936955787e-05, "loss": 0.552, "step": 3601 }, { "epoch": 0.49420319681690333, "grad_norm": 1.2109375, "learning_rate": 1.8749290020544072e-05, "loss": 0.5646, "step": 3602 }, { "epoch": 0.49434039925910683, "grad_norm": 1.1015625, "learning_rate": 1.8748590921828807e-05, "loss": 0.5016, "step": 3603 }, { "epoch": 0.4944776017013103, "grad_norm": 1.2578125, "learning_rate": 1.874789164082456e-05, "loss": 0.5634, "step": 3604 }, { "epoch": 0.4946148041435138, "grad_norm": 1.21875, "learning_rate": 1.8747192177545906e-05, "loss": 0.5972, "step": 3605 }, { "epoch": 0.4947520065857172, "grad_norm": 1.15625, "learning_rate": 1.8746492532007414e-05, "loss": 0.4937, "step": 3606 }, { "epoch": 0.4948892090279207, "grad_norm": 1.1953125, "learning_rate": 1.8745792704223657e-05, "loss": 0.5672, "step": 3607 }, { "epoch": 0.49502641147012416, "grad_norm": 1.1640625, "learning_rate": 1.8745092694209223e-05, "loss": 0.5201, "step": 3608 }, { "epoch": 0.49516361391232766, "grad_norm": 1.1875, "learning_rate": 1.8744392501978702e-05, "loss": 0.5093, "step": 3609 }, { "epoch": 0.4953008163545311, "grad_norm": 1.2265625, "learning_rate": 1.8743692127546675e-05, "loss": 0.5959, "step": 3610 }, { "epoch": 0.4954380187967346, "grad_norm": 1.0859375, "learning_rate": 1.874299157092774e-05, "loss": 0.4818, "step": 3611 }, { "epoch": 0.49557522123893805, "grad_norm": 1.15625, "learning_rate": 1.8742290832136495e-05, "loss": 0.4722, "step": 3612 }, { "epoch": 0.49571242368114155, "grad_norm": 1.28125, "learning_rate": 1.8741589911187538e-05, "loss": 0.5437, "step": 3613 }, { "epoch": 0.495849626123345, "grad_norm": 1.109375, "learning_rate": 1.8740888808095475e-05, "loss": 0.4453, "step": 3614 }, { "epoch": 0.4959868285655485, "grad_norm": 1.2265625, "learning_rate": 1.874018752287491e-05, "loss": 0.539, "step": 3615 }, { "epoch": 0.49612403100775193, "grad_norm": 1.21875, "learning_rate": 1.8739486055540466e-05, "loss": 0.5021, "step": 3616 }, { "epoch": 0.49626123344995543, "grad_norm": 1.1875, "learning_rate": 1.873878440610675e-05, "loss": 0.4817, "step": 3617 }, { "epoch": 0.4963984358921589, "grad_norm": 1.28125, "learning_rate": 1.873808257458838e-05, "loss": 0.587, "step": 3618 }, { "epoch": 0.4965356383343624, "grad_norm": 1.109375, "learning_rate": 1.873738056099999e-05, "loss": 0.4895, "step": 3619 }, { "epoch": 0.4966728407765658, "grad_norm": 1.2421875, "learning_rate": 1.87366783653562e-05, "loss": 0.5613, "step": 3620 }, { "epoch": 0.4968100432187693, "grad_norm": 1.2578125, "learning_rate": 1.8735975987671638e-05, "loss": 0.4691, "step": 3621 }, { "epoch": 0.49694724566097276, "grad_norm": 1.1875, "learning_rate": 1.8735273427960946e-05, "loss": 0.4586, "step": 3622 }, { "epoch": 0.49708444810317626, "grad_norm": 1.140625, "learning_rate": 1.873457068623876e-05, "loss": 0.5119, "step": 3623 }, { "epoch": 0.4972216505453797, "grad_norm": 1.28125, "learning_rate": 1.8733867762519723e-05, "loss": 0.589, "step": 3624 }, { "epoch": 0.4973588529875832, "grad_norm": 1.0859375, "learning_rate": 1.8733164656818482e-05, "loss": 0.4004, "step": 3625 }, { "epoch": 0.49749605542978664, "grad_norm": 1.2421875, "learning_rate": 1.8732461369149685e-05, "loss": 0.5979, "step": 3626 }, { "epoch": 0.49763325787199014, "grad_norm": 1.21875, "learning_rate": 1.8731757899527992e-05, "loss": 0.5215, "step": 3627 }, { "epoch": 0.4977704603141936, "grad_norm": 1.171875, "learning_rate": 1.8731054247968053e-05, "loss": 0.5334, "step": 3628 }, { "epoch": 0.4979076627563971, "grad_norm": 1.265625, "learning_rate": 1.873035041448453e-05, "loss": 0.5366, "step": 3629 }, { "epoch": 0.49804486519860053, "grad_norm": 1.21875, "learning_rate": 1.8729646399092092e-05, "loss": 0.5, "step": 3630 }, { "epoch": 0.498182067640804, "grad_norm": 1.25, "learning_rate": 1.872894220180541e-05, "loss": 0.5487, "step": 3631 }, { "epoch": 0.49831927008300747, "grad_norm": 1.171875, "learning_rate": 1.8728237822639153e-05, "loss": 0.5071, "step": 3632 }, { "epoch": 0.49845647252521097, "grad_norm": 1.203125, "learning_rate": 1.8727533261607996e-05, "loss": 0.521, "step": 3633 }, { "epoch": 0.4985936749674144, "grad_norm": 1.140625, "learning_rate": 1.8726828518726628e-05, "loss": 0.5106, "step": 3634 }, { "epoch": 0.4987308774096179, "grad_norm": 1.1328125, "learning_rate": 1.8726123594009725e-05, "loss": 0.502, "step": 3635 }, { "epoch": 0.49886807985182136, "grad_norm": 1.1796875, "learning_rate": 1.8725418487471975e-05, "loss": 0.4809, "step": 3636 }, { "epoch": 0.49900528229402485, "grad_norm": 1.1640625, "learning_rate": 1.8724713199128077e-05, "loss": 0.4858, "step": 3637 }, { "epoch": 0.4991424847362283, "grad_norm": 1.3203125, "learning_rate": 1.872400772899272e-05, "loss": 0.6058, "step": 3638 }, { "epoch": 0.4992796871784318, "grad_norm": 1.171875, "learning_rate": 1.8723302077080604e-05, "loss": 0.5306, "step": 3639 }, { "epoch": 0.49941688962063524, "grad_norm": 1.15625, "learning_rate": 1.872259624340644e-05, "loss": 0.4921, "step": 3640 }, { "epoch": 0.49955409206283874, "grad_norm": 1.21875, "learning_rate": 1.8721890227984922e-05, "loss": 0.514, "step": 3641 }, { "epoch": 0.4996912945050422, "grad_norm": 1.1953125, "learning_rate": 1.8721184030830772e-05, "loss": 0.4868, "step": 3642 }, { "epoch": 0.4998284969472457, "grad_norm": 1.3046875, "learning_rate": 1.8720477651958697e-05, "loss": 0.6002, "step": 3643 }, { "epoch": 0.4999656993894491, "grad_norm": 1.1328125, "learning_rate": 1.8719771091383424e-05, "loss": 0.5036, "step": 3644 }, { "epoch": 0.4999656993894491, "eval_loss": 1.6855067014694214, "eval_runtime": 118.5494, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.717, "step": 3644 }, { "epoch": 0.5001029018316526, "grad_norm": 1.0703125, "learning_rate": 1.8719064349119666e-05, "loss": 0.4447, "step": 3645 }, { "epoch": 0.5002401042738561, "grad_norm": 1.2265625, "learning_rate": 1.8718357425182153e-05, "loss": 0.5796, "step": 3646 }, { "epoch": 0.5003773067160595, "grad_norm": 1.203125, "learning_rate": 1.8717650319585614e-05, "loss": 0.5925, "step": 3647 }, { "epoch": 0.500514509158263, "grad_norm": 1.203125, "learning_rate": 1.8716943032344784e-05, "loss": 0.5547, "step": 3648 }, { "epoch": 0.5006517116004665, "grad_norm": 1.2578125, "learning_rate": 1.87162355634744e-05, "loss": 0.5446, "step": 3649 }, { "epoch": 0.50078891404267, "grad_norm": 1.2421875, "learning_rate": 1.8715527912989202e-05, "loss": 0.5809, "step": 3650 }, { "epoch": 0.5009261164848734, "grad_norm": 1.265625, "learning_rate": 1.8714820080903936e-05, "loss": 0.5152, "step": 3651 }, { "epoch": 0.5010633189270769, "grad_norm": 1.3359375, "learning_rate": 1.871411206723335e-05, "loss": 0.5288, "step": 3652 }, { "epoch": 0.5012005213692804, "grad_norm": 1.1796875, "learning_rate": 1.8713403871992194e-05, "loss": 0.5311, "step": 3653 }, { "epoch": 0.5013377238114839, "grad_norm": 1.171875, "learning_rate": 1.8712695495195228e-05, "loss": 0.5634, "step": 3654 }, { "epoch": 0.5014749262536873, "grad_norm": 1.2578125, "learning_rate": 1.871198693685721e-05, "loss": 0.5481, "step": 3655 }, { "epoch": 0.5016121286958908, "grad_norm": 1.1796875, "learning_rate": 1.871127819699291e-05, "loss": 0.5117, "step": 3656 }, { "epoch": 0.5017493311380943, "grad_norm": 1.21875, "learning_rate": 1.8710569275617083e-05, "loss": 0.553, "step": 3657 }, { "epoch": 0.5018865335802978, "grad_norm": 1.2578125, "learning_rate": 1.870986017274451e-05, "loss": 0.5523, "step": 3658 }, { "epoch": 0.5020237360225012, "grad_norm": 1.3359375, "learning_rate": 1.8709150888389965e-05, "loss": 0.5749, "step": 3659 }, { "epoch": 0.5021609384647047, "grad_norm": 1.203125, "learning_rate": 1.8708441422568224e-05, "loss": 0.53, "step": 3660 }, { "epoch": 0.5022981409069082, "grad_norm": 1.3125, "learning_rate": 1.870773177529407e-05, "loss": 0.6006, "step": 3661 }, { "epoch": 0.5024353433491117, "grad_norm": 1.2109375, "learning_rate": 1.8707021946582293e-05, "loss": 0.5391, "step": 3662 }, { "epoch": 0.502572545791315, "grad_norm": 1.125, "learning_rate": 1.8706311936447684e-05, "loss": 0.5149, "step": 3663 }, { "epoch": 0.5027097482335185, "grad_norm": 1.328125, "learning_rate": 1.8705601744905028e-05, "loss": 0.5968, "step": 3664 }, { "epoch": 0.502846950675722, "grad_norm": 1.0546875, "learning_rate": 1.870489137196913e-05, "loss": 0.4479, "step": 3665 }, { "epoch": 0.5029841531179255, "grad_norm": 1.09375, "learning_rate": 1.8704180817654794e-05, "loss": 0.502, "step": 3666 }, { "epoch": 0.5031213555601289, "grad_norm": 1.1328125, "learning_rate": 1.8703470081976817e-05, "loss": 0.4979, "step": 3667 }, { "epoch": 0.5032585580023324, "grad_norm": 1.171875, "learning_rate": 1.8702759164950015e-05, "loss": 0.5073, "step": 3668 }, { "epoch": 0.5033957604445359, "grad_norm": 1.109375, "learning_rate": 1.87020480665892e-05, "loss": 0.435, "step": 3669 }, { "epoch": 0.5035329628867394, "grad_norm": 1.1796875, "learning_rate": 1.8701336786909185e-05, "loss": 0.5218, "step": 3670 }, { "epoch": 0.5036701653289428, "grad_norm": 1.328125, "learning_rate": 1.8700625325924794e-05, "loss": 0.538, "step": 3671 }, { "epoch": 0.5038073677711463, "grad_norm": 1.171875, "learning_rate": 1.8699913683650855e-05, "loss": 0.4938, "step": 3672 }, { "epoch": 0.5039445702133498, "grad_norm": 1.2890625, "learning_rate": 1.8699201860102186e-05, "loss": 0.5766, "step": 3673 }, { "epoch": 0.5040817726555533, "grad_norm": 1.1328125, "learning_rate": 1.8698489855293623e-05, "loss": 0.4781, "step": 3674 }, { "epoch": 0.5042189750977567, "grad_norm": 1.1484375, "learning_rate": 1.869777766924001e-05, "loss": 0.4742, "step": 3675 }, { "epoch": 0.5043561775399602, "grad_norm": 1.1875, "learning_rate": 1.8697065301956172e-05, "loss": 0.5222, "step": 3676 }, { "epoch": 0.5044933799821637, "grad_norm": 1.15625, "learning_rate": 1.8696352753456964e-05, "loss": 0.5709, "step": 3677 }, { "epoch": 0.5046305824243672, "grad_norm": 1.28125, "learning_rate": 1.8695640023757225e-05, "loss": 0.6306, "step": 3678 }, { "epoch": 0.5047677848665706, "grad_norm": 1.171875, "learning_rate": 1.869492711287181e-05, "loss": 0.5077, "step": 3679 }, { "epoch": 0.5049049873087741, "grad_norm": 1.2734375, "learning_rate": 1.8694214020815577e-05, "loss": 0.5589, "step": 3680 }, { "epoch": 0.5050421897509776, "grad_norm": 1.2265625, "learning_rate": 1.8693500747603375e-05, "loss": 0.5861, "step": 3681 }, { "epoch": 0.5051793921931811, "grad_norm": 1.1953125, "learning_rate": 1.869278729325007e-05, "loss": 0.5251, "step": 3682 }, { "epoch": 0.5053165946353845, "grad_norm": 1.25, "learning_rate": 1.869207365777053e-05, "loss": 0.566, "step": 3683 }, { "epoch": 0.505453797077588, "grad_norm": 1.1640625, "learning_rate": 1.8691359841179624e-05, "loss": 0.5173, "step": 3684 }, { "epoch": 0.5055909995197915, "grad_norm": 1.0390625, "learning_rate": 1.8690645843492224e-05, "loss": 0.4342, "step": 3685 }, { "epoch": 0.505728201961995, "grad_norm": 1.25, "learning_rate": 1.8689931664723207e-05, "loss": 0.5646, "step": 3686 }, { "epoch": 0.5058654044041984, "grad_norm": 1.1171875, "learning_rate": 1.8689217304887453e-05, "loss": 0.5241, "step": 3687 }, { "epoch": 0.5060026068464019, "grad_norm": 1.21875, "learning_rate": 1.8688502763999854e-05, "loss": 0.5341, "step": 3688 }, { "epoch": 0.5061398092886054, "grad_norm": 1.1328125, "learning_rate": 1.868778804207529e-05, "loss": 0.5217, "step": 3689 }, { "epoch": 0.5062770117308089, "grad_norm": 1.140625, "learning_rate": 1.8687073139128654e-05, "loss": 0.4611, "step": 3690 }, { "epoch": 0.5064142141730122, "grad_norm": 1.2109375, "learning_rate": 1.8686358055174848e-05, "loss": 0.5978, "step": 3691 }, { "epoch": 0.5065514166152157, "grad_norm": 1.265625, "learning_rate": 1.868564279022876e-05, "loss": 0.5743, "step": 3692 }, { "epoch": 0.5066886190574192, "grad_norm": 1.2265625, "learning_rate": 1.868492734430531e-05, "loss": 0.5392, "step": 3693 }, { "epoch": 0.5068258214996227, "grad_norm": 1.1953125, "learning_rate": 1.8684211717419393e-05, "loss": 0.5554, "step": 3694 }, { "epoch": 0.5069630239418261, "grad_norm": 1.171875, "learning_rate": 1.8683495909585922e-05, "loss": 0.4821, "step": 3695 }, { "epoch": 0.5071002263840296, "grad_norm": 1.1640625, "learning_rate": 1.8682779920819813e-05, "loss": 0.5161, "step": 3696 }, { "epoch": 0.5072374288262331, "grad_norm": 1.1796875, "learning_rate": 1.8682063751135987e-05, "loss": 0.4938, "step": 3697 }, { "epoch": 0.5073746312684366, "grad_norm": 1.2109375, "learning_rate": 1.8681347400549367e-05, "loss": 0.564, "step": 3698 }, { "epoch": 0.50751183371064, "grad_norm": 1.21875, "learning_rate": 1.868063086907487e-05, "loss": 0.5256, "step": 3699 }, { "epoch": 0.5076490361528435, "grad_norm": 1.1640625, "learning_rate": 1.8679914156727437e-05, "loss": 0.542, "step": 3700 }, { "epoch": 0.507786238595047, "grad_norm": 1.234375, "learning_rate": 1.8679197263521996e-05, "loss": 0.5153, "step": 3701 }, { "epoch": 0.5079234410372505, "grad_norm": 1.3125, "learning_rate": 1.8678480189473487e-05, "loss": 0.4984, "step": 3702 }, { "epoch": 0.5080606434794539, "grad_norm": 1.078125, "learning_rate": 1.8677762934596847e-05, "loss": 0.3842, "step": 3703 }, { "epoch": 0.5081978459216574, "grad_norm": 1.1015625, "learning_rate": 1.8677045498907028e-05, "loss": 0.4603, "step": 3704 }, { "epoch": 0.5083350483638609, "grad_norm": 1.375, "learning_rate": 1.867632788241897e-05, "loss": 0.6251, "step": 3705 }, { "epoch": 0.5084722508060644, "grad_norm": 1.21875, "learning_rate": 1.8675610085147637e-05, "loss": 0.4747, "step": 3706 }, { "epoch": 0.5086094532482678, "grad_norm": 1.2890625, "learning_rate": 1.867489210710797e-05, "loss": 0.5207, "step": 3707 }, { "epoch": 0.5087466556904713, "grad_norm": 1.2421875, "learning_rate": 1.8674173948314944e-05, "loss": 0.5371, "step": 3708 }, { "epoch": 0.5088838581326748, "grad_norm": 1.234375, "learning_rate": 1.867345560878351e-05, "loss": 0.5168, "step": 3709 }, { "epoch": 0.5090210605748783, "grad_norm": 1.2265625, "learning_rate": 1.867273708852865e-05, "loss": 0.5422, "step": 3710 }, { "epoch": 0.5091582630170817, "grad_norm": 1.25, "learning_rate": 1.867201838756532e-05, "loss": 0.5493, "step": 3711 }, { "epoch": 0.5092954654592852, "grad_norm": 1.2265625, "learning_rate": 1.867129950590851e-05, "loss": 0.5099, "step": 3712 }, { "epoch": 0.5094326679014887, "grad_norm": 1.1875, "learning_rate": 1.8670580443573186e-05, "loss": 0.5213, "step": 3713 }, { "epoch": 0.5095698703436922, "grad_norm": 1.296875, "learning_rate": 1.8669861200574338e-05, "loss": 0.5768, "step": 3714 }, { "epoch": 0.5097070727858956, "grad_norm": 1.2421875, "learning_rate": 1.866914177692695e-05, "loss": 0.5556, "step": 3715 }, { "epoch": 0.509844275228099, "grad_norm": 1.2578125, "learning_rate": 1.8668422172646012e-05, "loss": 0.5919, "step": 3716 }, { "epoch": 0.5099814776703026, "grad_norm": 1.171875, "learning_rate": 1.866770238774652e-05, "loss": 0.5581, "step": 3717 }, { "epoch": 0.510118680112506, "grad_norm": 1.0390625, "learning_rate": 1.8666982422243468e-05, "loss": 0.4336, "step": 3718 }, { "epoch": 0.5102558825547094, "grad_norm": 1.1015625, "learning_rate": 1.866626227615186e-05, "loss": 0.4657, "step": 3719 }, { "epoch": 0.5103930849969129, "grad_norm": 1.1640625, "learning_rate": 1.86655419494867e-05, "loss": 0.474, "step": 3720 }, { "epoch": 0.5105302874391164, "grad_norm": 1.140625, "learning_rate": 1.8664821442263002e-05, "loss": 0.5043, "step": 3721 }, { "epoch": 0.5106674898813199, "grad_norm": 1.2265625, "learning_rate": 1.8664100754495773e-05, "loss": 0.5472, "step": 3722 }, { "epoch": 0.5108046923235233, "grad_norm": 1.125, "learning_rate": 1.866337988620003e-05, "loss": 0.4442, "step": 3723 }, { "epoch": 0.5109418947657268, "grad_norm": 1.2734375, "learning_rate": 1.8662658837390797e-05, "loss": 0.5418, "step": 3724 }, { "epoch": 0.5110790972079303, "grad_norm": 1.2734375, "learning_rate": 1.8661937608083092e-05, "loss": 0.5572, "step": 3725 }, { "epoch": 0.5112162996501338, "grad_norm": 1.1875, "learning_rate": 1.866121619829195e-05, "loss": 0.5976, "step": 3726 }, { "epoch": 0.5113535020923372, "grad_norm": 1.3125, "learning_rate": 1.86604946080324e-05, "loss": 0.5764, "step": 3727 }, { "epoch": 0.5114907045345407, "grad_norm": 1.21875, "learning_rate": 1.8659772837319472e-05, "loss": 0.5285, "step": 3728 }, { "epoch": 0.5116279069767442, "grad_norm": 1.2265625, "learning_rate": 1.865905088616821e-05, "loss": 0.5702, "step": 3729 }, { "epoch": 0.5117651094189477, "grad_norm": 1.375, "learning_rate": 1.8658328754593657e-05, "loss": 0.585, "step": 3730 }, { "epoch": 0.5119023118611511, "grad_norm": 1.1796875, "learning_rate": 1.8657606442610862e-05, "loss": 0.5034, "step": 3731 }, { "epoch": 0.5120395143033546, "grad_norm": 1.21875, "learning_rate": 1.865688395023487e-05, "loss": 0.5332, "step": 3732 }, { "epoch": 0.5121767167455581, "grad_norm": 1.28125, "learning_rate": 1.865616127748074e-05, "loss": 0.5871, "step": 3733 }, { "epoch": 0.5123139191877616, "grad_norm": 1.15625, "learning_rate": 1.8655438424363522e-05, "loss": 0.5132, "step": 3734 }, { "epoch": 0.512451121629965, "grad_norm": 1.34375, "learning_rate": 1.8654715390898282e-05, "loss": 0.5734, "step": 3735 }, { "epoch": 0.5125883240721685, "grad_norm": 1.328125, "learning_rate": 1.8653992177100094e-05, "loss": 0.6259, "step": 3736 }, { "epoch": 0.512725526514372, "grad_norm": 1.2421875, "learning_rate": 1.8653268782984015e-05, "loss": 0.5241, "step": 3737 }, { "epoch": 0.5128627289565755, "grad_norm": 1.234375, "learning_rate": 1.865254520856512e-05, "loss": 0.53, "step": 3738 }, { "epoch": 0.5129999313987789, "grad_norm": 1.3359375, "learning_rate": 1.8651821453858492e-05, "loss": 0.4842, "step": 3739 }, { "epoch": 0.5131371338409824, "grad_norm": 1.265625, "learning_rate": 1.8651097518879207e-05, "loss": 0.5086, "step": 3740 }, { "epoch": 0.5132743362831859, "grad_norm": 1.2265625, "learning_rate": 1.865037340364235e-05, "loss": 0.4473, "step": 3741 }, { "epoch": 0.5134115387253894, "grad_norm": 1.25, "learning_rate": 1.8649649108163003e-05, "loss": 0.5468, "step": 3742 }, { "epoch": 0.5135487411675927, "grad_norm": 1.1484375, "learning_rate": 1.8648924632456272e-05, "loss": 0.4781, "step": 3743 }, { "epoch": 0.5136859436097962, "grad_norm": 1.28125, "learning_rate": 1.8648199976537237e-05, "loss": 0.5852, "step": 3744 }, { "epoch": 0.5138231460519997, "grad_norm": 1.3046875, "learning_rate": 1.864747514042101e-05, "loss": 0.5615, "step": 3745 }, { "epoch": 0.5139603484942032, "grad_norm": 1.21875, "learning_rate": 1.8646750124122682e-05, "loss": 0.4917, "step": 3746 }, { "epoch": 0.5140975509364066, "grad_norm": 1.2890625, "learning_rate": 1.8646024927657368e-05, "loss": 0.5343, "step": 3747 }, { "epoch": 0.5142347533786101, "grad_norm": 1.2578125, "learning_rate": 1.8645299551040177e-05, "loss": 0.5329, "step": 3748 }, { "epoch": 0.5143719558208136, "grad_norm": 1.4453125, "learning_rate": 1.864457399428622e-05, "loss": 0.5589, "step": 3749 }, { "epoch": 0.5145091582630171, "grad_norm": 1.140625, "learning_rate": 1.864384825741062e-05, "loss": 0.5236, "step": 3750 }, { "epoch": 0.5146463607052205, "grad_norm": 1.125, "learning_rate": 1.8643122340428496e-05, "loss": 0.4459, "step": 3751 }, { "epoch": 0.514783563147424, "grad_norm": 1.28125, "learning_rate": 1.8642396243354973e-05, "loss": 0.5103, "step": 3752 }, { "epoch": 0.5149207655896275, "grad_norm": 1.203125, "learning_rate": 1.8641669966205177e-05, "loss": 0.5117, "step": 3753 }, { "epoch": 0.515057968031831, "grad_norm": 1.328125, "learning_rate": 1.864094350899425e-05, "loss": 0.5832, "step": 3754 }, { "epoch": 0.5151951704740344, "grad_norm": 1.359375, "learning_rate": 1.864021687173732e-05, "loss": 0.5137, "step": 3755 }, { "epoch": 0.5153323729162379, "grad_norm": 1.28125, "learning_rate": 1.8639490054449534e-05, "loss": 0.4866, "step": 3756 }, { "epoch": 0.5154695753584414, "grad_norm": 1.1953125, "learning_rate": 1.863876305714603e-05, "loss": 0.4517, "step": 3757 }, { "epoch": 0.5156067778006449, "grad_norm": 1.3046875, "learning_rate": 1.863803587984196e-05, "loss": 0.5926, "step": 3758 }, { "epoch": 0.5157439802428483, "grad_norm": 1.2265625, "learning_rate": 1.8637308522552478e-05, "loss": 0.4356, "step": 3759 }, { "epoch": 0.5158811826850518, "grad_norm": 1.2578125, "learning_rate": 1.8636580985292735e-05, "loss": 0.5406, "step": 3760 }, { "epoch": 0.5160183851272553, "grad_norm": 1.28125, "learning_rate": 1.863585326807789e-05, "loss": 0.5589, "step": 3761 }, { "epoch": 0.5161555875694588, "grad_norm": 1.15625, "learning_rate": 1.863512537092311e-05, "loss": 0.4955, "step": 3762 }, { "epoch": 0.5162927900116622, "grad_norm": 1.3125, "learning_rate": 1.8634397293843557e-05, "loss": 0.5896, "step": 3763 }, { "epoch": 0.5164299924538657, "grad_norm": 1.1953125, "learning_rate": 1.8633669036854406e-05, "loss": 0.4911, "step": 3764 }, { "epoch": 0.5165671948960692, "grad_norm": 1.109375, "learning_rate": 1.863294059997083e-05, "loss": 0.4496, "step": 3765 }, { "epoch": 0.5167043973382727, "grad_norm": 1.125, "learning_rate": 1.8632211983208006e-05, "loss": 0.44, "step": 3766 }, { "epoch": 0.5168415997804761, "grad_norm": 1.375, "learning_rate": 1.8631483186581114e-05, "loss": 0.5806, "step": 3767 }, { "epoch": 0.5169788022226796, "grad_norm": 1.25, "learning_rate": 1.863075421010534e-05, "loss": 0.5139, "step": 3768 }, { "epoch": 0.517116004664883, "grad_norm": 1.484375, "learning_rate": 1.863002505379588e-05, "loss": 0.6497, "step": 3769 }, { "epoch": 0.5172532071070866, "grad_norm": 1.25, "learning_rate": 1.8629295717667917e-05, "loss": 0.5432, "step": 3770 }, { "epoch": 0.5173904095492899, "grad_norm": 1.109375, "learning_rate": 1.8628566201736655e-05, "loss": 0.4501, "step": 3771 }, { "epoch": 0.5175276119914934, "grad_norm": 1.15625, "learning_rate": 1.862783650601729e-05, "loss": 0.4474, "step": 3772 }, { "epoch": 0.5176648144336969, "grad_norm": 1.203125, "learning_rate": 1.8627106630525027e-05, "loss": 0.4844, "step": 3773 }, { "epoch": 0.5178020168759004, "grad_norm": 1.1953125, "learning_rate": 1.8626376575275077e-05, "loss": 0.5372, "step": 3774 }, { "epoch": 0.5179392193181038, "grad_norm": 1.2578125, "learning_rate": 1.8625646340282648e-05, "loss": 0.5217, "step": 3775 }, { "epoch": 0.5180764217603073, "grad_norm": 1.3515625, "learning_rate": 1.8624915925562957e-05, "loss": 0.5779, "step": 3776 }, { "epoch": 0.5182136242025108, "grad_norm": 1.296875, "learning_rate": 1.8624185331131226e-05, "loss": 0.5257, "step": 3777 }, { "epoch": 0.5183508266447143, "grad_norm": 1.25, "learning_rate": 1.8623454557002672e-05, "loss": 0.5492, "step": 3778 }, { "epoch": 0.5184880290869177, "grad_norm": 1.1328125, "learning_rate": 1.8622723603192528e-05, "loss": 0.488, "step": 3779 }, { "epoch": 0.5186252315291212, "grad_norm": 1.15625, "learning_rate": 1.8621992469716023e-05, "loss": 0.4328, "step": 3780 }, { "epoch": 0.5187624339713247, "grad_norm": 1.1953125, "learning_rate": 1.8621261156588386e-05, "loss": 0.5147, "step": 3781 }, { "epoch": 0.5188996364135282, "grad_norm": 1.1796875, "learning_rate": 1.862052966382486e-05, "loss": 0.4785, "step": 3782 }, { "epoch": 0.5190368388557316, "grad_norm": 1.2265625, "learning_rate": 1.8619797991440683e-05, "loss": 0.5091, "step": 3783 }, { "epoch": 0.5191740412979351, "grad_norm": 1.15625, "learning_rate": 1.8619066139451103e-05, "loss": 0.4943, "step": 3784 }, { "epoch": 0.5193112437401386, "grad_norm": 1.3046875, "learning_rate": 1.861833410787137e-05, "loss": 0.5491, "step": 3785 }, { "epoch": 0.5194484461823421, "grad_norm": 1.2421875, "learning_rate": 1.8617601896716738e-05, "loss": 0.4695, "step": 3786 }, { "epoch": 0.5195856486245455, "grad_norm": 1.1171875, "learning_rate": 1.8616869506002456e-05, "loss": 0.4521, "step": 3787 }, { "epoch": 0.519722851066749, "grad_norm": 1.2578125, "learning_rate": 1.8616136935743797e-05, "loss": 0.5162, "step": 3788 }, { "epoch": 0.5198600535089525, "grad_norm": 1.2109375, "learning_rate": 1.8615404185956013e-05, "loss": 0.5534, "step": 3789 }, { "epoch": 0.519997255951156, "grad_norm": 1.1953125, "learning_rate": 1.8614671256654375e-05, "loss": 0.4629, "step": 3790 }, { "epoch": 0.5201344583933594, "grad_norm": 1.1328125, "learning_rate": 1.8613938147854157e-05, "loss": 0.4522, "step": 3791 }, { "epoch": 0.5202716608355629, "grad_norm": 1.234375, "learning_rate": 1.8613204859570635e-05, "loss": 0.542, "step": 3792 }, { "epoch": 0.5204088632777664, "grad_norm": 1.1171875, "learning_rate": 1.8612471391819087e-05, "loss": 0.4647, "step": 3793 }, { "epoch": 0.5205460657199699, "grad_norm": 1.203125, "learning_rate": 1.8611737744614797e-05, "loss": 0.474, "step": 3794 }, { "epoch": 0.5206832681621733, "grad_norm": 1.2578125, "learning_rate": 1.8611003917973044e-05, "loss": 0.5124, "step": 3795 }, { "epoch": 0.5208204706043768, "grad_norm": 1.1875, "learning_rate": 1.8610269911909127e-05, "loss": 0.4705, "step": 3796 }, { "epoch": 0.5209576730465802, "grad_norm": 1.234375, "learning_rate": 1.860953572643834e-05, "loss": 0.5268, "step": 3797 }, { "epoch": 0.5210948754887837, "grad_norm": 1.234375, "learning_rate": 1.8608801361575976e-05, "loss": 0.5749, "step": 3798 }, { "epoch": 0.5212320779309871, "grad_norm": 1.171875, "learning_rate": 1.8608066817337337e-05, "loss": 0.492, "step": 3799 }, { "epoch": 0.5213692803731906, "grad_norm": 1.296875, "learning_rate": 1.8607332093737733e-05, "loss": 0.5482, "step": 3800 }, { "epoch": 0.5215064828153941, "grad_norm": 1.1796875, "learning_rate": 1.8606597190792468e-05, "loss": 0.4594, "step": 3801 }, { "epoch": 0.5216436852575976, "grad_norm": 1.15625, "learning_rate": 1.8605862108516857e-05, "loss": 0.5172, "step": 3802 }, { "epoch": 0.521780887699801, "grad_norm": 1.171875, "learning_rate": 1.8605126846926215e-05, "loss": 0.535, "step": 3803 }, { "epoch": 0.5219180901420045, "grad_norm": 1.3046875, "learning_rate": 1.8604391406035865e-05, "loss": 0.5657, "step": 3804 }, { "epoch": 0.522055292584208, "grad_norm": 1.1796875, "learning_rate": 1.860365578586113e-05, "loss": 0.519, "step": 3805 }, { "epoch": 0.5221924950264115, "grad_norm": 1.265625, "learning_rate": 1.860291998641733e-05, "loss": 0.5581, "step": 3806 }, { "epoch": 0.5223296974686149, "grad_norm": 1.1171875, "learning_rate": 1.860218400771981e-05, "loss": 0.4664, "step": 3807 }, { "epoch": 0.5224668999108184, "grad_norm": 1.2734375, "learning_rate": 1.8601447849783897e-05, "loss": 0.521, "step": 3808 }, { "epoch": 0.5226041023530219, "grad_norm": 1.21875, "learning_rate": 1.860071151262493e-05, "loss": 0.564, "step": 3809 }, { "epoch": 0.5227413047952254, "grad_norm": 1.28125, "learning_rate": 1.8599974996258252e-05, "loss": 0.5376, "step": 3810 }, { "epoch": 0.5228785072374288, "grad_norm": 1.296875, "learning_rate": 1.859923830069921e-05, "loss": 0.5968, "step": 3811 }, { "epoch": 0.5230157096796323, "grad_norm": 1.234375, "learning_rate": 1.8598501425963156e-05, "loss": 0.6132, "step": 3812 }, { "epoch": 0.5231529121218358, "grad_norm": 1.3203125, "learning_rate": 1.8597764372065444e-05, "loss": 0.5277, "step": 3813 }, { "epoch": 0.5232901145640393, "grad_norm": 1.3828125, "learning_rate": 1.8597027139021423e-05, "loss": 0.6395, "step": 3814 }, { "epoch": 0.5234273170062427, "grad_norm": 1.1796875, "learning_rate": 1.8596289726846467e-05, "loss": 0.5247, "step": 3815 }, { "epoch": 0.5235645194484462, "grad_norm": 1.234375, "learning_rate": 1.859555213555593e-05, "loss": 0.5063, "step": 3816 }, { "epoch": 0.5237017218906497, "grad_norm": 1.1640625, "learning_rate": 1.859481436516519e-05, "loss": 0.4927, "step": 3817 }, { "epoch": 0.5238389243328532, "grad_norm": 1.2421875, "learning_rate": 1.859407641568961e-05, "loss": 0.5653, "step": 3818 }, { "epoch": 0.5239761267750566, "grad_norm": 1.2265625, "learning_rate": 1.8593338287144577e-05, "loss": 0.493, "step": 3819 }, { "epoch": 0.5241133292172601, "grad_norm": 1.3359375, "learning_rate": 1.859259997954546e-05, "loss": 0.6306, "step": 3820 }, { "epoch": 0.5242505316594636, "grad_norm": 1.1875, "learning_rate": 1.8591861492907653e-05, "loss": 0.4908, "step": 3821 }, { "epoch": 0.5243877341016671, "grad_norm": 1.2109375, "learning_rate": 1.8591122827246535e-05, "loss": 0.4904, "step": 3822 }, { "epoch": 0.5245249365438704, "grad_norm": 1.21875, "learning_rate": 1.85903839825775e-05, "loss": 0.5887, "step": 3823 }, { "epoch": 0.524662138986074, "grad_norm": 1.296875, "learning_rate": 1.8589644958915943e-05, "loss": 0.5222, "step": 3824 }, { "epoch": 0.5247993414282774, "grad_norm": 1.3515625, "learning_rate": 1.858890575627726e-05, "loss": 0.5634, "step": 3825 }, { "epoch": 0.5249365438704809, "grad_norm": 1.2578125, "learning_rate": 1.8588166374676865e-05, "loss": 0.5359, "step": 3826 }, { "epoch": 0.5250737463126843, "grad_norm": 1.2265625, "learning_rate": 1.8587426814130144e-05, "loss": 0.5565, "step": 3827 }, { "epoch": 0.5252109487548878, "grad_norm": 1.1640625, "learning_rate": 1.8586687074652524e-05, "loss": 0.5005, "step": 3828 }, { "epoch": 0.5253481511970913, "grad_norm": 0.97265625, "learning_rate": 1.8585947156259415e-05, "loss": 0.3455, "step": 3829 }, { "epoch": 0.5254853536392948, "grad_norm": 1.265625, "learning_rate": 1.8585207058966224e-05, "loss": 0.5057, "step": 3830 }, { "epoch": 0.5256225560814982, "grad_norm": 1.15625, "learning_rate": 1.8584466782788383e-05, "loss": 0.5008, "step": 3831 }, { "epoch": 0.5257597585237017, "grad_norm": 1.09375, "learning_rate": 1.8583726327741317e-05, "loss": 0.4443, "step": 3832 }, { "epoch": 0.5258969609659052, "grad_norm": 1.2890625, "learning_rate": 1.8582985693840446e-05, "loss": 0.5928, "step": 3833 }, { "epoch": 0.5260341634081087, "grad_norm": 1.203125, "learning_rate": 1.8582244881101205e-05, "loss": 0.511, "step": 3834 }, { "epoch": 0.5261713658503121, "grad_norm": 1.2109375, "learning_rate": 1.8581503889539036e-05, "loss": 0.5065, "step": 3835 }, { "epoch": 0.5263085682925156, "grad_norm": 1.078125, "learning_rate": 1.8580762719169373e-05, "loss": 0.4637, "step": 3836 }, { "epoch": 0.5264457707347191, "grad_norm": 1.2890625, "learning_rate": 1.858002137000766e-05, "loss": 0.5334, "step": 3837 }, { "epoch": 0.5265829731769226, "grad_norm": 1.359375, "learning_rate": 1.8579279842069344e-05, "loss": 0.5956, "step": 3838 }, { "epoch": 0.526720175619126, "grad_norm": 1.1328125, "learning_rate": 1.8578538135369874e-05, "loss": 0.4842, "step": 3839 }, { "epoch": 0.5268573780613295, "grad_norm": 1.2578125, "learning_rate": 1.8577796249924707e-05, "loss": 0.5825, "step": 3840 }, { "epoch": 0.526994580503533, "grad_norm": 1.1328125, "learning_rate": 1.8577054185749307e-05, "loss": 0.4563, "step": 3841 }, { "epoch": 0.5271317829457365, "grad_norm": 1.2109375, "learning_rate": 1.8576311942859123e-05, "loss": 0.5204, "step": 3842 }, { "epoch": 0.5272689853879399, "grad_norm": 1.421875, "learning_rate": 1.8575569521269633e-05, "loss": 0.5414, "step": 3843 }, { "epoch": 0.5274061878301434, "grad_norm": 1.2734375, "learning_rate": 1.8574826920996296e-05, "loss": 0.6348, "step": 3844 }, { "epoch": 0.5275433902723469, "grad_norm": 1.1796875, "learning_rate": 1.8574084142054593e-05, "loss": 0.5533, "step": 3845 }, { "epoch": 0.5276805927145504, "grad_norm": 1.1875, "learning_rate": 1.8573341184459997e-05, "loss": 0.4893, "step": 3846 }, { "epoch": 0.5278177951567538, "grad_norm": 1.15625, "learning_rate": 1.8572598048227986e-05, "loss": 0.4734, "step": 3847 }, { "epoch": 0.5279549975989573, "grad_norm": 1.125, "learning_rate": 1.8571854733374053e-05, "loss": 0.4506, "step": 3848 }, { "epoch": 0.5280922000411608, "grad_norm": 1.2109375, "learning_rate": 1.857111123991368e-05, "loss": 0.5416, "step": 3849 }, { "epoch": 0.5282294024833643, "grad_norm": 1.2578125, "learning_rate": 1.8570367567862357e-05, "loss": 0.5221, "step": 3850 }, { "epoch": 0.5283666049255676, "grad_norm": 1.203125, "learning_rate": 1.8569623717235586e-05, "loss": 0.494, "step": 3851 }, { "epoch": 0.5285038073677711, "grad_norm": 1.453125, "learning_rate": 1.8568879688048858e-05, "loss": 0.661, "step": 3852 }, { "epoch": 0.5286410098099746, "grad_norm": 1.25, "learning_rate": 1.8568135480317678e-05, "loss": 0.4974, "step": 3853 }, { "epoch": 0.5287782122521781, "grad_norm": 1.359375, "learning_rate": 1.8567391094057557e-05, "loss": 0.6055, "step": 3854 }, { "epoch": 0.5289154146943815, "grad_norm": 1.3125, "learning_rate": 1.8566646529284002e-05, "loss": 0.5411, "step": 3855 }, { "epoch": 0.529052617136585, "grad_norm": 1.34375, "learning_rate": 1.8565901786012532e-05, "loss": 0.5855, "step": 3856 }, { "epoch": 0.5291898195787885, "grad_norm": 1.21875, "learning_rate": 1.8565156864258656e-05, "loss": 0.5165, "step": 3857 }, { "epoch": 0.529327022020992, "grad_norm": 1.1953125, "learning_rate": 1.85644117640379e-05, "loss": 0.5481, "step": 3858 }, { "epoch": 0.5294642244631954, "grad_norm": 1.1640625, "learning_rate": 1.8563666485365786e-05, "loss": 0.5605, "step": 3859 }, { "epoch": 0.5296014269053989, "grad_norm": 1.1640625, "learning_rate": 1.856292102825785e-05, "loss": 0.5072, "step": 3860 }, { "epoch": 0.5297386293476024, "grad_norm": 1.234375, "learning_rate": 1.8562175392729623e-05, "loss": 0.5386, "step": 3861 }, { "epoch": 0.5298758317898059, "grad_norm": 1.1796875, "learning_rate": 1.8561429578796638e-05, "loss": 0.4803, "step": 3862 }, { "epoch": 0.5300130342320093, "grad_norm": 1.15625, "learning_rate": 1.8560683586474435e-05, "loss": 0.492, "step": 3863 }, { "epoch": 0.5301502366742128, "grad_norm": 1.3046875, "learning_rate": 1.855993741577856e-05, "loss": 0.5701, "step": 3864 }, { "epoch": 0.5302874391164163, "grad_norm": 1.2265625, "learning_rate": 1.855919106672456e-05, "loss": 0.5123, "step": 3865 }, { "epoch": 0.5304246415586198, "grad_norm": 1.28125, "learning_rate": 1.855844453932798e-05, "loss": 0.5079, "step": 3866 }, { "epoch": 0.5305618440008232, "grad_norm": 1.140625, "learning_rate": 1.8557697833604384e-05, "loss": 0.5059, "step": 3867 }, { "epoch": 0.5306990464430267, "grad_norm": 1.1640625, "learning_rate": 1.855695094956933e-05, "loss": 0.4985, "step": 3868 }, { "epoch": 0.5308362488852302, "grad_norm": 1.3203125, "learning_rate": 1.8556203887238374e-05, "loss": 0.5833, "step": 3869 }, { "epoch": 0.5309734513274337, "grad_norm": 1.140625, "learning_rate": 1.8555456646627088e-05, "loss": 0.4913, "step": 3870 }, { "epoch": 0.5311106537696371, "grad_norm": 1.296875, "learning_rate": 1.8554709227751037e-05, "loss": 0.5463, "step": 3871 }, { "epoch": 0.5312478562118406, "grad_norm": 1.3359375, "learning_rate": 1.85539616306258e-05, "loss": 0.6216, "step": 3872 }, { "epoch": 0.5313850586540441, "grad_norm": 1.34375, "learning_rate": 1.8553213855266947e-05, "loss": 0.6558, "step": 3873 }, { "epoch": 0.5315222610962476, "grad_norm": 1.203125, "learning_rate": 1.8552465901690066e-05, "loss": 0.5108, "step": 3874 }, { "epoch": 0.531659463538451, "grad_norm": 1.2265625, "learning_rate": 1.8551717769910737e-05, "loss": 0.5312, "step": 3875 }, { "epoch": 0.5317966659806544, "grad_norm": 1.34375, "learning_rate": 1.8550969459944553e-05, "loss": 0.5726, "step": 3876 }, { "epoch": 0.531933868422858, "grad_norm": 1.21875, "learning_rate": 1.8550220971807103e-05, "loss": 0.5109, "step": 3877 }, { "epoch": 0.5320710708650614, "grad_norm": 1.1875, "learning_rate": 1.8549472305513982e-05, "loss": 0.4955, "step": 3878 }, { "epoch": 0.5322082733072648, "grad_norm": 1.1796875, "learning_rate": 1.854872346108079e-05, "loss": 0.4829, "step": 3879 }, { "epoch": 0.5323454757494683, "grad_norm": 1.2734375, "learning_rate": 1.8547974438523127e-05, "loss": 0.6017, "step": 3880 }, { "epoch": 0.5324826781916718, "grad_norm": 1.1484375, "learning_rate": 1.854722523785661e-05, "loss": 0.4851, "step": 3881 }, { "epoch": 0.5326198806338753, "grad_norm": 1.2421875, "learning_rate": 1.854647585909684e-05, "loss": 0.5072, "step": 3882 }, { "epoch": 0.5327570830760787, "grad_norm": 1.21875, "learning_rate": 1.8545726302259434e-05, "loss": 0.5003, "step": 3883 }, { "epoch": 0.5328942855182822, "grad_norm": 1.25, "learning_rate": 1.8544976567360012e-05, "loss": 0.5624, "step": 3884 }, { "epoch": 0.5330314879604857, "grad_norm": 1.296875, "learning_rate": 1.8544226654414195e-05, "loss": 0.5588, "step": 3885 }, { "epoch": 0.5331686904026892, "grad_norm": 1.2578125, "learning_rate": 1.8543476563437606e-05, "loss": 0.5215, "step": 3886 }, { "epoch": 0.5333058928448926, "grad_norm": 1.1953125, "learning_rate": 1.8542726294445877e-05, "loss": 0.5393, "step": 3887 }, { "epoch": 0.5334430952870961, "grad_norm": 1.171875, "learning_rate": 1.854197584745464e-05, "loss": 0.4843, "step": 3888 }, { "epoch": 0.5335802977292996, "grad_norm": 1.171875, "learning_rate": 1.854122522247953e-05, "loss": 0.499, "step": 3889 }, { "epoch": 0.5337175001715031, "grad_norm": 1.15625, "learning_rate": 1.8540474419536187e-05, "loss": 0.4374, "step": 3890 }, { "epoch": 0.5338547026137065, "grad_norm": 1.125, "learning_rate": 1.853972343864026e-05, "loss": 0.4763, "step": 3891 }, { "epoch": 0.53399190505591, "grad_norm": 1.2890625, "learning_rate": 1.8538972279807393e-05, "loss": 0.5952, "step": 3892 }, { "epoch": 0.5341291074981135, "grad_norm": 1.21875, "learning_rate": 1.853822094305324e-05, "loss": 0.5246, "step": 3893 }, { "epoch": 0.534266309940317, "grad_norm": 1.1953125, "learning_rate": 1.853746942839345e-05, "loss": 0.5339, "step": 3894 }, { "epoch": 0.5344035123825204, "grad_norm": 1.203125, "learning_rate": 1.853671773584369e-05, "loss": 0.5284, "step": 3895 }, { "epoch": 0.5345407148247239, "grad_norm": 1.3125, "learning_rate": 1.8535965865419618e-05, "loss": 0.5554, "step": 3896 }, { "epoch": 0.5346779172669274, "grad_norm": 1.2265625, "learning_rate": 1.85352138171369e-05, "loss": 0.5673, "step": 3897 }, { "epoch": 0.5348151197091309, "grad_norm": 1.234375, "learning_rate": 1.8534461591011205e-05, "loss": 0.5551, "step": 3898 }, { "epoch": 0.5349523221513343, "grad_norm": 1.1640625, "learning_rate": 1.853370918705821e-05, "loss": 0.4851, "step": 3899 }, { "epoch": 0.5350895245935378, "grad_norm": 1.15625, "learning_rate": 1.853295660529359e-05, "loss": 0.467, "step": 3900 }, { "epoch": 0.5352267270357413, "grad_norm": 1.140625, "learning_rate": 1.8532203845733026e-05, "loss": 0.4326, "step": 3901 }, { "epoch": 0.5353639294779448, "grad_norm": 1.2890625, "learning_rate": 1.8531450908392207e-05, "loss": 0.5895, "step": 3902 }, { "epoch": 0.5355011319201481, "grad_norm": 1.3046875, "learning_rate": 1.8530697793286817e-05, "loss": 0.5841, "step": 3903 }, { "epoch": 0.5356383343623516, "grad_norm": 1.234375, "learning_rate": 1.8529944500432548e-05, "loss": 0.5352, "step": 3904 }, { "epoch": 0.5357755368045551, "grad_norm": 1.1953125, "learning_rate": 1.8529191029845098e-05, "loss": 0.5296, "step": 3905 }, { "epoch": 0.5359127392467586, "grad_norm": 1.0859375, "learning_rate": 1.852843738154017e-05, "loss": 0.4374, "step": 3906 }, { "epoch": 0.536049941688962, "grad_norm": 1.40625, "learning_rate": 1.8527683555533452e-05, "loss": 0.4962, "step": 3907 }, { "epoch": 0.5361871441311655, "grad_norm": 1.328125, "learning_rate": 1.8526929551840674e-05, "loss": 0.6196, "step": 3908 }, { "epoch": 0.536324346573369, "grad_norm": 1.21875, "learning_rate": 1.8526175370477528e-05, "loss": 0.5237, "step": 3909 }, { "epoch": 0.5364615490155725, "grad_norm": 1.1875, "learning_rate": 1.8525421011459737e-05, "loss": 0.5144, "step": 3910 }, { "epoch": 0.5365987514577759, "grad_norm": 1.2109375, "learning_rate": 1.8524666474803017e-05, "loss": 0.5361, "step": 3911 }, { "epoch": 0.5367359538999794, "grad_norm": 1.1328125, "learning_rate": 1.852391176052309e-05, "loss": 0.444, "step": 3912 }, { "epoch": 0.5368731563421829, "grad_norm": 1.265625, "learning_rate": 1.8523156868635684e-05, "loss": 0.5827, "step": 3913 }, { "epoch": 0.5370103587843864, "grad_norm": 1.1484375, "learning_rate": 1.8522401799156524e-05, "loss": 0.4662, "step": 3914 }, { "epoch": 0.5371475612265898, "grad_norm": 1.2578125, "learning_rate": 1.8521646552101345e-05, "loss": 0.554, "step": 3915 }, { "epoch": 0.5372847636687933, "grad_norm": 1.1328125, "learning_rate": 1.852089112748588e-05, "loss": 0.4621, "step": 3916 }, { "epoch": 0.5374219661109968, "grad_norm": 1.2734375, "learning_rate": 1.852013552532588e-05, "loss": 0.5546, "step": 3917 }, { "epoch": 0.5375591685532003, "grad_norm": 1.1484375, "learning_rate": 1.851937974563708e-05, "loss": 0.5131, "step": 3918 }, { "epoch": 0.5376963709954037, "grad_norm": 1.234375, "learning_rate": 1.8518623788435227e-05, "loss": 0.6072, "step": 3919 }, { "epoch": 0.5378335734376072, "grad_norm": 1.2265625, "learning_rate": 1.8517867653736076e-05, "loss": 0.531, "step": 3920 }, { "epoch": 0.5379707758798107, "grad_norm": 1.125, "learning_rate": 1.851711134155538e-05, "loss": 0.43, "step": 3921 }, { "epoch": 0.5381079783220142, "grad_norm": 1.1171875, "learning_rate": 1.85163548519089e-05, "loss": 0.4833, "step": 3922 }, { "epoch": 0.5382451807642176, "grad_norm": 1.1484375, "learning_rate": 1.8515598184812398e-05, "loss": 0.5139, "step": 3923 }, { "epoch": 0.5383823832064211, "grad_norm": 1.265625, "learning_rate": 1.8514841340281636e-05, "loss": 0.5624, "step": 3924 }, { "epoch": 0.5385195856486246, "grad_norm": 1.2109375, "learning_rate": 1.8514084318332392e-05, "loss": 0.4894, "step": 3925 }, { "epoch": 0.5386567880908281, "grad_norm": 1.3046875, "learning_rate": 1.8513327118980433e-05, "loss": 0.544, "step": 3926 }, { "epoch": 0.5387939905330315, "grad_norm": 1.1328125, "learning_rate": 1.8512569742241536e-05, "loss": 0.4932, "step": 3927 }, { "epoch": 0.538931192975235, "grad_norm": 1.1484375, "learning_rate": 1.851181218813149e-05, "loss": 0.4565, "step": 3928 }, { "epoch": 0.5390683954174385, "grad_norm": 1.28125, "learning_rate": 1.8511054456666067e-05, "loss": 0.4517, "step": 3929 }, { "epoch": 0.539205597859642, "grad_norm": 1.1875, "learning_rate": 1.851029654786107e-05, "loss": 0.5113, "step": 3930 }, { "epoch": 0.5393428003018453, "grad_norm": 1.296875, "learning_rate": 1.850953846173228e-05, "loss": 0.5979, "step": 3931 }, { "epoch": 0.5394800027440488, "grad_norm": 1.3046875, "learning_rate": 1.8508780198295495e-05, "loss": 0.5906, "step": 3932 }, { "epoch": 0.5396172051862523, "grad_norm": 1.1171875, "learning_rate": 1.8508021757566516e-05, "loss": 0.4599, "step": 3933 }, { "epoch": 0.5397544076284558, "grad_norm": 1.234375, "learning_rate": 1.8507263139561147e-05, "loss": 0.5419, "step": 3934 }, { "epoch": 0.5398916100706592, "grad_norm": 1.0625, "learning_rate": 1.8506504344295194e-05, "loss": 0.3784, "step": 3935 }, { "epoch": 0.5400288125128627, "grad_norm": 1.21875, "learning_rate": 1.8505745371784466e-05, "loss": 0.5405, "step": 3936 }, { "epoch": 0.5401660149550662, "grad_norm": 1.1484375, "learning_rate": 1.850498622204478e-05, "loss": 0.5135, "step": 3937 }, { "epoch": 0.5403032173972697, "grad_norm": 1.2109375, "learning_rate": 1.8504226895091953e-05, "loss": 0.5426, "step": 3938 }, { "epoch": 0.5404404198394731, "grad_norm": 1.2421875, "learning_rate": 1.8503467390941802e-05, "loss": 0.6101, "step": 3939 }, { "epoch": 0.5405776222816766, "grad_norm": 1.3125, "learning_rate": 1.850270770961016e-05, "loss": 0.5475, "step": 3940 }, { "epoch": 0.5407148247238801, "grad_norm": 1.1015625, "learning_rate": 1.8501947851112853e-05, "loss": 0.4548, "step": 3941 }, { "epoch": 0.5408520271660836, "grad_norm": 1.109375, "learning_rate": 1.8501187815465713e-05, "loss": 0.46, "step": 3942 }, { "epoch": 0.540989229608287, "grad_norm": 1.3359375, "learning_rate": 1.8500427602684573e-05, "loss": 0.5678, "step": 3943 }, { "epoch": 0.5411264320504905, "grad_norm": 1.2734375, "learning_rate": 1.849966721278528e-05, "loss": 0.5329, "step": 3944 }, { "epoch": 0.541263634492694, "grad_norm": 1.1328125, "learning_rate": 1.8498906645783672e-05, "loss": 0.498, "step": 3945 }, { "epoch": 0.5414008369348975, "grad_norm": 1.3203125, "learning_rate": 1.84981459016956e-05, "loss": 0.5946, "step": 3946 }, { "epoch": 0.5415380393771009, "grad_norm": 1.109375, "learning_rate": 1.8497384980536913e-05, "loss": 0.4142, "step": 3947 }, { "epoch": 0.5416752418193044, "grad_norm": 1.1015625, "learning_rate": 1.849662388232347e-05, "loss": 0.4773, "step": 3948 }, { "epoch": 0.5418124442615079, "grad_norm": 1.15625, "learning_rate": 1.8495862607071125e-05, "loss": 0.4742, "step": 3949 }, { "epoch": 0.5419496467037114, "grad_norm": 1.2578125, "learning_rate": 1.849510115479574e-05, "loss": 0.5609, "step": 3950 }, { "epoch": 0.5420868491459148, "grad_norm": 1.2890625, "learning_rate": 1.8494339525513183e-05, "loss": 0.6243, "step": 3951 }, { "epoch": 0.5422240515881183, "grad_norm": 1.2421875, "learning_rate": 1.8493577719239324e-05, "loss": 0.5471, "step": 3952 }, { "epoch": 0.5423612540303218, "grad_norm": 1.1484375, "learning_rate": 1.8492815735990036e-05, "loss": 0.443, "step": 3953 }, { "epoch": 0.5424984564725253, "grad_norm": 1.171875, "learning_rate": 1.8492053575781193e-05, "loss": 0.5314, "step": 3954 }, { "epoch": 0.5426356589147286, "grad_norm": 1.1484375, "learning_rate": 1.849129123862868e-05, "loss": 0.468, "step": 3955 }, { "epoch": 0.5427728613569321, "grad_norm": 1.1953125, "learning_rate": 1.8490528724548377e-05, "loss": 0.5076, "step": 3956 }, { "epoch": 0.5429100637991356, "grad_norm": 1.140625, "learning_rate": 1.8489766033556177e-05, "loss": 0.5056, "step": 3957 }, { "epoch": 0.5430472662413391, "grad_norm": 1.2734375, "learning_rate": 1.848900316566797e-05, "loss": 0.6012, "step": 3958 }, { "epoch": 0.5431844686835425, "grad_norm": 1.1875, "learning_rate": 1.8488240120899648e-05, "loss": 0.4972, "step": 3959 }, { "epoch": 0.543321671125746, "grad_norm": 1.1015625, "learning_rate": 1.848747689926711e-05, "loss": 0.4056, "step": 3960 }, { "epoch": 0.5434588735679495, "grad_norm": 1.171875, "learning_rate": 1.8486713500786267e-05, "loss": 0.5132, "step": 3961 }, { "epoch": 0.543596076010153, "grad_norm": 1.2265625, "learning_rate": 1.8485949925473015e-05, "loss": 0.5349, "step": 3962 }, { "epoch": 0.5437332784523564, "grad_norm": 1.2578125, "learning_rate": 1.8485186173343272e-05, "loss": 0.591, "step": 3963 }, { "epoch": 0.5438704808945599, "grad_norm": 1.1640625, "learning_rate": 1.8484422244412948e-05, "loss": 0.5278, "step": 3964 }, { "epoch": 0.5440076833367634, "grad_norm": 1.2109375, "learning_rate": 1.8483658138697965e-05, "loss": 0.5532, "step": 3965 }, { "epoch": 0.5441448857789669, "grad_norm": 1.078125, "learning_rate": 1.8482893856214235e-05, "loss": 0.4625, "step": 3966 }, { "epoch": 0.5442820882211703, "grad_norm": 1.2890625, "learning_rate": 1.848212939697769e-05, "loss": 0.5853, "step": 3967 }, { "epoch": 0.5444192906633738, "grad_norm": 1.2578125, "learning_rate": 1.848136476100426e-05, "loss": 0.5207, "step": 3968 }, { "epoch": 0.5445564931055773, "grad_norm": 1.25, "learning_rate": 1.8480599948309873e-05, "loss": 0.5119, "step": 3969 }, { "epoch": 0.5446936955477808, "grad_norm": 1.28125, "learning_rate": 1.8479834958910463e-05, "loss": 0.5313, "step": 3970 }, { "epoch": 0.5448308979899842, "grad_norm": 1.1484375, "learning_rate": 1.8479069792821977e-05, "loss": 0.4832, "step": 3971 }, { "epoch": 0.5449681004321877, "grad_norm": 1.21875, "learning_rate": 1.8478304450060355e-05, "loss": 0.5232, "step": 3972 }, { "epoch": 0.5451053028743912, "grad_norm": 1.2578125, "learning_rate": 1.8477538930641538e-05, "loss": 0.5082, "step": 3973 }, { "epoch": 0.5452425053165947, "grad_norm": 1.2265625, "learning_rate": 1.8476773234581484e-05, "loss": 0.5301, "step": 3974 }, { "epoch": 0.5453797077587981, "grad_norm": 1.203125, "learning_rate": 1.8476007361896144e-05, "loss": 0.4834, "step": 3975 }, { "epoch": 0.5455169102010016, "grad_norm": 1.328125, "learning_rate": 1.8475241312601478e-05, "loss": 0.581, "step": 3976 }, { "epoch": 0.5456541126432051, "grad_norm": 1.2734375, "learning_rate": 1.8474475086713445e-05, "loss": 0.5782, "step": 3977 }, { "epoch": 0.5457913150854086, "grad_norm": 1.3203125, "learning_rate": 1.847370868424802e-05, "loss": 0.5456, "step": 3978 }, { "epoch": 0.545928517527612, "grad_norm": 1.28125, "learning_rate": 1.8472942105221156e-05, "loss": 0.6032, "step": 3979 }, { "epoch": 0.5460657199698155, "grad_norm": 1.1328125, "learning_rate": 1.847217534964883e-05, "loss": 0.4771, "step": 3980 }, { "epoch": 0.546202922412019, "grad_norm": 1.21875, "learning_rate": 1.8471408417547035e-05, "loss": 0.5021, "step": 3981 }, { "epoch": 0.5463401248542225, "grad_norm": 1.171875, "learning_rate": 1.8470641308931728e-05, "loss": 0.5055, "step": 3982 }, { "epoch": 0.5464773272964258, "grad_norm": 1.171875, "learning_rate": 1.846987402381891e-05, "loss": 0.5078, "step": 3983 }, { "epoch": 0.5466145297386293, "grad_norm": 1.2734375, "learning_rate": 1.846910656222456e-05, "loss": 0.5651, "step": 3984 }, { "epoch": 0.5467517321808328, "grad_norm": 1.125, "learning_rate": 1.8468338924164667e-05, "loss": 0.496, "step": 3985 }, { "epoch": 0.5468889346230363, "grad_norm": 1.203125, "learning_rate": 1.8467571109655232e-05, "loss": 0.5265, "step": 3986 }, { "epoch": 0.5470261370652397, "grad_norm": 1.171875, "learning_rate": 1.846680311871225e-05, "loss": 0.5193, "step": 3987 }, { "epoch": 0.5471633395074432, "grad_norm": 1.2109375, "learning_rate": 1.8466034951351725e-05, "loss": 0.5359, "step": 3988 }, { "epoch": 0.5473005419496467, "grad_norm": 1.359375, "learning_rate": 1.8465266607589663e-05, "loss": 0.5864, "step": 3989 }, { "epoch": 0.5474377443918502, "grad_norm": 1.3359375, "learning_rate": 1.846449808744207e-05, "loss": 0.6137, "step": 3990 }, { "epoch": 0.5475749468340536, "grad_norm": 1.0703125, "learning_rate": 1.8463729390924963e-05, "loss": 0.4855, "step": 3991 }, { "epoch": 0.5477121492762571, "grad_norm": 1.2421875, "learning_rate": 1.846296051805436e-05, "loss": 0.5771, "step": 3992 }, { "epoch": 0.5478493517184606, "grad_norm": 1.2734375, "learning_rate": 1.8462191468846278e-05, "loss": 0.5306, "step": 3993 }, { "epoch": 0.5479865541606641, "grad_norm": 1.3359375, "learning_rate": 1.8461422243316742e-05, "loss": 0.5636, "step": 3994 }, { "epoch": 0.5481237566028675, "grad_norm": 1.2265625, "learning_rate": 1.8460652841481778e-05, "loss": 0.5398, "step": 3995 }, { "epoch": 0.548260959045071, "grad_norm": 1.109375, "learning_rate": 1.8459883263357423e-05, "loss": 0.4582, "step": 3996 }, { "epoch": 0.5483981614872745, "grad_norm": 1.203125, "learning_rate": 1.845911350895971e-05, "loss": 0.4976, "step": 3997 }, { "epoch": 0.548535363929478, "grad_norm": 1.109375, "learning_rate": 1.845834357830467e-05, "loss": 0.4581, "step": 3998 }, { "epoch": 0.5486725663716814, "grad_norm": 1.1953125, "learning_rate": 1.8457573471408357e-05, "loss": 0.5258, "step": 3999 }, { "epoch": 0.5488097688138849, "grad_norm": 1.265625, "learning_rate": 1.8456803188286812e-05, "loss": 0.6114, "step": 4000 }, { "epoch": 0.5489469712560884, "grad_norm": 1.140625, "learning_rate": 1.8456032728956085e-05, "loss": 0.5026, "step": 4001 }, { "epoch": 0.5490841736982919, "grad_norm": 1.234375, "learning_rate": 1.8455262093432232e-05, "loss": 0.5502, "step": 4002 }, { "epoch": 0.5492213761404953, "grad_norm": 1.2109375, "learning_rate": 1.845449128173131e-05, "loss": 0.4688, "step": 4003 }, { "epoch": 0.5493585785826988, "grad_norm": 1.21875, "learning_rate": 1.8453720293869374e-05, "loss": 0.5051, "step": 4004 }, { "epoch": 0.5494957810249023, "grad_norm": 1.171875, "learning_rate": 1.8452949129862495e-05, "loss": 0.5612, "step": 4005 }, { "epoch": 0.5496329834671058, "grad_norm": 1.21875, "learning_rate": 1.8452177789726736e-05, "loss": 0.4322, "step": 4006 }, { "epoch": 0.5497701859093092, "grad_norm": 1.125, "learning_rate": 1.8451406273478177e-05, "loss": 0.4358, "step": 4007 }, { "epoch": 0.5499073883515126, "grad_norm": 1.296875, "learning_rate": 1.845063458113289e-05, "loss": 0.5665, "step": 4008 }, { "epoch": 0.5500445907937161, "grad_norm": 1.3203125, "learning_rate": 1.8449862712706946e-05, "loss": 0.559, "step": 4009 }, { "epoch": 0.5501817932359196, "grad_norm": 1.203125, "learning_rate": 1.8449090668216437e-05, "loss": 0.4493, "step": 4010 }, { "epoch": 0.550318995678123, "grad_norm": 1.1640625, "learning_rate": 1.844831844767745e-05, "loss": 0.5175, "step": 4011 }, { "epoch": 0.5504561981203265, "grad_norm": 1.265625, "learning_rate": 1.8447546051106073e-05, "loss": 0.5392, "step": 4012 }, { "epoch": 0.55059340056253, "grad_norm": 1.25, "learning_rate": 1.84467734785184e-05, "loss": 0.5703, "step": 4013 }, { "epoch": 0.5507306030047335, "grad_norm": 1.2109375, "learning_rate": 1.8446000729930526e-05, "loss": 0.5671, "step": 4014 }, { "epoch": 0.5508678054469369, "grad_norm": 1.21875, "learning_rate": 1.8445227805358555e-05, "loss": 0.5735, "step": 4015 }, { "epoch": 0.5510050078891404, "grad_norm": 1.171875, "learning_rate": 1.8444454704818592e-05, "loss": 0.5333, "step": 4016 }, { "epoch": 0.5511422103313439, "grad_norm": 1.1875, "learning_rate": 1.844368142832675e-05, "loss": 0.4513, "step": 4017 }, { "epoch": 0.5512794127735474, "grad_norm": 1.046875, "learning_rate": 1.844290797589913e-05, "loss": 0.4462, "step": 4018 }, { "epoch": 0.5514166152157508, "grad_norm": 1.0703125, "learning_rate": 1.8442134347551856e-05, "loss": 0.477, "step": 4019 }, { "epoch": 0.5515538176579543, "grad_norm": 1.265625, "learning_rate": 1.8441360543301047e-05, "loss": 0.4724, "step": 4020 }, { "epoch": 0.5516910201001578, "grad_norm": 1.2578125, "learning_rate": 1.8440586563162827e-05, "loss": 0.5019, "step": 4021 }, { "epoch": 0.5518282225423613, "grad_norm": 1.2578125, "learning_rate": 1.843981240715332e-05, "loss": 0.5458, "step": 4022 }, { "epoch": 0.5519654249845647, "grad_norm": 1.1484375, "learning_rate": 1.8439038075288657e-05, "loss": 0.4918, "step": 4023 }, { "epoch": 0.5521026274267682, "grad_norm": 1.203125, "learning_rate": 1.8438263567584975e-05, "loss": 0.5494, "step": 4024 }, { "epoch": 0.5522398298689717, "grad_norm": 1.359375, "learning_rate": 1.8437488884058412e-05, "loss": 0.5195, "step": 4025 }, { "epoch": 0.5523770323111752, "grad_norm": 1.1875, "learning_rate": 1.8436714024725103e-05, "loss": 0.5272, "step": 4026 }, { "epoch": 0.5525142347533786, "grad_norm": 1.3046875, "learning_rate": 1.84359389896012e-05, "loss": 0.5741, "step": 4027 }, { "epoch": 0.5526514371955821, "grad_norm": 1.3203125, "learning_rate": 1.843516377870285e-05, "loss": 0.5493, "step": 4028 }, { "epoch": 0.5527886396377856, "grad_norm": 1.2421875, "learning_rate": 1.8434388392046206e-05, "loss": 0.5336, "step": 4029 }, { "epoch": 0.5529258420799891, "grad_norm": 1.1875, "learning_rate": 1.8433612829647425e-05, "loss": 0.4753, "step": 4030 }, { "epoch": 0.5530630445221925, "grad_norm": 1.1171875, "learning_rate": 1.8432837091522664e-05, "loss": 0.4876, "step": 4031 }, { "epoch": 0.553200246964396, "grad_norm": 1.1953125, "learning_rate": 1.8432061177688087e-05, "loss": 0.5636, "step": 4032 }, { "epoch": 0.5533374494065995, "grad_norm": 1.28125, "learning_rate": 1.8431285088159863e-05, "loss": 0.5379, "step": 4033 }, { "epoch": 0.553474651848803, "grad_norm": 1.203125, "learning_rate": 1.8430508822954167e-05, "loss": 0.5124, "step": 4034 }, { "epoch": 0.5536118542910063, "grad_norm": 1.3046875, "learning_rate": 1.8429732382087165e-05, "loss": 0.6106, "step": 4035 }, { "epoch": 0.5537490567332098, "grad_norm": 1.296875, "learning_rate": 1.8428955765575042e-05, "loss": 0.4937, "step": 4036 }, { "epoch": 0.5538862591754133, "grad_norm": 1.1875, "learning_rate": 1.8428178973433974e-05, "loss": 0.5581, "step": 4037 }, { "epoch": 0.5540234616176168, "grad_norm": 1.3828125, "learning_rate": 1.842740200568015e-05, "loss": 0.5929, "step": 4038 }, { "epoch": 0.5541606640598202, "grad_norm": 1.234375, "learning_rate": 1.842662486232976e-05, "loss": 0.5583, "step": 4039 }, { "epoch": 0.5542978665020237, "grad_norm": 1.09375, "learning_rate": 1.8425847543398992e-05, "loss": 0.4668, "step": 4040 }, { "epoch": 0.5544350689442272, "grad_norm": 1.2578125, "learning_rate": 1.842507004890405e-05, "loss": 0.5265, "step": 4041 }, { "epoch": 0.5545722713864307, "grad_norm": 1.2109375, "learning_rate": 1.8424292378861128e-05, "loss": 0.5505, "step": 4042 }, { "epoch": 0.5547094738286341, "grad_norm": 1.328125, "learning_rate": 1.8423514533286433e-05, "loss": 0.6029, "step": 4043 }, { "epoch": 0.5548466762708376, "grad_norm": 1.0859375, "learning_rate": 1.8422736512196175e-05, "loss": 0.4312, "step": 4044 }, { "epoch": 0.5549838787130411, "grad_norm": 1.125, "learning_rate": 1.842195831560656e-05, "loss": 0.5322, "step": 4045 }, { "epoch": 0.5551210811552446, "grad_norm": 1.15625, "learning_rate": 1.8421179943533803e-05, "loss": 0.5197, "step": 4046 }, { "epoch": 0.555258283597448, "grad_norm": 1.109375, "learning_rate": 1.8420401395994127e-05, "loss": 0.4644, "step": 4047 }, { "epoch": 0.5553954860396515, "grad_norm": 1.1015625, "learning_rate": 1.841962267300375e-05, "loss": 0.4216, "step": 4048 }, { "epoch": 0.555532688481855, "grad_norm": 1.171875, "learning_rate": 1.84188437745789e-05, "loss": 0.526, "step": 4049 }, { "epoch": 0.5556698909240585, "grad_norm": 1.15625, "learning_rate": 1.84180647007358e-05, "loss": 0.4573, "step": 4050 }, { "epoch": 0.5558070933662619, "grad_norm": 1.1640625, "learning_rate": 1.8417285451490695e-05, "loss": 0.518, "step": 4051 }, { "epoch": 0.5559442958084654, "grad_norm": 1.234375, "learning_rate": 1.8416506026859813e-05, "loss": 0.5419, "step": 4052 }, { "epoch": 0.5560814982506689, "grad_norm": 1.265625, "learning_rate": 1.8415726426859396e-05, "loss": 0.5484, "step": 4053 }, { "epoch": 0.5562187006928724, "grad_norm": 1.1953125, "learning_rate": 1.8414946651505686e-05, "loss": 0.4999, "step": 4054 }, { "epoch": 0.5563559031350758, "grad_norm": 1.1875, "learning_rate": 1.8414166700814936e-05, "loss": 0.4599, "step": 4055 }, { "epoch": 0.5564931055772793, "grad_norm": 1.125, "learning_rate": 1.8413386574803396e-05, "loss": 0.4308, "step": 4056 }, { "epoch": 0.5566303080194828, "grad_norm": 1.1953125, "learning_rate": 1.8412606273487318e-05, "loss": 0.5067, "step": 4057 }, { "epoch": 0.5567675104616863, "grad_norm": 1.21875, "learning_rate": 1.8411825796882963e-05, "loss": 0.546, "step": 4058 }, { "epoch": 0.5569047129038897, "grad_norm": 1.234375, "learning_rate": 1.8411045145006593e-05, "loss": 0.5489, "step": 4059 }, { "epoch": 0.5570419153460932, "grad_norm": 1.3359375, "learning_rate": 1.841026431787447e-05, "loss": 0.6004, "step": 4060 }, { "epoch": 0.5571791177882967, "grad_norm": 1.28125, "learning_rate": 1.8409483315502874e-05, "loss": 0.4778, "step": 4061 }, { "epoch": 0.5573163202305002, "grad_norm": 1.3125, "learning_rate": 1.8408702137908068e-05, "loss": 0.5597, "step": 4062 }, { "epoch": 0.5574535226727035, "grad_norm": 1.3125, "learning_rate": 1.8407920785106333e-05, "loss": 0.5961, "step": 4063 }, { "epoch": 0.557590725114907, "grad_norm": 1.203125, "learning_rate": 1.8407139257113946e-05, "loss": 0.5141, "step": 4064 }, { "epoch": 0.5577279275571105, "grad_norm": 1.21875, "learning_rate": 1.8406357553947197e-05, "loss": 0.5342, "step": 4065 }, { "epoch": 0.557865129999314, "grad_norm": 1.25, "learning_rate": 1.840557567562237e-05, "loss": 0.5982, "step": 4066 }, { "epoch": 0.5580023324415174, "grad_norm": 1.265625, "learning_rate": 1.840479362215576e-05, "loss": 0.5581, "step": 4067 }, { "epoch": 0.5581395348837209, "grad_norm": 1.1328125, "learning_rate": 1.8404011393563656e-05, "loss": 0.4751, "step": 4068 }, { "epoch": 0.5582767373259244, "grad_norm": 1.328125, "learning_rate": 1.8403228989862364e-05, "loss": 0.5618, "step": 4069 }, { "epoch": 0.5584139397681279, "grad_norm": 1.1640625, "learning_rate": 1.8402446411068184e-05, "loss": 0.4947, "step": 4070 }, { "epoch": 0.5585511422103313, "grad_norm": 1.2734375, "learning_rate": 1.8401663657197416e-05, "loss": 0.5672, "step": 4071 }, { "epoch": 0.5586883446525348, "grad_norm": 1.1796875, "learning_rate": 1.8400880728266376e-05, "loss": 0.459, "step": 4072 }, { "epoch": 0.5588255470947383, "grad_norm": 1.203125, "learning_rate": 1.840009762429138e-05, "loss": 0.5046, "step": 4073 }, { "epoch": 0.5589627495369418, "grad_norm": 1.09375, "learning_rate": 1.839931434528874e-05, "loss": 0.4646, "step": 4074 }, { "epoch": 0.5590999519791452, "grad_norm": 1.1640625, "learning_rate": 1.8398530891274778e-05, "loss": 0.468, "step": 4075 }, { "epoch": 0.5592371544213487, "grad_norm": 1.203125, "learning_rate": 1.8397747262265816e-05, "loss": 0.5474, "step": 4076 }, { "epoch": 0.5593743568635522, "grad_norm": 1.1875, "learning_rate": 1.8396963458278186e-05, "loss": 0.5075, "step": 4077 }, { "epoch": 0.5595115593057557, "grad_norm": 1.234375, "learning_rate": 1.8396179479328218e-05, "loss": 0.4998, "step": 4078 }, { "epoch": 0.5596487617479591, "grad_norm": 1.2265625, "learning_rate": 1.8395395325432247e-05, "loss": 0.4944, "step": 4079 }, { "epoch": 0.5597859641901626, "grad_norm": 1.171875, "learning_rate": 1.839461099660661e-05, "loss": 0.5496, "step": 4080 }, { "epoch": 0.5599231666323661, "grad_norm": 1.203125, "learning_rate": 1.839382649286766e-05, "loss": 0.5326, "step": 4081 }, { "epoch": 0.5600603690745696, "grad_norm": 1.28125, "learning_rate": 1.8393041814231726e-05, "loss": 0.5493, "step": 4082 }, { "epoch": 0.560197571516773, "grad_norm": 1.21875, "learning_rate": 1.8392256960715173e-05, "loss": 0.6121, "step": 4083 }, { "epoch": 0.5603347739589765, "grad_norm": 1.171875, "learning_rate": 1.8391471932334344e-05, "loss": 0.5122, "step": 4084 }, { "epoch": 0.56047197640118, "grad_norm": 1.1796875, "learning_rate": 1.8390686729105603e-05, "loss": 0.5443, "step": 4085 }, { "epoch": 0.5606091788433835, "grad_norm": 1.21875, "learning_rate": 1.8389901351045308e-05, "loss": 0.4606, "step": 4086 }, { "epoch": 0.5607463812855868, "grad_norm": 1.2421875, "learning_rate": 1.8389115798169823e-05, "loss": 0.4852, "step": 4087 }, { "epoch": 0.5608835837277903, "grad_norm": 1.2421875, "learning_rate": 1.838833007049552e-05, "loss": 0.5429, "step": 4088 }, { "epoch": 0.5610207861699938, "grad_norm": 1.09375, "learning_rate": 1.8387544168038763e-05, "loss": 0.4533, "step": 4089 }, { "epoch": 0.5611579886121973, "grad_norm": 1.1484375, "learning_rate": 1.838675809081593e-05, "loss": 0.4465, "step": 4090 }, { "epoch": 0.5612951910544007, "grad_norm": 1.171875, "learning_rate": 1.838597183884341e-05, "loss": 0.5167, "step": 4091 }, { "epoch": 0.5614323934966042, "grad_norm": 1.109375, "learning_rate": 1.8385185412137572e-05, "loss": 0.3988, "step": 4092 }, { "epoch": 0.5615695959388077, "grad_norm": 1.2109375, "learning_rate": 1.838439881071481e-05, "loss": 0.498, "step": 4093 }, { "epoch": 0.5617067983810112, "grad_norm": 1.203125, "learning_rate": 1.8383612034591515e-05, "loss": 0.5185, "step": 4094 }, { "epoch": 0.5618440008232146, "grad_norm": 1.2265625, "learning_rate": 1.8382825083784072e-05, "loss": 0.4997, "step": 4095 }, { "epoch": 0.5619812032654181, "grad_norm": 1.1171875, "learning_rate": 1.8382037958308884e-05, "loss": 0.4902, "step": 4096 }, { "epoch": 0.5621184057076216, "grad_norm": 1.25, "learning_rate": 1.8381250658182354e-05, "loss": 0.5838, "step": 4097 }, { "epoch": 0.5622556081498251, "grad_norm": 1.2890625, "learning_rate": 1.8380463183420882e-05, "loss": 0.5941, "step": 4098 }, { "epoch": 0.5623928105920285, "grad_norm": 1.2109375, "learning_rate": 1.837967553404088e-05, "loss": 0.5223, "step": 4099 }, { "epoch": 0.562530013034232, "grad_norm": 1.1640625, "learning_rate": 1.8378887710058757e-05, "loss": 0.5018, "step": 4100 }, { "epoch": 0.5626672154764355, "grad_norm": 1.1484375, "learning_rate": 1.8378099711490928e-05, "loss": 0.4863, "step": 4101 }, { "epoch": 0.562804417918639, "grad_norm": 1.1796875, "learning_rate": 1.8377311538353814e-05, "loss": 0.5409, "step": 4102 }, { "epoch": 0.5629416203608424, "grad_norm": 1.28125, "learning_rate": 1.837652319066384e-05, "loss": 0.5162, "step": 4103 }, { "epoch": 0.5630788228030459, "grad_norm": 1.140625, "learning_rate": 1.8375734668437423e-05, "loss": 0.5231, "step": 4104 }, { "epoch": 0.5632160252452494, "grad_norm": 1.328125, "learning_rate": 1.8374945971691005e-05, "loss": 0.6161, "step": 4105 }, { "epoch": 0.5633532276874529, "grad_norm": 1.203125, "learning_rate": 1.837415710044101e-05, "loss": 0.5292, "step": 4106 }, { "epoch": 0.5634904301296563, "grad_norm": 1.1953125, "learning_rate": 1.837336805470388e-05, "loss": 0.519, "step": 4107 }, { "epoch": 0.5636276325718598, "grad_norm": 1.109375, "learning_rate": 1.8372578834496054e-05, "loss": 0.4783, "step": 4108 }, { "epoch": 0.5637648350140633, "grad_norm": 1.1875, "learning_rate": 1.837178943983398e-05, "loss": 0.5299, "step": 4109 }, { "epoch": 0.5639020374562668, "grad_norm": 1.34375, "learning_rate": 1.8370999870734103e-05, "loss": 0.6318, "step": 4110 }, { "epoch": 0.5640392398984702, "grad_norm": 1.2734375, "learning_rate": 1.8370210127212873e-05, "loss": 0.5105, "step": 4111 }, { "epoch": 0.5641764423406737, "grad_norm": 1.296875, "learning_rate": 1.836942020928675e-05, "loss": 0.5924, "step": 4112 }, { "epoch": 0.5643136447828772, "grad_norm": 1.2265625, "learning_rate": 1.836863011697219e-05, "loss": 0.5093, "step": 4113 }, { "epoch": 0.5644508472250807, "grad_norm": 1.3046875, "learning_rate": 1.8367839850285656e-05, "loss": 0.5614, "step": 4114 }, { "epoch": 0.564588049667284, "grad_norm": 1.2890625, "learning_rate": 1.8367049409243612e-05, "loss": 0.5763, "step": 4115 }, { "epoch": 0.5647252521094875, "grad_norm": 1.234375, "learning_rate": 1.8366258793862533e-05, "loss": 0.5298, "step": 4116 }, { "epoch": 0.564862454551691, "grad_norm": 1.203125, "learning_rate": 1.8365468004158896e-05, "loss": 0.5143, "step": 4117 }, { "epoch": 0.5649996569938945, "grad_norm": 1.125, "learning_rate": 1.8364677040149166e-05, "loss": 0.4838, "step": 4118 }, { "epoch": 0.5651368594360979, "grad_norm": 1.1875, "learning_rate": 1.836388590184983e-05, "loss": 0.5468, "step": 4119 }, { "epoch": 0.5652740618783014, "grad_norm": 1.1640625, "learning_rate": 1.8363094589277373e-05, "loss": 0.4597, "step": 4120 }, { "epoch": 0.5654112643205049, "grad_norm": 1.1640625, "learning_rate": 1.8362303102448283e-05, "loss": 0.5029, "step": 4121 }, { "epoch": 0.5655484667627084, "grad_norm": 1.21875, "learning_rate": 1.836151144137905e-05, "loss": 0.5134, "step": 4122 }, { "epoch": 0.5656856692049118, "grad_norm": 1.0859375, "learning_rate": 1.8360719606086176e-05, "loss": 0.428, "step": 4123 }, { "epoch": 0.5658228716471153, "grad_norm": 1.203125, "learning_rate": 1.8359927596586153e-05, "loss": 0.5304, "step": 4124 }, { "epoch": 0.5659600740893188, "grad_norm": 1.234375, "learning_rate": 1.835913541289548e-05, "loss": 0.6028, "step": 4125 }, { "epoch": 0.5660972765315223, "grad_norm": 1.1796875, "learning_rate": 1.8358343055030675e-05, "loss": 0.5086, "step": 4126 }, { "epoch": 0.5662344789737257, "grad_norm": 1.140625, "learning_rate": 1.8357550523008237e-05, "loss": 0.5213, "step": 4127 }, { "epoch": 0.5663716814159292, "grad_norm": 1.2109375, "learning_rate": 1.8356757816844685e-05, "loss": 0.5694, "step": 4128 }, { "epoch": 0.5665088838581327, "grad_norm": 1.4375, "learning_rate": 1.8355964936556537e-05, "loss": 0.6442, "step": 4129 }, { "epoch": 0.5666460863003362, "grad_norm": 1.1640625, "learning_rate": 1.8355171882160312e-05, "loss": 0.4986, "step": 4130 }, { "epoch": 0.5667832887425396, "grad_norm": 1.2265625, "learning_rate": 1.8354378653672532e-05, "loss": 0.5727, "step": 4131 }, { "epoch": 0.5669204911847431, "grad_norm": 1.1796875, "learning_rate": 1.835358525110973e-05, "loss": 0.5009, "step": 4132 }, { "epoch": 0.5670576936269466, "grad_norm": 1.3515625, "learning_rate": 1.8352791674488433e-05, "loss": 0.6172, "step": 4133 }, { "epoch": 0.5671948960691501, "grad_norm": 1.2265625, "learning_rate": 1.8351997923825176e-05, "loss": 0.5538, "step": 4134 }, { "epoch": 0.5673320985113535, "grad_norm": 1.2734375, "learning_rate": 1.8351203999136507e-05, "loss": 0.5632, "step": 4135 }, { "epoch": 0.567469300953557, "grad_norm": 1.2734375, "learning_rate": 1.835040990043896e-05, "loss": 0.4909, "step": 4136 }, { "epoch": 0.5676065033957605, "grad_norm": 1.1875, "learning_rate": 1.8349615627749076e-05, "loss": 0.4804, "step": 4137 }, { "epoch": 0.567743705837964, "grad_norm": 1.3828125, "learning_rate": 1.8348821181083417e-05, "loss": 0.5666, "step": 4138 }, { "epoch": 0.5678809082801674, "grad_norm": 1.25, "learning_rate": 1.834802656045853e-05, "loss": 0.5401, "step": 4139 }, { "epoch": 0.5680181107223709, "grad_norm": 1.25, "learning_rate": 1.8347231765890973e-05, "loss": 0.5613, "step": 4140 }, { "epoch": 0.5681553131645743, "grad_norm": 1.2890625, "learning_rate": 1.8346436797397307e-05, "loss": 0.5654, "step": 4141 }, { "epoch": 0.5682925156067778, "grad_norm": 1.171875, "learning_rate": 1.8345641654994093e-05, "loss": 0.5071, "step": 4142 }, { "epoch": 0.5684297180489812, "grad_norm": 1.234375, "learning_rate": 1.8344846338697905e-05, "loss": 0.5524, "step": 4143 }, { "epoch": 0.5685669204911847, "grad_norm": 1.3515625, "learning_rate": 1.8344050848525307e-05, "loss": 0.5901, "step": 4144 }, { "epoch": 0.5687041229333882, "grad_norm": 1.296875, "learning_rate": 1.8343255184492884e-05, "loss": 0.5615, "step": 4145 }, { "epoch": 0.5688413253755917, "grad_norm": 1.1171875, "learning_rate": 1.8342459346617206e-05, "loss": 0.4518, "step": 4146 }, { "epoch": 0.5689785278177951, "grad_norm": 1.1640625, "learning_rate": 1.834166333491486e-05, "loss": 0.507, "step": 4147 }, { "epoch": 0.5691157302599986, "grad_norm": 1.1171875, "learning_rate": 1.8340867149402428e-05, "loss": 0.5352, "step": 4148 }, { "epoch": 0.5692529327022021, "grad_norm": 1.1875, "learning_rate": 1.8340070790096503e-05, "loss": 0.5012, "step": 4149 }, { "epoch": 0.5693901351444056, "grad_norm": 1.125, "learning_rate": 1.8339274257013677e-05, "loss": 0.5025, "step": 4150 }, { "epoch": 0.569527337586609, "grad_norm": 1.0859375, "learning_rate": 1.833847755017055e-05, "loss": 0.4437, "step": 4151 }, { "epoch": 0.5696645400288125, "grad_norm": 1.1484375, "learning_rate": 1.8337680669583717e-05, "loss": 0.4202, "step": 4152 }, { "epoch": 0.569801742471016, "grad_norm": 1.203125, "learning_rate": 1.833688361526979e-05, "loss": 0.5341, "step": 4153 }, { "epoch": 0.5699389449132195, "grad_norm": 1.171875, "learning_rate": 1.8336086387245365e-05, "loss": 0.4863, "step": 4154 }, { "epoch": 0.5700761473554229, "grad_norm": 1.2734375, "learning_rate": 1.8335288985527063e-05, "loss": 0.5084, "step": 4155 }, { "epoch": 0.5702133497976264, "grad_norm": 1.0703125, "learning_rate": 1.8334491410131494e-05, "loss": 0.4596, "step": 4156 }, { "epoch": 0.5703505522398299, "grad_norm": 1.203125, "learning_rate": 1.833369366107528e-05, "loss": 0.4912, "step": 4157 }, { "epoch": 0.5704877546820334, "grad_norm": 1.25, "learning_rate": 1.833289573837504e-05, "loss": 0.5386, "step": 4158 }, { "epoch": 0.5706249571242368, "grad_norm": 1.234375, "learning_rate": 1.8332097642047405e-05, "loss": 0.5307, "step": 4159 }, { "epoch": 0.5707621595664403, "grad_norm": 1.2578125, "learning_rate": 1.8331299372108996e-05, "loss": 0.5765, "step": 4160 }, { "epoch": 0.5708993620086438, "grad_norm": 1.1875, "learning_rate": 1.8330500928576457e-05, "loss": 0.5478, "step": 4161 }, { "epoch": 0.5710365644508473, "grad_norm": 1.265625, "learning_rate": 1.8329702311466416e-05, "loss": 0.5293, "step": 4162 }, { "epoch": 0.5711737668930507, "grad_norm": 1.2578125, "learning_rate": 1.8328903520795516e-05, "loss": 0.62, "step": 4163 }, { "epoch": 0.5713109693352542, "grad_norm": 1.2265625, "learning_rate": 1.83281045565804e-05, "loss": 0.5109, "step": 4164 }, { "epoch": 0.5714481717774577, "grad_norm": 1.296875, "learning_rate": 1.8327305418837715e-05, "loss": 0.5808, "step": 4165 }, { "epoch": 0.5715853742196612, "grad_norm": 1.1875, "learning_rate": 1.8326506107584116e-05, "loss": 0.5059, "step": 4166 }, { "epoch": 0.5717225766618645, "grad_norm": 1.2578125, "learning_rate": 1.8325706622836258e-05, "loss": 0.5237, "step": 4167 }, { "epoch": 0.571859779104068, "grad_norm": 1.1796875, "learning_rate": 1.832490696461079e-05, "loss": 0.5314, "step": 4168 }, { "epoch": 0.5719969815462715, "grad_norm": 1.359375, "learning_rate": 1.8324107132924388e-05, "loss": 0.5822, "step": 4169 }, { "epoch": 0.572134183988475, "grad_norm": 1.203125, "learning_rate": 1.8323307127793704e-05, "loss": 0.5459, "step": 4170 }, { "epoch": 0.5722713864306784, "grad_norm": 1.1640625, "learning_rate": 1.8322506949235417e-05, "loss": 0.5287, "step": 4171 }, { "epoch": 0.5724085888728819, "grad_norm": 1.1328125, "learning_rate": 1.8321706597266195e-05, "loss": 0.481, "step": 4172 }, { "epoch": 0.5725457913150854, "grad_norm": 1.234375, "learning_rate": 1.8320906071902715e-05, "loss": 0.5311, "step": 4173 }, { "epoch": 0.5726829937572889, "grad_norm": 1.2265625, "learning_rate": 1.8320105373161657e-05, "loss": 0.5619, "step": 4174 }, { "epoch": 0.5728201961994923, "grad_norm": 1.0859375, "learning_rate": 1.831930450105971e-05, "loss": 0.4555, "step": 4175 }, { "epoch": 0.5729573986416958, "grad_norm": 1.28125, "learning_rate": 1.8318503455613553e-05, "loss": 0.5264, "step": 4176 }, { "epoch": 0.5730946010838993, "grad_norm": 1.140625, "learning_rate": 1.8317702236839883e-05, "loss": 0.4689, "step": 4177 }, { "epoch": 0.5732318035261028, "grad_norm": 1.1796875, "learning_rate": 1.831690084475539e-05, "loss": 0.5023, "step": 4178 }, { "epoch": 0.5733690059683062, "grad_norm": 1.28125, "learning_rate": 1.8316099279376775e-05, "loss": 0.6275, "step": 4179 }, { "epoch": 0.5735062084105097, "grad_norm": 1.3046875, "learning_rate": 1.831529754072074e-05, "loss": 0.5918, "step": 4180 }, { "epoch": 0.5736434108527132, "grad_norm": 1.203125, "learning_rate": 1.8314495628803987e-05, "loss": 0.5989, "step": 4181 }, { "epoch": 0.5737806132949167, "grad_norm": 1.0546875, "learning_rate": 1.831369354364323e-05, "loss": 0.4316, "step": 4182 }, { "epoch": 0.5739178157371201, "grad_norm": 1.1796875, "learning_rate": 1.8312891285255178e-05, "loss": 0.5007, "step": 4183 }, { "epoch": 0.5740550181793236, "grad_norm": 1.2265625, "learning_rate": 1.8312088853656546e-05, "loss": 0.5618, "step": 4184 }, { "epoch": 0.5741922206215271, "grad_norm": 1.3203125, "learning_rate": 1.8311286248864054e-05, "loss": 0.5594, "step": 4185 }, { "epoch": 0.5743294230637306, "grad_norm": 1.25, "learning_rate": 1.8310483470894432e-05, "loss": 0.5299, "step": 4186 }, { "epoch": 0.574466625505934, "grad_norm": 1.125, "learning_rate": 1.83096805197644e-05, "loss": 0.4797, "step": 4187 }, { "epoch": 0.5746038279481375, "grad_norm": 1.3125, "learning_rate": 1.830887739549069e-05, "loss": 0.6351, "step": 4188 }, { "epoch": 0.574741030390341, "grad_norm": 1.125, "learning_rate": 1.8308074098090038e-05, "loss": 0.4784, "step": 4189 }, { "epoch": 0.5748782328325445, "grad_norm": 1.2890625, "learning_rate": 1.830727062757918e-05, "loss": 0.5497, "step": 4190 }, { "epoch": 0.5750154352747479, "grad_norm": 1.1640625, "learning_rate": 1.830646698397486e-05, "loss": 0.5274, "step": 4191 }, { "epoch": 0.5751526377169514, "grad_norm": 1.1640625, "learning_rate": 1.830566316729382e-05, "loss": 0.4976, "step": 4192 }, { "epoch": 0.5752898401591549, "grad_norm": 1.296875, "learning_rate": 1.8304859177552806e-05, "loss": 0.5508, "step": 4193 }, { "epoch": 0.5754270426013584, "grad_norm": 1.2734375, "learning_rate": 1.8304055014768582e-05, "loss": 0.5472, "step": 4194 }, { "epoch": 0.5755642450435617, "grad_norm": 1.2578125, "learning_rate": 1.8303250678957888e-05, "loss": 0.5676, "step": 4195 }, { "epoch": 0.5757014474857652, "grad_norm": 1.1484375, "learning_rate": 1.8302446170137496e-05, "loss": 0.4434, "step": 4196 }, { "epoch": 0.5758386499279687, "grad_norm": 1.1328125, "learning_rate": 1.8301641488324164e-05, "loss": 0.4894, "step": 4197 }, { "epoch": 0.5759758523701722, "grad_norm": 1.1796875, "learning_rate": 1.830083663353466e-05, "loss": 0.5599, "step": 4198 }, { "epoch": 0.5761130548123756, "grad_norm": 1.203125, "learning_rate": 1.8300031605785745e-05, "loss": 0.4775, "step": 4199 }, { "epoch": 0.5762502572545791, "grad_norm": 1.171875, "learning_rate": 1.829922640509421e-05, "loss": 0.5145, "step": 4200 }, { "epoch": 0.5763874596967826, "grad_norm": 1.125, "learning_rate": 1.829842103147682e-05, "loss": 0.4505, "step": 4201 }, { "epoch": 0.5765246621389861, "grad_norm": 1.1953125, "learning_rate": 1.829761548495036e-05, "loss": 0.5919, "step": 4202 }, { "epoch": 0.5766618645811895, "grad_norm": 1.1171875, "learning_rate": 1.8296809765531615e-05, "loss": 0.4485, "step": 4203 }, { "epoch": 0.576799067023393, "grad_norm": 1.140625, "learning_rate": 1.8296003873237372e-05, "loss": 0.5049, "step": 4204 }, { "epoch": 0.5769362694655965, "grad_norm": 1.328125, "learning_rate": 1.8295197808084427e-05, "loss": 0.6632, "step": 4205 }, { "epoch": 0.5770734719078, "grad_norm": 1.1640625, "learning_rate": 1.829439157008957e-05, "loss": 0.5022, "step": 4206 }, { "epoch": 0.5772106743500034, "grad_norm": 1.21875, "learning_rate": 1.82935851592696e-05, "loss": 0.5499, "step": 4207 }, { "epoch": 0.5773478767922069, "grad_norm": 1.1015625, "learning_rate": 1.8292778575641323e-05, "loss": 0.4661, "step": 4208 }, { "epoch": 0.5774850792344104, "grad_norm": 1.2421875, "learning_rate": 1.8291971819221543e-05, "loss": 0.538, "step": 4209 }, { "epoch": 0.5776222816766139, "grad_norm": 1.2890625, "learning_rate": 1.8291164890027073e-05, "loss": 0.5922, "step": 4210 }, { "epoch": 0.5777594841188173, "grad_norm": 1.21875, "learning_rate": 1.829035778807472e-05, "loss": 0.5391, "step": 4211 }, { "epoch": 0.5778966865610208, "grad_norm": 1.1640625, "learning_rate": 1.828955051338131e-05, "loss": 0.4835, "step": 4212 }, { "epoch": 0.5780338890032243, "grad_norm": 1.0625, "learning_rate": 1.8288743065963657e-05, "loss": 0.4174, "step": 4213 }, { "epoch": 0.5781710914454278, "grad_norm": 1.2265625, "learning_rate": 1.828793544583859e-05, "loss": 0.5682, "step": 4214 }, { "epoch": 0.5783082938876312, "grad_norm": 1.1953125, "learning_rate": 1.828712765302293e-05, "loss": 0.5134, "step": 4215 }, { "epoch": 0.5784454963298347, "grad_norm": 1.2265625, "learning_rate": 1.828631968753352e-05, "loss": 0.5441, "step": 4216 }, { "epoch": 0.5785826987720382, "grad_norm": 1.1640625, "learning_rate": 1.8285511549387182e-05, "loss": 0.5282, "step": 4217 }, { "epoch": 0.5787199012142417, "grad_norm": 1.265625, "learning_rate": 1.828470323860076e-05, "loss": 0.607, "step": 4218 }, { "epoch": 0.578857103656445, "grad_norm": 1.203125, "learning_rate": 1.82838947551911e-05, "loss": 0.54, "step": 4219 }, { "epoch": 0.5789943060986485, "grad_norm": 1.390625, "learning_rate": 1.8283086099175045e-05, "loss": 0.6287, "step": 4220 }, { "epoch": 0.579131508540852, "grad_norm": 1.1484375, "learning_rate": 1.8282277270569442e-05, "loss": 0.4877, "step": 4221 }, { "epoch": 0.5792687109830555, "grad_norm": 1.1484375, "learning_rate": 1.8281468269391148e-05, "loss": 0.4662, "step": 4222 }, { "epoch": 0.5794059134252589, "grad_norm": 1.2109375, "learning_rate": 1.8280659095657017e-05, "loss": 0.5577, "step": 4223 }, { "epoch": 0.5795431158674624, "grad_norm": 1.234375, "learning_rate": 1.8279849749383907e-05, "loss": 0.457, "step": 4224 }, { "epoch": 0.5796803183096659, "grad_norm": 1.1328125, "learning_rate": 1.8279040230588692e-05, "loss": 0.5025, "step": 4225 }, { "epoch": 0.5798175207518694, "grad_norm": 1.2265625, "learning_rate": 1.8278230539288228e-05, "loss": 0.4877, "step": 4226 }, { "epoch": 0.5799547231940728, "grad_norm": 1.125, "learning_rate": 1.8277420675499394e-05, "loss": 0.4482, "step": 4227 }, { "epoch": 0.5800919256362763, "grad_norm": 1.2578125, "learning_rate": 1.8276610639239056e-05, "loss": 0.5604, "step": 4228 }, { "epoch": 0.5802291280784798, "grad_norm": 1.1640625, "learning_rate": 1.82758004305241e-05, "loss": 0.5034, "step": 4229 }, { "epoch": 0.5803663305206833, "grad_norm": 1.1484375, "learning_rate": 1.8274990049371405e-05, "loss": 0.4677, "step": 4230 }, { "epoch": 0.5805035329628867, "grad_norm": 1.1875, "learning_rate": 1.8274179495797855e-05, "loss": 0.5413, "step": 4231 }, { "epoch": 0.5806407354050902, "grad_norm": 1.2421875, "learning_rate": 1.827336876982034e-05, "loss": 0.5486, "step": 4232 }, { "epoch": 0.5807779378472937, "grad_norm": 1.1875, "learning_rate": 1.8272557871455757e-05, "loss": 0.5193, "step": 4233 }, { "epoch": 0.5809151402894972, "grad_norm": 1.21875, "learning_rate": 1.8271746800720996e-05, "loss": 0.5378, "step": 4234 }, { "epoch": 0.5810523427317006, "grad_norm": 1.109375, "learning_rate": 1.827093555763296e-05, "loss": 0.4507, "step": 4235 }, { "epoch": 0.5811895451739041, "grad_norm": 1.15625, "learning_rate": 1.8270124142208553e-05, "loss": 0.4431, "step": 4236 }, { "epoch": 0.5813267476161076, "grad_norm": 1.203125, "learning_rate": 1.8269312554464678e-05, "loss": 0.5494, "step": 4237 }, { "epoch": 0.5814639500583111, "grad_norm": 1.15625, "learning_rate": 1.8268500794418245e-05, "loss": 0.5393, "step": 4238 }, { "epoch": 0.5816011525005145, "grad_norm": 1.4453125, "learning_rate": 1.8267688862086178e-05, "loss": 0.5468, "step": 4239 }, { "epoch": 0.581738354942718, "grad_norm": 1.203125, "learning_rate": 1.8266876757485384e-05, "loss": 0.5177, "step": 4240 }, { "epoch": 0.5818755573849215, "grad_norm": 1.1484375, "learning_rate": 1.826606448063279e-05, "loss": 0.4897, "step": 4241 }, { "epoch": 0.582012759827125, "grad_norm": 1.1875, "learning_rate": 1.8265252031545318e-05, "loss": 0.4939, "step": 4242 }, { "epoch": 0.5821499622693284, "grad_norm": 1.3984375, "learning_rate": 1.8264439410239897e-05, "loss": 0.6036, "step": 4243 }, { "epoch": 0.5822871647115319, "grad_norm": 1.2265625, "learning_rate": 1.826362661673346e-05, "loss": 0.5513, "step": 4244 }, { "epoch": 0.5824243671537354, "grad_norm": 1.421875, "learning_rate": 1.8262813651042943e-05, "loss": 0.5988, "step": 4245 }, { "epoch": 0.5825615695959389, "grad_norm": 1.1640625, "learning_rate": 1.8262000513185285e-05, "loss": 0.4815, "step": 4246 }, { "epoch": 0.5826987720381422, "grad_norm": 1.15625, "learning_rate": 1.8261187203177425e-05, "loss": 0.5205, "step": 4247 }, { "epoch": 0.5828359744803457, "grad_norm": 1.09375, "learning_rate": 1.826037372103632e-05, "loss": 0.4776, "step": 4248 }, { "epoch": 0.5829731769225492, "grad_norm": 1.2421875, "learning_rate": 1.8259560066778906e-05, "loss": 0.5564, "step": 4249 }, { "epoch": 0.5831103793647527, "grad_norm": 1.2265625, "learning_rate": 1.8258746240422148e-05, "loss": 0.562, "step": 4250 }, { "epoch": 0.5832475818069561, "grad_norm": 1.1640625, "learning_rate": 1.8257932241982996e-05, "loss": 0.4907, "step": 4251 }, { "epoch": 0.5833847842491596, "grad_norm": 1.28125, "learning_rate": 1.8257118071478418e-05, "loss": 0.5491, "step": 4252 }, { "epoch": 0.5835219866913631, "grad_norm": 1.25, "learning_rate": 1.8256303728925368e-05, "loss": 0.5753, "step": 4253 }, { "epoch": 0.5836591891335666, "grad_norm": 1.109375, "learning_rate": 1.825548921434082e-05, "loss": 0.4913, "step": 4254 }, { "epoch": 0.58379639157577, "grad_norm": 1.21875, "learning_rate": 1.825467452774175e-05, "loss": 0.5354, "step": 4255 }, { "epoch": 0.5839335940179735, "grad_norm": 1.09375, "learning_rate": 1.8253859669145128e-05, "loss": 0.434, "step": 4256 }, { "epoch": 0.584070796460177, "grad_norm": 1.1328125, "learning_rate": 1.825304463856793e-05, "loss": 0.4869, "step": 4257 }, { "epoch": 0.5842079989023805, "grad_norm": 1.1875, "learning_rate": 1.8252229436027144e-05, "loss": 0.4682, "step": 4258 }, { "epoch": 0.5843452013445839, "grad_norm": 1.2109375, "learning_rate": 1.8251414061539753e-05, "loss": 0.5342, "step": 4259 }, { "epoch": 0.5844824037867874, "grad_norm": 1.140625, "learning_rate": 1.8250598515122747e-05, "loss": 0.4703, "step": 4260 }, { "epoch": 0.5846196062289909, "grad_norm": 1.1796875, "learning_rate": 1.824978279679312e-05, "loss": 0.524, "step": 4261 }, { "epoch": 0.5847568086711944, "grad_norm": 0.96875, "learning_rate": 1.8248966906567865e-05, "loss": 0.3807, "step": 4262 }, { "epoch": 0.5848940111133978, "grad_norm": 1.1171875, "learning_rate": 1.8248150844463987e-05, "loss": 0.5285, "step": 4263 }, { "epoch": 0.5850312135556013, "grad_norm": 1.1875, "learning_rate": 1.8247334610498485e-05, "loss": 0.4614, "step": 4264 }, { "epoch": 0.5851684159978048, "grad_norm": 1.078125, "learning_rate": 1.824651820468837e-05, "loss": 0.426, "step": 4265 }, { "epoch": 0.5853056184400083, "grad_norm": 1.2265625, "learning_rate": 1.824570162705065e-05, "loss": 0.5235, "step": 4266 }, { "epoch": 0.5854428208822117, "grad_norm": 1.1796875, "learning_rate": 1.8244884877602344e-05, "loss": 0.4487, "step": 4267 }, { "epoch": 0.5855800233244152, "grad_norm": 1.1796875, "learning_rate": 1.8244067956360466e-05, "loss": 0.4722, "step": 4268 }, { "epoch": 0.5857172257666187, "grad_norm": 1.125, "learning_rate": 1.824325086334204e-05, "loss": 0.4854, "step": 4269 }, { "epoch": 0.5858544282088222, "grad_norm": 1.3203125, "learning_rate": 1.824243359856409e-05, "loss": 0.5466, "step": 4270 }, { "epoch": 0.5859916306510256, "grad_norm": 1.2265625, "learning_rate": 1.8241616162043646e-05, "loss": 0.513, "step": 4271 }, { "epoch": 0.586128833093229, "grad_norm": 1.2265625, "learning_rate": 1.824079855379774e-05, "loss": 0.5226, "step": 4272 }, { "epoch": 0.5862660355354326, "grad_norm": 1.0625, "learning_rate": 1.8239980773843405e-05, "loss": 0.4603, "step": 4273 }, { "epoch": 0.586403237977636, "grad_norm": 1.4140625, "learning_rate": 1.8239162822197686e-05, "loss": 0.5681, "step": 4274 }, { "epoch": 0.5865404404198394, "grad_norm": 1.2109375, "learning_rate": 1.8238344698877623e-05, "loss": 0.471, "step": 4275 }, { "epoch": 0.5866776428620429, "grad_norm": 1.1640625, "learning_rate": 1.8237526403900262e-05, "loss": 0.4489, "step": 4276 }, { "epoch": 0.5868148453042464, "grad_norm": 1.28125, "learning_rate": 1.8236707937282655e-05, "loss": 0.5154, "step": 4277 }, { "epoch": 0.5869520477464499, "grad_norm": 1.1640625, "learning_rate": 1.8235889299041854e-05, "loss": 0.4085, "step": 4278 }, { "epoch": 0.5870892501886533, "grad_norm": 1.1875, "learning_rate": 1.823507048919492e-05, "loss": 0.5029, "step": 4279 }, { "epoch": 0.5872264526308568, "grad_norm": 1.1953125, "learning_rate": 1.823425150775891e-05, "loss": 0.5398, "step": 4280 }, { "epoch": 0.5873636550730603, "grad_norm": 1.046875, "learning_rate": 1.823343235475089e-05, "loss": 0.4016, "step": 4281 }, { "epoch": 0.5875008575152638, "grad_norm": 1.1796875, "learning_rate": 1.823261303018793e-05, "loss": 0.5453, "step": 4282 }, { "epoch": 0.5876380599574672, "grad_norm": 1.2265625, "learning_rate": 1.8231793534087102e-05, "loss": 0.492, "step": 4283 }, { "epoch": 0.5877752623996707, "grad_norm": 1.2421875, "learning_rate": 1.8230973866465477e-05, "loss": 0.4796, "step": 4284 }, { "epoch": 0.5879124648418742, "grad_norm": 1.3046875, "learning_rate": 1.8230154027340138e-05, "loss": 0.5878, "step": 4285 }, { "epoch": 0.5880496672840777, "grad_norm": 1.203125, "learning_rate": 1.8229334016728164e-05, "loss": 0.5106, "step": 4286 }, { "epoch": 0.5881868697262811, "grad_norm": 1.25, "learning_rate": 1.822851383464664e-05, "loss": 0.4439, "step": 4287 }, { "epoch": 0.5883240721684846, "grad_norm": 1.0859375, "learning_rate": 1.8227693481112663e-05, "loss": 0.4119, "step": 4288 }, { "epoch": 0.5884612746106881, "grad_norm": 1.2578125, "learning_rate": 1.8226872956143324e-05, "loss": 0.5597, "step": 4289 }, { "epoch": 0.5885984770528916, "grad_norm": 1.171875, "learning_rate": 1.8226052259755715e-05, "loss": 0.4843, "step": 4290 }, { "epoch": 0.588735679495095, "grad_norm": 1.140625, "learning_rate": 1.8225231391966944e-05, "loss": 0.4441, "step": 4291 }, { "epoch": 0.5888728819372985, "grad_norm": 1.1953125, "learning_rate": 1.8224410352794103e-05, "loss": 0.5595, "step": 4292 }, { "epoch": 0.589010084379502, "grad_norm": 1.328125, "learning_rate": 1.822358914225431e-05, "loss": 0.5923, "step": 4293 }, { "epoch": 0.5891472868217055, "grad_norm": 1.28125, "learning_rate": 1.822276776036467e-05, "loss": 0.5874, "step": 4294 }, { "epoch": 0.5892844892639089, "grad_norm": 1.2890625, "learning_rate": 1.8221946207142302e-05, "loss": 0.5665, "step": 4295 }, { "epoch": 0.5894216917061124, "grad_norm": 1.1875, "learning_rate": 1.8221124482604323e-05, "loss": 0.5135, "step": 4296 }, { "epoch": 0.5895588941483159, "grad_norm": 1.2421875, "learning_rate": 1.8220302586767852e-05, "loss": 0.5897, "step": 4297 }, { "epoch": 0.5896960965905194, "grad_norm": 1.203125, "learning_rate": 1.8219480519650016e-05, "loss": 0.5359, "step": 4298 }, { "epoch": 0.5898332990327227, "grad_norm": 1.234375, "learning_rate": 1.8218658281267947e-05, "loss": 0.5415, "step": 4299 }, { "epoch": 0.5899705014749262, "grad_norm": 1.28125, "learning_rate": 1.8217835871638773e-05, "loss": 0.4973, "step": 4300 }, { "epoch": 0.5901077039171297, "grad_norm": 1.25, "learning_rate": 1.8217013290779632e-05, "loss": 0.5682, "step": 4301 }, { "epoch": 0.5902449063593332, "grad_norm": 1.3515625, "learning_rate": 1.8216190538707665e-05, "loss": 0.5728, "step": 4302 }, { "epoch": 0.5903821088015366, "grad_norm": 1.234375, "learning_rate": 1.8215367615440013e-05, "loss": 0.524, "step": 4303 }, { "epoch": 0.5905193112437401, "grad_norm": 1.234375, "learning_rate": 1.821454452099382e-05, "loss": 0.5453, "step": 4304 }, { "epoch": 0.5906565136859436, "grad_norm": 1.3125, "learning_rate": 1.8213721255386247e-05, "loss": 0.5529, "step": 4305 }, { "epoch": 0.5907937161281471, "grad_norm": 1.1875, "learning_rate": 1.8212897818634438e-05, "loss": 0.5024, "step": 4306 }, { "epoch": 0.5909309185703505, "grad_norm": 1.40625, "learning_rate": 1.8212074210755547e-05, "loss": 0.5601, "step": 4307 }, { "epoch": 0.591068121012554, "grad_norm": 1.0625, "learning_rate": 1.8211250431766747e-05, "loss": 0.4211, "step": 4308 }, { "epoch": 0.5912053234547575, "grad_norm": 1.234375, "learning_rate": 1.8210426481685193e-05, "loss": 0.5376, "step": 4309 }, { "epoch": 0.591342525896961, "grad_norm": 1.1328125, "learning_rate": 1.820960236052806e-05, "loss": 0.4554, "step": 4310 }, { "epoch": 0.5914797283391644, "grad_norm": 1.2265625, "learning_rate": 1.8208778068312517e-05, "loss": 0.562, "step": 4311 }, { "epoch": 0.5916169307813679, "grad_norm": 1.265625, "learning_rate": 1.8207953605055733e-05, "loss": 0.5515, "step": 4312 }, { "epoch": 0.5917541332235714, "grad_norm": 1.125, "learning_rate": 1.82071289707749e-05, "loss": 0.5196, "step": 4313 }, { "epoch": 0.5918913356657749, "grad_norm": 1.2734375, "learning_rate": 1.8206304165487188e-05, "loss": 0.5605, "step": 4314 }, { "epoch": 0.5920285381079783, "grad_norm": 1.1796875, "learning_rate": 1.820547918920979e-05, "loss": 0.5083, "step": 4315 }, { "epoch": 0.5921657405501818, "grad_norm": 1.2578125, "learning_rate": 1.8204654041959894e-05, "loss": 0.5792, "step": 4316 }, { "epoch": 0.5923029429923853, "grad_norm": 1.0859375, "learning_rate": 1.8203828723754688e-05, "loss": 0.4183, "step": 4317 }, { "epoch": 0.5924401454345888, "grad_norm": 1.140625, "learning_rate": 1.8203003234611375e-05, "loss": 0.4246, "step": 4318 }, { "epoch": 0.5925773478767922, "grad_norm": 1.171875, "learning_rate": 1.8202177574547155e-05, "loss": 0.4798, "step": 4319 }, { "epoch": 0.5927145503189957, "grad_norm": 1.1484375, "learning_rate": 1.820135174357923e-05, "loss": 0.466, "step": 4320 }, { "epoch": 0.5928517527611992, "grad_norm": 1.0546875, "learning_rate": 1.820052574172481e-05, "loss": 0.4342, "step": 4321 }, { "epoch": 0.5929889552034027, "grad_norm": 1.1953125, "learning_rate": 1.81996995690011e-05, "loss": 0.5548, "step": 4322 }, { "epoch": 0.5931261576456061, "grad_norm": 1.1953125, "learning_rate": 1.8198873225425316e-05, "loss": 0.5061, "step": 4323 }, { "epoch": 0.5932633600878096, "grad_norm": 1.2265625, "learning_rate": 1.8198046711014682e-05, "loss": 0.5464, "step": 4324 }, { "epoch": 0.593400562530013, "grad_norm": 1.140625, "learning_rate": 1.819722002578641e-05, "loss": 0.4919, "step": 4325 }, { "epoch": 0.5935377649722166, "grad_norm": 1.2265625, "learning_rate": 1.8196393169757737e-05, "loss": 0.552, "step": 4326 }, { "epoch": 0.5936749674144199, "grad_norm": 1.125, "learning_rate": 1.819556614294588e-05, "loss": 0.4976, "step": 4327 }, { "epoch": 0.5938121698566234, "grad_norm": 1.21875, "learning_rate": 1.8194738945368076e-05, "loss": 0.5073, "step": 4328 }, { "epoch": 0.5939493722988269, "grad_norm": 1.1875, "learning_rate": 1.8193911577041567e-05, "loss": 0.524, "step": 4329 }, { "epoch": 0.5940865747410304, "grad_norm": 1.09375, "learning_rate": 1.8193084037983583e-05, "loss": 0.4485, "step": 4330 }, { "epoch": 0.5942237771832338, "grad_norm": 1.28125, "learning_rate": 1.8192256328211367e-05, "loss": 0.5358, "step": 4331 }, { "epoch": 0.5943609796254373, "grad_norm": 1.1640625, "learning_rate": 1.8191428447742175e-05, "loss": 0.4865, "step": 4332 }, { "epoch": 0.5944981820676408, "grad_norm": 1.1640625, "learning_rate": 1.819060039659325e-05, "loss": 0.504, "step": 4333 }, { "epoch": 0.5946353845098443, "grad_norm": 1.1953125, "learning_rate": 1.8189772174781844e-05, "loss": 0.5329, "step": 4334 }, { "epoch": 0.5947725869520477, "grad_norm": 1.265625, "learning_rate": 1.818894378232522e-05, "loss": 0.5851, "step": 4335 }, { "epoch": 0.5949097893942512, "grad_norm": 1.140625, "learning_rate": 1.8188115219240632e-05, "loss": 0.4941, "step": 4336 }, { "epoch": 0.5950469918364547, "grad_norm": 1.28125, "learning_rate": 1.818728648554535e-05, "loss": 0.5496, "step": 4337 }, { "epoch": 0.5951841942786582, "grad_norm": 1.21875, "learning_rate": 1.818645758125664e-05, "loss": 0.4951, "step": 4338 }, { "epoch": 0.5953213967208616, "grad_norm": 1.171875, "learning_rate": 1.818562850639177e-05, "loss": 0.4574, "step": 4339 }, { "epoch": 0.5954585991630651, "grad_norm": 1.265625, "learning_rate": 1.818479926096802e-05, "loss": 0.5648, "step": 4340 }, { "epoch": 0.5955958016052686, "grad_norm": 1.2265625, "learning_rate": 1.8183969845002666e-05, "loss": 0.5067, "step": 4341 }, { "epoch": 0.5957330040474721, "grad_norm": 1.21875, "learning_rate": 1.818314025851299e-05, "loss": 0.5491, "step": 4342 }, { "epoch": 0.5958702064896755, "grad_norm": 1.2734375, "learning_rate": 1.818231050151628e-05, "loss": 0.5652, "step": 4343 }, { "epoch": 0.596007408931879, "grad_norm": 1.15625, "learning_rate": 1.818148057402982e-05, "loss": 0.4991, "step": 4344 }, { "epoch": 0.5961446113740825, "grad_norm": 1.390625, "learning_rate": 1.8180650476070907e-05, "loss": 0.6376, "step": 4345 }, { "epoch": 0.596281813816286, "grad_norm": 1.2578125, "learning_rate": 1.8179820207656832e-05, "loss": 0.5254, "step": 4346 }, { "epoch": 0.5964190162584894, "grad_norm": 1.2421875, "learning_rate": 1.8178989768804904e-05, "loss": 0.5255, "step": 4347 }, { "epoch": 0.5965562187006929, "grad_norm": 1.234375, "learning_rate": 1.817815915953242e-05, "loss": 0.5169, "step": 4348 }, { "epoch": 0.5966934211428964, "grad_norm": 1.078125, "learning_rate": 1.817732837985669e-05, "loss": 0.4795, "step": 4349 }, { "epoch": 0.5968306235850999, "grad_norm": 1.25, "learning_rate": 1.817649742979502e-05, "loss": 0.4986, "step": 4350 }, { "epoch": 0.5969678260273032, "grad_norm": 1.171875, "learning_rate": 1.8175666309364723e-05, "loss": 0.4712, "step": 4351 }, { "epoch": 0.5971050284695067, "grad_norm": 1.1171875, "learning_rate": 1.8174835018583127e-05, "loss": 0.479, "step": 4352 }, { "epoch": 0.5972422309117102, "grad_norm": 1.1875, "learning_rate": 1.8174003557467542e-05, "loss": 0.5187, "step": 4353 }, { "epoch": 0.5973794333539137, "grad_norm": 1.171875, "learning_rate": 1.8173171926035295e-05, "loss": 0.5267, "step": 4354 }, { "epoch": 0.5975166357961171, "grad_norm": 1.2109375, "learning_rate": 1.817234012430372e-05, "loss": 0.5007, "step": 4355 }, { "epoch": 0.5976538382383206, "grad_norm": 1.1328125, "learning_rate": 1.817150815229014e-05, "loss": 0.503, "step": 4356 }, { "epoch": 0.5977910406805241, "grad_norm": 1.171875, "learning_rate": 1.81706760100119e-05, "loss": 0.4772, "step": 4357 }, { "epoch": 0.5979282431227276, "grad_norm": 1.140625, "learning_rate": 1.816984369748633e-05, "loss": 0.428, "step": 4358 }, { "epoch": 0.598065445564931, "grad_norm": 1.296875, "learning_rate": 1.8169011214730775e-05, "loss": 0.5224, "step": 4359 }, { "epoch": 0.5982026480071345, "grad_norm": 1.21875, "learning_rate": 1.8168178561762588e-05, "loss": 0.4864, "step": 4360 }, { "epoch": 0.598339850449338, "grad_norm": 1.1953125, "learning_rate": 1.8167345738599107e-05, "loss": 0.5454, "step": 4361 }, { "epoch": 0.5984770528915415, "grad_norm": 1.265625, "learning_rate": 1.8166512745257692e-05, "loss": 0.5051, "step": 4362 }, { "epoch": 0.5986142553337449, "grad_norm": 1.3203125, "learning_rate": 1.81656795817557e-05, "loss": 0.5457, "step": 4363 }, { "epoch": 0.5987514577759484, "grad_norm": 1.109375, "learning_rate": 1.8164846248110487e-05, "loss": 0.4499, "step": 4364 }, { "epoch": 0.5988886602181519, "grad_norm": 1.234375, "learning_rate": 1.816401274433942e-05, "loss": 0.5155, "step": 4365 }, { "epoch": 0.5990258626603554, "grad_norm": 1.1875, "learning_rate": 1.8163179070459865e-05, "loss": 0.5292, "step": 4366 }, { "epoch": 0.5991630651025588, "grad_norm": 1.2734375, "learning_rate": 1.816234522648919e-05, "loss": 0.5471, "step": 4367 }, { "epoch": 0.5993002675447623, "grad_norm": 1.2109375, "learning_rate": 1.8161511212444776e-05, "loss": 0.4901, "step": 4368 }, { "epoch": 0.5994374699869658, "grad_norm": 1.171875, "learning_rate": 1.8160677028343997e-05, "loss": 0.5026, "step": 4369 }, { "epoch": 0.5995746724291693, "grad_norm": 1.15625, "learning_rate": 1.8159842674204233e-05, "loss": 0.4632, "step": 4370 }, { "epoch": 0.5997118748713727, "grad_norm": 1.328125, "learning_rate": 1.815900815004287e-05, "loss": 0.5568, "step": 4371 }, { "epoch": 0.5998490773135762, "grad_norm": 1.1796875, "learning_rate": 1.8158173455877297e-05, "loss": 0.4767, "step": 4372 }, { "epoch": 0.5999862797557797, "grad_norm": 1.28125, "learning_rate": 1.8157338591724908e-05, "loss": 0.5867, "step": 4373 }, { "epoch": 0.6001234821979832, "grad_norm": 1.171875, "learning_rate": 1.8156503557603093e-05, "loss": 0.5013, "step": 4374 }, { "epoch": 0.6002606846401866, "grad_norm": 1.1484375, "learning_rate": 1.8155668353529257e-05, "loss": 0.516, "step": 4375 }, { "epoch": 0.6003978870823901, "grad_norm": 1.15625, "learning_rate": 1.8154832979520795e-05, "loss": 0.5298, "step": 4376 }, { "epoch": 0.6005350895245936, "grad_norm": 1.3671875, "learning_rate": 1.8153997435595124e-05, "loss": 0.5654, "step": 4377 }, { "epoch": 0.6006722919667971, "grad_norm": 1.2890625, "learning_rate": 1.8153161721769647e-05, "loss": 0.5826, "step": 4378 }, { "epoch": 0.6008094944090004, "grad_norm": 1.2265625, "learning_rate": 1.8152325838061775e-05, "loss": 0.5828, "step": 4379 }, { "epoch": 0.6009466968512039, "grad_norm": 1.1953125, "learning_rate": 1.815148978448893e-05, "loss": 0.5208, "step": 4380 }, { "epoch": 0.6010838992934074, "grad_norm": 1.3203125, "learning_rate": 1.8150653561068526e-05, "loss": 0.5348, "step": 4381 }, { "epoch": 0.6012211017356109, "grad_norm": 1.0234375, "learning_rate": 1.8149817167818e-05, "loss": 0.4081, "step": 4382 }, { "epoch": 0.6013583041778143, "grad_norm": 1.203125, "learning_rate": 1.8148980604754765e-05, "loss": 0.5089, "step": 4383 }, { "epoch": 0.6014955066200178, "grad_norm": 1.1328125, "learning_rate": 1.8148143871896255e-05, "loss": 0.4992, "step": 4384 }, { "epoch": 0.6016327090622213, "grad_norm": 1.046875, "learning_rate": 1.814730696925991e-05, "loss": 0.4359, "step": 4385 }, { "epoch": 0.6017699115044248, "grad_norm": 1.3359375, "learning_rate": 1.814646989686317e-05, "loss": 0.5299, "step": 4386 }, { "epoch": 0.6019071139466282, "grad_norm": 1.1484375, "learning_rate": 1.8145632654723463e-05, "loss": 0.4701, "step": 4387 }, { "epoch": 0.6020443163888317, "grad_norm": 1.2890625, "learning_rate": 1.8144795242858247e-05, "loss": 0.592, "step": 4388 }, { "epoch": 0.6021815188310352, "grad_norm": 1.2421875, "learning_rate": 1.8143957661284965e-05, "loss": 0.5572, "step": 4389 }, { "epoch": 0.6023187212732387, "grad_norm": 1.078125, "learning_rate": 1.8143119910021073e-05, "loss": 0.4735, "step": 4390 }, { "epoch": 0.6024559237154421, "grad_norm": 1.1875, "learning_rate": 1.814228198908402e-05, "loss": 0.5056, "step": 4391 }, { "epoch": 0.6025931261576456, "grad_norm": 1.1875, "learning_rate": 1.8141443898491273e-05, "loss": 0.4795, "step": 4392 }, { "epoch": 0.6027303285998491, "grad_norm": 1.25, "learning_rate": 1.814060563826029e-05, "loss": 0.6072, "step": 4393 }, { "epoch": 0.6028675310420526, "grad_norm": 1.15625, "learning_rate": 1.8139767208408535e-05, "loss": 0.5466, "step": 4394 }, { "epoch": 0.603004733484256, "grad_norm": 1.2109375, "learning_rate": 1.8138928608953487e-05, "loss": 0.5215, "step": 4395 }, { "epoch": 0.6031419359264595, "grad_norm": 1.171875, "learning_rate": 1.813808983991261e-05, "loss": 0.5047, "step": 4396 }, { "epoch": 0.603279138368663, "grad_norm": 1.2109375, "learning_rate": 1.8137250901303388e-05, "loss": 0.5252, "step": 4397 }, { "epoch": 0.6034163408108665, "grad_norm": 1.1171875, "learning_rate": 1.8136411793143296e-05, "loss": 0.4542, "step": 4398 }, { "epoch": 0.6035535432530699, "grad_norm": 1.1796875, "learning_rate": 1.813557251544982e-05, "loss": 0.5124, "step": 4399 }, { "epoch": 0.6036907456952734, "grad_norm": 1.2421875, "learning_rate": 1.8134733068240445e-05, "loss": 0.4899, "step": 4400 }, { "epoch": 0.6038279481374769, "grad_norm": 1.171875, "learning_rate": 1.8133893451532668e-05, "loss": 0.5266, "step": 4401 }, { "epoch": 0.6039651505796804, "grad_norm": 1.3203125, "learning_rate": 1.813305366534398e-05, "loss": 0.5622, "step": 4402 }, { "epoch": 0.6041023530218838, "grad_norm": 1.203125, "learning_rate": 1.813221370969188e-05, "loss": 0.4817, "step": 4403 }, { "epoch": 0.6042395554640873, "grad_norm": 1.2109375, "learning_rate": 1.8131373584593866e-05, "loss": 0.5062, "step": 4404 }, { "epoch": 0.6043767579062908, "grad_norm": 1.1953125, "learning_rate": 1.8130533290067448e-05, "loss": 0.5296, "step": 4405 }, { "epoch": 0.6045139603484943, "grad_norm": 1.2421875, "learning_rate": 1.8129692826130134e-05, "loss": 0.5214, "step": 4406 }, { "epoch": 0.6046511627906976, "grad_norm": 1.328125, "learning_rate": 1.8128852192799434e-05, "loss": 0.6014, "step": 4407 }, { "epoch": 0.6047883652329011, "grad_norm": 1.15625, "learning_rate": 1.8128011390092862e-05, "loss": 0.4946, "step": 4408 }, { "epoch": 0.6049255676751046, "grad_norm": 1.1640625, "learning_rate": 1.8127170418027944e-05, "loss": 0.5233, "step": 4409 }, { "epoch": 0.6050627701173081, "grad_norm": 1.1875, "learning_rate": 1.8126329276622196e-05, "loss": 0.5544, "step": 4410 }, { "epoch": 0.6051999725595115, "grad_norm": 1.234375, "learning_rate": 1.812548796589315e-05, "loss": 0.5388, "step": 4411 }, { "epoch": 0.605337175001715, "grad_norm": 1.1796875, "learning_rate": 1.812464648585833e-05, "loss": 0.4989, "step": 4412 }, { "epoch": 0.6054743774439185, "grad_norm": 1.1953125, "learning_rate": 1.8123804836535273e-05, "loss": 0.5123, "step": 4413 }, { "epoch": 0.605611579886122, "grad_norm": 1.234375, "learning_rate": 1.812296301794152e-05, "loss": 0.5702, "step": 4414 }, { "epoch": 0.6057487823283254, "grad_norm": 1.2265625, "learning_rate": 1.81221210300946e-05, "loss": 0.537, "step": 4415 }, { "epoch": 0.6058859847705289, "grad_norm": 1.2578125, "learning_rate": 1.8121278873012066e-05, "loss": 0.5845, "step": 4416 }, { "epoch": 0.6060231872127324, "grad_norm": 1.1875, "learning_rate": 1.8120436546711465e-05, "loss": 0.4853, "step": 4417 }, { "epoch": 0.6061603896549359, "grad_norm": 1.2578125, "learning_rate": 1.811959405121035e-05, "loss": 0.5411, "step": 4418 }, { "epoch": 0.6062975920971393, "grad_norm": 1.2578125, "learning_rate": 1.8118751386526266e-05, "loss": 0.5372, "step": 4419 }, { "epoch": 0.6064347945393428, "grad_norm": 1.203125, "learning_rate": 1.811790855267678e-05, "loss": 0.5612, "step": 4420 }, { "epoch": 0.6065719969815463, "grad_norm": 1.1640625, "learning_rate": 1.8117065549679447e-05, "loss": 0.4853, "step": 4421 }, { "epoch": 0.6067091994237498, "grad_norm": 1.3125, "learning_rate": 1.811622237755184e-05, "loss": 0.6122, "step": 4422 }, { "epoch": 0.6068464018659532, "grad_norm": 1.15625, "learning_rate": 1.8115379036311522e-05, "loss": 0.4997, "step": 4423 }, { "epoch": 0.6069836043081567, "grad_norm": 1.203125, "learning_rate": 1.8114535525976066e-05, "loss": 0.5289, "step": 4424 }, { "epoch": 0.6071208067503602, "grad_norm": 1.0703125, "learning_rate": 1.8113691846563045e-05, "loss": 0.4469, "step": 4425 }, { "epoch": 0.6072580091925637, "grad_norm": 1.109375, "learning_rate": 1.8112847998090048e-05, "loss": 0.4957, "step": 4426 }, { "epoch": 0.6073952116347671, "grad_norm": 1.2421875, "learning_rate": 1.8112003980574647e-05, "loss": 0.5242, "step": 4427 }, { "epoch": 0.6075324140769706, "grad_norm": 1.2734375, "learning_rate": 1.8111159794034436e-05, "loss": 0.6024, "step": 4428 }, { "epoch": 0.6076696165191741, "grad_norm": 1.28125, "learning_rate": 1.8110315438486998e-05, "loss": 0.568, "step": 4429 }, { "epoch": 0.6078068189613776, "grad_norm": 1.3046875, "learning_rate": 1.810947091394993e-05, "loss": 0.6571, "step": 4430 }, { "epoch": 0.607944021403581, "grad_norm": 1.3203125, "learning_rate": 1.8108626220440832e-05, "loss": 0.5866, "step": 4431 }, { "epoch": 0.6080812238457844, "grad_norm": 1.328125, "learning_rate": 1.81077813579773e-05, "loss": 0.5143, "step": 4432 }, { "epoch": 0.608218426287988, "grad_norm": 1.203125, "learning_rate": 1.8106936326576937e-05, "loss": 0.4858, "step": 4433 }, { "epoch": 0.6083556287301914, "grad_norm": 1.296875, "learning_rate": 1.810609112625735e-05, "loss": 0.6624, "step": 4434 }, { "epoch": 0.6084928311723948, "grad_norm": 1.1640625, "learning_rate": 1.8105245757036155e-05, "loss": 0.5338, "step": 4435 }, { "epoch": 0.6086300336145983, "grad_norm": 1.296875, "learning_rate": 1.810440021893096e-05, "loss": 0.509, "step": 4436 }, { "epoch": 0.6087672360568018, "grad_norm": 1.171875, "learning_rate": 1.810355451195939e-05, "loss": 0.4967, "step": 4437 }, { "epoch": 0.6089044384990053, "grad_norm": 1.21875, "learning_rate": 1.8102708636139064e-05, "loss": 0.489, "step": 4438 }, { "epoch": 0.6090416409412087, "grad_norm": 1.1875, "learning_rate": 1.8101862591487603e-05, "loss": 0.5382, "step": 4439 }, { "epoch": 0.6091788433834122, "grad_norm": 1.234375, "learning_rate": 1.810101637802264e-05, "loss": 0.5198, "step": 4440 }, { "epoch": 0.6093160458256157, "grad_norm": 1.2109375, "learning_rate": 1.8100169995761808e-05, "loss": 0.5149, "step": 4441 }, { "epoch": 0.6094532482678192, "grad_norm": 1.296875, "learning_rate": 1.8099323444722734e-05, "loss": 0.5856, "step": 4442 }, { "epoch": 0.6095904507100226, "grad_norm": 1.265625, "learning_rate": 1.8098476724923065e-05, "loss": 0.521, "step": 4443 }, { "epoch": 0.6097276531522261, "grad_norm": 1.2578125, "learning_rate": 1.8097629836380444e-05, "loss": 0.558, "step": 4444 }, { "epoch": 0.6098648555944296, "grad_norm": 1.1796875, "learning_rate": 1.8096782779112513e-05, "loss": 0.4785, "step": 4445 }, { "epoch": 0.6100020580366331, "grad_norm": 1.28125, "learning_rate": 1.8095935553136926e-05, "loss": 0.5074, "step": 4446 }, { "epoch": 0.6101392604788365, "grad_norm": 1.2734375, "learning_rate": 1.809508815847133e-05, "loss": 0.5351, "step": 4447 }, { "epoch": 0.61027646292104, "grad_norm": 1.28125, "learning_rate": 1.8094240595133382e-05, "loss": 0.5392, "step": 4448 }, { "epoch": 0.6104136653632435, "grad_norm": 1.2578125, "learning_rate": 1.8093392863140753e-05, "loss": 0.5473, "step": 4449 }, { "epoch": 0.610550867805447, "grad_norm": 1.1796875, "learning_rate": 1.8092544962511094e-05, "loss": 0.4884, "step": 4450 }, { "epoch": 0.6106880702476504, "grad_norm": 1.25, "learning_rate": 1.809169689326208e-05, "loss": 0.5717, "step": 4451 }, { "epoch": 0.6108252726898539, "grad_norm": 1.171875, "learning_rate": 1.8090848655411375e-05, "loss": 0.4818, "step": 4452 }, { "epoch": 0.6109624751320574, "grad_norm": 1.28125, "learning_rate": 1.809000024897666e-05, "loss": 0.5959, "step": 4453 }, { "epoch": 0.6110996775742609, "grad_norm": 1.1796875, "learning_rate": 1.8089151673975607e-05, "loss": 0.5438, "step": 4454 }, { "epoch": 0.6112368800164643, "grad_norm": 1.2890625, "learning_rate": 1.8088302930425902e-05, "loss": 0.5588, "step": 4455 }, { "epoch": 0.6113740824586678, "grad_norm": 1.1796875, "learning_rate": 1.808745401834523e-05, "loss": 0.5145, "step": 4456 }, { "epoch": 0.6115112849008713, "grad_norm": 1.265625, "learning_rate": 1.8086604937751272e-05, "loss": 0.5604, "step": 4457 }, { "epoch": 0.6116484873430748, "grad_norm": 1.21875, "learning_rate": 1.808575568866173e-05, "loss": 0.5318, "step": 4458 }, { "epoch": 0.6117856897852781, "grad_norm": 1.1015625, "learning_rate": 1.808490627109429e-05, "loss": 0.4568, "step": 4459 }, { "epoch": 0.6119228922274816, "grad_norm": 1.09375, "learning_rate": 1.8084056685066656e-05, "loss": 0.4663, "step": 4460 }, { "epoch": 0.6120600946696851, "grad_norm": 1.1640625, "learning_rate": 1.8083206930596532e-05, "loss": 0.4693, "step": 4461 }, { "epoch": 0.6121972971118886, "grad_norm": 1.2109375, "learning_rate": 1.8082357007701614e-05, "loss": 0.5441, "step": 4462 }, { "epoch": 0.612334499554092, "grad_norm": 1.140625, "learning_rate": 1.8081506916399622e-05, "loss": 0.4826, "step": 4463 }, { "epoch": 0.6124717019962955, "grad_norm": 1.2734375, "learning_rate": 1.8080656656708267e-05, "loss": 0.555, "step": 4464 }, { "epoch": 0.612608904438499, "grad_norm": 1.3203125, "learning_rate": 1.807980622864526e-05, "loss": 0.6193, "step": 4465 }, { "epoch": 0.6127461068807025, "grad_norm": 1.1796875, "learning_rate": 1.8078955632228327e-05, "loss": 0.5015, "step": 4466 }, { "epoch": 0.6128833093229059, "grad_norm": 1.1796875, "learning_rate": 1.807810486747519e-05, "loss": 0.5452, "step": 4467 }, { "epoch": 0.6130205117651094, "grad_norm": 1.2109375, "learning_rate": 1.8077253934403568e-05, "loss": 0.4799, "step": 4468 }, { "epoch": 0.6131577142073129, "grad_norm": 1.4296875, "learning_rate": 1.8076402833031202e-05, "loss": 0.6153, "step": 4469 }, { "epoch": 0.6132949166495164, "grad_norm": 1.2578125, "learning_rate": 1.8075551563375823e-05, "loss": 0.5065, "step": 4470 }, { "epoch": 0.6134321190917198, "grad_norm": 1.203125, "learning_rate": 1.8074700125455163e-05, "loss": 0.507, "step": 4471 }, { "epoch": 0.6135693215339233, "grad_norm": 1.140625, "learning_rate": 1.807384851928697e-05, "loss": 0.4976, "step": 4472 }, { "epoch": 0.6137065239761268, "grad_norm": 1.2578125, "learning_rate": 1.8072996744888984e-05, "loss": 0.5659, "step": 4473 }, { "epoch": 0.6138437264183303, "grad_norm": 1.2734375, "learning_rate": 1.8072144802278955e-05, "loss": 0.5492, "step": 4474 }, { "epoch": 0.6139809288605337, "grad_norm": 1.1640625, "learning_rate": 1.8071292691474634e-05, "loss": 0.5397, "step": 4475 }, { "epoch": 0.6141181313027372, "grad_norm": 1.171875, "learning_rate": 1.8070440412493775e-05, "loss": 0.5059, "step": 4476 }, { "epoch": 0.6142553337449407, "grad_norm": 1.1796875, "learning_rate": 1.8069587965354138e-05, "loss": 0.5206, "step": 4477 }, { "epoch": 0.6143925361871442, "grad_norm": 1.1171875, "learning_rate": 1.806873535007348e-05, "loss": 0.4702, "step": 4478 }, { "epoch": 0.6145297386293476, "grad_norm": 1.2265625, "learning_rate": 1.8067882566669573e-05, "loss": 0.5249, "step": 4479 }, { "epoch": 0.6146669410715511, "grad_norm": 1.171875, "learning_rate": 1.8067029615160186e-05, "loss": 0.528, "step": 4480 }, { "epoch": 0.6148041435137546, "grad_norm": 1.125, "learning_rate": 1.8066176495563085e-05, "loss": 0.4171, "step": 4481 }, { "epoch": 0.6149413459559581, "grad_norm": 1.1171875, "learning_rate": 1.806532320789605e-05, "loss": 0.4554, "step": 4482 }, { "epoch": 0.6150785483981615, "grad_norm": 1.203125, "learning_rate": 1.806446975217686e-05, "loss": 0.5046, "step": 4483 }, { "epoch": 0.615215750840365, "grad_norm": 1.2421875, "learning_rate": 1.8063616128423298e-05, "loss": 0.5495, "step": 4484 }, { "epoch": 0.6153529532825684, "grad_norm": 1.171875, "learning_rate": 1.806276233665315e-05, "loss": 0.5653, "step": 4485 }, { "epoch": 0.615490155724772, "grad_norm": 1.203125, "learning_rate": 1.806190837688421e-05, "loss": 0.5246, "step": 4486 }, { "epoch": 0.6156273581669753, "grad_norm": 1.375, "learning_rate": 1.806105424913426e-05, "loss": 0.577, "step": 4487 }, { "epoch": 0.6157645606091788, "grad_norm": 1.1875, "learning_rate": 1.8060199953421113e-05, "loss": 0.5119, "step": 4488 }, { "epoch": 0.6159017630513823, "grad_norm": 1.671875, "learning_rate": 1.8059345489762553e-05, "loss": 0.5437, "step": 4489 }, { "epoch": 0.6160389654935858, "grad_norm": 1.2265625, "learning_rate": 1.8058490858176397e-05, "loss": 0.5181, "step": 4490 }, { "epoch": 0.6161761679357892, "grad_norm": 1.1484375, "learning_rate": 1.8057636058680447e-05, "loss": 0.4722, "step": 4491 }, { "epoch": 0.6163133703779927, "grad_norm": 1.15625, "learning_rate": 1.8056781091292514e-05, "loss": 0.5582, "step": 4492 }, { "epoch": 0.6164505728201962, "grad_norm": 1.2265625, "learning_rate": 1.805592595603041e-05, "loss": 0.607, "step": 4493 }, { "epoch": 0.6165877752623997, "grad_norm": 1.1015625, "learning_rate": 1.805507065291196e-05, "loss": 0.4614, "step": 4494 }, { "epoch": 0.6167249777046031, "grad_norm": 1.34375, "learning_rate": 1.805421518195498e-05, "loss": 0.5892, "step": 4495 }, { "epoch": 0.6168621801468066, "grad_norm": 1.3359375, "learning_rate": 1.8053359543177296e-05, "loss": 0.6133, "step": 4496 }, { "epoch": 0.6169993825890101, "grad_norm": 1.3203125, "learning_rate": 1.8052503736596736e-05, "loss": 0.6404, "step": 4497 }, { "epoch": 0.6171365850312136, "grad_norm": 1.2734375, "learning_rate": 1.805164776223113e-05, "loss": 0.4723, "step": 4498 }, { "epoch": 0.617273787473417, "grad_norm": 1.28125, "learning_rate": 1.805079162009832e-05, "loss": 0.5158, "step": 4499 }, { "epoch": 0.6174109899156205, "grad_norm": 1.234375, "learning_rate": 1.804993531021614e-05, "loss": 0.5601, "step": 4500 }, { "epoch": 0.617548192357824, "grad_norm": 1.2109375, "learning_rate": 1.804907883260243e-05, "loss": 0.5033, "step": 4501 }, { "epoch": 0.6176853948000275, "grad_norm": 1.125, "learning_rate": 1.804822218727504e-05, "loss": 0.4946, "step": 4502 }, { "epoch": 0.6178225972422309, "grad_norm": 1.3125, "learning_rate": 1.8047365374251818e-05, "loss": 0.5389, "step": 4503 }, { "epoch": 0.6179597996844344, "grad_norm": 1.1640625, "learning_rate": 1.8046508393550622e-05, "loss": 0.4993, "step": 4504 }, { "epoch": 0.6180970021266379, "grad_norm": 1.1171875, "learning_rate": 1.8045651245189297e-05, "loss": 0.5145, "step": 4505 }, { "epoch": 0.6182342045688414, "grad_norm": 1.15625, "learning_rate": 1.8044793929185712e-05, "loss": 0.4979, "step": 4506 }, { "epoch": 0.6183714070110448, "grad_norm": 1.234375, "learning_rate": 1.804393644555773e-05, "loss": 0.5409, "step": 4507 }, { "epoch": 0.6185086094532483, "grad_norm": 1.265625, "learning_rate": 1.8043078794323215e-05, "loss": 0.5581, "step": 4508 }, { "epoch": 0.6186458118954518, "grad_norm": 1.2890625, "learning_rate": 1.8042220975500035e-05, "loss": 0.582, "step": 4509 }, { "epoch": 0.6187830143376553, "grad_norm": 1.234375, "learning_rate": 1.804136298910607e-05, "loss": 0.5196, "step": 4510 }, { "epoch": 0.6189202167798586, "grad_norm": 1.234375, "learning_rate": 1.8040504835159197e-05, "loss": 0.617, "step": 4511 }, { "epoch": 0.6190574192220621, "grad_norm": 1.2265625, "learning_rate": 1.8039646513677288e-05, "loss": 0.4927, "step": 4512 }, { "epoch": 0.6191946216642656, "grad_norm": 1.3359375, "learning_rate": 1.8038788024678235e-05, "loss": 0.6359, "step": 4513 }, { "epoch": 0.6193318241064691, "grad_norm": 1.15625, "learning_rate": 1.8037929368179928e-05, "loss": 0.4894, "step": 4514 }, { "epoch": 0.6194690265486725, "grad_norm": 1.1796875, "learning_rate": 1.803707054420025e-05, "loss": 0.443, "step": 4515 }, { "epoch": 0.619606228990876, "grad_norm": 1.09375, "learning_rate": 1.8036211552757098e-05, "loss": 0.4205, "step": 4516 }, { "epoch": 0.6197434314330795, "grad_norm": 0.9921875, "learning_rate": 1.8035352393868378e-05, "loss": 0.4045, "step": 4517 }, { "epoch": 0.619880633875283, "grad_norm": 1.25, "learning_rate": 1.8034493067551985e-05, "loss": 0.5501, "step": 4518 }, { "epoch": 0.6200178363174864, "grad_norm": 1.3125, "learning_rate": 1.8033633573825828e-05, "loss": 0.54, "step": 4519 }, { "epoch": 0.6201550387596899, "grad_norm": 1.1640625, "learning_rate": 1.8032773912707807e-05, "loss": 0.5545, "step": 4520 }, { "epoch": 0.6202922412018934, "grad_norm": 1.2265625, "learning_rate": 1.8031914084215845e-05, "loss": 0.5261, "step": 4521 }, { "epoch": 0.6204294436440969, "grad_norm": 1.1796875, "learning_rate": 1.8031054088367852e-05, "loss": 0.5329, "step": 4522 }, { "epoch": 0.6205666460863003, "grad_norm": 1.21875, "learning_rate": 1.803019392518175e-05, "loss": 0.5203, "step": 4523 }, { "epoch": 0.6207038485285038, "grad_norm": 1.2421875, "learning_rate": 1.8029333594675456e-05, "loss": 0.5443, "step": 4524 }, { "epoch": 0.6208410509707073, "grad_norm": 1.1484375, "learning_rate": 1.8028473096866906e-05, "loss": 0.4917, "step": 4525 }, { "epoch": 0.6209782534129108, "grad_norm": 1.203125, "learning_rate": 1.8027612431774018e-05, "loss": 0.4875, "step": 4526 }, { "epoch": 0.6211154558551142, "grad_norm": 1.1953125, "learning_rate": 1.8026751599414735e-05, "loss": 0.5279, "step": 4527 }, { "epoch": 0.6212526582973177, "grad_norm": 1.1484375, "learning_rate": 1.802589059980699e-05, "loss": 0.4664, "step": 4528 }, { "epoch": 0.6213898607395212, "grad_norm": 1.265625, "learning_rate": 1.8025029432968718e-05, "loss": 0.5692, "step": 4529 }, { "epoch": 0.6215270631817247, "grad_norm": 1.2421875, "learning_rate": 1.8024168098917875e-05, "loss": 0.5834, "step": 4530 }, { "epoch": 0.6216642656239281, "grad_norm": 1.2109375, "learning_rate": 1.8023306597672398e-05, "loss": 0.5114, "step": 4531 }, { "epoch": 0.6218014680661316, "grad_norm": 1.1328125, "learning_rate": 1.8022444929250237e-05, "loss": 0.4697, "step": 4532 }, { "epoch": 0.6219386705083351, "grad_norm": 1.265625, "learning_rate": 1.802158309366935e-05, "loss": 0.5916, "step": 4533 }, { "epoch": 0.6220758729505386, "grad_norm": 1.21875, "learning_rate": 1.8020721090947692e-05, "loss": 0.527, "step": 4534 }, { "epoch": 0.622213075392742, "grad_norm": 1.296875, "learning_rate": 1.801985892110323e-05, "loss": 0.5382, "step": 4535 }, { "epoch": 0.6223502778349455, "grad_norm": 1.265625, "learning_rate": 1.801899658415392e-05, "loss": 0.6017, "step": 4536 }, { "epoch": 0.622487480277149, "grad_norm": 1.2421875, "learning_rate": 1.801813408011774e-05, "loss": 0.5597, "step": 4537 }, { "epoch": 0.6226246827193525, "grad_norm": 1.25, "learning_rate": 1.8017271409012652e-05, "loss": 0.5443, "step": 4538 }, { "epoch": 0.6227618851615558, "grad_norm": 1.1953125, "learning_rate": 1.8016408570856633e-05, "loss": 0.4598, "step": 4539 }, { "epoch": 0.6228990876037593, "grad_norm": 1.109375, "learning_rate": 1.8015545565667668e-05, "loss": 0.4808, "step": 4540 }, { "epoch": 0.6230362900459628, "grad_norm": 1.2265625, "learning_rate": 1.801468239346373e-05, "loss": 0.5722, "step": 4541 }, { "epoch": 0.6231734924881663, "grad_norm": 1.15625, "learning_rate": 1.801381905426281e-05, "loss": 0.4413, "step": 4542 }, { "epoch": 0.6233106949303697, "grad_norm": 1.1484375, "learning_rate": 1.8012955548082893e-05, "loss": 0.4927, "step": 4543 }, { "epoch": 0.6234478973725732, "grad_norm": 1.21875, "learning_rate": 1.8012091874941977e-05, "loss": 0.5598, "step": 4544 }, { "epoch": 0.6235850998147767, "grad_norm": 1.25, "learning_rate": 1.801122803485805e-05, "loss": 0.5236, "step": 4545 }, { "epoch": 0.6237223022569802, "grad_norm": 1.2734375, "learning_rate": 1.801036402784912e-05, "loss": 0.5187, "step": 4546 }, { "epoch": 0.6238595046991836, "grad_norm": 1.3203125, "learning_rate": 1.800949985393318e-05, "loss": 0.5694, "step": 4547 }, { "epoch": 0.6239967071413871, "grad_norm": 1.234375, "learning_rate": 1.8008635513128248e-05, "loss": 0.4924, "step": 4548 }, { "epoch": 0.6241339095835906, "grad_norm": 1.1953125, "learning_rate": 1.8007771005452325e-05, "loss": 0.5824, "step": 4549 }, { "epoch": 0.6242711120257941, "grad_norm": 1.3359375, "learning_rate": 1.8006906330923425e-05, "loss": 0.6116, "step": 4550 }, { "epoch": 0.6244083144679975, "grad_norm": 1.0234375, "learning_rate": 1.8006041489559567e-05, "loss": 0.3915, "step": 4551 }, { "epoch": 0.624545516910201, "grad_norm": 1.1171875, "learning_rate": 1.800517648137877e-05, "loss": 0.4922, "step": 4552 }, { "epoch": 0.6246827193524045, "grad_norm": 1.2109375, "learning_rate": 1.800431130639906e-05, "loss": 0.5321, "step": 4553 }, { "epoch": 0.624819921794608, "grad_norm": 1.171875, "learning_rate": 1.8003445964638462e-05, "loss": 0.4812, "step": 4554 }, { "epoch": 0.6249571242368114, "grad_norm": 1.109375, "learning_rate": 1.8002580456115006e-05, "loss": 0.459, "step": 4555 }, { "epoch": 0.6250943266790149, "grad_norm": 1.1640625, "learning_rate": 1.8001714780846727e-05, "loss": 0.5492, "step": 4556 }, { "epoch": 0.6252315291212184, "grad_norm": 1.1015625, "learning_rate": 1.800084893885166e-05, "loss": 0.4797, "step": 4557 }, { "epoch": 0.6253687315634219, "grad_norm": 1.3671875, "learning_rate": 1.7999982930147853e-05, "loss": 0.627, "step": 4558 }, { "epoch": 0.6255059340056253, "grad_norm": 1.21875, "learning_rate": 1.799911675475334e-05, "loss": 0.5231, "step": 4559 }, { "epoch": 0.6256431364478288, "grad_norm": 1.09375, "learning_rate": 1.799825041268618e-05, "loss": 0.4561, "step": 4560 }, { "epoch": 0.6257803388900323, "grad_norm": 1.1875, "learning_rate": 1.7997383903964422e-05, "loss": 0.4555, "step": 4561 }, { "epoch": 0.6259175413322358, "grad_norm": 1.21875, "learning_rate": 1.7996517228606115e-05, "loss": 0.4968, "step": 4562 }, { "epoch": 0.6260547437744391, "grad_norm": 1.1328125, "learning_rate": 1.7995650386629318e-05, "loss": 0.454, "step": 4563 }, { "epoch": 0.6261919462166426, "grad_norm": 1.1875, "learning_rate": 1.7994783378052098e-05, "loss": 0.4674, "step": 4564 }, { "epoch": 0.6263291486588461, "grad_norm": 1.1171875, "learning_rate": 1.7993916202892517e-05, "loss": 0.4813, "step": 4565 }, { "epoch": 0.6264663511010496, "grad_norm": 1.21875, "learning_rate": 1.7993048861168646e-05, "loss": 0.5274, "step": 4566 }, { "epoch": 0.626603553543253, "grad_norm": 1.1875, "learning_rate": 1.7992181352898556e-05, "loss": 0.5106, "step": 4567 }, { "epoch": 0.6267407559854565, "grad_norm": 1.15625, "learning_rate": 1.799131367810032e-05, "loss": 0.5034, "step": 4568 }, { "epoch": 0.62687795842766, "grad_norm": 1.1484375, "learning_rate": 1.7990445836792024e-05, "loss": 0.5111, "step": 4569 }, { "epoch": 0.6270151608698635, "grad_norm": 1.2421875, "learning_rate": 1.7989577828991744e-05, "loss": 0.6093, "step": 4570 }, { "epoch": 0.6271523633120669, "grad_norm": 1.1015625, "learning_rate": 1.7988709654717563e-05, "loss": 0.4343, "step": 4571 }, { "epoch": 0.6272895657542704, "grad_norm": 1.28125, "learning_rate": 1.7987841313987585e-05, "loss": 0.5949, "step": 4572 }, { "epoch": 0.6274267681964739, "grad_norm": 1.203125, "learning_rate": 1.798697280681989e-05, "loss": 0.5442, "step": 4573 }, { "epoch": 0.6275639706386774, "grad_norm": 1.1328125, "learning_rate": 1.7986104133232582e-05, "loss": 0.5021, "step": 4574 }, { "epoch": 0.6277011730808808, "grad_norm": 1.21875, "learning_rate": 1.7985235293243756e-05, "loss": 0.5462, "step": 4575 }, { "epoch": 0.6278383755230843, "grad_norm": 1.203125, "learning_rate": 1.7984366286871516e-05, "loss": 0.5038, "step": 4576 }, { "epoch": 0.6279755779652878, "grad_norm": 1.1328125, "learning_rate": 1.798349711413397e-05, "loss": 0.4868, "step": 4577 }, { "epoch": 0.6281127804074913, "grad_norm": 1.2265625, "learning_rate": 1.798262777504923e-05, "loss": 0.5482, "step": 4578 }, { "epoch": 0.6282499828496947, "grad_norm": 1.1484375, "learning_rate": 1.7981758269635408e-05, "loss": 0.4979, "step": 4579 }, { "epoch": 0.6283871852918982, "grad_norm": 1.2578125, "learning_rate": 1.7980888597910623e-05, "loss": 0.5459, "step": 4580 }, { "epoch": 0.6285243877341017, "grad_norm": 1.25, "learning_rate": 1.7980018759892993e-05, "loss": 0.5608, "step": 4581 }, { "epoch": 0.6286615901763052, "grad_norm": 1.1640625, "learning_rate": 1.7979148755600647e-05, "loss": 0.5107, "step": 4582 }, { "epoch": 0.6287987926185086, "grad_norm": 1.1640625, "learning_rate": 1.7978278585051708e-05, "loss": 0.4998, "step": 4583 }, { "epoch": 0.6289359950607121, "grad_norm": 0.9921875, "learning_rate": 1.797740824826431e-05, "loss": 0.365, "step": 4584 }, { "epoch": 0.6290731975029156, "grad_norm": 1.3203125, "learning_rate": 1.7976537745256583e-05, "loss": 0.6004, "step": 4585 }, { "epoch": 0.6292103999451191, "grad_norm": 1.1484375, "learning_rate": 1.7975667076046673e-05, "loss": 0.4715, "step": 4586 }, { "epoch": 0.6293476023873225, "grad_norm": 1.2734375, "learning_rate": 1.7974796240652714e-05, "loss": 0.565, "step": 4587 }, { "epoch": 0.629484804829526, "grad_norm": 1.1796875, "learning_rate": 1.7973925239092857e-05, "loss": 0.5052, "step": 4588 }, { "epoch": 0.6296220072717295, "grad_norm": 1.078125, "learning_rate": 1.7973054071385247e-05, "loss": 0.3947, "step": 4589 }, { "epoch": 0.629759209713933, "grad_norm": 1.3125, "learning_rate": 1.7972182737548038e-05, "loss": 0.5299, "step": 4590 }, { "epoch": 0.6298964121561363, "grad_norm": 1.2109375, "learning_rate": 1.797131123759938e-05, "loss": 0.5075, "step": 4591 }, { "epoch": 0.6300336145983398, "grad_norm": 1.140625, "learning_rate": 1.7970439571557443e-05, "loss": 0.5092, "step": 4592 }, { "epoch": 0.6301708170405433, "grad_norm": 1.28125, "learning_rate": 1.796956773944038e-05, "loss": 0.5721, "step": 4593 }, { "epoch": 0.6303080194827468, "grad_norm": 1.3046875, "learning_rate": 1.7968695741266358e-05, "loss": 0.6172, "step": 4594 }, { "epoch": 0.6304452219249502, "grad_norm": 1.265625, "learning_rate": 1.796782357705355e-05, "loss": 0.5692, "step": 4595 }, { "epoch": 0.6305824243671537, "grad_norm": 1.265625, "learning_rate": 1.7966951246820124e-05, "loss": 0.5219, "step": 4596 }, { "epoch": 0.6307196268093572, "grad_norm": 1.1875, "learning_rate": 1.796607875058426e-05, "loss": 0.5516, "step": 4597 }, { "epoch": 0.6308568292515607, "grad_norm": 1.28125, "learning_rate": 1.7965206088364136e-05, "loss": 0.5061, "step": 4598 }, { "epoch": 0.6309940316937641, "grad_norm": 1.140625, "learning_rate": 1.7964333260177937e-05, "loss": 0.4762, "step": 4599 }, { "epoch": 0.6311312341359676, "grad_norm": 1.3359375, "learning_rate": 1.7963460266043846e-05, "loss": 0.5918, "step": 4600 }, { "epoch": 0.6312684365781711, "grad_norm": 1.1953125, "learning_rate": 1.796258710598006e-05, "loss": 0.4969, "step": 4601 }, { "epoch": 0.6314056390203746, "grad_norm": 1.203125, "learning_rate": 1.796171378000476e-05, "loss": 0.5105, "step": 4602 }, { "epoch": 0.631542841462578, "grad_norm": 1.2421875, "learning_rate": 1.7960840288136155e-05, "loss": 0.4615, "step": 4603 }, { "epoch": 0.6316800439047815, "grad_norm": 1.1953125, "learning_rate": 1.795996663039244e-05, "loss": 0.5157, "step": 4604 }, { "epoch": 0.631817246346985, "grad_norm": 1.2265625, "learning_rate": 1.7959092806791822e-05, "loss": 0.5107, "step": 4605 }, { "epoch": 0.6319544487891885, "grad_norm": 1.265625, "learning_rate": 1.7958218817352506e-05, "loss": 0.5406, "step": 4606 }, { "epoch": 0.6320916512313919, "grad_norm": 1.2578125, "learning_rate": 1.79573446620927e-05, "loss": 0.5255, "step": 4607 }, { "epoch": 0.6322288536735954, "grad_norm": 1.1640625, "learning_rate": 1.795647034103062e-05, "loss": 0.5405, "step": 4608 }, { "epoch": 0.6323660561157989, "grad_norm": 1.2734375, "learning_rate": 1.7955595854184488e-05, "loss": 0.5086, "step": 4609 }, { "epoch": 0.6325032585580024, "grad_norm": 1.2421875, "learning_rate": 1.795472120157252e-05, "loss": 0.4944, "step": 4610 }, { "epoch": 0.6326404610002058, "grad_norm": 1.1953125, "learning_rate": 1.7953846383212945e-05, "loss": 0.5168, "step": 4611 }, { "epoch": 0.6327776634424093, "grad_norm": 1.125, "learning_rate": 1.7952971399123988e-05, "loss": 0.418, "step": 4612 }, { "epoch": 0.6329148658846128, "grad_norm": 1.21875, "learning_rate": 1.795209624932388e-05, "loss": 0.5005, "step": 4613 }, { "epoch": 0.6330520683268163, "grad_norm": 1.1875, "learning_rate": 1.795122093383086e-05, "loss": 0.505, "step": 4614 }, { "epoch": 0.6331892707690197, "grad_norm": 1.21875, "learning_rate": 1.795034545266316e-05, "loss": 0.5023, "step": 4615 }, { "epoch": 0.6333264732112232, "grad_norm": 1.1015625, "learning_rate": 1.7949469805839025e-05, "loss": 0.4721, "step": 4616 }, { "epoch": 0.6334636756534266, "grad_norm": 1.1953125, "learning_rate": 1.7948593993376704e-05, "loss": 0.4845, "step": 4617 }, { "epoch": 0.6336008780956301, "grad_norm": 1.1953125, "learning_rate": 1.794771801529444e-05, "loss": 0.4936, "step": 4618 }, { "epoch": 0.6337380805378335, "grad_norm": 1.2734375, "learning_rate": 1.794684187161049e-05, "loss": 0.5982, "step": 4619 }, { "epoch": 0.633875282980037, "grad_norm": 1.0703125, "learning_rate": 1.7945965562343105e-05, "loss": 0.458, "step": 4620 }, { "epoch": 0.6340124854222405, "grad_norm": 1.171875, "learning_rate": 1.794508908751055e-05, "loss": 0.5306, "step": 4621 }, { "epoch": 0.634149687864444, "grad_norm": 1.1015625, "learning_rate": 1.794421244713108e-05, "loss": 0.4578, "step": 4622 }, { "epoch": 0.6342868903066474, "grad_norm": 1.28125, "learning_rate": 1.7943335641222966e-05, "loss": 0.5937, "step": 4623 }, { "epoch": 0.6344240927488509, "grad_norm": 1.2265625, "learning_rate": 1.7942458669804478e-05, "loss": 0.4801, "step": 4624 }, { "epoch": 0.6345612951910544, "grad_norm": 1.2578125, "learning_rate": 1.7941581532893888e-05, "loss": 0.5347, "step": 4625 }, { "epoch": 0.6346984976332579, "grad_norm": 1.25, "learning_rate": 1.794070423050947e-05, "loss": 0.569, "step": 4626 }, { "epoch": 0.6348357000754613, "grad_norm": 1.2421875, "learning_rate": 1.7939826762669504e-05, "loss": 0.5219, "step": 4627 }, { "epoch": 0.6349729025176648, "grad_norm": 1.1875, "learning_rate": 1.7938949129392278e-05, "loss": 0.5023, "step": 4628 }, { "epoch": 0.6351101049598683, "grad_norm": 1.21875, "learning_rate": 1.7938071330696073e-05, "loss": 0.563, "step": 4629 }, { "epoch": 0.6352473074020718, "grad_norm": 1.0546875, "learning_rate": 1.7937193366599186e-05, "loss": 0.4164, "step": 4630 }, { "epoch": 0.6353845098442752, "grad_norm": 1.234375, "learning_rate": 1.7936315237119905e-05, "loss": 0.488, "step": 4631 }, { "epoch": 0.6355217122864787, "grad_norm": 1.171875, "learning_rate": 1.7935436942276523e-05, "loss": 0.505, "step": 4632 }, { "epoch": 0.6356589147286822, "grad_norm": 1.140625, "learning_rate": 1.793455848208735e-05, "loss": 0.4636, "step": 4633 }, { "epoch": 0.6357961171708857, "grad_norm": 1.203125, "learning_rate": 1.7933679856570684e-05, "loss": 0.4524, "step": 4634 }, { "epoch": 0.6359333196130891, "grad_norm": 1.2890625, "learning_rate": 1.7932801065744835e-05, "loss": 0.5398, "step": 4635 }, { "epoch": 0.6360705220552926, "grad_norm": 1.2578125, "learning_rate": 1.7931922109628114e-05, "loss": 0.5344, "step": 4636 }, { "epoch": 0.6362077244974961, "grad_norm": 1.2890625, "learning_rate": 1.7931042988238835e-05, "loss": 0.5463, "step": 4637 }, { "epoch": 0.6363449269396996, "grad_norm": 1.21875, "learning_rate": 1.7930163701595312e-05, "loss": 0.559, "step": 4638 }, { "epoch": 0.636482129381903, "grad_norm": 1.1171875, "learning_rate": 1.792928424971587e-05, "loss": 0.4657, "step": 4639 }, { "epoch": 0.6366193318241065, "grad_norm": 1.28125, "learning_rate": 1.7928404632618832e-05, "loss": 0.5629, "step": 4640 }, { "epoch": 0.63675653426631, "grad_norm": 1.2578125, "learning_rate": 1.7927524850322528e-05, "loss": 0.5507, "step": 4641 }, { "epoch": 0.6368937367085135, "grad_norm": 1.15625, "learning_rate": 1.792664490284529e-05, "loss": 0.4829, "step": 4642 }, { "epoch": 0.6370309391507168, "grad_norm": 1.28125, "learning_rate": 1.7925764790205448e-05, "loss": 0.493, "step": 4643 }, { "epoch": 0.6371681415929203, "grad_norm": 1.2265625, "learning_rate": 1.7924884512421345e-05, "loss": 0.5308, "step": 4644 }, { "epoch": 0.6373053440351238, "grad_norm": 1.21875, "learning_rate": 1.792400406951132e-05, "loss": 0.5715, "step": 4645 }, { "epoch": 0.6374425464773273, "grad_norm": 1.2265625, "learning_rate": 1.7923123461493722e-05, "loss": 0.5059, "step": 4646 }, { "epoch": 0.6375797489195307, "grad_norm": 1.21875, "learning_rate": 1.7922242688386892e-05, "loss": 0.5869, "step": 4647 }, { "epoch": 0.6377169513617342, "grad_norm": 1.2578125, "learning_rate": 1.7921361750209195e-05, "loss": 0.5982, "step": 4648 }, { "epoch": 0.6378541538039377, "grad_norm": 1.171875, "learning_rate": 1.7920480646978972e-05, "loss": 0.53, "step": 4649 }, { "epoch": 0.6379913562461412, "grad_norm": 1.1328125, "learning_rate": 1.791959937871459e-05, "loss": 0.5003, "step": 4650 }, { "epoch": 0.6381285586883446, "grad_norm": 1.2734375, "learning_rate": 1.7918717945434413e-05, "loss": 0.5808, "step": 4651 }, { "epoch": 0.6382657611305481, "grad_norm": 1.2734375, "learning_rate": 1.7917836347156804e-05, "loss": 0.5116, "step": 4652 }, { "epoch": 0.6384029635727516, "grad_norm": 1.234375, "learning_rate": 1.791695458390013e-05, "loss": 0.5033, "step": 4653 }, { "epoch": 0.6385401660149551, "grad_norm": 1.234375, "learning_rate": 1.791607265568277e-05, "loss": 0.5017, "step": 4654 }, { "epoch": 0.6386773684571585, "grad_norm": 1.140625, "learning_rate": 1.791519056252309e-05, "loss": 0.4913, "step": 4655 }, { "epoch": 0.638814570899362, "grad_norm": 1.1640625, "learning_rate": 1.7914308304439485e-05, "loss": 0.5235, "step": 4656 }, { "epoch": 0.6389517733415655, "grad_norm": 1.1953125, "learning_rate": 1.7913425881450324e-05, "loss": 0.4973, "step": 4657 }, { "epoch": 0.639088975783769, "grad_norm": 1.2421875, "learning_rate": 1.7912543293574e-05, "loss": 0.5529, "step": 4658 }, { "epoch": 0.6392261782259724, "grad_norm": 1.1796875, "learning_rate": 1.7911660540828898e-05, "loss": 0.4622, "step": 4659 }, { "epoch": 0.6393633806681759, "grad_norm": 1.0078125, "learning_rate": 1.791077762323342e-05, "loss": 0.3821, "step": 4660 }, { "epoch": 0.6395005831103794, "grad_norm": 1.1640625, "learning_rate": 1.7909894540805956e-05, "loss": 0.5253, "step": 4661 }, { "epoch": 0.6396377855525829, "grad_norm": 1.171875, "learning_rate": 1.790901129356491e-05, "loss": 0.4515, "step": 4662 }, { "epoch": 0.6397749879947863, "grad_norm": 1.21875, "learning_rate": 1.790812788152868e-05, "loss": 0.5205, "step": 4663 }, { "epoch": 0.6399121904369898, "grad_norm": 1.109375, "learning_rate": 1.790724430471568e-05, "loss": 0.4606, "step": 4664 }, { "epoch": 0.6400493928791933, "grad_norm": 1.1484375, "learning_rate": 1.7906360563144314e-05, "loss": 0.4473, "step": 4665 }, { "epoch": 0.6401865953213968, "grad_norm": 1.453125, "learning_rate": 1.7905476656833e-05, "loss": 0.5948, "step": 4666 }, { "epoch": 0.6403237977636002, "grad_norm": 1.234375, "learning_rate": 1.7904592585800156e-05, "loss": 0.5145, "step": 4667 }, { "epoch": 0.6404610002058037, "grad_norm": 1.2109375, "learning_rate": 1.7903708350064204e-05, "loss": 0.4961, "step": 4668 }, { "epoch": 0.6405982026480072, "grad_norm": 1.1953125, "learning_rate": 1.7902823949643562e-05, "loss": 0.4975, "step": 4669 }, { "epoch": 0.6407354050902107, "grad_norm": 1.2578125, "learning_rate": 1.790193938455666e-05, "loss": 0.5954, "step": 4670 }, { "epoch": 0.640872607532414, "grad_norm": 1.3203125, "learning_rate": 1.7901054654821935e-05, "loss": 0.5859, "step": 4671 }, { "epoch": 0.6410098099746175, "grad_norm": 1.234375, "learning_rate": 1.790016976045782e-05, "loss": 0.5152, "step": 4672 }, { "epoch": 0.641147012416821, "grad_norm": 1.1875, "learning_rate": 1.7899284701482745e-05, "loss": 0.501, "step": 4673 }, { "epoch": 0.6412842148590245, "grad_norm": 1.2265625, "learning_rate": 1.7898399477915157e-05, "loss": 0.5502, "step": 4674 }, { "epoch": 0.6414214173012279, "grad_norm": 1.2421875, "learning_rate": 1.7897514089773503e-05, "loss": 0.6184, "step": 4675 }, { "epoch": 0.6415586197434314, "grad_norm": 1.2265625, "learning_rate": 1.7896628537076223e-05, "loss": 0.5309, "step": 4676 }, { "epoch": 0.6416958221856349, "grad_norm": 1.2109375, "learning_rate": 1.789574281984178e-05, "loss": 0.5535, "step": 4677 }, { "epoch": 0.6418330246278384, "grad_norm": 0.94921875, "learning_rate": 1.7894856938088627e-05, "loss": 0.3575, "step": 4678 }, { "epoch": 0.6419702270700418, "grad_norm": 1.1015625, "learning_rate": 1.7893970891835213e-05, "loss": 0.4545, "step": 4679 }, { "epoch": 0.6421074295122453, "grad_norm": 1.046875, "learning_rate": 1.789308468110001e-05, "loss": 0.4275, "step": 4680 }, { "epoch": 0.6422446319544488, "grad_norm": 1.421875, "learning_rate": 1.7892198305901476e-05, "loss": 0.5318, "step": 4681 }, { "epoch": 0.6423818343966523, "grad_norm": 1.2265625, "learning_rate": 1.7891311766258082e-05, "loss": 0.5463, "step": 4682 }, { "epoch": 0.6425190368388557, "grad_norm": 1.0859375, "learning_rate": 1.7890425062188308e-05, "loss": 0.4499, "step": 4683 }, { "epoch": 0.6426562392810592, "grad_norm": 1.3203125, "learning_rate": 1.788953819371062e-05, "loss": 0.5569, "step": 4684 }, { "epoch": 0.6427934417232627, "grad_norm": 1.140625, "learning_rate": 1.7888651160843503e-05, "loss": 0.4831, "step": 4685 }, { "epoch": 0.6429306441654662, "grad_norm": 1.1171875, "learning_rate": 1.7887763963605437e-05, "loss": 0.4887, "step": 4686 }, { "epoch": 0.6430678466076696, "grad_norm": 1.375, "learning_rate": 1.7886876602014905e-05, "loss": 0.5783, "step": 4687 }, { "epoch": 0.6432050490498731, "grad_norm": 1.1171875, "learning_rate": 1.78859890760904e-05, "loss": 0.5248, "step": 4688 }, { "epoch": 0.6433422514920766, "grad_norm": 1.2421875, "learning_rate": 1.7885101385850415e-05, "loss": 0.5204, "step": 4689 }, { "epoch": 0.6434794539342801, "grad_norm": 1.109375, "learning_rate": 1.7884213531313447e-05, "loss": 0.44, "step": 4690 }, { "epoch": 0.6436166563764835, "grad_norm": 1.25, "learning_rate": 1.7883325512497997e-05, "loss": 0.5709, "step": 4691 }, { "epoch": 0.643753858818687, "grad_norm": 1.171875, "learning_rate": 1.788243732942256e-05, "loss": 0.5411, "step": 4692 }, { "epoch": 0.6438910612608905, "grad_norm": 1.1953125, "learning_rate": 1.7881548982105652e-05, "loss": 0.5326, "step": 4693 }, { "epoch": 0.644028263703094, "grad_norm": 1.25, "learning_rate": 1.7880660470565776e-05, "loss": 0.5378, "step": 4694 }, { "epoch": 0.6441654661452973, "grad_norm": 1.21875, "learning_rate": 1.787977179482145e-05, "loss": 0.5839, "step": 4695 }, { "epoch": 0.6443026685875008, "grad_norm": 1.1875, "learning_rate": 1.787888295489119e-05, "loss": 0.5293, "step": 4696 }, { "epoch": 0.6444398710297043, "grad_norm": 1.1640625, "learning_rate": 1.7877993950793512e-05, "loss": 0.4718, "step": 4697 }, { "epoch": 0.6445770734719078, "grad_norm": 1.1953125, "learning_rate": 1.7877104782546946e-05, "loss": 0.5347, "step": 4698 }, { "epoch": 0.6447142759141112, "grad_norm": 1.1328125, "learning_rate": 1.7876215450170016e-05, "loss": 0.4879, "step": 4699 }, { "epoch": 0.6448514783563147, "grad_norm": 1.1796875, "learning_rate": 1.787532595368125e-05, "loss": 0.4871, "step": 4700 }, { "epoch": 0.6449886807985182, "grad_norm": 1.2109375, "learning_rate": 1.7874436293099188e-05, "loss": 0.4962, "step": 4701 }, { "epoch": 0.6451258832407217, "grad_norm": 1.09375, "learning_rate": 1.787354646844236e-05, "loss": 0.4129, "step": 4702 }, { "epoch": 0.6452630856829251, "grad_norm": 1.359375, "learning_rate": 1.7872656479729313e-05, "loss": 0.543, "step": 4703 }, { "epoch": 0.6454002881251286, "grad_norm": 1.203125, "learning_rate": 1.7871766326978585e-05, "loss": 0.5137, "step": 4704 }, { "epoch": 0.6455374905673321, "grad_norm": 1.1640625, "learning_rate": 1.787087601020873e-05, "loss": 0.5118, "step": 4705 }, { "epoch": 0.6456746930095356, "grad_norm": 1.078125, "learning_rate": 1.7869985529438295e-05, "loss": 0.4156, "step": 4706 }, { "epoch": 0.645811895451739, "grad_norm": 1.234375, "learning_rate": 1.7869094884685837e-05, "loss": 0.5126, "step": 4707 }, { "epoch": 0.6459490978939425, "grad_norm": 1.1171875, "learning_rate": 1.786820407596991e-05, "loss": 0.4755, "step": 4708 }, { "epoch": 0.646086300336146, "grad_norm": 1.3359375, "learning_rate": 1.7867313103309078e-05, "loss": 0.6187, "step": 4709 }, { "epoch": 0.6462235027783495, "grad_norm": 1.2421875, "learning_rate": 1.7866421966721908e-05, "loss": 0.5658, "step": 4710 }, { "epoch": 0.6463607052205529, "grad_norm": 1.4921875, "learning_rate": 1.786553066622696e-05, "loss": 0.6378, "step": 4711 }, { "epoch": 0.6464979076627564, "grad_norm": 1.265625, "learning_rate": 1.7864639201842813e-05, "loss": 0.5071, "step": 4712 }, { "epoch": 0.6466351101049599, "grad_norm": 1.125, "learning_rate": 1.786374757358804e-05, "loss": 0.4316, "step": 4713 }, { "epoch": 0.6467723125471634, "grad_norm": 1.1796875, "learning_rate": 1.786285578148122e-05, "loss": 0.4772, "step": 4714 }, { "epoch": 0.6469095149893668, "grad_norm": 1.1875, "learning_rate": 1.7861963825540935e-05, "loss": 0.4766, "step": 4715 }, { "epoch": 0.6470467174315703, "grad_norm": 1.21875, "learning_rate": 1.7861071705785763e-05, "loss": 0.4439, "step": 4716 }, { "epoch": 0.6471839198737738, "grad_norm": 1.3046875, "learning_rate": 1.7860179422234304e-05, "loss": 0.5468, "step": 4717 }, { "epoch": 0.6473211223159773, "grad_norm": 1.3203125, "learning_rate": 1.7859286974905143e-05, "loss": 0.5532, "step": 4718 }, { "epoch": 0.6474583247581807, "grad_norm": 1.15625, "learning_rate": 1.7858394363816872e-05, "loss": 0.4743, "step": 4719 }, { "epoch": 0.6475955272003842, "grad_norm": 1.1171875, "learning_rate": 1.78575015889881e-05, "loss": 0.4497, "step": 4720 }, { "epoch": 0.6477327296425877, "grad_norm": 1.4140625, "learning_rate": 1.785660865043742e-05, "loss": 0.529, "step": 4721 }, { "epoch": 0.6478699320847912, "grad_norm": 1.15625, "learning_rate": 1.7855715548183444e-05, "loss": 0.5186, "step": 4722 }, { "epoch": 0.6480071345269945, "grad_norm": 1.1796875, "learning_rate": 1.7854822282244775e-05, "loss": 0.5159, "step": 4723 }, { "epoch": 0.648144336969198, "grad_norm": 1.0703125, "learning_rate": 1.7853928852640033e-05, "loss": 0.4301, "step": 4724 }, { "epoch": 0.6482815394114015, "grad_norm": 1.34375, "learning_rate": 1.7853035259387827e-05, "loss": 0.5347, "step": 4725 }, { "epoch": 0.648418741853605, "grad_norm": 1.1171875, "learning_rate": 1.785214150250678e-05, "loss": 0.5099, "step": 4726 }, { "epoch": 0.6485559442958084, "grad_norm": 1.2265625, "learning_rate": 1.7851247582015513e-05, "loss": 0.5049, "step": 4727 }, { "epoch": 0.6486931467380119, "grad_norm": 1.0703125, "learning_rate": 1.7850353497932647e-05, "loss": 0.5108, "step": 4728 }, { "epoch": 0.6488303491802154, "grad_norm": 1.140625, "learning_rate": 1.7849459250276823e-05, "loss": 0.4638, "step": 4729 }, { "epoch": 0.6489675516224189, "grad_norm": 1.1640625, "learning_rate": 1.7848564839066665e-05, "loss": 0.5007, "step": 4730 }, { "epoch": 0.6491047540646223, "grad_norm": 1.2265625, "learning_rate": 1.7847670264320815e-05, "loss": 0.4893, "step": 4731 }, { "epoch": 0.6492419565068258, "grad_norm": 1.1953125, "learning_rate": 1.7846775526057908e-05, "loss": 0.461, "step": 4732 }, { "epoch": 0.6493791589490293, "grad_norm": 1.1484375, "learning_rate": 1.784588062429659e-05, "loss": 0.4546, "step": 4733 }, { "epoch": 0.6495163613912328, "grad_norm": 1.1640625, "learning_rate": 1.7844985559055506e-05, "loss": 0.5188, "step": 4734 }, { "epoch": 0.6496535638334362, "grad_norm": 1.28125, "learning_rate": 1.784409033035331e-05, "loss": 0.5817, "step": 4735 }, { "epoch": 0.6497907662756397, "grad_norm": 1.2734375, "learning_rate": 1.7843194938208647e-05, "loss": 0.491, "step": 4736 }, { "epoch": 0.6499279687178432, "grad_norm": 1.2265625, "learning_rate": 1.7842299382640176e-05, "loss": 0.5106, "step": 4737 }, { "epoch": 0.6500651711600467, "grad_norm": 1.0703125, "learning_rate": 1.7841403663666567e-05, "loss": 0.4273, "step": 4738 }, { "epoch": 0.6502023736022501, "grad_norm": 1.2890625, "learning_rate": 1.784050778130647e-05, "loss": 0.6004, "step": 4739 }, { "epoch": 0.6503395760444536, "grad_norm": 1.3125, "learning_rate": 1.7839611735578558e-05, "loss": 0.579, "step": 4740 }, { "epoch": 0.6504767784866571, "grad_norm": 1.21875, "learning_rate": 1.7838715526501508e-05, "loss": 0.5457, "step": 4741 }, { "epoch": 0.6506139809288606, "grad_norm": 1.234375, "learning_rate": 1.783781915409398e-05, "loss": 0.5691, "step": 4742 }, { "epoch": 0.650751183371064, "grad_norm": 1.203125, "learning_rate": 1.783692261837466e-05, "loss": 0.5707, "step": 4743 }, { "epoch": 0.6508883858132675, "grad_norm": 1.171875, "learning_rate": 1.783602591936223e-05, "loss": 0.4893, "step": 4744 }, { "epoch": 0.651025588255471, "grad_norm": 1.203125, "learning_rate": 1.7835129057075368e-05, "loss": 0.5883, "step": 4745 }, { "epoch": 0.6511627906976745, "grad_norm": 1.140625, "learning_rate": 1.7834232031532767e-05, "loss": 0.4956, "step": 4746 }, { "epoch": 0.6512999931398779, "grad_norm": 1.1171875, "learning_rate": 1.783333484275311e-05, "loss": 0.4562, "step": 4747 }, { "epoch": 0.6514371955820814, "grad_norm": 1.2265625, "learning_rate": 1.7832437490755098e-05, "loss": 0.5202, "step": 4748 }, { "epoch": 0.6515743980242849, "grad_norm": 1.046875, "learning_rate": 1.783153997555743e-05, "loss": 0.4042, "step": 4749 }, { "epoch": 0.6517116004664883, "grad_norm": 1.171875, "learning_rate": 1.7830642297178802e-05, "loss": 0.4841, "step": 4750 }, { "epoch": 0.6518488029086917, "grad_norm": 1.2109375, "learning_rate": 1.782974445563792e-05, "loss": 0.5031, "step": 4751 }, { "epoch": 0.6519860053508952, "grad_norm": 1.1640625, "learning_rate": 1.782884645095349e-05, "loss": 0.4464, "step": 4752 }, { "epoch": 0.6521232077930987, "grad_norm": 1.1953125, "learning_rate": 1.782794828314423e-05, "loss": 0.5125, "step": 4753 }, { "epoch": 0.6522604102353022, "grad_norm": 1.1171875, "learning_rate": 1.7827049952228847e-05, "loss": 0.4544, "step": 4754 }, { "epoch": 0.6523976126775056, "grad_norm": 1.1640625, "learning_rate": 1.782615145822606e-05, "loss": 0.4959, "step": 4755 }, { "epoch": 0.6525348151197091, "grad_norm": 1.28125, "learning_rate": 1.7825252801154592e-05, "loss": 0.5365, "step": 4756 }, { "epoch": 0.6526720175619126, "grad_norm": 1.015625, "learning_rate": 1.7824353981033166e-05, "loss": 0.4221, "step": 4757 }, { "epoch": 0.6528092200041161, "grad_norm": 1.2109375, "learning_rate": 1.7823454997880513e-05, "loss": 0.5095, "step": 4758 }, { "epoch": 0.6529464224463195, "grad_norm": 1.140625, "learning_rate": 1.7822555851715367e-05, "loss": 0.482, "step": 4759 }, { "epoch": 0.653083624888523, "grad_norm": 1.15625, "learning_rate": 1.7821656542556457e-05, "loss": 0.5203, "step": 4760 }, { "epoch": 0.6532208273307265, "grad_norm": 1.25, "learning_rate": 1.7820757070422522e-05, "loss": 0.554, "step": 4761 }, { "epoch": 0.65335802977293, "grad_norm": 1.1171875, "learning_rate": 1.781985743533231e-05, "loss": 0.4832, "step": 4762 }, { "epoch": 0.6534952322151334, "grad_norm": 1.328125, "learning_rate": 1.7818957637304556e-05, "loss": 0.5706, "step": 4763 }, { "epoch": 0.6536324346573369, "grad_norm": 1.203125, "learning_rate": 1.7818057676358014e-05, "loss": 0.526, "step": 4764 }, { "epoch": 0.6537696370995404, "grad_norm": 1.1875, "learning_rate": 1.7817157552511438e-05, "loss": 0.5517, "step": 4765 }, { "epoch": 0.6539068395417439, "grad_norm": 1.34375, "learning_rate": 1.7816257265783584e-05, "loss": 0.5885, "step": 4766 }, { "epoch": 0.6540440419839473, "grad_norm": 1.2265625, "learning_rate": 1.7815356816193205e-05, "loss": 0.5285, "step": 4767 }, { "epoch": 0.6541812444261508, "grad_norm": 1.1640625, "learning_rate": 1.781445620375907e-05, "loss": 0.4639, "step": 4768 }, { "epoch": 0.6543184468683543, "grad_norm": 1.2265625, "learning_rate": 1.7813555428499936e-05, "loss": 0.5102, "step": 4769 }, { "epoch": 0.6544556493105578, "grad_norm": 1.3046875, "learning_rate": 1.7812654490434577e-05, "loss": 0.5669, "step": 4770 }, { "epoch": 0.6545928517527612, "grad_norm": 1.28125, "learning_rate": 1.7811753389581768e-05, "loss": 0.5526, "step": 4771 }, { "epoch": 0.6547300541949647, "grad_norm": 1.15625, "learning_rate": 1.7810852125960282e-05, "loss": 0.4809, "step": 4772 }, { "epoch": 0.6548672566371682, "grad_norm": 1.1640625, "learning_rate": 1.7809950699588893e-05, "loss": 0.5213, "step": 4773 }, { "epoch": 0.6550044590793717, "grad_norm": 1.25, "learning_rate": 1.780904911048639e-05, "loss": 0.5335, "step": 4774 }, { "epoch": 0.655141661521575, "grad_norm": 1.1640625, "learning_rate": 1.7808147358671556e-05, "loss": 0.4822, "step": 4775 }, { "epoch": 0.6552788639637785, "grad_norm": 1.234375, "learning_rate": 1.7807245444163187e-05, "loss": 0.5549, "step": 4776 }, { "epoch": 0.655416066405982, "grad_norm": 1.390625, "learning_rate": 1.7806343366980064e-05, "loss": 0.5997, "step": 4777 }, { "epoch": 0.6555532688481855, "grad_norm": 1.2734375, "learning_rate": 1.780544112714099e-05, "loss": 0.5564, "step": 4778 }, { "epoch": 0.6556904712903889, "grad_norm": 1.140625, "learning_rate": 1.7804538724664763e-05, "loss": 0.4729, "step": 4779 }, { "epoch": 0.6558276737325924, "grad_norm": 1.3828125, "learning_rate": 1.7803636159570188e-05, "loss": 0.5604, "step": 4780 }, { "epoch": 0.6559648761747959, "grad_norm": 1.0859375, "learning_rate": 1.7802733431876067e-05, "loss": 0.4971, "step": 4781 }, { "epoch": 0.6561020786169994, "grad_norm": 1.1484375, "learning_rate": 1.7801830541601213e-05, "loss": 0.4989, "step": 4782 }, { "epoch": 0.6562392810592028, "grad_norm": 1.1875, "learning_rate": 1.780092748876444e-05, "loss": 0.4853, "step": 4783 }, { "epoch": 0.6563764835014063, "grad_norm": 1.234375, "learning_rate": 1.7800024273384557e-05, "loss": 0.4979, "step": 4784 }, { "epoch": 0.6565136859436098, "grad_norm": 1.2109375, "learning_rate": 1.7799120895480394e-05, "loss": 0.5277, "step": 4785 }, { "epoch": 0.6566508883858133, "grad_norm": 1.03125, "learning_rate": 1.7798217355070765e-05, "loss": 0.4679, "step": 4786 }, { "epoch": 0.6567880908280167, "grad_norm": 1.0859375, "learning_rate": 1.77973136521745e-05, "loss": 0.4908, "step": 4787 }, { "epoch": 0.6569252932702202, "grad_norm": 1.046875, "learning_rate": 1.7796409786810432e-05, "loss": 0.4623, "step": 4788 }, { "epoch": 0.6570624957124237, "grad_norm": 1.171875, "learning_rate": 1.7795505758997394e-05, "loss": 0.4689, "step": 4789 }, { "epoch": 0.6571996981546272, "grad_norm": 1.21875, "learning_rate": 1.7794601568754218e-05, "loss": 0.5969, "step": 4790 }, { "epoch": 0.6573369005968306, "grad_norm": 1.15625, "learning_rate": 1.7793697216099745e-05, "loss": 0.5419, "step": 4791 }, { "epoch": 0.6574741030390341, "grad_norm": 1.296875, "learning_rate": 1.779279270105282e-05, "loss": 0.5717, "step": 4792 }, { "epoch": 0.6576113054812376, "grad_norm": 1.3671875, "learning_rate": 1.7791888023632292e-05, "loss": 0.5718, "step": 4793 }, { "epoch": 0.6577485079234411, "grad_norm": 1.1796875, "learning_rate": 1.779098318385701e-05, "loss": 0.5136, "step": 4794 }, { "epoch": 0.6578857103656445, "grad_norm": 1.296875, "learning_rate": 1.7790078181745824e-05, "loss": 0.5586, "step": 4795 }, { "epoch": 0.658022912807848, "grad_norm": 1.0625, "learning_rate": 1.778917301731759e-05, "loss": 0.4439, "step": 4796 }, { "epoch": 0.6581601152500515, "grad_norm": 1.109375, "learning_rate": 1.778826769059118e-05, "loss": 0.4557, "step": 4797 }, { "epoch": 0.658297317692255, "grad_norm": 1.28125, "learning_rate": 1.7787362201585443e-05, "loss": 0.5332, "step": 4798 }, { "epoch": 0.6584345201344584, "grad_norm": 1.1796875, "learning_rate": 1.7786456550319254e-05, "loss": 0.5167, "step": 4799 }, { "epoch": 0.6585717225766619, "grad_norm": 1.1875, "learning_rate": 1.7785550736811482e-05, "loss": 0.496, "step": 4800 }, { "epoch": 0.6587089250188654, "grad_norm": 1.1015625, "learning_rate": 1.7784644761081003e-05, "loss": 0.4233, "step": 4801 }, { "epoch": 0.6588461274610689, "grad_norm": 1.21875, "learning_rate": 1.778373862314669e-05, "loss": 0.5278, "step": 4802 }, { "epoch": 0.6589833299032722, "grad_norm": 1.1015625, "learning_rate": 1.7782832323027432e-05, "loss": 0.4127, "step": 4803 }, { "epoch": 0.6591205323454757, "grad_norm": 1.15625, "learning_rate": 1.77819258607421e-05, "loss": 0.502, "step": 4804 }, { "epoch": 0.6592577347876792, "grad_norm": 1.203125, "learning_rate": 1.778101923630959e-05, "loss": 0.5079, "step": 4805 }, { "epoch": 0.6593949372298827, "grad_norm": 1.1328125, "learning_rate": 1.7780112449748788e-05, "loss": 0.4695, "step": 4806 }, { "epoch": 0.6595321396720861, "grad_norm": 1.1796875, "learning_rate": 1.7779205501078597e-05, "loss": 0.5002, "step": 4807 }, { "epoch": 0.6596693421142896, "grad_norm": 1.3046875, "learning_rate": 1.777829839031791e-05, "loss": 0.605, "step": 4808 }, { "epoch": 0.6598065445564931, "grad_norm": 1.2265625, "learning_rate": 1.777739111748562e-05, "loss": 0.5652, "step": 4809 }, { "epoch": 0.6599437469986966, "grad_norm": 1.1875, "learning_rate": 1.7776483682600642e-05, "loss": 0.4989, "step": 4810 }, { "epoch": 0.6600809494409, "grad_norm": 1.203125, "learning_rate": 1.777557608568188e-05, "loss": 0.4984, "step": 4811 }, { "epoch": 0.6602181518831035, "grad_norm": 1.1796875, "learning_rate": 1.7774668326748242e-05, "loss": 0.4966, "step": 4812 }, { "epoch": 0.660355354325307, "grad_norm": 1.1640625, "learning_rate": 1.7773760405818648e-05, "loss": 0.5411, "step": 4813 }, { "epoch": 0.6604925567675105, "grad_norm": 1.265625, "learning_rate": 1.777285232291201e-05, "loss": 0.567, "step": 4814 }, { "epoch": 0.6606297592097139, "grad_norm": 1.3515625, "learning_rate": 1.7771944078047254e-05, "loss": 0.5366, "step": 4815 }, { "epoch": 0.6607669616519174, "grad_norm": 1.2265625, "learning_rate": 1.7771035671243303e-05, "loss": 0.5185, "step": 4816 }, { "epoch": 0.6609041640941209, "grad_norm": 1.140625, "learning_rate": 1.7770127102519086e-05, "loss": 0.4743, "step": 4817 }, { "epoch": 0.6610413665363244, "grad_norm": 1.1875, "learning_rate": 1.7769218371893527e-05, "loss": 0.5092, "step": 4818 }, { "epoch": 0.6611785689785278, "grad_norm": 1.09375, "learning_rate": 1.776830947938557e-05, "loss": 0.4639, "step": 4819 }, { "epoch": 0.6613157714207313, "grad_norm": 1.1875, "learning_rate": 1.7767400425014152e-05, "loss": 0.4817, "step": 4820 }, { "epoch": 0.6614529738629348, "grad_norm": 1.203125, "learning_rate": 1.7766491208798208e-05, "loss": 0.5644, "step": 4821 }, { "epoch": 0.6615901763051383, "grad_norm": 1.25, "learning_rate": 1.7765581830756687e-05, "loss": 0.5386, "step": 4822 }, { "epoch": 0.6617273787473417, "grad_norm": 1.21875, "learning_rate": 1.776467229090854e-05, "loss": 0.5359, "step": 4823 }, { "epoch": 0.6618645811895452, "grad_norm": 1.2890625, "learning_rate": 1.7763762589272714e-05, "loss": 0.5735, "step": 4824 }, { "epoch": 0.6620017836317487, "grad_norm": 1.234375, "learning_rate": 1.776285272586816e-05, "loss": 0.5302, "step": 4825 }, { "epoch": 0.6621389860739522, "grad_norm": 1.171875, "learning_rate": 1.776194270071385e-05, "loss": 0.5409, "step": 4826 }, { "epoch": 0.6622761885161556, "grad_norm": 1.203125, "learning_rate": 1.776103251382873e-05, "loss": 0.5099, "step": 4827 }, { "epoch": 0.662413390958359, "grad_norm": 1.203125, "learning_rate": 1.7760122165231774e-05, "loss": 0.5277, "step": 4828 }, { "epoch": 0.6625505934005625, "grad_norm": 1.21875, "learning_rate": 1.7759211654941953e-05, "loss": 0.5173, "step": 4829 }, { "epoch": 0.662687795842766, "grad_norm": 1.1015625, "learning_rate": 1.775830098297823e-05, "loss": 0.4555, "step": 4830 }, { "epoch": 0.6628249982849694, "grad_norm": 1.2734375, "learning_rate": 1.7757390149359583e-05, "loss": 0.5444, "step": 4831 }, { "epoch": 0.6629622007271729, "grad_norm": 1.234375, "learning_rate": 1.7756479154104995e-05, "loss": 0.5656, "step": 4832 }, { "epoch": 0.6630994031693764, "grad_norm": 1.265625, "learning_rate": 1.7755567997233444e-05, "loss": 0.5262, "step": 4833 }, { "epoch": 0.6632366056115799, "grad_norm": 1.2421875, "learning_rate": 1.7754656678763914e-05, "loss": 0.501, "step": 4834 }, { "epoch": 0.6633738080537833, "grad_norm": 1.296875, "learning_rate": 1.7753745198715395e-05, "loss": 0.5804, "step": 4835 }, { "epoch": 0.6635110104959868, "grad_norm": 1.265625, "learning_rate": 1.7752833557106882e-05, "loss": 0.544, "step": 4836 }, { "epoch": 0.6636482129381903, "grad_norm": 1.1875, "learning_rate": 1.7751921753957368e-05, "loss": 0.4795, "step": 4837 }, { "epoch": 0.6637854153803938, "grad_norm": 1.109375, "learning_rate": 1.775100978928585e-05, "loss": 0.4775, "step": 4838 }, { "epoch": 0.6639226178225972, "grad_norm": 1.1796875, "learning_rate": 1.7750097663111333e-05, "loss": 0.4859, "step": 4839 }, { "epoch": 0.6640598202648007, "grad_norm": 1.15625, "learning_rate": 1.7749185375452817e-05, "loss": 0.5294, "step": 4840 }, { "epoch": 0.6641970227070042, "grad_norm": 1.21875, "learning_rate": 1.7748272926329316e-05, "loss": 0.5823, "step": 4841 }, { "epoch": 0.6643342251492077, "grad_norm": 1.203125, "learning_rate": 1.7747360315759842e-05, "loss": 0.5454, "step": 4842 }, { "epoch": 0.6644714275914111, "grad_norm": 1.1328125, "learning_rate": 1.774644754376341e-05, "loss": 0.4977, "step": 4843 }, { "epoch": 0.6646086300336146, "grad_norm": 1.078125, "learning_rate": 1.7745534610359038e-05, "loss": 0.4114, "step": 4844 }, { "epoch": 0.6647458324758181, "grad_norm": 1.203125, "learning_rate": 1.7744621515565747e-05, "loss": 0.5231, "step": 4845 }, { "epoch": 0.6648830349180216, "grad_norm": 1.28125, "learning_rate": 1.7743708259402562e-05, "loss": 0.5887, "step": 4846 }, { "epoch": 0.665020237360225, "grad_norm": 1.25, "learning_rate": 1.774279484188852e-05, "loss": 0.5095, "step": 4847 }, { "epoch": 0.6651574398024285, "grad_norm": 1.1875, "learning_rate": 1.774188126304264e-05, "loss": 0.506, "step": 4848 }, { "epoch": 0.665294642244632, "grad_norm": 1.21875, "learning_rate": 1.7740967522883967e-05, "loss": 0.4572, "step": 4849 }, { "epoch": 0.6654318446868355, "grad_norm": 1.09375, "learning_rate": 1.774005362143154e-05, "loss": 0.4442, "step": 4850 }, { "epoch": 0.6655690471290389, "grad_norm": 1.28125, "learning_rate": 1.77391395587044e-05, "loss": 0.5804, "step": 4851 }, { "epoch": 0.6657062495712424, "grad_norm": 1.1484375, "learning_rate": 1.773822533472159e-05, "loss": 0.4822, "step": 4852 }, { "epoch": 0.6658434520134459, "grad_norm": 1.40625, "learning_rate": 1.773731094950216e-05, "loss": 0.6265, "step": 4853 }, { "epoch": 0.6659806544556494, "grad_norm": 1.0859375, "learning_rate": 1.7736396403065166e-05, "loss": 0.4636, "step": 4854 }, { "epoch": 0.6661178568978527, "grad_norm": 1.1171875, "learning_rate": 1.773548169542966e-05, "loss": 0.471, "step": 4855 }, { "epoch": 0.6662550593400562, "grad_norm": 1.234375, "learning_rate": 1.7734566826614705e-05, "loss": 0.5653, "step": 4856 }, { "epoch": 0.6663922617822597, "grad_norm": 1.265625, "learning_rate": 1.773365179663936e-05, "loss": 0.4998, "step": 4857 }, { "epoch": 0.6665294642244632, "grad_norm": 1.2578125, "learning_rate": 1.773273660552269e-05, "loss": 0.5632, "step": 4858 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2578125, "learning_rate": 1.773182125328377e-05, "loss": 0.549, "step": 4859 }, { "epoch": 0.6668038691088701, "grad_norm": 1.1640625, "learning_rate": 1.7730905739941667e-05, "loss": 0.5169, "step": 4860 }, { "epoch": 0.6669410715510736, "grad_norm": 1.0859375, "learning_rate": 1.772999006551546e-05, "loss": 0.3962, "step": 4861 }, { "epoch": 0.6670782739932771, "grad_norm": 1.40625, "learning_rate": 1.772907423002423e-05, "loss": 0.5092, "step": 4862 }, { "epoch": 0.6672154764354805, "grad_norm": 1.3671875, "learning_rate": 1.772815823348705e-05, "loss": 0.6415, "step": 4863 }, { "epoch": 0.667352678877684, "grad_norm": 1.46875, "learning_rate": 1.772724207592302e-05, "loss": 0.6121, "step": 4864 }, { "epoch": 0.6674898813198875, "grad_norm": 1.2734375, "learning_rate": 1.7726325757351218e-05, "loss": 0.5174, "step": 4865 }, { "epoch": 0.667627083762091, "grad_norm": 1.3125, "learning_rate": 1.7725409277790743e-05, "loss": 0.5494, "step": 4866 }, { "epoch": 0.6677642862042944, "grad_norm": 1.234375, "learning_rate": 1.7724492637260692e-05, "loss": 0.5168, "step": 4867 }, { "epoch": 0.6679014886464979, "grad_norm": 1.2109375, "learning_rate": 1.7723575835780158e-05, "loss": 0.534, "step": 4868 }, { "epoch": 0.6680386910887014, "grad_norm": 1.2578125, "learning_rate": 1.772265887336825e-05, "loss": 0.5513, "step": 4869 }, { "epoch": 0.6681758935309049, "grad_norm": 1.21875, "learning_rate": 1.7721741750044072e-05, "loss": 0.5572, "step": 4870 }, { "epoch": 0.6683130959731083, "grad_norm": 1.3125, "learning_rate": 1.7720824465826732e-05, "loss": 0.5434, "step": 4871 }, { "epoch": 0.6684502984153118, "grad_norm": 1.2578125, "learning_rate": 1.771990702073534e-05, "loss": 0.5595, "step": 4872 }, { "epoch": 0.6685875008575153, "grad_norm": 1.140625, "learning_rate": 1.7718989414789026e-05, "loss": 0.5022, "step": 4873 }, { "epoch": 0.6687247032997188, "grad_norm": 1.25, "learning_rate": 1.7718071648006896e-05, "loss": 0.5769, "step": 4874 }, { "epoch": 0.6688619057419222, "grad_norm": 1.3203125, "learning_rate": 1.7717153720408076e-05, "loss": 0.5564, "step": 4875 }, { "epoch": 0.6689991081841257, "grad_norm": 1.25, "learning_rate": 1.7716235632011693e-05, "loss": 0.5655, "step": 4876 }, { "epoch": 0.6691363106263292, "grad_norm": 1.3046875, "learning_rate": 1.7715317382836882e-05, "loss": 0.6076, "step": 4877 }, { "epoch": 0.6692735130685327, "grad_norm": 1.3125, "learning_rate": 1.7714398972902764e-05, "loss": 0.5514, "step": 4878 }, { "epoch": 0.669410715510736, "grad_norm": 1.1640625, "learning_rate": 1.771348040222849e-05, "loss": 0.5233, "step": 4879 }, { "epoch": 0.6695479179529396, "grad_norm": 1.1796875, "learning_rate": 1.7712561670833185e-05, "loss": 0.4771, "step": 4880 }, { "epoch": 0.669685120395143, "grad_norm": 1.203125, "learning_rate": 1.7711642778736e-05, "loss": 0.5305, "step": 4881 }, { "epoch": 0.6698223228373466, "grad_norm": 1.109375, "learning_rate": 1.7710723725956085e-05, "loss": 0.5064, "step": 4882 }, { "epoch": 0.6699595252795499, "grad_norm": 1.125, "learning_rate": 1.770980451251258e-05, "loss": 0.4284, "step": 4883 }, { "epoch": 0.6700967277217534, "grad_norm": 1.2109375, "learning_rate": 1.7708885138424646e-05, "loss": 0.4747, "step": 4884 }, { "epoch": 0.6702339301639569, "grad_norm": 1.1953125, "learning_rate": 1.7707965603711435e-05, "loss": 0.5348, "step": 4885 }, { "epoch": 0.6703711326061604, "grad_norm": 1.2578125, "learning_rate": 1.7707045908392114e-05, "loss": 0.4964, "step": 4886 }, { "epoch": 0.6705083350483638, "grad_norm": 1.453125, "learning_rate": 1.770612605248584e-05, "loss": 0.6278, "step": 4887 }, { "epoch": 0.6706455374905673, "grad_norm": 1.234375, "learning_rate": 1.7705206036011773e-05, "loss": 0.5681, "step": 4888 }, { "epoch": 0.6707827399327708, "grad_norm": 1.203125, "learning_rate": 1.7704285858989093e-05, "loss": 0.5451, "step": 4889 }, { "epoch": 0.6709199423749743, "grad_norm": 1.265625, "learning_rate": 1.770336552143697e-05, "loss": 0.5269, "step": 4890 }, { "epoch": 0.6710571448171777, "grad_norm": 1.1796875, "learning_rate": 1.7702445023374584e-05, "loss": 0.4649, "step": 4891 }, { "epoch": 0.6711943472593812, "grad_norm": 1.2109375, "learning_rate": 1.770152436482111e-05, "loss": 0.5499, "step": 4892 }, { "epoch": 0.6713315497015847, "grad_norm": 1.3125, "learning_rate": 1.7700603545795728e-05, "loss": 0.6131, "step": 4893 }, { "epoch": 0.6714687521437882, "grad_norm": 1.203125, "learning_rate": 1.7699682566317636e-05, "loss": 0.4892, "step": 4894 }, { "epoch": 0.6716059545859916, "grad_norm": 1.1875, "learning_rate": 1.7698761426406012e-05, "loss": 0.5278, "step": 4895 }, { "epoch": 0.6717431570281951, "grad_norm": 1.203125, "learning_rate": 1.7697840126080056e-05, "loss": 0.5249, "step": 4896 }, { "epoch": 0.6718803594703986, "grad_norm": 1.234375, "learning_rate": 1.7696918665358965e-05, "loss": 0.4953, "step": 4897 }, { "epoch": 0.6720175619126021, "grad_norm": 1.265625, "learning_rate": 1.7695997044261933e-05, "loss": 0.5738, "step": 4898 }, { "epoch": 0.6721547643548055, "grad_norm": 1.1484375, "learning_rate": 1.7695075262808168e-05, "loss": 0.5103, "step": 4899 }, { "epoch": 0.672291966797009, "grad_norm": 1.2109375, "learning_rate": 1.7694153321016877e-05, "loss": 0.5426, "step": 4900 }, { "epoch": 0.6724291692392125, "grad_norm": 1.25, "learning_rate": 1.7693231218907266e-05, "loss": 0.5747, "step": 4901 }, { "epoch": 0.672566371681416, "grad_norm": 1.265625, "learning_rate": 1.769230895649855e-05, "loss": 0.616, "step": 4902 }, { "epoch": 0.6727035741236194, "grad_norm": 1.171875, "learning_rate": 1.769138653380995e-05, "loss": 0.4618, "step": 4903 }, { "epoch": 0.6728407765658229, "grad_norm": 1.140625, "learning_rate": 1.7690463950860682e-05, "loss": 0.5217, "step": 4904 }, { "epoch": 0.6729779790080264, "grad_norm": 1.2109375, "learning_rate": 1.768954120766997e-05, "loss": 0.5102, "step": 4905 }, { "epoch": 0.6731151814502299, "grad_norm": 1.2265625, "learning_rate": 1.7688618304257038e-05, "loss": 0.5583, "step": 4906 }, { "epoch": 0.6732523838924332, "grad_norm": 1.1171875, "learning_rate": 1.7687695240641118e-05, "loss": 0.4492, "step": 4907 }, { "epoch": 0.6733895863346367, "grad_norm": 1.21875, "learning_rate": 1.7686772016841446e-05, "loss": 0.529, "step": 4908 }, { "epoch": 0.6735267887768402, "grad_norm": 1.1640625, "learning_rate": 1.7685848632877255e-05, "loss": 0.4937, "step": 4909 }, { "epoch": 0.6736639912190437, "grad_norm": 1.1796875, "learning_rate": 1.7684925088767785e-05, "loss": 0.5016, "step": 4910 }, { "epoch": 0.6738011936612471, "grad_norm": 1.1796875, "learning_rate": 1.7684001384532282e-05, "loss": 0.564, "step": 4911 }, { "epoch": 0.6739383961034506, "grad_norm": 1.15625, "learning_rate": 1.768307752018999e-05, "loss": 0.5031, "step": 4912 }, { "epoch": 0.6740755985456541, "grad_norm": 1.1875, "learning_rate": 1.768215349576016e-05, "loss": 0.5102, "step": 4913 }, { "epoch": 0.6742128009878576, "grad_norm": 1.1015625, "learning_rate": 1.768122931126205e-05, "loss": 0.4633, "step": 4914 }, { "epoch": 0.674350003430061, "grad_norm": 1.1796875, "learning_rate": 1.7680304966714906e-05, "loss": 0.5409, "step": 4915 }, { "epoch": 0.6744872058722645, "grad_norm": 1.1796875, "learning_rate": 1.7679380462137995e-05, "loss": 0.5028, "step": 4916 }, { "epoch": 0.674624408314468, "grad_norm": 1.1953125, "learning_rate": 1.767845579755058e-05, "loss": 0.4998, "step": 4917 }, { "epoch": 0.6747616107566715, "grad_norm": 1.2578125, "learning_rate": 1.767753097297193e-05, "loss": 0.5261, "step": 4918 }, { "epoch": 0.6748988131988749, "grad_norm": 1.3046875, "learning_rate": 1.767660598842131e-05, "loss": 0.555, "step": 4919 }, { "epoch": 0.6750360156410784, "grad_norm": 1.1640625, "learning_rate": 1.7675680843917997e-05, "loss": 0.4832, "step": 4920 }, { "epoch": 0.6751732180832819, "grad_norm": 1.1953125, "learning_rate": 1.7674755539481263e-05, "loss": 0.4946, "step": 4921 }, { "epoch": 0.6753104205254854, "grad_norm": 1.171875, "learning_rate": 1.7673830075130395e-05, "loss": 0.4846, "step": 4922 }, { "epoch": 0.6754476229676888, "grad_norm": 1.25, "learning_rate": 1.7672904450884666e-05, "loss": 0.5751, "step": 4923 }, { "epoch": 0.6755848254098923, "grad_norm": 1.1015625, "learning_rate": 1.7671978666763373e-05, "loss": 0.5018, "step": 4924 }, { "epoch": 0.6757220278520958, "grad_norm": 1.1328125, "learning_rate": 1.7671052722785804e-05, "loss": 0.4362, "step": 4925 }, { "epoch": 0.6758592302942993, "grad_norm": 1.1640625, "learning_rate": 1.7670126618971247e-05, "loss": 0.5426, "step": 4926 }, { "epoch": 0.6759964327365027, "grad_norm": 1.1484375, "learning_rate": 1.7669200355339005e-05, "loss": 0.4978, "step": 4927 }, { "epoch": 0.6761336351787062, "grad_norm": 1.0390625, "learning_rate": 1.7668273931908374e-05, "loss": 0.4296, "step": 4928 }, { "epoch": 0.6762708376209097, "grad_norm": 1.265625, "learning_rate": 1.7667347348698658e-05, "loss": 0.5453, "step": 4929 }, { "epoch": 0.6764080400631132, "grad_norm": 1.171875, "learning_rate": 1.7666420605729163e-05, "loss": 0.4888, "step": 4930 }, { "epoch": 0.6765452425053166, "grad_norm": 1.265625, "learning_rate": 1.7665493703019202e-05, "loss": 0.5522, "step": 4931 }, { "epoch": 0.6766824449475201, "grad_norm": 1.1328125, "learning_rate": 1.7664566640588084e-05, "loss": 0.4969, "step": 4932 }, { "epoch": 0.6768196473897236, "grad_norm": 1.203125, "learning_rate": 1.7663639418455128e-05, "loss": 0.4977, "step": 4933 }, { "epoch": 0.676956849831927, "grad_norm": 1.3515625, "learning_rate": 1.7662712036639656e-05, "loss": 0.5916, "step": 4934 }, { "epoch": 0.6770940522741304, "grad_norm": 1.1796875, "learning_rate": 1.766178449516099e-05, "loss": 0.535, "step": 4935 }, { "epoch": 0.6772312547163339, "grad_norm": 1.109375, "learning_rate": 1.766085679403845e-05, "loss": 0.4546, "step": 4936 }, { "epoch": 0.6773684571585374, "grad_norm": 1.125, "learning_rate": 1.7659928933291378e-05, "loss": 0.4692, "step": 4937 }, { "epoch": 0.6775056596007409, "grad_norm": 1.2578125, "learning_rate": 1.76590009129391e-05, "loss": 0.5937, "step": 4938 }, { "epoch": 0.6776428620429443, "grad_norm": 1.2109375, "learning_rate": 1.765807273300095e-05, "loss": 0.5307, "step": 4939 }, { "epoch": 0.6777800644851478, "grad_norm": 1.1328125, "learning_rate": 1.7657144393496275e-05, "loss": 0.4873, "step": 4940 }, { "epoch": 0.6779172669273513, "grad_norm": 1.1953125, "learning_rate": 1.7656215894444414e-05, "loss": 0.4985, "step": 4941 }, { "epoch": 0.6780544693695548, "grad_norm": 1.2578125, "learning_rate": 1.765528723586471e-05, "loss": 0.5108, "step": 4942 }, { "epoch": 0.6781916718117582, "grad_norm": 1.0703125, "learning_rate": 1.765435841777652e-05, "loss": 0.4685, "step": 4943 }, { "epoch": 0.6783288742539617, "grad_norm": 1.2578125, "learning_rate": 1.7653429440199196e-05, "loss": 0.5373, "step": 4944 }, { "epoch": 0.6784660766961652, "grad_norm": 1.2734375, "learning_rate": 1.765250030315209e-05, "loss": 0.587, "step": 4945 }, { "epoch": 0.6786032791383687, "grad_norm": 1.3046875, "learning_rate": 1.7651571006654568e-05, "loss": 0.5631, "step": 4946 }, { "epoch": 0.6787404815805721, "grad_norm": 1.2421875, "learning_rate": 1.7650641550725988e-05, "loss": 0.4996, "step": 4947 }, { "epoch": 0.6788776840227756, "grad_norm": 1.2421875, "learning_rate": 1.764971193538572e-05, "loss": 0.5232, "step": 4948 }, { "epoch": 0.6790148864649791, "grad_norm": 1.203125, "learning_rate": 1.7648782160653128e-05, "loss": 0.5093, "step": 4949 }, { "epoch": 0.6791520889071826, "grad_norm": 1.34375, "learning_rate": 1.7647852226547594e-05, "loss": 0.679, "step": 4950 }, { "epoch": 0.679289291349386, "grad_norm": 1.28125, "learning_rate": 1.7646922133088485e-05, "loss": 0.5644, "step": 4951 }, { "epoch": 0.6794264937915895, "grad_norm": 1.2109375, "learning_rate": 1.7645991880295188e-05, "loss": 0.5388, "step": 4952 }, { "epoch": 0.679563696233793, "grad_norm": 1.1875, "learning_rate": 1.7645061468187082e-05, "loss": 0.5137, "step": 4953 }, { "epoch": 0.6797008986759965, "grad_norm": 1.1953125, "learning_rate": 1.764413089678356e-05, "loss": 0.5263, "step": 4954 }, { "epoch": 0.6798381011181999, "grad_norm": 1.25, "learning_rate": 1.7643200166104005e-05, "loss": 0.5418, "step": 4955 }, { "epoch": 0.6799753035604034, "grad_norm": 1.171875, "learning_rate": 1.764226927616781e-05, "loss": 0.4603, "step": 4956 }, { "epoch": 0.6801125060026069, "grad_norm": 1.1640625, "learning_rate": 1.7641338226994375e-05, "loss": 0.5254, "step": 4957 }, { "epoch": 0.6802497084448104, "grad_norm": 1.203125, "learning_rate": 1.7640407018603097e-05, "loss": 0.5759, "step": 4958 }, { "epoch": 0.6803869108870138, "grad_norm": 1.15625, "learning_rate": 1.7639475651013376e-05, "loss": 0.5021, "step": 4959 }, { "epoch": 0.6805241133292173, "grad_norm": 1.09375, "learning_rate": 1.763854412424463e-05, "loss": 0.3979, "step": 4960 }, { "epoch": 0.6806613157714207, "grad_norm": 1.15625, "learning_rate": 1.7637612438316255e-05, "loss": 0.5236, "step": 4961 }, { "epoch": 0.6807985182136242, "grad_norm": 1.1328125, "learning_rate": 1.7636680593247675e-05, "loss": 0.4855, "step": 4962 }, { "epoch": 0.6809357206558276, "grad_norm": 1.3203125, "learning_rate": 1.7635748589058298e-05, "loss": 0.5747, "step": 4963 }, { "epoch": 0.6810729230980311, "grad_norm": 1.2265625, "learning_rate": 1.7634816425767548e-05, "loss": 0.4858, "step": 4964 }, { "epoch": 0.6812101255402346, "grad_norm": 1.3125, "learning_rate": 1.7633884103394842e-05, "loss": 0.5379, "step": 4965 }, { "epoch": 0.6813473279824381, "grad_norm": 1.1640625, "learning_rate": 1.7632951621959618e-05, "loss": 0.4621, "step": 4966 }, { "epoch": 0.6814845304246415, "grad_norm": 1.203125, "learning_rate": 1.7632018981481293e-05, "loss": 0.473, "step": 4967 }, { "epoch": 0.681621732866845, "grad_norm": 1.2578125, "learning_rate": 1.7631086181979313e-05, "loss": 0.5521, "step": 4968 }, { "epoch": 0.6817589353090485, "grad_norm": 1.1328125, "learning_rate": 1.7630153223473096e-05, "loss": 0.4687, "step": 4969 }, { "epoch": 0.681896137751252, "grad_norm": 1.28125, "learning_rate": 1.76292201059821e-05, "loss": 0.5427, "step": 4970 }, { "epoch": 0.6820333401934554, "grad_norm": 1.0859375, "learning_rate": 1.7628286829525756e-05, "loss": 0.4722, "step": 4971 }, { "epoch": 0.6821705426356589, "grad_norm": 1.296875, "learning_rate": 1.7627353394123514e-05, "loss": 0.528, "step": 4972 }, { "epoch": 0.6823077450778624, "grad_norm": 1.2109375, "learning_rate": 1.7626419799794824e-05, "loss": 0.5442, "step": 4973 }, { "epoch": 0.6824449475200659, "grad_norm": 1.25, "learning_rate": 1.762548604655914e-05, "loss": 0.5318, "step": 4974 }, { "epoch": 0.6825821499622693, "grad_norm": 1.1953125, "learning_rate": 1.7624552134435912e-05, "loss": 0.5918, "step": 4975 }, { "epoch": 0.6827193524044728, "grad_norm": 1.25, "learning_rate": 1.762361806344461e-05, "loss": 0.6226, "step": 4976 }, { "epoch": 0.6828565548466763, "grad_norm": 1.2109375, "learning_rate": 1.7622683833604683e-05, "loss": 0.5338, "step": 4977 }, { "epoch": 0.6829937572888798, "grad_norm": 1.1796875, "learning_rate": 1.7621749444935608e-05, "loss": 0.5218, "step": 4978 }, { "epoch": 0.6831309597310832, "grad_norm": 1.109375, "learning_rate": 1.762081489745685e-05, "loss": 0.4381, "step": 4979 }, { "epoch": 0.6832681621732867, "grad_norm": 1.765625, "learning_rate": 1.7619880191187878e-05, "loss": 0.4907, "step": 4980 }, { "epoch": 0.6834053646154902, "grad_norm": 1.34375, "learning_rate": 1.7618945326148174e-05, "loss": 0.5748, "step": 4981 }, { "epoch": 0.6835425670576937, "grad_norm": 1.1484375, "learning_rate": 1.7618010302357217e-05, "loss": 0.513, "step": 4982 }, { "epoch": 0.6836797694998971, "grad_norm": 1.2890625, "learning_rate": 1.7617075119834487e-05, "loss": 0.5786, "step": 4983 }, { "epoch": 0.6838169719421006, "grad_norm": 1.25, "learning_rate": 1.761613977859947e-05, "loss": 0.5276, "step": 4984 }, { "epoch": 0.6839541743843041, "grad_norm": 1.078125, "learning_rate": 1.7615204278671652e-05, "loss": 0.4099, "step": 4985 }, { "epoch": 0.6840913768265076, "grad_norm": 1.203125, "learning_rate": 1.7614268620070534e-05, "loss": 0.5051, "step": 4986 }, { "epoch": 0.684228579268711, "grad_norm": 1.203125, "learning_rate": 1.7613332802815605e-05, "loss": 0.5373, "step": 4987 }, { "epoch": 0.6843657817109144, "grad_norm": 1.125, "learning_rate": 1.7612396826926367e-05, "loss": 0.4522, "step": 4988 }, { "epoch": 0.6845029841531179, "grad_norm": 1.2734375, "learning_rate": 1.761146069242232e-05, "loss": 0.4801, "step": 4989 }, { "epoch": 0.6846401865953214, "grad_norm": 1.1953125, "learning_rate": 1.761052439932297e-05, "loss": 0.5169, "step": 4990 }, { "epoch": 0.6847773890375248, "grad_norm": 1.1953125, "learning_rate": 1.7609587947647826e-05, "loss": 0.5031, "step": 4991 }, { "epoch": 0.6849145914797283, "grad_norm": 1.03125, "learning_rate": 1.76086513374164e-05, "loss": 0.4258, "step": 4992 }, { "epoch": 0.6850517939219318, "grad_norm": 1.25, "learning_rate": 1.7607714568648213e-05, "loss": 0.5245, "step": 4993 }, { "epoch": 0.6851889963641353, "grad_norm": 1.09375, "learning_rate": 1.7606777641362776e-05, "loss": 0.4692, "step": 4994 }, { "epoch": 0.6853261988063387, "grad_norm": 1.265625, "learning_rate": 1.7605840555579613e-05, "loss": 0.5158, "step": 4995 }, { "epoch": 0.6854634012485422, "grad_norm": 1.15625, "learning_rate": 1.7604903311318255e-05, "loss": 0.4744, "step": 4996 }, { "epoch": 0.6856006036907457, "grad_norm": 1.3515625, "learning_rate": 1.7603965908598223e-05, "loss": 0.4918, "step": 4997 }, { "epoch": 0.6857378061329492, "grad_norm": 1.234375, "learning_rate": 1.7603028347439055e-05, "loss": 0.5409, "step": 4998 }, { "epoch": 0.6858750085751526, "grad_norm": 1.171875, "learning_rate": 1.7602090627860287e-05, "loss": 0.5245, "step": 4999 }, { "epoch": 0.6860122110173561, "grad_norm": 1.2578125, "learning_rate": 1.7601152749881452e-05, "loss": 0.506, "step": 5000 }, { "epoch": 0.6861494134595596, "grad_norm": 1.21875, "learning_rate": 1.7600214713522096e-05, "loss": 0.5345, "step": 5001 }, { "epoch": 0.6862866159017631, "grad_norm": 1.171875, "learning_rate": 1.759927651880176e-05, "loss": 0.5353, "step": 5002 }, { "epoch": 0.6864238183439665, "grad_norm": 1.2734375, "learning_rate": 1.7598338165739995e-05, "loss": 0.5869, "step": 5003 }, { "epoch": 0.68656102078617, "grad_norm": 1.15625, "learning_rate": 1.759739965435636e-05, "loss": 0.4242, "step": 5004 }, { "epoch": 0.6866982232283735, "grad_norm": 1.0546875, "learning_rate": 1.75964609846704e-05, "loss": 0.4356, "step": 5005 }, { "epoch": 0.686835425670577, "grad_norm": 1.3515625, "learning_rate": 1.759552215670168e-05, "loss": 0.5789, "step": 5006 }, { "epoch": 0.6869726281127804, "grad_norm": 1.0859375, "learning_rate": 1.7594583170469754e-05, "loss": 0.5034, "step": 5007 }, { "epoch": 0.6871098305549839, "grad_norm": 1.3359375, "learning_rate": 1.7593644025994195e-05, "loss": 0.5458, "step": 5008 }, { "epoch": 0.6872470329971874, "grad_norm": 1.109375, "learning_rate": 1.759270472329457e-05, "loss": 0.4868, "step": 5009 }, { "epoch": 0.6873842354393909, "grad_norm": 1.1796875, "learning_rate": 1.759176526239045e-05, "loss": 0.5259, "step": 5010 }, { "epoch": 0.6875214378815943, "grad_norm": 1.171875, "learning_rate": 1.7590825643301404e-05, "loss": 0.5267, "step": 5011 }, { "epoch": 0.6876586403237978, "grad_norm": 1.1328125, "learning_rate": 1.758988586604702e-05, "loss": 0.4834, "step": 5012 }, { "epoch": 0.6877958427660013, "grad_norm": 1.1875, "learning_rate": 1.7588945930646873e-05, "loss": 0.4707, "step": 5013 }, { "epoch": 0.6879330452082048, "grad_norm": 1.15625, "learning_rate": 1.7588005837120553e-05, "loss": 0.4631, "step": 5014 }, { "epoch": 0.6880702476504081, "grad_norm": 1.28125, "learning_rate": 1.758706558548764e-05, "loss": 0.5799, "step": 5015 }, { "epoch": 0.6882074500926116, "grad_norm": 1.265625, "learning_rate": 1.7586125175767735e-05, "loss": 0.5483, "step": 5016 }, { "epoch": 0.6883446525348151, "grad_norm": 1.1796875, "learning_rate": 1.7585184607980424e-05, "loss": 0.5185, "step": 5017 }, { "epoch": 0.6884818549770186, "grad_norm": 1.1171875, "learning_rate": 1.758424388214531e-05, "loss": 0.4541, "step": 5018 }, { "epoch": 0.688619057419222, "grad_norm": 1.3046875, "learning_rate": 1.7583302998281997e-05, "loss": 0.5831, "step": 5019 }, { "epoch": 0.6887562598614255, "grad_norm": 1.21875, "learning_rate": 1.7582361956410084e-05, "loss": 0.5933, "step": 5020 }, { "epoch": 0.688893462303629, "grad_norm": 1.2265625, "learning_rate": 1.7581420756549183e-05, "loss": 0.5532, "step": 5021 }, { "epoch": 0.6890306647458325, "grad_norm": 1.2421875, "learning_rate": 1.75804793987189e-05, "loss": 0.5296, "step": 5022 }, { "epoch": 0.6891678671880359, "grad_norm": 1.234375, "learning_rate": 1.7579537882938855e-05, "loss": 0.5228, "step": 5023 }, { "epoch": 0.6893050696302394, "grad_norm": 1.1796875, "learning_rate": 1.757859620922866e-05, "loss": 0.5203, "step": 5024 }, { "epoch": 0.6894422720724429, "grad_norm": 1.1796875, "learning_rate": 1.7577654377607945e-05, "loss": 0.5461, "step": 5025 }, { "epoch": 0.6895794745146464, "grad_norm": 1.234375, "learning_rate": 1.7576712388096327e-05, "loss": 0.5747, "step": 5026 }, { "epoch": 0.6897166769568498, "grad_norm": 1.1484375, "learning_rate": 1.7575770240713435e-05, "loss": 0.486, "step": 5027 }, { "epoch": 0.6898538793990533, "grad_norm": 1.15625, "learning_rate": 1.7574827935478898e-05, "loss": 0.536, "step": 5028 }, { "epoch": 0.6899910818412568, "grad_norm": 1.2265625, "learning_rate": 1.757388547241236e-05, "loss": 0.5323, "step": 5029 }, { "epoch": 0.6901282842834603, "grad_norm": 1.1875, "learning_rate": 1.7572942851533444e-05, "loss": 0.54, "step": 5030 }, { "epoch": 0.6902654867256637, "grad_norm": 1.2421875, "learning_rate": 1.7572000072861802e-05, "loss": 0.5429, "step": 5031 }, { "epoch": 0.6904026891678672, "grad_norm": 1.2109375, "learning_rate": 1.7571057136417073e-05, "loss": 0.5468, "step": 5032 }, { "epoch": 0.6905398916100707, "grad_norm": 1.25, "learning_rate": 1.7570114042218905e-05, "loss": 0.6026, "step": 5033 }, { "epoch": 0.6906770940522742, "grad_norm": 1.25, "learning_rate": 1.756917079028695e-05, "loss": 0.5595, "step": 5034 }, { "epoch": 0.6908142964944776, "grad_norm": 1.1875, "learning_rate": 1.7568227380640865e-05, "loss": 0.5332, "step": 5035 }, { "epoch": 0.6909514989366811, "grad_norm": 1.1484375, "learning_rate": 1.7567283813300298e-05, "loss": 0.4884, "step": 5036 }, { "epoch": 0.6910887013788846, "grad_norm": 1.09375, "learning_rate": 1.7566340088284917e-05, "loss": 0.4915, "step": 5037 }, { "epoch": 0.6912259038210881, "grad_norm": 1.2109375, "learning_rate": 1.7565396205614384e-05, "loss": 0.5469, "step": 5038 }, { "epoch": 0.6913631062632914, "grad_norm": 1.25, "learning_rate": 1.7564452165308366e-05, "loss": 0.5792, "step": 5039 }, { "epoch": 0.691500308705495, "grad_norm": 1.171875, "learning_rate": 1.7563507967386532e-05, "loss": 0.5024, "step": 5040 }, { "epoch": 0.6916375111476984, "grad_norm": 1.1015625, "learning_rate": 1.7562563611868557e-05, "loss": 0.4984, "step": 5041 }, { "epoch": 0.691774713589902, "grad_norm": 1.125, "learning_rate": 1.7561619098774118e-05, "loss": 0.4307, "step": 5042 }, { "epoch": 0.6919119160321053, "grad_norm": 1.1875, "learning_rate": 1.7560674428122898e-05, "loss": 0.4697, "step": 5043 }, { "epoch": 0.6920491184743088, "grad_norm": 1.0859375, "learning_rate": 1.7559729599934575e-05, "loss": 0.4762, "step": 5044 }, { "epoch": 0.6921863209165123, "grad_norm": 1.1796875, "learning_rate": 1.755878461422884e-05, "loss": 0.522, "step": 5045 }, { "epoch": 0.6923235233587158, "grad_norm": 1.09375, "learning_rate": 1.7557839471025383e-05, "loss": 0.3752, "step": 5046 }, { "epoch": 0.6924607258009192, "grad_norm": 1.1328125, "learning_rate": 1.755689417034389e-05, "loss": 0.4971, "step": 5047 }, { "epoch": 0.6925979282431227, "grad_norm": 1.171875, "learning_rate": 1.755594871220407e-05, "loss": 0.5543, "step": 5048 }, { "epoch": 0.6927351306853262, "grad_norm": 1.4296875, "learning_rate": 1.7555003096625614e-05, "loss": 0.551, "step": 5049 }, { "epoch": 0.6928723331275297, "grad_norm": 1.2265625, "learning_rate": 1.7554057323628227e-05, "loss": 0.5349, "step": 5050 }, { "epoch": 0.6930095355697331, "grad_norm": 1.1640625, "learning_rate": 1.755311139323162e-05, "loss": 0.4688, "step": 5051 }, { "epoch": 0.6931467380119366, "grad_norm": 1.109375, "learning_rate": 1.7552165305455492e-05, "loss": 0.4415, "step": 5052 }, { "epoch": 0.6932839404541401, "grad_norm": 1.1484375, "learning_rate": 1.755121906031957e-05, "loss": 0.4638, "step": 5053 }, { "epoch": 0.6934211428963436, "grad_norm": 1.2421875, "learning_rate": 1.7550272657843558e-05, "loss": 0.5046, "step": 5054 }, { "epoch": 0.693558345338547, "grad_norm": 1.0625, "learning_rate": 1.7549326098047183e-05, "loss": 0.4538, "step": 5055 }, { "epoch": 0.6936955477807505, "grad_norm": 1.265625, "learning_rate": 1.7548379380950167e-05, "loss": 0.5183, "step": 5056 }, { "epoch": 0.693832750222954, "grad_norm": 1.140625, "learning_rate": 1.7547432506572237e-05, "loss": 0.5248, "step": 5057 }, { "epoch": 0.6939699526651575, "grad_norm": 1.1484375, "learning_rate": 1.7546485474933116e-05, "loss": 0.4676, "step": 5058 }, { "epoch": 0.6941071551073609, "grad_norm": 1.09375, "learning_rate": 1.754553828605254e-05, "loss": 0.4715, "step": 5059 }, { "epoch": 0.6942443575495644, "grad_norm": 1.2265625, "learning_rate": 1.754459093995025e-05, "loss": 0.4799, "step": 5060 }, { "epoch": 0.6943815599917679, "grad_norm": 1.1328125, "learning_rate": 1.754364343664598e-05, "loss": 0.4917, "step": 5061 }, { "epoch": 0.6945187624339714, "grad_norm": 1.234375, "learning_rate": 1.7542695776159475e-05, "loss": 0.5539, "step": 5062 }, { "epoch": 0.6946559648761748, "grad_norm": 1.09375, "learning_rate": 1.7541747958510476e-05, "loss": 0.475, "step": 5063 }, { "epoch": 0.6947931673183783, "grad_norm": 1.1796875, "learning_rate": 1.7540799983718742e-05, "loss": 0.5323, "step": 5064 }, { "epoch": 0.6949303697605818, "grad_norm": 1.25, "learning_rate": 1.7539851851804017e-05, "loss": 0.5547, "step": 5065 }, { "epoch": 0.6950675722027853, "grad_norm": 1.2109375, "learning_rate": 1.753890356278606e-05, "loss": 0.5289, "step": 5066 }, { "epoch": 0.6952047746449886, "grad_norm": 1.203125, "learning_rate": 1.7537955116684624e-05, "loss": 0.4474, "step": 5067 }, { "epoch": 0.6953419770871921, "grad_norm": 1.1875, "learning_rate": 1.753700651351948e-05, "loss": 0.5244, "step": 5068 }, { "epoch": 0.6954791795293956, "grad_norm": 1.2890625, "learning_rate": 1.7536057753310387e-05, "loss": 0.5457, "step": 5069 }, { "epoch": 0.6956163819715991, "grad_norm": 1.2578125, "learning_rate": 1.7535108836077118e-05, "loss": 0.5448, "step": 5070 }, { "epoch": 0.6957535844138025, "grad_norm": 1.2890625, "learning_rate": 1.753415976183944e-05, "loss": 0.6019, "step": 5071 }, { "epoch": 0.695890786856006, "grad_norm": 1.21875, "learning_rate": 1.7533210530617136e-05, "loss": 0.5274, "step": 5072 }, { "epoch": 0.6960279892982095, "grad_norm": 1.2265625, "learning_rate": 1.7532261142429977e-05, "loss": 0.5354, "step": 5073 }, { "epoch": 0.696165191740413, "grad_norm": 1.2265625, "learning_rate": 1.7531311597297747e-05, "loss": 0.5856, "step": 5074 }, { "epoch": 0.6963023941826164, "grad_norm": 1.234375, "learning_rate": 1.7530361895240236e-05, "loss": 0.539, "step": 5075 }, { "epoch": 0.6964395966248199, "grad_norm": 1.15625, "learning_rate": 1.7529412036277223e-05, "loss": 0.5004, "step": 5076 }, { "epoch": 0.6965767990670234, "grad_norm": 1.140625, "learning_rate": 1.752846202042851e-05, "loss": 0.5224, "step": 5077 }, { "epoch": 0.6967140015092269, "grad_norm": 1.15625, "learning_rate": 1.752751184771388e-05, "loss": 0.4867, "step": 5078 }, { "epoch": 0.6968512039514303, "grad_norm": 1.203125, "learning_rate": 1.7526561518153147e-05, "loss": 0.5064, "step": 5079 }, { "epoch": 0.6969884063936338, "grad_norm": 1.0703125, "learning_rate": 1.7525611031766098e-05, "loss": 0.4314, "step": 5080 }, { "epoch": 0.6971256088358373, "grad_norm": 1.2109375, "learning_rate": 1.7524660388572542e-05, "loss": 0.5233, "step": 5081 }, { "epoch": 0.6972628112780408, "grad_norm": 1.109375, "learning_rate": 1.752370958859229e-05, "loss": 0.4652, "step": 5082 }, { "epoch": 0.6974000137202442, "grad_norm": 1.1171875, "learning_rate": 1.7522758631845148e-05, "loss": 0.4838, "step": 5083 }, { "epoch": 0.6975372161624477, "grad_norm": 1.15625, "learning_rate": 1.752180751835094e-05, "loss": 0.4632, "step": 5084 }, { "epoch": 0.6976744186046512, "grad_norm": 1.2890625, "learning_rate": 1.7520856248129472e-05, "loss": 0.5215, "step": 5085 }, { "epoch": 0.6978116210468547, "grad_norm": 1.3515625, "learning_rate": 1.751990482120057e-05, "loss": 0.5675, "step": 5086 }, { "epoch": 0.6979488234890581, "grad_norm": 1.1953125, "learning_rate": 1.7518953237584062e-05, "loss": 0.4919, "step": 5087 }, { "epoch": 0.6980860259312616, "grad_norm": 1.21875, "learning_rate": 1.7518001497299772e-05, "loss": 0.5459, "step": 5088 }, { "epoch": 0.6982232283734651, "grad_norm": 1.234375, "learning_rate": 1.751704960036753e-05, "loss": 0.5312, "step": 5089 }, { "epoch": 0.6983604308156686, "grad_norm": 1.140625, "learning_rate": 1.7516097546807173e-05, "loss": 0.5244, "step": 5090 }, { "epoch": 0.698497633257872, "grad_norm": 1.2265625, "learning_rate": 1.7515145336638534e-05, "loss": 0.5592, "step": 5091 }, { "epoch": 0.6986348357000755, "grad_norm": 1.2265625, "learning_rate": 1.751419296988146e-05, "loss": 0.5513, "step": 5092 }, { "epoch": 0.698772038142279, "grad_norm": 1.0859375, "learning_rate": 1.751324044655579e-05, "loss": 0.4083, "step": 5093 }, { "epoch": 0.6989092405844824, "grad_norm": 0.9609375, "learning_rate": 1.751228776668137e-05, "loss": 0.3709, "step": 5094 }, { "epoch": 0.6990464430266858, "grad_norm": 1.3046875, "learning_rate": 1.7511334930278054e-05, "loss": 0.556, "step": 5095 }, { "epoch": 0.6991836454688893, "grad_norm": 1.2734375, "learning_rate": 1.7510381937365693e-05, "loss": 0.5422, "step": 5096 }, { "epoch": 0.6993208479110928, "grad_norm": 1.15625, "learning_rate": 1.7509428787964148e-05, "loss": 0.491, "step": 5097 }, { "epoch": 0.6994580503532963, "grad_norm": 1.1953125, "learning_rate": 1.7508475482093273e-05, "loss": 0.5037, "step": 5098 }, { "epoch": 0.6995952527954997, "grad_norm": 1.328125, "learning_rate": 1.750752201977294e-05, "loss": 0.5892, "step": 5099 }, { "epoch": 0.6997324552377032, "grad_norm": 1.1171875, "learning_rate": 1.7506568401023004e-05, "loss": 0.4975, "step": 5100 }, { "epoch": 0.6998696576799067, "grad_norm": 1.2265625, "learning_rate": 1.7505614625863347e-05, "loss": 0.4912, "step": 5101 }, { "epoch": 0.7000068601221102, "grad_norm": 1.3046875, "learning_rate": 1.7504660694313834e-05, "loss": 0.5205, "step": 5102 }, { "epoch": 0.7001440625643136, "grad_norm": 1.046875, "learning_rate": 1.7503706606394342e-05, "loss": 0.421, "step": 5103 }, { "epoch": 0.7002812650065171, "grad_norm": 1.21875, "learning_rate": 1.750275236212476e-05, "loss": 0.5569, "step": 5104 }, { "epoch": 0.7004184674487206, "grad_norm": 1.09375, "learning_rate": 1.750179796152496e-05, "loss": 0.4172, "step": 5105 }, { "epoch": 0.7005556698909241, "grad_norm": 1.1328125, "learning_rate": 1.750084340461483e-05, "loss": 0.4429, "step": 5106 }, { "epoch": 0.7006928723331275, "grad_norm": 1.1015625, "learning_rate": 1.7499888691414264e-05, "loss": 0.4754, "step": 5107 }, { "epoch": 0.700830074775331, "grad_norm": 1.3671875, "learning_rate": 1.749893382194315e-05, "loss": 0.5875, "step": 5108 }, { "epoch": 0.7009672772175345, "grad_norm": 1.328125, "learning_rate": 1.749797879622139e-05, "loss": 0.5419, "step": 5109 }, { "epoch": 0.701104479659738, "grad_norm": 1.15625, "learning_rate": 1.749702361426888e-05, "loss": 0.4946, "step": 5110 }, { "epoch": 0.7012416821019414, "grad_norm": 1.234375, "learning_rate": 1.7496068276105523e-05, "loss": 0.5249, "step": 5111 }, { "epoch": 0.7013788845441449, "grad_norm": 1.2578125, "learning_rate": 1.749511278175122e-05, "loss": 0.5914, "step": 5112 }, { "epoch": 0.7015160869863484, "grad_norm": 1.1953125, "learning_rate": 1.7494157131225887e-05, "loss": 0.5256, "step": 5113 }, { "epoch": 0.7016532894285519, "grad_norm": 1.2265625, "learning_rate": 1.7493201324549432e-05, "loss": 0.5052, "step": 5114 }, { "epoch": 0.7017904918707553, "grad_norm": 1.2890625, "learning_rate": 1.7492245361741776e-05, "loss": 0.5778, "step": 5115 }, { "epoch": 0.7019276943129588, "grad_norm": 1.2734375, "learning_rate": 1.7491289242822828e-05, "loss": 0.5219, "step": 5116 }, { "epoch": 0.7020648967551623, "grad_norm": 1.3203125, "learning_rate": 1.749033296781252e-05, "loss": 0.5751, "step": 5117 }, { "epoch": 0.7022020991973658, "grad_norm": 1.0859375, "learning_rate": 1.7489376536730772e-05, "loss": 0.4084, "step": 5118 }, { "epoch": 0.7023393016395691, "grad_norm": 1.2734375, "learning_rate": 1.7488419949597513e-05, "loss": 0.5507, "step": 5119 }, { "epoch": 0.7024765040817726, "grad_norm": 1.1796875, "learning_rate": 1.7487463206432677e-05, "loss": 0.5435, "step": 5120 }, { "epoch": 0.7026137065239761, "grad_norm": 1.375, "learning_rate": 1.7486506307256196e-05, "loss": 0.6824, "step": 5121 }, { "epoch": 0.7027509089661796, "grad_norm": 1.1796875, "learning_rate": 1.748554925208801e-05, "loss": 0.5297, "step": 5122 }, { "epoch": 0.702888111408383, "grad_norm": 1.171875, "learning_rate": 1.7484592040948066e-05, "loss": 0.4212, "step": 5123 }, { "epoch": 0.7030253138505865, "grad_norm": 1.1875, "learning_rate": 1.7483634673856298e-05, "loss": 0.4873, "step": 5124 }, { "epoch": 0.70316251629279, "grad_norm": 1.109375, "learning_rate": 1.748267715083266e-05, "loss": 0.4936, "step": 5125 }, { "epoch": 0.7032997187349935, "grad_norm": 1.15625, "learning_rate": 1.7481719471897106e-05, "loss": 0.4628, "step": 5126 }, { "epoch": 0.7034369211771969, "grad_norm": 1.25, "learning_rate": 1.748076163706958e-05, "loss": 0.4645, "step": 5127 }, { "epoch": 0.7035741236194004, "grad_norm": 1.1484375, "learning_rate": 1.7479803646370052e-05, "loss": 0.5113, "step": 5128 }, { "epoch": 0.7037113260616039, "grad_norm": 1.2109375, "learning_rate": 1.747884549981848e-05, "loss": 0.5591, "step": 5129 }, { "epoch": 0.7038485285038074, "grad_norm": 1.2109375, "learning_rate": 1.7477887197434824e-05, "loss": 0.5072, "step": 5130 }, { "epoch": 0.7039857309460108, "grad_norm": 1.140625, "learning_rate": 1.7476928739239055e-05, "loss": 0.4592, "step": 5131 }, { "epoch": 0.7041229333882143, "grad_norm": 1.2265625, "learning_rate": 1.7475970125251143e-05, "loss": 0.5435, "step": 5132 }, { "epoch": 0.7042601358304178, "grad_norm": 1.1875, "learning_rate": 1.747501135549106e-05, "loss": 0.4919, "step": 5133 }, { "epoch": 0.7043973382726213, "grad_norm": 1.1328125, "learning_rate": 1.7474052429978787e-05, "loss": 0.4788, "step": 5134 }, { "epoch": 0.7045345407148247, "grad_norm": 1.21875, "learning_rate": 1.74730933487343e-05, "loss": 0.5403, "step": 5135 }, { "epoch": 0.7046717431570282, "grad_norm": 1.0859375, "learning_rate": 1.747213411177759e-05, "loss": 0.456, "step": 5136 }, { "epoch": 0.7048089455992317, "grad_norm": 1.21875, "learning_rate": 1.747117471912864e-05, "loss": 0.509, "step": 5137 }, { "epoch": 0.7049461480414352, "grad_norm": 1.2265625, "learning_rate": 1.7470215170807435e-05, "loss": 0.5651, "step": 5138 }, { "epoch": 0.7050833504836386, "grad_norm": 1.1484375, "learning_rate": 1.7469255466833975e-05, "loss": 0.4197, "step": 5139 }, { "epoch": 0.7052205529258421, "grad_norm": 1.140625, "learning_rate": 1.7468295607228257e-05, "loss": 0.4997, "step": 5140 }, { "epoch": 0.7053577553680456, "grad_norm": 1.0859375, "learning_rate": 1.7467335592010277e-05, "loss": 0.4594, "step": 5141 }, { "epoch": 0.7054949578102491, "grad_norm": 1.1796875, "learning_rate": 1.7466375421200043e-05, "loss": 0.5906, "step": 5142 }, { "epoch": 0.7056321602524525, "grad_norm": 1.140625, "learning_rate": 1.7465415094817554e-05, "loss": 0.5094, "step": 5143 }, { "epoch": 0.705769362694656, "grad_norm": 1.140625, "learning_rate": 1.7464454612882828e-05, "loss": 0.4999, "step": 5144 }, { "epoch": 0.7059065651368595, "grad_norm": 1.1953125, "learning_rate": 1.746349397541587e-05, "loss": 0.486, "step": 5145 }, { "epoch": 0.706043767579063, "grad_norm": 1.1640625, "learning_rate": 1.746253318243671e-05, "loss": 0.5053, "step": 5146 }, { "epoch": 0.7061809700212663, "grad_norm": 1.265625, "learning_rate": 1.7461572233965353e-05, "loss": 0.5348, "step": 5147 }, { "epoch": 0.7063181724634698, "grad_norm": 1.1484375, "learning_rate": 1.7460611130021824e-05, "loss": 0.524, "step": 5148 }, { "epoch": 0.7064553749056733, "grad_norm": 1.1875, "learning_rate": 1.7459649870626158e-05, "loss": 0.4893, "step": 5149 }, { "epoch": 0.7065925773478768, "grad_norm": 1.34375, "learning_rate": 1.7458688455798373e-05, "loss": 0.5811, "step": 5150 }, { "epoch": 0.7067297797900802, "grad_norm": 1.15625, "learning_rate": 1.7457726885558508e-05, "loss": 0.4545, "step": 5151 }, { "epoch": 0.7068669822322837, "grad_norm": 1.140625, "learning_rate": 1.74567651599266e-05, "loss": 0.493, "step": 5152 }, { "epoch": 0.7070041846744872, "grad_norm": 1.078125, "learning_rate": 1.745580327892268e-05, "loss": 0.4315, "step": 5153 }, { "epoch": 0.7071413871166907, "grad_norm": 1.2421875, "learning_rate": 1.7454841242566797e-05, "loss": 0.4874, "step": 5154 }, { "epoch": 0.7072785895588941, "grad_norm": 1.3125, "learning_rate": 1.7453879050878994e-05, "loss": 0.5832, "step": 5155 }, { "epoch": 0.7074157920010976, "grad_norm": 1.1796875, "learning_rate": 1.7452916703879318e-05, "loss": 0.513, "step": 5156 }, { "epoch": 0.7075529944433011, "grad_norm": 1.296875, "learning_rate": 1.7451954201587827e-05, "loss": 0.5346, "step": 5157 }, { "epoch": 0.7076901968855046, "grad_norm": 1.3046875, "learning_rate": 1.745099154402457e-05, "loss": 0.5306, "step": 5158 }, { "epoch": 0.707827399327708, "grad_norm": 1.25, "learning_rate": 1.7450028731209605e-05, "loss": 0.5304, "step": 5159 }, { "epoch": 0.7079646017699115, "grad_norm": 1.28125, "learning_rate": 1.7449065763163e-05, "loss": 0.6139, "step": 5160 }, { "epoch": 0.708101804212115, "grad_norm": 1.15625, "learning_rate": 1.7448102639904815e-05, "loss": 0.5393, "step": 5161 }, { "epoch": 0.7082390066543185, "grad_norm": 1.2578125, "learning_rate": 1.7447139361455113e-05, "loss": 0.5102, "step": 5162 }, { "epoch": 0.7083762090965219, "grad_norm": 1.2421875, "learning_rate": 1.7446175927833975e-05, "loss": 0.5023, "step": 5163 }, { "epoch": 0.7085134115387254, "grad_norm": 1.203125, "learning_rate": 1.744521233906147e-05, "loss": 0.4956, "step": 5164 }, { "epoch": 0.7086506139809289, "grad_norm": 1.0546875, "learning_rate": 1.744424859515768e-05, "loss": 0.3849, "step": 5165 }, { "epoch": 0.7087878164231324, "grad_norm": 1.1328125, "learning_rate": 1.744328469614268e-05, "loss": 0.485, "step": 5166 }, { "epoch": 0.7089250188653358, "grad_norm": 1.3203125, "learning_rate": 1.7442320642036556e-05, "loss": 0.5613, "step": 5167 }, { "epoch": 0.7090622213075393, "grad_norm": 1.1796875, "learning_rate": 1.7441356432859398e-05, "loss": 0.5207, "step": 5168 }, { "epoch": 0.7091994237497428, "grad_norm": 1.2890625, "learning_rate": 1.7440392068631293e-05, "loss": 0.5304, "step": 5169 }, { "epoch": 0.7093366261919463, "grad_norm": 1.234375, "learning_rate": 1.743942754937234e-05, "loss": 0.5444, "step": 5170 }, { "epoch": 0.7094738286341497, "grad_norm": 1.1953125, "learning_rate": 1.7438462875102632e-05, "loss": 0.546, "step": 5171 }, { "epoch": 0.7096110310763531, "grad_norm": 1.25, "learning_rate": 1.7437498045842268e-05, "loss": 0.5926, "step": 5172 }, { "epoch": 0.7097482335185566, "grad_norm": 1.171875, "learning_rate": 1.743653306161136e-05, "loss": 0.5026, "step": 5173 }, { "epoch": 0.7098854359607601, "grad_norm": 1.2890625, "learning_rate": 1.7435567922430002e-05, "loss": 0.5313, "step": 5174 }, { "epoch": 0.7100226384029635, "grad_norm": 0.9921875, "learning_rate": 1.7434602628318313e-05, "loss": 0.4088, "step": 5175 }, { "epoch": 0.710159840845167, "grad_norm": 1.2265625, "learning_rate": 1.7433637179296404e-05, "loss": 0.528, "step": 5176 }, { "epoch": 0.7102970432873705, "grad_norm": 1.2109375, "learning_rate": 1.7432671575384393e-05, "loss": 0.4781, "step": 5177 }, { "epoch": 0.710434245729574, "grad_norm": 1.1953125, "learning_rate": 1.7431705816602392e-05, "loss": 0.5351, "step": 5178 }, { "epoch": 0.7105714481717774, "grad_norm": 1.109375, "learning_rate": 1.7430739902970538e-05, "loss": 0.464, "step": 5179 }, { "epoch": 0.7107086506139809, "grad_norm": 1.1953125, "learning_rate": 1.7429773834508945e-05, "loss": 0.5368, "step": 5180 }, { "epoch": 0.7108458530561844, "grad_norm": 1.2578125, "learning_rate": 1.742880761123775e-05, "loss": 0.5949, "step": 5181 }, { "epoch": 0.7109830554983879, "grad_norm": 1.21875, "learning_rate": 1.7427841233177076e-05, "loss": 0.5782, "step": 5182 }, { "epoch": 0.7111202579405913, "grad_norm": 1.25, "learning_rate": 1.7426874700347068e-05, "loss": 0.5088, "step": 5183 }, { "epoch": 0.7112574603827948, "grad_norm": 1.171875, "learning_rate": 1.7425908012767863e-05, "loss": 0.5484, "step": 5184 }, { "epoch": 0.7113946628249983, "grad_norm": 1.125, "learning_rate": 1.74249411704596e-05, "loss": 0.4633, "step": 5185 }, { "epoch": 0.7115318652672018, "grad_norm": 1.15625, "learning_rate": 1.7423974173442427e-05, "loss": 0.5024, "step": 5186 }, { "epoch": 0.7116690677094052, "grad_norm": 1.1796875, "learning_rate": 1.7423007021736493e-05, "loss": 0.559, "step": 5187 }, { "epoch": 0.7118062701516087, "grad_norm": 1.2734375, "learning_rate": 1.742203971536195e-05, "loss": 0.5009, "step": 5188 }, { "epoch": 0.7119434725938122, "grad_norm": 1.015625, "learning_rate": 1.742107225433895e-05, "loss": 0.4141, "step": 5189 }, { "epoch": 0.7120806750360157, "grad_norm": 1.2265625, "learning_rate": 1.7420104638687657e-05, "loss": 0.5438, "step": 5190 }, { "epoch": 0.7122178774782191, "grad_norm": 1.15625, "learning_rate": 1.741913686842823e-05, "loss": 0.5441, "step": 5191 }, { "epoch": 0.7123550799204226, "grad_norm": 1.1640625, "learning_rate": 1.741816894358083e-05, "loss": 0.4482, "step": 5192 }, { "epoch": 0.7124922823626261, "grad_norm": 1.1953125, "learning_rate": 1.741720086416563e-05, "loss": 0.4977, "step": 5193 }, { "epoch": 0.7126294848048296, "grad_norm": 1.078125, "learning_rate": 1.74162326302028e-05, "loss": 0.4523, "step": 5194 }, { "epoch": 0.712766687247033, "grad_norm": 1.0703125, "learning_rate": 1.7415264241712514e-05, "loss": 0.4147, "step": 5195 }, { "epoch": 0.7129038896892365, "grad_norm": 1.21875, "learning_rate": 1.741429569871495e-05, "loss": 0.5485, "step": 5196 }, { "epoch": 0.71304109213144, "grad_norm": 1.1796875, "learning_rate": 1.741332700123029e-05, "loss": 0.5185, "step": 5197 }, { "epoch": 0.7131782945736435, "grad_norm": 1.1953125, "learning_rate": 1.7412358149278713e-05, "loss": 0.4783, "step": 5198 }, { "epoch": 0.7133154970158468, "grad_norm": 1.2421875, "learning_rate": 1.741138914288041e-05, "loss": 0.4829, "step": 5199 }, { "epoch": 0.7134526994580503, "grad_norm": 1.1484375, "learning_rate": 1.741041998205558e-05, "loss": 0.4711, "step": 5200 }, { "epoch": 0.7135899019002538, "grad_norm": 1.2421875, "learning_rate": 1.74094506668244e-05, "loss": 0.5763, "step": 5201 }, { "epoch": 0.7137271043424573, "grad_norm": 1.1328125, "learning_rate": 1.7408481197207084e-05, "loss": 0.4592, "step": 5202 }, { "epoch": 0.7138643067846607, "grad_norm": 1.203125, "learning_rate": 1.740751157322382e-05, "loss": 0.5694, "step": 5203 }, { "epoch": 0.7140015092268642, "grad_norm": 1.171875, "learning_rate": 1.7406541794894812e-05, "loss": 0.4789, "step": 5204 }, { "epoch": 0.7141387116690677, "grad_norm": 1.265625, "learning_rate": 1.7405571862240274e-05, "loss": 0.5367, "step": 5205 }, { "epoch": 0.7142759141112712, "grad_norm": 1.1171875, "learning_rate": 1.740460177528041e-05, "loss": 0.4804, "step": 5206 }, { "epoch": 0.7144131165534746, "grad_norm": 1.1015625, "learning_rate": 1.7403631534035437e-05, "loss": 0.4548, "step": 5207 }, { "epoch": 0.7145503189956781, "grad_norm": 1.1796875, "learning_rate": 1.740266113852557e-05, "loss": 0.5353, "step": 5208 }, { "epoch": 0.7146875214378816, "grad_norm": 1.125, "learning_rate": 1.7401690588771025e-05, "loss": 0.4862, "step": 5209 }, { "epoch": 0.7148247238800851, "grad_norm": 1.1015625, "learning_rate": 1.740071988479203e-05, "loss": 0.4589, "step": 5210 }, { "epoch": 0.7149619263222885, "grad_norm": 1.1328125, "learning_rate": 1.7399749026608808e-05, "loss": 0.5113, "step": 5211 }, { "epoch": 0.715099128764492, "grad_norm": 1.0859375, "learning_rate": 1.7398778014241585e-05, "loss": 0.4762, "step": 5212 }, { "epoch": 0.7152363312066955, "grad_norm": 1.171875, "learning_rate": 1.7397806847710605e-05, "loss": 0.5522, "step": 5213 }, { "epoch": 0.715373533648899, "grad_norm": 1.21875, "learning_rate": 1.7396835527036092e-05, "loss": 0.5376, "step": 5214 }, { "epoch": 0.7155107360911024, "grad_norm": 1.1640625, "learning_rate": 1.7395864052238286e-05, "loss": 0.5362, "step": 5215 }, { "epoch": 0.7156479385333059, "grad_norm": 1.171875, "learning_rate": 1.7394892423337434e-05, "loss": 0.4808, "step": 5216 }, { "epoch": 0.7157851409755094, "grad_norm": 1.1640625, "learning_rate": 1.7393920640353782e-05, "loss": 0.5048, "step": 5217 }, { "epoch": 0.7159223434177129, "grad_norm": 1.2421875, "learning_rate": 1.7392948703307572e-05, "loss": 0.5708, "step": 5218 }, { "epoch": 0.7160595458599163, "grad_norm": 1.1015625, "learning_rate": 1.7391976612219056e-05, "loss": 0.4713, "step": 5219 }, { "epoch": 0.7161967483021198, "grad_norm": 1.28125, "learning_rate": 1.7391004367108495e-05, "loss": 0.6068, "step": 5220 }, { "epoch": 0.7163339507443233, "grad_norm": 1.265625, "learning_rate": 1.7390031967996143e-05, "loss": 0.6141, "step": 5221 }, { "epoch": 0.7164711531865268, "grad_norm": 1.2265625, "learning_rate": 1.7389059414902264e-05, "loss": 0.5308, "step": 5222 }, { "epoch": 0.7166083556287302, "grad_norm": 1.265625, "learning_rate": 1.738808670784712e-05, "loss": 0.5551, "step": 5223 }, { "epoch": 0.7167455580709337, "grad_norm": 1.3046875, "learning_rate": 1.738711384685098e-05, "loss": 0.5704, "step": 5224 }, { "epoch": 0.7168827605131372, "grad_norm": 1.2890625, "learning_rate": 1.7386140831934113e-05, "loss": 0.5779, "step": 5225 }, { "epoch": 0.7170199629553407, "grad_norm": 1.171875, "learning_rate": 1.7385167663116796e-05, "loss": 0.4531, "step": 5226 }, { "epoch": 0.717157165397544, "grad_norm": 1.2578125, "learning_rate": 1.73841943404193e-05, "loss": 0.5166, "step": 5227 }, { "epoch": 0.7172943678397475, "grad_norm": 1.234375, "learning_rate": 1.7383220863861914e-05, "loss": 0.5327, "step": 5228 }, { "epoch": 0.717431570281951, "grad_norm": 1.1484375, "learning_rate": 1.738224723346492e-05, "loss": 0.4953, "step": 5229 }, { "epoch": 0.7175687727241545, "grad_norm": 1.2109375, "learning_rate": 1.7381273449248598e-05, "loss": 0.5197, "step": 5230 }, { "epoch": 0.7177059751663579, "grad_norm": 1.1953125, "learning_rate": 1.7380299511233248e-05, "loss": 0.5032, "step": 5231 }, { "epoch": 0.7178431776085614, "grad_norm": 1.1171875, "learning_rate": 1.7379325419439155e-05, "loss": 0.476, "step": 5232 }, { "epoch": 0.7179803800507649, "grad_norm": 1.3203125, "learning_rate": 1.737835117388662e-05, "loss": 0.5776, "step": 5233 }, { "epoch": 0.7181175824929684, "grad_norm": 1.2421875, "learning_rate": 1.737737677459594e-05, "loss": 0.5157, "step": 5234 }, { "epoch": 0.7182547849351718, "grad_norm": 1.2734375, "learning_rate": 1.737640222158742e-05, "loss": 0.5194, "step": 5235 }, { "epoch": 0.7183919873773753, "grad_norm": 1.2109375, "learning_rate": 1.737542751488137e-05, "loss": 0.4133, "step": 5236 }, { "epoch": 0.7185291898195788, "grad_norm": 1.234375, "learning_rate": 1.737445265449809e-05, "loss": 0.4735, "step": 5237 }, { "epoch": 0.7186663922617823, "grad_norm": 1.1875, "learning_rate": 1.73734776404579e-05, "loss": 0.4964, "step": 5238 }, { "epoch": 0.7188035947039857, "grad_norm": 1.2109375, "learning_rate": 1.7372502472781115e-05, "loss": 0.5338, "step": 5239 }, { "epoch": 0.7189407971461892, "grad_norm": 1.3515625, "learning_rate": 1.7371527151488053e-05, "loss": 0.5704, "step": 5240 }, { "epoch": 0.7190779995883927, "grad_norm": 1.28125, "learning_rate": 1.737055167659903e-05, "loss": 0.5093, "step": 5241 }, { "epoch": 0.7192152020305962, "grad_norm": 1.2734375, "learning_rate": 1.736957604813438e-05, "loss": 0.5488, "step": 5242 }, { "epoch": 0.7193524044727996, "grad_norm": 1.2421875, "learning_rate": 1.736860026611443e-05, "loss": 0.5713, "step": 5243 }, { "epoch": 0.7194896069150031, "grad_norm": 1.2578125, "learning_rate": 1.736762433055951e-05, "loss": 0.5298, "step": 5244 }, { "epoch": 0.7196268093572066, "grad_norm": 1.34375, "learning_rate": 1.7366648241489953e-05, "loss": 0.5842, "step": 5245 }, { "epoch": 0.7197640117994101, "grad_norm": 1.1953125, "learning_rate": 1.73656719989261e-05, "loss": 0.4762, "step": 5246 }, { "epoch": 0.7199012142416135, "grad_norm": 1.28125, "learning_rate": 1.7364695602888293e-05, "loss": 0.5267, "step": 5247 }, { "epoch": 0.720038416683817, "grad_norm": 1.0703125, "learning_rate": 1.7363719053396875e-05, "loss": 0.4573, "step": 5248 }, { "epoch": 0.7201756191260205, "grad_norm": 1.2890625, "learning_rate": 1.7362742350472195e-05, "loss": 0.5776, "step": 5249 }, { "epoch": 0.720312821568224, "grad_norm": 1.1171875, "learning_rate": 1.73617654941346e-05, "loss": 0.509, "step": 5250 }, { "epoch": 0.7204500240104273, "grad_norm": 1.1484375, "learning_rate": 1.7360788484404453e-05, "loss": 0.4821, "step": 5251 }, { "epoch": 0.7205872264526308, "grad_norm": 1.1875, "learning_rate": 1.73598113213021e-05, "loss": 0.545, "step": 5252 }, { "epoch": 0.7207244288948343, "grad_norm": 1.1171875, "learning_rate": 1.7358834004847912e-05, "loss": 0.4901, "step": 5253 }, { "epoch": 0.7208616313370378, "grad_norm": 1.1484375, "learning_rate": 1.7357856535062244e-05, "loss": 0.4816, "step": 5254 }, { "epoch": 0.7209988337792412, "grad_norm": 1.3203125, "learning_rate": 1.7356878911965466e-05, "loss": 0.6033, "step": 5255 }, { "epoch": 0.7211360362214447, "grad_norm": 1.265625, "learning_rate": 1.735590113557795e-05, "loss": 0.5951, "step": 5256 }, { "epoch": 0.7212732386636482, "grad_norm": 1.1171875, "learning_rate": 1.7354923205920067e-05, "loss": 0.4835, "step": 5257 }, { "epoch": 0.7214104411058517, "grad_norm": 1.1953125, "learning_rate": 1.73539451230122e-05, "loss": 0.5138, "step": 5258 }, { "epoch": 0.7215476435480551, "grad_norm": 1.171875, "learning_rate": 1.735296688687472e-05, "loss": 0.5913, "step": 5259 }, { "epoch": 0.7216848459902586, "grad_norm": 1.2890625, "learning_rate": 1.7351988497528013e-05, "loss": 0.5869, "step": 5260 }, { "epoch": 0.7218220484324621, "grad_norm": 1.1796875, "learning_rate": 1.7351009954992465e-05, "loss": 0.5532, "step": 5261 }, { "epoch": 0.7219592508746656, "grad_norm": 1.234375, "learning_rate": 1.7350031259288467e-05, "loss": 0.4842, "step": 5262 }, { "epoch": 0.722096453316869, "grad_norm": 1.0859375, "learning_rate": 1.734905241043641e-05, "loss": 0.4616, "step": 5263 }, { "epoch": 0.7222336557590725, "grad_norm": 1.203125, "learning_rate": 1.734807340845669e-05, "loss": 0.496, "step": 5264 }, { "epoch": 0.722370858201276, "grad_norm": 1.2265625, "learning_rate": 1.7347094253369705e-05, "loss": 0.5127, "step": 5265 }, { "epoch": 0.7225080606434795, "grad_norm": 1.2421875, "learning_rate": 1.734611494519586e-05, "loss": 0.5895, "step": 5266 }, { "epoch": 0.7226452630856829, "grad_norm": 1.125, "learning_rate": 1.7345135483955554e-05, "loss": 0.4654, "step": 5267 }, { "epoch": 0.7227824655278864, "grad_norm": 1.2265625, "learning_rate": 1.7344155869669204e-05, "loss": 0.5224, "step": 5268 }, { "epoch": 0.7229196679700899, "grad_norm": 1.234375, "learning_rate": 1.7343176102357214e-05, "loss": 0.5198, "step": 5269 }, { "epoch": 0.7230568704122934, "grad_norm": 1.09375, "learning_rate": 1.734219618204e-05, "loss": 0.4628, "step": 5270 }, { "epoch": 0.7231940728544968, "grad_norm": 1.2421875, "learning_rate": 1.734121610873799e-05, "loss": 0.5234, "step": 5271 }, { "epoch": 0.7233312752967003, "grad_norm": 1.15625, "learning_rate": 1.734023588247159e-05, "loss": 0.524, "step": 5272 }, { "epoch": 0.7234684777389038, "grad_norm": 1.1171875, "learning_rate": 1.7339255503261233e-05, "loss": 0.4799, "step": 5273 }, { "epoch": 0.7236056801811073, "grad_norm": 1.171875, "learning_rate": 1.7338274971127345e-05, "loss": 0.4627, "step": 5274 }, { "epoch": 0.7237428826233107, "grad_norm": 1.359375, "learning_rate": 1.7337294286090356e-05, "loss": 0.5497, "step": 5275 }, { "epoch": 0.7238800850655142, "grad_norm": 1.2421875, "learning_rate": 1.7336313448170703e-05, "loss": 0.5928, "step": 5276 }, { "epoch": 0.7240172875077177, "grad_norm": 1.046875, "learning_rate": 1.7335332457388817e-05, "loss": 0.4258, "step": 5277 }, { "epoch": 0.7241544899499212, "grad_norm": 1.171875, "learning_rate": 1.7334351313765148e-05, "loss": 0.4796, "step": 5278 }, { "epoch": 0.7242916923921245, "grad_norm": 1.1328125, "learning_rate": 1.7333370017320127e-05, "loss": 0.5127, "step": 5279 }, { "epoch": 0.724428894834328, "grad_norm": 1.1484375, "learning_rate": 1.733238856807421e-05, "loss": 0.4578, "step": 5280 }, { "epoch": 0.7245660972765315, "grad_norm": 1.3203125, "learning_rate": 1.7331406966047847e-05, "loss": 0.6407, "step": 5281 }, { "epoch": 0.724703299718735, "grad_norm": 1.2265625, "learning_rate": 1.7330425211261484e-05, "loss": 0.5245, "step": 5282 }, { "epoch": 0.7248405021609384, "grad_norm": 1.234375, "learning_rate": 1.732944330373558e-05, "loss": 0.5166, "step": 5283 }, { "epoch": 0.7249777046031419, "grad_norm": 1.21875, "learning_rate": 1.73284612434906e-05, "loss": 0.5123, "step": 5284 }, { "epoch": 0.7251149070453454, "grad_norm": 1.1796875, "learning_rate": 1.7327479030547002e-05, "loss": 0.5071, "step": 5285 }, { "epoch": 0.7252521094875489, "grad_norm": 1.1171875, "learning_rate": 1.732649666492525e-05, "loss": 0.5019, "step": 5286 }, { "epoch": 0.7253893119297523, "grad_norm": 1.1328125, "learning_rate": 1.7325514146645815e-05, "loss": 0.4914, "step": 5287 }, { "epoch": 0.7255265143719558, "grad_norm": 1.2265625, "learning_rate": 1.732453147572917e-05, "loss": 0.5542, "step": 5288 }, { "epoch": 0.7256637168141593, "grad_norm": 1.1953125, "learning_rate": 1.7323548652195792e-05, "loss": 0.5211, "step": 5289 }, { "epoch": 0.7258009192563628, "grad_norm": 1.2109375, "learning_rate": 1.7322565676066154e-05, "loss": 0.531, "step": 5290 }, { "epoch": 0.7259381216985662, "grad_norm": 1.203125, "learning_rate": 1.7321582547360742e-05, "loss": 0.5239, "step": 5291 }, { "epoch": 0.7260753241407697, "grad_norm": 1.1484375, "learning_rate": 1.7320599266100037e-05, "loss": 0.4722, "step": 5292 }, { "epoch": 0.7262125265829732, "grad_norm": 1.2109375, "learning_rate": 1.7319615832304532e-05, "loss": 0.5266, "step": 5293 }, { "epoch": 0.7263497290251767, "grad_norm": 1.0234375, "learning_rate": 1.7318632245994714e-05, "loss": 0.3938, "step": 5294 }, { "epoch": 0.7264869314673801, "grad_norm": 1.171875, "learning_rate": 1.731764850719108e-05, "loss": 0.5138, "step": 5295 }, { "epoch": 0.7266241339095836, "grad_norm": 1.171875, "learning_rate": 1.7316664615914127e-05, "loss": 0.5383, "step": 5296 }, { "epoch": 0.7267613363517871, "grad_norm": 1.15625, "learning_rate": 1.7315680572184355e-05, "loss": 0.4948, "step": 5297 }, { "epoch": 0.7268985387939906, "grad_norm": 1.2265625, "learning_rate": 1.731469637602227e-05, "loss": 0.4713, "step": 5298 }, { "epoch": 0.727035741236194, "grad_norm": 1.234375, "learning_rate": 1.7313712027448373e-05, "loss": 0.4998, "step": 5299 }, { "epoch": 0.7271729436783975, "grad_norm": 1.3203125, "learning_rate": 1.7312727526483183e-05, "loss": 0.568, "step": 5300 }, { "epoch": 0.727310146120601, "grad_norm": 1.1484375, "learning_rate": 1.7311742873147204e-05, "loss": 0.5139, "step": 5301 }, { "epoch": 0.7274473485628045, "grad_norm": 1.2734375, "learning_rate": 1.731075806746096e-05, "loss": 0.5869, "step": 5302 }, { "epoch": 0.7275845510050079, "grad_norm": 1.21875, "learning_rate": 1.7309773109444972e-05, "loss": 0.5036, "step": 5303 }, { "epoch": 0.7277217534472114, "grad_norm": 1.1875, "learning_rate": 1.7308787999119756e-05, "loss": 0.4784, "step": 5304 }, { "epoch": 0.7278589558894148, "grad_norm": 1.2578125, "learning_rate": 1.730780273650584e-05, "loss": 0.564, "step": 5305 }, { "epoch": 0.7279961583316183, "grad_norm": 1.109375, "learning_rate": 1.7306817321623758e-05, "loss": 0.4742, "step": 5306 }, { "epoch": 0.7281333607738217, "grad_norm": 1.0234375, "learning_rate": 1.7305831754494036e-05, "loss": 0.4116, "step": 5307 }, { "epoch": 0.7282705632160252, "grad_norm": 1.09375, "learning_rate": 1.7304846035137217e-05, "loss": 0.5076, "step": 5308 }, { "epoch": 0.7284077656582287, "grad_norm": 1.0390625, "learning_rate": 1.7303860163573832e-05, "loss": 0.3971, "step": 5309 }, { "epoch": 0.7285449681004322, "grad_norm": 1.1328125, "learning_rate": 1.7302874139824434e-05, "loss": 0.436, "step": 5310 }, { "epoch": 0.7286821705426356, "grad_norm": 1.046875, "learning_rate": 1.7301887963909554e-05, "loss": 0.4074, "step": 5311 }, { "epoch": 0.7288193729848391, "grad_norm": 1.1328125, "learning_rate": 1.7300901635849746e-05, "loss": 0.4671, "step": 5312 }, { "epoch": 0.7289565754270426, "grad_norm": 1.1796875, "learning_rate": 1.7299915155665568e-05, "loss": 0.5447, "step": 5313 }, { "epoch": 0.7290937778692461, "grad_norm": 1.3125, "learning_rate": 1.7298928523377567e-05, "loss": 0.5804, "step": 5314 }, { "epoch": 0.7292309803114495, "grad_norm": 1.1484375, "learning_rate": 1.7297941739006307e-05, "loss": 0.5293, "step": 5315 }, { "epoch": 0.729368182753653, "grad_norm": 1.21875, "learning_rate": 1.7296954802572342e-05, "loss": 0.525, "step": 5316 }, { "epoch": 0.7295053851958565, "grad_norm": 1.2421875, "learning_rate": 1.7295967714096237e-05, "loss": 0.4864, "step": 5317 }, { "epoch": 0.72964258763806, "grad_norm": 1.1875, "learning_rate": 1.7294980473598565e-05, "loss": 0.539, "step": 5318 }, { "epoch": 0.7297797900802634, "grad_norm": 1.203125, "learning_rate": 1.7293993081099893e-05, "loss": 0.475, "step": 5319 }, { "epoch": 0.7299169925224669, "grad_norm": 1.078125, "learning_rate": 1.7293005536620796e-05, "loss": 0.4452, "step": 5320 }, { "epoch": 0.7300541949646704, "grad_norm": 1.203125, "learning_rate": 1.729201784018185e-05, "loss": 0.5008, "step": 5321 }, { "epoch": 0.7301913974068739, "grad_norm": 1.1171875, "learning_rate": 1.729102999180363e-05, "loss": 0.4788, "step": 5322 }, { "epoch": 0.7303285998490773, "grad_norm": 1.3125, "learning_rate": 1.7290041991506733e-05, "loss": 0.4841, "step": 5323 }, { "epoch": 0.7304658022912808, "grad_norm": 1.109375, "learning_rate": 1.728905383931173e-05, "loss": 0.4469, "step": 5324 }, { "epoch": 0.7306030047334843, "grad_norm": 1.15625, "learning_rate": 1.728806553523922e-05, "loss": 0.5147, "step": 5325 }, { "epoch": 0.7307402071756878, "grad_norm": 1.21875, "learning_rate": 1.7287077079309792e-05, "loss": 0.498, "step": 5326 }, { "epoch": 0.7308774096178912, "grad_norm": 1.2109375, "learning_rate": 1.728608847154404e-05, "loss": 0.549, "step": 5327 }, { "epoch": 0.7310146120600947, "grad_norm": 1.1796875, "learning_rate": 1.728509971196257e-05, "loss": 0.5099, "step": 5328 }, { "epoch": 0.7311518145022982, "grad_norm": 1.1796875, "learning_rate": 1.7284110800585977e-05, "loss": 0.4625, "step": 5329 }, { "epoch": 0.7312890169445017, "grad_norm": 1.3046875, "learning_rate": 1.7283121737434868e-05, "loss": 0.5596, "step": 5330 }, { "epoch": 0.731426219386705, "grad_norm": 1.15625, "learning_rate": 1.7282132522529854e-05, "loss": 0.5186, "step": 5331 }, { "epoch": 0.7315634218289085, "grad_norm": 1.3203125, "learning_rate": 1.7281143155891546e-05, "loss": 0.5084, "step": 5332 }, { "epoch": 0.731700624271112, "grad_norm": 1.1328125, "learning_rate": 1.7280153637540557e-05, "loss": 0.4365, "step": 5333 }, { "epoch": 0.7318378267133155, "grad_norm": 1.1875, "learning_rate": 1.7279163967497506e-05, "loss": 0.5109, "step": 5334 }, { "epoch": 0.7319750291555189, "grad_norm": 1.1953125, "learning_rate": 1.7278174145783016e-05, "loss": 0.5365, "step": 5335 }, { "epoch": 0.7321122315977224, "grad_norm": 1.140625, "learning_rate": 1.7277184172417708e-05, "loss": 0.4498, "step": 5336 }, { "epoch": 0.7322494340399259, "grad_norm": 1.2265625, "learning_rate": 1.727619404742221e-05, "loss": 0.5188, "step": 5337 }, { "epoch": 0.7323866364821294, "grad_norm": 1.296875, "learning_rate": 1.7275203770817155e-05, "loss": 0.5857, "step": 5338 }, { "epoch": 0.7325238389243328, "grad_norm": 1.2734375, "learning_rate": 1.7274213342623177e-05, "loss": 0.5443, "step": 5339 }, { "epoch": 0.7326610413665363, "grad_norm": 1.15625, "learning_rate": 1.7273222762860908e-05, "loss": 0.5048, "step": 5340 }, { "epoch": 0.7327982438087398, "grad_norm": 1.2109375, "learning_rate": 1.7272232031550993e-05, "loss": 0.5166, "step": 5341 }, { "epoch": 0.7329354462509433, "grad_norm": 1.2109375, "learning_rate": 1.7271241148714073e-05, "loss": 0.5013, "step": 5342 }, { "epoch": 0.7330726486931467, "grad_norm": 1.15625, "learning_rate": 1.7270250114370796e-05, "loss": 0.5035, "step": 5343 }, { "epoch": 0.7332098511353502, "grad_norm": 1.2421875, "learning_rate": 1.726925892854181e-05, "loss": 0.5253, "step": 5344 }, { "epoch": 0.7333470535775537, "grad_norm": 1.15625, "learning_rate": 1.726826759124777e-05, "loss": 0.5282, "step": 5345 }, { "epoch": 0.7334842560197572, "grad_norm": 1.265625, "learning_rate": 1.7267276102509327e-05, "loss": 0.5321, "step": 5346 }, { "epoch": 0.7336214584619606, "grad_norm": 1.171875, "learning_rate": 1.7266284462347145e-05, "loss": 0.4988, "step": 5347 }, { "epoch": 0.7337586609041641, "grad_norm": 1.2578125, "learning_rate": 1.7265292670781885e-05, "loss": 0.5539, "step": 5348 }, { "epoch": 0.7338958633463676, "grad_norm": 1.1796875, "learning_rate": 1.7264300727834212e-05, "loss": 0.5108, "step": 5349 }, { "epoch": 0.7340330657885711, "grad_norm": 1.234375, "learning_rate": 1.7263308633524794e-05, "loss": 0.5089, "step": 5350 }, { "epoch": 0.7341702682307745, "grad_norm": 1.2890625, "learning_rate": 1.72623163878743e-05, "loss": 0.5947, "step": 5351 }, { "epoch": 0.734307470672978, "grad_norm": 1.0546875, "learning_rate": 1.726132399090341e-05, "loss": 0.4172, "step": 5352 }, { "epoch": 0.7344446731151815, "grad_norm": 1.234375, "learning_rate": 1.7260331442632804e-05, "loss": 0.5273, "step": 5353 }, { "epoch": 0.734581875557385, "grad_norm": 1.1015625, "learning_rate": 1.7259338743083153e-05, "loss": 0.464, "step": 5354 }, { "epoch": 0.7347190779995884, "grad_norm": 1.203125, "learning_rate": 1.725834589227515e-05, "loss": 0.5354, "step": 5355 }, { "epoch": 0.7348562804417919, "grad_norm": 1.1875, "learning_rate": 1.7257352890229477e-05, "loss": 0.4925, "step": 5356 }, { "epoch": 0.7349934828839954, "grad_norm": 1.109375, "learning_rate": 1.725635973696683e-05, "loss": 0.5007, "step": 5357 }, { "epoch": 0.7351306853261989, "grad_norm": 1.2421875, "learning_rate": 1.72553664325079e-05, "loss": 0.4823, "step": 5358 }, { "epoch": 0.7352678877684022, "grad_norm": 1.1328125, "learning_rate": 1.725437297687338e-05, "loss": 0.522, "step": 5359 }, { "epoch": 0.7354050902106057, "grad_norm": 1.2265625, "learning_rate": 1.7253379370083978e-05, "loss": 0.5105, "step": 5360 }, { "epoch": 0.7355422926528092, "grad_norm": 1.1640625, "learning_rate": 1.725238561216039e-05, "loss": 0.4735, "step": 5361 }, { "epoch": 0.7356794950950127, "grad_norm": 1.2109375, "learning_rate": 1.7251391703123324e-05, "loss": 0.4492, "step": 5362 }, { "epoch": 0.7358166975372161, "grad_norm": 1.2578125, "learning_rate": 1.7250397642993497e-05, "loss": 0.5003, "step": 5363 }, { "epoch": 0.7359538999794196, "grad_norm": 1.140625, "learning_rate": 1.724940343179161e-05, "loss": 0.4365, "step": 5364 }, { "epoch": 0.7360911024216231, "grad_norm": 1.1796875, "learning_rate": 1.7248409069538386e-05, "loss": 0.4892, "step": 5365 }, { "epoch": 0.7362283048638266, "grad_norm": 1.2265625, "learning_rate": 1.724741455625454e-05, "loss": 0.5435, "step": 5366 }, { "epoch": 0.73636550730603, "grad_norm": 1.1328125, "learning_rate": 1.7246419891960798e-05, "loss": 0.4463, "step": 5367 }, { "epoch": 0.7365027097482335, "grad_norm": 1.1875, "learning_rate": 1.7245425076677882e-05, "loss": 0.4678, "step": 5368 }, { "epoch": 0.736639912190437, "grad_norm": 1.2109375, "learning_rate": 1.7244430110426522e-05, "loss": 0.5389, "step": 5369 }, { "epoch": 0.7367771146326405, "grad_norm": 1.2109375, "learning_rate": 1.7243434993227447e-05, "loss": 0.5303, "step": 5370 }, { "epoch": 0.7369143170748439, "grad_norm": 1.21875, "learning_rate": 1.72424397251014e-05, "loss": 0.5744, "step": 5371 }, { "epoch": 0.7370515195170474, "grad_norm": 1.265625, "learning_rate": 1.7241444306069105e-05, "loss": 0.5891, "step": 5372 }, { "epoch": 0.7371887219592509, "grad_norm": 1.1171875, "learning_rate": 1.7240448736151316e-05, "loss": 0.4721, "step": 5373 }, { "epoch": 0.7373259244014544, "grad_norm": 1.2109375, "learning_rate": 1.723945301536877e-05, "loss": 0.5513, "step": 5374 }, { "epoch": 0.7374631268436578, "grad_norm": 1.2421875, "learning_rate": 1.7238457143742216e-05, "loss": 0.5368, "step": 5375 }, { "epoch": 0.7376003292858613, "grad_norm": 1.25, "learning_rate": 1.7237461121292403e-05, "loss": 0.5276, "step": 5376 }, { "epoch": 0.7377375317280648, "grad_norm": 1.1953125, "learning_rate": 1.7236464948040086e-05, "loss": 0.5122, "step": 5377 }, { "epoch": 0.7378747341702683, "grad_norm": 1.234375, "learning_rate": 1.7235468624006018e-05, "loss": 0.5497, "step": 5378 }, { "epoch": 0.7380119366124717, "grad_norm": 1.4296875, "learning_rate": 1.723447214921097e-05, "loss": 0.6013, "step": 5379 }, { "epoch": 0.7381491390546752, "grad_norm": 1.1640625, "learning_rate": 1.7233475523675692e-05, "loss": 0.4514, "step": 5380 }, { "epoch": 0.7382863414968787, "grad_norm": 1.1640625, "learning_rate": 1.7232478747420952e-05, "loss": 0.4808, "step": 5381 }, { "epoch": 0.7384235439390822, "grad_norm": 1.203125, "learning_rate": 1.7231481820467528e-05, "loss": 0.5461, "step": 5382 }, { "epoch": 0.7385607463812855, "grad_norm": 1.15625, "learning_rate": 1.7230484742836185e-05, "loss": 0.464, "step": 5383 }, { "epoch": 0.738697948823489, "grad_norm": 1.1875, "learning_rate": 1.7229487514547697e-05, "loss": 0.549, "step": 5384 }, { "epoch": 0.7388351512656925, "grad_norm": 1.0703125, "learning_rate": 1.722849013562285e-05, "loss": 0.4444, "step": 5385 }, { "epoch": 0.738972353707896, "grad_norm": 1.09375, "learning_rate": 1.722749260608242e-05, "loss": 0.4551, "step": 5386 }, { "epoch": 0.7391095561500994, "grad_norm": 1.28125, "learning_rate": 1.7226494925947194e-05, "loss": 0.5226, "step": 5387 }, { "epoch": 0.7392467585923029, "grad_norm": 1.2578125, "learning_rate": 1.7225497095237957e-05, "loss": 0.5713, "step": 5388 }, { "epoch": 0.7393839610345064, "grad_norm": 1.2421875, "learning_rate": 1.7224499113975505e-05, "loss": 0.5314, "step": 5389 }, { "epoch": 0.7395211634767099, "grad_norm": 1.1640625, "learning_rate": 1.7223500982180627e-05, "loss": 0.4359, "step": 5390 }, { "epoch": 0.7396583659189133, "grad_norm": 1.234375, "learning_rate": 1.7222502699874127e-05, "loss": 0.5111, "step": 5391 }, { "epoch": 0.7397955683611168, "grad_norm": 1.2734375, "learning_rate": 1.7221504267076802e-05, "loss": 0.5298, "step": 5392 }, { "epoch": 0.7399327708033203, "grad_norm": 1.1484375, "learning_rate": 1.7220505683809454e-05, "loss": 0.5157, "step": 5393 }, { "epoch": 0.7400699732455238, "grad_norm": 1.1171875, "learning_rate": 1.721950695009289e-05, "loss": 0.5185, "step": 5394 }, { "epoch": 0.7402071756877272, "grad_norm": 1.1640625, "learning_rate": 1.721850806594792e-05, "loss": 0.5266, "step": 5395 }, { "epoch": 0.7403443781299307, "grad_norm": 1.109375, "learning_rate": 1.7217509031395368e-05, "loss": 0.4395, "step": 5396 }, { "epoch": 0.7404815805721342, "grad_norm": 1.140625, "learning_rate": 1.7216509846456035e-05, "loss": 0.4634, "step": 5397 }, { "epoch": 0.7406187830143377, "grad_norm": 1.25, "learning_rate": 1.7215510511150745e-05, "loss": 0.569, "step": 5398 }, { "epoch": 0.7407559854565411, "grad_norm": 1.3125, "learning_rate": 1.7214511025500323e-05, "loss": 0.5509, "step": 5399 }, { "epoch": 0.7408931878987446, "grad_norm": 1.296875, "learning_rate": 1.721351138952559e-05, "loss": 0.5823, "step": 5400 }, { "epoch": 0.7410303903409481, "grad_norm": 1.09375, "learning_rate": 1.7212511603247387e-05, "loss": 0.4985, "step": 5401 }, { "epoch": 0.7411675927831516, "grad_norm": 1.1484375, "learning_rate": 1.721151166668653e-05, "loss": 0.4868, "step": 5402 }, { "epoch": 0.741304795225355, "grad_norm": 1.203125, "learning_rate": 1.7210511579863866e-05, "loss": 0.575, "step": 5403 }, { "epoch": 0.7414419976675585, "grad_norm": 1.1796875, "learning_rate": 1.7209511342800226e-05, "loss": 0.5331, "step": 5404 }, { "epoch": 0.741579200109762, "grad_norm": 1.2890625, "learning_rate": 1.7208510955516455e-05, "loss": 0.5904, "step": 5405 }, { "epoch": 0.7417164025519655, "grad_norm": 1.3046875, "learning_rate": 1.7207510418033394e-05, "loss": 0.5992, "step": 5406 }, { "epoch": 0.7418536049941689, "grad_norm": 1.15625, "learning_rate": 1.7206509730371896e-05, "loss": 0.5373, "step": 5407 }, { "epoch": 0.7419908074363724, "grad_norm": 1.171875, "learning_rate": 1.7205508892552808e-05, "loss": 0.5157, "step": 5408 }, { "epoch": 0.7421280098785759, "grad_norm": 1.171875, "learning_rate": 1.7204507904596985e-05, "loss": 0.548, "step": 5409 }, { "epoch": 0.7422652123207794, "grad_norm": 1.3359375, "learning_rate": 1.7203506766525283e-05, "loss": 0.6235, "step": 5410 }, { "epoch": 0.7424024147629827, "grad_norm": 1.1875, "learning_rate": 1.720250547835856e-05, "loss": 0.4808, "step": 5411 }, { "epoch": 0.7425396172051862, "grad_norm": 1.2890625, "learning_rate": 1.7201504040117685e-05, "loss": 0.5464, "step": 5412 }, { "epoch": 0.7426768196473897, "grad_norm": 1.2734375, "learning_rate": 1.7200502451823516e-05, "loss": 0.5389, "step": 5413 }, { "epoch": 0.7428140220895932, "grad_norm": 1.265625, "learning_rate": 1.7199500713496933e-05, "loss": 0.502, "step": 5414 }, { "epoch": 0.7429512245317966, "grad_norm": 1.21875, "learning_rate": 1.7198498825158802e-05, "loss": 0.5278, "step": 5415 }, { "epoch": 0.7430884269740001, "grad_norm": 1.1796875, "learning_rate": 1.7197496786829998e-05, "loss": 0.4865, "step": 5416 }, { "epoch": 0.7432256294162036, "grad_norm": 1.296875, "learning_rate": 1.7196494598531404e-05, "loss": 0.5577, "step": 5417 }, { "epoch": 0.7433628318584071, "grad_norm": 1.171875, "learning_rate": 1.7195492260283897e-05, "loss": 0.5249, "step": 5418 }, { "epoch": 0.7435000343006105, "grad_norm": 1.1484375, "learning_rate": 1.7194489772108366e-05, "loss": 0.5287, "step": 5419 }, { "epoch": 0.743637236742814, "grad_norm": 1.21875, "learning_rate": 1.7193487134025703e-05, "loss": 0.5095, "step": 5420 }, { "epoch": 0.7437744391850175, "grad_norm": 1.171875, "learning_rate": 1.7192484346056786e-05, "loss": 0.4875, "step": 5421 }, { "epoch": 0.743911641627221, "grad_norm": 1.296875, "learning_rate": 1.7191481408222522e-05, "loss": 0.5431, "step": 5422 }, { "epoch": 0.7440488440694244, "grad_norm": 1.1015625, "learning_rate": 1.7190478320543807e-05, "loss": 0.4623, "step": 5423 }, { "epoch": 0.7441860465116279, "grad_norm": 1.1953125, "learning_rate": 1.7189475083041535e-05, "loss": 0.5131, "step": 5424 }, { "epoch": 0.7443232489538314, "grad_norm": 1.03125, "learning_rate": 1.7188471695736617e-05, "loss": 0.4137, "step": 5425 }, { "epoch": 0.7444604513960349, "grad_norm": 1.25, "learning_rate": 1.7187468158649956e-05, "loss": 0.5732, "step": 5426 }, { "epoch": 0.7445976538382383, "grad_norm": 1.1328125, "learning_rate": 1.718646447180246e-05, "loss": 0.4641, "step": 5427 }, { "epoch": 0.7447348562804418, "grad_norm": 1.109375, "learning_rate": 1.718546063521505e-05, "loss": 0.4661, "step": 5428 }, { "epoch": 0.7448720587226453, "grad_norm": 1.109375, "learning_rate": 1.7184456648908635e-05, "loss": 0.4874, "step": 5429 }, { "epoch": 0.7450092611648488, "grad_norm": 1.15625, "learning_rate": 1.7183452512904136e-05, "loss": 0.507, "step": 5430 }, { "epoch": 0.7451464636070522, "grad_norm": 1.21875, "learning_rate": 1.7182448227222478e-05, "loss": 0.5255, "step": 5431 }, { "epoch": 0.7452836660492557, "grad_norm": 1.2109375, "learning_rate": 1.7181443791884586e-05, "loss": 0.4685, "step": 5432 }, { "epoch": 0.7454208684914592, "grad_norm": 1.109375, "learning_rate": 1.7180439206911388e-05, "loss": 0.4993, "step": 5433 }, { "epoch": 0.7455580709336627, "grad_norm": 1.1328125, "learning_rate": 1.7179434472323816e-05, "loss": 0.4183, "step": 5434 }, { "epoch": 0.745695273375866, "grad_norm": 1.2109375, "learning_rate": 1.7178429588142806e-05, "loss": 0.5232, "step": 5435 }, { "epoch": 0.7458324758180696, "grad_norm": 1.1953125, "learning_rate": 1.7177424554389293e-05, "loss": 0.5515, "step": 5436 }, { "epoch": 0.745969678260273, "grad_norm": 1.1875, "learning_rate": 1.7176419371084222e-05, "loss": 0.5007, "step": 5437 }, { "epoch": 0.7461068807024765, "grad_norm": 1.1875, "learning_rate": 1.7175414038248535e-05, "loss": 0.4742, "step": 5438 }, { "epoch": 0.7462440831446799, "grad_norm": 1.140625, "learning_rate": 1.717440855590318e-05, "loss": 0.5176, "step": 5439 }, { "epoch": 0.7463812855868834, "grad_norm": 1.21875, "learning_rate": 1.717340292406911e-05, "loss": 0.5127, "step": 5440 }, { "epoch": 0.7465184880290869, "grad_norm": 1.21875, "learning_rate": 1.7172397142767275e-05, "loss": 0.4376, "step": 5441 }, { "epoch": 0.7466556904712904, "grad_norm": 1.234375, "learning_rate": 1.7171391212018633e-05, "loss": 0.4842, "step": 5442 }, { "epoch": 0.7467928929134938, "grad_norm": 1.2890625, "learning_rate": 1.7170385131844147e-05, "loss": 0.5317, "step": 5443 }, { "epoch": 0.7469300953556973, "grad_norm": 1.1796875, "learning_rate": 1.7169378902264773e-05, "loss": 0.5307, "step": 5444 }, { "epoch": 0.7470672977979008, "grad_norm": 1.1875, "learning_rate": 1.7168372523301485e-05, "loss": 0.5042, "step": 5445 }, { "epoch": 0.7472045002401043, "grad_norm": 1.2265625, "learning_rate": 1.7167365994975253e-05, "loss": 0.4896, "step": 5446 }, { "epoch": 0.7473417026823077, "grad_norm": 1.171875, "learning_rate": 1.7166359317307036e-05, "loss": 0.4818, "step": 5447 }, { "epoch": 0.7474789051245112, "grad_norm": 1.2734375, "learning_rate": 1.7165352490317827e-05, "loss": 0.5034, "step": 5448 }, { "epoch": 0.7476161075667147, "grad_norm": 1.1875, "learning_rate": 1.7164345514028592e-05, "loss": 0.4947, "step": 5449 }, { "epoch": 0.7477533100089182, "grad_norm": 1.1953125, "learning_rate": 1.716333838846032e-05, "loss": 0.5426, "step": 5450 }, { "epoch": 0.7478905124511216, "grad_norm": 1.265625, "learning_rate": 1.716233111363399e-05, "loss": 0.5694, "step": 5451 }, { "epoch": 0.7480277148933251, "grad_norm": 1.0859375, "learning_rate": 1.7161323689570597e-05, "loss": 0.4241, "step": 5452 }, { "epoch": 0.7481649173355286, "grad_norm": 1.09375, "learning_rate": 1.7160316116291125e-05, "loss": 0.4918, "step": 5453 }, { "epoch": 0.7483021197777321, "grad_norm": 1.1796875, "learning_rate": 1.7159308393816573e-05, "loss": 0.4861, "step": 5454 }, { "epoch": 0.7484393222199355, "grad_norm": 1.2421875, "learning_rate": 1.7158300522167936e-05, "loss": 0.5004, "step": 5455 }, { "epoch": 0.748576524662139, "grad_norm": 1.21875, "learning_rate": 1.715729250136622e-05, "loss": 0.5049, "step": 5456 }, { "epoch": 0.7487137271043425, "grad_norm": 1.234375, "learning_rate": 1.7156284331432415e-05, "loss": 0.5269, "step": 5457 }, { "epoch": 0.748850929546546, "grad_norm": 1.28125, "learning_rate": 1.7155276012387544e-05, "loss": 0.5921, "step": 5458 }, { "epoch": 0.7489881319887494, "grad_norm": 1.1875, "learning_rate": 1.7154267544252603e-05, "loss": 0.5138, "step": 5459 }, { "epoch": 0.7491253344309529, "grad_norm": 1.1796875, "learning_rate": 1.715325892704861e-05, "loss": 0.5214, "step": 5460 }, { "epoch": 0.7492625368731564, "grad_norm": 1.1953125, "learning_rate": 1.715225016079659e-05, "loss": 0.4742, "step": 5461 }, { "epoch": 0.7493997393153599, "grad_norm": 1.1171875, "learning_rate": 1.7151241245517546e-05, "loss": 0.4799, "step": 5462 }, { "epoch": 0.7495369417575632, "grad_norm": 1.25, "learning_rate": 1.7150232181232513e-05, "loss": 0.529, "step": 5463 }, { "epoch": 0.7496741441997667, "grad_norm": 1.2265625, "learning_rate": 1.7149222967962508e-05, "loss": 0.5257, "step": 5464 }, { "epoch": 0.7498113466419702, "grad_norm": 1.2890625, "learning_rate": 1.7148213605728563e-05, "loss": 0.5531, "step": 5465 }, { "epoch": 0.7499485490841737, "grad_norm": 1.2421875, "learning_rate": 1.7147204094551708e-05, "loss": 0.4412, "step": 5466 }, { "epoch": 0.7500857515263771, "grad_norm": 1.1171875, "learning_rate": 1.714619443445298e-05, "loss": 0.4218, "step": 5467 }, { "epoch": 0.7502229539685806, "grad_norm": 1.28125, "learning_rate": 1.7145184625453414e-05, "loss": 0.5598, "step": 5468 }, { "epoch": 0.7503601564107841, "grad_norm": 1.2578125, "learning_rate": 1.7144174667574055e-05, "loss": 0.5092, "step": 5469 }, { "epoch": 0.7504973588529876, "grad_norm": 1.1484375, "learning_rate": 1.7143164560835937e-05, "loss": 0.4574, "step": 5470 }, { "epoch": 0.750634561295191, "grad_norm": 4.25, "learning_rate": 1.714215430526012e-05, "loss": 0.553, "step": 5471 }, { "epoch": 0.7507717637373945, "grad_norm": 1.1328125, "learning_rate": 1.7141143900867646e-05, "loss": 0.4425, "step": 5472 }, { "epoch": 0.750908966179598, "grad_norm": 1.171875, "learning_rate": 1.714013334767957e-05, "loss": 0.4776, "step": 5473 }, { "epoch": 0.7510461686218015, "grad_norm": 1.0546875, "learning_rate": 1.713912264571695e-05, "loss": 0.3859, "step": 5474 }, { "epoch": 0.7511833710640049, "grad_norm": 1.078125, "learning_rate": 1.713811179500084e-05, "loss": 0.4417, "step": 5475 }, { "epoch": 0.7513205735062084, "grad_norm": 1.25, "learning_rate": 1.713710079555231e-05, "loss": 0.5741, "step": 5476 }, { "epoch": 0.7514577759484119, "grad_norm": 1.234375, "learning_rate": 1.713608964739242e-05, "loss": 0.5342, "step": 5477 }, { "epoch": 0.7515949783906154, "grad_norm": 1.1953125, "learning_rate": 1.7135078350542242e-05, "loss": 0.4987, "step": 5478 }, { "epoch": 0.7517321808328188, "grad_norm": 1.3125, "learning_rate": 1.7134066905022845e-05, "loss": 0.5445, "step": 5479 }, { "epoch": 0.7518693832750223, "grad_norm": 1.1796875, "learning_rate": 1.7133055310855302e-05, "loss": 0.5347, "step": 5480 }, { "epoch": 0.7520065857172258, "grad_norm": 1.140625, "learning_rate": 1.7132043568060697e-05, "loss": 0.474, "step": 5481 }, { "epoch": 0.7521437881594293, "grad_norm": 1.203125, "learning_rate": 1.7131031676660107e-05, "loss": 0.4961, "step": 5482 }, { "epoch": 0.7522809906016327, "grad_norm": 1.234375, "learning_rate": 1.7130019636674618e-05, "loss": 0.495, "step": 5483 }, { "epoch": 0.7524181930438362, "grad_norm": 1.2265625, "learning_rate": 1.7129007448125316e-05, "loss": 0.4902, "step": 5484 }, { "epoch": 0.7525553954860397, "grad_norm": 1.296875, "learning_rate": 1.7127995111033293e-05, "loss": 0.5999, "step": 5485 }, { "epoch": 0.7526925979282432, "grad_norm": 1.1484375, "learning_rate": 1.712698262541964e-05, "loss": 0.5111, "step": 5486 }, { "epoch": 0.7528298003704466, "grad_norm": 1.21875, "learning_rate": 1.712596999130545e-05, "loss": 0.5141, "step": 5487 }, { "epoch": 0.75296700281265, "grad_norm": 1.1796875, "learning_rate": 1.7124957208711834e-05, "loss": 0.5293, "step": 5488 }, { "epoch": 0.7531042052548536, "grad_norm": 1.1328125, "learning_rate": 1.7123944277659888e-05, "loss": 0.4711, "step": 5489 }, { "epoch": 0.753241407697057, "grad_norm": 1.125, "learning_rate": 1.7122931198170717e-05, "loss": 0.4596, "step": 5490 }, { "epoch": 0.7533786101392604, "grad_norm": 1.234375, "learning_rate": 1.712191797026543e-05, "loss": 0.5359, "step": 5491 }, { "epoch": 0.7535158125814639, "grad_norm": 1.1875, "learning_rate": 1.712090459396514e-05, "loss": 0.554, "step": 5492 }, { "epoch": 0.7536530150236674, "grad_norm": 0.99609375, "learning_rate": 1.7119891069290958e-05, "loss": 0.3755, "step": 5493 }, { "epoch": 0.7537902174658709, "grad_norm": 1.1953125, "learning_rate": 1.7118877396264012e-05, "loss": 0.524, "step": 5494 }, { "epoch": 0.7539274199080743, "grad_norm": 1.2578125, "learning_rate": 1.7117863574905415e-05, "loss": 0.4857, "step": 5495 }, { "epoch": 0.7540646223502778, "grad_norm": 1.125, "learning_rate": 1.7116849605236296e-05, "loss": 0.4758, "step": 5496 }, { "epoch": 0.7542018247924813, "grad_norm": 1.1796875, "learning_rate": 1.7115835487277775e-05, "loss": 0.5033, "step": 5497 }, { "epoch": 0.7543390272346848, "grad_norm": 1.203125, "learning_rate": 1.7114821221050993e-05, "loss": 0.5274, "step": 5498 }, { "epoch": 0.7544762296768882, "grad_norm": 1.1796875, "learning_rate": 1.7113806806577077e-05, "loss": 0.5017, "step": 5499 }, { "epoch": 0.7546134321190917, "grad_norm": 1.2421875, "learning_rate": 1.7112792243877165e-05, "loss": 0.5646, "step": 5500 }, { "epoch": 0.7547506345612952, "grad_norm": 1.1953125, "learning_rate": 1.7111777532972397e-05, "loss": 0.5147, "step": 5501 }, { "epoch": 0.7548878370034987, "grad_norm": 1.1484375, "learning_rate": 1.7110762673883914e-05, "loss": 0.5157, "step": 5502 }, { "epoch": 0.7550250394457021, "grad_norm": 1.140625, "learning_rate": 1.7109747666632865e-05, "loss": 0.4859, "step": 5503 }, { "epoch": 0.7551622418879056, "grad_norm": 1.1953125, "learning_rate": 1.71087325112404e-05, "loss": 0.528, "step": 5504 }, { "epoch": 0.7552994443301091, "grad_norm": 1.1328125, "learning_rate": 1.7107717207727666e-05, "loss": 0.489, "step": 5505 }, { "epoch": 0.7554366467723126, "grad_norm": 1.078125, "learning_rate": 1.7106701756115825e-05, "loss": 0.4012, "step": 5506 }, { "epoch": 0.755573849214516, "grad_norm": 1.1484375, "learning_rate": 1.7105686156426026e-05, "loss": 0.5118, "step": 5507 }, { "epoch": 0.7557110516567195, "grad_norm": 1.1171875, "learning_rate": 1.710467040867944e-05, "loss": 0.4491, "step": 5508 }, { "epoch": 0.755848254098923, "grad_norm": 1.1796875, "learning_rate": 1.710365451289723e-05, "loss": 0.5264, "step": 5509 }, { "epoch": 0.7559854565411265, "grad_norm": 1.2265625, "learning_rate": 1.710263846910056e-05, "loss": 0.5256, "step": 5510 }, { "epoch": 0.7561226589833299, "grad_norm": 1.265625, "learning_rate": 1.71016222773106e-05, "loss": 0.598, "step": 5511 }, { "epoch": 0.7562598614255334, "grad_norm": 1.2109375, "learning_rate": 1.7100605937548525e-05, "loss": 0.5315, "step": 5512 }, { "epoch": 0.7563970638677369, "grad_norm": 1.25, "learning_rate": 1.7099589449835513e-05, "loss": 0.5454, "step": 5513 }, { "epoch": 0.7565342663099404, "grad_norm": 1.09375, "learning_rate": 1.7098572814192744e-05, "loss": 0.4688, "step": 5514 }, { "epoch": 0.7566714687521437, "grad_norm": 1.2109375, "learning_rate": 1.70975560306414e-05, "loss": 0.5069, "step": 5515 }, { "epoch": 0.7568086711943472, "grad_norm": 1.1328125, "learning_rate": 1.7096539099202667e-05, "loss": 0.5412, "step": 5516 }, { "epoch": 0.7569458736365507, "grad_norm": 1.1640625, "learning_rate": 1.7095522019897735e-05, "loss": 0.5054, "step": 5517 }, { "epoch": 0.7570830760787542, "grad_norm": 1.0546875, "learning_rate": 1.70945047927478e-05, "loss": 0.4398, "step": 5518 }, { "epoch": 0.7572202785209576, "grad_norm": 1.125, "learning_rate": 1.7093487417774047e-05, "loss": 0.4618, "step": 5519 }, { "epoch": 0.7573574809631611, "grad_norm": 1.140625, "learning_rate": 1.7092469894997683e-05, "loss": 0.54, "step": 5520 }, { "epoch": 0.7574946834053646, "grad_norm": 1.140625, "learning_rate": 1.709145222443991e-05, "loss": 0.4684, "step": 5521 }, { "epoch": 0.7576318858475681, "grad_norm": 1.2109375, "learning_rate": 1.7090434406121928e-05, "loss": 0.5238, "step": 5522 }, { "epoch": 0.7577690882897715, "grad_norm": 1.203125, "learning_rate": 1.7089416440064946e-05, "loss": 0.5453, "step": 5523 }, { "epoch": 0.757906290731975, "grad_norm": 1.1796875, "learning_rate": 1.7088398326290175e-05, "loss": 0.4764, "step": 5524 }, { "epoch": 0.7580434931741785, "grad_norm": 1.21875, "learning_rate": 1.7087380064818827e-05, "loss": 0.5436, "step": 5525 }, { "epoch": 0.758180695616382, "grad_norm": 1.1953125, "learning_rate": 1.7086361655672124e-05, "loss": 0.5075, "step": 5526 }, { "epoch": 0.7583178980585854, "grad_norm": 1.28125, "learning_rate": 1.7085343098871284e-05, "loss": 0.5884, "step": 5527 }, { "epoch": 0.7584551005007889, "grad_norm": 1.046875, "learning_rate": 1.7084324394437525e-05, "loss": 0.4289, "step": 5528 }, { "epoch": 0.7585923029429924, "grad_norm": 1.3125, "learning_rate": 1.7083305542392076e-05, "loss": 0.6664, "step": 5529 }, { "epoch": 0.7587295053851959, "grad_norm": 1.265625, "learning_rate": 1.708228654275617e-05, "loss": 0.5538, "step": 5530 }, { "epoch": 0.7588667078273993, "grad_norm": 1.21875, "learning_rate": 1.708126739555104e-05, "loss": 0.5397, "step": 5531 }, { "epoch": 0.7590039102696028, "grad_norm": 1.25, "learning_rate": 1.708024810079791e-05, "loss": 0.5178, "step": 5532 }, { "epoch": 0.7591411127118063, "grad_norm": 1.34375, "learning_rate": 1.7079228658518034e-05, "loss": 0.5873, "step": 5533 }, { "epoch": 0.7592783151540098, "grad_norm": 1.0859375, "learning_rate": 1.7078209068732637e-05, "loss": 0.4341, "step": 5534 }, { "epoch": 0.7594155175962132, "grad_norm": 1.2109375, "learning_rate": 1.707718933146298e-05, "loss": 0.434, "step": 5535 }, { "epoch": 0.7595527200384167, "grad_norm": 1.1796875, "learning_rate": 1.70761694467303e-05, "loss": 0.4817, "step": 5536 }, { "epoch": 0.7596899224806202, "grad_norm": 1.1328125, "learning_rate": 1.707514941455585e-05, "loss": 0.4712, "step": 5537 }, { "epoch": 0.7598271249228237, "grad_norm": 1.171875, "learning_rate": 1.7074129234960884e-05, "loss": 0.4426, "step": 5538 }, { "epoch": 0.7599643273650271, "grad_norm": 1.265625, "learning_rate": 1.707310890796666e-05, "loss": 0.6079, "step": 5539 }, { "epoch": 0.7601015298072306, "grad_norm": 1.1328125, "learning_rate": 1.7072088433594437e-05, "loss": 0.4353, "step": 5540 }, { "epoch": 0.7602387322494341, "grad_norm": 1.234375, "learning_rate": 1.7071067811865477e-05, "loss": 0.5777, "step": 5541 }, { "epoch": 0.7603759346916376, "grad_norm": 1.0703125, "learning_rate": 1.707004704280105e-05, "loss": 0.4435, "step": 5542 }, { "epoch": 0.7605131371338409, "grad_norm": 1.1875, "learning_rate": 1.706902612642242e-05, "loss": 0.5104, "step": 5543 }, { "epoch": 0.7606503395760444, "grad_norm": 1.2265625, "learning_rate": 1.706800506275086e-05, "loss": 0.4618, "step": 5544 }, { "epoch": 0.7607875420182479, "grad_norm": 1.1875, "learning_rate": 1.706698385180765e-05, "loss": 0.4906, "step": 5545 }, { "epoch": 0.7609247444604514, "grad_norm": 1.2109375, "learning_rate": 1.7065962493614062e-05, "loss": 0.4936, "step": 5546 }, { "epoch": 0.7610619469026548, "grad_norm": 1.1484375, "learning_rate": 1.7064940988191384e-05, "loss": 0.5172, "step": 5547 }, { "epoch": 0.7611991493448583, "grad_norm": 1.3203125, "learning_rate": 1.7063919335560894e-05, "loss": 0.6074, "step": 5548 }, { "epoch": 0.7613363517870618, "grad_norm": 1.3046875, "learning_rate": 1.7062897535743884e-05, "loss": 0.577, "step": 5549 }, { "epoch": 0.7614735542292653, "grad_norm": 1.3125, "learning_rate": 1.7061875588761646e-05, "loss": 0.5838, "step": 5550 }, { "epoch": 0.7616107566714687, "grad_norm": 1.2265625, "learning_rate": 1.7060853494635467e-05, "loss": 0.5106, "step": 5551 }, { "epoch": 0.7617479591136722, "grad_norm": 1.15625, "learning_rate": 1.7059831253386647e-05, "loss": 0.453, "step": 5552 }, { "epoch": 0.7618851615558757, "grad_norm": 1.109375, "learning_rate": 1.705880886503649e-05, "loss": 0.4242, "step": 5553 }, { "epoch": 0.7620223639980792, "grad_norm": 1.3984375, "learning_rate": 1.705778632960629e-05, "loss": 0.5924, "step": 5554 }, { "epoch": 0.7621595664402826, "grad_norm": 1.21875, "learning_rate": 1.7056763647117364e-05, "loss": 0.5171, "step": 5555 }, { "epoch": 0.7622967688824861, "grad_norm": 1.15625, "learning_rate": 1.705574081759101e-05, "loss": 0.5193, "step": 5556 }, { "epoch": 0.7624339713246896, "grad_norm": 1.265625, "learning_rate": 1.705471784104855e-05, "loss": 0.5446, "step": 5557 }, { "epoch": 0.7625711737668931, "grad_norm": 1.2109375, "learning_rate": 1.7053694717511292e-05, "loss": 0.5416, "step": 5558 }, { "epoch": 0.7627083762090965, "grad_norm": 1.171875, "learning_rate": 1.7052671447000558e-05, "loss": 0.496, "step": 5559 }, { "epoch": 0.7628455786513, "grad_norm": 1.0703125, "learning_rate": 1.7051648029537664e-05, "loss": 0.4436, "step": 5560 }, { "epoch": 0.7629827810935035, "grad_norm": 1.1953125, "learning_rate": 1.7050624465143943e-05, "loss": 0.4329, "step": 5561 }, { "epoch": 0.763119983535707, "grad_norm": 1.328125, "learning_rate": 1.7049600753840712e-05, "loss": 0.6009, "step": 5562 }, { "epoch": 0.7632571859779104, "grad_norm": 1.2421875, "learning_rate": 1.704857689564931e-05, "loss": 0.549, "step": 5563 }, { "epoch": 0.7633943884201139, "grad_norm": 1.1796875, "learning_rate": 1.704755289059107e-05, "loss": 0.5097, "step": 5564 }, { "epoch": 0.7635315908623174, "grad_norm": 1.2265625, "learning_rate": 1.7046528738687323e-05, "loss": 0.5183, "step": 5565 }, { "epoch": 0.7636687933045209, "grad_norm": 1.0703125, "learning_rate": 1.7045504439959415e-05, "loss": 0.4414, "step": 5566 }, { "epoch": 0.7638059957467243, "grad_norm": 1.265625, "learning_rate": 1.7044479994428683e-05, "loss": 0.5735, "step": 5567 }, { "epoch": 0.7639431981889278, "grad_norm": 1.21875, "learning_rate": 1.7043455402116475e-05, "loss": 0.5665, "step": 5568 }, { "epoch": 0.7640804006311313, "grad_norm": 1.203125, "learning_rate": 1.704243066304414e-05, "loss": 0.5539, "step": 5569 }, { "epoch": 0.7642176030733348, "grad_norm": 1.171875, "learning_rate": 1.7041405777233027e-05, "loss": 0.4627, "step": 5570 }, { "epoch": 0.7643548055155381, "grad_norm": 1.1484375, "learning_rate": 1.7040380744704502e-05, "loss": 0.5457, "step": 5571 }, { "epoch": 0.7644920079577416, "grad_norm": 1.2578125, "learning_rate": 1.7039355565479906e-05, "loss": 0.5055, "step": 5572 }, { "epoch": 0.7646292103999451, "grad_norm": 1.2578125, "learning_rate": 1.7038330239580614e-05, "loss": 0.5135, "step": 5573 }, { "epoch": 0.7647664128421486, "grad_norm": 1.1484375, "learning_rate": 1.703730476702798e-05, "loss": 0.4841, "step": 5574 }, { "epoch": 0.764903615284352, "grad_norm": 1.171875, "learning_rate": 1.7036279147843378e-05, "loss": 0.5546, "step": 5575 }, { "epoch": 0.7650408177265555, "grad_norm": 1.3046875, "learning_rate": 1.7035253382048177e-05, "loss": 0.6096, "step": 5576 }, { "epoch": 0.765178020168759, "grad_norm": 1.1796875, "learning_rate": 1.7034227469663748e-05, "loss": 0.509, "step": 5577 }, { "epoch": 0.7653152226109625, "grad_norm": 1.1484375, "learning_rate": 1.703320141071147e-05, "loss": 0.5007, "step": 5578 }, { "epoch": 0.7654524250531659, "grad_norm": 1.109375, "learning_rate": 1.703217520521272e-05, "loss": 0.4568, "step": 5579 }, { "epoch": 0.7655896274953694, "grad_norm": 1.1484375, "learning_rate": 1.7031148853188883e-05, "loss": 0.5259, "step": 5580 }, { "epoch": 0.7657268299375729, "grad_norm": 1.3671875, "learning_rate": 1.703012235466134e-05, "loss": 0.592, "step": 5581 }, { "epoch": 0.7658640323797764, "grad_norm": 1.140625, "learning_rate": 1.7029095709651483e-05, "loss": 0.5016, "step": 5582 }, { "epoch": 0.7660012348219798, "grad_norm": 1.140625, "learning_rate": 1.7028068918180706e-05, "loss": 0.4846, "step": 5583 }, { "epoch": 0.7661384372641833, "grad_norm": 1.25, "learning_rate": 1.7027041980270397e-05, "loss": 0.5301, "step": 5584 }, { "epoch": 0.7662756397063868, "grad_norm": 1.3046875, "learning_rate": 1.702601489594196e-05, "loss": 0.5488, "step": 5585 }, { "epoch": 0.7664128421485903, "grad_norm": 1.2421875, "learning_rate": 1.702498766521679e-05, "loss": 0.5087, "step": 5586 }, { "epoch": 0.7665500445907937, "grad_norm": 1.1484375, "learning_rate": 1.7023960288116294e-05, "loss": 0.4905, "step": 5587 }, { "epoch": 0.7666872470329972, "grad_norm": 1.1171875, "learning_rate": 1.702293276466188e-05, "loss": 0.3959, "step": 5588 }, { "epoch": 0.7668244494752007, "grad_norm": 1.203125, "learning_rate": 1.702190509487496e-05, "loss": 0.5119, "step": 5589 }, { "epoch": 0.7669616519174042, "grad_norm": 1.3046875, "learning_rate": 1.7020877278776936e-05, "loss": 0.5458, "step": 5590 }, { "epoch": 0.7670988543596076, "grad_norm": 1.125, "learning_rate": 1.7019849316389235e-05, "loss": 0.4377, "step": 5591 }, { "epoch": 0.7672360568018111, "grad_norm": 1.1484375, "learning_rate": 1.7018821207733273e-05, "loss": 0.4742, "step": 5592 }, { "epoch": 0.7673732592440146, "grad_norm": 1.09375, "learning_rate": 1.701779295283047e-05, "loss": 0.4507, "step": 5593 }, { "epoch": 0.7675104616862181, "grad_norm": 1.1484375, "learning_rate": 1.7016764551702253e-05, "loss": 0.5542, "step": 5594 }, { "epoch": 0.7676476641284214, "grad_norm": 1.1171875, "learning_rate": 1.701573600437005e-05, "loss": 0.5026, "step": 5595 }, { "epoch": 0.767784866570625, "grad_norm": 1.2890625, "learning_rate": 1.701470731085529e-05, "loss": 0.5601, "step": 5596 }, { "epoch": 0.7679220690128284, "grad_norm": 1.0390625, "learning_rate": 1.701367847117941e-05, "loss": 0.4308, "step": 5597 }, { "epoch": 0.7680592714550319, "grad_norm": 1.265625, "learning_rate": 1.701264948536385e-05, "loss": 0.5586, "step": 5598 }, { "epoch": 0.7681964738972353, "grad_norm": 1.09375, "learning_rate": 1.701162035343004e-05, "loss": 0.4015, "step": 5599 }, { "epoch": 0.7683336763394388, "grad_norm": 1.234375, "learning_rate": 1.7010591075399435e-05, "loss": 0.5221, "step": 5600 }, { "epoch": 0.7684708787816423, "grad_norm": 1.1171875, "learning_rate": 1.7009561651293473e-05, "loss": 0.4881, "step": 5601 }, { "epoch": 0.7686080812238458, "grad_norm": 1.1484375, "learning_rate": 1.7008532081133612e-05, "loss": 0.5184, "step": 5602 }, { "epoch": 0.7687452836660492, "grad_norm": 1.2734375, "learning_rate": 1.7007502364941294e-05, "loss": 0.5603, "step": 5603 }, { "epoch": 0.7688824861082527, "grad_norm": 1.25, "learning_rate": 1.7006472502737982e-05, "loss": 0.5496, "step": 5604 }, { "epoch": 0.7690196885504562, "grad_norm": 1.125, "learning_rate": 1.7005442494545136e-05, "loss": 0.4615, "step": 5605 }, { "epoch": 0.7691568909926597, "grad_norm": 1.0546875, "learning_rate": 1.7004412340384214e-05, "loss": 0.4074, "step": 5606 }, { "epoch": 0.7692940934348631, "grad_norm": 1.21875, "learning_rate": 1.7003382040276675e-05, "loss": 0.5612, "step": 5607 }, { "epoch": 0.7694312958770666, "grad_norm": 1.3125, "learning_rate": 1.7002351594244e-05, "loss": 0.5489, "step": 5608 }, { "epoch": 0.7695684983192701, "grad_norm": 1.2578125, "learning_rate": 1.7001321002307647e-05, "loss": 0.5445, "step": 5609 }, { "epoch": 0.7697057007614736, "grad_norm": 1.1484375, "learning_rate": 1.70002902644891e-05, "loss": 0.5036, "step": 5610 }, { "epoch": 0.769842903203677, "grad_norm": 1.3828125, "learning_rate": 1.6999259380809828e-05, "loss": 0.6349, "step": 5611 }, { "epoch": 0.7699801056458805, "grad_norm": 1.140625, "learning_rate": 1.6998228351291315e-05, "loss": 0.5156, "step": 5612 }, { "epoch": 0.770117308088084, "grad_norm": 1.1640625, "learning_rate": 1.699719717595504e-05, "loss": 0.5373, "step": 5613 }, { "epoch": 0.7702545105302875, "grad_norm": 1.0703125, "learning_rate": 1.69961658548225e-05, "loss": 0.4155, "step": 5614 }, { "epoch": 0.7703917129724909, "grad_norm": 1.1875, "learning_rate": 1.6995134387915166e-05, "loss": 0.5486, "step": 5615 }, { "epoch": 0.7705289154146944, "grad_norm": 1.1171875, "learning_rate": 1.6994102775254548e-05, "loss": 0.4774, "step": 5616 }, { "epoch": 0.7706661178568979, "grad_norm": 1.125, "learning_rate": 1.6993071016862124e-05, "loss": 0.4239, "step": 5617 }, { "epoch": 0.7708033202991014, "grad_norm": 1.1171875, "learning_rate": 1.6992039112759407e-05, "loss": 0.438, "step": 5618 }, { "epoch": 0.7709405227413048, "grad_norm": 1.1171875, "learning_rate": 1.6991007062967897e-05, "loss": 0.4827, "step": 5619 }, { "epoch": 0.7710777251835083, "grad_norm": 1.171875, "learning_rate": 1.698997486750909e-05, "loss": 0.4361, "step": 5620 }, { "epoch": 0.7712149276257118, "grad_norm": 1.2109375, "learning_rate": 1.6988942526404493e-05, "loss": 0.5151, "step": 5621 }, { "epoch": 0.7713521300679153, "grad_norm": 1.1953125, "learning_rate": 1.6987910039675624e-05, "loss": 0.5048, "step": 5622 }, { "epoch": 0.7714893325101186, "grad_norm": 1.2109375, "learning_rate": 1.6986877407343988e-05, "loss": 0.4664, "step": 5623 }, { "epoch": 0.7716265349523221, "grad_norm": 1.3046875, "learning_rate": 1.698584462943111e-05, "loss": 0.5583, "step": 5624 }, { "epoch": 0.7717637373945256, "grad_norm": 1.1640625, "learning_rate": 1.6984811705958502e-05, "loss": 0.4474, "step": 5625 }, { "epoch": 0.7719009398367291, "grad_norm": 1.1640625, "learning_rate": 1.6983778636947693e-05, "loss": 0.5353, "step": 5626 }, { "epoch": 0.7720381422789325, "grad_norm": 1.1875, "learning_rate": 1.69827454224202e-05, "loss": 0.5332, "step": 5627 }, { "epoch": 0.772175344721136, "grad_norm": 1.296875, "learning_rate": 1.698171206239756e-05, "loss": 0.4829, "step": 5628 }, { "epoch": 0.7723125471633395, "grad_norm": 1.1015625, "learning_rate": 1.69806785569013e-05, "loss": 0.4866, "step": 5629 }, { "epoch": 0.772449749605543, "grad_norm": 1.1484375, "learning_rate": 1.6979644905952955e-05, "loss": 0.542, "step": 5630 }, { "epoch": 0.7725869520477464, "grad_norm": 1.2421875, "learning_rate": 1.6978611109574063e-05, "loss": 0.5088, "step": 5631 }, { "epoch": 0.7727241544899499, "grad_norm": 1.140625, "learning_rate": 1.6977577167786163e-05, "loss": 0.496, "step": 5632 }, { "epoch": 0.7728613569321534, "grad_norm": 1.1953125, "learning_rate": 1.6976543080610798e-05, "loss": 0.5289, "step": 5633 }, { "epoch": 0.7729985593743569, "grad_norm": 1.375, "learning_rate": 1.697550884806952e-05, "loss": 0.6067, "step": 5634 }, { "epoch": 0.7731357618165603, "grad_norm": 1.21875, "learning_rate": 1.6974474470183876e-05, "loss": 0.5667, "step": 5635 }, { "epoch": 0.7732729642587638, "grad_norm": 1.1015625, "learning_rate": 1.6973439946975418e-05, "loss": 0.4774, "step": 5636 }, { "epoch": 0.7734101667009673, "grad_norm": 1.203125, "learning_rate": 1.69724052784657e-05, "loss": 0.5567, "step": 5637 }, { "epoch": 0.7735473691431708, "grad_norm": 1.1171875, "learning_rate": 1.697137046467628e-05, "loss": 0.4929, "step": 5638 }, { "epoch": 0.7736845715853742, "grad_norm": 1.28125, "learning_rate": 1.6970335505628727e-05, "loss": 0.5416, "step": 5639 }, { "epoch": 0.7738217740275777, "grad_norm": 1.2734375, "learning_rate": 1.6969300401344597e-05, "loss": 0.5351, "step": 5640 }, { "epoch": 0.7739589764697812, "grad_norm": 1.2890625, "learning_rate": 1.6968265151845464e-05, "loss": 0.63, "step": 5641 }, { "epoch": 0.7740961789119847, "grad_norm": 1.140625, "learning_rate": 1.6967229757152894e-05, "loss": 0.5302, "step": 5642 }, { "epoch": 0.7742333813541881, "grad_norm": 1.21875, "learning_rate": 1.6966194217288468e-05, "loss": 0.5992, "step": 5643 }, { "epoch": 0.7743705837963916, "grad_norm": 1.265625, "learning_rate": 1.6965158532273756e-05, "loss": 0.5538, "step": 5644 }, { "epoch": 0.7745077862385951, "grad_norm": 1.0703125, "learning_rate": 1.696412270213034e-05, "loss": 0.4043, "step": 5645 }, { "epoch": 0.7746449886807986, "grad_norm": 1.0625, "learning_rate": 1.6963086726879804e-05, "loss": 0.4223, "step": 5646 }, { "epoch": 0.774782191123002, "grad_norm": 1.203125, "learning_rate": 1.696205060654373e-05, "loss": 0.5298, "step": 5647 }, { "epoch": 0.7749193935652054, "grad_norm": 1.140625, "learning_rate": 1.6961014341143716e-05, "loss": 0.4559, "step": 5648 }, { "epoch": 0.775056596007409, "grad_norm": 1.2265625, "learning_rate": 1.6959977930701345e-05, "loss": 0.5659, "step": 5649 }, { "epoch": 0.7751937984496124, "grad_norm": 1.1171875, "learning_rate": 1.6958941375238215e-05, "loss": 0.4661, "step": 5650 }, { "epoch": 0.7753310008918158, "grad_norm": 1.203125, "learning_rate": 1.695790467477593e-05, "loss": 0.5456, "step": 5651 }, { "epoch": 0.7754682033340193, "grad_norm": 1.1171875, "learning_rate": 1.6956867829336078e-05, "loss": 0.4684, "step": 5652 }, { "epoch": 0.7756054057762228, "grad_norm": 1.1796875, "learning_rate": 1.6955830838940276e-05, "loss": 0.4761, "step": 5653 }, { "epoch": 0.7757426082184263, "grad_norm": 1.0625, "learning_rate": 1.695479370361012e-05, "loss": 0.4632, "step": 5654 }, { "epoch": 0.7758798106606297, "grad_norm": 1.09375, "learning_rate": 1.6953756423367233e-05, "loss": 0.4571, "step": 5655 }, { "epoch": 0.7760170131028332, "grad_norm": 1.15625, "learning_rate": 1.6952718998233214e-05, "loss": 0.486, "step": 5656 }, { "epoch": 0.7761542155450367, "grad_norm": 1.203125, "learning_rate": 1.695168142822969e-05, "loss": 0.5675, "step": 5657 }, { "epoch": 0.7762914179872402, "grad_norm": 1.203125, "learning_rate": 1.6950643713378278e-05, "loss": 0.4861, "step": 5658 }, { "epoch": 0.7764286204294436, "grad_norm": 1.21875, "learning_rate": 1.6949605853700596e-05, "loss": 0.5416, "step": 5659 }, { "epoch": 0.7765658228716471, "grad_norm": 1.09375, "learning_rate": 1.6948567849218274e-05, "loss": 0.4716, "step": 5660 }, { "epoch": 0.7767030253138506, "grad_norm": 1.1328125, "learning_rate": 1.6947529699952933e-05, "loss": 0.5209, "step": 5661 }, { "epoch": 0.7768402277560541, "grad_norm": 1.3515625, "learning_rate": 1.6946491405926215e-05, "loss": 0.5898, "step": 5662 }, { "epoch": 0.7769774301982575, "grad_norm": 1.2265625, "learning_rate": 1.6945452967159747e-05, "loss": 0.547, "step": 5663 }, { "epoch": 0.777114632640461, "grad_norm": 1.2265625, "learning_rate": 1.694441438367517e-05, "loss": 0.5549, "step": 5664 }, { "epoch": 0.7772518350826645, "grad_norm": 1.2109375, "learning_rate": 1.6943375655494118e-05, "loss": 0.5109, "step": 5665 }, { "epoch": 0.777389037524868, "grad_norm": 1.140625, "learning_rate": 1.694233678263824e-05, "loss": 0.4828, "step": 5666 }, { "epoch": 0.7775262399670714, "grad_norm": 1.2109375, "learning_rate": 1.6941297765129183e-05, "loss": 0.4756, "step": 5667 }, { "epoch": 0.7776634424092749, "grad_norm": 1.1328125, "learning_rate": 1.6940258602988596e-05, "loss": 0.5159, "step": 5668 }, { "epoch": 0.7778006448514784, "grad_norm": 1.234375, "learning_rate": 1.693921929623813e-05, "loss": 0.54, "step": 5669 }, { "epoch": 0.7779378472936819, "grad_norm": 1.171875, "learning_rate": 1.6938179844899433e-05, "loss": 0.5633, "step": 5670 }, { "epoch": 0.7780750497358853, "grad_norm": 1.1953125, "learning_rate": 1.6937140248994178e-05, "loss": 0.5423, "step": 5671 }, { "epoch": 0.7782122521780888, "grad_norm": 1.1171875, "learning_rate": 1.6936100508544015e-05, "loss": 0.4832, "step": 5672 }, { "epoch": 0.7783494546202923, "grad_norm": 1.2109375, "learning_rate": 1.6935060623570613e-05, "loss": 0.4786, "step": 5673 }, { "epoch": 0.7784866570624958, "grad_norm": 1.1015625, "learning_rate": 1.693402059409564e-05, "loss": 0.4543, "step": 5674 }, { "epoch": 0.7786238595046991, "grad_norm": 1.21875, "learning_rate": 1.6932980420140767e-05, "loss": 0.5621, "step": 5675 }, { "epoch": 0.7787610619469026, "grad_norm": 1.1640625, "learning_rate": 1.6931940101727663e-05, "loss": 0.5016, "step": 5676 }, { "epoch": 0.7788982643891061, "grad_norm": 1.1640625, "learning_rate": 1.693089963887801e-05, "loss": 0.4667, "step": 5677 }, { "epoch": 0.7790354668313096, "grad_norm": 1.21875, "learning_rate": 1.6929859031613482e-05, "loss": 0.5173, "step": 5678 }, { "epoch": 0.779172669273513, "grad_norm": 1.265625, "learning_rate": 1.6928818279955762e-05, "loss": 0.5847, "step": 5679 }, { "epoch": 0.7793098717157165, "grad_norm": 1.25, "learning_rate": 1.692777738392654e-05, "loss": 0.5352, "step": 5680 }, { "epoch": 0.77944707415792, "grad_norm": 1.234375, "learning_rate": 1.6926736343547504e-05, "loss": 0.6216, "step": 5681 }, { "epoch": 0.7795842766001235, "grad_norm": 1.3046875, "learning_rate": 1.6925695158840344e-05, "loss": 0.6027, "step": 5682 }, { "epoch": 0.7797214790423269, "grad_norm": 1.1328125, "learning_rate": 1.6924653829826752e-05, "loss": 0.4727, "step": 5683 }, { "epoch": 0.7798586814845304, "grad_norm": 1.25, "learning_rate": 1.692361235652843e-05, "loss": 0.5003, "step": 5684 }, { "epoch": 0.7799958839267339, "grad_norm": 1.1171875, "learning_rate": 1.6922570738967075e-05, "loss": 0.4916, "step": 5685 }, { "epoch": 0.7801330863689374, "grad_norm": 1.28125, "learning_rate": 1.6921528977164395e-05, "loss": 0.6123, "step": 5686 }, { "epoch": 0.7802702888111408, "grad_norm": 1.1796875, "learning_rate": 1.692048707114209e-05, "loss": 0.5123, "step": 5687 }, { "epoch": 0.7804074912533443, "grad_norm": 1.203125, "learning_rate": 1.6919445020921876e-05, "loss": 0.5653, "step": 5688 }, { "epoch": 0.7805446936955478, "grad_norm": 1.2109375, "learning_rate": 1.691840282652546e-05, "loss": 0.5336, "step": 5689 }, { "epoch": 0.7806818961377513, "grad_norm": 1.0703125, "learning_rate": 1.691736048797456e-05, "loss": 0.4192, "step": 5690 }, { "epoch": 0.7808190985799547, "grad_norm": 1.15625, "learning_rate": 1.6916318005290895e-05, "loss": 0.4642, "step": 5691 }, { "epoch": 0.7809563010221582, "grad_norm": 1.1953125, "learning_rate": 1.6915275378496187e-05, "loss": 0.5345, "step": 5692 }, { "epoch": 0.7810935034643617, "grad_norm": 1.1953125, "learning_rate": 1.691423260761216e-05, "loss": 0.5487, "step": 5693 }, { "epoch": 0.7812307059065652, "grad_norm": 1.234375, "learning_rate": 1.6913189692660538e-05, "loss": 0.5038, "step": 5694 }, { "epoch": 0.7813679083487686, "grad_norm": 1.1875, "learning_rate": 1.6912146633663057e-05, "loss": 0.5407, "step": 5695 }, { "epoch": 0.7815051107909721, "grad_norm": 1.171875, "learning_rate": 1.691110343064145e-05, "loss": 0.5116, "step": 5696 }, { "epoch": 0.7816423132331756, "grad_norm": 1.21875, "learning_rate": 1.691006008361745e-05, "loss": 0.5366, "step": 5697 }, { "epoch": 0.7817795156753791, "grad_norm": 1.140625, "learning_rate": 1.6909016592612802e-05, "loss": 0.4825, "step": 5698 }, { "epoch": 0.7819167181175825, "grad_norm": 1.1328125, "learning_rate": 1.690797295764924e-05, "loss": 0.4545, "step": 5699 }, { "epoch": 0.782053920559786, "grad_norm": 1.234375, "learning_rate": 1.6906929178748518e-05, "loss": 0.5361, "step": 5700 }, { "epoch": 0.7821911230019895, "grad_norm": 1.2109375, "learning_rate": 1.6905885255932383e-05, "loss": 0.5366, "step": 5701 }, { "epoch": 0.782328325444193, "grad_norm": 1.2578125, "learning_rate": 1.6904841189222586e-05, "loss": 0.5383, "step": 5702 }, { "epoch": 0.7824655278863963, "grad_norm": 1.21875, "learning_rate": 1.6903796978640876e-05, "loss": 0.5096, "step": 5703 }, { "epoch": 0.7826027303285998, "grad_norm": 1.2265625, "learning_rate": 1.6902752624209017e-05, "loss": 0.5038, "step": 5704 }, { "epoch": 0.7827399327708033, "grad_norm": 1.1640625, "learning_rate": 1.6901708125948768e-05, "loss": 0.4778, "step": 5705 }, { "epoch": 0.7828771352130068, "grad_norm": 1.21875, "learning_rate": 1.690066348388189e-05, "loss": 0.528, "step": 5706 }, { "epoch": 0.7830143376552102, "grad_norm": 1.1640625, "learning_rate": 1.6899618698030154e-05, "loss": 0.4787, "step": 5707 }, { "epoch": 0.7831515400974137, "grad_norm": 1.0625, "learning_rate": 1.6898573768415327e-05, "loss": 0.4532, "step": 5708 }, { "epoch": 0.7832887425396172, "grad_norm": 1.1953125, "learning_rate": 1.6897528695059186e-05, "loss": 0.457, "step": 5709 }, { "epoch": 0.7834259449818207, "grad_norm": 1.171875, "learning_rate": 1.6896483477983498e-05, "loss": 0.4912, "step": 5710 }, { "epoch": 0.7835631474240241, "grad_norm": 1.2578125, "learning_rate": 1.6895438117210046e-05, "loss": 0.6019, "step": 5711 }, { "epoch": 0.7837003498662276, "grad_norm": 1.1953125, "learning_rate": 1.6894392612760612e-05, "loss": 0.5328, "step": 5712 }, { "epoch": 0.7838375523084311, "grad_norm": 1.2734375, "learning_rate": 1.689334696465698e-05, "loss": 0.5146, "step": 5713 }, { "epoch": 0.7839747547506346, "grad_norm": 1.2421875, "learning_rate": 1.689230117292094e-05, "loss": 0.4848, "step": 5714 }, { "epoch": 0.784111957192838, "grad_norm": 1.2109375, "learning_rate": 1.689125523757428e-05, "loss": 0.5482, "step": 5715 }, { "epoch": 0.7842491596350415, "grad_norm": 1.046875, "learning_rate": 1.689020915863879e-05, "loss": 0.4328, "step": 5716 }, { "epoch": 0.784386362077245, "grad_norm": 1.0390625, "learning_rate": 1.6889162936136273e-05, "loss": 0.4343, "step": 5717 }, { "epoch": 0.7845235645194485, "grad_norm": 1.15625, "learning_rate": 1.6888116570088524e-05, "loss": 0.4628, "step": 5718 }, { "epoch": 0.7846607669616519, "grad_norm": 1.2890625, "learning_rate": 1.6887070060517347e-05, "loss": 0.5088, "step": 5719 }, { "epoch": 0.7847979694038554, "grad_norm": 1.1484375, "learning_rate": 1.688602340744455e-05, "loss": 0.4777, "step": 5720 }, { "epoch": 0.7849351718460589, "grad_norm": 1.2265625, "learning_rate": 1.6884976610891938e-05, "loss": 0.5171, "step": 5721 }, { "epoch": 0.7850723742882624, "grad_norm": 1.1875, "learning_rate": 1.688392967088132e-05, "loss": 0.5613, "step": 5722 }, { "epoch": 0.7852095767304658, "grad_norm": 1.3359375, "learning_rate": 1.6882882587434518e-05, "loss": 0.613, "step": 5723 }, { "epoch": 0.7853467791726693, "grad_norm": 1.125, "learning_rate": 1.6881835360573344e-05, "loss": 0.5295, "step": 5724 }, { "epoch": 0.7854839816148728, "grad_norm": 1.1953125, "learning_rate": 1.688078799031962e-05, "loss": 0.5167, "step": 5725 }, { "epoch": 0.7856211840570763, "grad_norm": 1.2578125, "learning_rate": 1.687974047669517e-05, "loss": 0.5947, "step": 5726 }, { "epoch": 0.7857583864992796, "grad_norm": 1.28125, "learning_rate": 1.687869281972182e-05, "loss": 0.6109, "step": 5727 }, { "epoch": 0.7858955889414831, "grad_norm": 1.1484375, "learning_rate": 1.6877645019421397e-05, "loss": 0.516, "step": 5728 }, { "epoch": 0.7860327913836866, "grad_norm": 1.109375, "learning_rate": 1.6876597075815736e-05, "loss": 0.4261, "step": 5729 }, { "epoch": 0.7861699938258901, "grad_norm": 1.2734375, "learning_rate": 1.6875548988926673e-05, "loss": 0.561, "step": 5730 }, { "epoch": 0.7863071962680935, "grad_norm": 1.171875, "learning_rate": 1.6874500758776044e-05, "loss": 0.4577, "step": 5731 }, { "epoch": 0.786444398710297, "grad_norm": 1.2109375, "learning_rate": 1.6873452385385692e-05, "loss": 0.5168, "step": 5732 }, { "epoch": 0.7865816011525005, "grad_norm": 1.1796875, "learning_rate": 1.687240386877746e-05, "loss": 0.5073, "step": 5733 }, { "epoch": 0.786718803594704, "grad_norm": 1.1640625, "learning_rate": 1.6871355208973195e-05, "loss": 0.4722, "step": 5734 }, { "epoch": 0.7868560060369074, "grad_norm": 1.0859375, "learning_rate": 1.687030640599475e-05, "loss": 0.3985, "step": 5735 }, { "epoch": 0.7869932084791109, "grad_norm": 1.171875, "learning_rate": 1.6869257459863976e-05, "loss": 0.4646, "step": 5736 }, { "epoch": 0.7871304109213144, "grad_norm": 1.2421875, "learning_rate": 1.686820837060273e-05, "loss": 0.565, "step": 5737 }, { "epoch": 0.7872676133635179, "grad_norm": 1.21875, "learning_rate": 1.6867159138232868e-05, "loss": 0.5111, "step": 5738 }, { "epoch": 0.7874048158057213, "grad_norm": 1.171875, "learning_rate": 1.6866109762776258e-05, "loss": 0.4592, "step": 5739 }, { "epoch": 0.7875420182479248, "grad_norm": 1.2734375, "learning_rate": 1.6865060244254764e-05, "loss": 0.5642, "step": 5740 }, { "epoch": 0.7876792206901283, "grad_norm": 1.1484375, "learning_rate": 1.6864010582690248e-05, "loss": 0.4731, "step": 5741 }, { "epoch": 0.7878164231323318, "grad_norm": 1.15625, "learning_rate": 1.6862960778104592e-05, "loss": 0.5585, "step": 5742 }, { "epoch": 0.7879536255745352, "grad_norm": 1.1484375, "learning_rate": 1.686191083051966e-05, "loss": 0.4911, "step": 5743 }, { "epoch": 0.7880908280167387, "grad_norm": 1.171875, "learning_rate": 1.686086073995733e-05, "loss": 0.4943, "step": 5744 }, { "epoch": 0.7882280304589422, "grad_norm": 1.2265625, "learning_rate": 1.6859810506439493e-05, "loss": 0.5285, "step": 5745 }, { "epoch": 0.7883652329011457, "grad_norm": 1.2265625, "learning_rate": 1.6858760129988016e-05, "loss": 0.6075, "step": 5746 }, { "epoch": 0.7885024353433491, "grad_norm": 1.203125, "learning_rate": 1.68577096106248e-05, "loss": 0.5492, "step": 5747 }, { "epoch": 0.7886396377855526, "grad_norm": 1.1015625, "learning_rate": 1.685665894837172e-05, "loss": 0.4539, "step": 5748 }, { "epoch": 0.7887768402277561, "grad_norm": 1.203125, "learning_rate": 1.685560814325068e-05, "loss": 0.5235, "step": 5749 }, { "epoch": 0.7889140426699596, "grad_norm": 1.1875, "learning_rate": 1.685455719528357e-05, "loss": 0.5285, "step": 5750 }, { "epoch": 0.789051245112163, "grad_norm": 1.171875, "learning_rate": 1.6853506104492285e-05, "loss": 0.5165, "step": 5751 }, { "epoch": 0.7891884475543665, "grad_norm": 1.1953125, "learning_rate": 1.685245487089873e-05, "loss": 0.5011, "step": 5752 }, { "epoch": 0.78932564999657, "grad_norm": 1.1015625, "learning_rate": 1.685140349452481e-05, "loss": 0.4411, "step": 5753 }, { "epoch": 0.7894628524387735, "grad_norm": 1.1171875, "learning_rate": 1.6850351975392428e-05, "loss": 0.4842, "step": 5754 }, { "epoch": 0.7896000548809768, "grad_norm": 1.3125, "learning_rate": 1.6849300313523497e-05, "loss": 0.5909, "step": 5755 }, { "epoch": 0.7897372573231803, "grad_norm": 1.2421875, "learning_rate": 1.6848248508939926e-05, "loss": 0.5199, "step": 5756 }, { "epoch": 0.7898744597653838, "grad_norm": 1.3046875, "learning_rate": 1.6847196561663633e-05, "loss": 0.5784, "step": 5757 }, { "epoch": 0.7900116622075873, "grad_norm": 1.265625, "learning_rate": 1.6846144471716543e-05, "loss": 0.5937, "step": 5758 }, { "epoch": 0.7901488646497907, "grad_norm": 1.359375, "learning_rate": 1.684509223912057e-05, "loss": 0.5587, "step": 5759 }, { "epoch": 0.7902860670919942, "grad_norm": 1.2109375, "learning_rate": 1.6844039863897634e-05, "loss": 0.5427, "step": 5760 }, { "epoch": 0.7904232695341977, "grad_norm": 1.203125, "learning_rate": 1.6842987346069675e-05, "loss": 0.4716, "step": 5761 }, { "epoch": 0.7905604719764012, "grad_norm": 1.2421875, "learning_rate": 1.6841934685658617e-05, "loss": 0.5212, "step": 5762 }, { "epoch": 0.7906976744186046, "grad_norm": 1.1875, "learning_rate": 1.684088188268639e-05, "loss": 0.51, "step": 5763 }, { "epoch": 0.7908348768608081, "grad_norm": 1.0703125, "learning_rate": 1.6839828937174943e-05, "loss": 0.4475, "step": 5764 }, { "epoch": 0.7909720793030116, "grad_norm": 1.0703125, "learning_rate": 1.6838775849146203e-05, "loss": 0.4301, "step": 5765 }, { "epoch": 0.7911092817452151, "grad_norm": 1.2265625, "learning_rate": 1.6837722618622117e-05, "loss": 0.5119, "step": 5766 }, { "epoch": 0.7912464841874185, "grad_norm": 1.2109375, "learning_rate": 1.6836669245624633e-05, "loss": 0.5631, "step": 5767 }, { "epoch": 0.791383686629622, "grad_norm": 1.09375, "learning_rate": 1.6835615730175695e-05, "loss": 0.4858, "step": 5768 }, { "epoch": 0.7915208890718255, "grad_norm": 1.15625, "learning_rate": 1.683456207229726e-05, "loss": 0.4881, "step": 5769 }, { "epoch": 0.791658091514029, "grad_norm": 1.1484375, "learning_rate": 1.6833508272011274e-05, "loss": 0.5197, "step": 5770 }, { "epoch": 0.7917952939562324, "grad_norm": 1.1796875, "learning_rate": 1.68324543293397e-05, "loss": 0.5072, "step": 5771 }, { "epoch": 0.7919324963984359, "grad_norm": 1.1796875, "learning_rate": 1.6831400244304505e-05, "loss": 0.5243, "step": 5772 }, { "epoch": 0.7920696988406394, "grad_norm": 1.1640625, "learning_rate": 1.6830346016927638e-05, "loss": 0.5367, "step": 5773 }, { "epoch": 0.7922069012828429, "grad_norm": 1.265625, "learning_rate": 1.6829291647231076e-05, "loss": 0.5523, "step": 5774 }, { "epoch": 0.7923441037250463, "grad_norm": 1.2265625, "learning_rate": 1.682823713523678e-05, "loss": 0.568, "step": 5775 }, { "epoch": 0.7924813061672498, "grad_norm": 1.15625, "learning_rate": 1.6827182480966732e-05, "loss": 0.4826, "step": 5776 }, { "epoch": 0.7926185086094533, "grad_norm": 1.1484375, "learning_rate": 1.6826127684442898e-05, "loss": 0.4544, "step": 5777 }, { "epoch": 0.7927557110516568, "grad_norm": 1.1953125, "learning_rate": 1.6825072745687264e-05, "loss": 0.5186, "step": 5778 }, { "epoch": 0.7928929134938602, "grad_norm": 1.25, "learning_rate": 1.6824017664721804e-05, "loss": 0.5703, "step": 5779 }, { "epoch": 0.7930301159360637, "grad_norm": 1.0703125, "learning_rate": 1.682296244156851e-05, "loss": 0.4797, "step": 5780 }, { "epoch": 0.7931673183782671, "grad_norm": 1.1796875, "learning_rate": 1.682190707624936e-05, "loss": 0.5019, "step": 5781 }, { "epoch": 0.7933045208204706, "grad_norm": 1.21875, "learning_rate": 1.682085156878635e-05, "loss": 0.5325, "step": 5782 }, { "epoch": 0.793441723262674, "grad_norm": 1.171875, "learning_rate": 1.6819795919201473e-05, "loss": 0.5257, "step": 5783 }, { "epoch": 0.7935789257048775, "grad_norm": 1.1796875, "learning_rate": 1.681874012751672e-05, "loss": 0.4813, "step": 5784 }, { "epoch": 0.793716128147081, "grad_norm": 1.328125, "learning_rate": 1.6817684193754097e-05, "loss": 0.6442, "step": 5785 }, { "epoch": 0.7938533305892845, "grad_norm": 1.109375, "learning_rate": 1.68166281179356e-05, "loss": 0.517, "step": 5786 }, { "epoch": 0.7939905330314879, "grad_norm": 1.2109375, "learning_rate": 1.6815571900083237e-05, "loss": 0.4852, "step": 5787 }, { "epoch": 0.7941277354736914, "grad_norm": 1.0859375, "learning_rate": 1.681451554021901e-05, "loss": 0.4431, "step": 5788 }, { "epoch": 0.7942649379158949, "grad_norm": 1.21875, "learning_rate": 1.681345903836494e-05, "loss": 0.4946, "step": 5789 }, { "epoch": 0.7944021403580984, "grad_norm": 1.1875, "learning_rate": 1.681240239454303e-05, "loss": 0.5071, "step": 5790 }, { "epoch": 0.7945393428003018, "grad_norm": 1.234375, "learning_rate": 1.6811345608775304e-05, "loss": 0.5086, "step": 5791 }, { "epoch": 0.7946765452425053, "grad_norm": 1.2734375, "learning_rate": 1.681028868108378e-05, "loss": 0.6071, "step": 5792 }, { "epoch": 0.7948137476847088, "grad_norm": 1.1875, "learning_rate": 1.680923161149048e-05, "loss": 0.5358, "step": 5793 }, { "epoch": 0.7949509501269123, "grad_norm": 1.1328125, "learning_rate": 1.680817440001743e-05, "loss": 0.5004, "step": 5794 }, { "epoch": 0.7950881525691157, "grad_norm": 1.34375, "learning_rate": 1.6807117046686654e-05, "loss": 0.5131, "step": 5795 }, { "epoch": 0.7952253550113192, "grad_norm": 1.15625, "learning_rate": 1.6806059551520187e-05, "loss": 0.5287, "step": 5796 }, { "epoch": 0.7953625574535227, "grad_norm": 1.171875, "learning_rate": 1.680500191454007e-05, "loss": 0.516, "step": 5797 }, { "epoch": 0.7954997598957262, "grad_norm": 1.21875, "learning_rate": 1.680394413576833e-05, "loss": 0.5171, "step": 5798 }, { "epoch": 0.7956369623379296, "grad_norm": 1.21875, "learning_rate": 1.680288621522701e-05, "loss": 0.5317, "step": 5799 }, { "epoch": 0.7957741647801331, "grad_norm": 1.1796875, "learning_rate": 1.6801828152938157e-05, "loss": 0.4868, "step": 5800 }, { "epoch": 0.7959113672223366, "grad_norm": 1.1640625, "learning_rate": 1.680076994892381e-05, "loss": 0.5049, "step": 5801 }, { "epoch": 0.7960485696645401, "grad_norm": 1.109375, "learning_rate": 1.6799711603206027e-05, "loss": 0.4834, "step": 5802 }, { "epoch": 0.7961857721067435, "grad_norm": 1.140625, "learning_rate": 1.6798653115806853e-05, "loss": 0.4481, "step": 5803 }, { "epoch": 0.796322974548947, "grad_norm": 1.1640625, "learning_rate": 1.6797594486748347e-05, "loss": 0.4927, "step": 5804 }, { "epoch": 0.7964601769911505, "grad_norm": 1.1484375, "learning_rate": 1.6796535716052566e-05, "loss": 0.3817, "step": 5805 }, { "epoch": 0.796597379433354, "grad_norm": 1.0703125, "learning_rate": 1.679547680374157e-05, "loss": 0.4598, "step": 5806 }, { "epoch": 0.7967345818755573, "grad_norm": 1.2265625, "learning_rate": 1.6794417749837424e-05, "loss": 0.4973, "step": 5807 }, { "epoch": 0.7968717843177608, "grad_norm": 1.1796875, "learning_rate": 1.6793358554362196e-05, "loss": 0.4451, "step": 5808 }, { "epoch": 0.7970089867599643, "grad_norm": 1.203125, "learning_rate": 1.6792299217337954e-05, "loss": 0.5095, "step": 5809 }, { "epoch": 0.7971461892021678, "grad_norm": 1.21875, "learning_rate": 1.6791239738786768e-05, "loss": 0.5104, "step": 5810 }, { "epoch": 0.7972833916443712, "grad_norm": 1.1015625, "learning_rate": 1.679018011873072e-05, "loss": 0.459, "step": 5811 }, { "epoch": 0.7974205940865747, "grad_norm": 1.1328125, "learning_rate": 1.6789120357191886e-05, "loss": 0.4323, "step": 5812 }, { "epoch": 0.7975577965287782, "grad_norm": 1.078125, "learning_rate": 1.6788060454192344e-05, "loss": 0.4752, "step": 5813 }, { "epoch": 0.7976949989709817, "grad_norm": 1.234375, "learning_rate": 1.6787000409754184e-05, "loss": 0.5258, "step": 5814 }, { "epoch": 0.7978322014131851, "grad_norm": 1.296875, "learning_rate": 1.678594022389949e-05, "loss": 0.5124, "step": 5815 }, { "epoch": 0.7979694038553886, "grad_norm": 1.15625, "learning_rate": 1.6784879896650354e-05, "loss": 0.5488, "step": 5816 }, { "epoch": 0.7981066062975921, "grad_norm": 1.109375, "learning_rate": 1.6783819428028868e-05, "loss": 0.4532, "step": 5817 }, { "epoch": 0.7982438087397956, "grad_norm": 1.1796875, "learning_rate": 1.678275881805713e-05, "loss": 0.4481, "step": 5818 }, { "epoch": 0.798381011181999, "grad_norm": 1.25, "learning_rate": 1.6781698066757238e-05, "loss": 0.5313, "step": 5819 }, { "epoch": 0.7985182136242025, "grad_norm": 1.2265625, "learning_rate": 1.6780637174151293e-05, "loss": 0.5149, "step": 5820 }, { "epoch": 0.798655416066406, "grad_norm": 1.15625, "learning_rate": 1.6779576140261404e-05, "loss": 0.5024, "step": 5821 }, { "epoch": 0.7987926185086095, "grad_norm": 1.203125, "learning_rate": 1.6778514965109676e-05, "loss": 0.5212, "step": 5822 }, { "epoch": 0.7989298209508129, "grad_norm": 1.1328125, "learning_rate": 1.6777453648718222e-05, "loss": 0.4637, "step": 5823 }, { "epoch": 0.7990670233930164, "grad_norm": 1.0703125, "learning_rate": 1.6776392191109155e-05, "loss": 0.4501, "step": 5824 }, { "epoch": 0.7992042258352199, "grad_norm": 1.171875, "learning_rate": 1.6775330592304594e-05, "loss": 0.5196, "step": 5825 }, { "epoch": 0.7993414282774234, "grad_norm": 1.1640625, "learning_rate": 1.677426885232665e-05, "loss": 0.5304, "step": 5826 }, { "epoch": 0.7994786307196268, "grad_norm": 1.109375, "learning_rate": 1.677320697119746e-05, "loss": 0.4337, "step": 5827 }, { "epoch": 0.7996158331618303, "grad_norm": 1.1796875, "learning_rate": 1.677214494893914e-05, "loss": 0.4996, "step": 5828 }, { "epoch": 0.7997530356040338, "grad_norm": 1.2109375, "learning_rate": 1.677108278557382e-05, "loss": 0.5344, "step": 5829 }, { "epoch": 0.7998902380462373, "grad_norm": 1.25, "learning_rate": 1.6770020481123633e-05, "loss": 0.601, "step": 5830 }, { "epoch": 0.8000274404884407, "grad_norm": 1.2421875, "learning_rate": 1.6768958035610715e-05, "loss": 0.4999, "step": 5831 }, { "epoch": 0.8001646429306442, "grad_norm": 1.1953125, "learning_rate": 1.6767895449057203e-05, "loss": 0.5359, "step": 5832 }, { "epoch": 0.8003018453728477, "grad_norm": 1.1484375, "learning_rate": 1.676683272148523e-05, "loss": 0.4382, "step": 5833 }, { "epoch": 0.8004390478150512, "grad_norm": 1.2734375, "learning_rate": 1.6765769852916953e-05, "loss": 0.5581, "step": 5834 }, { "epoch": 0.8005762502572545, "grad_norm": 1.15625, "learning_rate": 1.6764706843374506e-05, "loss": 0.4949, "step": 5835 }, { "epoch": 0.800713452699458, "grad_norm": 1.1796875, "learning_rate": 1.6763643692880046e-05, "loss": 0.5278, "step": 5836 }, { "epoch": 0.8008506551416615, "grad_norm": 1.171875, "learning_rate": 1.676258040145572e-05, "loss": 0.4724, "step": 5837 }, { "epoch": 0.800987857583865, "grad_norm": 1.1875, "learning_rate": 1.676151696912369e-05, "loss": 0.5036, "step": 5838 }, { "epoch": 0.8011250600260684, "grad_norm": 1.21875, "learning_rate": 1.6760453395906108e-05, "loss": 0.4763, "step": 5839 }, { "epoch": 0.8012622624682719, "grad_norm": 1.1875, "learning_rate": 1.6759389681825135e-05, "loss": 0.5332, "step": 5840 }, { "epoch": 0.8013994649104754, "grad_norm": 1.140625, "learning_rate": 1.675832582690294e-05, "loss": 0.473, "step": 5841 }, { "epoch": 0.8015366673526789, "grad_norm": 1.3203125, "learning_rate": 1.675726183116168e-05, "loss": 0.5635, "step": 5842 }, { "epoch": 0.8016738697948823, "grad_norm": 1.1953125, "learning_rate": 1.6756197694623537e-05, "loss": 0.5616, "step": 5843 }, { "epoch": 0.8018110722370858, "grad_norm": 1.3125, "learning_rate": 1.6755133417310675e-05, "loss": 0.5397, "step": 5844 }, { "epoch": 0.8019482746792893, "grad_norm": 1.25, "learning_rate": 1.6754068999245278e-05, "loss": 0.5307, "step": 5845 }, { "epoch": 0.8020854771214928, "grad_norm": 1.203125, "learning_rate": 1.6753004440449514e-05, "loss": 0.516, "step": 5846 }, { "epoch": 0.8022226795636962, "grad_norm": 1.1953125, "learning_rate": 1.6751939740945572e-05, "loss": 0.4728, "step": 5847 }, { "epoch": 0.8023598820058997, "grad_norm": 1.2421875, "learning_rate": 1.6750874900755637e-05, "loss": 0.5495, "step": 5848 }, { "epoch": 0.8024970844481032, "grad_norm": 1.203125, "learning_rate": 1.674980991990189e-05, "loss": 0.4772, "step": 5849 }, { "epoch": 0.8026342868903067, "grad_norm": 1.2890625, "learning_rate": 1.6748744798406525e-05, "loss": 0.5186, "step": 5850 }, { "epoch": 0.8027714893325101, "grad_norm": 1.234375, "learning_rate": 1.674767953629174e-05, "loss": 0.4812, "step": 5851 }, { "epoch": 0.8029086917747136, "grad_norm": 1.2109375, "learning_rate": 1.6746614133579723e-05, "loss": 0.5666, "step": 5852 }, { "epoch": 0.8030458942169171, "grad_norm": 1.171875, "learning_rate": 1.674554859029268e-05, "loss": 0.4849, "step": 5853 }, { "epoch": 0.8031830966591206, "grad_norm": 1.1875, "learning_rate": 1.674448290645281e-05, "loss": 0.4829, "step": 5854 }, { "epoch": 0.803320299101324, "grad_norm": 1.0390625, "learning_rate": 1.6743417082082317e-05, "loss": 0.408, "step": 5855 }, { "epoch": 0.8034575015435275, "grad_norm": 1.15625, "learning_rate": 1.674235111720341e-05, "loss": 0.4854, "step": 5856 }, { "epoch": 0.803594703985731, "grad_norm": 1.203125, "learning_rate": 1.6741285011838296e-05, "loss": 0.5495, "step": 5857 }, { "epoch": 0.8037319064279345, "grad_norm": 1.2578125, "learning_rate": 1.6740218766009196e-05, "loss": 0.5412, "step": 5858 }, { "epoch": 0.8038691088701378, "grad_norm": 1.1640625, "learning_rate": 1.6739152379738324e-05, "loss": 0.5221, "step": 5859 }, { "epoch": 0.8040063113123413, "grad_norm": 1.1484375, "learning_rate": 1.67380858530479e-05, "loss": 0.4917, "step": 5860 }, { "epoch": 0.8041435137545448, "grad_norm": 1.15625, "learning_rate": 1.6737019185960145e-05, "loss": 0.4487, "step": 5861 }, { "epoch": 0.8042807161967483, "grad_norm": 1.2109375, "learning_rate": 1.673595237849728e-05, "loss": 0.5391, "step": 5862 }, { "epoch": 0.8044179186389517, "grad_norm": 1.1953125, "learning_rate": 1.6734885430681546e-05, "loss": 0.5329, "step": 5863 }, { "epoch": 0.8045551210811552, "grad_norm": 1.1328125, "learning_rate": 1.673381834253516e-05, "loss": 0.5068, "step": 5864 }, { "epoch": 0.8046923235233587, "grad_norm": 1.140625, "learning_rate": 1.6732751114080365e-05, "loss": 0.4891, "step": 5865 }, { "epoch": 0.8048295259655622, "grad_norm": 1.2734375, "learning_rate": 1.6731683745339396e-05, "loss": 0.5425, "step": 5866 }, { "epoch": 0.8049667284077656, "grad_norm": 1.1796875, "learning_rate": 1.6730616236334497e-05, "loss": 0.5201, "step": 5867 }, { "epoch": 0.8051039308499691, "grad_norm": 1.1171875, "learning_rate": 1.67295485870879e-05, "loss": 0.4867, "step": 5868 }, { "epoch": 0.8052411332921726, "grad_norm": 1.125, "learning_rate": 1.6728480797621866e-05, "loss": 0.4784, "step": 5869 }, { "epoch": 0.8053783357343761, "grad_norm": 1.0703125, "learning_rate": 1.672741286795863e-05, "loss": 0.4414, "step": 5870 }, { "epoch": 0.8055155381765795, "grad_norm": 1.328125, "learning_rate": 1.6726344798120454e-05, "loss": 0.5576, "step": 5871 }, { "epoch": 0.805652740618783, "grad_norm": 1.2421875, "learning_rate": 1.6725276588129583e-05, "loss": 0.5511, "step": 5872 }, { "epoch": 0.8057899430609865, "grad_norm": 1.109375, "learning_rate": 1.6724208238008283e-05, "loss": 0.4364, "step": 5873 }, { "epoch": 0.80592714550319, "grad_norm": 1.2734375, "learning_rate": 1.6723139747778815e-05, "loss": 0.5737, "step": 5874 }, { "epoch": 0.8060643479453934, "grad_norm": 1.2578125, "learning_rate": 1.6722071117463437e-05, "loss": 0.5757, "step": 5875 }, { "epoch": 0.8062015503875969, "grad_norm": 1.1796875, "learning_rate": 1.6721002347084415e-05, "loss": 0.5012, "step": 5876 }, { "epoch": 0.8063387528298004, "grad_norm": 1.2421875, "learning_rate": 1.6719933436664027e-05, "loss": 0.5091, "step": 5877 }, { "epoch": 0.8064759552720039, "grad_norm": 1.3203125, "learning_rate": 1.6718864386224535e-05, "loss": 0.5747, "step": 5878 }, { "epoch": 0.8066131577142073, "grad_norm": 1.15625, "learning_rate": 1.6717795195788217e-05, "loss": 0.4834, "step": 5879 }, { "epoch": 0.8067503601564108, "grad_norm": 1.25, "learning_rate": 1.6716725865377354e-05, "loss": 0.5018, "step": 5880 }, { "epoch": 0.8068875625986143, "grad_norm": 1.1953125, "learning_rate": 1.6715656395014224e-05, "loss": 0.5132, "step": 5881 }, { "epoch": 0.8070247650408178, "grad_norm": 1.234375, "learning_rate": 1.6714586784721113e-05, "loss": 0.5216, "step": 5882 }, { "epoch": 0.8071619674830212, "grad_norm": 1.28125, "learning_rate": 1.671351703452031e-05, "loss": 0.5369, "step": 5883 }, { "epoch": 0.8072991699252247, "grad_norm": 1.1171875, "learning_rate": 1.6712447144434098e-05, "loss": 0.4399, "step": 5884 }, { "epoch": 0.8074363723674282, "grad_norm": 1.0390625, "learning_rate": 1.6711377114484774e-05, "loss": 0.4507, "step": 5885 }, { "epoch": 0.8075735748096317, "grad_norm": 1.1953125, "learning_rate": 1.6710306944694634e-05, "loss": 0.4443, "step": 5886 }, { "epoch": 0.807710777251835, "grad_norm": 1.2109375, "learning_rate": 1.6709236635085976e-05, "loss": 0.5591, "step": 5887 }, { "epoch": 0.8078479796940385, "grad_norm": 1.2109375, "learning_rate": 1.6708166185681102e-05, "loss": 0.5366, "step": 5888 }, { "epoch": 0.807985182136242, "grad_norm": 1.2109375, "learning_rate": 1.670709559650231e-05, "loss": 0.5651, "step": 5889 }, { "epoch": 0.8081223845784455, "grad_norm": 1.140625, "learning_rate": 1.6706024867571915e-05, "loss": 0.4647, "step": 5890 }, { "epoch": 0.8082595870206489, "grad_norm": 1.2890625, "learning_rate": 1.6704953998912224e-05, "loss": 0.526, "step": 5891 }, { "epoch": 0.8083967894628524, "grad_norm": 1.2578125, "learning_rate": 1.670388299054555e-05, "loss": 0.544, "step": 5892 }, { "epoch": 0.8085339919050559, "grad_norm": 1.2265625, "learning_rate": 1.670281184249421e-05, "loss": 0.5527, "step": 5893 }, { "epoch": 0.8086711943472594, "grad_norm": 1.2890625, "learning_rate": 1.6701740554780525e-05, "loss": 0.574, "step": 5894 }, { "epoch": 0.8088083967894628, "grad_norm": 1.1015625, "learning_rate": 1.670066912742681e-05, "loss": 0.4252, "step": 5895 }, { "epoch": 0.8089455992316663, "grad_norm": 1.21875, "learning_rate": 1.6699597560455396e-05, "loss": 0.4931, "step": 5896 }, { "epoch": 0.8090828016738698, "grad_norm": 1.203125, "learning_rate": 1.6698525853888606e-05, "loss": 0.5385, "step": 5897 }, { "epoch": 0.8092200041160733, "grad_norm": 1.25, "learning_rate": 1.6697454007748774e-05, "loss": 0.5655, "step": 5898 }, { "epoch": 0.8093572065582767, "grad_norm": 1.34375, "learning_rate": 1.6696382022058235e-05, "loss": 0.5686, "step": 5899 }, { "epoch": 0.8094944090004802, "grad_norm": 1.2421875, "learning_rate": 1.669530989683932e-05, "loss": 0.5457, "step": 5900 }, { "epoch": 0.8096316114426837, "grad_norm": 1.2578125, "learning_rate": 1.6694237632114368e-05, "loss": 0.5814, "step": 5901 }, { "epoch": 0.8097688138848872, "grad_norm": 1.2265625, "learning_rate": 1.6693165227905726e-05, "loss": 0.412, "step": 5902 }, { "epoch": 0.8099060163270906, "grad_norm": 1.25, "learning_rate": 1.6692092684235736e-05, "loss": 0.5683, "step": 5903 }, { "epoch": 0.8100432187692941, "grad_norm": 1.296875, "learning_rate": 1.6691020001126745e-05, "loss": 0.6095, "step": 5904 }, { "epoch": 0.8101804212114976, "grad_norm": 1.1796875, "learning_rate": 1.668994717860111e-05, "loss": 0.5342, "step": 5905 }, { "epoch": 0.8103176236537011, "grad_norm": 1.1953125, "learning_rate": 1.6688874216681176e-05, "loss": 0.5619, "step": 5906 }, { "epoch": 0.8104548260959045, "grad_norm": 1.140625, "learning_rate": 1.6687801115389305e-05, "loss": 0.5264, "step": 5907 }, { "epoch": 0.810592028538108, "grad_norm": 1.203125, "learning_rate": 1.668672787474786e-05, "loss": 0.5371, "step": 5908 }, { "epoch": 0.8107292309803115, "grad_norm": 1.2265625, "learning_rate": 1.6685654494779195e-05, "loss": 0.5155, "step": 5909 }, { "epoch": 0.810866433422515, "grad_norm": 1.0546875, "learning_rate": 1.6684580975505678e-05, "loss": 0.4436, "step": 5910 }, { "epoch": 0.8110036358647184, "grad_norm": 1.203125, "learning_rate": 1.668350731694968e-05, "loss": 0.4926, "step": 5911 }, { "epoch": 0.8111408383069219, "grad_norm": 1.15625, "learning_rate": 1.6682433519133573e-05, "loss": 0.4675, "step": 5912 }, { "epoch": 0.8112780407491254, "grad_norm": 1.25, "learning_rate": 1.6681359582079727e-05, "loss": 0.5336, "step": 5913 }, { "epoch": 0.8114152431913288, "grad_norm": 1.09375, "learning_rate": 1.6680285505810518e-05, "loss": 0.5199, "step": 5914 }, { "epoch": 0.8115524456335322, "grad_norm": 1.1796875, "learning_rate": 1.667921129034833e-05, "loss": 0.5126, "step": 5915 }, { "epoch": 0.8116896480757357, "grad_norm": 1.1015625, "learning_rate": 1.667813693571555e-05, "loss": 0.4834, "step": 5916 }, { "epoch": 0.8118268505179392, "grad_norm": 1.2265625, "learning_rate": 1.6677062441934547e-05, "loss": 0.4783, "step": 5917 }, { "epoch": 0.8119640529601427, "grad_norm": 1.140625, "learning_rate": 1.6675987809027733e-05, "loss": 0.5054, "step": 5918 }, { "epoch": 0.8121012554023461, "grad_norm": 1.2109375, "learning_rate": 1.667491303701748e-05, "loss": 0.5349, "step": 5919 }, { "epoch": 0.8122384578445496, "grad_norm": 1.2890625, "learning_rate": 1.6673838125926186e-05, "loss": 0.5645, "step": 5920 }, { "epoch": 0.8123756602867531, "grad_norm": 1.015625, "learning_rate": 1.6672763075776258e-05, "loss": 0.4264, "step": 5921 }, { "epoch": 0.8125128627289566, "grad_norm": 1.1875, "learning_rate": 1.6671687886590087e-05, "loss": 0.4825, "step": 5922 }, { "epoch": 0.81265006517116, "grad_norm": 1.2265625, "learning_rate": 1.6670612558390077e-05, "loss": 0.5358, "step": 5923 }, { "epoch": 0.8127872676133635, "grad_norm": 1.0625, "learning_rate": 1.6669537091198643e-05, "loss": 0.4397, "step": 5924 }, { "epoch": 0.812924470055567, "grad_norm": 1.21875, "learning_rate": 1.666846148503818e-05, "loss": 0.5045, "step": 5925 }, { "epoch": 0.8130616724977705, "grad_norm": 1.1484375, "learning_rate": 1.6667385739931104e-05, "loss": 0.4503, "step": 5926 }, { "epoch": 0.8131988749399739, "grad_norm": 1.203125, "learning_rate": 1.6666309855899842e-05, "loss": 0.5403, "step": 5927 }, { "epoch": 0.8133360773821774, "grad_norm": 1.1875, "learning_rate": 1.666523383296679e-05, "loss": 0.4869, "step": 5928 }, { "epoch": 0.8134732798243809, "grad_norm": 1.265625, "learning_rate": 1.6664157671154387e-05, "loss": 0.5055, "step": 5929 }, { "epoch": 0.8136104822665844, "grad_norm": 1.1171875, "learning_rate": 1.666308137048505e-05, "loss": 0.4351, "step": 5930 }, { "epoch": 0.8137476847087878, "grad_norm": 1.1953125, "learning_rate": 1.6662004930981202e-05, "loss": 0.5186, "step": 5931 }, { "epoch": 0.8138848871509913, "grad_norm": 1.125, "learning_rate": 1.6660928352665276e-05, "loss": 0.4436, "step": 5932 }, { "epoch": 0.8140220895931948, "grad_norm": 1.15625, "learning_rate": 1.6659851635559702e-05, "loss": 0.4822, "step": 5933 }, { "epoch": 0.8141592920353983, "grad_norm": 1.1640625, "learning_rate": 1.6658774779686916e-05, "loss": 0.4888, "step": 5934 }, { "epoch": 0.8142964944776017, "grad_norm": 1.1328125, "learning_rate": 1.665769778506935e-05, "loss": 0.464, "step": 5935 }, { "epoch": 0.8144336969198052, "grad_norm": 1.2265625, "learning_rate": 1.6656620651729458e-05, "loss": 0.4315, "step": 5936 }, { "epoch": 0.8145708993620087, "grad_norm": 1.1171875, "learning_rate": 1.665554337968967e-05, "loss": 0.4241, "step": 5937 }, { "epoch": 0.8147081018042122, "grad_norm": 1.203125, "learning_rate": 1.665446596897244e-05, "loss": 0.5285, "step": 5938 }, { "epoch": 0.8148453042464155, "grad_norm": 1.015625, "learning_rate": 1.6653388419600216e-05, "loss": 0.3285, "step": 5939 }, { "epoch": 0.814982506688619, "grad_norm": 1.171875, "learning_rate": 1.6652310731595445e-05, "loss": 0.5179, "step": 5940 }, { "epoch": 0.8151197091308225, "grad_norm": 1.0859375, "learning_rate": 1.6651232904980594e-05, "loss": 0.49, "step": 5941 }, { "epoch": 0.815256911573026, "grad_norm": 1.1484375, "learning_rate": 1.6650154939778108e-05, "loss": 0.4855, "step": 5942 }, { "epoch": 0.8153941140152294, "grad_norm": 1.3203125, "learning_rate": 1.6649076836010457e-05, "loss": 0.571, "step": 5943 }, { "epoch": 0.8155313164574329, "grad_norm": 1.0546875, "learning_rate": 1.66479985937001e-05, "loss": 0.402, "step": 5944 }, { "epoch": 0.8156685188996364, "grad_norm": 1.09375, "learning_rate": 1.6646920212869503e-05, "loss": 0.4091, "step": 5945 }, { "epoch": 0.8158057213418399, "grad_norm": 1.40625, "learning_rate": 1.664584169354114e-05, "loss": 0.5505, "step": 5946 }, { "epoch": 0.8159429237840433, "grad_norm": 1.2265625, "learning_rate": 1.664476303573748e-05, "loss": 0.531, "step": 5947 }, { "epoch": 0.8160801262262468, "grad_norm": 1.1640625, "learning_rate": 1.6643684239481e-05, "loss": 0.5096, "step": 5948 }, { "epoch": 0.8162173286684503, "grad_norm": 1.09375, "learning_rate": 1.6642605304794173e-05, "loss": 0.4475, "step": 5949 }, { "epoch": 0.8163545311106538, "grad_norm": 1.2265625, "learning_rate": 1.664152623169949e-05, "loss": 0.5012, "step": 5950 }, { "epoch": 0.8164917335528572, "grad_norm": 1.1875, "learning_rate": 1.664044702021943e-05, "loss": 0.5142, "step": 5951 }, { "epoch": 0.8166289359950607, "grad_norm": 1.078125, "learning_rate": 1.663936767037648e-05, "loss": 0.4497, "step": 5952 }, { "epoch": 0.8167661384372642, "grad_norm": 1.1875, "learning_rate": 1.6638288182193124e-05, "loss": 0.4612, "step": 5953 }, { "epoch": 0.8169033408794677, "grad_norm": 1.234375, "learning_rate": 1.6637208555691864e-05, "loss": 0.5105, "step": 5954 }, { "epoch": 0.8170405433216711, "grad_norm": 1.2265625, "learning_rate": 1.6636128790895193e-05, "loss": 0.511, "step": 5955 }, { "epoch": 0.8171777457638746, "grad_norm": 1.140625, "learning_rate": 1.6635048887825604e-05, "loss": 0.4767, "step": 5956 }, { "epoch": 0.8173149482060781, "grad_norm": 1.15625, "learning_rate": 1.66339688465056e-05, "loss": 0.4407, "step": 5957 }, { "epoch": 0.8174521506482816, "grad_norm": 1.109375, "learning_rate": 1.663288866695769e-05, "loss": 0.4558, "step": 5958 }, { "epoch": 0.817589353090485, "grad_norm": 1.21875, "learning_rate": 1.663180834920438e-05, "loss": 0.4809, "step": 5959 }, { "epoch": 0.8177265555326885, "grad_norm": 1.1796875, "learning_rate": 1.6630727893268177e-05, "loss": 0.4712, "step": 5960 }, { "epoch": 0.817863757974892, "grad_norm": 1.2265625, "learning_rate": 1.6629647299171593e-05, "loss": 0.5253, "step": 5961 }, { "epoch": 0.8180009604170955, "grad_norm": 1.1875, "learning_rate": 1.662856656693715e-05, "loss": 0.4995, "step": 5962 }, { "epoch": 0.8181381628592989, "grad_norm": 1.1953125, "learning_rate": 1.6627485696587358e-05, "loss": 0.4835, "step": 5963 }, { "epoch": 0.8182753653015024, "grad_norm": 1.21875, "learning_rate": 1.6626404688144743e-05, "loss": 0.5293, "step": 5964 }, { "epoch": 0.8184125677437059, "grad_norm": 1.1875, "learning_rate": 1.662532354163183e-05, "loss": 0.4756, "step": 5965 }, { "epoch": 0.8185497701859094, "grad_norm": 1.3046875, "learning_rate": 1.6624242257071146e-05, "loss": 0.564, "step": 5966 }, { "epoch": 0.8186869726281127, "grad_norm": 1.15625, "learning_rate": 1.6623160834485216e-05, "loss": 0.4873, "step": 5967 }, { "epoch": 0.8188241750703162, "grad_norm": 1.1796875, "learning_rate": 1.662207927389658e-05, "loss": 0.4512, "step": 5968 }, { "epoch": 0.8189613775125197, "grad_norm": 1.0625, "learning_rate": 1.662099757532777e-05, "loss": 0.4304, "step": 5969 }, { "epoch": 0.8190985799547232, "grad_norm": 1.1328125, "learning_rate": 1.6619915738801328e-05, "loss": 0.5422, "step": 5970 }, { "epoch": 0.8192357823969266, "grad_norm": 1.078125, "learning_rate": 1.661883376433979e-05, "loss": 0.4479, "step": 5971 }, { "epoch": 0.8193729848391301, "grad_norm": 1.2265625, "learning_rate": 1.6617751651965706e-05, "loss": 0.548, "step": 5972 }, { "epoch": 0.8195101872813336, "grad_norm": 1.171875, "learning_rate": 1.661666940170162e-05, "loss": 0.514, "step": 5973 }, { "epoch": 0.8196473897235371, "grad_norm": 1.359375, "learning_rate": 1.661558701357008e-05, "loss": 0.5712, "step": 5974 }, { "epoch": 0.8197845921657405, "grad_norm": 1.1640625, "learning_rate": 1.6614504487593645e-05, "loss": 0.4858, "step": 5975 }, { "epoch": 0.819921794607944, "grad_norm": 1.1796875, "learning_rate": 1.661342182379487e-05, "loss": 0.4934, "step": 5976 }, { "epoch": 0.8200589970501475, "grad_norm": 1.171875, "learning_rate": 1.661233902219631e-05, "loss": 0.4781, "step": 5977 }, { "epoch": 0.820196199492351, "grad_norm": 1.1484375, "learning_rate": 1.6611256082820524e-05, "loss": 0.4879, "step": 5978 }, { "epoch": 0.8203334019345544, "grad_norm": 1.203125, "learning_rate": 1.6610173005690088e-05, "loss": 0.4774, "step": 5979 }, { "epoch": 0.8204706043767579, "grad_norm": 1.2109375, "learning_rate": 1.660908979082756e-05, "loss": 0.4969, "step": 5980 }, { "epoch": 0.8206078068189614, "grad_norm": 1.28125, "learning_rate": 1.660800643825551e-05, "loss": 0.529, "step": 5981 }, { "epoch": 0.8207450092611649, "grad_norm": 1.1484375, "learning_rate": 1.660692294799652e-05, "loss": 0.497, "step": 5982 }, { "epoch": 0.8208822117033683, "grad_norm": 1.2578125, "learning_rate": 1.6605839320073154e-05, "loss": 0.5271, "step": 5983 }, { "epoch": 0.8210194141455718, "grad_norm": 1.0859375, "learning_rate": 1.6604755554507995e-05, "loss": 0.4534, "step": 5984 }, { "epoch": 0.8211566165877753, "grad_norm": 1.2890625, "learning_rate": 1.6603671651323632e-05, "loss": 0.5514, "step": 5985 }, { "epoch": 0.8212938190299788, "grad_norm": 1.171875, "learning_rate": 1.660258761054264e-05, "loss": 0.4984, "step": 5986 }, { "epoch": 0.8214310214721822, "grad_norm": 1.1171875, "learning_rate": 1.6601503432187613e-05, "loss": 0.4768, "step": 5987 }, { "epoch": 0.8215682239143857, "grad_norm": 1.265625, "learning_rate": 1.660041911628114e-05, "loss": 0.571, "step": 5988 }, { "epoch": 0.8217054263565892, "grad_norm": 1.1328125, "learning_rate": 1.659933466284581e-05, "loss": 0.5011, "step": 5989 }, { "epoch": 0.8218426287987927, "grad_norm": 1.203125, "learning_rate": 1.6598250071904224e-05, "loss": 0.5168, "step": 5990 }, { "epoch": 0.821979831240996, "grad_norm": 1.265625, "learning_rate": 1.659716534347898e-05, "loss": 0.4885, "step": 5991 }, { "epoch": 0.8221170336831995, "grad_norm": 1.0625, "learning_rate": 1.6596080477592677e-05, "loss": 0.4557, "step": 5992 }, { "epoch": 0.822254236125403, "grad_norm": 1.1875, "learning_rate": 1.6594995474267922e-05, "loss": 0.4931, "step": 5993 }, { "epoch": 0.8223914385676065, "grad_norm": 1.15625, "learning_rate": 1.6593910333527325e-05, "loss": 0.5195, "step": 5994 }, { "epoch": 0.8225286410098099, "grad_norm": 1.265625, "learning_rate": 1.659282505539349e-05, "loss": 0.5611, "step": 5995 }, { "epoch": 0.8226658434520134, "grad_norm": 1.0390625, "learning_rate": 1.6591739639889037e-05, "loss": 0.416, "step": 5996 }, { "epoch": 0.8228030458942169, "grad_norm": 1.15625, "learning_rate": 1.6590654087036577e-05, "loss": 0.4366, "step": 5997 }, { "epoch": 0.8229402483364204, "grad_norm": 1.2109375, "learning_rate": 1.6589568396858734e-05, "loss": 0.4554, "step": 5998 }, { "epoch": 0.8230774507786238, "grad_norm": 1.1875, "learning_rate": 1.6588482569378122e-05, "loss": 0.4881, "step": 5999 }, { "epoch": 0.8232146532208273, "grad_norm": 1.1875, "learning_rate": 1.6587396604617375e-05, "loss": 0.4993, "step": 6000 }, { "epoch": 0.8233518556630308, "grad_norm": 1.109375, "learning_rate": 1.6586310502599113e-05, "loss": 0.4944, "step": 6001 }, { "epoch": 0.8234890581052343, "grad_norm": 1.328125, "learning_rate": 1.6585224263345973e-05, "loss": 0.6214, "step": 6002 }, { "epoch": 0.8236262605474377, "grad_norm": 1.1953125, "learning_rate": 1.658413788688058e-05, "loss": 0.475, "step": 6003 }, { "epoch": 0.8237634629896412, "grad_norm": 1.1484375, "learning_rate": 1.658305137322558e-05, "loss": 0.4956, "step": 6004 }, { "epoch": 0.8239006654318447, "grad_norm": 1.203125, "learning_rate": 1.6581964722403602e-05, "loss": 0.5367, "step": 6005 }, { "epoch": 0.8240378678740482, "grad_norm": 1.21875, "learning_rate": 1.6580877934437297e-05, "loss": 0.5502, "step": 6006 }, { "epoch": 0.8241750703162516, "grad_norm": 1.15625, "learning_rate": 1.6579791009349304e-05, "loss": 0.4653, "step": 6007 }, { "epoch": 0.8243122727584551, "grad_norm": 1.2421875, "learning_rate": 1.6578703947162273e-05, "loss": 0.5285, "step": 6008 }, { "epoch": 0.8244494752006586, "grad_norm": 1.3984375, "learning_rate": 1.6577616747898856e-05, "loss": 0.5185, "step": 6009 }, { "epoch": 0.8245866776428621, "grad_norm": 1.2890625, "learning_rate": 1.65765294115817e-05, "loss": 0.5753, "step": 6010 }, { "epoch": 0.8247238800850655, "grad_norm": 1.265625, "learning_rate": 1.657544193823347e-05, "loss": 0.5326, "step": 6011 }, { "epoch": 0.824861082527269, "grad_norm": 1.171875, "learning_rate": 1.6574354327876814e-05, "loss": 0.5124, "step": 6012 }, { "epoch": 0.8249982849694725, "grad_norm": 1.15625, "learning_rate": 1.6573266580534403e-05, "loss": 0.5249, "step": 6013 }, { "epoch": 0.825135487411676, "grad_norm": 1.203125, "learning_rate": 1.65721786962289e-05, "loss": 0.5572, "step": 6014 }, { "epoch": 0.8252726898538794, "grad_norm": 1.1640625, "learning_rate": 1.6571090674982972e-05, "loss": 0.4838, "step": 6015 }, { "epoch": 0.8254098922960829, "grad_norm": 1.34375, "learning_rate": 1.6570002516819285e-05, "loss": 0.598, "step": 6016 }, { "epoch": 0.8255470947382864, "grad_norm": 1.140625, "learning_rate": 1.656891422176052e-05, "loss": 0.4651, "step": 6017 }, { "epoch": 0.8256842971804899, "grad_norm": 1.1171875, "learning_rate": 1.6567825789829342e-05, "loss": 0.5023, "step": 6018 }, { "epoch": 0.8258214996226932, "grad_norm": 1.2265625, "learning_rate": 1.6566737221048446e-05, "loss": 0.5191, "step": 6019 }, { "epoch": 0.8259587020648967, "grad_norm": 1.2109375, "learning_rate": 1.65656485154405e-05, "loss": 0.4909, "step": 6020 }, { "epoch": 0.8260959045071002, "grad_norm": 1.3203125, "learning_rate": 1.6564559673028194e-05, "loss": 0.5695, "step": 6021 }, { "epoch": 0.8262331069493037, "grad_norm": 1.1171875, "learning_rate": 1.6563470693834218e-05, "loss": 0.4913, "step": 6022 }, { "epoch": 0.8263703093915071, "grad_norm": 1.25, "learning_rate": 1.656238157788126e-05, "loss": 0.593, "step": 6023 }, { "epoch": 0.8265075118337106, "grad_norm": 1.09375, "learning_rate": 1.656129232519201e-05, "loss": 0.4736, "step": 6024 }, { "epoch": 0.8266447142759141, "grad_norm": 1.0859375, "learning_rate": 1.6560202935789166e-05, "loss": 0.4421, "step": 6025 }, { "epoch": 0.8267819167181176, "grad_norm": 1.109375, "learning_rate": 1.655911340969543e-05, "loss": 0.4559, "step": 6026 }, { "epoch": 0.826919119160321, "grad_norm": 1.1640625, "learning_rate": 1.65580237469335e-05, "loss": 0.4869, "step": 6027 }, { "epoch": 0.8270563216025245, "grad_norm": 1.234375, "learning_rate": 1.6556933947526082e-05, "loss": 0.5341, "step": 6028 }, { "epoch": 0.827193524044728, "grad_norm": 1.28125, "learning_rate": 1.6555844011495886e-05, "loss": 0.5721, "step": 6029 }, { "epoch": 0.8273307264869315, "grad_norm": 1.1328125, "learning_rate": 1.655475393886562e-05, "loss": 0.5029, "step": 6030 }, { "epoch": 0.8274679289291349, "grad_norm": 1.109375, "learning_rate": 1.6553663729657995e-05, "loss": 0.4617, "step": 6031 }, { "epoch": 0.8276051313713384, "grad_norm": 1.1015625, "learning_rate": 1.6552573383895733e-05, "loss": 0.4299, "step": 6032 }, { "epoch": 0.8277423338135419, "grad_norm": 1.1015625, "learning_rate": 1.6551482901601547e-05, "loss": 0.4997, "step": 6033 }, { "epoch": 0.8278795362557454, "grad_norm": 1.2109375, "learning_rate": 1.655039228279816e-05, "loss": 0.5622, "step": 6034 }, { "epoch": 0.8280167386979488, "grad_norm": 1.171875, "learning_rate": 1.6549301527508298e-05, "loss": 0.5085, "step": 6035 }, { "epoch": 0.8281539411401523, "grad_norm": 1.203125, "learning_rate": 1.6548210635754693e-05, "loss": 0.5057, "step": 6036 }, { "epoch": 0.8282911435823558, "grad_norm": 1.1640625, "learning_rate": 1.654711960756006e-05, "loss": 0.5188, "step": 6037 }, { "epoch": 0.8284283460245593, "grad_norm": 1.1015625, "learning_rate": 1.654602844294715e-05, "loss": 0.4799, "step": 6038 }, { "epoch": 0.8285655484667627, "grad_norm": 1.2265625, "learning_rate": 1.6544937141938686e-05, "loss": 0.5481, "step": 6039 }, { "epoch": 0.8287027509089662, "grad_norm": 1.2109375, "learning_rate": 1.6543845704557413e-05, "loss": 0.5091, "step": 6040 }, { "epoch": 0.8288399533511697, "grad_norm": 1.1640625, "learning_rate": 1.6542754130826073e-05, "loss": 0.4935, "step": 6041 }, { "epoch": 0.8289771557933732, "grad_norm": 1.1484375, "learning_rate": 1.654166242076741e-05, "loss": 0.4759, "step": 6042 }, { "epoch": 0.8291143582355766, "grad_norm": 1.1953125, "learning_rate": 1.6540570574404165e-05, "loss": 0.5021, "step": 6043 }, { "epoch": 0.82925156067778, "grad_norm": 1.1015625, "learning_rate": 1.6539478591759097e-05, "loss": 0.4638, "step": 6044 }, { "epoch": 0.8293887631199836, "grad_norm": 1.2421875, "learning_rate": 1.6538386472854956e-05, "loss": 0.4766, "step": 6045 }, { "epoch": 0.829525965562187, "grad_norm": 1.2421875, "learning_rate": 1.6537294217714493e-05, "loss": 0.484, "step": 6046 }, { "epoch": 0.8296631680043904, "grad_norm": 1.296875, "learning_rate": 1.6536201826360473e-05, "loss": 0.6386, "step": 6047 }, { "epoch": 0.8298003704465939, "grad_norm": 1.2265625, "learning_rate": 1.6535109298815655e-05, "loss": 0.5557, "step": 6048 }, { "epoch": 0.8299375728887974, "grad_norm": 1.28125, "learning_rate": 1.6534016635102805e-05, "loss": 0.5506, "step": 6049 }, { "epoch": 0.8300747753310009, "grad_norm": 1.2265625, "learning_rate": 1.6532923835244687e-05, "loss": 0.5359, "step": 6050 }, { "epoch": 0.8302119777732043, "grad_norm": 1.265625, "learning_rate": 1.6531830899264074e-05, "loss": 0.4783, "step": 6051 }, { "epoch": 0.8303491802154078, "grad_norm": 1.1796875, "learning_rate": 1.6530737827183735e-05, "loss": 0.5143, "step": 6052 }, { "epoch": 0.8304863826576113, "grad_norm": 1.21875, "learning_rate": 1.652964461902645e-05, "loss": 0.4922, "step": 6053 }, { "epoch": 0.8306235850998148, "grad_norm": 1.1875, "learning_rate": 1.6528551274814994e-05, "loss": 0.5327, "step": 6054 }, { "epoch": 0.8307607875420182, "grad_norm": 1.203125, "learning_rate": 1.652745779457215e-05, "loss": 0.478, "step": 6055 }, { "epoch": 0.8308979899842217, "grad_norm": 1.171875, "learning_rate": 1.6526364178320706e-05, "loss": 0.4663, "step": 6056 }, { "epoch": 0.8310351924264252, "grad_norm": 1.2109375, "learning_rate": 1.652527042608344e-05, "loss": 0.4952, "step": 6057 }, { "epoch": 0.8311723948686287, "grad_norm": 1.203125, "learning_rate": 1.652417653788315e-05, "loss": 0.5033, "step": 6058 }, { "epoch": 0.8313095973108321, "grad_norm": 1.21875, "learning_rate": 1.652308251374263e-05, "loss": 0.5644, "step": 6059 }, { "epoch": 0.8314467997530356, "grad_norm": 1.2578125, "learning_rate": 1.6521988353684664e-05, "loss": 0.5646, "step": 6060 }, { "epoch": 0.8315840021952391, "grad_norm": 1.1484375, "learning_rate": 1.652089405773206e-05, "loss": 0.4549, "step": 6061 }, { "epoch": 0.8317212046374426, "grad_norm": 1.234375, "learning_rate": 1.6519799625907615e-05, "loss": 0.5165, "step": 6062 }, { "epoch": 0.831858407079646, "grad_norm": 1.2578125, "learning_rate": 1.6518705058234137e-05, "loss": 0.5482, "step": 6063 }, { "epoch": 0.8319956095218495, "grad_norm": 1.1640625, "learning_rate": 1.651761035473443e-05, "loss": 0.5025, "step": 6064 }, { "epoch": 0.832132811964053, "grad_norm": 1.171875, "learning_rate": 1.6516515515431303e-05, "loss": 0.5476, "step": 6065 }, { "epoch": 0.8322700144062565, "grad_norm": 1.1328125, "learning_rate": 1.6515420540347568e-05, "loss": 0.4485, "step": 6066 }, { "epoch": 0.8324072168484599, "grad_norm": 1.1953125, "learning_rate": 1.6514325429506047e-05, "loss": 0.5067, "step": 6067 }, { "epoch": 0.8325444192906634, "grad_norm": 1.296875, "learning_rate": 1.651323018292955e-05, "loss": 0.6065, "step": 6068 }, { "epoch": 0.8326816217328669, "grad_norm": 1.109375, "learning_rate": 1.6512134800640902e-05, "loss": 0.4883, "step": 6069 }, { "epoch": 0.8328188241750704, "grad_norm": 1.34375, "learning_rate": 1.6511039282662923e-05, "loss": 0.5659, "step": 6070 }, { "epoch": 0.8329560266172737, "grad_norm": 1.125, "learning_rate": 1.6509943629018444e-05, "loss": 0.5017, "step": 6071 }, { "epoch": 0.8330932290594772, "grad_norm": 1.1796875, "learning_rate": 1.65088478397303e-05, "loss": 0.5242, "step": 6072 }, { "epoch": 0.8332304315016807, "grad_norm": 1.03125, "learning_rate": 1.6507751914821306e-05, "loss": 0.3965, "step": 6073 }, { "epoch": 0.8333676339438842, "grad_norm": 1.109375, "learning_rate": 1.650665585431431e-05, "loss": 0.4511, "step": 6074 }, { "epoch": 0.8335048363860876, "grad_norm": 1.1953125, "learning_rate": 1.650555965823215e-05, "loss": 0.5834, "step": 6075 }, { "epoch": 0.8336420388282911, "grad_norm": 1.1875, "learning_rate": 1.6504463326597662e-05, "loss": 0.5069, "step": 6076 }, { "epoch": 0.8337792412704946, "grad_norm": 1.1796875, "learning_rate": 1.6503366859433695e-05, "loss": 0.4383, "step": 6077 }, { "epoch": 0.8339164437126981, "grad_norm": 1.1796875, "learning_rate": 1.650227025676309e-05, "loss": 0.5244, "step": 6078 }, { "epoch": 0.8340536461549015, "grad_norm": 1.1875, "learning_rate": 1.65011735186087e-05, "loss": 0.5424, "step": 6079 }, { "epoch": 0.834190848597105, "grad_norm": 1.09375, "learning_rate": 1.650007664499337e-05, "loss": 0.4733, "step": 6080 }, { "epoch": 0.8343280510393085, "grad_norm": 1.2421875, "learning_rate": 1.649897963593996e-05, "loss": 0.5225, "step": 6081 }, { "epoch": 0.834465253481512, "grad_norm": 1.03125, "learning_rate": 1.6497882491471332e-05, "loss": 0.4024, "step": 6082 }, { "epoch": 0.8346024559237154, "grad_norm": 1.3359375, "learning_rate": 1.649678521161034e-05, "loss": 0.5401, "step": 6083 }, { "epoch": 0.8347396583659189, "grad_norm": 1.1953125, "learning_rate": 1.6495687796379855e-05, "loss": 0.5193, "step": 6084 }, { "epoch": 0.8348768608081224, "grad_norm": 1.3125, "learning_rate": 1.6494590245802733e-05, "loss": 0.5898, "step": 6085 }, { "epoch": 0.8350140632503259, "grad_norm": 1.21875, "learning_rate": 1.649349255990185e-05, "loss": 0.4699, "step": 6086 }, { "epoch": 0.8351512656925293, "grad_norm": 1.1015625, "learning_rate": 1.6492394738700074e-05, "loss": 0.496, "step": 6087 }, { "epoch": 0.8352884681347328, "grad_norm": 1.140625, "learning_rate": 1.649129678222028e-05, "loss": 0.4338, "step": 6088 }, { "epoch": 0.8354256705769363, "grad_norm": 1.21875, "learning_rate": 1.6490198690485353e-05, "loss": 0.5702, "step": 6089 }, { "epoch": 0.8355628730191398, "grad_norm": 1.1328125, "learning_rate": 1.6489100463518165e-05, "loss": 0.5104, "step": 6090 }, { "epoch": 0.8357000754613432, "grad_norm": 1.359375, "learning_rate": 1.64880021013416e-05, "loss": 0.547, "step": 6091 }, { "epoch": 0.8358372779035467, "grad_norm": 1.1640625, "learning_rate": 1.6486903603978545e-05, "loss": 0.5163, "step": 6092 }, { "epoch": 0.8359744803457502, "grad_norm": 1.125, "learning_rate": 1.6485804971451887e-05, "loss": 0.492, "step": 6093 }, { "epoch": 0.8361116827879537, "grad_norm": 1.1171875, "learning_rate": 1.648470620378452e-05, "loss": 0.4504, "step": 6094 }, { "epoch": 0.8362488852301571, "grad_norm": 1.0703125, "learning_rate": 1.648360730099934e-05, "loss": 0.411, "step": 6095 }, { "epoch": 0.8363860876723606, "grad_norm": 1.2109375, "learning_rate": 1.648250826311924e-05, "loss": 0.5751, "step": 6096 }, { "epoch": 0.836523290114564, "grad_norm": 1.234375, "learning_rate": 1.648140909016712e-05, "loss": 0.568, "step": 6097 }, { "epoch": 0.8366604925567676, "grad_norm": 1.2109375, "learning_rate": 1.6480309782165887e-05, "loss": 0.5849, "step": 6098 }, { "epoch": 0.8367976949989709, "grad_norm": 1.3046875, "learning_rate": 1.6479210339138443e-05, "loss": 0.5925, "step": 6099 }, { "epoch": 0.8369348974411744, "grad_norm": 1.2421875, "learning_rate": 1.6478110761107697e-05, "loss": 0.4945, "step": 6100 }, { "epoch": 0.8370720998833779, "grad_norm": 1.15625, "learning_rate": 1.6477011048096562e-05, "loss": 0.5552, "step": 6101 }, { "epoch": 0.8372093023255814, "grad_norm": 1.359375, "learning_rate": 1.647591120012795e-05, "loss": 0.6073, "step": 6102 }, { "epoch": 0.8373465047677848, "grad_norm": 1.171875, "learning_rate": 1.647481121722478e-05, "loss": 0.4343, "step": 6103 }, { "epoch": 0.8374837072099883, "grad_norm": 1.1796875, "learning_rate": 1.6473711099409967e-05, "loss": 0.5036, "step": 6104 }, { "epoch": 0.8376209096521918, "grad_norm": 1.09375, "learning_rate": 1.647261084670644e-05, "loss": 0.4905, "step": 6105 }, { "epoch": 0.8377581120943953, "grad_norm": 1.1484375, "learning_rate": 1.647151045913712e-05, "loss": 0.515, "step": 6106 }, { "epoch": 0.8378953145365987, "grad_norm": 1.109375, "learning_rate": 1.647040993672494e-05, "loss": 0.4717, "step": 6107 }, { "epoch": 0.8380325169788022, "grad_norm": 1.171875, "learning_rate": 1.6469309279492822e-05, "loss": 0.5029, "step": 6108 }, { "epoch": 0.8381697194210057, "grad_norm": 1.15625, "learning_rate": 1.6468208487463705e-05, "loss": 0.5086, "step": 6109 }, { "epoch": 0.8383069218632092, "grad_norm": 1.1796875, "learning_rate": 1.6467107560660526e-05, "loss": 0.528, "step": 6110 }, { "epoch": 0.8384441243054126, "grad_norm": 1.1796875, "learning_rate": 1.6466006499106227e-05, "loss": 0.4863, "step": 6111 }, { "epoch": 0.8385813267476161, "grad_norm": 1.0625, "learning_rate": 1.646490530282374e-05, "loss": 0.4613, "step": 6112 }, { "epoch": 0.8387185291898196, "grad_norm": 1.234375, "learning_rate": 1.6463803971836023e-05, "loss": 0.5345, "step": 6113 }, { "epoch": 0.8388557316320231, "grad_norm": 1.109375, "learning_rate": 1.6462702506166015e-05, "loss": 0.4848, "step": 6114 }, { "epoch": 0.8389929340742265, "grad_norm": 1.3125, "learning_rate": 1.646160090583667e-05, "loss": 0.6446, "step": 6115 }, { "epoch": 0.83913013651643, "grad_norm": 1.3125, "learning_rate": 1.646049917087094e-05, "loss": 0.5746, "step": 6116 }, { "epoch": 0.8392673389586335, "grad_norm": 1.1640625, "learning_rate": 1.645939730129178e-05, "loss": 0.5084, "step": 6117 }, { "epoch": 0.839404541400837, "grad_norm": 1.125, "learning_rate": 1.645829529712215e-05, "loss": 0.4659, "step": 6118 }, { "epoch": 0.8395417438430404, "grad_norm": 1.203125, "learning_rate": 1.6457193158385017e-05, "loss": 0.4859, "step": 6119 }, { "epoch": 0.8396789462852439, "grad_norm": 1.3359375, "learning_rate": 1.6456090885103337e-05, "loss": 0.6108, "step": 6120 }, { "epoch": 0.8398161487274474, "grad_norm": 1.203125, "learning_rate": 1.6454988477300086e-05, "loss": 0.4506, "step": 6121 }, { "epoch": 0.8399533511696509, "grad_norm": 1.234375, "learning_rate": 1.6453885934998224e-05, "loss": 0.4911, "step": 6122 }, { "epoch": 0.8400905536118543, "grad_norm": 1.2109375, "learning_rate": 1.645278325822073e-05, "loss": 0.494, "step": 6123 }, { "epoch": 0.8402277560540578, "grad_norm": 1.203125, "learning_rate": 1.6451680446990585e-05, "loss": 0.5145, "step": 6124 }, { "epoch": 0.8403649584962612, "grad_norm": 1.140625, "learning_rate": 1.6450577501330753e-05, "loss": 0.4905, "step": 6125 }, { "epoch": 0.8405021609384647, "grad_norm": 1.140625, "learning_rate": 1.644947442126423e-05, "loss": 0.5004, "step": 6126 }, { "epoch": 0.8406393633806681, "grad_norm": 1.125, "learning_rate": 1.6448371206813994e-05, "loss": 0.4478, "step": 6127 }, { "epoch": 0.8407765658228716, "grad_norm": 1.1171875, "learning_rate": 1.644726785800303e-05, "loss": 0.5213, "step": 6128 }, { "epoch": 0.8409137682650751, "grad_norm": 1.15625, "learning_rate": 1.6446164374854333e-05, "loss": 0.5066, "step": 6129 }, { "epoch": 0.8410509707072786, "grad_norm": 1.2578125, "learning_rate": 1.6445060757390895e-05, "loss": 0.5721, "step": 6130 }, { "epoch": 0.841188173149482, "grad_norm": 1.1953125, "learning_rate": 1.6443957005635706e-05, "loss": 0.5284, "step": 6131 }, { "epoch": 0.8413253755916855, "grad_norm": 1.1875, "learning_rate": 1.6442853119611765e-05, "loss": 0.5137, "step": 6132 }, { "epoch": 0.841462578033889, "grad_norm": 1.1015625, "learning_rate": 1.644174909934208e-05, "loss": 0.4306, "step": 6133 }, { "epoch": 0.8415997804760925, "grad_norm": 1.2890625, "learning_rate": 1.6440644944849647e-05, "loss": 0.5172, "step": 6134 }, { "epoch": 0.8417369829182959, "grad_norm": 1.1484375, "learning_rate": 1.643954065615748e-05, "loss": 0.4381, "step": 6135 }, { "epoch": 0.8418741853604994, "grad_norm": 1.265625, "learning_rate": 1.6438436233288578e-05, "loss": 0.5125, "step": 6136 }, { "epoch": 0.8420113878027029, "grad_norm": 1.140625, "learning_rate": 1.6437331676265966e-05, "loss": 0.4492, "step": 6137 }, { "epoch": 0.8421485902449064, "grad_norm": 1.0703125, "learning_rate": 1.6436226985112648e-05, "loss": 0.4236, "step": 6138 }, { "epoch": 0.8422857926871098, "grad_norm": 1.1953125, "learning_rate": 1.643512215985165e-05, "loss": 0.5636, "step": 6139 }, { "epoch": 0.8424229951293133, "grad_norm": 1.2109375, "learning_rate": 1.6434017200505982e-05, "loss": 0.5107, "step": 6140 }, { "epoch": 0.8425601975715168, "grad_norm": 1.1328125, "learning_rate": 1.643291210709868e-05, "loss": 0.496, "step": 6141 }, { "epoch": 0.8426974000137203, "grad_norm": 1.140625, "learning_rate": 1.6431806879652763e-05, "loss": 0.4798, "step": 6142 }, { "epoch": 0.8428346024559237, "grad_norm": 1.1875, "learning_rate": 1.643070151819126e-05, "loss": 0.4529, "step": 6143 }, { "epoch": 0.8429718048981272, "grad_norm": 1.1328125, "learning_rate": 1.6429596022737205e-05, "loss": 0.475, "step": 6144 }, { "epoch": 0.8431090073403307, "grad_norm": 1.2421875, "learning_rate": 1.6428490393313633e-05, "loss": 0.5259, "step": 6145 }, { "epoch": 0.8432462097825342, "grad_norm": 1.2421875, "learning_rate": 1.6427384629943577e-05, "loss": 0.5067, "step": 6146 }, { "epoch": 0.8433834122247376, "grad_norm": 1.1796875, "learning_rate": 1.6426278732650078e-05, "loss": 0.4813, "step": 6147 }, { "epoch": 0.8435206146669411, "grad_norm": 1.140625, "learning_rate": 1.6425172701456183e-05, "loss": 0.4905, "step": 6148 }, { "epoch": 0.8436578171091446, "grad_norm": 1.1875, "learning_rate": 1.6424066536384936e-05, "loss": 0.5031, "step": 6149 }, { "epoch": 0.8437950195513481, "grad_norm": 1.2890625, "learning_rate": 1.6422960237459387e-05, "loss": 0.5717, "step": 6150 }, { "epoch": 0.8439322219935514, "grad_norm": 1.1015625, "learning_rate": 1.642185380470258e-05, "loss": 0.4123, "step": 6151 }, { "epoch": 0.844069424435755, "grad_norm": 1.28125, "learning_rate": 1.642074723813758e-05, "loss": 0.5112, "step": 6152 }, { "epoch": 0.8442066268779584, "grad_norm": 1.203125, "learning_rate": 1.6419640537787433e-05, "loss": 0.5223, "step": 6153 }, { "epoch": 0.8443438293201619, "grad_norm": 1.3203125, "learning_rate": 1.6418533703675205e-05, "loss": 0.6065, "step": 6154 }, { "epoch": 0.8444810317623653, "grad_norm": 1.2578125, "learning_rate": 1.6417426735823956e-05, "loss": 0.4924, "step": 6155 }, { "epoch": 0.8446182342045688, "grad_norm": 1.1328125, "learning_rate": 1.6416319634256756e-05, "loss": 0.4713, "step": 6156 }, { "epoch": 0.8447554366467723, "grad_norm": 1.1171875, "learning_rate": 1.6415212398996663e-05, "loss": 0.4761, "step": 6157 }, { "epoch": 0.8448926390889758, "grad_norm": 1.1953125, "learning_rate": 1.641410503006676e-05, "loss": 0.5568, "step": 6158 }, { "epoch": 0.8450298415311792, "grad_norm": 1.1875, "learning_rate": 1.6412997527490112e-05, "loss": 0.5062, "step": 6159 }, { "epoch": 0.8451670439733827, "grad_norm": 1.1484375, "learning_rate": 1.64118898912898e-05, "loss": 0.5033, "step": 6160 }, { "epoch": 0.8453042464155862, "grad_norm": 1.2421875, "learning_rate": 1.64107821214889e-05, "loss": 0.5157, "step": 6161 }, { "epoch": 0.8454414488577897, "grad_norm": 1.203125, "learning_rate": 1.6409674218110495e-05, "loss": 0.524, "step": 6162 }, { "epoch": 0.8455786512999931, "grad_norm": 1.140625, "learning_rate": 1.640856618117767e-05, "loss": 0.5041, "step": 6163 }, { "epoch": 0.8457158537421966, "grad_norm": 1.1953125, "learning_rate": 1.640745801071351e-05, "loss": 0.4913, "step": 6164 }, { "epoch": 0.8458530561844001, "grad_norm": 1.3203125, "learning_rate": 1.640634970674111e-05, "loss": 0.5332, "step": 6165 }, { "epoch": 0.8459902586266036, "grad_norm": 1.2421875, "learning_rate": 1.640524126928356e-05, "loss": 0.5584, "step": 6166 }, { "epoch": 0.846127461068807, "grad_norm": 1.1015625, "learning_rate": 1.6404132698363956e-05, "loss": 0.4561, "step": 6167 }, { "epoch": 0.8462646635110105, "grad_norm": 1.3671875, "learning_rate": 1.6403023994005396e-05, "loss": 0.5132, "step": 6168 }, { "epoch": 0.846401865953214, "grad_norm": 1.1875, "learning_rate": 1.6401915156230982e-05, "loss": 0.4981, "step": 6169 }, { "epoch": 0.8465390683954175, "grad_norm": 1.1328125, "learning_rate": 1.640080618506382e-05, "loss": 0.509, "step": 6170 }, { "epoch": 0.8466762708376209, "grad_norm": 1.2109375, "learning_rate": 1.6399697080527016e-05, "loss": 0.5287, "step": 6171 }, { "epoch": 0.8468134732798244, "grad_norm": 1.15625, "learning_rate": 1.639858784264368e-05, "loss": 0.5657, "step": 6172 }, { "epoch": 0.8469506757220279, "grad_norm": 1.2734375, "learning_rate": 1.6397478471436922e-05, "loss": 0.5441, "step": 6173 }, { "epoch": 0.8470878781642314, "grad_norm": 1.125, "learning_rate": 1.6396368966929858e-05, "loss": 0.4417, "step": 6174 }, { "epoch": 0.8472250806064348, "grad_norm": 1.1953125, "learning_rate": 1.639525932914561e-05, "loss": 0.5003, "step": 6175 }, { "epoch": 0.8473622830486383, "grad_norm": 1.2421875, "learning_rate": 1.6394149558107292e-05, "loss": 0.572, "step": 6176 }, { "epoch": 0.8474994854908418, "grad_norm": 1.2109375, "learning_rate": 1.639303965383803e-05, "loss": 0.5618, "step": 6177 }, { "epoch": 0.8476366879330453, "grad_norm": 1.25, "learning_rate": 1.639192961636096e-05, "loss": 0.5086, "step": 6178 }, { "epoch": 0.8477738903752486, "grad_norm": 1.1796875, "learning_rate": 1.6390819445699197e-05, "loss": 0.4886, "step": 6179 }, { "epoch": 0.8479110928174521, "grad_norm": 1.0390625, "learning_rate": 1.638970914187588e-05, "loss": 0.4077, "step": 6180 }, { "epoch": 0.8480482952596556, "grad_norm": 1.2578125, "learning_rate": 1.6388598704914143e-05, "loss": 0.4857, "step": 6181 }, { "epoch": 0.8481854977018591, "grad_norm": 1.1875, "learning_rate": 1.6387488134837124e-05, "loss": 0.5479, "step": 6182 }, { "epoch": 0.8483227001440625, "grad_norm": 1.109375, "learning_rate": 1.6386377431667963e-05, "loss": 0.4091, "step": 6183 }, { "epoch": 0.848459902586266, "grad_norm": 1.296875, "learning_rate": 1.6385266595429797e-05, "loss": 0.5472, "step": 6184 }, { "epoch": 0.8485971050284695, "grad_norm": 1.140625, "learning_rate": 1.6384155626145784e-05, "loss": 0.4613, "step": 6185 }, { "epoch": 0.848734307470673, "grad_norm": 1.125, "learning_rate": 1.6383044523839062e-05, "loss": 0.4992, "step": 6186 }, { "epoch": 0.8488715099128764, "grad_norm": 1.234375, "learning_rate": 1.6381933288532788e-05, "loss": 0.5023, "step": 6187 }, { "epoch": 0.8490087123550799, "grad_norm": 1.2109375, "learning_rate": 1.6380821920250114e-05, "loss": 0.5642, "step": 6188 }, { "epoch": 0.8491459147972834, "grad_norm": 1.296875, "learning_rate": 1.6379710419014198e-05, "loss": 0.5453, "step": 6189 }, { "epoch": 0.8492831172394869, "grad_norm": 1.1328125, "learning_rate": 1.6378598784848193e-05, "loss": 0.5072, "step": 6190 }, { "epoch": 0.8494203196816903, "grad_norm": 1.1796875, "learning_rate": 1.6377487017775276e-05, "loss": 0.5128, "step": 6191 }, { "epoch": 0.8495575221238938, "grad_norm": 1.171875, "learning_rate": 1.63763751178186e-05, "loss": 0.5293, "step": 6192 }, { "epoch": 0.8496947245660973, "grad_norm": 1.125, "learning_rate": 1.6375263085001333e-05, "loss": 0.5592, "step": 6193 }, { "epoch": 0.8498319270083008, "grad_norm": 1.25, "learning_rate": 1.6374150919346654e-05, "loss": 0.5291, "step": 6194 }, { "epoch": 0.8499691294505042, "grad_norm": 1.1875, "learning_rate": 1.637303862087773e-05, "loss": 0.5336, "step": 6195 }, { "epoch": 0.8501063318927077, "grad_norm": 1.140625, "learning_rate": 1.6371926189617737e-05, "loss": 0.5106, "step": 6196 }, { "epoch": 0.8502435343349112, "grad_norm": 1.2890625, "learning_rate": 1.637081362558986e-05, "loss": 0.5138, "step": 6197 }, { "epoch": 0.8503807367771147, "grad_norm": 1.2265625, "learning_rate": 1.6369700928817275e-05, "loss": 0.5373, "step": 6198 }, { "epoch": 0.8505179392193181, "grad_norm": 1.1171875, "learning_rate": 1.6368588099323166e-05, "loss": 0.4502, "step": 6199 }, { "epoch": 0.8506551416615216, "grad_norm": 1.140625, "learning_rate": 1.6367475137130725e-05, "loss": 0.4919, "step": 6200 }, { "epoch": 0.8507923441037251, "grad_norm": 1.1953125, "learning_rate": 1.6366362042263138e-05, "loss": 0.5075, "step": 6201 }, { "epoch": 0.8509295465459286, "grad_norm": 1.046875, "learning_rate": 1.6365248814743602e-05, "loss": 0.4246, "step": 6202 }, { "epoch": 0.851066748988132, "grad_norm": 1.5703125, "learning_rate": 1.6364135454595312e-05, "loss": 0.4296, "step": 6203 }, { "epoch": 0.8512039514303354, "grad_norm": 1.15625, "learning_rate": 1.6363021961841466e-05, "loss": 0.5205, "step": 6204 }, { "epoch": 0.851341153872539, "grad_norm": 1.109375, "learning_rate": 1.636190833650526e-05, "loss": 0.5401, "step": 6205 }, { "epoch": 0.8514783563147424, "grad_norm": 1.0546875, "learning_rate": 1.63607945786099e-05, "loss": 0.4018, "step": 6206 }, { "epoch": 0.8516155587569458, "grad_norm": 1.2109375, "learning_rate": 1.63596806881786e-05, "loss": 0.5158, "step": 6207 }, { "epoch": 0.8517527611991493, "grad_norm": 1.2109375, "learning_rate": 1.635856666523456e-05, "loss": 0.5218, "step": 6208 }, { "epoch": 0.8518899636413528, "grad_norm": 1.2578125, "learning_rate": 1.6357452509801e-05, "loss": 0.5694, "step": 6209 }, { "epoch": 0.8520271660835563, "grad_norm": 1.140625, "learning_rate": 1.635633822190113e-05, "loss": 0.525, "step": 6210 }, { "epoch": 0.8521643685257597, "grad_norm": 1.2578125, "learning_rate": 1.6355223801558166e-05, "loss": 0.5326, "step": 6211 }, { "epoch": 0.8523015709679632, "grad_norm": 1.2734375, "learning_rate": 1.6354109248795333e-05, "loss": 0.5656, "step": 6212 }, { "epoch": 0.8524387734101667, "grad_norm": 1.1875, "learning_rate": 1.635299456363585e-05, "loss": 0.5186, "step": 6213 }, { "epoch": 0.8525759758523702, "grad_norm": 1.1796875, "learning_rate": 1.635187974610295e-05, "loss": 0.5134, "step": 6214 }, { "epoch": 0.8527131782945736, "grad_norm": 1.21875, "learning_rate": 1.6350764796219855e-05, "loss": 0.5324, "step": 6215 }, { "epoch": 0.8528503807367771, "grad_norm": 1.2265625, "learning_rate": 1.6349649714009802e-05, "loss": 0.5345, "step": 6216 }, { "epoch": 0.8529875831789806, "grad_norm": 1.0546875, "learning_rate": 1.6348534499496018e-05, "loss": 0.4389, "step": 6217 }, { "epoch": 0.8531247856211841, "grad_norm": 1.1796875, "learning_rate": 1.6347419152701745e-05, "loss": 0.5487, "step": 6218 }, { "epoch": 0.8532619880633875, "grad_norm": 1.1328125, "learning_rate": 1.6346303673650227e-05, "loss": 0.4504, "step": 6219 }, { "epoch": 0.853399190505591, "grad_norm": 1.2109375, "learning_rate": 1.6345188062364696e-05, "loss": 0.5299, "step": 6220 }, { "epoch": 0.8535363929477945, "grad_norm": 1.171875, "learning_rate": 1.6344072318868404e-05, "loss": 0.4758, "step": 6221 }, { "epoch": 0.853673595389998, "grad_norm": 1.171875, "learning_rate": 1.6342956443184598e-05, "loss": 0.4483, "step": 6222 }, { "epoch": 0.8538107978322014, "grad_norm": 1.171875, "learning_rate": 1.634184043533653e-05, "loss": 0.4855, "step": 6223 }, { "epoch": 0.8539480002744049, "grad_norm": 1.0703125, "learning_rate": 1.6340724295347453e-05, "loss": 0.3733, "step": 6224 }, { "epoch": 0.8540852027166084, "grad_norm": 1.234375, "learning_rate": 1.633960802324062e-05, "loss": 0.5505, "step": 6225 }, { "epoch": 0.8542224051588119, "grad_norm": 1.2265625, "learning_rate": 1.6338491619039296e-05, "loss": 0.5516, "step": 6226 }, { "epoch": 0.8543596076010153, "grad_norm": 1.1171875, "learning_rate": 1.633737508276674e-05, "loss": 0.4764, "step": 6227 }, { "epoch": 0.8544968100432188, "grad_norm": 1.125, "learning_rate": 1.633625841444621e-05, "loss": 0.419, "step": 6228 }, { "epoch": 0.8546340124854223, "grad_norm": 1.0234375, "learning_rate": 1.633514161410098e-05, "loss": 0.4272, "step": 6229 }, { "epoch": 0.8547712149276258, "grad_norm": 1.2109375, "learning_rate": 1.6334024681754324e-05, "loss": 0.5179, "step": 6230 }, { "epoch": 0.8549084173698291, "grad_norm": 1.2890625, "learning_rate": 1.6332907617429512e-05, "loss": 0.5314, "step": 6231 }, { "epoch": 0.8550456198120326, "grad_norm": 1.2109375, "learning_rate": 1.6331790421149817e-05, "loss": 0.4906, "step": 6232 }, { "epoch": 0.8551828222542361, "grad_norm": 1.1875, "learning_rate": 1.6330673092938514e-05, "loss": 0.5493, "step": 6233 }, { "epoch": 0.8553200246964396, "grad_norm": 1.203125, "learning_rate": 1.632955563281889e-05, "loss": 0.4976, "step": 6234 }, { "epoch": 0.855457227138643, "grad_norm": 1.15625, "learning_rate": 1.6328438040814227e-05, "loss": 0.4761, "step": 6235 }, { "epoch": 0.8555944295808465, "grad_norm": 1.3515625, "learning_rate": 1.6327320316947813e-05, "loss": 0.6157, "step": 6236 }, { "epoch": 0.85573163202305, "grad_norm": 1.1484375, "learning_rate": 1.6326202461242937e-05, "loss": 0.493, "step": 6237 }, { "epoch": 0.8558688344652535, "grad_norm": 1.125, "learning_rate": 1.6325084473722888e-05, "loss": 0.4276, "step": 6238 }, { "epoch": 0.8560060369074569, "grad_norm": 1.1953125, "learning_rate": 1.6323966354410964e-05, "loss": 0.5309, "step": 6239 }, { "epoch": 0.8561432393496604, "grad_norm": 1.171875, "learning_rate": 1.632284810333046e-05, "loss": 0.4544, "step": 6240 }, { "epoch": 0.8562804417918639, "grad_norm": 1.171875, "learning_rate": 1.632172972050468e-05, "loss": 0.4589, "step": 6241 }, { "epoch": 0.8564176442340674, "grad_norm": 1.171875, "learning_rate": 1.6320611205956923e-05, "loss": 0.4929, "step": 6242 }, { "epoch": 0.8565548466762708, "grad_norm": 1.15625, "learning_rate": 1.6319492559710498e-05, "loss": 0.4389, "step": 6243 }, { "epoch": 0.8566920491184743, "grad_norm": 1.3515625, "learning_rate": 1.631837378178871e-05, "loss": 0.5722, "step": 6244 }, { "epoch": 0.8568292515606778, "grad_norm": 1.296875, "learning_rate": 1.6317254872214876e-05, "loss": 0.6221, "step": 6245 }, { "epoch": 0.8569664540028813, "grad_norm": 1.21875, "learning_rate": 1.6316135831012302e-05, "loss": 0.5319, "step": 6246 }, { "epoch": 0.8571036564450847, "grad_norm": 1.1328125, "learning_rate": 1.6315016658204315e-05, "loss": 0.4754, "step": 6247 }, { "epoch": 0.8572408588872882, "grad_norm": 1.2578125, "learning_rate": 1.6313897353814225e-05, "loss": 0.5663, "step": 6248 }, { "epoch": 0.8573780613294917, "grad_norm": 1.203125, "learning_rate": 1.631277791786536e-05, "loss": 0.5142, "step": 6249 }, { "epoch": 0.8575152637716952, "grad_norm": 1.2890625, "learning_rate": 1.631165835038104e-05, "loss": 0.501, "step": 6250 }, { "epoch": 0.8576524662138986, "grad_norm": 1.1875, "learning_rate": 1.63105386513846e-05, "loss": 0.4908, "step": 6251 }, { "epoch": 0.8577896686561021, "grad_norm": 1.28125, "learning_rate": 1.630941882089936e-05, "loss": 0.5591, "step": 6252 }, { "epoch": 0.8579268710983056, "grad_norm": 1.1796875, "learning_rate": 1.6308298858948667e-05, "loss": 0.5428, "step": 6253 }, { "epoch": 0.8580640735405091, "grad_norm": 1.1875, "learning_rate": 1.6307178765555844e-05, "loss": 0.4728, "step": 6254 }, { "epoch": 0.8582012759827125, "grad_norm": 1.046875, "learning_rate": 1.6306058540744236e-05, "loss": 0.424, "step": 6255 }, { "epoch": 0.858338478424916, "grad_norm": 1.1875, "learning_rate": 1.6304938184537184e-05, "loss": 0.5566, "step": 6256 }, { "epoch": 0.8584756808671195, "grad_norm": 1.2265625, "learning_rate": 1.6303817696958033e-05, "loss": 0.5531, "step": 6257 }, { "epoch": 0.858612883309323, "grad_norm": 1.1015625, "learning_rate": 1.630269707803013e-05, "loss": 0.4599, "step": 6258 }, { "epoch": 0.8587500857515263, "grad_norm": 1.3125, "learning_rate": 1.630157632777682e-05, "loss": 0.4828, "step": 6259 }, { "epoch": 0.8588872881937298, "grad_norm": 1.21875, "learning_rate": 1.6300455446221457e-05, "loss": 0.5623, "step": 6260 }, { "epoch": 0.8590244906359333, "grad_norm": 1.1953125, "learning_rate": 1.62993344333874e-05, "loss": 0.5444, "step": 6261 }, { "epoch": 0.8591616930781368, "grad_norm": 1.171875, "learning_rate": 1.6298213289298006e-05, "loss": 0.4767, "step": 6262 }, { "epoch": 0.8592988955203402, "grad_norm": 1.234375, "learning_rate": 1.6297092013976635e-05, "loss": 0.5336, "step": 6263 }, { "epoch": 0.8594360979625437, "grad_norm": 1.2265625, "learning_rate": 1.6295970607446648e-05, "loss": 0.4636, "step": 6264 }, { "epoch": 0.8595733004047472, "grad_norm": 1.15625, "learning_rate": 1.6294849069731414e-05, "loss": 0.4797, "step": 6265 }, { "epoch": 0.8597105028469507, "grad_norm": 1.171875, "learning_rate": 1.6293727400854297e-05, "loss": 0.4965, "step": 6266 }, { "epoch": 0.8598477052891541, "grad_norm": 1.203125, "learning_rate": 1.6292605600838675e-05, "loss": 0.5021, "step": 6267 }, { "epoch": 0.8599849077313576, "grad_norm": 1.2265625, "learning_rate": 1.6291483669707916e-05, "loss": 0.524, "step": 6268 }, { "epoch": 0.8601221101735611, "grad_norm": 1.2578125, "learning_rate": 1.62903616074854e-05, "loss": 0.5445, "step": 6269 }, { "epoch": 0.8602593126157646, "grad_norm": 1.1953125, "learning_rate": 1.6289239414194513e-05, "loss": 0.5178, "step": 6270 }, { "epoch": 0.860396515057968, "grad_norm": 1.25, "learning_rate": 1.6288117089858626e-05, "loss": 0.5668, "step": 6271 }, { "epoch": 0.8605337175001715, "grad_norm": 1.3203125, "learning_rate": 1.628699463450113e-05, "loss": 0.5961, "step": 6272 }, { "epoch": 0.860670919942375, "grad_norm": 1.2109375, "learning_rate": 1.6285872048145413e-05, "loss": 0.5145, "step": 6273 }, { "epoch": 0.8608081223845785, "grad_norm": 1.2421875, "learning_rate": 1.6284749330814868e-05, "loss": 0.501, "step": 6274 }, { "epoch": 0.8609453248267819, "grad_norm": 1.171875, "learning_rate": 1.628362648253288e-05, "loss": 0.5425, "step": 6275 }, { "epoch": 0.8610825272689854, "grad_norm": 1.34375, "learning_rate": 1.628250350332285e-05, "loss": 0.5835, "step": 6276 }, { "epoch": 0.8612197297111889, "grad_norm": 1.203125, "learning_rate": 1.6281380393208183e-05, "loss": 0.5238, "step": 6277 }, { "epoch": 0.8613569321533924, "grad_norm": 1.1484375, "learning_rate": 1.628025715221227e-05, "loss": 0.4841, "step": 6278 }, { "epoch": 0.8614941345955958, "grad_norm": 1.15625, "learning_rate": 1.6279133780358523e-05, "loss": 0.4964, "step": 6279 }, { "epoch": 0.8616313370377993, "grad_norm": 1.078125, "learning_rate": 1.6278010277670342e-05, "loss": 0.4631, "step": 6280 }, { "epoch": 0.8617685394800028, "grad_norm": 1.28125, "learning_rate": 1.6276886644171147e-05, "loss": 0.5142, "step": 6281 }, { "epoch": 0.8619057419222063, "grad_norm": 1.2109375, "learning_rate": 1.627576287988434e-05, "loss": 0.5126, "step": 6282 }, { "epoch": 0.8620429443644096, "grad_norm": 1.1953125, "learning_rate": 1.6274638984833345e-05, "loss": 0.4851, "step": 6283 }, { "epoch": 0.8621801468066131, "grad_norm": 1.2109375, "learning_rate": 1.6273514959041573e-05, "loss": 0.4723, "step": 6284 }, { "epoch": 0.8623173492488166, "grad_norm": 1.375, "learning_rate": 1.6272390802532445e-05, "loss": 0.6198, "step": 6285 }, { "epoch": 0.8624545516910201, "grad_norm": 1.28125, "learning_rate": 1.627126651532939e-05, "loss": 0.5764, "step": 6286 }, { "epoch": 0.8625917541332235, "grad_norm": 1.09375, "learning_rate": 1.6270142097455827e-05, "loss": 0.4582, "step": 6287 }, { "epoch": 0.862728956575427, "grad_norm": 1.1015625, "learning_rate": 1.6269017548935188e-05, "loss": 0.4871, "step": 6288 }, { "epoch": 0.8628661590176305, "grad_norm": 1.234375, "learning_rate": 1.6267892869790905e-05, "loss": 0.5577, "step": 6289 }, { "epoch": 0.863003361459834, "grad_norm": 1.25, "learning_rate": 1.6266768060046417e-05, "loss": 0.5135, "step": 6290 }, { "epoch": 0.8631405639020374, "grad_norm": 1.1953125, "learning_rate": 1.6265643119725153e-05, "loss": 0.4725, "step": 6291 }, { "epoch": 0.8632777663442409, "grad_norm": 1.203125, "learning_rate": 1.6264518048850554e-05, "loss": 0.5219, "step": 6292 }, { "epoch": 0.8634149687864444, "grad_norm": 1.078125, "learning_rate": 1.6263392847446064e-05, "loss": 0.4569, "step": 6293 }, { "epoch": 0.8635521712286479, "grad_norm": 1.203125, "learning_rate": 1.626226751553513e-05, "loss": 0.5345, "step": 6294 }, { "epoch": 0.8636893736708513, "grad_norm": 1.203125, "learning_rate": 1.6261142053141198e-05, "loss": 0.5075, "step": 6295 }, { "epoch": 0.8638265761130548, "grad_norm": 1.1640625, "learning_rate": 1.6260016460287716e-05, "loss": 0.4861, "step": 6296 }, { "epoch": 0.8639637785552583, "grad_norm": 1.140625, "learning_rate": 1.6258890736998143e-05, "loss": 0.4508, "step": 6297 }, { "epoch": 0.8641009809974618, "grad_norm": 1.25, "learning_rate": 1.6257764883295932e-05, "loss": 0.5959, "step": 6298 }, { "epoch": 0.8642381834396652, "grad_norm": 1.1796875, "learning_rate": 1.6256638899204542e-05, "loss": 0.4538, "step": 6299 }, { "epoch": 0.8643753858818687, "grad_norm": 1.21875, "learning_rate": 1.625551278474743e-05, "loss": 0.4671, "step": 6300 }, { "epoch": 0.8645125883240722, "grad_norm": 1.1640625, "learning_rate": 1.6254386539948067e-05, "loss": 0.5128, "step": 6301 }, { "epoch": 0.8646497907662757, "grad_norm": 1.109375, "learning_rate": 1.6253260164829918e-05, "loss": 0.4543, "step": 6302 }, { "epoch": 0.8647869932084791, "grad_norm": 1.140625, "learning_rate": 1.625213365941645e-05, "loss": 0.511, "step": 6303 }, { "epoch": 0.8649241956506826, "grad_norm": 1.234375, "learning_rate": 1.625100702373114e-05, "loss": 0.5369, "step": 6304 }, { "epoch": 0.8650613980928861, "grad_norm": 1.2734375, "learning_rate": 1.6249880257797456e-05, "loss": 0.5868, "step": 6305 }, { "epoch": 0.8651986005350896, "grad_norm": 1.25, "learning_rate": 1.624875336163888e-05, "loss": 0.5742, "step": 6306 }, { "epoch": 0.865335802977293, "grad_norm": 1.234375, "learning_rate": 1.6247626335278898e-05, "loss": 0.5065, "step": 6307 }, { "epoch": 0.8654730054194965, "grad_norm": 1.203125, "learning_rate": 1.6246499178740982e-05, "loss": 0.5546, "step": 6308 }, { "epoch": 0.8656102078617, "grad_norm": 1.1875, "learning_rate": 1.624537189204862e-05, "loss": 0.5319, "step": 6309 }, { "epoch": 0.8657474103039035, "grad_norm": 1.1640625, "learning_rate": 1.624424447522531e-05, "loss": 0.5467, "step": 6310 }, { "epoch": 0.8658846127461068, "grad_norm": 1.125, "learning_rate": 1.6243116928294534e-05, "loss": 0.4811, "step": 6311 }, { "epoch": 0.8660218151883103, "grad_norm": 1.3046875, "learning_rate": 1.6241989251279793e-05, "loss": 0.5192, "step": 6312 }, { "epoch": 0.8661590176305138, "grad_norm": 1.3203125, "learning_rate": 1.6240861444204578e-05, "loss": 0.5386, "step": 6313 }, { "epoch": 0.8662962200727173, "grad_norm": 1.0703125, "learning_rate": 1.6239733507092384e-05, "loss": 0.4584, "step": 6314 }, { "epoch": 0.8664334225149207, "grad_norm": 1.21875, "learning_rate": 1.6238605439966725e-05, "loss": 0.5265, "step": 6315 }, { "epoch": 0.8665706249571242, "grad_norm": 1.234375, "learning_rate": 1.62374772428511e-05, "loss": 0.5175, "step": 6316 }, { "epoch": 0.8667078273993277, "grad_norm": 1.1640625, "learning_rate": 1.6236348915769017e-05, "loss": 0.5544, "step": 6317 }, { "epoch": 0.8668450298415312, "grad_norm": 1.234375, "learning_rate": 1.6235220458743986e-05, "loss": 0.5349, "step": 6318 }, { "epoch": 0.8669822322837346, "grad_norm": 1.2109375, "learning_rate": 1.6234091871799516e-05, "loss": 0.4801, "step": 6319 }, { "epoch": 0.8671194347259381, "grad_norm": 1.09375, "learning_rate": 1.623296315495913e-05, "loss": 0.4111, "step": 6320 }, { "epoch": 0.8672566371681416, "grad_norm": 1.1640625, "learning_rate": 1.6231834308246344e-05, "loss": 0.5156, "step": 6321 }, { "epoch": 0.8673938396103451, "grad_norm": 1.0390625, "learning_rate": 1.6230705331684676e-05, "loss": 0.4372, "step": 6322 }, { "epoch": 0.8675310420525485, "grad_norm": 1.1796875, "learning_rate": 1.622957622529765e-05, "loss": 0.4841, "step": 6323 }, { "epoch": 0.867668244494752, "grad_norm": 1.1953125, "learning_rate": 1.6228446989108797e-05, "loss": 0.4833, "step": 6324 }, { "epoch": 0.8678054469369555, "grad_norm": 1.28125, "learning_rate": 1.6227317623141644e-05, "loss": 0.5193, "step": 6325 }, { "epoch": 0.867942649379159, "grad_norm": 1.2890625, "learning_rate": 1.6226188127419723e-05, "loss": 0.5253, "step": 6326 }, { "epoch": 0.8680798518213624, "grad_norm": 1.3046875, "learning_rate": 1.6225058501966565e-05, "loss": 0.5971, "step": 6327 }, { "epoch": 0.8682170542635659, "grad_norm": 1.2578125, "learning_rate": 1.622392874680571e-05, "loss": 0.5683, "step": 6328 }, { "epoch": 0.8683542567057694, "grad_norm": 1.234375, "learning_rate": 1.62227988619607e-05, "loss": 0.5909, "step": 6329 }, { "epoch": 0.8684914591479729, "grad_norm": 1.28125, "learning_rate": 1.6221668847455078e-05, "loss": 0.5404, "step": 6330 }, { "epoch": 0.8686286615901763, "grad_norm": 1.1484375, "learning_rate": 1.6220538703312383e-05, "loss": 0.5106, "step": 6331 }, { "epoch": 0.8687658640323798, "grad_norm": 1.0625, "learning_rate": 1.621940842955617e-05, "loss": 0.5196, "step": 6332 }, { "epoch": 0.8689030664745833, "grad_norm": 1.1640625, "learning_rate": 1.6218278026209988e-05, "loss": 0.5087, "step": 6333 }, { "epoch": 0.8690402689167868, "grad_norm": 1.28125, "learning_rate": 1.621714749329739e-05, "loss": 0.5948, "step": 6334 }, { "epoch": 0.8691774713589902, "grad_norm": 1.1875, "learning_rate": 1.621601683084193e-05, "loss": 0.5005, "step": 6335 }, { "epoch": 0.8693146738011936, "grad_norm": 1.0703125, "learning_rate": 1.6214886038867172e-05, "loss": 0.4249, "step": 6336 }, { "epoch": 0.8694518762433971, "grad_norm": 1.2109375, "learning_rate": 1.621375511739667e-05, "loss": 0.3915, "step": 6337 }, { "epoch": 0.8695890786856006, "grad_norm": 1.234375, "learning_rate": 1.6212624066453994e-05, "loss": 0.5517, "step": 6338 }, { "epoch": 0.869726281127804, "grad_norm": 1.2421875, "learning_rate": 1.6211492886062715e-05, "loss": 0.5499, "step": 6339 }, { "epoch": 0.8698634835700075, "grad_norm": 1.1875, "learning_rate": 1.6210361576246394e-05, "loss": 0.5249, "step": 6340 }, { "epoch": 0.870000686012211, "grad_norm": 1.21875, "learning_rate": 1.6209230137028605e-05, "loss": 0.4185, "step": 6341 }, { "epoch": 0.8701378884544145, "grad_norm": 1.1015625, "learning_rate": 1.6208098568432928e-05, "loss": 0.4569, "step": 6342 }, { "epoch": 0.8702750908966179, "grad_norm": 1.2109375, "learning_rate": 1.6206966870482935e-05, "loss": 0.5257, "step": 6343 }, { "epoch": 0.8704122933388214, "grad_norm": 1.2578125, "learning_rate": 1.6205835043202216e-05, "loss": 0.5644, "step": 6344 }, { "epoch": 0.8705494957810249, "grad_norm": 1.203125, "learning_rate": 1.6204703086614342e-05, "loss": 0.5032, "step": 6345 }, { "epoch": 0.8706866982232284, "grad_norm": 1.2265625, "learning_rate": 1.6203571000742905e-05, "loss": 0.5929, "step": 6346 }, { "epoch": 0.8708239006654318, "grad_norm": 1.15625, "learning_rate": 1.6202438785611495e-05, "loss": 0.6023, "step": 6347 }, { "epoch": 0.8709611031076353, "grad_norm": 1.1640625, "learning_rate": 1.62013064412437e-05, "loss": 0.5093, "step": 6348 }, { "epoch": 0.8710983055498388, "grad_norm": 1.2265625, "learning_rate": 1.6200173967663115e-05, "loss": 0.5442, "step": 6349 }, { "epoch": 0.8712355079920423, "grad_norm": 1.140625, "learning_rate": 1.6199041364893338e-05, "loss": 0.4632, "step": 6350 }, { "epoch": 0.8713727104342457, "grad_norm": 1.171875, "learning_rate": 1.619790863295797e-05, "loss": 0.4208, "step": 6351 }, { "epoch": 0.8715099128764492, "grad_norm": 1.3203125, "learning_rate": 1.6196775771880605e-05, "loss": 0.5273, "step": 6352 }, { "epoch": 0.8716471153186527, "grad_norm": 1.3203125, "learning_rate": 1.6195642781684856e-05, "loss": 0.5381, "step": 6353 }, { "epoch": 0.8717843177608562, "grad_norm": 1.265625, "learning_rate": 1.6194509662394327e-05, "loss": 0.5197, "step": 6354 }, { "epoch": 0.8719215202030596, "grad_norm": 1.1640625, "learning_rate": 1.6193376414032626e-05, "loss": 0.4772, "step": 6355 }, { "epoch": 0.8720587226452631, "grad_norm": 1.2421875, "learning_rate": 1.6192243036623372e-05, "loss": 0.4992, "step": 6356 }, { "epoch": 0.8721959250874666, "grad_norm": 1.203125, "learning_rate": 1.6191109530190176e-05, "loss": 0.4912, "step": 6357 }, { "epoch": 0.8723331275296701, "grad_norm": 1.3046875, "learning_rate": 1.6189975894756655e-05, "loss": 0.616, "step": 6358 }, { "epoch": 0.8724703299718735, "grad_norm": 1.2109375, "learning_rate": 1.6188842130346432e-05, "loss": 0.5174, "step": 6359 }, { "epoch": 0.872607532414077, "grad_norm": 1.1640625, "learning_rate": 1.6187708236983135e-05, "loss": 0.541, "step": 6360 }, { "epoch": 0.8727447348562805, "grad_norm": 1.1015625, "learning_rate": 1.618657421469038e-05, "loss": 0.4708, "step": 6361 }, { "epoch": 0.872881937298484, "grad_norm": 1.234375, "learning_rate": 1.618544006349181e-05, "loss": 0.4765, "step": 6362 }, { "epoch": 0.8730191397406873, "grad_norm": 1.1875, "learning_rate": 1.618430578341104e-05, "loss": 0.4679, "step": 6363 }, { "epoch": 0.8731563421828908, "grad_norm": 1.1328125, "learning_rate": 1.6183171374471713e-05, "loss": 0.4569, "step": 6364 }, { "epoch": 0.8732935446250943, "grad_norm": 1.2578125, "learning_rate": 1.618203683669747e-05, "loss": 0.5248, "step": 6365 }, { "epoch": 0.8734307470672978, "grad_norm": 1.125, "learning_rate": 1.6180902170111946e-05, "loss": 0.492, "step": 6366 }, { "epoch": 0.8735679495095012, "grad_norm": 1.2578125, "learning_rate": 1.6179767374738777e-05, "loss": 0.5708, "step": 6367 }, { "epoch": 0.8737051519517047, "grad_norm": 1.28125, "learning_rate": 1.6178632450601618e-05, "loss": 0.5695, "step": 6368 }, { "epoch": 0.8738423543939082, "grad_norm": 1.2109375, "learning_rate": 1.6177497397724114e-05, "loss": 0.472, "step": 6369 }, { "epoch": 0.8739795568361117, "grad_norm": 1.2265625, "learning_rate": 1.617636221612992e-05, "loss": 0.5172, "step": 6370 }, { "epoch": 0.8741167592783151, "grad_norm": 1.2265625, "learning_rate": 1.6175226905842674e-05, "loss": 0.5588, "step": 6371 }, { "epoch": 0.8742539617205186, "grad_norm": 1.109375, "learning_rate": 1.6174091466886048e-05, "loss": 0.4348, "step": 6372 }, { "epoch": 0.8743911641627221, "grad_norm": 1.1171875, "learning_rate": 1.617295589928369e-05, "loss": 0.4571, "step": 6373 }, { "epoch": 0.8745283666049256, "grad_norm": 1.2265625, "learning_rate": 1.6171820203059267e-05, "loss": 0.4943, "step": 6374 }, { "epoch": 0.874665569047129, "grad_norm": 1.2734375, "learning_rate": 1.617068437823644e-05, "loss": 0.5478, "step": 6375 }, { "epoch": 0.8748027714893325, "grad_norm": 1.203125, "learning_rate": 1.6169548424838878e-05, "loss": 0.5238, "step": 6376 }, { "epoch": 0.874939973931536, "grad_norm": 1.15625, "learning_rate": 1.616841234289025e-05, "loss": 0.4978, "step": 6377 }, { "epoch": 0.8750771763737395, "grad_norm": 1.234375, "learning_rate": 1.6167276132414222e-05, "loss": 0.5529, "step": 6378 }, { "epoch": 0.8752143788159429, "grad_norm": 1.171875, "learning_rate": 1.6166139793434475e-05, "loss": 0.4641, "step": 6379 }, { "epoch": 0.8753515812581464, "grad_norm": 1.109375, "learning_rate": 1.616500332597468e-05, "loss": 0.4672, "step": 6380 }, { "epoch": 0.8754887837003499, "grad_norm": 1.203125, "learning_rate": 1.6163866730058527e-05, "loss": 0.5461, "step": 6381 }, { "epoch": 0.8756259861425534, "grad_norm": 1.2421875, "learning_rate": 1.616273000570969e-05, "loss": 0.5282, "step": 6382 }, { "epoch": 0.8757631885847568, "grad_norm": 1.203125, "learning_rate": 1.616159315295186e-05, "loss": 0.4745, "step": 6383 }, { "epoch": 0.8759003910269603, "grad_norm": 1.3828125, "learning_rate": 1.6160456171808716e-05, "loss": 0.5781, "step": 6384 }, { "epoch": 0.8760375934691638, "grad_norm": 1.1953125, "learning_rate": 1.6159319062303954e-05, "loss": 0.51, "step": 6385 }, { "epoch": 0.8761747959113673, "grad_norm": 1.109375, "learning_rate": 1.615818182446127e-05, "loss": 0.4621, "step": 6386 }, { "epoch": 0.8763119983535707, "grad_norm": 1.15625, "learning_rate": 1.615704445830436e-05, "loss": 0.5064, "step": 6387 }, { "epoch": 0.8764492007957742, "grad_norm": 1.125, "learning_rate": 1.6155906963856912e-05, "loss": 0.4734, "step": 6388 }, { "epoch": 0.8765864032379777, "grad_norm": 1.25, "learning_rate": 1.6154769341142643e-05, "loss": 0.5403, "step": 6389 }, { "epoch": 0.8767236056801812, "grad_norm": 1.109375, "learning_rate": 1.6153631590185245e-05, "loss": 0.4994, "step": 6390 }, { "epoch": 0.8768608081223845, "grad_norm": 1.265625, "learning_rate": 1.615249371100843e-05, "loss": 0.5223, "step": 6391 }, { "epoch": 0.876998010564588, "grad_norm": 1.2109375, "learning_rate": 1.6151355703635903e-05, "loss": 0.501, "step": 6392 }, { "epoch": 0.8771352130067915, "grad_norm": 1.1953125, "learning_rate": 1.6150217568091384e-05, "loss": 0.4796, "step": 6393 }, { "epoch": 0.877272415448995, "grad_norm": 1.1875, "learning_rate": 1.614907930439858e-05, "loss": 0.5212, "step": 6394 }, { "epoch": 0.8774096178911984, "grad_norm": 1.171875, "learning_rate": 1.614794091258121e-05, "loss": 0.5241, "step": 6395 }, { "epoch": 0.8775468203334019, "grad_norm": 1.1171875, "learning_rate": 1.6146802392663e-05, "loss": 0.4936, "step": 6396 }, { "epoch": 0.8776840227756054, "grad_norm": 1.140625, "learning_rate": 1.6145663744667663e-05, "loss": 0.4706, "step": 6397 }, { "epoch": 0.8778212252178089, "grad_norm": 1.2109375, "learning_rate": 1.6144524968618926e-05, "loss": 0.4715, "step": 6398 }, { "epoch": 0.8779584276600123, "grad_norm": 1.3125, "learning_rate": 1.6143386064540523e-05, "loss": 0.5095, "step": 6399 }, { "epoch": 0.8780956301022158, "grad_norm": 1.1484375, "learning_rate": 1.614224703245618e-05, "loss": 0.4942, "step": 6400 }, { "epoch": 0.8782328325444193, "grad_norm": 1.125, "learning_rate": 1.6141107872389634e-05, "loss": 0.4903, "step": 6401 }, { "epoch": 0.8783700349866228, "grad_norm": 1.3046875, "learning_rate": 1.613996858436462e-05, "loss": 0.5781, "step": 6402 }, { "epoch": 0.8785072374288262, "grad_norm": 1.1953125, "learning_rate": 1.613882916840487e-05, "loss": 0.5173, "step": 6403 }, { "epoch": 0.8786444398710297, "grad_norm": 1.296875, "learning_rate": 1.6137689624534136e-05, "loss": 0.5072, "step": 6404 }, { "epoch": 0.8787816423132332, "grad_norm": 1.109375, "learning_rate": 1.6136549952776152e-05, "loss": 0.4913, "step": 6405 }, { "epoch": 0.8789188447554367, "grad_norm": 1.1015625, "learning_rate": 1.6135410153154672e-05, "loss": 0.5056, "step": 6406 }, { "epoch": 0.8790560471976401, "grad_norm": 1.1640625, "learning_rate": 1.6134270225693444e-05, "loss": 0.4874, "step": 6407 }, { "epoch": 0.8791932496398436, "grad_norm": 1.2109375, "learning_rate": 1.6133130170416214e-05, "loss": 0.5476, "step": 6408 }, { "epoch": 0.8793304520820471, "grad_norm": 1.203125, "learning_rate": 1.6131989987346744e-05, "loss": 0.5258, "step": 6409 }, { "epoch": 0.8794676545242506, "grad_norm": 1.2734375, "learning_rate": 1.6130849676508788e-05, "loss": 0.5753, "step": 6410 }, { "epoch": 0.879604856966454, "grad_norm": 1.3046875, "learning_rate": 1.6129709237926107e-05, "loss": 0.571, "step": 6411 }, { "epoch": 0.8797420594086575, "grad_norm": 1.140625, "learning_rate": 1.6128568671622465e-05, "loss": 0.5089, "step": 6412 }, { "epoch": 0.879879261850861, "grad_norm": 1.140625, "learning_rate": 1.612742797762162e-05, "loss": 0.4852, "step": 6413 }, { "epoch": 0.8800164642930645, "grad_norm": 1.0703125, "learning_rate": 1.6126287155947347e-05, "loss": 0.4594, "step": 6414 }, { "epoch": 0.8801536667352678, "grad_norm": 1.1796875, "learning_rate": 1.6125146206623416e-05, "loss": 0.4638, "step": 6415 }, { "epoch": 0.8802908691774713, "grad_norm": 1.1640625, "learning_rate": 1.61240051296736e-05, "loss": 0.4642, "step": 6416 }, { "epoch": 0.8804280716196748, "grad_norm": 1.2578125, "learning_rate": 1.6122863925121673e-05, "loss": 0.5031, "step": 6417 }, { "epoch": 0.8805652740618783, "grad_norm": 1.15625, "learning_rate": 1.6121722592991414e-05, "loss": 0.491, "step": 6418 }, { "epoch": 0.8807024765040817, "grad_norm": 1.203125, "learning_rate": 1.6120581133306605e-05, "loss": 0.5403, "step": 6419 }, { "epoch": 0.8808396789462852, "grad_norm": 1.1484375, "learning_rate": 1.611943954609103e-05, "loss": 0.4669, "step": 6420 }, { "epoch": 0.8809768813884887, "grad_norm": 1.125, "learning_rate": 1.6118297831368472e-05, "loss": 0.4704, "step": 6421 }, { "epoch": 0.8811140838306922, "grad_norm": 1.1796875, "learning_rate": 1.611715598916273e-05, "loss": 0.5502, "step": 6422 }, { "epoch": 0.8812512862728956, "grad_norm": 1.2890625, "learning_rate": 1.6116014019497585e-05, "loss": 0.5089, "step": 6423 }, { "epoch": 0.8813884887150991, "grad_norm": 1.1953125, "learning_rate": 1.6114871922396834e-05, "loss": 0.5201, "step": 6424 }, { "epoch": 0.8815256911573026, "grad_norm": 1.234375, "learning_rate": 1.611372969788428e-05, "loss": 0.5508, "step": 6425 }, { "epoch": 0.8816628935995061, "grad_norm": 1.15625, "learning_rate": 1.6112587345983716e-05, "loss": 0.4865, "step": 6426 }, { "epoch": 0.8818000960417095, "grad_norm": 1.1875, "learning_rate": 1.611144486671895e-05, "loss": 0.4931, "step": 6427 }, { "epoch": 0.881937298483913, "grad_norm": 1.1796875, "learning_rate": 1.6110302260113782e-05, "loss": 0.4733, "step": 6428 }, { "epoch": 0.8820745009261165, "grad_norm": 1.203125, "learning_rate": 1.610915952619202e-05, "loss": 0.4463, "step": 6429 }, { "epoch": 0.88221170336832, "grad_norm": 1.0859375, "learning_rate": 1.6108016664977482e-05, "loss": 0.4485, "step": 6430 }, { "epoch": 0.8823489058105234, "grad_norm": 1.25, "learning_rate": 1.6106873676493972e-05, "loss": 0.5376, "step": 6431 }, { "epoch": 0.8824861082527269, "grad_norm": 1.1796875, "learning_rate": 1.610573056076531e-05, "loss": 0.4934, "step": 6432 }, { "epoch": 0.8826233106949304, "grad_norm": 1.21875, "learning_rate": 1.6104587317815316e-05, "loss": 0.4716, "step": 6433 }, { "epoch": 0.8827605131371339, "grad_norm": 1.203125, "learning_rate": 1.6103443947667807e-05, "loss": 0.5311, "step": 6434 }, { "epoch": 0.8828977155793373, "grad_norm": 1.2890625, "learning_rate": 1.610230045034661e-05, "loss": 0.5305, "step": 6435 }, { "epoch": 0.8830349180215408, "grad_norm": 1.3046875, "learning_rate": 1.610115682587555e-05, "loss": 0.5555, "step": 6436 }, { "epoch": 0.8831721204637443, "grad_norm": 1.421875, "learning_rate": 1.6100013074278454e-05, "loss": 0.573, "step": 6437 }, { "epoch": 0.8833093229059478, "grad_norm": 1.203125, "learning_rate": 1.6098869195579157e-05, "loss": 0.4853, "step": 6438 }, { "epoch": 0.8834465253481512, "grad_norm": 1.15625, "learning_rate": 1.609772518980149e-05, "loss": 0.5182, "step": 6439 }, { "epoch": 0.8835837277903547, "grad_norm": 1.1640625, "learning_rate": 1.6096581056969297e-05, "loss": 0.4796, "step": 6440 }, { "epoch": 0.8837209302325582, "grad_norm": 1.125, "learning_rate": 1.6095436797106406e-05, "loss": 0.5204, "step": 6441 }, { "epoch": 0.8838581326747617, "grad_norm": 1.3203125, "learning_rate": 1.6094292410236668e-05, "loss": 0.5113, "step": 6442 }, { "epoch": 0.883995335116965, "grad_norm": 1.2890625, "learning_rate": 1.6093147896383928e-05, "loss": 0.6049, "step": 6443 }, { "epoch": 0.8841325375591685, "grad_norm": 1.09375, "learning_rate": 1.609200325557203e-05, "loss": 0.4246, "step": 6444 }, { "epoch": 0.884269740001372, "grad_norm": 1.15625, "learning_rate": 1.609085848782482e-05, "loss": 0.4169, "step": 6445 }, { "epoch": 0.8844069424435755, "grad_norm": 1.2890625, "learning_rate": 1.6089713593166163e-05, "loss": 0.5823, "step": 6446 }, { "epoch": 0.8845441448857789, "grad_norm": 1.2265625, "learning_rate": 1.6088568571619906e-05, "loss": 0.5441, "step": 6447 }, { "epoch": 0.8846813473279824, "grad_norm": 1.1484375, "learning_rate": 1.6087423423209906e-05, "loss": 0.471, "step": 6448 }, { "epoch": 0.8848185497701859, "grad_norm": 1.1484375, "learning_rate": 1.6086278147960026e-05, "loss": 0.4719, "step": 6449 }, { "epoch": 0.8849557522123894, "grad_norm": 1.1875, "learning_rate": 1.6085132745894134e-05, "loss": 0.4457, "step": 6450 }, { "epoch": 0.8850929546545928, "grad_norm": 1.28125, "learning_rate": 1.6083987217036087e-05, "loss": 0.5483, "step": 6451 }, { "epoch": 0.8852301570967963, "grad_norm": 1.1484375, "learning_rate": 1.608284156140976e-05, "loss": 0.4808, "step": 6452 }, { "epoch": 0.8853673595389998, "grad_norm": 1.1484375, "learning_rate": 1.6081695779039026e-05, "loss": 0.5028, "step": 6453 }, { "epoch": 0.8855045619812033, "grad_norm": 1.21875, "learning_rate": 1.6080549869947754e-05, "loss": 0.5502, "step": 6454 }, { "epoch": 0.8856417644234067, "grad_norm": 1.1171875, "learning_rate": 1.607940383415982e-05, "loss": 0.4518, "step": 6455 }, { "epoch": 0.8857789668656102, "grad_norm": 1.265625, "learning_rate": 1.607825767169911e-05, "loss": 0.4958, "step": 6456 }, { "epoch": 0.8859161693078137, "grad_norm": 1.03125, "learning_rate": 1.6077111382589497e-05, "loss": 0.4284, "step": 6457 }, { "epoch": 0.8860533717500172, "grad_norm": 1.171875, "learning_rate": 1.607596496685487e-05, "loss": 0.4931, "step": 6458 }, { "epoch": 0.8861905741922206, "grad_norm": 1.21875, "learning_rate": 1.607481842451912e-05, "loss": 0.531, "step": 6459 }, { "epoch": 0.8863277766344241, "grad_norm": 1.140625, "learning_rate": 1.607367175560613e-05, "loss": 0.4692, "step": 6460 }, { "epoch": 0.8864649790766276, "grad_norm": 1.3203125, "learning_rate": 1.6072524960139796e-05, "loss": 0.559, "step": 6461 }, { "epoch": 0.8866021815188311, "grad_norm": 1.1015625, "learning_rate": 1.607137803814401e-05, "loss": 0.4855, "step": 6462 }, { "epoch": 0.8867393839610345, "grad_norm": 1.1875, "learning_rate": 1.6070230989642676e-05, "loss": 0.5035, "step": 6463 }, { "epoch": 0.886876586403238, "grad_norm": 1.1171875, "learning_rate": 1.606908381465969e-05, "loss": 0.4548, "step": 6464 }, { "epoch": 0.8870137888454415, "grad_norm": 1.328125, "learning_rate": 1.6067936513218954e-05, "loss": 0.571, "step": 6465 }, { "epoch": 0.887150991287645, "grad_norm": 1.1328125, "learning_rate": 1.6066789085344377e-05, "loss": 0.4492, "step": 6466 }, { "epoch": 0.8872881937298484, "grad_norm": 1.2109375, "learning_rate": 1.6065641531059862e-05, "loss": 0.5401, "step": 6467 }, { "epoch": 0.8874253961720519, "grad_norm": 1.265625, "learning_rate": 1.6064493850389328e-05, "loss": 0.5318, "step": 6468 }, { "epoch": 0.8875625986142553, "grad_norm": 1.1484375, "learning_rate": 1.606334604335668e-05, "loss": 0.4965, "step": 6469 }, { "epoch": 0.8876998010564588, "grad_norm": 1.1796875, "learning_rate": 1.6062198109985837e-05, "loss": 0.5254, "step": 6470 }, { "epoch": 0.8878370034986622, "grad_norm": 1.2109375, "learning_rate": 1.6061050050300724e-05, "loss": 0.5101, "step": 6471 }, { "epoch": 0.8879742059408657, "grad_norm": 1.140625, "learning_rate": 1.605990186432525e-05, "loss": 0.5116, "step": 6472 }, { "epoch": 0.8881114083830692, "grad_norm": 1.1796875, "learning_rate": 1.6058753552083353e-05, "loss": 0.5224, "step": 6473 }, { "epoch": 0.8882486108252727, "grad_norm": 1.265625, "learning_rate": 1.605760511359895e-05, "loss": 0.541, "step": 6474 }, { "epoch": 0.8883858132674761, "grad_norm": 1.328125, "learning_rate": 1.605645654889597e-05, "loss": 0.6152, "step": 6475 }, { "epoch": 0.8885230157096796, "grad_norm": 1.2578125, "learning_rate": 1.605530785799835e-05, "loss": 0.557, "step": 6476 }, { "epoch": 0.8886602181518831, "grad_norm": 1.5078125, "learning_rate": 1.6054159040930025e-05, "loss": 0.6293, "step": 6477 }, { "epoch": 0.8887974205940866, "grad_norm": 1.1875, "learning_rate": 1.6053010097714925e-05, "loss": 0.5104, "step": 6478 }, { "epoch": 0.88893462303629, "grad_norm": 1.2265625, "learning_rate": 1.6051861028376998e-05, "loss": 0.5293, "step": 6479 }, { "epoch": 0.8890718254784935, "grad_norm": 1.1796875, "learning_rate": 1.605071183294018e-05, "loss": 0.5092, "step": 6480 }, { "epoch": 0.889209027920697, "grad_norm": 1.15625, "learning_rate": 1.6049562511428426e-05, "loss": 0.4681, "step": 6481 }, { "epoch": 0.8893462303629005, "grad_norm": 1.265625, "learning_rate": 1.604841306386567e-05, "loss": 0.5514, "step": 6482 }, { "epoch": 0.8894834328051039, "grad_norm": 1.25, "learning_rate": 1.6047263490275874e-05, "loss": 0.5893, "step": 6483 }, { "epoch": 0.8896206352473074, "grad_norm": 1.171875, "learning_rate": 1.604611379068298e-05, "loss": 0.4931, "step": 6484 }, { "epoch": 0.8897578376895109, "grad_norm": 1.0703125, "learning_rate": 1.6044963965110955e-05, "loss": 0.4188, "step": 6485 }, { "epoch": 0.8898950401317144, "grad_norm": 1.1875, "learning_rate": 1.604381401358375e-05, "loss": 0.556, "step": 6486 }, { "epoch": 0.8900322425739178, "grad_norm": 1.21875, "learning_rate": 1.6042663936125326e-05, "loss": 0.5307, "step": 6487 }, { "epoch": 0.8901694450161213, "grad_norm": 1.0703125, "learning_rate": 1.6041513732759652e-05, "loss": 0.4217, "step": 6488 }, { "epoch": 0.8903066474583248, "grad_norm": 1.2578125, "learning_rate": 1.6040363403510686e-05, "loss": 0.4736, "step": 6489 }, { "epoch": 0.8904438499005283, "grad_norm": 1.28125, "learning_rate": 1.60392129484024e-05, "loss": 0.5563, "step": 6490 }, { "epoch": 0.8905810523427317, "grad_norm": 1.2890625, "learning_rate": 1.6038062367458774e-05, "loss": 0.5551, "step": 6491 }, { "epoch": 0.8907182547849352, "grad_norm": 1.140625, "learning_rate": 1.6036911660703767e-05, "loss": 0.474, "step": 6492 }, { "epoch": 0.8908554572271387, "grad_norm": 1.0859375, "learning_rate": 1.6035760828161365e-05, "loss": 0.4484, "step": 6493 }, { "epoch": 0.8909926596693422, "grad_norm": 1.203125, "learning_rate": 1.6034609869855544e-05, "loss": 0.4573, "step": 6494 }, { "epoch": 0.8911298621115455, "grad_norm": 1.2578125, "learning_rate": 1.603345878581029e-05, "loss": 0.5352, "step": 6495 }, { "epoch": 0.891267064553749, "grad_norm": 1.21875, "learning_rate": 1.603230757604958e-05, "loss": 0.5486, "step": 6496 }, { "epoch": 0.8914042669959525, "grad_norm": 1.3125, "learning_rate": 1.6031156240597408e-05, "loss": 0.526, "step": 6497 }, { "epoch": 0.891541469438156, "grad_norm": 1.1953125, "learning_rate": 1.6030004779477758e-05, "loss": 0.487, "step": 6498 }, { "epoch": 0.8916786718803594, "grad_norm": 1.1875, "learning_rate": 1.602885319271463e-05, "loss": 0.5016, "step": 6499 }, { "epoch": 0.8918158743225629, "grad_norm": 1.0390625, "learning_rate": 1.602770148033201e-05, "loss": 0.4423, "step": 6500 }, { "epoch": 0.8919530767647664, "grad_norm": 1.1015625, "learning_rate": 1.60265496423539e-05, "loss": 0.4528, "step": 6501 }, { "epoch": 0.8920902792069699, "grad_norm": 1.1875, "learning_rate": 1.6025397678804302e-05, "loss": 0.4993, "step": 6502 }, { "epoch": 0.8922274816491733, "grad_norm": 1.171875, "learning_rate": 1.6024245589707216e-05, "loss": 0.5104, "step": 6503 }, { "epoch": 0.8923646840913768, "grad_norm": 1.203125, "learning_rate": 1.6023093375086647e-05, "loss": 0.4993, "step": 6504 }, { "epoch": 0.8925018865335803, "grad_norm": 1.2265625, "learning_rate": 1.6021941034966603e-05, "loss": 0.4481, "step": 6505 }, { "epoch": 0.8926390889757838, "grad_norm": 1.296875, "learning_rate": 1.6020788569371096e-05, "loss": 0.5837, "step": 6506 }, { "epoch": 0.8927762914179872, "grad_norm": 1.234375, "learning_rate": 1.601963597832414e-05, "loss": 0.5451, "step": 6507 }, { "epoch": 0.8929134938601907, "grad_norm": 1.15625, "learning_rate": 1.6018483261849745e-05, "loss": 0.5161, "step": 6508 }, { "epoch": 0.8930506963023942, "grad_norm": 1.3046875, "learning_rate": 1.6017330419971938e-05, "loss": 0.546, "step": 6509 }, { "epoch": 0.8931878987445977, "grad_norm": 1.171875, "learning_rate": 1.6016177452714737e-05, "loss": 0.4972, "step": 6510 }, { "epoch": 0.8933251011868011, "grad_norm": 1.1015625, "learning_rate": 1.601502436010216e-05, "loss": 0.4435, "step": 6511 }, { "epoch": 0.8934623036290046, "grad_norm": 1.1484375, "learning_rate": 1.601387114215824e-05, "loss": 0.4859, "step": 6512 }, { "epoch": 0.8935995060712081, "grad_norm": 1.125, "learning_rate": 1.6012717798907005e-05, "loss": 0.4385, "step": 6513 }, { "epoch": 0.8937367085134116, "grad_norm": 1.296875, "learning_rate": 1.6011564330372485e-05, "loss": 0.5838, "step": 6514 }, { "epoch": 0.893873910955615, "grad_norm": 1.1796875, "learning_rate": 1.6010410736578715e-05, "loss": 0.5037, "step": 6515 }, { "epoch": 0.8940111133978185, "grad_norm": 1.25, "learning_rate": 1.6009257017549728e-05, "loss": 0.5016, "step": 6516 }, { "epoch": 0.894148315840022, "grad_norm": 1.1640625, "learning_rate": 1.600810317330957e-05, "loss": 0.5134, "step": 6517 }, { "epoch": 0.8942855182822255, "grad_norm": 1.1640625, "learning_rate": 1.600694920388228e-05, "loss": 0.484, "step": 6518 }, { "epoch": 0.8944227207244289, "grad_norm": 1.1484375, "learning_rate": 1.6005795109291897e-05, "loss": 0.462, "step": 6519 }, { "epoch": 0.8945599231666324, "grad_norm": 1.3515625, "learning_rate": 1.6004640889562474e-05, "loss": 0.6266, "step": 6520 }, { "epoch": 0.8946971256088359, "grad_norm": 1.1796875, "learning_rate": 1.6003486544718064e-05, "loss": 0.4849, "step": 6521 }, { "epoch": 0.8948343280510394, "grad_norm": 1.2109375, "learning_rate": 1.6002332074782715e-05, "loss": 0.5188, "step": 6522 }, { "epoch": 0.8949715304932427, "grad_norm": 1.1328125, "learning_rate": 1.600117747978048e-05, "loss": 0.4785, "step": 6523 }, { "epoch": 0.8951087329354462, "grad_norm": 1.0859375, "learning_rate": 1.6000022759735417e-05, "loss": 0.4423, "step": 6524 }, { "epoch": 0.8952459353776497, "grad_norm": 1.28125, "learning_rate": 1.599886791467159e-05, "loss": 0.5722, "step": 6525 }, { "epoch": 0.8953831378198532, "grad_norm": 1.15625, "learning_rate": 1.599771294461306e-05, "loss": 0.5039, "step": 6526 }, { "epoch": 0.8955203402620566, "grad_norm": 1.09375, "learning_rate": 1.5996557849583893e-05, "loss": 0.46, "step": 6527 }, { "epoch": 0.8956575427042601, "grad_norm": 1.171875, "learning_rate": 1.5995402629608153e-05, "loss": 0.5035, "step": 6528 }, { "epoch": 0.8957947451464636, "grad_norm": 1.2265625, "learning_rate": 1.5994247284709917e-05, "loss": 0.5056, "step": 6529 }, { "epoch": 0.8959319475886671, "grad_norm": 1.203125, "learning_rate": 1.5993091814913254e-05, "loss": 0.5144, "step": 6530 }, { "epoch": 0.8960691500308705, "grad_norm": 1.3671875, "learning_rate": 1.599193622024224e-05, "loss": 0.5175, "step": 6531 }, { "epoch": 0.896206352473074, "grad_norm": 1.1484375, "learning_rate": 1.5990780500720953e-05, "loss": 0.5177, "step": 6532 }, { "epoch": 0.8963435549152775, "grad_norm": 1.1796875, "learning_rate": 1.598962465637348e-05, "loss": 0.5186, "step": 6533 }, { "epoch": 0.896480757357481, "grad_norm": 1.09375, "learning_rate": 1.59884686872239e-05, "loss": 0.4556, "step": 6534 }, { "epoch": 0.8966179597996844, "grad_norm": 1.140625, "learning_rate": 1.5987312593296294e-05, "loss": 0.4757, "step": 6535 }, { "epoch": 0.8967551622418879, "grad_norm": 1.15625, "learning_rate": 1.5986156374614757e-05, "loss": 0.4757, "step": 6536 }, { "epoch": 0.8968923646840914, "grad_norm": 1.171875, "learning_rate": 1.598500003120338e-05, "loss": 0.5391, "step": 6537 }, { "epoch": 0.8970295671262949, "grad_norm": 1.2109375, "learning_rate": 1.5983843563086255e-05, "loss": 0.5511, "step": 6538 }, { "epoch": 0.8971667695684983, "grad_norm": 1.203125, "learning_rate": 1.5982686970287483e-05, "loss": 0.5058, "step": 6539 }, { "epoch": 0.8973039720107018, "grad_norm": 1.15625, "learning_rate": 1.5981530252831157e-05, "loss": 0.4992, "step": 6540 }, { "epoch": 0.8974411744529053, "grad_norm": 1.296875, "learning_rate": 1.5980373410741387e-05, "loss": 0.531, "step": 6541 }, { "epoch": 0.8975783768951088, "grad_norm": 1.078125, "learning_rate": 1.5979216444042265e-05, "loss": 0.5207, "step": 6542 }, { "epoch": 0.8977155793373122, "grad_norm": 1.0859375, "learning_rate": 1.597805935275791e-05, "loss": 0.4195, "step": 6543 }, { "epoch": 0.8978527817795157, "grad_norm": 1.15625, "learning_rate": 1.5976902136912425e-05, "loss": 0.4869, "step": 6544 }, { "epoch": 0.8979899842217192, "grad_norm": 1.15625, "learning_rate": 1.5975744796529926e-05, "loss": 0.4988, "step": 6545 }, { "epoch": 0.8981271866639227, "grad_norm": 1.1484375, "learning_rate": 1.5974587331634526e-05, "loss": 0.5137, "step": 6546 }, { "epoch": 0.898264389106126, "grad_norm": 1.1953125, "learning_rate": 1.5973429742250338e-05, "loss": 0.553, "step": 6547 }, { "epoch": 0.8984015915483295, "grad_norm": 1.171875, "learning_rate": 1.5972272028401492e-05, "loss": 0.518, "step": 6548 }, { "epoch": 0.898538793990533, "grad_norm": 1.171875, "learning_rate": 1.5971114190112102e-05, "loss": 0.5077, "step": 6549 }, { "epoch": 0.8986759964327365, "grad_norm": 1.140625, "learning_rate": 1.5969956227406297e-05, "loss": 0.4724, "step": 6550 }, { "epoch": 0.8988131988749399, "grad_norm": 1.203125, "learning_rate": 1.5968798140308202e-05, "loss": 0.5647, "step": 6551 }, { "epoch": 0.8989504013171434, "grad_norm": 1.21875, "learning_rate": 1.596763992884195e-05, "loss": 0.5179, "step": 6552 }, { "epoch": 0.8990876037593469, "grad_norm": 1.2890625, "learning_rate": 1.596648159303167e-05, "loss": 0.5859, "step": 6553 }, { "epoch": 0.8992248062015504, "grad_norm": 1.328125, "learning_rate": 1.596532313290151e-05, "loss": 0.5707, "step": 6554 }, { "epoch": 0.8993620086437538, "grad_norm": 1.2265625, "learning_rate": 1.5964164548475592e-05, "loss": 0.5564, "step": 6555 }, { "epoch": 0.8994992110859573, "grad_norm": 1.1796875, "learning_rate": 1.596300583977806e-05, "loss": 0.533, "step": 6556 }, { "epoch": 0.8996364135281608, "grad_norm": 1.2265625, "learning_rate": 1.5961847006833068e-05, "loss": 0.5499, "step": 6557 }, { "epoch": 0.8997736159703643, "grad_norm": 1.1328125, "learning_rate": 1.596068804966475e-05, "loss": 0.4523, "step": 6558 }, { "epoch": 0.8999108184125677, "grad_norm": 1.0859375, "learning_rate": 1.595952896829726e-05, "loss": 0.4287, "step": 6559 }, { "epoch": 0.9000480208547712, "grad_norm": 1.3359375, "learning_rate": 1.5958369762754746e-05, "loss": 0.5783, "step": 6560 }, { "epoch": 0.9001852232969747, "grad_norm": 1.15625, "learning_rate": 1.5957210433061367e-05, "loss": 0.4992, "step": 6561 }, { "epoch": 0.9003224257391782, "grad_norm": 1.171875, "learning_rate": 1.5956050979241276e-05, "loss": 0.5065, "step": 6562 }, { "epoch": 0.9004596281813816, "grad_norm": 1.1640625, "learning_rate": 1.5954891401318627e-05, "loss": 0.3943, "step": 6563 }, { "epoch": 0.9005968306235851, "grad_norm": 1.0078125, "learning_rate": 1.5953731699317592e-05, "loss": 0.3938, "step": 6564 }, { "epoch": 0.9007340330657886, "grad_norm": 1.203125, "learning_rate": 1.5952571873262326e-05, "loss": 0.5286, "step": 6565 }, { "epoch": 0.9008712355079921, "grad_norm": 1.0625, "learning_rate": 1.5951411923177e-05, "loss": 0.4335, "step": 6566 }, { "epoch": 0.9010084379501955, "grad_norm": 1.203125, "learning_rate": 1.595025184908578e-05, "loss": 0.5421, "step": 6567 }, { "epoch": 0.901145640392399, "grad_norm": 1.21875, "learning_rate": 1.594909165101284e-05, "loss": 0.5541, "step": 6568 }, { "epoch": 0.9012828428346025, "grad_norm": 1.1640625, "learning_rate": 1.5947931328982357e-05, "loss": 0.4707, "step": 6569 }, { "epoch": 0.901420045276806, "grad_norm": 1.234375, "learning_rate": 1.5946770883018504e-05, "loss": 0.5819, "step": 6570 }, { "epoch": 0.9015572477190094, "grad_norm": 1.0859375, "learning_rate": 1.5945610313145457e-05, "loss": 0.4581, "step": 6571 }, { "epoch": 0.9016944501612129, "grad_norm": 1.0703125, "learning_rate": 1.594444961938741e-05, "loss": 0.4593, "step": 6572 }, { "epoch": 0.9018316526034164, "grad_norm": 1.078125, "learning_rate": 1.5943288801768537e-05, "loss": 0.4408, "step": 6573 }, { "epoch": 0.9019688550456199, "grad_norm": 1.1171875, "learning_rate": 1.5942127860313026e-05, "loss": 0.4419, "step": 6574 }, { "epoch": 0.9021060574878232, "grad_norm": 1.1171875, "learning_rate": 1.594096679504507e-05, "loss": 0.4585, "step": 6575 }, { "epoch": 0.9022432599300267, "grad_norm": 1.3046875, "learning_rate": 1.593980560598886e-05, "loss": 0.6058, "step": 6576 }, { "epoch": 0.9023804623722302, "grad_norm": 1.265625, "learning_rate": 1.5938644293168593e-05, "loss": 0.5668, "step": 6577 }, { "epoch": 0.9025176648144337, "grad_norm": 1.171875, "learning_rate": 1.5937482856608467e-05, "loss": 0.5066, "step": 6578 }, { "epoch": 0.9026548672566371, "grad_norm": 1.09375, "learning_rate": 1.593632129633268e-05, "loss": 0.3968, "step": 6579 }, { "epoch": 0.9027920696988406, "grad_norm": 1.203125, "learning_rate": 1.5935159612365435e-05, "loss": 0.4769, "step": 6580 }, { "epoch": 0.9029292721410441, "grad_norm": 1.2421875, "learning_rate": 1.5933997804730935e-05, "loss": 0.4846, "step": 6581 }, { "epoch": 0.9030664745832476, "grad_norm": 1.1796875, "learning_rate": 1.593283587345339e-05, "loss": 0.5146, "step": 6582 }, { "epoch": 0.903203677025451, "grad_norm": 1.3125, "learning_rate": 1.5931673818557012e-05, "loss": 0.612, "step": 6583 }, { "epoch": 0.9033408794676545, "grad_norm": 1.2578125, "learning_rate": 1.593051164006601e-05, "loss": 0.5534, "step": 6584 }, { "epoch": 0.903478081909858, "grad_norm": 1.1796875, "learning_rate": 1.5929349338004605e-05, "loss": 0.5296, "step": 6585 }, { "epoch": 0.9036152843520615, "grad_norm": 1.1796875, "learning_rate": 1.5928186912397012e-05, "loss": 0.4902, "step": 6586 }, { "epoch": 0.9037524867942649, "grad_norm": 1.0546875, "learning_rate": 1.592702436326745e-05, "loss": 0.3928, "step": 6587 }, { "epoch": 0.9038896892364684, "grad_norm": 1.21875, "learning_rate": 1.5925861690640148e-05, "loss": 0.5253, "step": 6588 }, { "epoch": 0.9040268916786719, "grad_norm": 1.2109375, "learning_rate": 1.5924698894539327e-05, "loss": 0.5104, "step": 6589 }, { "epoch": 0.9041640941208754, "grad_norm": 1.1640625, "learning_rate": 1.5923535974989217e-05, "loss": 0.4723, "step": 6590 }, { "epoch": 0.9043012965630788, "grad_norm": 1.1015625, "learning_rate": 1.592237293201405e-05, "loss": 0.444, "step": 6591 }, { "epoch": 0.9044384990052823, "grad_norm": 1.015625, "learning_rate": 1.5921209765638055e-05, "loss": 0.3912, "step": 6592 }, { "epoch": 0.9045757014474858, "grad_norm": 1.21875, "learning_rate": 1.5920046475885474e-05, "loss": 0.5239, "step": 6593 }, { "epoch": 0.9047129038896893, "grad_norm": 1.21875, "learning_rate": 1.591888306278054e-05, "loss": 0.4953, "step": 6594 }, { "epoch": 0.9048501063318927, "grad_norm": 1.125, "learning_rate": 1.5917719526347504e-05, "loss": 0.4494, "step": 6595 }, { "epoch": 0.9049873087740962, "grad_norm": 1.265625, "learning_rate": 1.59165558666106e-05, "loss": 0.5424, "step": 6596 }, { "epoch": 0.9051245112162997, "grad_norm": 1.078125, "learning_rate": 1.5915392083594076e-05, "loss": 0.499, "step": 6597 }, { "epoch": 0.9052617136585032, "grad_norm": 1.1796875, "learning_rate": 1.5914228177322188e-05, "loss": 0.5107, "step": 6598 }, { "epoch": 0.9053989161007066, "grad_norm": 1.2109375, "learning_rate": 1.5913064147819176e-05, "loss": 0.4959, "step": 6599 }, { "epoch": 0.90553611854291, "grad_norm": 1.25, "learning_rate": 1.5911899995109306e-05, "loss": 0.5578, "step": 6600 }, { "epoch": 0.9056733209851136, "grad_norm": 1.2109375, "learning_rate": 1.5910735719216827e-05, "loss": 0.5453, "step": 6601 }, { "epoch": 0.905810523427317, "grad_norm": 1.09375, "learning_rate": 1.5909571320166004e-05, "loss": 0.4871, "step": 6602 }, { "epoch": 0.9059477258695204, "grad_norm": 1.15625, "learning_rate": 1.5908406797981095e-05, "loss": 0.4983, "step": 6603 }, { "epoch": 0.9060849283117239, "grad_norm": 1.046875, "learning_rate": 1.5907242152686363e-05, "loss": 0.4446, "step": 6604 }, { "epoch": 0.9062221307539274, "grad_norm": 1.2265625, "learning_rate": 1.590607738430608e-05, "loss": 0.4779, "step": 6605 }, { "epoch": 0.9063593331961309, "grad_norm": 1.0546875, "learning_rate": 1.5904912492864508e-05, "loss": 0.4234, "step": 6606 }, { "epoch": 0.9064965356383343, "grad_norm": 1.5390625, "learning_rate": 1.5903747478385926e-05, "loss": 0.6641, "step": 6607 }, { "epoch": 0.9066337380805378, "grad_norm": 1.125, "learning_rate": 1.5902582340894606e-05, "loss": 0.4617, "step": 6608 }, { "epoch": 0.9067709405227413, "grad_norm": 1.15625, "learning_rate": 1.590141708041483e-05, "loss": 0.508, "step": 6609 }, { "epoch": 0.9069081429649448, "grad_norm": 1.1640625, "learning_rate": 1.590025169697087e-05, "loss": 0.4798, "step": 6610 }, { "epoch": 0.9070453454071482, "grad_norm": 1.0859375, "learning_rate": 1.589908619058701e-05, "loss": 0.4793, "step": 6611 }, { "epoch": 0.9071825478493517, "grad_norm": 1.1953125, "learning_rate": 1.589792056128754e-05, "loss": 0.5109, "step": 6612 }, { "epoch": 0.9073197502915552, "grad_norm": 1.2109375, "learning_rate": 1.5896754809096748e-05, "loss": 0.5326, "step": 6613 }, { "epoch": 0.9074569527337587, "grad_norm": 1.125, "learning_rate": 1.5895588934038916e-05, "loss": 0.4471, "step": 6614 }, { "epoch": 0.9075941551759621, "grad_norm": 1.21875, "learning_rate": 1.589442293613834e-05, "loss": 0.5042, "step": 6615 }, { "epoch": 0.9077313576181656, "grad_norm": 1.1875, "learning_rate": 1.5893256815419318e-05, "loss": 0.5526, "step": 6616 }, { "epoch": 0.9078685600603691, "grad_norm": 1.140625, "learning_rate": 1.5892090571906145e-05, "loss": 0.4928, "step": 6617 }, { "epoch": 0.9080057625025726, "grad_norm": 1.2109375, "learning_rate": 1.5890924205623123e-05, "loss": 0.4626, "step": 6618 }, { "epoch": 0.908142964944776, "grad_norm": 1.328125, "learning_rate": 1.5889757716594556e-05, "loss": 0.5213, "step": 6619 }, { "epoch": 0.9082801673869795, "grad_norm": 1.140625, "learning_rate": 1.5888591104844743e-05, "loss": 0.4767, "step": 6620 }, { "epoch": 0.908417369829183, "grad_norm": 1.125, "learning_rate": 1.5887424370398e-05, "loss": 0.5177, "step": 6621 }, { "epoch": 0.9085545722713865, "grad_norm": 1.1875, "learning_rate": 1.5886257513278635e-05, "loss": 0.4827, "step": 6622 }, { "epoch": 0.9086917747135899, "grad_norm": 1.0859375, "learning_rate": 1.5885090533510957e-05, "loss": 0.4617, "step": 6623 }, { "epoch": 0.9088289771557934, "grad_norm": 1.125, "learning_rate": 1.5883923431119287e-05, "loss": 0.4255, "step": 6624 }, { "epoch": 0.9089661795979969, "grad_norm": 1.1484375, "learning_rate": 1.5882756206127937e-05, "loss": 0.4793, "step": 6625 }, { "epoch": 0.9091033820402004, "grad_norm": 1.2421875, "learning_rate": 1.5881588858561236e-05, "loss": 0.5409, "step": 6626 }, { "epoch": 0.9092405844824037, "grad_norm": 1.1328125, "learning_rate": 1.58804213884435e-05, "loss": 0.4742, "step": 6627 }, { "epoch": 0.9093777869246072, "grad_norm": 1.2109375, "learning_rate": 1.5879253795799062e-05, "loss": 0.5401, "step": 6628 }, { "epoch": 0.9095149893668107, "grad_norm": 1.28125, "learning_rate": 1.587808608065224e-05, "loss": 0.5701, "step": 6629 }, { "epoch": 0.9096521918090142, "grad_norm": 1.2109375, "learning_rate": 1.5876918243027376e-05, "loss": 0.5169, "step": 6630 }, { "epoch": 0.9097893942512176, "grad_norm": 1.1640625, "learning_rate": 1.5875750282948796e-05, "loss": 0.5034, "step": 6631 }, { "epoch": 0.9099265966934211, "grad_norm": 1.125, "learning_rate": 1.5874582200440843e-05, "loss": 0.4833, "step": 6632 }, { "epoch": 0.9100637991356246, "grad_norm": 1.109375, "learning_rate": 1.5873413995527847e-05, "loss": 0.4511, "step": 6633 }, { "epoch": 0.9102010015778281, "grad_norm": 1.140625, "learning_rate": 1.5872245668234155e-05, "loss": 0.4823, "step": 6634 }, { "epoch": 0.9103382040200315, "grad_norm": 1.2421875, "learning_rate": 1.587107721858411e-05, "loss": 0.5429, "step": 6635 }, { "epoch": 0.910475406462235, "grad_norm": 1.171875, "learning_rate": 1.5869908646602055e-05, "loss": 0.5154, "step": 6636 }, { "epoch": 0.9106126089044385, "grad_norm": 1.1640625, "learning_rate": 1.5868739952312345e-05, "loss": 0.5446, "step": 6637 }, { "epoch": 0.910749811346642, "grad_norm": 1.171875, "learning_rate": 1.5867571135739325e-05, "loss": 0.4804, "step": 6638 }, { "epoch": 0.9108870137888454, "grad_norm": 1.2890625, "learning_rate": 1.5866402196907354e-05, "loss": 0.5203, "step": 6639 }, { "epoch": 0.9110242162310489, "grad_norm": 1.1640625, "learning_rate": 1.5865233135840783e-05, "loss": 0.5538, "step": 6640 }, { "epoch": 0.9111614186732524, "grad_norm": 1.25, "learning_rate": 1.5864063952563977e-05, "loss": 0.5227, "step": 6641 }, { "epoch": 0.9112986211154559, "grad_norm": 1.1875, "learning_rate": 1.586289464710129e-05, "loss": 0.5093, "step": 6642 }, { "epoch": 0.9114358235576593, "grad_norm": 1.171875, "learning_rate": 1.5861725219477095e-05, "loss": 0.5345, "step": 6643 }, { "epoch": 0.9115730259998628, "grad_norm": 1.171875, "learning_rate": 1.5860555669715757e-05, "loss": 0.527, "step": 6644 }, { "epoch": 0.9117102284420663, "grad_norm": 1.1640625, "learning_rate": 1.5859385997841636e-05, "loss": 0.5186, "step": 6645 }, { "epoch": 0.9118474308842698, "grad_norm": 1.25, "learning_rate": 1.5858216203879113e-05, "loss": 0.5724, "step": 6646 }, { "epoch": 0.9119846333264732, "grad_norm": 1.1015625, "learning_rate": 1.585704628785256e-05, "loss": 0.482, "step": 6647 }, { "epoch": 0.9121218357686767, "grad_norm": 1.078125, "learning_rate": 1.5855876249786353e-05, "loss": 0.4373, "step": 6648 }, { "epoch": 0.9122590382108802, "grad_norm": 1.140625, "learning_rate": 1.585470608970487e-05, "loss": 0.5072, "step": 6649 }, { "epoch": 0.9123962406530837, "grad_norm": 1.1328125, "learning_rate": 1.5853535807632497e-05, "loss": 0.4467, "step": 6650 }, { "epoch": 0.9125334430952871, "grad_norm": 1.21875, "learning_rate": 1.5852365403593617e-05, "loss": 0.4798, "step": 6651 }, { "epoch": 0.9126706455374906, "grad_norm": 1.3125, "learning_rate": 1.5851194877612614e-05, "loss": 0.4931, "step": 6652 }, { "epoch": 0.912807847979694, "grad_norm": 1.2734375, "learning_rate": 1.585002422971388e-05, "loss": 0.5282, "step": 6653 }, { "epoch": 0.9129450504218976, "grad_norm": 1.1953125, "learning_rate": 1.5848853459921805e-05, "loss": 0.5138, "step": 6654 }, { "epoch": 0.9130822528641009, "grad_norm": 1.328125, "learning_rate": 1.5847682568260786e-05, "loss": 0.5377, "step": 6655 }, { "epoch": 0.9132194553063044, "grad_norm": 1.203125, "learning_rate": 1.5846511554755225e-05, "loss": 0.4775, "step": 6656 }, { "epoch": 0.9133566577485079, "grad_norm": 1.3203125, "learning_rate": 1.584534041942951e-05, "loss": 0.586, "step": 6657 }, { "epoch": 0.9134938601907114, "grad_norm": 1.2578125, "learning_rate": 1.5844169162308052e-05, "loss": 0.5017, "step": 6658 }, { "epoch": 0.9136310626329148, "grad_norm": 1.28125, "learning_rate": 1.584299778341525e-05, "loss": 0.5465, "step": 6659 }, { "epoch": 0.9137682650751183, "grad_norm": 1.1640625, "learning_rate": 1.5841826282775518e-05, "loss": 0.4574, "step": 6660 }, { "epoch": 0.9139054675173218, "grad_norm": 1.2734375, "learning_rate": 1.584065466041326e-05, "loss": 0.5154, "step": 6661 }, { "epoch": 0.9140426699595253, "grad_norm": 1.1953125, "learning_rate": 1.5839482916352887e-05, "loss": 0.4685, "step": 6662 }, { "epoch": 0.9141798724017287, "grad_norm": 1.3125, "learning_rate": 1.5838311050618825e-05, "loss": 0.5878, "step": 6663 }, { "epoch": 0.9143170748439322, "grad_norm": 1.1640625, "learning_rate": 1.583713906323548e-05, "loss": 0.4512, "step": 6664 }, { "epoch": 0.9144542772861357, "grad_norm": 1.2265625, "learning_rate": 1.5835966954227277e-05, "loss": 0.5279, "step": 6665 }, { "epoch": 0.9145914797283392, "grad_norm": 1.1171875, "learning_rate": 1.5834794723618637e-05, "loss": 0.4533, "step": 6666 }, { "epoch": 0.9147286821705426, "grad_norm": 1.2109375, "learning_rate": 1.5833622371433984e-05, "loss": 0.4756, "step": 6667 }, { "epoch": 0.9148658846127461, "grad_norm": 1.1328125, "learning_rate": 1.5832449897697747e-05, "loss": 0.4534, "step": 6668 }, { "epoch": 0.9150030870549496, "grad_norm": 1.1953125, "learning_rate": 1.5831277302434356e-05, "loss": 0.5489, "step": 6669 }, { "epoch": 0.9151402894971531, "grad_norm": 1.2265625, "learning_rate": 1.583010458566825e-05, "loss": 0.5332, "step": 6670 }, { "epoch": 0.9152774919393565, "grad_norm": 1.203125, "learning_rate": 1.582893174742385e-05, "loss": 0.5096, "step": 6671 }, { "epoch": 0.91541469438156, "grad_norm": 1.203125, "learning_rate": 1.5827758787725604e-05, "loss": 0.5284, "step": 6672 }, { "epoch": 0.9155518968237635, "grad_norm": 1.171875, "learning_rate": 1.5826585706597952e-05, "loss": 0.4713, "step": 6673 }, { "epoch": 0.915689099265967, "grad_norm": 1.0625, "learning_rate": 1.5825412504065335e-05, "loss": 0.4419, "step": 6674 }, { "epoch": 0.9158263017081704, "grad_norm": 1.2734375, "learning_rate": 1.5824239180152195e-05, "loss": 0.4997, "step": 6675 }, { "epoch": 0.9159635041503739, "grad_norm": 1.296875, "learning_rate": 1.582306573488298e-05, "loss": 0.4814, "step": 6676 }, { "epoch": 0.9161007065925774, "grad_norm": 1.1484375, "learning_rate": 1.582189216828215e-05, "loss": 0.474, "step": 6677 }, { "epoch": 0.9162379090347809, "grad_norm": 1.2265625, "learning_rate": 1.5820718480374148e-05, "loss": 0.5381, "step": 6678 }, { "epoch": 0.9163751114769842, "grad_norm": 1.1953125, "learning_rate": 1.5819544671183432e-05, "loss": 0.5065, "step": 6679 }, { "epoch": 0.9165123139191877, "grad_norm": 1.0390625, "learning_rate": 1.5818370740734465e-05, "loss": 0.4564, "step": 6680 }, { "epoch": 0.9166495163613912, "grad_norm": 1.0703125, "learning_rate": 1.5817196689051698e-05, "loss": 0.4547, "step": 6681 }, { "epoch": 0.9167867188035947, "grad_norm": 1.25, "learning_rate": 1.5816022516159597e-05, "loss": 0.5278, "step": 6682 }, { "epoch": 0.9169239212457981, "grad_norm": 1.09375, "learning_rate": 1.581484822208263e-05, "loss": 0.4673, "step": 6683 }, { "epoch": 0.9170611236880016, "grad_norm": 1.2421875, "learning_rate": 1.581367380684527e-05, "loss": 0.5537, "step": 6684 }, { "epoch": 0.9171983261302051, "grad_norm": 1.0546875, "learning_rate": 1.5812499270471978e-05, "loss": 0.4387, "step": 6685 }, { "epoch": 0.9173355285724086, "grad_norm": 1.1953125, "learning_rate": 1.5811324612987232e-05, "loss": 0.4598, "step": 6686 }, { "epoch": 0.917472731014612, "grad_norm": 1.1953125, "learning_rate": 1.58101498344155e-05, "loss": 0.4956, "step": 6687 }, { "epoch": 0.9176099334568155, "grad_norm": 1.2421875, "learning_rate": 1.5808974934781277e-05, "loss": 0.5135, "step": 6688 }, { "epoch": 0.917747135899019, "grad_norm": 1.3203125, "learning_rate": 1.580779991410903e-05, "loss": 0.564, "step": 6689 }, { "epoch": 0.9178843383412225, "grad_norm": 1.203125, "learning_rate": 1.5806624772423244e-05, "loss": 0.498, "step": 6690 }, { "epoch": 0.9180215407834259, "grad_norm": 1.171875, "learning_rate": 1.5805449509748406e-05, "loss": 0.5526, "step": 6691 }, { "epoch": 0.9181587432256294, "grad_norm": 1.171875, "learning_rate": 1.5804274126109005e-05, "loss": 0.527, "step": 6692 }, { "epoch": 0.9182959456678329, "grad_norm": 1.171875, "learning_rate": 1.5803098621529533e-05, "loss": 0.4994, "step": 6693 }, { "epoch": 0.9184331481100364, "grad_norm": 1.28125, "learning_rate": 1.580192299603448e-05, "loss": 0.5359, "step": 6694 }, { "epoch": 0.9185703505522398, "grad_norm": 1.2109375, "learning_rate": 1.5800747249648343e-05, "loss": 0.5033, "step": 6695 }, { "epoch": 0.9187075529944433, "grad_norm": 1.203125, "learning_rate": 1.5799571382395624e-05, "loss": 0.5389, "step": 6696 }, { "epoch": 0.9188447554366468, "grad_norm": 1.2421875, "learning_rate": 1.5798395394300817e-05, "loss": 0.5381, "step": 6697 }, { "epoch": 0.9189819578788503, "grad_norm": 1.0859375, "learning_rate": 1.579721928538843e-05, "loss": 0.44, "step": 6698 }, { "epoch": 0.9191191603210537, "grad_norm": 1.1875, "learning_rate": 1.5796043055682967e-05, "loss": 0.5197, "step": 6699 }, { "epoch": 0.9192563627632572, "grad_norm": 1.328125, "learning_rate": 1.5794866705208936e-05, "loss": 0.4826, "step": 6700 }, { "epoch": 0.9193935652054607, "grad_norm": 1.28125, "learning_rate": 1.579369023399085e-05, "loss": 0.522, "step": 6701 }, { "epoch": 0.9195307676476642, "grad_norm": 1.109375, "learning_rate": 1.579251364205322e-05, "loss": 0.389, "step": 6702 }, { "epoch": 0.9196679700898676, "grad_norm": 1.1953125, "learning_rate": 1.5791336929420563e-05, "loss": 0.5092, "step": 6703 }, { "epoch": 0.9198051725320711, "grad_norm": 1.0859375, "learning_rate": 1.5790160096117403e-05, "loss": 0.403, "step": 6704 }, { "epoch": 0.9199423749742746, "grad_norm": 1.3046875, "learning_rate": 1.5788983142168254e-05, "loss": 0.5209, "step": 6705 }, { "epoch": 0.9200795774164781, "grad_norm": 1.28125, "learning_rate": 1.5787806067597635e-05, "loss": 0.5421, "step": 6706 }, { "epoch": 0.9202167798586814, "grad_norm": 1.2265625, "learning_rate": 1.578662887243008e-05, "loss": 0.4673, "step": 6707 }, { "epoch": 0.9203539823008849, "grad_norm": 1.421875, "learning_rate": 1.578545155669012e-05, "loss": 0.6213, "step": 6708 }, { "epoch": 0.9204911847430884, "grad_norm": 1.09375, "learning_rate": 1.5784274120402277e-05, "loss": 0.4346, "step": 6709 }, { "epoch": 0.9206283871852919, "grad_norm": 1.265625, "learning_rate": 1.5783096563591095e-05, "loss": 0.5643, "step": 6710 }, { "epoch": 0.9207655896274953, "grad_norm": 1.1875, "learning_rate": 1.5781918886281098e-05, "loss": 0.5796, "step": 6711 }, { "epoch": 0.9209027920696988, "grad_norm": 1.140625, "learning_rate": 1.5780741088496832e-05, "loss": 0.4874, "step": 6712 }, { "epoch": 0.9210399945119023, "grad_norm": 1.15625, "learning_rate": 1.5779563170262837e-05, "loss": 0.4795, "step": 6713 }, { "epoch": 0.9211771969541058, "grad_norm": 1.1875, "learning_rate": 1.5778385131603658e-05, "loss": 0.5255, "step": 6714 }, { "epoch": 0.9213143993963092, "grad_norm": 1.1796875, "learning_rate": 1.5777206972543838e-05, "loss": 0.4985, "step": 6715 }, { "epoch": 0.9214516018385127, "grad_norm": 1.2421875, "learning_rate": 1.5776028693107924e-05, "loss": 0.496, "step": 6716 }, { "epoch": 0.9215888042807162, "grad_norm": 1.2421875, "learning_rate": 1.5774850293320473e-05, "loss": 0.4697, "step": 6717 }, { "epoch": 0.9217260067229197, "grad_norm": 1.140625, "learning_rate": 1.5773671773206035e-05, "loss": 0.4686, "step": 6718 }, { "epoch": 0.9218632091651231, "grad_norm": 1.1328125, "learning_rate": 1.5772493132789168e-05, "loss": 0.4721, "step": 6719 }, { "epoch": 0.9220004116073266, "grad_norm": 1.1484375, "learning_rate": 1.5771314372094426e-05, "loss": 0.463, "step": 6720 }, { "epoch": 0.9221376140495301, "grad_norm": 1.25, "learning_rate": 1.5770135491146375e-05, "loss": 0.5737, "step": 6721 }, { "epoch": 0.9222748164917336, "grad_norm": 1.15625, "learning_rate": 1.576895648996958e-05, "loss": 0.5199, "step": 6722 }, { "epoch": 0.922412018933937, "grad_norm": 1.2109375, "learning_rate": 1.5767777368588598e-05, "loss": 0.5341, "step": 6723 }, { "epoch": 0.9225492213761405, "grad_norm": 1.1171875, "learning_rate": 1.5766598127028007e-05, "loss": 0.4289, "step": 6724 }, { "epoch": 0.922686423818344, "grad_norm": 1.1328125, "learning_rate": 1.5765418765312376e-05, "loss": 0.454, "step": 6725 }, { "epoch": 0.9228236262605475, "grad_norm": 1.234375, "learning_rate": 1.5764239283466275e-05, "loss": 0.4939, "step": 6726 }, { "epoch": 0.9229608287027509, "grad_norm": 1.0625, "learning_rate": 1.5763059681514286e-05, "loss": 0.4363, "step": 6727 }, { "epoch": 0.9230980311449544, "grad_norm": 1.140625, "learning_rate": 1.576187995948098e-05, "loss": 0.4824, "step": 6728 }, { "epoch": 0.9232352335871579, "grad_norm": 1.1328125, "learning_rate": 1.5760700117390943e-05, "loss": 0.478, "step": 6729 }, { "epoch": 0.9233724360293614, "grad_norm": 1.21875, "learning_rate": 1.575952015526876e-05, "loss": 0.5358, "step": 6730 }, { "epoch": 0.9235096384715648, "grad_norm": 1.1328125, "learning_rate": 1.5758340073139015e-05, "loss": 0.4458, "step": 6731 }, { "epoch": 0.9236468409137683, "grad_norm": 1.2109375, "learning_rate": 1.5757159871026294e-05, "loss": 0.4498, "step": 6732 }, { "epoch": 0.9237840433559718, "grad_norm": 1.234375, "learning_rate": 1.5755979548955194e-05, "loss": 0.551, "step": 6733 }, { "epoch": 0.9239212457981753, "grad_norm": 1.125, "learning_rate": 1.5754799106950304e-05, "loss": 0.5017, "step": 6734 }, { "epoch": 0.9240584482403786, "grad_norm": 1.2109375, "learning_rate": 1.5753618545036222e-05, "loss": 0.4863, "step": 6735 }, { "epoch": 0.9241956506825821, "grad_norm": 1.265625, "learning_rate": 1.5752437863237548e-05, "loss": 0.5432, "step": 6736 }, { "epoch": 0.9243328531247856, "grad_norm": 1.1640625, "learning_rate": 1.5751257061578877e-05, "loss": 0.4826, "step": 6737 }, { "epoch": 0.9244700555669891, "grad_norm": 1.1875, "learning_rate": 1.575007614008482e-05, "loss": 0.5067, "step": 6738 }, { "epoch": 0.9246072580091925, "grad_norm": 1.1875, "learning_rate": 1.574889509877998e-05, "loss": 0.5448, "step": 6739 }, { "epoch": 0.924744460451396, "grad_norm": 1.0625, "learning_rate": 1.5747713937688967e-05, "loss": 0.4297, "step": 6740 }, { "epoch": 0.9248816628935995, "grad_norm": 1.2421875, "learning_rate": 1.5746532656836388e-05, "loss": 0.5505, "step": 6741 }, { "epoch": 0.925018865335803, "grad_norm": 1.1796875, "learning_rate": 1.5745351256246863e-05, "loss": 0.5204, "step": 6742 }, { "epoch": 0.9251560677780064, "grad_norm": 1.0546875, "learning_rate": 1.5744169735945002e-05, "loss": 0.4197, "step": 6743 }, { "epoch": 0.9252932702202099, "grad_norm": 1.1484375, "learning_rate": 1.5742988095955428e-05, "loss": 0.4994, "step": 6744 }, { "epoch": 0.9254304726624134, "grad_norm": 1.234375, "learning_rate": 1.574180633630276e-05, "loss": 0.5359, "step": 6745 }, { "epoch": 0.9255676751046169, "grad_norm": 1.25, "learning_rate": 1.574062445701162e-05, "loss": 0.5419, "step": 6746 }, { "epoch": 0.9257048775468203, "grad_norm": 1.1875, "learning_rate": 1.5739442458106638e-05, "loss": 0.4963, "step": 6747 }, { "epoch": 0.9258420799890238, "grad_norm": 1.2421875, "learning_rate": 1.573826033961244e-05, "loss": 0.5319, "step": 6748 }, { "epoch": 0.9259792824312273, "grad_norm": 1.171875, "learning_rate": 1.573707810155366e-05, "loss": 0.5094, "step": 6749 }, { "epoch": 0.9261164848734308, "grad_norm": 1.3203125, "learning_rate": 1.573589574395493e-05, "loss": 0.5491, "step": 6750 }, { "epoch": 0.9262536873156342, "grad_norm": 1.21875, "learning_rate": 1.573471326684088e-05, "loss": 0.5695, "step": 6751 }, { "epoch": 0.9263908897578377, "grad_norm": 1.2578125, "learning_rate": 1.5733530670236158e-05, "loss": 0.4684, "step": 6752 }, { "epoch": 0.9265280922000412, "grad_norm": 1.1640625, "learning_rate": 1.57323479541654e-05, "loss": 0.5235, "step": 6753 }, { "epoch": 0.9266652946422447, "grad_norm": 1.109375, "learning_rate": 1.573116511865325e-05, "loss": 0.4733, "step": 6754 }, { "epoch": 0.9268024970844481, "grad_norm": 1.125, "learning_rate": 1.5729982163724355e-05, "loss": 0.4793, "step": 6755 }, { "epoch": 0.9269396995266516, "grad_norm": 1.34375, "learning_rate": 1.5728799089403363e-05, "loss": 0.5299, "step": 6756 }, { "epoch": 0.9270769019688551, "grad_norm": 1.2109375, "learning_rate": 1.5727615895714925e-05, "loss": 0.4788, "step": 6757 }, { "epoch": 0.9272141044110586, "grad_norm": 1.28125, "learning_rate": 1.572643258268369e-05, "loss": 0.5367, "step": 6758 }, { "epoch": 0.927351306853262, "grad_norm": 1.2578125, "learning_rate": 1.572524915033433e-05, "loss": 0.578, "step": 6759 }, { "epoch": 0.9274885092954654, "grad_norm": 1.15625, "learning_rate": 1.572406559869148e-05, "loss": 0.4973, "step": 6760 }, { "epoch": 0.927625711737669, "grad_norm": 1.1796875, "learning_rate": 1.5722881927779815e-05, "loss": 0.5389, "step": 6761 }, { "epoch": 0.9277629141798724, "grad_norm": 1.1171875, "learning_rate": 1.5721698137624e-05, "loss": 0.4792, "step": 6762 }, { "epoch": 0.9279001166220758, "grad_norm": 1.2578125, "learning_rate": 1.5720514228248696e-05, "loss": 0.565, "step": 6763 }, { "epoch": 0.9280373190642793, "grad_norm": 1.1171875, "learning_rate": 1.5719330199678574e-05, "loss": 0.4775, "step": 6764 }, { "epoch": 0.9281745215064828, "grad_norm": 1.1328125, "learning_rate": 1.5718146051938302e-05, "loss": 0.4973, "step": 6765 }, { "epoch": 0.9283117239486863, "grad_norm": 1.1953125, "learning_rate": 1.571696178505255e-05, "loss": 0.5142, "step": 6766 }, { "epoch": 0.9284489263908897, "grad_norm": 1.1875, "learning_rate": 1.5715777399046003e-05, "loss": 0.5231, "step": 6767 }, { "epoch": 0.9285861288330932, "grad_norm": 1.2421875, "learning_rate": 1.5714592893943338e-05, "loss": 0.4827, "step": 6768 }, { "epoch": 0.9287233312752967, "grad_norm": 1.265625, "learning_rate": 1.571340826976923e-05, "loss": 0.5401, "step": 6769 }, { "epoch": 0.9288605337175002, "grad_norm": 1.171875, "learning_rate": 1.5712223526548363e-05, "loss": 0.4928, "step": 6770 }, { "epoch": 0.9289977361597036, "grad_norm": 1.203125, "learning_rate": 1.5711038664305426e-05, "loss": 0.4918, "step": 6771 }, { "epoch": 0.9291349386019071, "grad_norm": 1.234375, "learning_rate": 1.5709853683065107e-05, "loss": 0.5274, "step": 6772 }, { "epoch": 0.9292721410441106, "grad_norm": 1.1484375, "learning_rate": 1.5708668582852092e-05, "loss": 0.456, "step": 6773 }, { "epoch": 0.9294093434863141, "grad_norm": 1.2109375, "learning_rate": 1.570748336369108e-05, "loss": 0.5529, "step": 6774 }, { "epoch": 0.9295465459285175, "grad_norm": 1.234375, "learning_rate": 1.5706298025606765e-05, "loss": 0.5607, "step": 6775 }, { "epoch": 0.929683748370721, "grad_norm": 1.1796875, "learning_rate": 1.5705112568623845e-05, "loss": 0.5017, "step": 6776 }, { "epoch": 0.9298209508129245, "grad_norm": 2.765625, "learning_rate": 1.570392699276702e-05, "loss": 0.5574, "step": 6777 }, { "epoch": 0.929958153255128, "grad_norm": 1.171875, "learning_rate": 1.5702741298060994e-05, "loss": 0.5251, "step": 6778 }, { "epoch": 0.9300953556973314, "grad_norm": 1.125, "learning_rate": 1.5701555484530472e-05, "loss": 0.4314, "step": 6779 }, { "epoch": 0.9302325581395349, "grad_norm": 1.203125, "learning_rate": 1.5700369552200163e-05, "loss": 0.5386, "step": 6780 }, { "epoch": 0.9303697605817384, "grad_norm": 1.15625, "learning_rate": 1.5699183501094775e-05, "loss": 0.484, "step": 6781 }, { "epoch": 0.9305069630239419, "grad_norm": 1.2578125, "learning_rate": 1.5697997331239023e-05, "loss": 0.5002, "step": 6782 }, { "epoch": 0.9306441654661453, "grad_norm": 1.1015625, "learning_rate": 1.569681104265762e-05, "loss": 0.4347, "step": 6783 }, { "epoch": 0.9307813679083488, "grad_norm": 1.1484375, "learning_rate": 1.5695624635375292e-05, "loss": 0.4651, "step": 6784 }, { "epoch": 0.9309185703505523, "grad_norm": 1.2109375, "learning_rate": 1.569443810941675e-05, "loss": 0.5325, "step": 6785 }, { "epoch": 0.9310557727927558, "grad_norm": 1.1875, "learning_rate": 1.569325146480672e-05, "loss": 0.4877, "step": 6786 }, { "epoch": 0.9311929752349591, "grad_norm": 1.140625, "learning_rate": 1.569206470156993e-05, "loss": 0.426, "step": 6787 }, { "epoch": 0.9313301776771626, "grad_norm": 1.1640625, "learning_rate": 1.5690877819731106e-05, "loss": 0.5322, "step": 6788 }, { "epoch": 0.9314673801193661, "grad_norm": 1.21875, "learning_rate": 1.5689690819314972e-05, "loss": 0.5428, "step": 6789 }, { "epoch": 0.9316045825615696, "grad_norm": 1.1953125, "learning_rate": 1.5688503700346274e-05, "loss": 0.5011, "step": 6790 }, { "epoch": 0.931741785003773, "grad_norm": 1.1953125, "learning_rate": 1.5687316462849738e-05, "loss": 0.4988, "step": 6791 }, { "epoch": 0.9318789874459765, "grad_norm": 1.25, "learning_rate": 1.56861291068501e-05, "loss": 0.5759, "step": 6792 }, { "epoch": 0.93201618988818, "grad_norm": 1.171875, "learning_rate": 1.568494163237211e-05, "loss": 0.4823, "step": 6793 }, { "epoch": 0.9321533923303835, "grad_norm": 1.2734375, "learning_rate": 1.5683754039440498e-05, "loss": 0.4923, "step": 6794 }, { "epoch": 0.9322905947725869, "grad_norm": 1.171875, "learning_rate": 1.568256632808002e-05, "loss": 0.458, "step": 6795 }, { "epoch": 0.9324277972147904, "grad_norm": 1.2734375, "learning_rate": 1.568137849831542e-05, "loss": 0.5445, "step": 6796 }, { "epoch": 0.9325649996569939, "grad_norm": 1.3359375, "learning_rate": 1.568019055017145e-05, "loss": 0.5374, "step": 6797 }, { "epoch": 0.9327022020991974, "grad_norm": 1.171875, "learning_rate": 1.5679002483672854e-05, "loss": 0.4757, "step": 6798 }, { "epoch": 0.9328394045414008, "grad_norm": 1.171875, "learning_rate": 1.5677814298844394e-05, "loss": 0.4539, "step": 6799 }, { "epoch": 0.9329766069836043, "grad_norm": 1.421875, "learning_rate": 1.567662599571083e-05, "loss": 0.5707, "step": 6800 }, { "epoch": 0.9331138094258078, "grad_norm": 1.296875, "learning_rate": 1.5675437574296917e-05, "loss": 0.5775, "step": 6801 }, { "epoch": 0.9332510118680113, "grad_norm": 1.2578125, "learning_rate": 1.5674249034627416e-05, "loss": 0.5481, "step": 6802 }, { "epoch": 0.9333882143102147, "grad_norm": 1.1875, "learning_rate": 1.56730603767271e-05, "loss": 0.4879, "step": 6803 }, { "epoch": 0.9335254167524182, "grad_norm": 1.2890625, "learning_rate": 1.5671871600620728e-05, "loss": 0.5662, "step": 6804 }, { "epoch": 0.9336626191946217, "grad_norm": 1.15625, "learning_rate": 1.567068270633307e-05, "loss": 0.493, "step": 6805 }, { "epoch": 0.9337998216368252, "grad_norm": 1.25, "learning_rate": 1.5669493693888903e-05, "loss": 0.5708, "step": 6806 }, { "epoch": 0.9339370240790286, "grad_norm": 1.15625, "learning_rate": 1.5668304563313005e-05, "loss": 0.4924, "step": 6807 }, { "epoch": 0.9340742265212321, "grad_norm": 1.1328125, "learning_rate": 1.5667115314630143e-05, "loss": 0.482, "step": 6808 }, { "epoch": 0.9342114289634356, "grad_norm": 1.0625, "learning_rate": 1.5665925947865105e-05, "loss": 0.4092, "step": 6809 }, { "epoch": 0.9343486314056391, "grad_norm": 1.2578125, "learning_rate": 1.5664736463042664e-05, "loss": 0.4739, "step": 6810 }, { "epoch": 0.9344858338478425, "grad_norm": 1.3515625, "learning_rate": 1.5663546860187615e-05, "loss": 0.5996, "step": 6811 }, { "epoch": 0.934623036290046, "grad_norm": 1.2734375, "learning_rate": 1.566235713932474e-05, "loss": 0.533, "step": 6812 }, { "epoch": 0.9347602387322494, "grad_norm": 1.203125, "learning_rate": 1.5661167300478827e-05, "loss": 0.4878, "step": 6813 }, { "epoch": 0.934897441174453, "grad_norm": 1.140625, "learning_rate": 1.5659977343674673e-05, "loss": 0.4373, "step": 6814 }, { "epoch": 0.9350346436166563, "grad_norm": 1.21875, "learning_rate": 1.5658787268937065e-05, "loss": 0.5455, "step": 6815 }, { "epoch": 0.9351718460588598, "grad_norm": 1.265625, "learning_rate": 1.5657597076290807e-05, "loss": 0.5761, "step": 6816 }, { "epoch": 0.9353090485010633, "grad_norm": 1.125, "learning_rate": 1.5656406765760693e-05, "loss": 0.4454, "step": 6817 }, { "epoch": 0.9354462509432668, "grad_norm": 1.2421875, "learning_rate": 1.565521633737153e-05, "loss": 0.4855, "step": 6818 }, { "epoch": 0.9355834533854702, "grad_norm": 1.21875, "learning_rate": 1.5654025791148117e-05, "loss": 0.5331, "step": 6819 }, { "epoch": 0.9357206558276737, "grad_norm": 1.1484375, "learning_rate": 1.5652835127115264e-05, "loss": 0.5014, "step": 6820 }, { "epoch": 0.9358578582698772, "grad_norm": 1.125, "learning_rate": 1.565164434529778e-05, "loss": 0.5013, "step": 6821 }, { "epoch": 0.9359950607120807, "grad_norm": 1.140625, "learning_rate": 1.5650453445720473e-05, "loss": 0.4537, "step": 6822 }, { "epoch": 0.9361322631542841, "grad_norm": 1.21875, "learning_rate": 1.5649262428408162e-05, "loss": 0.5145, "step": 6823 }, { "epoch": 0.9362694655964876, "grad_norm": 1.203125, "learning_rate": 1.5648071293385658e-05, "loss": 0.5405, "step": 6824 }, { "epoch": 0.9364066680386911, "grad_norm": 1.1796875, "learning_rate": 1.5646880040677788e-05, "loss": 0.518, "step": 6825 }, { "epoch": 0.9365438704808946, "grad_norm": 1.0625, "learning_rate": 1.5645688670309364e-05, "loss": 0.4658, "step": 6826 }, { "epoch": 0.936681072923098, "grad_norm": 1.2109375, "learning_rate": 1.5644497182305218e-05, "loss": 0.5474, "step": 6827 }, { "epoch": 0.9368182753653015, "grad_norm": 1.234375, "learning_rate": 1.564330557669017e-05, "loss": 0.5224, "step": 6828 }, { "epoch": 0.936955477807505, "grad_norm": 1.28125, "learning_rate": 1.5642113853489052e-05, "loss": 0.5205, "step": 6829 }, { "epoch": 0.9370926802497085, "grad_norm": 1.109375, "learning_rate": 1.56409220127267e-05, "loss": 0.452, "step": 6830 }, { "epoch": 0.9372298826919119, "grad_norm": 1.1328125, "learning_rate": 1.5639730054427936e-05, "loss": 0.4391, "step": 6831 }, { "epoch": 0.9373670851341154, "grad_norm": 1.140625, "learning_rate": 1.5638537978617602e-05, "loss": 0.472, "step": 6832 }, { "epoch": 0.9375042875763189, "grad_norm": 1.1171875, "learning_rate": 1.5637345785320542e-05, "loss": 0.4994, "step": 6833 }, { "epoch": 0.9376414900185224, "grad_norm": 1.1875, "learning_rate": 1.5636153474561585e-05, "loss": 0.4874, "step": 6834 }, { "epoch": 0.9377786924607258, "grad_norm": 0.95703125, "learning_rate": 1.563496104636559e-05, "loss": 0.3766, "step": 6835 }, { "epoch": 0.9379158949029293, "grad_norm": 1.125, "learning_rate": 1.5633768500757386e-05, "loss": 0.4804, "step": 6836 }, { "epoch": 0.9380530973451328, "grad_norm": 1.140625, "learning_rate": 1.5632575837761835e-05, "loss": 0.4721, "step": 6837 }, { "epoch": 0.9381902997873363, "grad_norm": 1.2890625, "learning_rate": 1.563138305740378e-05, "loss": 0.5057, "step": 6838 }, { "epoch": 0.9383275022295396, "grad_norm": 1.1875, "learning_rate": 1.5630190159708077e-05, "loss": 0.4943, "step": 6839 }, { "epoch": 0.9384647046717431, "grad_norm": 1.3203125, "learning_rate": 1.562899714469958e-05, "loss": 0.5574, "step": 6840 }, { "epoch": 0.9386019071139466, "grad_norm": 1.0859375, "learning_rate": 1.562780401240315e-05, "loss": 0.4319, "step": 6841 }, { "epoch": 0.9387391095561501, "grad_norm": 1.1484375, "learning_rate": 1.5626610762843647e-05, "loss": 0.4489, "step": 6842 }, { "epoch": 0.9388763119983535, "grad_norm": 1.125, "learning_rate": 1.5625417396045932e-05, "loss": 0.4861, "step": 6843 }, { "epoch": 0.939013514440557, "grad_norm": 1.1875, "learning_rate": 1.5624223912034874e-05, "loss": 0.5144, "step": 6844 }, { "epoch": 0.9391507168827605, "grad_norm": 1.3828125, "learning_rate": 1.5623030310835336e-05, "loss": 0.5691, "step": 6845 }, { "epoch": 0.939287919324964, "grad_norm": 1.1953125, "learning_rate": 1.562183659247219e-05, "loss": 0.5064, "step": 6846 }, { "epoch": 0.9394251217671674, "grad_norm": 1.234375, "learning_rate": 1.562064275697031e-05, "loss": 0.5435, "step": 6847 }, { "epoch": 0.9395623242093709, "grad_norm": 1.1171875, "learning_rate": 1.5619448804354572e-05, "loss": 0.5237, "step": 6848 }, { "epoch": 0.9396995266515744, "grad_norm": 1.1171875, "learning_rate": 1.561825473464985e-05, "loss": 0.465, "step": 6849 }, { "epoch": 0.9398367290937779, "grad_norm": 1.265625, "learning_rate": 1.5617060547881026e-05, "loss": 0.5452, "step": 6850 }, { "epoch": 0.9399739315359813, "grad_norm": 1.15625, "learning_rate": 1.5615866244072985e-05, "loss": 0.4964, "step": 6851 }, { "epoch": 0.9401111339781848, "grad_norm": 1.1640625, "learning_rate": 1.5614671823250605e-05, "loss": 0.4751, "step": 6852 }, { "epoch": 0.9402483364203883, "grad_norm": 1.1640625, "learning_rate": 1.5613477285438786e-05, "loss": 0.506, "step": 6853 }, { "epoch": 0.9403855388625918, "grad_norm": 1.15625, "learning_rate": 1.5612282630662406e-05, "loss": 0.5035, "step": 6854 }, { "epoch": 0.9405227413047952, "grad_norm": 1.2265625, "learning_rate": 1.5611087858946362e-05, "loss": 0.507, "step": 6855 }, { "epoch": 0.9406599437469987, "grad_norm": 1.3359375, "learning_rate": 1.5609892970315546e-05, "loss": 0.5974, "step": 6856 }, { "epoch": 0.9407971461892022, "grad_norm": 1.1640625, "learning_rate": 1.5608697964794857e-05, "loss": 0.4726, "step": 6857 }, { "epoch": 0.9409343486314057, "grad_norm": 1.2265625, "learning_rate": 1.56075028424092e-05, "loss": 0.5603, "step": 6858 }, { "epoch": 0.9410715510736091, "grad_norm": 1.3515625, "learning_rate": 1.5606307603183467e-05, "loss": 0.638, "step": 6859 }, { "epoch": 0.9412087535158126, "grad_norm": 1.171875, "learning_rate": 1.560511224714257e-05, "loss": 0.4677, "step": 6860 }, { "epoch": 0.9413459559580161, "grad_norm": 1.1796875, "learning_rate": 1.5603916774311413e-05, "loss": 0.4529, "step": 6861 }, { "epoch": 0.9414831584002196, "grad_norm": 1.28125, "learning_rate": 1.5602721184714904e-05, "loss": 0.6599, "step": 6862 }, { "epoch": 0.941620360842423, "grad_norm": 1.09375, "learning_rate": 1.5601525478377955e-05, "loss": 0.4422, "step": 6863 }, { "epoch": 0.9417575632846265, "grad_norm": 1.109375, "learning_rate": 1.5600329655325488e-05, "loss": 0.4605, "step": 6864 }, { "epoch": 0.94189476572683, "grad_norm": 1.1640625, "learning_rate": 1.5599133715582407e-05, "loss": 0.4744, "step": 6865 }, { "epoch": 0.9420319681690335, "grad_norm": 1.25, "learning_rate": 1.559793765917364e-05, "loss": 0.5723, "step": 6866 }, { "epoch": 0.9421691706112368, "grad_norm": 1.1640625, "learning_rate": 1.5596741486124106e-05, "loss": 0.4877, "step": 6867 }, { "epoch": 0.9423063730534403, "grad_norm": 1.109375, "learning_rate": 1.5595545196458724e-05, "loss": 0.4094, "step": 6868 }, { "epoch": 0.9424435754956438, "grad_norm": 1.1328125, "learning_rate": 1.5594348790202432e-05, "loss": 0.4661, "step": 6869 }, { "epoch": 0.9425807779378473, "grad_norm": 1.234375, "learning_rate": 1.5593152267380146e-05, "loss": 0.4881, "step": 6870 }, { "epoch": 0.9427179803800507, "grad_norm": 1.296875, "learning_rate": 1.559195562801681e-05, "loss": 0.5648, "step": 6871 }, { "epoch": 0.9428551828222542, "grad_norm": 1.2890625, "learning_rate": 1.5590758872137345e-05, "loss": 0.4799, "step": 6872 }, { "epoch": 0.9429923852644577, "grad_norm": 1.21875, "learning_rate": 1.558956199976669e-05, "loss": 0.5746, "step": 6873 }, { "epoch": 0.9431295877066612, "grad_norm": 1.2265625, "learning_rate": 1.558836501092979e-05, "loss": 0.5301, "step": 6874 }, { "epoch": 0.9432667901488646, "grad_norm": 1.0703125, "learning_rate": 1.5587167905651583e-05, "loss": 0.4279, "step": 6875 }, { "epoch": 0.9434039925910681, "grad_norm": 1.265625, "learning_rate": 1.5585970683957012e-05, "loss": 0.5718, "step": 6876 }, { "epoch": 0.9435411950332716, "grad_norm": 1.0625, "learning_rate": 1.558477334587102e-05, "loss": 0.4087, "step": 6877 }, { "epoch": 0.9436783974754751, "grad_norm": 1.21875, "learning_rate": 1.5583575891418557e-05, "loss": 0.5004, "step": 6878 }, { "epoch": 0.9438155999176785, "grad_norm": 1.1484375, "learning_rate": 1.5582378320624574e-05, "loss": 0.5158, "step": 6879 }, { "epoch": 0.943952802359882, "grad_norm": 1.140625, "learning_rate": 1.5581180633514025e-05, "loss": 0.4966, "step": 6880 }, { "epoch": 0.9440900048020855, "grad_norm": 1.3046875, "learning_rate": 1.5579982830111866e-05, "loss": 0.5297, "step": 6881 }, { "epoch": 0.944227207244289, "grad_norm": 1.203125, "learning_rate": 1.557878491044305e-05, "loss": 0.5242, "step": 6882 }, { "epoch": 0.9443644096864924, "grad_norm": 1.1328125, "learning_rate": 1.5577586874532543e-05, "loss": 0.4383, "step": 6883 }, { "epoch": 0.9445016121286959, "grad_norm": 1.15625, "learning_rate": 1.5576388722405306e-05, "loss": 0.5074, "step": 6884 }, { "epoch": 0.9446388145708994, "grad_norm": 1.1796875, "learning_rate": 1.55751904540863e-05, "loss": 0.521, "step": 6885 }, { "epoch": 0.9447760170131029, "grad_norm": 1.125, "learning_rate": 1.5573992069600503e-05, "loss": 0.4684, "step": 6886 }, { "epoch": 0.9449132194553063, "grad_norm": 1.171875, "learning_rate": 1.5572793568972873e-05, "loss": 0.5589, "step": 6887 }, { "epoch": 0.9450504218975098, "grad_norm": 1.2265625, "learning_rate": 1.5571594952228387e-05, "loss": 0.4277, "step": 6888 }, { "epoch": 0.9451876243397133, "grad_norm": 1.0703125, "learning_rate": 1.557039621939202e-05, "loss": 0.4166, "step": 6889 }, { "epoch": 0.9453248267819168, "grad_norm": 1.2734375, "learning_rate": 1.5569197370488754e-05, "loss": 0.5989, "step": 6890 }, { "epoch": 0.9454620292241201, "grad_norm": 1.125, "learning_rate": 1.556799840554356e-05, "loss": 0.4708, "step": 6891 }, { "epoch": 0.9455992316663236, "grad_norm": 1.3046875, "learning_rate": 1.5566799324581428e-05, "loss": 0.6, "step": 6892 }, { "epoch": 0.9457364341085271, "grad_norm": 1.28125, "learning_rate": 1.556560012762734e-05, "loss": 0.552, "step": 6893 }, { "epoch": 0.9458736365507306, "grad_norm": 1.234375, "learning_rate": 1.5564400814706277e-05, "loss": 0.5287, "step": 6894 }, { "epoch": 0.946010838992934, "grad_norm": 1.234375, "learning_rate": 1.5563201385843233e-05, "loss": 0.4686, "step": 6895 }, { "epoch": 0.9461480414351375, "grad_norm": 1.1484375, "learning_rate": 1.5562001841063205e-05, "loss": 0.4796, "step": 6896 }, { "epoch": 0.946285243877341, "grad_norm": 1.1171875, "learning_rate": 1.5560802180391178e-05, "loss": 0.454, "step": 6897 }, { "epoch": 0.9464224463195445, "grad_norm": 1.203125, "learning_rate": 1.5559602403852155e-05, "loss": 0.4644, "step": 6898 }, { "epoch": 0.9465596487617479, "grad_norm": 1.2265625, "learning_rate": 1.555840251147113e-05, "loss": 0.4835, "step": 6899 }, { "epoch": 0.9466968512039514, "grad_norm": 1.1171875, "learning_rate": 1.555720250327311e-05, "loss": 0.5045, "step": 6900 }, { "epoch": 0.9468340536461549, "grad_norm": 1.2578125, "learning_rate": 1.55560023792831e-05, "loss": 0.5648, "step": 6901 }, { "epoch": 0.9469712560883584, "grad_norm": 1.15625, "learning_rate": 1.5554802139526094e-05, "loss": 0.5173, "step": 6902 }, { "epoch": 0.9471084585305618, "grad_norm": 1.1796875, "learning_rate": 1.5553601784027112e-05, "loss": 0.5558, "step": 6903 }, { "epoch": 0.9472456609727653, "grad_norm": 1.203125, "learning_rate": 1.555240131281116e-05, "loss": 0.5151, "step": 6904 }, { "epoch": 0.9473828634149688, "grad_norm": 1.046875, "learning_rate": 1.555120072590326e-05, "loss": 0.4696, "step": 6905 }, { "epoch": 0.9475200658571723, "grad_norm": 1.1328125, "learning_rate": 1.5550000023328414e-05, "loss": 0.4581, "step": 6906 }, { "epoch": 0.9476572682993757, "grad_norm": 1.15625, "learning_rate": 1.5548799205111654e-05, "loss": 0.4571, "step": 6907 }, { "epoch": 0.9477944707415792, "grad_norm": 1.2578125, "learning_rate": 1.554759827127799e-05, "loss": 0.4976, "step": 6908 }, { "epoch": 0.9479316731837827, "grad_norm": 1.1171875, "learning_rate": 1.554639722185245e-05, "loss": 0.4819, "step": 6909 }, { "epoch": 0.9480688756259862, "grad_norm": 1.171875, "learning_rate": 1.554519605686006e-05, "loss": 0.484, "step": 6910 }, { "epoch": 0.9482060780681896, "grad_norm": 1.125, "learning_rate": 1.5543994776325845e-05, "loss": 0.4687, "step": 6911 }, { "epoch": 0.9483432805103931, "grad_norm": 1.171875, "learning_rate": 1.5542793380274836e-05, "loss": 0.4687, "step": 6912 }, { "epoch": 0.9484804829525966, "grad_norm": 1.1484375, "learning_rate": 1.554159186873207e-05, "loss": 0.4285, "step": 6913 }, { "epoch": 0.9486176853948001, "grad_norm": 1.09375, "learning_rate": 1.5540390241722577e-05, "loss": 0.455, "step": 6914 }, { "epoch": 0.9487548878370035, "grad_norm": 1.171875, "learning_rate": 1.55391884992714e-05, "loss": 0.4927, "step": 6915 }, { "epoch": 0.948892090279207, "grad_norm": 1.2265625, "learning_rate": 1.5537986641403574e-05, "loss": 0.5641, "step": 6916 }, { "epoch": 0.9490292927214105, "grad_norm": 1.2109375, "learning_rate": 1.5536784668144142e-05, "loss": 0.5366, "step": 6917 }, { "epoch": 0.949166495163614, "grad_norm": 1.203125, "learning_rate": 1.553558257951815e-05, "loss": 0.4597, "step": 6918 }, { "epoch": 0.9493036976058173, "grad_norm": 1.0390625, "learning_rate": 1.5534380375550645e-05, "loss": 0.4178, "step": 6919 }, { "epoch": 0.9494409000480208, "grad_norm": 1.15625, "learning_rate": 1.553317805626668e-05, "loss": 0.4867, "step": 6920 }, { "epoch": 0.9495781024902243, "grad_norm": 1.171875, "learning_rate": 1.55319756216913e-05, "loss": 0.4925, "step": 6921 }, { "epoch": 0.9497153049324278, "grad_norm": 1.25, "learning_rate": 1.5530773071849563e-05, "loss": 0.5355, "step": 6922 }, { "epoch": 0.9498525073746312, "grad_norm": 1.3046875, "learning_rate": 1.5529570406766525e-05, "loss": 0.4971, "step": 6923 }, { "epoch": 0.9499897098168347, "grad_norm": 1.171875, "learning_rate": 1.552836762646725e-05, "loss": 0.5122, "step": 6924 }, { "epoch": 0.9501269122590382, "grad_norm": 1.109375, "learning_rate": 1.552716473097679e-05, "loss": 0.497, "step": 6925 }, { "epoch": 0.9502641147012417, "grad_norm": 1.265625, "learning_rate": 1.552596172032022e-05, "loss": 0.5703, "step": 6926 }, { "epoch": 0.9504013171434451, "grad_norm": 1.0, "learning_rate": 1.5524758594522596e-05, "loss": 0.3504, "step": 6927 }, { "epoch": 0.9505385195856486, "grad_norm": 1.140625, "learning_rate": 1.5523555353608993e-05, "loss": 0.4993, "step": 6928 }, { "epoch": 0.9506757220278521, "grad_norm": 1.109375, "learning_rate": 1.5522351997604483e-05, "loss": 0.4612, "step": 6929 }, { "epoch": 0.9508129244700556, "grad_norm": 1.1171875, "learning_rate": 1.5521148526534136e-05, "loss": 0.4484, "step": 6930 }, { "epoch": 0.950950126912259, "grad_norm": 1.3359375, "learning_rate": 1.551994494042303e-05, "loss": 0.5602, "step": 6931 }, { "epoch": 0.9510873293544625, "grad_norm": 1.0625, "learning_rate": 1.551874123929624e-05, "loss": 0.4577, "step": 6932 }, { "epoch": 0.951224531796666, "grad_norm": 1.1328125, "learning_rate": 1.5517537423178853e-05, "loss": 0.4482, "step": 6933 }, { "epoch": 0.9513617342388695, "grad_norm": 1.2421875, "learning_rate": 1.5516333492095947e-05, "loss": 0.5551, "step": 6934 }, { "epoch": 0.9514989366810729, "grad_norm": 1.3359375, "learning_rate": 1.551512944607261e-05, "loss": 0.5821, "step": 6935 }, { "epoch": 0.9516361391232764, "grad_norm": 1.15625, "learning_rate": 1.551392528513393e-05, "loss": 0.4726, "step": 6936 }, { "epoch": 0.9517733415654799, "grad_norm": 1.125, "learning_rate": 1.5512721009304995e-05, "loss": 0.432, "step": 6937 }, { "epoch": 0.9519105440076834, "grad_norm": 1.1796875, "learning_rate": 1.5511516618610902e-05, "loss": 0.5378, "step": 6938 }, { "epoch": 0.9520477464498868, "grad_norm": 1.09375, "learning_rate": 1.551031211307674e-05, "loss": 0.417, "step": 6939 }, { "epoch": 0.9521849488920903, "grad_norm": 1.2578125, "learning_rate": 1.550910749272761e-05, "loss": 0.5492, "step": 6940 }, { "epoch": 0.9523221513342938, "grad_norm": 1.125, "learning_rate": 1.5507902757588612e-05, "loss": 0.5417, "step": 6941 }, { "epoch": 0.9524593537764973, "grad_norm": 1.140625, "learning_rate": 1.550669790768485e-05, "loss": 0.5637, "step": 6942 }, { "epoch": 0.9525965562187007, "grad_norm": 1.25, "learning_rate": 1.550549294304143e-05, "loss": 0.5049, "step": 6943 }, { "epoch": 0.9527337586609042, "grad_norm": 1.2578125, "learning_rate": 1.5504287863683454e-05, "loss": 0.5718, "step": 6944 }, { "epoch": 0.9528709611031076, "grad_norm": 1.4375, "learning_rate": 1.550308266963603e-05, "loss": 0.592, "step": 6945 }, { "epoch": 0.9530081635453111, "grad_norm": 1.125, "learning_rate": 1.5501877360924278e-05, "loss": 0.477, "step": 6946 }, { "epoch": 0.9531453659875145, "grad_norm": 1.4375, "learning_rate": 1.5500671937573305e-05, "loss": 0.4699, "step": 6947 }, { "epoch": 0.953282568429718, "grad_norm": 1.1484375, "learning_rate": 1.5499466399608233e-05, "loss": 0.4724, "step": 6948 }, { "epoch": 0.9534197708719215, "grad_norm": 1.0703125, "learning_rate": 1.5498260747054173e-05, "loss": 0.4042, "step": 6949 }, { "epoch": 0.953556973314125, "grad_norm": 1.234375, "learning_rate": 1.549705497993626e-05, "loss": 0.5913, "step": 6950 }, { "epoch": 0.9536941757563284, "grad_norm": 1.1015625, "learning_rate": 1.5495849098279606e-05, "loss": 0.4588, "step": 6951 }, { "epoch": 0.9538313781985319, "grad_norm": 1.2265625, "learning_rate": 1.549464310210934e-05, "loss": 0.4769, "step": 6952 }, { "epoch": 0.9539685806407354, "grad_norm": 1.171875, "learning_rate": 1.5493436991450593e-05, "loss": 0.4909, "step": 6953 }, { "epoch": 0.9541057830829389, "grad_norm": 1.203125, "learning_rate": 1.549223076632849e-05, "loss": 0.572, "step": 6954 }, { "epoch": 0.9542429855251423, "grad_norm": 1.1875, "learning_rate": 1.5491024426768176e-05, "loss": 0.507, "step": 6955 }, { "epoch": 0.9543801879673458, "grad_norm": 1.21875, "learning_rate": 1.5489817972794777e-05, "loss": 0.5129, "step": 6956 }, { "epoch": 0.9545173904095493, "grad_norm": 1.3203125, "learning_rate": 1.548861140443343e-05, "loss": 0.5736, "step": 6957 }, { "epoch": 0.9546545928517528, "grad_norm": 1.2578125, "learning_rate": 1.5487404721709284e-05, "loss": 0.5392, "step": 6958 }, { "epoch": 0.9547917952939562, "grad_norm": 1.2109375, "learning_rate": 1.5486197924647477e-05, "loss": 0.4983, "step": 6959 }, { "epoch": 0.9549289977361597, "grad_norm": 1.234375, "learning_rate": 1.5484991013273152e-05, "loss": 0.544, "step": 6960 }, { "epoch": 0.9550662001783632, "grad_norm": 1.109375, "learning_rate": 1.548378398761146e-05, "loss": 0.448, "step": 6961 }, { "epoch": 0.9552034026205667, "grad_norm": 1.109375, "learning_rate": 1.5482576847687552e-05, "loss": 0.4828, "step": 6962 }, { "epoch": 0.9553406050627701, "grad_norm": 1.046875, "learning_rate": 1.548136959352658e-05, "loss": 0.4283, "step": 6963 }, { "epoch": 0.9554778075049736, "grad_norm": 1.0625, "learning_rate": 1.548016222515369e-05, "loss": 0.4351, "step": 6964 }, { "epoch": 0.9556150099471771, "grad_norm": 1.265625, "learning_rate": 1.5478954742594052e-05, "loss": 0.4908, "step": 6965 }, { "epoch": 0.9557522123893806, "grad_norm": 1.1875, "learning_rate": 1.547774714587282e-05, "loss": 0.5251, "step": 6966 }, { "epoch": 0.955889414831584, "grad_norm": 1.1328125, "learning_rate": 1.5476539435015156e-05, "loss": 0.4681, "step": 6967 }, { "epoch": 0.9560266172737875, "grad_norm": 1.1328125, "learning_rate": 1.5475331610046226e-05, "loss": 0.456, "step": 6968 }, { "epoch": 0.956163819715991, "grad_norm": 1.1640625, "learning_rate": 1.5474123670991194e-05, "loss": 0.5118, "step": 6969 }, { "epoch": 0.9563010221581945, "grad_norm": 1.2734375, "learning_rate": 1.5472915617875227e-05, "loss": 0.5088, "step": 6970 }, { "epoch": 0.9564382246003978, "grad_norm": 1.171875, "learning_rate": 1.5471707450723503e-05, "loss": 0.5027, "step": 6971 }, { "epoch": 0.9565754270426013, "grad_norm": 1.1875, "learning_rate": 1.547049916956119e-05, "loss": 0.49, "step": 6972 }, { "epoch": 0.9567126294848048, "grad_norm": 1.3125, "learning_rate": 1.546929077441347e-05, "loss": 0.5215, "step": 6973 }, { "epoch": 0.9568498319270083, "grad_norm": 1.234375, "learning_rate": 1.5468082265305518e-05, "loss": 0.548, "step": 6974 }, { "epoch": 0.9569870343692117, "grad_norm": 1.234375, "learning_rate": 1.5466873642262514e-05, "loss": 0.5039, "step": 6975 }, { "epoch": 0.9571242368114152, "grad_norm": 1.2421875, "learning_rate": 1.5465664905309642e-05, "loss": 0.5813, "step": 6976 }, { "epoch": 0.9572614392536187, "grad_norm": 1.2265625, "learning_rate": 1.546445605447209e-05, "loss": 0.4846, "step": 6977 }, { "epoch": 0.9573986416958222, "grad_norm": 1.1796875, "learning_rate": 1.5463247089775043e-05, "loss": 0.4656, "step": 6978 }, { "epoch": 0.9575358441380256, "grad_norm": 1.21875, "learning_rate": 1.5462038011243694e-05, "loss": 0.5406, "step": 6979 }, { "epoch": 0.9576730465802291, "grad_norm": 1.203125, "learning_rate": 1.5460828818903233e-05, "loss": 0.5042, "step": 6980 }, { "epoch": 0.9578102490224326, "grad_norm": 1.1953125, "learning_rate": 1.545961951277886e-05, "loss": 0.4957, "step": 6981 }, { "epoch": 0.9579474514646361, "grad_norm": 1.15625, "learning_rate": 1.545841009289577e-05, "loss": 0.5228, "step": 6982 }, { "epoch": 0.9580846539068395, "grad_norm": 1.2109375, "learning_rate": 1.5457200559279157e-05, "loss": 0.5406, "step": 6983 }, { "epoch": 0.958221856349043, "grad_norm": 1.140625, "learning_rate": 1.545599091195423e-05, "loss": 0.4438, "step": 6984 }, { "epoch": 0.9583590587912465, "grad_norm": 1.125, "learning_rate": 1.5454781150946195e-05, "loss": 0.4825, "step": 6985 }, { "epoch": 0.95849626123345, "grad_norm": 1.21875, "learning_rate": 1.5453571276280258e-05, "loss": 0.5468, "step": 6986 }, { "epoch": 0.9586334636756534, "grad_norm": 1.3671875, "learning_rate": 1.5452361287981622e-05, "loss": 0.6141, "step": 6987 }, { "epoch": 0.9587706661178569, "grad_norm": 1.34375, "learning_rate": 1.5451151186075505e-05, "loss": 0.4984, "step": 6988 }, { "epoch": 0.9589078685600604, "grad_norm": 1.1484375, "learning_rate": 1.544994097058712e-05, "loss": 0.4949, "step": 6989 }, { "epoch": 0.9590450710022639, "grad_norm": 1.171875, "learning_rate": 1.5448730641541686e-05, "loss": 0.4881, "step": 6990 }, { "epoch": 0.9591822734444673, "grad_norm": 1.0546875, "learning_rate": 1.5447520198964416e-05, "loss": 0.4106, "step": 6991 }, { "epoch": 0.9593194758866708, "grad_norm": 1.2265625, "learning_rate": 1.5446309642880537e-05, "loss": 0.5035, "step": 6992 }, { "epoch": 0.9594566783288743, "grad_norm": 1.2265625, "learning_rate": 1.5445098973315267e-05, "loss": 0.4984, "step": 6993 }, { "epoch": 0.9595938807710778, "grad_norm": 1.2109375, "learning_rate": 1.5443888190293836e-05, "loss": 0.4663, "step": 6994 }, { "epoch": 0.9597310832132812, "grad_norm": 1.234375, "learning_rate": 1.5442677293841473e-05, "loss": 0.5295, "step": 6995 }, { "epoch": 0.9598682856554847, "grad_norm": 1.234375, "learning_rate": 1.5441466283983406e-05, "loss": 0.5473, "step": 6996 }, { "epoch": 0.9600054880976882, "grad_norm": 1.2109375, "learning_rate": 1.5440255160744867e-05, "loss": 0.4659, "step": 6997 }, { "epoch": 0.9601426905398917, "grad_norm": 1.1953125, "learning_rate": 1.5439043924151092e-05, "loss": 0.5123, "step": 6998 }, { "epoch": 0.960279892982095, "grad_norm": 1.1796875, "learning_rate": 1.5437832574227325e-05, "loss": 0.4928, "step": 6999 }, { "epoch": 0.9604170954242985, "grad_norm": 1.109375, "learning_rate": 1.54366211109988e-05, "loss": 0.4346, "step": 7000 }, { "epoch": 0.960554297866502, "grad_norm": 1.2109375, "learning_rate": 1.5435409534490762e-05, "loss": 0.5509, "step": 7001 }, { "epoch": 0.9606915003087055, "grad_norm": 1.1875, "learning_rate": 1.5434197844728452e-05, "loss": 0.5064, "step": 7002 }, { "epoch": 0.9608287027509089, "grad_norm": 1.1796875, "learning_rate": 1.5432986041737123e-05, "loss": 0.5175, "step": 7003 }, { "epoch": 0.9609659051931124, "grad_norm": 1.1953125, "learning_rate": 1.5431774125542022e-05, "loss": 0.5583, "step": 7004 }, { "epoch": 0.9611031076353159, "grad_norm": 1.1640625, "learning_rate": 1.5430562096168394e-05, "loss": 0.5349, "step": 7005 }, { "epoch": 0.9612403100775194, "grad_norm": 1.15625, "learning_rate": 1.5429349953641506e-05, "loss": 0.4904, "step": 7006 }, { "epoch": 0.9613775125197228, "grad_norm": 1.1640625, "learning_rate": 1.5428137697986607e-05, "loss": 0.506, "step": 7007 }, { "epoch": 0.9615147149619263, "grad_norm": 1.03125, "learning_rate": 1.542692532922896e-05, "loss": 0.395, "step": 7008 }, { "epoch": 0.9616519174041298, "grad_norm": 1.1953125, "learning_rate": 1.542571284739382e-05, "loss": 0.4968, "step": 7009 }, { "epoch": 0.9617891198463333, "grad_norm": 1.140625, "learning_rate": 1.542450025250646e-05, "loss": 0.453, "step": 7010 }, { "epoch": 0.9619263222885367, "grad_norm": 1.2109375, "learning_rate": 1.5423287544592135e-05, "loss": 0.4949, "step": 7011 }, { "epoch": 0.9620635247307402, "grad_norm": 1.1796875, "learning_rate": 1.5422074723676122e-05, "loss": 0.511, "step": 7012 }, { "epoch": 0.9622007271729437, "grad_norm": 1.2421875, "learning_rate": 1.5420861789783684e-05, "loss": 0.5512, "step": 7013 }, { "epoch": 0.9623379296151472, "grad_norm": 1.1328125, "learning_rate": 1.5419648742940108e-05, "loss": 0.457, "step": 7014 }, { "epoch": 0.9624751320573506, "grad_norm": 1.203125, "learning_rate": 1.5418435583170655e-05, "loss": 0.4688, "step": 7015 }, { "epoch": 0.9626123344995541, "grad_norm": 1.171875, "learning_rate": 1.541722231050061e-05, "loss": 0.5114, "step": 7016 }, { "epoch": 0.9627495369417576, "grad_norm": 1.1953125, "learning_rate": 1.541600892495525e-05, "loss": 0.5164, "step": 7017 }, { "epoch": 0.9628867393839611, "grad_norm": 1.2578125, "learning_rate": 1.5414795426559863e-05, "loss": 0.5059, "step": 7018 }, { "epoch": 0.9630239418261645, "grad_norm": 1.234375, "learning_rate": 1.5413581815339728e-05, "loss": 0.5527, "step": 7019 }, { "epoch": 0.963161144268368, "grad_norm": 1.28125, "learning_rate": 1.5412368091320134e-05, "loss": 0.565, "step": 7020 }, { "epoch": 0.9632983467105715, "grad_norm": 1.265625, "learning_rate": 1.5411154254526374e-05, "loss": 0.5573, "step": 7021 }, { "epoch": 0.963435549152775, "grad_norm": 1.1953125, "learning_rate": 1.5409940304983732e-05, "loss": 0.5408, "step": 7022 }, { "epoch": 0.9635727515949783, "grad_norm": 1.2890625, "learning_rate": 1.540872624271751e-05, "loss": 0.5787, "step": 7023 }, { "epoch": 0.9637099540371818, "grad_norm": 1.171875, "learning_rate": 1.5407512067753004e-05, "loss": 0.4541, "step": 7024 }, { "epoch": 0.9638471564793853, "grad_norm": 1.1875, "learning_rate": 1.540629778011551e-05, "loss": 0.5585, "step": 7025 }, { "epoch": 0.9639843589215888, "grad_norm": 1.25, "learning_rate": 1.540508337983033e-05, "loss": 0.5641, "step": 7026 }, { "epoch": 0.9641215613637922, "grad_norm": 1.0703125, "learning_rate": 1.5403868866922764e-05, "loss": 0.372, "step": 7027 }, { "epoch": 0.9642587638059957, "grad_norm": 1.078125, "learning_rate": 1.5402654241418127e-05, "loss": 0.465, "step": 7028 }, { "epoch": 0.9643959662481992, "grad_norm": 1.25, "learning_rate": 1.540143950334172e-05, "loss": 0.5345, "step": 7029 }, { "epoch": 0.9645331686904027, "grad_norm": 1.15625, "learning_rate": 1.540022465271886e-05, "loss": 0.4945, "step": 7030 }, { "epoch": 0.9646703711326061, "grad_norm": 1.1328125, "learning_rate": 1.5399009689574853e-05, "loss": 0.4499, "step": 7031 }, { "epoch": 0.9648075735748096, "grad_norm": 1.109375, "learning_rate": 1.539779461393502e-05, "loss": 0.4521, "step": 7032 }, { "epoch": 0.9649447760170131, "grad_norm": 1.140625, "learning_rate": 1.5396579425824676e-05, "loss": 0.497, "step": 7033 }, { "epoch": 0.9650819784592166, "grad_norm": 1.0859375, "learning_rate": 1.5395364125269142e-05, "loss": 0.4549, "step": 7034 }, { "epoch": 0.96521918090142, "grad_norm": 1.28125, "learning_rate": 1.539414871229374e-05, "loss": 0.581, "step": 7035 }, { "epoch": 0.9653563833436235, "grad_norm": 1.09375, "learning_rate": 1.5392933186923797e-05, "loss": 0.4449, "step": 7036 }, { "epoch": 0.965493585785827, "grad_norm": 1.203125, "learning_rate": 1.5391717549184633e-05, "loss": 0.5242, "step": 7037 }, { "epoch": 0.9656307882280305, "grad_norm": 1.109375, "learning_rate": 1.539050179910159e-05, "loss": 0.4426, "step": 7038 }, { "epoch": 0.9657679906702339, "grad_norm": 1.2109375, "learning_rate": 1.5389285936699985e-05, "loss": 0.5035, "step": 7039 }, { "epoch": 0.9659051931124374, "grad_norm": 1.203125, "learning_rate": 1.5388069962005165e-05, "loss": 0.4929, "step": 7040 }, { "epoch": 0.9660423955546409, "grad_norm": 1.140625, "learning_rate": 1.5386853875042458e-05, "loss": 0.4653, "step": 7041 }, { "epoch": 0.9661795979968444, "grad_norm": 1.1953125, "learning_rate": 1.5385637675837206e-05, "loss": 0.4954, "step": 7042 }, { "epoch": 0.9663168004390478, "grad_norm": 1.1015625, "learning_rate": 1.5384421364414752e-05, "loss": 0.4516, "step": 7043 }, { "epoch": 0.9664540028812513, "grad_norm": 1.1875, "learning_rate": 1.538320494080044e-05, "loss": 0.4978, "step": 7044 }, { "epoch": 0.9665912053234548, "grad_norm": 1.21875, "learning_rate": 1.538198840501961e-05, "loss": 0.4848, "step": 7045 }, { "epoch": 0.9667284077656583, "grad_norm": 1.265625, "learning_rate": 1.5380771757097616e-05, "loss": 0.5573, "step": 7046 }, { "epoch": 0.9668656102078617, "grad_norm": 1.078125, "learning_rate": 1.5379554997059804e-05, "loss": 0.4552, "step": 7047 }, { "epoch": 0.9670028126500652, "grad_norm": 1.1640625, "learning_rate": 1.537833812493153e-05, "loss": 0.504, "step": 7048 }, { "epoch": 0.9671400150922687, "grad_norm": 1.21875, "learning_rate": 1.537712114073815e-05, "loss": 0.5404, "step": 7049 }, { "epoch": 0.9672772175344722, "grad_norm": 1.234375, "learning_rate": 1.537590404450502e-05, "loss": 0.5173, "step": 7050 }, { "epoch": 0.9674144199766755, "grad_norm": 1.1015625, "learning_rate": 1.5374686836257498e-05, "loss": 0.4226, "step": 7051 }, { "epoch": 0.967551622418879, "grad_norm": 1.1875, "learning_rate": 1.5373469516020948e-05, "loss": 0.5904, "step": 7052 }, { "epoch": 0.9676888248610825, "grad_norm": 1.234375, "learning_rate": 1.5372252083820738e-05, "loss": 0.53, "step": 7053 }, { "epoch": 0.967826027303286, "grad_norm": 1.25, "learning_rate": 1.5371034539682227e-05, "loss": 0.4902, "step": 7054 }, { "epoch": 0.9679632297454894, "grad_norm": 1.1875, "learning_rate": 1.536981688363079e-05, "loss": 0.491, "step": 7055 }, { "epoch": 0.9681004321876929, "grad_norm": 1.1015625, "learning_rate": 1.5368599115691803e-05, "loss": 0.4645, "step": 7056 }, { "epoch": 0.9682376346298964, "grad_norm": 1.203125, "learning_rate": 1.5367381235890628e-05, "loss": 0.5185, "step": 7057 }, { "epoch": 0.9683748370720999, "grad_norm": 1.140625, "learning_rate": 1.536616324425265e-05, "loss": 0.4688, "step": 7058 }, { "epoch": 0.9685120395143033, "grad_norm": 1.21875, "learning_rate": 1.5364945140803245e-05, "loss": 0.4679, "step": 7059 }, { "epoch": 0.9686492419565068, "grad_norm": 1.1484375, "learning_rate": 1.5363726925567793e-05, "loss": 0.4944, "step": 7060 }, { "epoch": 0.9687864443987103, "grad_norm": 1.2109375, "learning_rate": 1.5362508598571678e-05, "loss": 0.485, "step": 7061 }, { "epoch": 0.9689236468409138, "grad_norm": 1.109375, "learning_rate": 1.536129015984028e-05, "loss": 0.4512, "step": 7062 }, { "epoch": 0.9690608492831172, "grad_norm": 1.1953125, "learning_rate": 1.5360071609399002e-05, "loss": 0.4493, "step": 7063 }, { "epoch": 0.9691980517253207, "grad_norm": 1.171875, "learning_rate": 1.5358852947273223e-05, "loss": 0.4644, "step": 7064 }, { "epoch": 0.9693352541675242, "grad_norm": 1.1953125, "learning_rate": 1.5357634173488333e-05, "loss": 0.4915, "step": 7065 }, { "epoch": 0.9694724566097277, "grad_norm": 1.2265625, "learning_rate": 1.5356415288069734e-05, "loss": 0.5359, "step": 7066 }, { "epoch": 0.9696096590519311, "grad_norm": 1.1640625, "learning_rate": 1.5355196291042815e-05, "loss": 0.4752, "step": 7067 }, { "epoch": 0.9697468614941346, "grad_norm": 1.1875, "learning_rate": 1.5353977182432983e-05, "loss": 0.487, "step": 7068 }, { "epoch": 0.9698840639363381, "grad_norm": 1.109375, "learning_rate": 1.535275796226564e-05, "loss": 0.4774, "step": 7069 }, { "epoch": 0.9700212663785416, "grad_norm": 1.25, "learning_rate": 1.5351538630566187e-05, "loss": 0.6032, "step": 7070 }, { "epoch": 0.970158468820745, "grad_norm": 1.28125, "learning_rate": 1.535031918736003e-05, "loss": 0.5, "step": 7071 }, { "epoch": 0.9702956712629485, "grad_norm": 1.2109375, "learning_rate": 1.534909963267258e-05, "loss": 0.5026, "step": 7072 }, { "epoch": 0.970432873705152, "grad_norm": 1.140625, "learning_rate": 1.5347879966529247e-05, "loss": 0.5026, "step": 7073 }, { "epoch": 0.9705700761473555, "grad_norm": 1.3359375, "learning_rate": 1.5346660188955443e-05, "loss": 0.5601, "step": 7074 }, { "epoch": 0.9707072785895589, "grad_norm": 1.1796875, "learning_rate": 1.5345440299976586e-05, "loss": 0.4654, "step": 7075 }, { "epoch": 0.9708444810317624, "grad_norm": 1.140625, "learning_rate": 1.5344220299618094e-05, "loss": 0.4615, "step": 7076 }, { "epoch": 0.9709816834739659, "grad_norm": 1.21875, "learning_rate": 1.534300018790538e-05, "loss": 0.5044, "step": 7077 }, { "epoch": 0.9711188859161693, "grad_norm": 1.1640625, "learning_rate": 1.5341779964863882e-05, "loss": 0.4921, "step": 7078 }, { "epoch": 0.9712560883583727, "grad_norm": 1.140625, "learning_rate": 1.5340559630519008e-05, "loss": 0.4903, "step": 7079 }, { "epoch": 0.9713932908005762, "grad_norm": 1.1640625, "learning_rate": 1.5339339184896204e-05, "loss": 0.5026, "step": 7080 }, { "epoch": 0.9715304932427797, "grad_norm": 1.359375, "learning_rate": 1.533811862802088e-05, "loss": 0.5704, "step": 7081 }, { "epoch": 0.9716676956849832, "grad_norm": 1.2734375, "learning_rate": 1.533689795991848e-05, "loss": 0.51, "step": 7082 }, { "epoch": 0.9718048981271866, "grad_norm": 1.2421875, "learning_rate": 1.5335677180614438e-05, "loss": 0.5257, "step": 7083 }, { "epoch": 0.9719421005693901, "grad_norm": 1.2265625, "learning_rate": 1.533445629013419e-05, "loss": 0.5634, "step": 7084 }, { "epoch": 0.9720793030115936, "grad_norm": 1.1015625, "learning_rate": 1.533323528850317e-05, "loss": 0.4707, "step": 7085 }, { "epoch": 0.9722165054537971, "grad_norm": 1.09375, "learning_rate": 1.5332014175746817e-05, "loss": 0.4858, "step": 7086 }, { "epoch": 0.9723537078960005, "grad_norm": 1.125, "learning_rate": 1.5330792951890585e-05, "loss": 0.5007, "step": 7087 }, { "epoch": 0.972490910338204, "grad_norm": 1.1953125, "learning_rate": 1.5329571616959914e-05, "loss": 0.5405, "step": 7088 }, { "epoch": 0.9726281127804075, "grad_norm": 1.1328125, "learning_rate": 1.5328350170980255e-05, "loss": 0.4867, "step": 7089 }, { "epoch": 0.972765315222611, "grad_norm": 1.203125, "learning_rate": 1.5327128613977057e-05, "loss": 0.5162, "step": 7090 }, { "epoch": 0.9729025176648144, "grad_norm": 1.21875, "learning_rate": 1.532590694597577e-05, "loss": 0.5172, "step": 7091 }, { "epoch": 0.9730397201070179, "grad_norm": 1.1640625, "learning_rate": 1.5324685167001854e-05, "loss": 0.469, "step": 7092 }, { "epoch": 0.9731769225492214, "grad_norm": 1.25, "learning_rate": 1.5323463277080762e-05, "loss": 0.5939, "step": 7093 }, { "epoch": 0.9733141249914249, "grad_norm": 1.1171875, "learning_rate": 1.5322241276237955e-05, "loss": 0.4576, "step": 7094 }, { "epoch": 0.9734513274336283, "grad_norm": 1.171875, "learning_rate": 1.5321019164498897e-05, "loss": 0.5406, "step": 7095 }, { "epoch": 0.9735885298758318, "grad_norm": 1.1953125, "learning_rate": 1.5319796941889054e-05, "loss": 0.4841, "step": 7096 }, { "epoch": 0.9737257323180353, "grad_norm": 1.2109375, "learning_rate": 1.5318574608433885e-05, "loss": 0.4753, "step": 7097 }, { "epoch": 0.9738629347602388, "grad_norm": 1.2109375, "learning_rate": 1.5317352164158867e-05, "loss": 0.4854, "step": 7098 }, { "epoch": 0.9740001372024422, "grad_norm": 1.2734375, "learning_rate": 1.5316129609089466e-05, "loss": 0.5195, "step": 7099 }, { "epoch": 0.9741373396446457, "grad_norm": 1.3046875, "learning_rate": 1.531490694325116e-05, "loss": 0.5694, "step": 7100 }, { "epoch": 0.9742745420868492, "grad_norm": 1.2109375, "learning_rate": 1.5313684166669425e-05, "loss": 0.5361, "step": 7101 }, { "epoch": 0.9744117445290527, "grad_norm": 1.34375, "learning_rate": 1.5312461279369734e-05, "loss": 0.5841, "step": 7102 }, { "epoch": 0.974548946971256, "grad_norm": 1.0546875, "learning_rate": 1.531123828137757e-05, "loss": 0.4341, "step": 7103 }, { "epoch": 0.9746861494134595, "grad_norm": 1.1796875, "learning_rate": 1.531001517271842e-05, "loss": 0.5105, "step": 7104 }, { "epoch": 0.974823351855663, "grad_norm": 1.265625, "learning_rate": 1.5308791953417758e-05, "loss": 0.5203, "step": 7105 }, { "epoch": 0.9749605542978665, "grad_norm": 1.1796875, "learning_rate": 1.5307568623501086e-05, "loss": 0.4873, "step": 7106 }, { "epoch": 0.9750977567400699, "grad_norm": 1.1796875, "learning_rate": 1.5306345182993885e-05, "loss": 0.4755, "step": 7107 }, { "epoch": 0.9752349591822734, "grad_norm": 1.1484375, "learning_rate": 1.530512163192165e-05, "loss": 0.4783, "step": 7108 }, { "epoch": 0.9753721616244769, "grad_norm": 1.15625, "learning_rate": 1.5303897970309874e-05, "loss": 0.5253, "step": 7109 }, { "epoch": 0.9755093640666804, "grad_norm": 1.171875, "learning_rate": 1.5302674198184053e-05, "loss": 0.5028, "step": 7110 }, { "epoch": 0.9756465665088838, "grad_norm": 1.109375, "learning_rate": 1.530145031556969e-05, "loss": 0.4616, "step": 7111 }, { "epoch": 0.9757837689510873, "grad_norm": 1.1875, "learning_rate": 1.5300226322492274e-05, "loss": 0.5631, "step": 7112 }, { "epoch": 0.9759209713932908, "grad_norm": 1.2734375, "learning_rate": 1.529900221897733e-05, "loss": 0.5071, "step": 7113 }, { "epoch": 0.9760581738354943, "grad_norm": 1.1640625, "learning_rate": 1.529777800505034e-05, "loss": 0.5006, "step": 7114 }, { "epoch": 0.9761953762776977, "grad_norm": 1.1875, "learning_rate": 1.5296553680736832e-05, "loss": 0.5423, "step": 7115 }, { "epoch": 0.9763325787199012, "grad_norm": 1.125, "learning_rate": 1.5295329246062304e-05, "loss": 0.446, "step": 7116 }, { "epoch": 0.9764697811621047, "grad_norm": 1.1640625, "learning_rate": 1.5294104701052278e-05, "loss": 0.5203, "step": 7117 }, { "epoch": 0.9766069836043082, "grad_norm": 1.1328125, "learning_rate": 1.5292880045732263e-05, "loss": 0.5176, "step": 7118 }, { "epoch": 0.9767441860465116, "grad_norm": 1.203125, "learning_rate": 1.5291655280127777e-05, "loss": 0.5281, "step": 7119 }, { "epoch": 0.9768813884887151, "grad_norm": 1.2421875, "learning_rate": 1.529043040426434e-05, "loss": 0.5507, "step": 7120 }, { "epoch": 0.9770185909309186, "grad_norm": 1.078125, "learning_rate": 1.5289205418167474e-05, "loss": 0.3901, "step": 7121 }, { "epoch": 0.9771557933731221, "grad_norm": 1.1953125, "learning_rate": 1.5287980321862703e-05, "loss": 0.5219, "step": 7122 }, { "epoch": 0.9772929958153255, "grad_norm": 1.15625, "learning_rate": 1.5286755115375555e-05, "loss": 0.5041, "step": 7123 }, { "epoch": 0.977430198257529, "grad_norm": 1.1640625, "learning_rate": 1.5285529798731558e-05, "loss": 0.4577, "step": 7124 }, { "epoch": 0.9775674006997325, "grad_norm": 1.21875, "learning_rate": 1.5284304371956243e-05, "loss": 0.5135, "step": 7125 }, { "epoch": 0.977704603141936, "grad_norm": 1.1015625, "learning_rate": 1.5283078835075142e-05, "loss": 0.466, "step": 7126 }, { "epoch": 0.9778418055841394, "grad_norm": 1.1640625, "learning_rate": 1.5281853188113798e-05, "loss": 0.5054, "step": 7127 }, { "epoch": 0.9779790080263429, "grad_norm": 1.1796875, "learning_rate": 1.5280627431097738e-05, "loss": 0.4607, "step": 7128 }, { "epoch": 0.9781162104685464, "grad_norm": 1.140625, "learning_rate": 1.5279401564052512e-05, "loss": 0.4783, "step": 7129 }, { "epoch": 0.9782534129107499, "grad_norm": 1.3203125, "learning_rate": 1.5278175587003655e-05, "loss": 0.5998, "step": 7130 }, { "epoch": 0.9783906153529532, "grad_norm": 1.1640625, "learning_rate": 1.5276949499976715e-05, "loss": 0.4989, "step": 7131 }, { "epoch": 0.9785278177951567, "grad_norm": 1.21875, "learning_rate": 1.5275723302997244e-05, "loss": 0.515, "step": 7132 }, { "epoch": 0.9786650202373602, "grad_norm": 1.15625, "learning_rate": 1.5274496996090783e-05, "loss": 0.4958, "step": 7133 }, { "epoch": 0.9788022226795637, "grad_norm": 1.171875, "learning_rate": 1.527327057928289e-05, "loss": 0.4967, "step": 7134 }, { "epoch": 0.9789394251217671, "grad_norm": 1.140625, "learning_rate": 1.5272044052599115e-05, "loss": 0.4737, "step": 7135 }, { "epoch": 0.9790766275639706, "grad_norm": 1.1953125, "learning_rate": 1.527081741606502e-05, "loss": 0.504, "step": 7136 }, { "epoch": 0.9792138300061741, "grad_norm": 1.1328125, "learning_rate": 1.526959066970616e-05, "loss": 0.4831, "step": 7137 }, { "epoch": 0.9793510324483776, "grad_norm": 1.1171875, "learning_rate": 1.526836381354809e-05, "loss": 0.4653, "step": 7138 }, { "epoch": 0.979488234890581, "grad_norm": 1.140625, "learning_rate": 1.5267136847616384e-05, "loss": 0.4708, "step": 7139 }, { "epoch": 0.9796254373327845, "grad_norm": 1.21875, "learning_rate": 1.52659097719366e-05, "loss": 0.4944, "step": 7140 }, { "epoch": 0.979762639774988, "grad_norm": 1.1015625, "learning_rate": 1.526468258653431e-05, "loss": 0.446, "step": 7141 }, { "epoch": 0.9798998422171915, "grad_norm": 1.1640625, "learning_rate": 1.5263455291435083e-05, "loss": 0.5077, "step": 7142 }, { "epoch": 0.9800370446593949, "grad_norm": 1.171875, "learning_rate": 1.5262227886664492e-05, "loss": 0.4936, "step": 7143 }, { "epoch": 0.9801742471015984, "grad_norm": 1.2734375, "learning_rate": 1.526100037224811e-05, "loss": 0.5465, "step": 7144 }, { "epoch": 0.9803114495438019, "grad_norm": 1.1875, "learning_rate": 1.5259772748211513e-05, "loss": 0.479, "step": 7145 }, { "epoch": 0.9804486519860054, "grad_norm": 1.15625, "learning_rate": 1.5258545014580286e-05, "loss": 0.4843, "step": 7146 }, { "epoch": 0.9805858544282088, "grad_norm": 1.09375, "learning_rate": 1.5257317171380004e-05, "loss": 0.4365, "step": 7147 }, { "epoch": 0.9807230568704123, "grad_norm": 1.2265625, "learning_rate": 1.5256089218636252e-05, "loss": 0.4945, "step": 7148 }, { "epoch": 0.9808602593126158, "grad_norm": 1.1328125, "learning_rate": 1.5254861156374618e-05, "loss": 0.4722, "step": 7149 }, { "epoch": 0.9809974617548193, "grad_norm": 1.125, "learning_rate": 1.5253632984620692e-05, "loss": 0.4713, "step": 7150 }, { "epoch": 0.9811346641970227, "grad_norm": 1.2265625, "learning_rate": 1.5252404703400062e-05, "loss": 0.5397, "step": 7151 }, { "epoch": 0.9812718666392262, "grad_norm": 1.2421875, "learning_rate": 1.5251176312738318e-05, "loss": 0.4744, "step": 7152 }, { "epoch": 0.9814090690814297, "grad_norm": 1.2109375, "learning_rate": 1.5249947812661061e-05, "loss": 0.5338, "step": 7153 }, { "epoch": 0.9815462715236332, "grad_norm": 1.203125, "learning_rate": 1.5248719203193889e-05, "loss": 0.517, "step": 7154 }, { "epoch": 0.9816834739658366, "grad_norm": 1.1796875, "learning_rate": 1.5247490484362394e-05, "loss": 0.5566, "step": 7155 }, { "epoch": 0.98182067640804, "grad_norm": 1.2109375, "learning_rate": 1.5246261656192186e-05, "loss": 0.5313, "step": 7156 }, { "epoch": 0.9819578788502435, "grad_norm": 1.1015625, "learning_rate": 1.5245032718708865e-05, "loss": 0.4105, "step": 7157 }, { "epoch": 0.982095081292447, "grad_norm": 1.1015625, "learning_rate": 1.5243803671938038e-05, "loss": 0.4143, "step": 7158 }, { "epoch": 0.9822322837346504, "grad_norm": 1.1328125, "learning_rate": 1.5242574515905314e-05, "loss": 0.4782, "step": 7159 }, { "epoch": 0.9823694861768539, "grad_norm": 1.28125, "learning_rate": 1.5241345250636306e-05, "loss": 0.5178, "step": 7160 }, { "epoch": 0.9825066886190574, "grad_norm": 1.1640625, "learning_rate": 1.5240115876156628e-05, "loss": 0.5205, "step": 7161 }, { "epoch": 0.9826438910612609, "grad_norm": 1.3046875, "learning_rate": 1.5238886392491892e-05, "loss": 0.5185, "step": 7162 }, { "epoch": 0.9827810935034643, "grad_norm": 1.109375, "learning_rate": 1.5237656799667721e-05, "loss": 0.4853, "step": 7163 }, { "epoch": 0.9829182959456678, "grad_norm": 1.15625, "learning_rate": 1.5236427097709729e-05, "loss": 0.5234, "step": 7164 }, { "epoch": 0.9830554983878713, "grad_norm": 1.1015625, "learning_rate": 1.5235197286643543e-05, "loss": 0.4365, "step": 7165 }, { "epoch": 0.9831927008300748, "grad_norm": 1.234375, "learning_rate": 1.5233967366494788e-05, "loss": 0.5638, "step": 7166 }, { "epoch": 0.9833299032722782, "grad_norm": 1.171875, "learning_rate": 1.5232737337289087e-05, "loss": 0.4268, "step": 7167 }, { "epoch": 0.9834671057144817, "grad_norm": 1.1015625, "learning_rate": 1.5231507199052074e-05, "loss": 0.4748, "step": 7168 }, { "epoch": 0.9836043081566852, "grad_norm": 1.1328125, "learning_rate": 1.5230276951809378e-05, "loss": 0.5034, "step": 7169 }, { "epoch": 0.9837415105988887, "grad_norm": 1.125, "learning_rate": 1.5229046595586635e-05, "loss": 0.4782, "step": 7170 }, { "epoch": 0.9838787130410921, "grad_norm": 1.140625, "learning_rate": 1.5227816130409478e-05, "loss": 0.5154, "step": 7171 }, { "epoch": 0.9840159154832956, "grad_norm": 1.1875, "learning_rate": 1.522658555630355e-05, "loss": 0.5058, "step": 7172 }, { "epoch": 0.9841531179254991, "grad_norm": 1.1953125, "learning_rate": 1.5225354873294486e-05, "loss": 0.4922, "step": 7173 }, { "epoch": 0.9842903203677026, "grad_norm": 1.1953125, "learning_rate": 1.5224124081407935e-05, "loss": 0.4737, "step": 7174 }, { "epoch": 0.984427522809906, "grad_norm": 1.2109375, "learning_rate": 1.5222893180669536e-05, "loss": 0.514, "step": 7175 }, { "epoch": 0.9845647252521095, "grad_norm": 1.2421875, "learning_rate": 1.5221662171104941e-05, "loss": 0.5604, "step": 7176 }, { "epoch": 0.984701927694313, "grad_norm": 1.28125, "learning_rate": 1.5220431052739799e-05, "loss": 0.5217, "step": 7177 }, { "epoch": 0.9848391301365165, "grad_norm": 1.234375, "learning_rate": 1.521919982559976e-05, "loss": 0.5785, "step": 7178 }, { "epoch": 0.9849763325787199, "grad_norm": 1.25, "learning_rate": 1.521796848971048e-05, "loss": 0.4789, "step": 7179 }, { "epoch": 0.9851135350209234, "grad_norm": 1.1484375, "learning_rate": 1.5216737045097619e-05, "loss": 0.4466, "step": 7180 }, { "epoch": 0.9852507374631269, "grad_norm": 1.140625, "learning_rate": 1.521550549178683e-05, "loss": 0.4362, "step": 7181 }, { "epoch": 0.9853879399053304, "grad_norm": 1.25, "learning_rate": 1.5214273829803774e-05, "loss": 0.4761, "step": 7182 }, { "epoch": 0.9855251423475337, "grad_norm": 1.3515625, "learning_rate": 1.521304205917412e-05, "loss": 0.5378, "step": 7183 }, { "epoch": 0.9856623447897372, "grad_norm": 1.2265625, "learning_rate": 1.521181017992353e-05, "loss": 0.5476, "step": 7184 }, { "epoch": 0.9857995472319407, "grad_norm": 1.1640625, "learning_rate": 1.521057819207767e-05, "loss": 0.5338, "step": 7185 }, { "epoch": 0.9859367496741442, "grad_norm": 1.234375, "learning_rate": 1.5209346095662215e-05, "loss": 0.5249, "step": 7186 }, { "epoch": 0.9860739521163476, "grad_norm": 1.2109375, "learning_rate": 1.5208113890702834e-05, "loss": 0.5109, "step": 7187 }, { "epoch": 0.9862111545585511, "grad_norm": 1.2578125, "learning_rate": 1.5206881577225203e-05, "loss": 0.4937, "step": 7188 }, { "epoch": 0.9863483570007546, "grad_norm": 1.1015625, "learning_rate": 1.5205649155255e-05, "loss": 0.409, "step": 7189 }, { "epoch": 0.9864855594429581, "grad_norm": 1.1953125, "learning_rate": 1.5204416624817899e-05, "loss": 0.5006, "step": 7190 }, { "epoch": 0.9866227618851615, "grad_norm": 1.1875, "learning_rate": 1.5203183985939592e-05, "loss": 0.5111, "step": 7191 }, { "epoch": 0.986759964327365, "grad_norm": 1.2109375, "learning_rate": 1.520195123864575e-05, "loss": 0.5277, "step": 7192 }, { "epoch": 0.9868971667695685, "grad_norm": 1.078125, "learning_rate": 1.5200718382962067e-05, "loss": 0.4119, "step": 7193 }, { "epoch": 0.987034369211772, "grad_norm": 1.171875, "learning_rate": 1.5199485418914228e-05, "loss": 0.4841, "step": 7194 }, { "epoch": 0.9871715716539754, "grad_norm": 1.1640625, "learning_rate": 1.5198252346527926e-05, "loss": 0.5243, "step": 7195 }, { "epoch": 0.9873087740961789, "grad_norm": 1.2109375, "learning_rate": 1.5197019165828852e-05, "loss": 0.501, "step": 7196 }, { "epoch": 0.9874459765383824, "grad_norm": 1.1484375, "learning_rate": 1.5195785876842703e-05, "loss": 0.4521, "step": 7197 }, { "epoch": 0.9875831789805859, "grad_norm": 1.2890625, "learning_rate": 1.5194552479595173e-05, "loss": 0.4753, "step": 7198 }, { "epoch": 0.9877203814227893, "grad_norm": 1.1640625, "learning_rate": 1.5193318974111965e-05, "loss": 0.535, "step": 7199 }, { "epoch": 0.9878575838649928, "grad_norm": 1.28125, "learning_rate": 1.519208536041878e-05, "loss": 0.5333, "step": 7200 }, { "epoch": 0.9879947863071963, "grad_norm": 1.2890625, "learning_rate": 1.5190851638541318e-05, "loss": 0.4981, "step": 7201 }, { "epoch": 0.9881319887493998, "grad_norm": 1.1796875, "learning_rate": 1.518961780850529e-05, "loss": 0.5507, "step": 7202 }, { "epoch": 0.9882691911916032, "grad_norm": 1.140625, "learning_rate": 1.5188383870336404e-05, "loss": 0.5105, "step": 7203 }, { "epoch": 0.9884063936338067, "grad_norm": 1.2109375, "learning_rate": 1.5187149824060368e-05, "loss": 0.4984, "step": 7204 }, { "epoch": 0.9885435960760102, "grad_norm": 1.109375, "learning_rate": 1.5185915669702897e-05, "loss": 0.4309, "step": 7205 }, { "epoch": 0.9886807985182137, "grad_norm": 1.203125, "learning_rate": 1.5184681407289708e-05, "loss": 0.5609, "step": 7206 }, { "epoch": 0.988818000960417, "grad_norm": 1.1953125, "learning_rate": 1.5183447036846516e-05, "loss": 0.5324, "step": 7207 }, { "epoch": 0.9889552034026206, "grad_norm": 1.1640625, "learning_rate": 1.5182212558399045e-05, "loss": 0.4603, "step": 7208 }, { "epoch": 0.989092405844824, "grad_norm": 1.171875, "learning_rate": 1.5180977971973007e-05, "loss": 0.5621, "step": 7209 }, { "epoch": 0.9892296082870276, "grad_norm": 1.171875, "learning_rate": 1.517974327759414e-05, "loss": 0.534, "step": 7210 }, { "epoch": 0.9893668107292309, "grad_norm": 1.2734375, "learning_rate": 1.5178508475288158e-05, "loss": 0.5628, "step": 7211 }, { "epoch": 0.9895040131714344, "grad_norm": 1.234375, "learning_rate": 1.51772735650808e-05, "loss": 0.555, "step": 7212 }, { "epoch": 0.9896412156136379, "grad_norm": 1.2265625, "learning_rate": 1.517603854699779e-05, "loss": 0.5542, "step": 7213 }, { "epoch": 0.9897784180558414, "grad_norm": 1.203125, "learning_rate": 1.5174803421064861e-05, "loss": 0.5431, "step": 7214 }, { "epoch": 0.9899156204980448, "grad_norm": 1.296875, "learning_rate": 1.5173568187307754e-05, "loss": 0.48, "step": 7215 }, { "epoch": 0.9900528229402483, "grad_norm": 1.2890625, "learning_rate": 1.5172332845752204e-05, "loss": 0.5944, "step": 7216 }, { "epoch": 0.9901900253824518, "grad_norm": 1.3046875, "learning_rate": 1.5171097396423953e-05, "loss": 0.5575, "step": 7217 }, { "epoch": 0.9903272278246553, "grad_norm": 1.2578125, "learning_rate": 1.5169861839348739e-05, "loss": 0.5583, "step": 7218 }, { "epoch": 0.9904644302668587, "grad_norm": 1.203125, "learning_rate": 1.516862617455231e-05, "loss": 0.5009, "step": 7219 }, { "epoch": 0.9906016327090622, "grad_norm": 1.2109375, "learning_rate": 1.5167390402060412e-05, "loss": 0.5313, "step": 7220 }, { "epoch": 0.9907388351512657, "grad_norm": 1.1796875, "learning_rate": 1.5166154521898793e-05, "loss": 0.5208, "step": 7221 }, { "epoch": 0.9908760375934692, "grad_norm": 1.203125, "learning_rate": 1.5164918534093205e-05, "loss": 0.4941, "step": 7222 }, { "epoch": 0.9910132400356726, "grad_norm": 1.25, "learning_rate": 1.5163682438669403e-05, "loss": 0.5337, "step": 7223 }, { "epoch": 0.9911504424778761, "grad_norm": 1.1015625, "learning_rate": 1.5162446235653141e-05, "loss": 0.487, "step": 7224 }, { "epoch": 0.9912876449200796, "grad_norm": 1.15625, "learning_rate": 1.5161209925070177e-05, "loss": 0.5077, "step": 7225 }, { "epoch": 0.9914248473622831, "grad_norm": 1.2265625, "learning_rate": 1.5159973506946273e-05, "loss": 0.5424, "step": 7226 }, { "epoch": 0.9915620498044865, "grad_norm": 1.2109375, "learning_rate": 1.5158736981307188e-05, "loss": 0.5145, "step": 7227 }, { "epoch": 0.99169925224669, "grad_norm": 1.21875, "learning_rate": 1.5157500348178689e-05, "loss": 0.4873, "step": 7228 }, { "epoch": 0.9918364546888935, "grad_norm": 1.2890625, "learning_rate": 1.5156263607586543e-05, "loss": 0.5844, "step": 7229 }, { "epoch": 0.991973657131097, "grad_norm": 1.109375, "learning_rate": 1.5155026759556517e-05, "loss": 0.5034, "step": 7230 }, { "epoch": 0.9921108595733004, "grad_norm": 1.1484375, "learning_rate": 1.5153789804114388e-05, "loss": 0.4799, "step": 7231 }, { "epoch": 0.9922480620155039, "grad_norm": 1.28125, "learning_rate": 1.5152552741285924e-05, "loss": 0.5533, "step": 7232 }, { "epoch": 0.9923852644577074, "grad_norm": 1.203125, "learning_rate": 1.5151315571096902e-05, "loss": 0.5016, "step": 7233 }, { "epoch": 0.9925224668999109, "grad_norm": 1.234375, "learning_rate": 1.5150078293573106e-05, "loss": 0.5462, "step": 7234 }, { "epoch": 0.9926596693421142, "grad_norm": 1.2265625, "learning_rate": 1.5148840908740308e-05, "loss": 0.5127, "step": 7235 }, { "epoch": 0.9927968717843177, "grad_norm": 1.1953125, "learning_rate": 1.5147603416624294e-05, "loss": 0.5322, "step": 7236 }, { "epoch": 0.9929340742265212, "grad_norm": 1.3125, "learning_rate": 1.514636581725085e-05, "loss": 0.5906, "step": 7237 }, { "epoch": 0.9930712766687247, "grad_norm": 1.1796875, "learning_rate": 1.5145128110645759e-05, "loss": 0.462, "step": 7238 }, { "epoch": 0.9932084791109281, "grad_norm": 1.109375, "learning_rate": 1.5143890296834816e-05, "loss": 0.4279, "step": 7239 }, { "epoch": 0.9933456815531316, "grad_norm": 1.1484375, "learning_rate": 1.5142652375843811e-05, "loss": 0.499, "step": 7240 }, { "epoch": 0.9934828839953351, "grad_norm": 1.1953125, "learning_rate": 1.5141414347698535e-05, "loss": 0.5158, "step": 7241 }, { "epoch": 0.9936200864375386, "grad_norm": 1.2421875, "learning_rate": 1.5140176212424786e-05, "loss": 0.5397, "step": 7242 }, { "epoch": 0.993757288879742, "grad_norm": 1.2578125, "learning_rate": 1.5138937970048363e-05, "loss": 0.5024, "step": 7243 }, { "epoch": 0.9938944913219455, "grad_norm": 1.265625, "learning_rate": 1.5137699620595068e-05, "loss": 0.5623, "step": 7244 }, { "epoch": 0.994031693764149, "grad_norm": 1.1875, "learning_rate": 1.5136461164090697e-05, "loss": 0.5202, "step": 7245 }, { "epoch": 0.9941688962063525, "grad_norm": 1.2578125, "learning_rate": 1.5135222600561061e-05, "loss": 0.5789, "step": 7246 }, { "epoch": 0.9943060986485559, "grad_norm": 1.203125, "learning_rate": 1.5133983930031964e-05, "loss": 0.5299, "step": 7247 }, { "epoch": 0.9944433010907594, "grad_norm": 1.078125, "learning_rate": 1.5132745152529217e-05, "loss": 0.3952, "step": 7248 }, { "epoch": 0.9945805035329629, "grad_norm": 1.15625, "learning_rate": 1.5131506268078635e-05, "loss": 0.4339, "step": 7249 }, { "epoch": 0.9947177059751664, "grad_norm": 1.15625, "learning_rate": 1.5130267276706026e-05, "loss": 0.4629, "step": 7250 }, { "epoch": 0.9948549084173698, "grad_norm": 1.0703125, "learning_rate": 1.5129028178437208e-05, "loss": 0.4336, "step": 7251 }, { "epoch": 0.9949921108595733, "grad_norm": 1.1875, "learning_rate": 1.5127788973297999e-05, "loss": 0.4884, "step": 7252 }, { "epoch": 0.9951293133017768, "grad_norm": 1.234375, "learning_rate": 1.5126549661314223e-05, "loss": 0.5102, "step": 7253 }, { "epoch": 0.9952665157439803, "grad_norm": 1.21875, "learning_rate": 1.5125310242511697e-05, "loss": 0.5455, "step": 7254 }, { "epoch": 0.9954037181861837, "grad_norm": 1.171875, "learning_rate": 1.512407071691625e-05, "loss": 0.4619, "step": 7255 }, { "epoch": 0.9955409206283872, "grad_norm": 1.3125, "learning_rate": 1.512283108455371e-05, "loss": 0.578, "step": 7256 }, { "epoch": 0.9956781230705907, "grad_norm": 1.09375, "learning_rate": 1.5121591345449904e-05, "loss": 0.452, "step": 7257 }, { "epoch": 0.9958153255127942, "grad_norm": 1.2421875, "learning_rate": 1.512035149963066e-05, "loss": 0.5618, "step": 7258 }, { "epoch": 0.9959525279549976, "grad_norm": 1.3125, "learning_rate": 1.5119111547121822e-05, "loss": 0.5792, "step": 7259 }, { "epoch": 0.9960897303972011, "grad_norm": 1.125, "learning_rate": 1.5117871487949218e-05, "loss": 0.4822, "step": 7260 }, { "epoch": 0.9962269328394046, "grad_norm": 1.2265625, "learning_rate": 1.511663132213869e-05, "loss": 0.5005, "step": 7261 }, { "epoch": 0.996364135281608, "grad_norm": 1.265625, "learning_rate": 1.511539104971608e-05, "loss": 0.5611, "step": 7262 }, { "epoch": 0.9965013377238114, "grad_norm": 1.203125, "learning_rate": 1.5114150670707223e-05, "loss": 0.5248, "step": 7263 }, { "epoch": 0.9966385401660149, "grad_norm": 1.25, "learning_rate": 1.5112910185137973e-05, "loss": 0.56, "step": 7264 }, { "epoch": 0.9967757426082184, "grad_norm": 1.1796875, "learning_rate": 1.511166959303417e-05, "loss": 0.536, "step": 7265 }, { "epoch": 0.9969129450504219, "grad_norm": 1.2109375, "learning_rate": 1.511042889442167e-05, "loss": 0.5318, "step": 7266 }, { "epoch": 0.9970501474926253, "grad_norm": 1.2265625, "learning_rate": 1.5109188089326319e-05, "loss": 0.5297, "step": 7267 }, { "epoch": 0.9971873499348288, "grad_norm": 1.171875, "learning_rate": 1.5107947177773974e-05, "loss": 0.5102, "step": 7268 }, { "epoch": 0.9973245523770323, "grad_norm": 1.171875, "learning_rate": 1.5106706159790492e-05, "loss": 0.5278, "step": 7269 }, { "epoch": 0.9974617548192358, "grad_norm": 1.1484375, "learning_rate": 1.5105465035401729e-05, "loss": 0.4887, "step": 7270 }, { "epoch": 0.9975989572614392, "grad_norm": 1.1484375, "learning_rate": 1.5104223804633549e-05, "loss": 0.4516, "step": 7271 }, { "epoch": 0.9977361597036427, "grad_norm": 1.1015625, "learning_rate": 1.5102982467511809e-05, "loss": 0.4839, "step": 7272 }, { "epoch": 0.9978733621458462, "grad_norm": 1.2421875, "learning_rate": 1.5101741024062377e-05, "loss": 0.5286, "step": 7273 }, { "epoch": 0.9980105645880497, "grad_norm": 1.2421875, "learning_rate": 1.5100499474311121e-05, "loss": 0.4775, "step": 7274 }, { "epoch": 0.9981477670302531, "grad_norm": 1.2109375, "learning_rate": 1.509925781828391e-05, "loss": 0.4428, "step": 7275 }, { "epoch": 0.9982849694724566, "grad_norm": 1.140625, "learning_rate": 1.5098016056006615e-05, "loss": 0.4671, "step": 7276 }, { "epoch": 0.9984221719146601, "grad_norm": 1.2421875, "learning_rate": 1.509677418750511e-05, "loss": 0.4792, "step": 7277 }, { "epoch": 0.9985593743568636, "grad_norm": 1.15625, "learning_rate": 1.5095532212805273e-05, "loss": 0.5243, "step": 7278 }, { "epoch": 0.998696576799067, "grad_norm": 1.1484375, "learning_rate": 1.5094290131932981e-05, "loss": 0.4698, "step": 7279 }, { "epoch": 0.9988337792412705, "grad_norm": 1.03125, "learning_rate": 1.5093047944914112e-05, "loss": 0.4022, "step": 7280 }, { "epoch": 0.998970981683474, "grad_norm": 1.15625, "learning_rate": 1.5091805651774548e-05, "loss": 0.54, "step": 7281 }, { "epoch": 0.9991081841256775, "grad_norm": 1.140625, "learning_rate": 1.5090563252540181e-05, "loss": 0.4592, "step": 7282 }, { "epoch": 0.9992453865678809, "grad_norm": 1.28125, "learning_rate": 1.5089320747236894e-05, "loss": 0.487, "step": 7283 }, { "epoch": 0.9993825890100844, "grad_norm": 1.3125, "learning_rate": 1.5088078135890572e-05, "loss": 0.5969, "step": 7284 }, { "epoch": 0.9995197914522879, "grad_norm": 1.125, "learning_rate": 1.5086835418527112e-05, "loss": 0.4543, "step": 7285 }, { "epoch": 0.9996569938944914, "grad_norm": 1.1875, "learning_rate": 1.5085592595172406e-05, "loss": 0.5009, "step": 7286 }, { "epoch": 0.9997941963366948, "grad_norm": 1.046875, "learning_rate": 1.5084349665852349e-05, "loss": 0.3979, "step": 7287 }, { "epoch": 0.9999313987788983, "grad_norm": 1.1171875, "learning_rate": 1.5083106630592845e-05, "loss": 0.4234, "step": 7288 }, { "epoch": 0.9999313987788983, "eval_loss": 1.6794263124465942, "eval_runtime": 118.9697, "eval_samples_per_second": 1.421, "eval_steps_per_second": 0.714, "step": 7288 }, { "epoch": 1.0, "grad_norm": 1.84375, "learning_rate": 1.5081863489419788e-05, "loss": 0.521, "step": 7289 }, { "epoch": 1.0001372024422035, "grad_norm": 1.1875, "learning_rate": 1.5080620242359078e-05, "loss": 0.422, "step": 7290 }, { "epoch": 1.000274404884407, "grad_norm": 1.2578125, "learning_rate": 1.5079376889436627e-05, "loss": 0.4901, "step": 7291 }, { "epoch": 1.0004116073266105, "grad_norm": 1.0703125, "learning_rate": 1.5078133430678341e-05, "loss": 0.3814, "step": 7292 }, { "epoch": 1.000548809768814, "grad_norm": 1.1796875, "learning_rate": 1.5076889866110124e-05, "loss": 0.4322, "step": 7293 }, { "epoch": 1.0006860122110173, "grad_norm": 1.203125, "learning_rate": 1.5075646195757894e-05, "loss": 0.5203, "step": 7294 }, { "epoch": 1.0008232146532208, "grad_norm": 1.1640625, "learning_rate": 1.5074402419647559e-05, "loss": 0.4622, "step": 7295 }, { "epoch": 1.0009604170954243, "grad_norm": 1.1796875, "learning_rate": 1.5073158537805037e-05, "loss": 0.4385, "step": 7296 }, { "epoch": 1.0010976195376278, "grad_norm": 1.3125, "learning_rate": 1.5071914550256245e-05, "loss": 0.5221, "step": 7297 }, { "epoch": 1.0012348219798313, "grad_norm": 1.2109375, "learning_rate": 1.5070670457027105e-05, "loss": 0.4749, "step": 7298 }, { "epoch": 1.0013720244220348, "grad_norm": 1.2890625, "learning_rate": 1.5069426258143538e-05, "loss": 0.5353, "step": 7299 }, { "epoch": 1.0015092268642383, "grad_norm": 1.0625, "learning_rate": 1.5068181953631472e-05, "loss": 0.3808, "step": 7300 }, { "epoch": 1.0016464293064415, "grad_norm": 1.1875, "learning_rate": 1.5066937543516827e-05, "loss": 0.3953, "step": 7301 }, { "epoch": 1.001783631748645, "grad_norm": 1.1796875, "learning_rate": 1.5065693027825537e-05, "loss": 0.3944, "step": 7302 }, { "epoch": 1.0019208341908485, "grad_norm": 1.0859375, "learning_rate": 1.5064448406583533e-05, "loss": 0.3519, "step": 7303 }, { "epoch": 1.002058036633052, "grad_norm": 1.203125, "learning_rate": 1.5063203679816748e-05, "loss": 0.4225, "step": 7304 }, { "epoch": 1.0021952390752555, "grad_norm": 1.265625, "learning_rate": 1.5061958847551117e-05, "loss": 0.4705, "step": 7305 }, { "epoch": 1.002332441517459, "grad_norm": 1.15625, "learning_rate": 1.5060713909812577e-05, "loss": 0.3817, "step": 7306 }, { "epoch": 1.0024696439596625, "grad_norm": 1.171875, "learning_rate": 1.5059468866627072e-05, "loss": 0.4737, "step": 7307 }, { "epoch": 1.002606846401866, "grad_norm": 1.203125, "learning_rate": 1.5058223718020536e-05, "loss": 0.4594, "step": 7308 }, { "epoch": 1.0027440488440695, "grad_norm": 1.2578125, "learning_rate": 1.5056978464018923e-05, "loss": 0.4956, "step": 7309 }, { "epoch": 1.0028812512862728, "grad_norm": 1.1953125, "learning_rate": 1.5055733104648172e-05, "loss": 0.5122, "step": 7310 }, { "epoch": 1.0030184537284763, "grad_norm": 1.1484375, "learning_rate": 1.5054487639934236e-05, "loss": 0.425, "step": 7311 }, { "epoch": 1.0031556561706798, "grad_norm": 1.1953125, "learning_rate": 1.5053242069903064e-05, "loss": 0.4749, "step": 7312 }, { "epoch": 1.0032928586128833, "grad_norm": 1.1875, "learning_rate": 1.5051996394580612e-05, "loss": 0.4577, "step": 7313 }, { "epoch": 1.0034300610550868, "grad_norm": 1.2578125, "learning_rate": 1.5050750613992833e-05, "loss": 0.4638, "step": 7314 }, { "epoch": 1.0035672634972903, "grad_norm": 1.2578125, "learning_rate": 1.5049504728165683e-05, "loss": 0.5266, "step": 7315 }, { "epoch": 1.0037044659394938, "grad_norm": 1.28125, "learning_rate": 1.5048258737125128e-05, "loss": 0.4057, "step": 7316 }, { "epoch": 1.003841668381697, "grad_norm": 1.203125, "learning_rate": 1.5047012640897123e-05, "loss": 0.4388, "step": 7317 }, { "epoch": 1.0039788708239006, "grad_norm": 1.21875, "learning_rate": 1.5045766439507634e-05, "loss": 0.4369, "step": 7318 }, { "epoch": 1.004116073266104, "grad_norm": 1.2734375, "learning_rate": 1.5044520132982627e-05, "loss": 0.3875, "step": 7319 }, { "epoch": 1.0042532757083076, "grad_norm": 1.2890625, "learning_rate": 1.5043273721348076e-05, "loss": 0.4294, "step": 7320 }, { "epoch": 1.004390478150511, "grad_norm": 1.2109375, "learning_rate": 1.5042027204629946e-05, "loss": 0.4528, "step": 7321 }, { "epoch": 1.0045276805927146, "grad_norm": 1.1796875, "learning_rate": 1.5040780582854208e-05, "loss": 0.4375, "step": 7322 }, { "epoch": 1.004664883034918, "grad_norm": 1.234375, "learning_rate": 1.5039533856046842e-05, "loss": 0.4654, "step": 7323 }, { "epoch": 1.0048020854771216, "grad_norm": 1.1171875, "learning_rate": 1.5038287024233827e-05, "loss": 0.419, "step": 7324 }, { "epoch": 1.004939287919325, "grad_norm": 1.21875, "learning_rate": 1.5037040087441135e-05, "loss": 0.439, "step": 7325 }, { "epoch": 1.0050764903615284, "grad_norm": 1.1484375, "learning_rate": 1.5035793045694753e-05, "loss": 0.4354, "step": 7326 }, { "epoch": 1.0052136928037319, "grad_norm": 1.15625, "learning_rate": 1.5034545899020665e-05, "loss": 0.4346, "step": 7327 }, { "epoch": 1.0053508952459353, "grad_norm": 1.2578125, "learning_rate": 1.5033298647444854e-05, "loss": 0.4963, "step": 7328 }, { "epoch": 1.0054880976881388, "grad_norm": 1.1796875, "learning_rate": 1.5032051290993308e-05, "loss": 0.4798, "step": 7329 }, { "epoch": 1.0056253001303423, "grad_norm": 1.25, "learning_rate": 1.5030803829692023e-05, "loss": 0.4703, "step": 7330 }, { "epoch": 1.0057625025725458, "grad_norm": 1.3671875, "learning_rate": 1.5029556263566987e-05, "loss": 0.4636, "step": 7331 }, { "epoch": 1.0058997050147493, "grad_norm": 1.21875, "learning_rate": 1.5028308592644191e-05, "loss": 0.4697, "step": 7332 }, { "epoch": 1.0060369074569526, "grad_norm": 1.2109375, "learning_rate": 1.5027060816949645e-05, "loss": 0.455, "step": 7333 }, { "epoch": 1.0061741098991561, "grad_norm": 1.109375, "learning_rate": 1.5025812936509334e-05, "loss": 0.3951, "step": 7334 }, { "epoch": 1.0063113123413596, "grad_norm": 1.2421875, "learning_rate": 1.5024564951349266e-05, "loss": 0.4701, "step": 7335 }, { "epoch": 1.0064485147835631, "grad_norm": 1.2109375, "learning_rate": 1.5023316861495443e-05, "loss": 0.4648, "step": 7336 }, { "epoch": 1.0065857172257666, "grad_norm": 1.2734375, "learning_rate": 1.5022068666973872e-05, "loss": 0.4863, "step": 7337 }, { "epoch": 1.0067229196679701, "grad_norm": 1.1875, "learning_rate": 1.5020820367810557e-05, "loss": 0.4459, "step": 7338 }, { "epoch": 1.0068601221101736, "grad_norm": 1.390625, "learning_rate": 1.5019571964031516e-05, "loss": 0.4632, "step": 7339 }, { "epoch": 1.0069973245523771, "grad_norm": 1.234375, "learning_rate": 1.5018323455662754e-05, "loss": 0.4866, "step": 7340 }, { "epoch": 1.0071345269945806, "grad_norm": 1.25, "learning_rate": 1.5017074842730285e-05, "loss": 0.4649, "step": 7341 }, { "epoch": 1.007271729436784, "grad_norm": 1.234375, "learning_rate": 1.5015826125260136e-05, "loss": 0.4419, "step": 7342 }, { "epoch": 1.0074089318789874, "grad_norm": 1.296875, "learning_rate": 1.5014577303278311e-05, "loss": 0.4691, "step": 7343 }, { "epoch": 1.007546134321191, "grad_norm": 1.21875, "learning_rate": 1.5013328376810839e-05, "loss": 0.4572, "step": 7344 }, { "epoch": 1.0076833367633944, "grad_norm": 1.1796875, "learning_rate": 1.5012079345883743e-05, "loss": 0.4392, "step": 7345 }, { "epoch": 1.0078205392055979, "grad_norm": 1.125, "learning_rate": 1.5010830210523047e-05, "loss": 0.447, "step": 7346 }, { "epoch": 1.0079577416478014, "grad_norm": 1.546875, "learning_rate": 1.5009580970754777e-05, "loss": 0.4697, "step": 7347 }, { "epoch": 1.0080949440900049, "grad_norm": 1.171875, "learning_rate": 1.5008331626604969e-05, "loss": 0.42, "step": 7348 }, { "epoch": 1.0082321465322082, "grad_norm": 1.2421875, "learning_rate": 1.5007082178099646e-05, "loss": 0.4732, "step": 7349 }, { "epoch": 1.0083693489744117, "grad_norm": 1.1953125, "learning_rate": 1.5005832625264848e-05, "loss": 0.4812, "step": 7350 }, { "epoch": 1.0085065514166152, "grad_norm": 1.2890625, "learning_rate": 1.5004582968126608e-05, "loss": 0.5265, "step": 7351 }, { "epoch": 1.0086437538588187, "grad_norm": 1.34375, "learning_rate": 1.5003333206710968e-05, "loss": 0.5104, "step": 7352 }, { "epoch": 1.0087809563010222, "grad_norm": 1.140625, "learning_rate": 1.5002083341043966e-05, "loss": 0.4067, "step": 7353 }, { "epoch": 1.0089181587432257, "grad_norm": 1.2265625, "learning_rate": 1.5000833371151644e-05, "loss": 0.4821, "step": 7354 }, { "epoch": 1.0090553611854292, "grad_norm": 1.171875, "learning_rate": 1.4999583297060045e-05, "loss": 0.453, "step": 7355 }, { "epoch": 1.0091925636276327, "grad_norm": 1.15625, "learning_rate": 1.4998333118795221e-05, "loss": 0.4306, "step": 7356 }, { "epoch": 1.0093297660698362, "grad_norm": 1.2109375, "learning_rate": 1.4997082836383219e-05, "loss": 0.4632, "step": 7357 }, { "epoch": 1.0094669685120394, "grad_norm": 1.1796875, "learning_rate": 1.4995832449850092e-05, "loss": 0.4396, "step": 7358 }, { "epoch": 1.009604170954243, "grad_norm": 1.171875, "learning_rate": 1.4994581959221892e-05, "loss": 0.3955, "step": 7359 }, { "epoch": 1.0097413733964464, "grad_norm": 1.2109375, "learning_rate": 1.4993331364524673e-05, "loss": 0.4347, "step": 7360 }, { "epoch": 1.00987857583865, "grad_norm": 1.296875, "learning_rate": 1.4992080665784494e-05, "loss": 0.4877, "step": 7361 }, { "epoch": 1.0100157782808534, "grad_norm": 1.1875, "learning_rate": 1.4990829863027414e-05, "loss": 0.4454, "step": 7362 }, { "epoch": 1.010152980723057, "grad_norm": 1.203125, "learning_rate": 1.49895789562795e-05, "loss": 0.4414, "step": 7363 }, { "epoch": 1.0102901831652604, "grad_norm": 1.2265625, "learning_rate": 1.4988327945566812e-05, "loss": 0.4687, "step": 7364 }, { "epoch": 1.0104273856074637, "grad_norm": 1.21875, "learning_rate": 1.4987076830915417e-05, "loss": 0.4711, "step": 7365 }, { "epoch": 1.0105645880496672, "grad_norm": 1.1328125, "learning_rate": 1.4985825612351383e-05, "loss": 0.4148, "step": 7366 }, { "epoch": 1.0107017904918707, "grad_norm": 1.1875, "learning_rate": 1.4984574289900785e-05, "loss": 0.4344, "step": 7367 }, { "epoch": 1.0108389929340742, "grad_norm": 1.328125, "learning_rate": 1.4983322863589691e-05, "loss": 0.4939, "step": 7368 }, { "epoch": 1.0109761953762777, "grad_norm": 1.078125, "learning_rate": 1.4982071333444182e-05, "loss": 0.4003, "step": 7369 }, { "epoch": 1.0111133978184812, "grad_norm": 1.15625, "learning_rate": 1.4980819699490327e-05, "loss": 0.418, "step": 7370 }, { "epoch": 1.0112506002606847, "grad_norm": 1.1796875, "learning_rate": 1.4979567961754212e-05, "loss": 0.4801, "step": 7371 }, { "epoch": 1.0113878027028882, "grad_norm": 1.203125, "learning_rate": 1.4978316120261918e-05, "loss": 0.451, "step": 7372 }, { "epoch": 1.0115250051450917, "grad_norm": 1.203125, "learning_rate": 1.4977064175039524e-05, "loss": 0.4613, "step": 7373 }, { "epoch": 1.011662207587295, "grad_norm": 1.2578125, "learning_rate": 1.4975812126113122e-05, "loss": 0.4793, "step": 7374 }, { "epoch": 1.0117994100294985, "grad_norm": 1.234375, "learning_rate": 1.4974559973508798e-05, "loss": 0.4487, "step": 7375 }, { "epoch": 1.011936612471702, "grad_norm": 1.3046875, "learning_rate": 1.4973307717252643e-05, "loss": 0.4706, "step": 7376 }, { "epoch": 1.0120738149139055, "grad_norm": 1.1484375, "learning_rate": 1.497205535737075e-05, "loss": 0.4142, "step": 7377 }, { "epoch": 1.012211017356109, "grad_norm": 1.2265625, "learning_rate": 1.4970802893889212e-05, "loss": 0.4737, "step": 7378 }, { "epoch": 1.0123482197983125, "grad_norm": 1.1953125, "learning_rate": 1.4969550326834124e-05, "loss": 0.4021, "step": 7379 }, { "epoch": 1.012485422240516, "grad_norm": 1.265625, "learning_rate": 1.4968297656231587e-05, "loss": 0.4393, "step": 7380 }, { "epoch": 1.0126226246827192, "grad_norm": 1.203125, "learning_rate": 1.4967044882107704e-05, "loss": 0.4625, "step": 7381 }, { "epoch": 1.0127598271249227, "grad_norm": 1.140625, "learning_rate": 1.4965792004488578e-05, "loss": 0.4422, "step": 7382 }, { "epoch": 1.0128970295671262, "grad_norm": 1.078125, "learning_rate": 1.4964539023400309e-05, "loss": 0.3778, "step": 7383 }, { "epoch": 1.0130342320093297, "grad_norm": 1.1328125, "learning_rate": 1.496328593886901e-05, "loss": 0.4072, "step": 7384 }, { "epoch": 1.0131714344515332, "grad_norm": 1.3671875, "learning_rate": 1.4962032750920789e-05, "loss": 0.4618, "step": 7385 }, { "epoch": 1.0133086368937367, "grad_norm": 1.328125, "learning_rate": 1.4960779459581761e-05, "loss": 0.4703, "step": 7386 }, { "epoch": 1.0134458393359402, "grad_norm": 1.1953125, "learning_rate": 1.4959526064878039e-05, "loss": 0.4398, "step": 7387 }, { "epoch": 1.0135830417781437, "grad_norm": 1.28125, "learning_rate": 1.4958272566835731e-05, "loss": 0.4534, "step": 7388 }, { "epoch": 1.0137202442203472, "grad_norm": 1.1640625, "learning_rate": 1.4957018965480968e-05, "loss": 0.426, "step": 7389 }, { "epoch": 1.0138574466625505, "grad_norm": 1.140625, "learning_rate": 1.495576526083986e-05, "loss": 0.4287, "step": 7390 }, { "epoch": 1.013994649104754, "grad_norm": 1.2265625, "learning_rate": 1.4954511452938536e-05, "loss": 0.4507, "step": 7391 }, { "epoch": 1.0141318515469575, "grad_norm": 1.2734375, "learning_rate": 1.4953257541803119e-05, "loss": 0.4683, "step": 7392 }, { "epoch": 1.014269053989161, "grad_norm": 1.1328125, "learning_rate": 1.4952003527459736e-05, "loss": 0.4404, "step": 7393 }, { "epoch": 1.0144062564313645, "grad_norm": 1.1953125, "learning_rate": 1.4950749409934515e-05, "loss": 0.4421, "step": 7394 }, { "epoch": 1.014543458873568, "grad_norm": 1.2890625, "learning_rate": 1.4949495189253589e-05, "loss": 0.4867, "step": 7395 }, { "epoch": 1.0146806613157715, "grad_norm": 1.203125, "learning_rate": 1.4948240865443091e-05, "loss": 0.4768, "step": 7396 }, { "epoch": 1.0148178637579748, "grad_norm": 1.2265625, "learning_rate": 1.4946986438529156e-05, "loss": 0.4727, "step": 7397 }, { "epoch": 1.0149550662001783, "grad_norm": 1.1953125, "learning_rate": 1.4945731908537922e-05, "loss": 0.4741, "step": 7398 }, { "epoch": 1.0150922686423818, "grad_norm": 1.1796875, "learning_rate": 1.4944477275495527e-05, "loss": 0.4559, "step": 7399 }, { "epoch": 1.0152294710845853, "grad_norm": 1.265625, "learning_rate": 1.4943222539428118e-05, "loss": 0.4607, "step": 7400 }, { "epoch": 1.0153666735267888, "grad_norm": 1.203125, "learning_rate": 1.4941967700361835e-05, "loss": 0.4447, "step": 7401 }, { "epoch": 1.0155038759689923, "grad_norm": 1.2421875, "learning_rate": 1.4940712758322824e-05, "loss": 0.4348, "step": 7402 }, { "epoch": 1.0156410784111958, "grad_norm": 1.328125, "learning_rate": 1.4939457713337235e-05, "loss": 0.5013, "step": 7403 }, { "epoch": 1.0157782808533993, "grad_norm": 1.2421875, "learning_rate": 1.4938202565431219e-05, "loss": 0.4674, "step": 7404 }, { "epoch": 1.0159154832956028, "grad_norm": 1.1015625, "learning_rate": 1.493694731463093e-05, "loss": 0.3682, "step": 7405 }, { "epoch": 1.016052685737806, "grad_norm": 1.2109375, "learning_rate": 1.4935691960962518e-05, "loss": 0.4802, "step": 7406 }, { "epoch": 1.0161898881800095, "grad_norm": 1.2421875, "learning_rate": 1.4934436504452143e-05, "loss": 0.4485, "step": 7407 }, { "epoch": 1.016327090622213, "grad_norm": 1.234375, "learning_rate": 1.4933180945125968e-05, "loss": 0.4282, "step": 7408 }, { "epoch": 1.0164642930644165, "grad_norm": 1.2109375, "learning_rate": 1.4931925283010147e-05, "loss": 0.4561, "step": 7409 }, { "epoch": 1.01660149550662, "grad_norm": 1.25, "learning_rate": 1.4930669518130848e-05, "loss": 0.4087, "step": 7410 }, { "epoch": 1.0167386979488235, "grad_norm": 1.21875, "learning_rate": 1.4929413650514234e-05, "loss": 0.4361, "step": 7411 }, { "epoch": 1.016875900391027, "grad_norm": 1.140625, "learning_rate": 1.4928157680186477e-05, "loss": 0.3732, "step": 7412 }, { "epoch": 1.0170131028332303, "grad_norm": 1.2421875, "learning_rate": 1.4926901607173746e-05, "loss": 0.471, "step": 7413 }, { "epoch": 1.0171503052754338, "grad_norm": 1.3359375, "learning_rate": 1.4925645431502207e-05, "loss": 0.5039, "step": 7414 }, { "epoch": 1.0172875077176373, "grad_norm": 1.1640625, "learning_rate": 1.4924389153198039e-05, "loss": 0.4241, "step": 7415 }, { "epoch": 1.0174247101598408, "grad_norm": 1.2265625, "learning_rate": 1.4923132772287416e-05, "loss": 0.4745, "step": 7416 }, { "epoch": 1.0175619126020443, "grad_norm": 1.1875, "learning_rate": 1.4921876288796522e-05, "loss": 0.4753, "step": 7417 }, { "epoch": 1.0176991150442478, "grad_norm": 1.0625, "learning_rate": 1.4920619702751531e-05, "loss": 0.37, "step": 7418 }, { "epoch": 1.0178363174864513, "grad_norm": 1.203125, "learning_rate": 1.4919363014178629e-05, "loss": 0.4769, "step": 7419 }, { "epoch": 1.0179735199286548, "grad_norm": 1.1484375, "learning_rate": 1.4918106223104001e-05, "loss": 0.4332, "step": 7420 }, { "epoch": 1.0181107223708583, "grad_norm": 1.1796875, "learning_rate": 1.4916849329553835e-05, "loss": 0.4652, "step": 7421 }, { "epoch": 1.0182479248130616, "grad_norm": 1.2421875, "learning_rate": 1.4915592333554317e-05, "loss": 0.3496, "step": 7422 }, { "epoch": 1.018385127255265, "grad_norm": 1.1875, "learning_rate": 1.4914335235131638e-05, "loss": 0.4541, "step": 7423 }, { "epoch": 1.0185223296974686, "grad_norm": 1.1328125, "learning_rate": 1.4913078034311995e-05, "loss": 0.4003, "step": 7424 }, { "epoch": 1.018659532139672, "grad_norm": 1.2421875, "learning_rate": 1.491182073112158e-05, "loss": 0.4743, "step": 7425 }, { "epoch": 1.0187967345818756, "grad_norm": 1.2734375, "learning_rate": 1.4910563325586591e-05, "loss": 0.4813, "step": 7426 }, { "epoch": 1.018933937024079, "grad_norm": 1.203125, "learning_rate": 1.490930581773323e-05, "loss": 0.3763, "step": 7427 }, { "epoch": 1.0190711394662826, "grad_norm": 1.28125, "learning_rate": 1.4908048207587697e-05, "loss": 0.4952, "step": 7428 }, { "epoch": 1.0192083419084859, "grad_norm": 1.0703125, "learning_rate": 1.4906790495176198e-05, "loss": 0.3602, "step": 7429 }, { "epoch": 1.0193455443506894, "grad_norm": 1.1953125, "learning_rate": 1.490553268052494e-05, "loss": 0.4514, "step": 7430 }, { "epoch": 1.0194827467928929, "grad_norm": 1.203125, "learning_rate": 1.4904274763660126e-05, "loss": 0.4619, "step": 7431 }, { "epoch": 1.0196199492350964, "grad_norm": 1.2265625, "learning_rate": 1.4903016744607971e-05, "loss": 0.4708, "step": 7432 }, { "epoch": 1.0197571516772999, "grad_norm": 1.25, "learning_rate": 1.4901758623394686e-05, "loss": 0.4948, "step": 7433 }, { "epoch": 1.0198943541195034, "grad_norm": 1.2265625, "learning_rate": 1.4900500400046487e-05, "loss": 0.4592, "step": 7434 }, { "epoch": 1.0200315565617069, "grad_norm": 1.0703125, "learning_rate": 1.4899242074589589e-05, "loss": 0.354, "step": 7435 }, { "epoch": 1.0201687590039104, "grad_norm": 1.2109375, "learning_rate": 1.4897983647050209e-05, "loss": 0.4724, "step": 7436 }, { "epoch": 1.0203059614461139, "grad_norm": 1.265625, "learning_rate": 1.4896725117454573e-05, "loss": 0.4538, "step": 7437 }, { "epoch": 1.0204431638883171, "grad_norm": 1.234375, "learning_rate": 1.4895466485828905e-05, "loss": 0.4579, "step": 7438 }, { "epoch": 1.0205803663305206, "grad_norm": 1.4375, "learning_rate": 1.4894207752199426e-05, "loss": 0.5047, "step": 7439 }, { "epoch": 1.0207175687727241, "grad_norm": 1.4296875, "learning_rate": 1.4892948916592364e-05, "loss": 0.523, "step": 7440 }, { "epoch": 1.0208547712149276, "grad_norm": 1.25, "learning_rate": 1.489168997903395e-05, "loss": 0.4632, "step": 7441 }, { "epoch": 1.0209919736571311, "grad_norm": 1.3046875, "learning_rate": 1.4890430939550413e-05, "loss": 0.5303, "step": 7442 }, { "epoch": 1.0211291760993346, "grad_norm": 1.2109375, "learning_rate": 1.4889171798167991e-05, "loss": 0.4589, "step": 7443 }, { "epoch": 1.0212663785415381, "grad_norm": 1.2421875, "learning_rate": 1.4887912554912918e-05, "loss": 0.4459, "step": 7444 }, { "epoch": 1.0214035809837414, "grad_norm": 1.203125, "learning_rate": 1.488665320981143e-05, "loss": 0.4491, "step": 7445 }, { "epoch": 1.021540783425945, "grad_norm": 1.2734375, "learning_rate": 1.488539376288977e-05, "loss": 0.4959, "step": 7446 }, { "epoch": 1.0216779858681484, "grad_norm": 1.1484375, "learning_rate": 1.488413421417418e-05, "loss": 0.4066, "step": 7447 }, { "epoch": 1.021815188310352, "grad_norm": 1.359375, "learning_rate": 1.4882874563690904e-05, "loss": 0.5141, "step": 7448 }, { "epoch": 1.0219523907525554, "grad_norm": 1.3125, "learning_rate": 1.488161481146619e-05, "loss": 0.5101, "step": 7449 }, { "epoch": 1.022089593194759, "grad_norm": 1.15625, "learning_rate": 1.4880354957526284e-05, "loss": 0.3873, "step": 7450 }, { "epoch": 1.0222267956369624, "grad_norm": 1.203125, "learning_rate": 1.4879095001897436e-05, "loss": 0.4508, "step": 7451 }, { "epoch": 1.022363998079166, "grad_norm": 1.2421875, "learning_rate": 1.4877834944605904e-05, "loss": 0.4851, "step": 7452 }, { "epoch": 1.0225012005213694, "grad_norm": 1.109375, "learning_rate": 1.487657478567794e-05, "loss": 0.4326, "step": 7453 }, { "epoch": 1.0226384029635727, "grad_norm": 1.1875, "learning_rate": 1.4875314525139798e-05, "loss": 0.4652, "step": 7454 }, { "epoch": 1.0227756054057762, "grad_norm": 1.0859375, "learning_rate": 1.4874054163017741e-05, "loss": 0.3853, "step": 7455 }, { "epoch": 1.0229128078479797, "grad_norm": 1.1484375, "learning_rate": 1.4872793699338033e-05, "loss": 0.4205, "step": 7456 }, { "epoch": 1.0230500102901832, "grad_norm": 1.296875, "learning_rate": 1.487153313412693e-05, "loss": 0.4977, "step": 7457 }, { "epoch": 1.0231872127323867, "grad_norm": 1.171875, "learning_rate": 1.4870272467410708e-05, "loss": 0.4012, "step": 7458 }, { "epoch": 1.0233244151745902, "grad_norm": 1.234375, "learning_rate": 1.4869011699215623e-05, "loss": 0.4501, "step": 7459 }, { "epoch": 1.0234616176167937, "grad_norm": 1.2578125, "learning_rate": 1.4867750829567951e-05, "loss": 0.4539, "step": 7460 }, { "epoch": 1.023598820058997, "grad_norm": 1.3515625, "learning_rate": 1.4866489858493964e-05, "loss": 0.478, "step": 7461 }, { "epoch": 1.0237360225012004, "grad_norm": 1.234375, "learning_rate": 1.4865228786019937e-05, "loss": 0.4799, "step": 7462 }, { "epoch": 1.023873224943404, "grad_norm": 1.2109375, "learning_rate": 1.4863967612172143e-05, "loss": 0.4503, "step": 7463 }, { "epoch": 1.0240104273856074, "grad_norm": 1.140625, "learning_rate": 1.4862706336976861e-05, "loss": 0.4031, "step": 7464 }, { "epoch": 1.024147629827811, "grad_norm": 1.28125, "learning_rate": 1.4861444960460374e-05, "loss": 0.4736, "step": 7465 }, { "epoch": 1.0242848322700144, "grad_norm": 1.3125, "learning_rate": 1.4860183482648964e-05, "loss": 0.4497, "step": 7466 }, { "epoch": 1.024422034712218, "grad_norm": 1.2421875, "learning_rate": 1.4858921903568912e-05, "loss": 0.4635, "step": 7467 }, { "epoch": 1.0245592371544214, "grad_norm": 1.2109375, "learning_rate": 1.485766022324651e-05, "loss": 0.4855, "step": 7468 }, { "epoch": 1.024696439596625, "grad_norm": 1.15625, "learning_rate": 1.4856398441708043e-05, "loss": 0.4273, "step": 7469 }, { "epoch": 1.0248336420388282, "grad_norm": 1.25, "learning_rate": 1.4855136558979801e-05, "loss": 0.4765, "step": 7470 }, { "epoch": 1.0249708444810317, "grad_norm": 1.1875, "learning_rate": 1.485387457508808e-05, "loss": 0.4074, "step": 7471 }, { "epoch": 1.0251080469232352, "grad_norm": 1.2734375, "learning_rate": 1.4852612490059177e-05, "loss": 0.427, "step": 7472 }, { "epoch": 1.0252452493654387, "grad_norm": 1.1328125, "learning_rate": 1.4851350303919385e-05, "loss": 0.3893, "step": 7473 }, { "epoch": 1.0253824518076422, "grad_norm": 1.2578125, "learning_rate": 1.4850088016695003e-05, "loss": 0.4293, "step": 7474 }, { "epoch": 1.0255196542498457, "grad_norm": 1.171875, "learning_rate": 1.484882562841234e-05, "loss": 0.4406, "step": 7475 }, { "epoch": 1.0256568566920492, "grad_norm": 1.203125, "learning_rate": 1.484756313909769e-05, "loss": 0.4155, "step": 7476 }, { "epoch": 1.0257940591342525, "grad_norm": 1.2265625, "learning_rate": 1.4846300548777361e-05, "loss": 0.4711, "step": 7477 }, { "epoch": 1.025931261576456, "grad_norm": 1.265625, "learning_rate": 1.4845037857477667e-05, "loss": 0.4965, "step": 7478 }, { "epoch": 1.0260684640186595, "grad_norm": 1.2421875, "learning_rate": 1.484377506522491e-05, "loss": 0.468, "step": 7479 }, { "epoch": 1.026205666460863, "grad_norm": 1.3203125, "learning_rate": 1.4842512172045407e-05, "loss": 0.5175, "step": 7480 }, { "epoch": 1.0263428689030665, "grad_norm": 1.171875, "learning_rate": 1.4841249177965473e-05, "loss": 0.4439, "step": 7481 }, { "epoch": 1.02648007134527, "grad_norm": 1.171875, "learning_rate": 1.483998608301142e-05, "loss": 0.3882, "step": 7482 }, { "epoch": 1.0266172737874735, "grad_norm": 1.2734375, "learning_rate": 1.4838722887209567e-05, "loss": 0.495, "step": 7483 }, { "epoch": 1.026754476229677, "grad_norm": 1.3125, "learning_rate": 1.4837459590586236e-05, "loss": 0.5131, "step": 7484 }, { "epoch": 1.0268916786718805, "grad_norm": 1.109375, "learning_rate": 1.4836196193167752e-05, "loss": 0.3746, "step": 7485 }, { "epoch": 1.0270288811140837, "grad_norm": 1.15625, "learning_rate": 1.4834932694980435e-05, "loss": 0.4036, "step": 7486 }, { "epoch": 1.0271660835562872, "grad_norm": 1.2578125, "learning_rate": 1.4833669096050614e-05, "loss": 0.478, "step": 7487 }, { "epoch": 1.0273032859984907, "grad_norm": 1.25, "learning_rate": 1.4832405396404617e-05, "loss": 0.3858, "step": 7488 }, { "epoch": 1.0274404884406942, "grad_norm": 1.328125, "learning_rate": 1.4831141596068774e-05, "loss": 0.5167, "step": 7489 }, { "epoch": 1.0275776908828977, "grad_norm": 1.2109375, "learning_rate": 1.4829877695069421e-05, "loss": 0.4719, "step": 7490 }, { "epoch": 1.0277148933251012, "grad_norm": 1.2578125, "learning_rate": 1.4828613693432892e-05, "loss": 0.4413, "step": 7491 }, { "epoch": 1.0278520957673047, "grad_norm": 1.203125, "learning_rate": 1.4827349591185521e-05, "loss": 0.4519, "step": 7492 }, { "epoch": 1.027989298209508, "grad_norm": 1.1796875, "learning_rate": 1.4826085388353653e-05, "loss": 0.4044, "step": 7493 }, { "epoch": 1.0281265006517115, "grad_norm": 1.1796875, "learning_rate": 1.4824821084963625e-05, "loss": 0.4355, "step": 7494 }, { "epoch": 1.028263703093915, "grad_norm": 1.1328125, "learning_rate": 1.4823556681041784e-05, "loss": 0.3613, "step": 7495 }, { "epoch": 1.0284009055361185, "grad_norm": 1.1875, "learning_rate": 1.482229217661447e-05, "loss": 0.4209, "step": 7496 }, { "epoch": 1.028538107978322, "grad_norm": 1.28125, "learning_rate": 1.4821027571708038e-05, "loss": 0.5004, "step": 7497 }, { "epoch": 1.0286753104205255, "grad_norm": 1.171875, "learning_rate": 1.481976286634883e-05, "loss": 0.4315, "step": 7498 }, { "epoch": 1.028812512862729, "grad_norm": 1.2734375, "learning_rate": 1.4818498060563205e-05, "loss": 0.4794, "step": 7499 }, { "epoch": 1.0289497153049325, "grad_norm": 1.265625, "learning_rate": 1.4817233154377512e-05, "loss": 0.4462, "step": 7500 }, { "epoch": 1.029086917747136, "grad_norm": 1.234375, "learning_rate": 1.481596814781811e-05, "loss": 0.4769, "step": 7501 }, { "epoch": 1.0292241201893393, "grad_norm": 1.234375, "learning_rate": 1.4814703040911356e-05, "loss": 0.4531, "step": 7502 }, { "epoch": 1.0293613226315428, "grad_norm": 1.2421875, "learning_rate": 1.4813437833683612e-05, "loss": 0.5043, "step": 7503 }, { "epoch": 1.0294985250737463, "grad_norm": 1.15625, "learning_rate": 1.4812172526161238e-05, "loss": 0.4659, "step": 7504 }, { "epoch": 1.0296357275159498, "grad_norm": 1.2265625, "learning_rate": 1.4810907118370596e-05, "loss": 0.4626, "step": 7505 }, { "epoch": 1.0297729299581533, "grad_norm": 1.28125, "learning_rate": 1.4809641610338059e-05, "loss": 0.4778, "step": 7506 }, { "epoch": 1.0299101324003568, "grad_norm": 1.2421875, "learning_rate": 1.4808376002089993e-05, "loss": 0.4772, "step": 7507 }, { "epoch": 1.0300473348425603, "grad_norm": 1.28125, "learning_rate": 1.4807110293652765e-05, "loss": 0.5014, "step": 7508 }, { "epoch": 1.0301845372847636, "grad_norm": 1.203125, "learning_rate": 1.4805844485052752e-05, "loss": 0.4243, "step": 7509 }, { "epoch": 1.030321739726967, "grad_norm": 1.1015625, "learning_rate": 1.4804578576316327e-05, "loss": 0.3832, "step": 7510 }, { "epoch": 1.0304589421691706, "grad_norm": 1.1953125, "learning_rate": 1.480331256746987e-05, "loss": 0.4158, "step": 7511 }, { "epoch": 1.030596144611374, "grad_norm": 1.3125, "learning_rate": 1.4802046458539755e-05, "loss": 0.4657, "step": 7512 }, { "epoch": 1.0307333470535776, "grad_norm": 1.203125, "learning_rate": 1.4800780249552368e-05, "loss": 0.4087, "step": 7513 }, { "epoch": 1.030870549495781, "grad_norm": 1.2734375, "learning_rate": 1.4799513940534089e-05, "loss": 0.4485, "step": 7514 }, { "epoch": 1.0310077519379846, "grad_norm": 1.1875, "learning_rate": 1.4798247531511303e-05, "loss": 0.4205, "step": 7515 }, { "epoch": 1.031144954380188, "grad_norm": 1.3125, "learning_rate": 1.4796981022510399e-05, "loss": 0.4654, "step": 7516 }, { "epoch": 1.0312821568223915, "grad_norm": 1.296875, "learning_rate": 1.4795714413557766e-05, "loss": 0.4876, "step": 7517 }, { "epoch": 1.0314193592645948, "grad_norm": 1.265625, "learning_rate": 1.4794447704679798e-05, "loss": 0.5014, "step": 7518 }, { "epoch": 1.0315565617067983, "grad_norm": 1.2890625, "learning_rate": 1.4793180895902884e-05, "loss": 0.4247, "step": 7519 }, { "epoch": 1.0316937641490018, "grad_norm": 1.171875, "learning_rate": 1.4791913987253426e-05, "loss": 0.448, "step": 7520 }, { "epoch": 1.0318309665912053, "grad_norm": 1.25, "learning_rate": 1.4790646978757815e-05, "loss": 0.4598, "step": 7521 }, { "epoch": 1.0319681690334088, "grad_norm": 1.1796875, "learning_rate": 1.4789379870442453e-05, "loss": 0.4537, "step": 7522 }, { "epoch": 1.0321053714756123, "grad_norm": 1.1953125, "learning_rate": 1.4788112662333743e-05, "loss": 0.5347, "step": 7523 }, { "epoch": 1.0322425739178158, "grad_norm": 1.21875, "learning_rate": 1.4786845354458086e-05, "loss": 0.4408, "step": 7524 }, { "epoch": 1.032379776360019, "grad_norm": 1.1796875, "learning_rate": 1.4785577946841895e-05, "loss": 0.4389, "step": 7525 }, { "epoch": 1.0325169788022226, "grad_norm": 1.2890625, "learning_rate": 1.4784310439511572e-05, "loss": 0.5387, "step": 7526 }, { "epoch": 1.032654181244426, "grad_norm": 1.15625, "learning_rate": 1.478304283249353e-05, "loss": 0.4604, "step": 7527 }, { "epoch": 1.0327913836866296, "grad_norm": 1.203125, "learning_rate": 1.4781775125814179e-05, "loss": 0.4236, "step": 7528 }, { "epoch": 1.032928586128833, "grad_norm": 1.1640625, "learning_rate": 1.478050731949994e-05, "loss": 0.4445, "step": 7529 }, { "epoch": 1.0330657885710366, "grad_norm": 1.234375, "learning_rate": 1.477923941357722e-05, "loss": 0.4402, "step": 7530 }, { "epoch": 1.03320299101324, "grad_norm": 1.296875, "learning_rate": 1.4777971408072443e-05, "loss": 0.5113, "step": 7531 }, { "epoch": 1.0333401934554436, "grad_norm": 1.2265625, "learning_rate": 1.4776703303012027e-05, "loss": 0.4773, "step": 7532 }, { "epoch": 1.033477395897647, "grad_norm": 1.203125, "learning_rate": 1.47754350984224e-05, "loss": 0.3967, "step": 7533 }, { "epoch": 1.0336145983398504, "grad_norm": 1.28125, "learning_rate": 1.4774166794329978e-05, "loss": 0.4644, "step": 7534 }, { "epoch": 1.0337518007820539, "grad_norm": 1.125, "learning_rate": 1.4772898390761196e-05, "loss": 0.4083, "step": 7535 }, { "epoch": 1.0338890032242574, "grad_norm": 1.28125, "learning_rate": 1.4771629887742483e-05, "loss": 0.4603, "step": 7536 }, { "epoch": 1.0340262056664609, "grad_norm": 1.2265625, "learning_rate": 1.4770361285300265e-05, "loss": 0.3661, "step": 7537 }, { "epoch": 1.0341634081086644, "grad_norm": 1.21875, "learning_rate": 1.4769092583460975e-05, "loss": 0.4354, "step": 7538 }, { "epoch": 1.0343006105508679, "grad_norm": 1.2265625, "learning_rate": 1.4767823782251057e-05, "loss": 0.4326, "step": 7539 }, { "epoch": 1.0344378129930714, "grad_norm": 1.15625, "learning_rate": 1.4766554881696935e-05, "loss": 0.4255, "step": 7540 }, { "epoch": 1.0345750154352746, "grad_norm": 1.1953125, "learning_rate": 1.4765285881825056e-05, "loss": 0.4244, "step": 7541 }, { "epoch": 1.0347122178774781, "grad_norm": 1.28125, "learning_rate": 1.476401678266186e-05, "loss": 0.4841, "step": 7542 }, { "epoch": 1.0348494203196816, "grad_norm": 1.2890625, "learning_rate": 1.476274758423379e-05, "loss": 0.4867, "step": 7543 }, { "epoch": 1.0349866227618851, "grad_norm": 1.125, "learning_rate": 1.4761478286567295e-05, "loss": 0.3679, "step": 7544 }, { "epoch": 1.0351238252040886, "grad_norm": 1.203125, "learning_rate": 1.4760208889688817e-05, "loss": 0.3985, "step": 7545 }, { "epoch": 1.0352610276462921, "grad_norm": 1.2734375, "learning_rate": 1.475893939362481e-05, "loss": 0.5053, "step": 7546 }, { "epoch": 1.0353982300884956, "grad_norm": 1.1796875, "learning_rate": 1.475766979840172e-05, "loss": 0.4713, "step": 7547 }, { "epoch": 1.0355354325306991, "grad_norm": 1.2421875, "learning_rate": 1.4756400104046012e-05, "loss": 0.4837, "step": 7548 }, { "epoch": 1.0356726349729026, "grad_norm": 1.125, "learning_rate": 1.4755130310584131e-05, "loss": 0.3672, "step": 7549 }, { "epoch": 1.035809837415106, "grad_norm": 1.1953125, "learning_rate": 1.4753860418042539e-05, "loss": 0.4813, "step": 7550 }, { "epoch": 1.0359470398573094, "grad_norm": 1.21875, "learning_rate": 1.4752590426447695e-05, "loss": 0.4497, "step": 7551 }, { "epoch": 1.036084242299513, "grad_norm": 1.296875, "learning_rate": 1.475132033582606e-05, "loss": 0.5161, "step": 7552 }, { "epoch": 1.0362214447417164, "grad_norm": 1.1796875, "learning_rate": 1.47500501462041e-05, "loss": 0.4367, "step": 7553 }, { "epoch": 1.03635864718392, "grad_norm": 1.2578125, "learning_rate": 1.4748779857608282e-05, "loss": 0.4506, "step": 7554 }, { "epoch": 1.0364958496261234, "grad_norm": 1.265625, "learning_rate": 1.4747509470065073e-05, "loss": 0.446, "step": 7555 }, { "epoch": 1.036633052068327, "grad_norm": 1.1328125, "learning_rate": 1.4746238983600942e-05, "loss": 0.3647, "step": 7556 }, { "epoch": 1.0367702545105302, "grad_norm": 1.140625, "learning_rate": 1.4744968398242367e-05, "loss": 0.4196, "step": 7557 }, { "epoch": 1.0369074569527337, "grad_norm": 1.2109375, "learning_rate": 1.4743697714015814e-05, "loss": 0.465, "step": 7558 }, { "epoch": 1.0370446593949372, "grad_norm": 1.390625, "learning_rate": 1.4742426930947767e-05, "loss": 0.5236, "step": 7559 }, { "epoch": 1.0371818618371407, "grad_norm": 1.1875, "learning_rate": 1.4741156049064697e-05, "loss": 0.4367, "step": 7560 }, { "epoch": 1.0373190642793442, "grad_norm": 1.3046875, "learning_rate": 1.4739885068393089e-05, "loss": 0.4905, "step": 7561 }, { "epoch": 1.0374562667215477, "grad_norm": 1.1796875, "learning_rate": 1.4738613988959425e-05, "loss": 0.4052, "step": 7562 }, { "epoch": 1.0375934691637512, "grad_norm": 1.203125, "learning_rate": 1.4737342810790192e-05, "loss": 0.4472, "step": 7563 }, { "epoch": 1.0377306716059547, "grad_norm": 1.15625, "learning_rate": 1.4736071533911874e-05, "loss": 0.4207, "step": 7564 }, { "epoch": 1.0378678740481582, "grad_norm": 1.296875, "learning_rate": 1.4734800158350961e-05, "loss": 0.5152, "step": 7565 }, { "epoch": 1.0380050764903614, "grad_norm": 1.109375, "learning_rate": 1.4733528684133945e-05, "loss": 0.3932, "step": 7566 }, { "epoch": 1.038142278932565, "grad_norm": 1.34375, "learning_rate": 1.4732257111287314e-05, "loss": 0.5624, "step": 7567 }, { "epoch": 1.0382794813747684, "grad_norm": 1.3046875, "learning_rate": 1.4730985439837566e-05, "loss": 0.5221, "step": 7568 }, { "epoch": 1.038416683816972, "grad_norm": 1.2578125, "learning_rate": 1.4729713669811201e-05, "loss": 0.421, "step": 7569 }, { "epoch": 1.0385538862591754, "grad_norm": 1.1875, "learning_rate": 1.4728441801234713e-05, "loss": 0.4441, "step": 7570 }, { "epoch": 1.038691088701379, "grad_norm": 1.25, "learning_rate": 1.4727169834134608e-05, "loss": 0.463, "step": 7571 }, { "epoch": 1.0388282911435824, "grad_norm": 1.1484375, "learning_rate": 1.4725897768537385e-05, "loss": 0.4135, "step": 7572 }, { "epoch": 1.0389654935857857, "grad_norm": 1.2265625, "learning_rate": 1.4724625604469548e-05, "loss": 0.4748, "step": 7573 }, { "epoch": 1.0391026960279892, "grad_norm": 1.1953125, "learning_rate": 1.4723353341957614e-05, "loss": 0.4212, "step": 7574 }, { "epoch": 1.0392398984701927, "grad_norm": 1.28125, "learning_rate": 1.4722080981028082e-05, "loss": 0.4415, "step": 7575 }, { "epoch": 1.0393771009123962, "grad_norm": 1.2890625, "learning_rate": 1.4720808521707465e-05, "loss": 0.459, "step": 7576 }, { "epoch": 1.0395143033545997, "grad_norm": 1.2265625, "learning_rate": 1.4719535964022283e-05, "loss": 0.4646, "step": 7577 }, { "epoch": 1.0396515057968032, "grad_norm": 1.0703125, "learning_rate": 1.4718263307999043e-05, "loss": 0.365, "step": 7578 }, { "epoch": 1.0397887082390067, "grad_norm": 1.140625, "learning_rate": 1.4716990553664268e-05, "loss": 0.4335, "step": 7579 }, { "epoch": 1.0399259106812102, "grad_norm": 1.171875, "learning_rate": 1.4715717701044476e-05, "loss": 0.4145, "step": 7580 }, { "epoch": 1.0400631131234137, "grad_norm": 1.1015625, "learning_rate": 1.471444475016619e-05, "loss": 0.3945, "step": 7581 }, { "epoch": 1.040200315565617, "grad_norm": 1.1796875, "learning_rate": 1.4713171701055934e-05, "loss": 0.4373, "step": 7582 }, { "epoch": 1.0403375180078205, "grad_norm": 1.1953125, "learning_rate": 1.4711898553740227e-05, "loss": 0.4206, "step": 7583 }, { "epoch": 1.040474720450024, "grad_norm": 1.2109375, "learning_rate": 1.4710625308245606e-05, "loss": 0.4326, "step": 7584 }, { "epoch": 1.0406119228922275, "grad_norm": 1.1953125, "learning_rate": 1.4709351964598596e-05, "loss": 0.4168, "step": 7585 }, { "epoch": 1.040749125334431, "grad_norm": 1.1015625, "learning_rate": 1.4708078522825725e-05, "loss": 0.371, "step": 7586 }, { "epoch": 1.0408863277766345, "grad_norm": 1.1953125, "learning_rate": 1.4706804982953538e-05, "loss": 0.4525, "step": 7587 }, { "epoch": 1.041023530218838, "grad_norm": 1.1796875, "learning_rate": 1.470553134500856e-05, "loss": 0.3984, "step": 7588 }, { "epoch": 1.0411607326610413, "grad_norm": 1.25, "learning_rate": 1.4704257609017336e-05, "loss": 0.451, "step": 7589 }, { "epoch": 1.0412979351032448, "grad_norm": 1.234375, "learning_rate": 1.4702983775006403e-05, "loss": 0.4391, "step": 7590 }, { "epoch": 1.0414351375454483, "grad_norm": 1.2578125, "learning_rate": 1.4701709843002305e-05, "loss": 0.4635, "step": 7591 }, { "epoch": 1.0415723399876518, "grad_norm": 1.2109375, "learning_rate": 1.4700435813031584e-05, "loss": 0.4386, "step": 7592 }, { "epoch": 1.0417095424298553, "grad_norm": 1.28125, "learning_rate": 1.4699161685120787e-05, "loss": 0.4779, "step": 7593 }, { "epoch": 1.0418467448720587, "grad_norm": 1.2265625, "learning_rate": 1.469788745929646e-05, "loss": 0.4283, "step": 7594 }, { "epoch": 1.0419839473142622, "grad_norm": 1.2265625, "learning_rate": 1.4696613135585158e-05, "loss": 0.4414, "step": 7595 }, { "epoch": 1.0421211497564657, "grad_norm": 1.1484375, "learning_rate": 1.4695338714013433e-05, "loss": 0.4359, "step": 7596 }, { "epoch": 1.0422583521986692, "grad_norm": 1.2578125, "learning_rate": 1.4694064194607833e-05, "loss": 0.449, "step": 7597 }, { "epoch": 1.0423955546408725, "grad_norm": 1.3125, "learning_rate": 1.4692789577394919e-05, "loss": 0.4597, "step": 7598 }, { "epoch": 1.042532757083076, "grad_norm": 1.328125, "learning_rate": 1.469151486240125e-05, "loss": 0.5088, "step": 7599 }, { "epoch": 1.0426699595252795, "grad_norm": 1.109375, "learning_rate": 1.4690240049653386e-05, "loss": 0.375, "step": 7600 }, { "epoch": 1.042807161967483, "grad_norm": 1.140625, "learning_rate": 1.4688965139177886e-05, "loss": 0.4262, "step": 7601 }, { "epoch": 1.0429443644096865, "grad_norm": 1.2421875, "learning_rate": 1.4687690131001322e-05, "loss": 0.461, "step": 7602 }, { "epoch": 1.04308156685189, "grad_norm": 1.1328125, "learning_rate": 1.4686415025150251e-05, "loss": 0.4165, "step": 7603 }, { "epoch": 1.0432187692940935, "grad_norm": 1.1484375, "learning_rate": 1.468513982165125e-05, "loss": 0.4245, "step": 7604 }, { "epoch": 1.0433559717362968, "grad_norm": 1.1953125, "learning_rate": 1.4683864520530884e-05, "loss": 0.4503, "step": 7605 }, { "epoch": 1.0434931741785003, "grad_norm": 1.2421875, "learning_rate": 1.4682589121815727e-05, "loss": 0.4375, "step": 7606 }, { "epoch": 1.0436303766207038, "grad_norm": 1.2578125, "learning_rate": 1.4681313625532356e-05, "loss": 0.4672, "step": 7607 }, { "epoch": 1.0437675790629073, "grad_norm": 1.3046875, "learning_rate": 1.4680038031707346e-05, "loss": 0.456, "step": 7608 }, { "epoch": 1.0439047815051108, "grad_norm": 1.1796875, "learning_rate": 1.4678762340367276e-05, "loss": 0.4338, "step": 7609 }, { "epoch": 1.0440419839473143, "grad_norm": 1.3046875, "learning_rate": 1.4677486551538729e-05, "loss": 0.5235, "step": 7610 }, { "epoch": 1.0441791863895178, "grad_norm": 1.2890625, "learning_rate": 1.4676210665248283e-05, "loss": 0.4822, "step": 7611 }, { "epoch": 1.0443163888317213, "grad_norm": 1.0546875, "learning_rate": 1.4674934681522525e-05, "loss": 0.348, "step": 7612 }, { "epoch": 1.0444535912739248, "grad_norm": 1.234375, "learning_rate": 1.4673658600388044e-05, "loss": 0.4671, "step": 7613 }, { "epoch": 1.044590793716128, "grad_norm": 1.2578125, "learning_rate": 1.4672382421871425e-05, "loss": 0.4736, "step": 7614 }, { "epoch": 1.0447279961583316, "grad_norm": 1.171875, "learning_rate": 1.4671106145999263e-05, "loss": 0.4669, "step": 7615 }, { "epoch": 1.044865198600535, "grad_norm": 1.171875, "learning_rate": 1.4669829772798147e-05, "loss": 0.3782, "step": 7616 }, { "epoch": 1.0450024010427386, "grad_norm": 1.3515625, "learning_rate": 1.4668553302294677e-05, "loss": 0.526, "step": 7617 }, { "epoch": 1.045139603484942, "grad_norm": 1.2734375, "learning_rate": 1.4667276734515446e-05, "loss": 0.5001, "step": 7618 }, { "epoch": 1.0452768059271456, "grad_norm": 1.171875, "learning_rate": 1.4666000069487054e-05, "loss": 0.4393, "step": 7619 }, { "epoch": 1.045414008369349, "grad_norm": 1.2421875, "learning_rate": 1.4664723307236104e-05, "loss": 0.4485, "step": 7620 }, { "epoch": 1.0455512108115523, "grad_norm": 1.1796875, "learning_rate": 1.4663446447789196e-05, "loss": 0.4549, "step": 7621 }, { "epoch": 1.0456884132537558, "grad_norm": 1.296875, "learning_rate": 1.4662169491172936e-05, "loss": 0.5041, "step": 7622 }, { "epoch": 1.0458256156959593, "grad_norm": 1.2734375, "learning_rate": 1.4660892437413932e-05, "loss": 0.5059, "step": 7623 }, { "epoch": 1.0459628181381628, "grad_norm": 1.203125, "learning_rate": 1.465961528653879e-05, "loss": 0.4551, "step": 7624 }, { "epoch": 1.0461000205803663, "grad_norm": 1.2890625, "learning_rate": 1.465833803857413e-05, "loss": 0.4834, "step": 7625 }, { "epoch": 1.0462372230225698, "grad_norm": 1.203125, "learning_rate": 1.4657060693546555e-05, "loss": 0.4513, "step": 7626 }, { "epoch": 1.0463744254647733, "grad_norm": 1.234375, "learning_rate": 1.4655783251482684e-05, "loss": 0.4345, "step": 7627 }, { "epoch": 1.0465116279069768, "grad_norm": 1.2109375, "learning_rate": 1.465450571240914e-05, "loss": 0.4491, "step": 7628 }, { "epoch": 1.0466488303491803, "grad_norm": 1.1015625, "learning_rate": 1.4653228076352535e-05, "loss": 0.394, "step": 7629 }, { "epoch": 1.0467860327913836, "grad_norm": 1.2890625, "learning_rate": 1.4651950343339489e-05, "loss": 0.4496, "step": 7630 }, { "epoch": 1.046923235233587, "grad_norm": 1.140625, "learning_rate": 1.465067251339663e-05, "loss": 0.4064, "step": 7631 }, { "epoch": 1.0470604376757906, "grad_norm": 1.2734375, "learning_rate": 1.4649394586550584e-05, "loss": 0.4953, "step": 7632 }, { "epoch": 1.047197640117994, "grad_norm": 1.1796875, "learning_rate": 1.4648116562827976e-05, "loss": 0.4054, "step": 7633 }, { "epoch": 1.0473348425601976, "grad_norm": 1.03125, "learning_rate": 1.4646838442255435e-05, "loss": 0.3238, "step": 7634 }, { "epoch": 1.047472045002401, "grad_norm": 1.1484375, "learning_rate": 1.4645560224859593e-05, "loss": 0.4257, "step": 7635 }, { "epoch": 1.0476092474446046, "grad_norm": 1.2578125, "learning_rate": 1.4644281910667086e-05, "loss": 0.4966, "step": 7636 }, { "epoch": 1.0477464498868079, "grad_norm": 1.3203125, "learning_rate": 1.4643003499704546e-05, "loss": 0.4396, "step": 7637 }, { "epoch": 1.0478836523290114, "grad_norm": 1.2890625, "learning_rate": 1.464172499199861e-05, "loss": 0.4831, "step": 7638 }, { "epoch": 1.0480208547712149, "grad_norm": 1.234375, "learning_rate": 1.4640446387575923e-05, "loss": 0.4779, "step": 7639 }, { "epoch": 1.0481580572134184, "grad_norm": 1.171875, "learning_rate": 1.4639167686463119e-05, "loss": 0.4495, "step": 7640 }, { "epoch": 1.0482952596556219, "grad_norm": 1.2265625, "learning_rate": 1.4637888888686844e-05, "loss": 0.4571, "step": 7641 }, { "epoch": 1.0484324620978254, "grad_norm": 1.328125, "learning_rate": 1.4636609994273748e-05, "loss": 0.4958, "step": 7642 }, { "epoch": 1.0485696645400289, "grad_norm": 1.15625, "learning_rate": 1.4635331003250472e-05, "loss": 0.391, "step": 7643 }, { "epoch": 1.0487068669822324, "grad_norm": 1.234375, "learning_rate": 1.463405191564367e-05, "loss": 0.4493, "step": 7644 }, { "epoch": 1.0488440694244359, "grad_norm": 1.25, "learning_rate": 1.4632772731479996e-05, "loss": 0.4284, "step": 7645 }, { "epoch": 1.0489812718666391, "grad_norm": 1.2109375, "learning_rate": 1.4631493450786096e-05, "loss": 0.4079, "step": 7646 }, { "epoch": 1.0491184743088426, "grad_norm": 1.0703125, "learning_rate": 1.463021407358863e-05, "loss": 0.3558, "step": 7647 }, { "epoch": 1.0492556767510461, "grad_norm": 1.1875, "learning_rate": 1.4628934599914252e-05, "loss": 0.432, "step": 7648 }, { "epoch": 1.0493928791932496, "grad_norm": 1.1796875, "learning_rate": 1.462765502978963e-05, "loss": 0.3639, "step": 7649 }, { "epoch": 1.0495300816354531, "grad_norm": 1.0625, "learning_rate": 1.4626375363241417e-05, "loss": 0.3577, "step": 7650 }, { "epoch": 1.0496672840776566, "grad_norm": 1.2734375, "learning_rate": 1.462509560029628e-05, "loss": 0.5113, "step": 7651 }, { "epoch": 1.0498044865198601, "grad_norm": 1.1953125, "learning_rate": 1.4623815740980885e-05, "loss": 0.4622, "step": 7652 }, { "epoch": 1.0499416889620634, "grad_norm": 1.203125, "learning_rate": 1.4622535785321898e-05, "loss": 0.4553, "step": 7653 }, { "epoch": 1.050078891404267, "grad_norm": 1.0859375, "learning_rate": 1.4621255733345993e-05, "loss": 0.3568, "step": 7654 }, { "epoch": 1.0502160938464704, "grad_norm": 1.203125, "learning_rate": 1.4619975585079837e-05, "loss": 0.4086, "step": 7655 }, { "epoch": 1.050353296288674, "grad_norm": 1.1796875, "learning_rate": 1.4618695340550104e-05, "loss": 0.3925, "step": 7656 }, { "epoch": 1.0504904987308774, "grad_norm": 1.375, "learning_rate": 1.4617414999783472e-05, "loss": 0.5659, "step": 7657 }, { "epoch": 1.050627701173081, "grad_norm": 1.28125, "learning_rate": 1.4616134562806617e-05, "loss": 0.4582, "step": 7658 }, { "epoch": 1.0507649036152844, "grad_norm": 1.265625, "learning_rate": 1.4614854029646216e-05, "loss": 0.4415, "step": 7659 }, { "epoch": 1.050902106057488, "grad_norm": 1.171875, "learning_rate": 1.4613573400328958e-05, "loss": 0.4349, "step": 7660 }, { "epoch": 1.0510393084996914, "grad_norm": 1.2421875, "learning_rate": 1.461229267488152e-05, "loss": 0.4642, "step": 7661 }, { "epoch": 1.0511765109418947, "grad_norm": 1.21875, "learning_rate": 1.461101185333059e-05, "loss": 0.4291, "step": 7662 }, { "epoch": 1.0513137133840982, "grad_norm": 1.1640625, "learning_rate": 1.4609730935702855e-05, "loss": 0.4084, "step": 7663 }, { "epoch": 1.0514509158263017, "grad_norm": 1.1796875, "learning_rate": 1.4608449922025007e-05, "loss": 0.4499, "step": 7664 }, { "epoch": 1.0515881182685052, "grad_norm": 1.171875, "learning_rate": 1.4607168812323738e-05, "loss": 0.4557, "step": 7665 }, { "epoch": 1.0517253207107087, "grad_norm": 1.171875, "learning_rate": 1.4605887606625737e-05, "loss": 0.4595, "step": 7666 }, { "epoch": 1.0518625231529122, "grad_norm": 1.1328125, "learning_rate": 1.4604606304957703e-05, "loss": 0.4121, "step": 7667 }, { "epoch": 1.0519997255951157, "grad_norm": 1.15625, "learning_rate": 1.4603324907346332e-05, "loss": 0.3789, "step": 7668 }, { "epoch": 1.052136928037319, "grad_norm": 1.1875, "learning_rate": 1.4602043413818327e-05, "loss": 0.4154, "step": 7669 }, { "epoch": 1.0522741304795225, "grad_norm": 1.3515625, "learning_rate": 1.4600761824400387e-05, "loss": 0.5195, "step": 7670 }, { "epoch": 1.052411332921726, "grad_norm": 1.2578125, "learning_rate": 1.4599480139119215e-05, "loss": 0.4679, "step": 7671 }, { "epoch": 1.0525485353639294, "grad_norm": 1.375, "learning_rate": 1.4598198358001521e-05, "loss": 0.4651, "step": 7672 }, { "epoch": 1.052685737806133, "grad_norm": 1.21875, "learning_rate": 1.4596916481074009e-05, "loss": 0.4299, "step": 7673 }, { "epoch": 1.0528229402483364, "grad_norm": 1.1484375, "learning_rate": 1.4595634508363387e-05, "loss": 0.421, "step": 7674 }, { "epoch": 1.05296014269054, "grad_norm": 1.109375, "learning_rate": 1.459435243989637e-05, "loss": 0.3583, "step": 7675 }, { "epoch": 1.0530973451327434, "grad_norm": 1.21875, "learning_rate": 1.4593070275699672e-05, "loss": 0.4414, "step": 7676 }, { "epoch": 1.053234547574947, "grad_norm": 1.234375, "learning_rate": 1.4591788015800006e-05, "loss": 0.4024, "step": 7677 }, { "epoch": 1.0533717500171502, "grad_norm": 1.1640625, "learning_rate": 1.459050566022409e-05, "loss": 0.3711, "step": 7678 }, { "epoch": 1.0535089524593537, "grad_norm": 1.2734375, "learning_rate": 1.4589223208998646e-05, "loss": 0.4721, "step": 7679 }, { "epoch": 1.0536461549015572, "grad_norm": 1.203125, "learning_rate": 1.4587940662150394e-05, "loss": 0.3939, "step": 7680 }, { "epoch": 1.0537833573437607, "grad_norm": 1.2109375, "learning_rate": 1.4586658019706061e-05, "loss": 0.4365, "step": 7681 }, { "epoch": 1.0539205597859642, "grad_norm": 1.34375, "learning_rate": 1.4585375281692366e-05, "loss": 0.4954, "step": 7682 }, { "epoch": 1.0540577622281677, "grad_norm": 1.2578125, "learning_rate": 1.4584092448136043e-05, "loss": 0.4733, "step": 7683 }, { "epoch": 1.0541949646703712, "grad_norm": 1.296875, "learning_rate": 1.4582809519063814e-05, "loss": 0.4651, "step": 7684 }, { "epoch": 1.0543321671125745, "grad_norm": 1.296875, "learning_rate": 1.4581526494502417e-05, "loss": 0.4451, "step": 7685 }, { "epoch": 1.054469369554778, "grad_norm": 1.1484375, "learning_rate": 1.4580243374478586e-05, "loss": 0.4029, "step": 7686 }, { "epoch": 1.0546065719969815, "grad_norm": 1.234375, "learning_rate": 1.4578960159019056e-05, "loss": 0.4478, "step": 7687 }, { "epoch": 1.054743774439185, "grad_norm": 1.296875, "learning_rate": 1.4577676848150558e-05, "loss": 0.4549, "step": 7688 }, { "epoch": 1.0548809768813885, "grad_norm": 1.1640625, "learning_rate": 1.457639344189984e-05, "loss": 0.4039, "step": 7689 }, { "epoch": 1.055018179323592, "grad_norm": 1.2265625, "learning_rate": 1.4575109940293641e-05, "loss": 0.4439, "step": 7690 }, { "epoch": 1.0551553817657955, "grad_norm": 1.125, "learning_rate": 1.4573826343358702e-05, "loss": 0.412, "step": 7691 }, { "epoch": 1.055292584207999, "grad_norm": 1.359375, "learning_rate": 1.457254265112177e-05, "loss": 0.4637, "step": 7692 }, { "epoch": 1.0554297866502025, "grad_norm": 1.203125, "learning_rate": 1.4571258863609594e-05, "loss": 0.4355, "step": 7693 }, { "epoch": 1.0555669890924058, "grad_norm": 1.25, "learning_rate": 1.4569974980848922e-05, "loss": 0.4915, "step": 7694 }, { "epoch": 1.0557041915346093, "grad_norm": 1.1796875, "learning_rate": 1.4568691002866504e-05, "loss": 0.4256, "step": 7695 }, { "epoch": 1.0558413939768128, "grad_norm": 1.28125, "learning_rate": 1.4567406929689099e-05, "loss": 0.4769, "step": 7696 }, { "epoch": 1.0559785964190163, "grad_norm": 1.296875, "learning_rate": 1.4566122761343455e-05, "loss": 0.4826, "step": 7697 }, { "epoch": 1.0561157988612198, "grad_norm": 1.3046875, "learning_rate": 1.4564838497856334e-05, "loss": 0.4486, "step": 7698 }, { "epoch": 1.0562530013034233, "grad_norm": 1.3046875, "learning_rate": 1.4563554139254497e-05, "loss": 0.5089, "step": 7699 }, { "epoch": 1.0563902037456268, "grad_norm": 1.234375, "learning_rate": 1.4562269685564698e-05, "loss": 0.4211, "step": 7700 }, { "epoch": 1.05652740618783, "grad_norm": 1.28125, "learning_rate": 1.456098513681371e-05, "loss": 0.4615, "step": 7701 }, { "epoch": 1.0566646086300335, "grad_norm": 1.3203125, "learning_rate": 1.455970049302829e-05, "loss": 0.5019, "step": 7702 }, { "epoch": 1.056801811072237, "grad_norm": 1.3984375, "learning_rate": 1.4558415754235211e-05, "loss": 0.5521, "step": 7703 }, { "epoch": 1.0569390135144405, "grad_norm": 1.203125, "learning_rate": 1.455713092046124e-05, "loss": 0.445, "step": 7704 }, { "epoch": 1.057076215956644, "grad_norm": 1.328125, "learning_rate": 1.4555845991733146e-05, "loss": 0.483, "step": 7705 }, { "epoch": 1.0572134183988475, "grad_norm": 1.1796875, "learning_rate": 1.4554560968077706e-05, "loss": 0.368, "step": 7706 }, { "epoch": 1.057350620841051, "grad_norm": 1.3125, "learning_rate": 1.4553275849521698e-05, "loss": 0.5251, "step": 7707 }, { "epoch": 1.0574878232832545, "grad_norm": 1.265625, "learning_rate": 1.455199063609189e-05, "loss": 0.4391, "step": 7708 }, { "epoch": 1.057625025725458, "grad_norm": 1.3203125, "learning_rate": 1.4550705327815069e-05, "loss": 0.5247, "step": 7709 }, { "epoch": 1.0577622281676613, "grad_norm": 1.140625, "learning_rate": 1.4549419924718012e-05, "loss": 0.4269, "step": 7710 }, { "epoch": 1.0578994306098648, "grad_norm": 1.1875, "learning_rate": 1.4548134426827505e-05, "loss": 0.4255, "step": 7711 }, { "epoch": 1.0580366330520683, "grad_norm": 1.1484375, "learning_rate": 1.4546848834170333e-05, "loss": 0.4349, "step": 7712 }, { "epoch": 1.0581738354942718, "grad_norm": 1.359375, "learning_rate": 1.454556314677328e-05, "loss": 0.5341, "step": 7713 }, { "epoch": 1.0583110379364753, "grad_norm": 1.1796875, "learning_rate": 1.4544277364663138e-05, "loss": 0.402, "step": 7714 }, { "epoch": 1.0584482403786788, "grad_norm": 1.1875, "learning_rate": 1.4542991487866695e-05, "loss": 0.4166, "step": 7715 }, { "epoch": 1.0585854428208823, "grad_norm": 1.203125, "learning_rate": 1.4541705516410747e-05, "loss": 0.4528, "step": 7716 }, { "epoch": 1.0587226452630856, "grad_norm": 1.1171875, "learning_rate": 1.4540419450322089e-05, "loss": 0.384, "step": 7717 }, { "epoch": 1.058859847705289, "grad_norm": 1.2265625, "learning_rate": 1.4539133289627519e-05, "loss": 0.4265, "step": 7718 }, { "epoch": 1.0589970501474926, "grad_norm": 1.2421875, "learning_rate": 1.4537847034353832e-05, "loss": 0.4418, "step": 7719 }, { "epoch": 1.059134252589696, "grad_norm": 1.0859375, "learning_rate": 1.453656068452783e-05, "loss": 0.3799, "step": 7720 }, { "epoch": 1.0592714550318996, "grad_norm": 1.2421875, "learning_rate": 1.4535274240176316e-05, "loss": 0.4304, "step": 7721 }, { "epoch": 1.059408657474103, "grad_norm": 1.21875, "learning_rate": 1.4533987701326097e-05, "loss": 0.4644, "step": 7722 }, { "epoch": 1.0595458599163066, "grad_norm": 1.1484375, "learning_rate": 1.4532701068003978e-05, "loss": 0.4026, "step": 7723 }, { "epoch": 1.05968306235851, "grad_norm": 1.25, "learning_rate": 1.453141434023677e-05, "loss": 0.4523, "step": 7724 }, { "epoch": 1.0598202648007136, "grad_norm": 1.265625, "learning_rate": 1.4530127518051279e-05, "loss": 0.4557, "step": 7725 }, { "epoch": 1.0599574672429168, "grad_norm": 1.140625, "learning_rate": 1.4528840601474323e-05, "loss": 0.4526, "step": 7726 }, { "epoch": 1.0600946696851203, "grad_norm": 1.2265625, "learning_rate": 1.4527553590532715e-05, "loss": 0.4542, "step": 7727 }, { "epoch": 1.0602318721273238, "grad_norm": 1.1953125, "learning_rate": 1.4526266485253268e-05, "loss": 0.433, "step": 7728 }, { "epoch": 1.0603690745695273, "grad_norm": 1.21875, "learning_rate": 1.4524979285662805e-05, "loss": 0.473, "step": 7729 }, { "epoch": 1.0605062770117308, "grad_norm": 1.34375, "learning_rate": 1.4523691991788147e-05, "loss": 0.5051, "step": 7730 }, { "epoch": 1.0606434794539343, "grad_norm": 1.1640625, "learning_rate": 1.4522404603656113e-05, "loss": 0.3959, "step": 7731 }, { "epoch": 1.0607806818961378, "grad_norm": 1.25, "learning_rate": 1.452111712129353e-05, "loss": 0.4914, "step": 7732 }, { "epoch": 1.060917884338341, "grad_norm": 1.2578125, "learning_rate": 1.4519829544727223e-05, "loss": 0.4354, "step": 7733 }, { "epoch": 1.0610550867805446, "grad_norm": 1.2734375, "learning_rate": 1.451854187398402e-05, "loss": 0.4612, "step": 7734 }, { "epoch": 1.061192289222748, "grad_norm": 1.09375, "learning_rate": 1.4517254109090756e-05, "loss": 0.3701, "step": 7735 }, { "epoch": 1.0613294916649516, "grad_norm": 1.2578125, "learning_rate": 1.4515966250074258e-05, "loss": 0.4596, "step": 7736 }, { "epoch": 1.061466694107155, "grad_norm": 1.234375, "learning_rate": 1.4514678296961362e-05, "loss": 0.4373, "step": 7737 }, { "epoch": 1.0616038965493586, "grad_norm": 1.3125, "learning_rate": 1.4513390249778905e-05, "loss": 0.4698, "step": 7738 }, { "epoch": 1.061741098991562, "grad_norm": 1.1796875, "learning_rate": 1.4512102108553723e-05, "loss": 0.4233, "step": 7739 }, { "epoch": 1.0618783014337656, "grad_norm": 1.28125, "learning_rate": 1.4510813873312658e-05, "loss": 0.4781, "step": 7740 }, { "epoch": 1.062015503875969, "grad_norm": 1.6171875, "learning_rate": 1.4509525544082554e-05, "loss": 0.5724, "step": 7741 }, { "epoch": 1.0621527063181724, "grad_norm": 1.25, "learning_rate": 1.4508237120890254e-05, "loss": 0.4641, "step": 7742 }, { "epoch": 1.0622899087603759, "grad_norm": 1.1953125, "learning_rate": 1.4506948603762603e-05, "loss": 0.4587, "step": 7743 }, { "epoch": 1.0624271112025794, "grad_norm": 1.21875, "learning_rate": 1.4505659992726451e-05, "loss": 0.4332, "step": 7744 }, { "epoch": 1.0625643136447829, "grad_norm": 1.2109375, "learning_rate": 1.4504371287808643e-05, "loss": 0.4246, "step": 7745 }, { "epoch": 1.0627015160869864, "grad_norm": 1.140625, "learning_rate": 1.4503082489036035e-05, "loss": 0.4152, "step": 7746 }, { "epoch": 1.0628387185291899, "grad_norm": 1.2734375, "learning_rate": 1.450179359643548e-05, "loss": 0.4192, "step": 7747 }, { "epoch": 1.0629759209713934, "grad_norm": 1.25, "learning_rate": 1.4500504610033837e-05, "loss": 0.4467, "step": 7748 }, { "epoch": 1.0631131234135967, "grad_norm": 1.3671875, "learning_rate": 1.4499215529857958e-05, "loss": 0.5089, "step": 7749 }, { "epoch": 1.0632503258558001, "grad_norm": 1.15625, "learning_rate": 1.4497926355934706e-05, "loss": 0.4152, "step": 7750 }, { "epoch": 1.0633875282980036, "grad_norm": 1.1875, "learning_rate": 1.4496637088290944e-05, "loss": 0.365, "step": 7751 }, { "epoch": 1.0635247307402071, "grad_norm": 1.265625, "learning_rate": 1.4495347726953532e-05, "loss": 0.4963, "step": 7752 }, { "epoch": 1.0636619331824106, "grad_norm": 1.1875, "learning_rate": 1.449405827194934e-05, "loss": 0.4432, "step": 7753 }, { "epoch": 1.0637991356246141, "grad_norm": 1.3359375, "learning_rate": 1.449276872330523e-05, "loss": 0.473, "step": 7754 }, { "epoch": 1.0639363380668176, "grad_norm": 1.1484375, "learning_rate": 1.449147908104808e-05, "loss": 0.432, "step": 7755 }, { "epoch": 1.0640735405090211, "grad_norm": 1.265625, "learning_rate": 1.4490189345204751e-05, "loss": 0.5056, "step": 7756 }, { "epoch": 1.0642107429512246, "grad_norm": 1.3125, "learning_rate": 1.4488899515802124e-05, "loss": 0.4994, "step": 7757 }, { "epoch": 1.064347945393428, "grad_norm": 1.265625, "learning_rate": 1.448760959286707e-05, "loss": 0.4712, "step": 7758 }, { "epoch": 1.0644851478356314, "grad_norm": 1.3125, "learning_rate": 1.448631957642647e-05, "loss": 0.4646, "step": 7759 }, { "epoch": 1.064622350277835, "grad_norm": 1.203125, "learning_rate": 1.4485029466507201e-05, "loss": 0.4107, "step": 7760 }, { "epoch": 1.0647595527200384, "grad_norm": 1.2578125, "learning_rate": 1.448373926313615e-05, "loss": 0.4549, "step": 7761 }, { "epoch": 1.064896755162242, "grad_norm": 1.296875, "learning_rate": 1.4482448966340191e-05, "loss": 0.4681, "step": 7762 }, { "epoch": 1.0650339576044454, "grad_norm": 1.2265625, "learning_rate": 1.448115857614621e-05, "loss": 0.4259, "step": 7763 }, { "epoch": 1.065171160046649, "grad_norm": 1.25, "learning_rate": 1.4479868092581102e-05, "loss": 0.4537, "step": 7764 }, { "epoch": 1.0653083624888522, "grad_norm": 1.21875, "learning_rate": 1.4478577515671748e-05, "loss": 0.4581, "step": 7765 }, { "epoch": 1.0654455649310557, "grad_norm": 1.2421875, "learning_rate": 1.4477286845445045e-05, "loss": 0.4052, "step": 7766 }, { "epoch": 1.0655827673732592, "grad_norm": 1.2265625, "learning_rate": 1.4475996081927882e-05, "loss": 0.4863, "step": 7767 }, { "epoch": 1.0657199698154627, "grad_norm": 1.2109375, "learning_rate": 1.4474705225147152e-05, "loss": 0.4693, "step": 7768 }, { "epoch": 1.0658571722576662, "grad_norm": 1.1640625, "learning_rate": 1.4473414275129758e-05, "loss": 0.4344, "step": 7769 }, { "epoch": 1.0659943746998697, "grad_norm": 1.2421875, "learning_rate": 1.4472123231902597e-05, "loss": 0.4232, "step": 7770 }, { "epoch": 1.0661315771420732, "grad_norm": 1.171875, "learning_rate": 1.4470832095492566e-05, "loss": 0.4475, "step": 7771 }, { "epoch": 1.0662687795842767, "grad_norm": 1.2109375, "learning_rate": 1.4469540865926573e-05, "loss": 0.4388, "step": 7772 }, { "epoch": 1.0664059820264802, "grad_norm": 1.2109375, "learning_rate": 1.4468249543231517e-05, "loss": 0.4406, "step": 7773 }, { "epoch": 1.0665431844686835, "grad_norm": 1.2734375, "learning_rate": 1.4466958127434305e-05, "loss": 0.4727, "step": 7774 }, { "epoch": 1.066680386910887, "grad_norm": 1.171875, "learning_rate": 1.4465666618561849e-05, "loss": 0.399, "step": 7775 }, { "epoch": 1.0668175893530905, "grad_norm": 1.1015625, "learning_rate": 1.4464375016641055e-05, "loss": 0.4243, "step": 7776 }, { "epoch": 1.066954791795294, "grad_norm": 1.2265625, "learning_rate": 1.4463083321698838e-05, "loss": 0.4983, "step": 7777 }, { "epoch": 1.0670919942374975, "grad_norm": 1.1796875, "learning_rate": 1.4461791533762114e-05, "loss": 0.3752, "step": 7778 }, { "epoch": 1.067229196679701, "grad_norm": 1.140625, "learning_rate": 1.4460499652857799e-05, "loss": 0.4006, "step": 7779 }, { "epoch": 1.0673663991219045, "grad_norm": 1.09375, "learning_rate": 1.445920767901281e-05, "loss": 0.3733, "step": 7780 }, { "epoch": 1.0675036015641077, "grad_norm": 1.578125, "learning_rate": 1.4457915612254063e-05, "loss": 0.5504, "step": 7781 }, { "epoch": 1.0676408040063112, "grad_norm": 1.140625, "learning_rate": 1.4456623452608485e-05, "loss": 0.3913, "step": 7782 }, { "epoch": 1.0677780064485147, "grad_norm": 1.1328125, "learning_rate": 1.4455331200102998e-05, "loss": 0.3935, "step": 7783 }, { "epoch": 1.0679152088907182, "grad_norm": 1.2265625, "learning_rate": 1.445403885476453e-05, "loss": 0.4376, "step": 7784 }, { "epoch": 1.0680524113329217, "grad_norm": 1.2734375, "learning_rate": 1.4452746416620005e-05, "loss": 0.4914, "step": 7785 }, { "epoch": 1.0681896137751252, "grad_norm": 1.2421875, "learning_rate": 1.4451453885696356e-05, "loss": 0.4624, "step": 7786 }, { "epoch": 1.0683268162173287, "grad_norm": 1.171875, "learning_rate": 1.4450161262020512e-05, "loss": 0.4318, "step": 7787 }, { "epoch": 1.0684640186595322, "grad_norm": 1.3984375, "learning_rate": 1.444886854561941e-05, "loss": 0.4635, "step": 7788 }, { "epoch": 1.0686012211017357, "grad_norm": 1.2890625, "learning_rate": 1.4447575736519985e-05, "loss": 0.4253, "step": 7789 }, { "epoch": 1.068738423543939, "grad_norm": 1.3515625, "learning_rate": 1.444628283474917e-05, "loss": 0.4612, "step": 7790 }, { "epoch": 1.0688756259861425, "grad_norm": 1.28125, "learning_rate": 1.4444989840333909e-05, "loss": 0.4471, "step": 7791 }, { "epoch": 1.069012828428346, "grad_norm": 1.5703125, "learning_rate": 1.4443696753301141e-05, "loss": 0.5135, "step": 7792 }, { "epoch": 1.0691500308705495, "grad_norm": 1.2734375, "learning_rate": 1.444240357367781e-05, "loss": 0.5015, "step": 7793 }, { "epoch": 1.069287233312753, "grad_norm": 1.2265625, "learning_rate": 1.4441110301490865e-05, "loss": 0.4653, "step": 7794 }, { "epoch": 1.0694244357549565, "grad_norm": 1.0703125, "learning_rate": 1.4439816936767244e-05, "loss": 0.3774, "step": 7795 }, { "epoch": 1.06956163819716, "grad_norm": 1.2265625, "learning_rate": 1.4438523479533904e-05, "loss": 0.4215, "step": 7796 }, { "epoch": 1.0696988406393633, "grad_norm": 1.265625, "learning_rate": 1.4437229929817796e-05, "loss": 0.3964, "step": 7797 }, { "epoch": 1.0698360430815668, "grad_norm": 1.21875, "learning_rate": 1.4435936287645867e-05, "loss": 0.437, "step": 7798 }, { "epoch": 1.0699732455237703, "grad_norm": 1.296875, "learning_rate": 1.4434642553045074e-05, "loss": 0.4645, "step": 7799 }, { "epoch": 1.0701104479659738, "grad_norm": 1.421875, "learning_rate": 1.4433348726042377e-05, "loss": 0.3964, "step": 7800 }, { "epoch": 1.0702476504081773, "grad_norm": 1.2109375, "learning_rate": 1.4432054806664732e-05, "loss": 0.3997, "step": 7801 }, { "epoch": 1.0703848528503808, "grad_norm": 1.09375, "learning_rate": 1.4430760794939101e-05, "loss": 0.3865, "step": 7802 }, { "epoch": 1.0705220552925843, "grad_norm": 1.2578125, "learning_rate": 1.4429466690892444e-05, "loss": 0.5219, "step": 7803 }, { "epoch": 1.0706592577347878, "grad_norm": 1.125, "learning_rate": 1.4428172494551728e-05, "loss": 0.4145, "step": 7804 }, { "epoch": 1.0707964601769913, "grad_norm": 1.1484375, "learning_rate": 1.4426878205943919e-05, "loss": 0.4182, "step": 7805 }, { "epoch": 1.0709336626191945, "grad_norm": 1.3046875, "learning_rate": 1.4425583825095986e-05, "loss": 0.4675, "step": 7806 }, { "epoch": 1.071070865061398, "grad_norm": 1.1953125, "learning_rate": 1.4424289352034897e-05, "loss": 0.4124, "step": 7807 }, { "epoch": 1.0712080675036015, "grad_norm": 1.3125, "learning_rate": 1.4422994786787623e-05, "loss": 0.4589, "step": 7808 }, { "epoch": 1.071345269945805, "grad_norm": 1.28125, "learning_rate": 1.4421700129381141e-05, "loss": 0.4955, "step": 7809 }, { "epoch": 1.0714824723880085, "grad_norm": 1.2265625, "learning_rate": 1.4420405379842429e-05, "loss": 0.4418, "step": 7810 }, { "epoch": 1.071619674830212, "grad_norm": 1.1953125, "learning_rate": 1.4419110538198458e-05, "loss": 0.445, "step": 7811 }, { "epoch": 1.0717568772724155, "grad_norm": 1.25, "learning_rate": 1.4417815604476215e-05, "loss": 0.4419, "step": 7812 }, { "epoch": 1.0718940797146188, "grad_norm": 1.2734375, "learning_rate": 1.4416520578702677e-05, "loss": 0.473, "step": 7813 }, { "epoch": 1.0720312821568223, "grad_norm": 1.2578125, "learning_rate": 1.4415225460904829e-05, "loss": 0.469, "step": 7814 }, { "epoch": 1.0721684845990258, "grad_norm": 1.328125, "learning_rate": 1.441393025110966e-05, "loss": 0.5077, "step": 7815 }, { "epoch": 1.0723056870412293, "grad_norm": 1.359375, "learning_rate": 1.441263494934415e-05, "loss": 0.4549, "step": 7816 }, { "epoch": 1.0724428894834328, "grad_norm": 1.25, "learning_rate": 1.4411339555635296e-05, "loss": 0.4662, "step": 7817 }, { "epoch": 1.0725800919256363, "grad_norm": 1.1484375, "learning_rate": 1.4410044070010085e-05, "loss": 0.3898, "step": 7818 }, { "epoch": 1.0727172943678398, "grad_norm": 1.1640625, "learning_rate": 1.440874849249551e-05, "loss": 0.4118, "step": 7819 }, { "epoch": 1.0728544968100433, "grad_norm": 1.2109375, "learning_rate": 1.4407452823118569e-05, "loss": 0.4372, "step": 7820 }, { "epoch": 1.0729916992522468, "grad_norm": 1.2890625, "learning_rate": 1.4406157061906257e-05, "loss": 0.4545, "step": 7821 }, { "epoch": 1.07312890169445, "grad_norm": 1.3828125, "learning_rate": 1.4404861208885572e-05, "loss": 0.5073, "step": 7822 }, { "epoch": 1.0732661041366536, "grad_norm": 1.1796875, "learning_rate": 1.4403565264083519e-05, "loss": 0.4493, "step": 7823 }, { "epoch": 1.073403306578857, "grad_norm": 1.234375, "learning_rate": 1.4402269227527096e-05, "loss": 0.4562, "step": 7824 }, { "epoch": 1.0735405090210606, "grad_norm": 1.359375, "learning_rate": 1.440097309924331e-05, "loss": 0.472, "step": 7825 }, { "epoch": 1.073677711463264, "grad_norm": 1.2578125, "learning_rate": 1.4399676879259171e-05, "loss": 0.4923, "step": 7826 }, { "epoch": 1.0738149139054676, "grad_norm": 1.21875, "learning_rate": 1.439838056760168e-05, "loss": 0.437, "step": 7827 }, { "epoch": 1.073952116347671, "grad_norm": 1.3125, "learning_rate": 1.4397084164297854e-05, "loss": 0.5026, "step": 7828 }, { "epoch": 1.0740893187898743, "grad_norm": 1.1796875, "learning_rate": 1.4395787669374701e-05, "loss": 0.4509, "step": 7829 }, { "epoch": 1.0742265212320778, "grad_norm": 1.2421875, "learning_rate": 1.4394491082859236e-05, "loss": 0.4779, "step": 7830 }, { "epoch": 1.0743637236742813, "grad_norm": 1.1796875, "learning_rate": 1.4393194404778478e-05, "loss": 0.4011, "step": 7831 }, { "epoch": 1.0745009261164848, "grad_norm": 1.2421875, "learning_rate": 1.4391897635159445e-05, "loss": 0.4395, "step": 7832 }, { "epoch": 1.0746381285586883, "grad_norm": 1.25, "learning_rate": 1.4390600774029154e-05, "loss": 0.4868, "step": 7833 }, { "epoch": 1.0747753310008918, "grad_norm": 1.359375, "learning_rate": 1.4389303821414627e-05, "loss": 0.5077, "step": 7834 }, { "epoch": 1.0749125334430953, "grad_norm": 1.2265625, "learning_rate": 1.438800677734289e-05, "loss": 0.4525, "step": 7835 }, { "epoch": 1.0750497358852988, "grad_norm": 1.203125, "learning_rate": 1.4386709641840968e-05, "loss": 0.4869, "step": 7836 }, { "epoch": 1.0751869383275023, "grad_norm": 1.15625, "learning_rate": 1.4385412414935887e-05, "loss": 0.4284, "step": 7837 }, { "epoch": 1.0753241407697056, "grad_norm": 1.171875, "learning_rate": 1.4384115096654678e-05, "loss": 0.386, "step": 7838 }, { "epoch": 1.0754613432119091, "grad_norm": 1.203125, "learning_rate": 1.4382817687024373e-05, "loss": 0.4295, "step": 7839 }, { "epoch": 1.0755985456541126, "grad_norm": 1.1484375, "learning_rate": 1.4381520186072004e-05, "loss": 0.3612, "step": 7840 }, { "epoch": 1.0757357480963161, "grad_norm": 1.1640625, "learning_rate": 1.4380222593824606e-05, "loss": 0.4204, "step": 7841 }, { "epoch": 1.0758729505385196, "grad_norm": 1.2578125, "learning_rate": 1.437892491030922e-05, "loss": 0.4226, "step": 7842 }, { "epoch": 1.0760101529807231, "grad_norm": 1.140625, "learning_rate": 1.4377627135552881e-05, "loss": 0.371, "step": 7843 }, { "epoch": 1.0761473554229266, "grad_norm": 1.2734375, "learning_rate": 1.437632926958263e-05, "loss": 0.4697, "step": 7844 }, { "epoch": 1.0762845578651299, "grad_norm": 1.203125, "learning_rate": 1.4375031312425511e-05, "loss": 0.4078, "step": 7845 }, { "epoch": 1.0764217603073334, "grad_norm": 1.2734375, "learning_rate": 1.4373733264108567e-05, "loss": 0.431, "step": 7846 }, { "epoch": 1.0765589627495369, "grad_norm": 1.1875, "learning_rate": 1.4372435124658847e-05, "loss": 0.3891, "step": 7847 }, { "epoch": 1.0766961651917404, "grad_norm": 1.2578125, "learning_rate": 1.4371136894103399e-05, "loss": 0.4898, "step": 7848 }, { "epoch": 1.0768333676339439, "grad_norm": 1.25, "learning_rate": 1.4369838572469274e-05, "loss": 0.3868, "step": 7849 }, { "epoch": 1.0769705700761474, "grad_norm": 1.1484375, "learning_rate": 1.4368540159783522e-05, "loss": 0.3917, "step": 7850 }, { "epoch": 1.0771077725183509, "grad_norm": 1.2109375, "learning_rate": 1.43672416560732e-05, "loss": 0.4573, "step": 7851 }, { "epoch": 1.0772449749605544, "grad_norm": 1.3515625, "learning_rate": 1.4365943061365362e-05, "loss": 0.4495, "step": 7852 }, { "epoch": 1.0773821774027579, "grad_norm": 1.3515625, "learning_rate": 1.4364644375687068e-05, "loss": 0.5079, "step": 7853 }, { "epoch": 1.0775193798449612, "grad_norm": 1.2578125, "learning_rate": 1.4363345599065373e-05, "loss": 0.476, "step": 7854 }, { "epoch": 1.0776565822871647, "grad_norm": 1.203125, "learning_rate": 1.4362046731527347e-05, "loss": 0.41, "step": 7855 }, { "epoch": 1.0777937847293682, "grad_norm": 1.1875, "learning_rate": 1.4360747773100046e-05, "loss": 0.3829, "step": 7856 }, { "epoch": 1.0779309871715717, "grad_norm": 1.2265625, "learning_rate": 1.4359448723810538e-05, "loss": 0.4742, "step": 7857 }, { "epoch": 1.0780681896137752, "grad_norm": 1.21875, "learning_rate": 1.4358149583685892e-05, "loss": 0.3848, "step": 7858 }, { "epoch": 1.0782053920559787, "grad_norm": 1.1640625, "learning_rate": 1.4356850352753177e-05, "loss": 0.4209, "step": 7859 }, { "epoch": 1.0783425944981821, "grad_norm": 1.234375, "learning_rate": 1.4355551031039468e-05, "loss": 0.4273, "step": 7860 }, { "epoch": 1.0784797969403854, "grad_norm": 1.328125, "learning_rate": 1.4354251618571829e-05, "loss": 0.5479, "step": 7861 }, { "epoch": 1.078616999382589, "grad_norm": 1.296875, "learning_rate": 1.4352952115377341e-05, "loss": 0.4795, "step": 7862 }, { "epoch": 1.0787542018247924, "grad_norm": 1.234375, "learning_rate": 1.435165252148308e-05, "loss": 0.4448, "step": 7863 }, { "epoch": 1.078891404266996, "grad_norm": 1.296875, "learning_rate": 1.4350352836916126e-05, "loss": 0.4492, "step": 7864 }, { "epoch": 1.0790286067091994, "grad_norm": 1.296875, "learning_rate": 1.4349053061703558e-05, "loss": 0.4837, "step": 7865 }, { "epoch": 1.079165809151403, "grad_norm": 1.2109375, "learning_rate": 1.4347753195872457e-05, "loss": 0.4917, "step": 7866 }, { "epoch": 1.0793030115936064, "grad_norm": 1.25, "learning_rate": 1.4346453239449911e-05, "loss": 0.5009, "step": 7867 }, { "epoch": 1.07944021403581, "grad_norm": 1.125, "learning_rate": 1.4345153192463007e-05, "loss": 0.4036, "step": 7868 }, { "epoch": 1.0795774164780134, "grad_norm": 1.2109375, "learning_rate": 1.4343853054938833e-05, "loss": 0.4755, "step": 7869 }, { "epoch": 1.0797146189202167, "grad_norm": 1.3046875, "learning_rate": 1.4342552826904474e-05, "loss": 0.4766, "step": 7870 }, { "epoch": 1.0798518213624202, "grad_norm": 1.4296875, "learning_rate": 1.4341252508387027e-05, "loss": 0.4599, "step": 7871 }, { "epoch": 1.0799890238046237, "grad_norm": 1.171875, "learning_rate": 1.4339952099413584e-05, "loss": 0.4134, "step": 7872 }, { "epoch": 1.0801262262468272, "grad_norm": 1.1640625, "learning_rate": 1.433865160001124e-05, "loss": 0.3602, "step": 7873 }, { "epoch": 1.0802634286890307, "grad_norm": 1.3359375, "learning_rate": 1.4337351010207095e-05, "loss": 0.4888, "step": 7874 }, { "epoch": 1.0804006311312342, "grad_norm": 1.0390625, "learning_rate": 1.4336050330028248e-05, "loss": 0.3412, "step": 7875 }, { "epoch": 1.0805378335734377, "grad_norm": 1.234375, "learning_rate": 1.43347495595018e-05, "loss": 0.4958, "step": 7876 }, { "epoch": 1.080675036015641, "grad_norm": 1.2734375, "learning_rate": 1.4333448698654856e-05, "loss": 0.4847, "step": 7877 }, { "epoch": 1.0808122384578445, "grad_norm": 1.2109375, "learning_rate": 1.4332147747514518e-05, "loss": 0.445, "step": 7878 }, { "epoch": 1.080949440900048, "grad_norm": 1.1484375, "learning_rate": 1.4330846706107894e-05, "loss": 0.3769, "step": 7879 }, { "epoch": 1.0810866433422515, "grad_norm": 1.1953125, "learning_rate": 1.4329545574462096e-05, "loss": 0.4335, "step": 7880 }, { "epoch": 1.081223845784455, "grad_norm": 1.1796875, "learning_rate": 1.432824435260423e-05, "loss": 0.4442, "step": 7881 }, { "epoch": 1.0813610482266585, "grad_norm": 1.109375, "learning_rate": 1.4326943040561414e-05, "loss": 0.4076, "step": 7882 }, { "epoch": 1.081498250668862, "grad_norm": 1.25, "learning_rate": 1.4325641638360759e-05, "loss": 0.4442, "step": 7883 }, { "epoch": 1.0816354531110655, "grad_norm": 1.1484375, "learning_rate": 1.4324340146029384e-05, "loss": 0.3841, "step": 7884 }, { "epoch": 1.081772655553269, "grad_norm": 1.296875, "learning_rate": 1.4323038563594402e-05, "loss": 0.4471, "step": 7885 }, { "epoch": 1.0819098579954722, "grad_norm": 1.25, "learning_rate": 1.4321736891082942e-05, "loss": 0.4859, "step": 7886 }, { "epoch": 1.0820470604376757, "grad_norm": 1.25, "learning_rate": 1.4320435128522118e-05, "loss": 0.4105, "step": 7887 }, { "epoch": 1.0821842628798792, "grad_norm": 1.1640625, "learning_rate": 1.4319133275939062e-05, "loss": 0.4378, "step": 7888 }, { "epoch": 1.0823214653220827, "grad_norm": 1.3046875, "learning_rate": 1.4317831333360892e-05, "loss": 0.4927, "step": 7889 }, { "epoch": 1.0824586677642862, "grad_norm": 1.1796875, "learning_rate": 1.4316529300814739e-05, "loss": 0.4435, "step": 7890 }, { "epoch": 1.0825958702064897, "grad_norm": 1.109375, "learning_rate": 1.431522717832773e-05, "loss": 0.375, "step": 7891 }, { "epoch": 1.0827330726486932, "grad_norm": 1.21875, "learning_rate": 1.4313924965927004e-05, "loss": 0.4461, "step": 7892 }, { "epoch": 1.0828702750908965, "grad_norm": 1.203125, "learning_rate": 1.4312622663639687e-05, "loss": 0.428, "step": 7893 }, { "epoch": 1.0830074775331, "grad_norm": 1.2890625, "learning_rate": 1.4311320271492917e-05, "loss": 0.4751, "step": 7894 }, { "epoch": 1.0831446799753035, "grad_norm": 1.234375, "learning_rate": 1.4310017789513833e-05, "loss": 0.442, "step": 7895 }, { "epoch": 1.083281882417507, "grad_norm": 1.2421875, "learning_rate": 1.4308715217729572e-05, "loss": 0.4485, "step": 7896 }, { "epoch": 1.0834190848597105, "grad_norm": 1.171875, "learning_rate": 1.4307412556167274e-05, "loss": 0.4314, "step": 7897 }, { "epoch": 1.083556287301914, "grad_norm": 1.1796875, "learning_rate": 1.430610980485408e-05, "loss": 0.4455, "step": 7898 }, { "epoch": 1.0836934897441175, "grad_norm": 1.234375, "learning_rate": 1.430480696381714e-05, "loss": 0.4856, "step": 7899 }, { "epoch": 1.083830692186321, "grad_norm": 1.296875, "learning_rate": 1.4303504033083598e-05, "loss": 0.4866, "step": 7900 }, { "epoch": 1.0839678946285245, "grad_norm": 1.15625, "learning_rate": 1.4302201012680601e-05, "loss": 0.4018, "step": 7901 }, { "epoch": 1.0841050970707278, "grad_norm": 1.1875, "learning_rate": 1.4300897902635301e-05, "loss": 0.4566, "step": 7902 }, { "epoch": 1.0842422995129313, "grad_norm": 1.1953125, "learning_rate": 1.4299594702974848e-05, "loss": 0.4912, "step": 7903 }, { "epoch": 1.0843795019551348, "grad_norm": 1.2109375, "learning_rate": 1.42982914137264e-05, "loss": 0.4148, "step": 7904 }, { "epoch": 1.0845167043973383, "grad_norm": 1.28125, "learning_rate": 1.429698803491711e-05, "loss": 0.45, "step": 7905 }, { "epoch": 1.0846539068395418, "grad_norm": 1.2265625, "learning_rate": 1.4295684566574135e-05, "loss": 0.4369, "step": 7906 }, { "epoch": 1.0847911092817453, "grad_norm": 1.3125, "learning_rate": 1.4294381008724635e-05, "loss": 0.5686, "step": 7907 }, { "epoch": 1.0849283117239488, "grad_norm": 1.0703125, "learning_rate": 1.4293077361395774e-05, "loss": 0.3537, "step": 7908 }, { "epoch": 1.085065514166152, "grad_norm": 1.1875, "learning_rate": 1.4291773624614713e-05, "loss": 0.4586, "step": 7909 }, { "epoch": 1.0852027166083555, "grad_norm": 1.1640625, "learning_rate": 1.4290469798408614e-05, "loss": 0.4252, "step": 7910 }, { "epoch": 1.085339919050559, "grad_norm": 1.1640625, "learning_rate": 1.4289165882804649e-05, "loss": 0.4219, "step": 7911 }, { "epoch": 1.0854771214927625, "grad_norm": 1.2890625, "learning_rate": 1.4287861877829987e-05, "loss": 0.473, "step": 7912 }, { "epoch": 1.085614323934966, "grad_norm": 1.1953125, "learning_rate": 1.4286557783511796e-05, "loss": 0.4356, "step": 7913 }, { "epoch": 1.0857515263771695, "grad_norm": 1.28125, "learning_rate": 1.4285253599877253e-05, "loss": 0.4743, "step": 7914 }, { "epoch": 1.085888728819373, "grad_norm": 1.2734375, "learning_rate": 1.4283949326953526e-05, "loss": 0.4909, "step": 7915 }, { "epoch": 1.0860259312615765, "grad_norm": 1.21875, "learning_rate": 1.4282644964767796e-05, "loss": 0.4156, "step": 7916 }, { "epoch": 1.08616313370378, "grad_norm": 1.28125, "learning_rate": 1.4281340513347238e-05, "loss": 0.5196, "step": 7917 }, { "epoch": 1.0863003361459833, "grad_norm": 1.2578125, "learning_rate": 1.4280035972719035e-05, "loss": 0.4554, "step": 7918 }, { "epoch": 1.0864375385881868, "grad_norm": 1.3359375, "learning_rate": 1.4278731342910366e-05, "loss": 0.5248, "step": 7919 }, { "epoch": 1.0865747410303903, "grad_norm": 1.2265625, "learning_rate": 1.4277426623948416e-05, "loss": 0.4699, "step": 7920 }, { "epoch": 1.0867119434725938, "grad_norm": 1.21875, "learning_rate": 1.4276121815860372e-05, "loss": 0.4485, "step": 7921 }, { "epoch": 1.0868491459147973, "grad_norm": 1.28125, "learning_rate": 1.4274816918673425e-05, "loss": 0.4493, "step": 7922 }, { "epoch": 1.0869863483570008, "grad_norm": 1.1484375, "learning_rate": 1.4273511932414754e-05, "loss": 0.4062, "step": 7923 }, { "epoch": 1.0871235507992043, "grad_norm": 1.1640625, "learning_rate": 1.4272206857111559e-05, "loss": 0.4312, "step": 7924 }, { "epoch": 1.0872607532414076, "grad_norm": 1.09375, "learning_rate": 1.4270901692791028e-05, "loss": 0.3621, "step": 7925 }, { "epoch": 1.087397955683611, "grad_norm": 1.1875, "learning_rate": 1.426959643948036e-05, "loss": 0.4497, "step": 7926 }, { "epoch": 1.0875351581258146, "grad_norm": 1.2265625, "learning_rate": 1.4268291097206748e-05, "loss": 0.4441, "step": 7927 }, { "epoch": 1.087672360568018, "grad_norm": 1.2265625, "learning_rate": 1.4266985665997393e-05, "loss": 0.4521, "step": 7928 }, { "epoch": 1.0878095630102216, "grad_norm": 1.28125, "learning_rate": 1.4265680145879495e-05, "loss": 0.4845, "step": 7929 }, { "epoch": 1.087946765452425, "grad_norm": 1.2109375, "learning_rate": 1.4264374536880254e-05, "loss": 0.4395, "step": 7930 }, { "epoch": 1.0880839678946286, "grad_norm": 1.2578125, "learning_rate": 1.4263068839026881e-05, "loss": 0.5044, "step": 7931 }, { "epoch": 1.088221170336832, "grad_norm": 1.140625, "learning_rate": 1.4261763052346575e-05, "loss": 0.4033, "step": 7932 }, { "epoch": 1.0883583727790356, "grad_norm": 1.1015625, "learning_rate": 1.4260457176866545e-05, "loss": 0.3811, "step": 7933 }, { "epoch": 1.0884955752212389, "grad_norm": 1.1015625, "learning_rate": 1.4259151212614003e-05, "loss": 0.3693, "step": 7934 }, { "epoch": 1.0886327776634424, "grad_norm": 1.171875, "learning_rate": 1.425784515961616e-05, "loss": 0.377, "step": 7935 }, { "epoch": 1.0887699801056459, "grad_norm": 1.484375, "learning_rate": 1.4256539017900227e-05, "loss": 0.4945, "step": 7936 }, { "epoch": 1.0889071825478494, "grad_norm": 1.4296875, "learning_rate": 1.425523278749342e-05, "loss": 0.4986, "step": 7937 }, { "epoch": 1.0890443849900528, "grad_norm": 1.1640625, "learning_rate": 1.425392646842296e-05, "loss": 0.3671, "step": 7938 }, { "epoch": 1.0891815874322563, "grad_norm": 1.25, "learning_rate": 1.4252620060716065e-05, "loss": 0.4631, "step": 7939 }, { "epoch": 1.0893187898744598, "grad_norm": 1.1015625, "learning_rate": 1.4251313564399953e-05, "loss": 0.3531, "step": 7940 }, { "epoch": 1.0894559923166631, "grad_norm": 1.1640625, "learning_rate": 1.4250006979501847e-05, "loss": 0.4258, "step": 7941 }, { "epoch": 1.0895931947588666, "grad_norm": 1.1953125, "learning_rate": 1.424870030604897e-05, "loss": 0.4381, "step": 7942 }, { "epoch": 1.0897303972010701, "grad_norm": 1.296875, "learning_rate": 1.4247393544068553e-05, "loss": 0.4219, "step": 7943 }, { "epoch": 1.0898675996432736, "grad_norm": 1.234375, "learning_rate": 1.4246086693587824e-05, "loss": 0.408, "step": 7944 }, { "epoch": 1.0900048020854771, "grad_norm": 1.125, "learning_rate": 1.4244779754634006e-05, "loss": 0.4107, "step": 7945 }, { "epoch": 1.0901420045276806, "grad_norm": 1.2109375, "learning_rate": 1.4243472727234338e-05, "loss": 0.4396, "step": 7946 }, { "epoch": 1.0902792069698841, "grad_norm": 1.203125, "learning_rate": 1.424216561141605e-05, "loss": 0.476, "step": 7947 }, { "epoch": 1.0904164094120876, "grad_norm": 1.25, "learning_rate": 1.424085840720638e-05, "loss": 0.3902, "step": 7948 }, { "epoch": 1.0905536118542911, "grad_norm": 1.34375, "learning_rate": 1.4239551114632565e-05, "loss": 0.5408, "step": 7949 }, { "epoch": 1.0906908142964944, "grad_norm": 1.296875, "learning_rate": 1.4238243733721846e-05, "loss": 0.4981, "step": 7950 }, { "epoch": 1.090828016738698, "grad_norm": 1.2109375, "learning_rate": 1.4236936264501458e-05, "loss": 0.4581, "step": 7951 }, { "epoch": 1.0909652191809014, "grad_norm": 1.40625, "learning_rate": 1.4235628706998648e-05, "loss": 0.5147, "step": 7952 }, { "epoch": 1.091102421623105, "grad_norm": 1.1328125, "learning_rate": 1.423432106124066e-05, "loss": 0.4156, "step": 7953 }, { "epoch": 1.0912396240653084, "grad_norm": 1.2421875, "learning_rate": 1.4233013327254741e-05, "loss": 0.4448, "step": 7954 }, { "epoch": 1.0913768265075119, "grad_norm": 1.25, "learning_rate": 1.423170550506814e-05, "loss": 0.4322, "step": 7955 }, { "epoch": 1.0915140289497154, "grad_norm": 1.265625, "learning_rate": 1.4230397594708107e-05, "loss": 0.3978, "step": 7956 }, { "epoch": 1.0916512313919187, "grad_norm": 1.21875, "learning_rate": 1.4229089596201893e-05, "loss": 0.4345, "step": 7957 }, { "epoch": 1.0917884338341222, "grad_norm": 1.25, "learning_rate": 1.4227781509576755e-05, "loss": 0.4428, "step": 7958 }, { "epoch": 1.0919256362763257, "grad_norm": 1.0859375, "learning_rate": 1.4226473334859945e-05, "loss": 0.3701, "step": 7959 }, { "epoch": 1.0920628387185292, "grad_norm": 1.171875, "learning_rate": 1.4225165072078724e-05, "loss": 0.436, "step": 7960 }, { "epoch": 1.0922000411607327, "grad_norm": 1.1796875, "learning_rate": 1.4223856721260348e-05, "loss": 0.4517, "step": 7961 }, { "epoch": 1.0923372436029362, "grad_norm": 1.25, "learning_rate": 1.4222548282432079e-05, "loss": 0.4208, "step": 7962 }, { "epoch": 1.0924744460451397, "grad_norm": 1.1875, "learning_rate": 1.422123975562118e-05, "loss": 0.4455, "step": 7963 }, { "epoch": 1.0926116484873432, "grad_norm": 1.34375, "learning_rate": 1.421993114085492e-05, "loss": 0.4693, "step": 7964 }, { "epoch": 1.0927488509295467, "grad_norm": 1.2265625, "learning_rate": 1.4218622438160564e-05, "loss": 0.4111, "step": 7965 }, { "epoch": 1.09288605337175, "grad_norm": 1.25, "learning_rate": 1.4217313647565376e-05, "loss": 0.4494, "step": 7966 }, { "epoch": 1.0930232558139534, "grad_norm": 1.125, "learning_rate": 1.4216004769096634e-05, "loss": 0.3677, "step": 7967 }, { "epoch": 1.093160458256157, "grad_norm": 1.2109375, "learning_rate": 1.4214695802781604e-05, "loss": 0.4362, "step": 7968 }, { "epoch": 1.0932976606983604, "grad_norm": 1.21875, "learning_rate": 1.4213386748647562e-05, "loss": 0.4374, "step": 7969 }, { "epoch": 1.093434863140564, "grad_norm": 1.2734375, "learning_rate": 1.4212077606721783e-05, "loss": 0.4762, "step": 7970 }, { "epoch": 1.0935720655827674, "grad_norm": 1.1953125, "learning_rate": 1.4210768377031548e-05, "loss": 0.45, "step": 7971 }, { "epoch": 1.093709268024971, "grad_norm": 1.28125, "learning_rate": 1.4209459059604136e-05, "loss": 0.4586, "step": 7972 }, { "epoch": 1.0938464704671742, "grad_norm": 1.34375, "learning_rate": 1.4208149654466825e-05, "loss": 0.4749, "step": 7973 }, { "epoch": 1.0939836729093777, "grad_norm": 1.2421875, "learning_rate": 1.42068401616469e-05, "loss": 0.4839, "step": 7974 }, { "epoch": 1.0941208753515812, "grad_norm": 1.1328125, "learning_rate": 1.4205530581171647e-05, "loss": 0.3768, "step": 7975 }, { "epoch": 1.0942580777937847, "grad_norm": 1.5390625, "learning_rate": 1.4204220913068356e-05, "loss": 0.4841, "step": 7976 }, { "epoch": 1.0943952802359882, "grad_norm": 1.1953125, "learning_rate": 1.420291115736431e-05, "loss": 0.4212, "step": 7977 }, { "epoch": 1.0945324826781917, "grad_norm": 1.2578125, "learning_rate": 1.4201601314086799e-05, "loss": 0.4397, "step": 7978 }, { "epoch": 1.0946696851203952, "grad_norm": 1.28125, "learning_rate": 1.420029138326312e-05, "loss": 0.4326, "step": 7979 }, { "epoch": 1.0948068875625987, "grad_norm": 1.203125, "learning_rate": 1.4198981364920566e-05, "loss": 0.4697, "step": 7980 }, { "epoch": 1.0949440900048022, "grad_norm": 1.1875, "learning_rate": 1.4197671259086432e-05, "loss": 0.4365, "step": 7981 }, { "epoch": 1.0950812924470055, "grad_norm": 1.3046875, "learning_rate": 1.4196361065788016e-05, "loss": 0.5025, "step": 7982 }, { "epoch": 1.095218494889209, "grad_norm": 1.1796875, "learning_rate": 1.4195050785052618e-05, "loss": 0.4757, "step": 7983 }, { "epoch": 1.0953556973314125, "grad_norm": 1.296875, "learning_rate": 1.419374041690754e-05, "loss": 0.4818, "step": 7984 }, { "epoch": 1.095492899773616, "grad_norm": 1.140625, "learning_rate": 1.4192429961380085e-05, "loss": 0.4197, "step": 7985 }, { "epoch": 1.0956301022158195, "grad_norm": 1.0859375, "learning_rate": 1.4191119418497557e-05, "loss": 0.3413, "step": 7986 }, { "epoch": 1.095767304658023, "grad_norm": 1.375, "learning_rate": 1.4189808788287265e-05, "loss": 0.4926, "step": 7987 }, { "epoch": 1.0959045071002265, "grad_norm": 1.265625, "learning_rate": 1.4188498070776516e-05, "loss": 0.4509, "step": 7988 }, { "epoch": 1.0960417095424297, "grad_norm": 1.203125, "learning_rate": 1.4187187265992621e-05, "loss": 0.4141, "step": 7989 }, { "epoch": 1.0961789119846332, "grad_norm": 1.1015625, "learning_rate": 1.4185876373962893e-05, "loss": 0.344, "step": 7990 }, { "epoch": 1.0963161144268367, "grad_norm": 1.25, "learning_rate": 1.4184565394714647e-05, "loss": 0.4474, "step": 7991 }, { "epoch": 1.0964533168690402, "grad_norm": 1.1875, "learning_rate": 1.4183254328275196e-05, "loss": 0.425, "step": 7992 }, { "epoch": 1.0965905193112437, "grad_norm": 1.2421875, "learning_rate": 1.4181943174671865e-05, "loss": 0.4452, "step": 7993 }, { "epoch": 1.0967277217534472, "grad_norm": 1.1171875, "learning_rate": 1.4180631933931962e-05, "loss": 0.3532, "step": 7994 }, { "epoch": 1.0968649241956507, "grad_norm": 1.3203125, "learning_rate": 1.417932060608282e-05, "loss": 0.4764, "step": 7995 }, { "epoch": 1.0970021266378542, "grad_norm": 1.234375, "learning_rate": 1.4178009191151753e-05, "loss": 0.4166, "step": 7996 }, { "epoch": 1.0971393290800577, "grad_norm": 1.1953125, "learning_rate": 1.4176697689166095e-05, "loss": 0.3991, "step": 7997 }, { "epoch": 1.097276531522261, "grad_norm": 1.1640625, "learning_rate": 1.4175386100153165e-05, "loss": 0.4119, "step": 7998 }, { "epoch": 1.0974137339644645, "grad_norm": 1.171875, "learning_rate": 1.4174074424140299e-05, "loss": 0.4338, "step": 7999 }, { "epoch": 1.097550936406668, "grad_norm": 1.21875, "learning_rate": 1.417276266115482e-05, "loss": 0.3859, "step": 8000 }, { "epoch": 1.0976881388488715, "grad_norm": 1.1796875, "learning_rate": 1.4171450811224068e-05, "loss": 0.4308, "step": 8001 }, { "epoch": 1.097825341291075, "grad_norm": 1.0546875, "learning_rate": 1.4170138874375371e-05, "loss": 0.3713, "step": 8002 }, { "epoch": 1.0979625437332785, "grad_norm": 1.296875, "learning_rate": 1.416882685063607e-05, "loss": 0.498, "step": 8003 }, { "epoch": 1.098099746175482, "grad_norm": 1.21875, "learning_rate": 1.4167514740033502e-05, "loss": 0.5281, "step": 8004 }, { "epoch": 1.0982369486176853, "grad_norm": 1.234375, "learning_rate": 1.4166202542595002e-05, "loss": 0.4364, "step": 8005 }, { "epoch": 1.0983741510598888, "grad_norm": 1.3046875, "learning_rate": 1.4164890258347913e-05, "loss": 0.4831, "step": 8006 }, { "epoch": 1.0985113535020923, "grad_norm": 1.2578125, "learning_rate": 1.4163577887319582e-05, "loss": 0.4111, "step": 8007 }, { "epoch": 1.0986485559442958, "grad_norm": 1.1171875, "learning_rate": 1.4162265429537349e-05, "loss": 0.3482, "step": 8008 }, { "epoch": 1.0987857583864993, "grad_norm": 1.15625, "learning_rate": 1.4160952885028568e-05, "loss": 0.3757, "step": 8009 }, { "epoch": 1.0989229608287028, "grad_norm": 1.1953125, "learning_rate": 1.4159640253820579e-05, "loss": 0.3709, "step": 8010 }, { "epoch": 1.0990601632709063, "grad_norm": 1.3046875, "learning_rate": 1.415832753594074e-05, "loss": 0.5258, "step": 8011 }, { "epoch": 1.0991973657131098, "grad_norm": 1.265625, "learning_rate": 1.4157014731416401e-05, "loss": 0.4615, "step": 8012 }, { "epoch": 1.0993345681553133, "grad_norm": 1.28125, "learning_rate": 1.4155701840274914e-05, "loss": 0.4451, "step": 8013 }, { "epoch": 1.0994717705975166, "grad_norm": 1.3671875, "learning_rate": 1.4154388862543633e-05, "loss": 0.4924, "step": 8014 }, { "epoch": 1.09960897303972, "grad_norm": 1.265625, "learning_rate": 1.415307579824992e-05, "loss": 0.439, "step": 8015 }, { "epoch": 1.0997461754819235, "grad_norm": 1.265625, "learning_rate": 1.4151762647421134e-05, "loss": 0.4475, "step": 8016 }, { "epoch": 1.099883377924127, "grad_norm": 1.2578125, "learning_rate": 1.4150449410084637e-05, "loss": 0.4838, "step": 8017 }, { "epoch": 1.1000205803663305, "grad_norm": 1.25, "learning_rate": 1.414913608626779e-05, "loss": 0.4338, "step": 8018 }, { "epoch": 1.100157782808534, "grad_norm": 1.2265625, "learning_rate": 1.414782267599796e-05, "loss": 0.447, "step": 8019 }, { "epoch": 1.1002949852507375, "grad_norm": 1.2109375, "learning_rate": 1.414650917930251e-05, "loss": 0.4012, "step": 8020 }, { "epoch": 1.1004321876929408, "grad_norm": 1.203125, "learning_rate": 1.4145195596208813e-05, "loss": 0.4282, "step": 8021 }, { "epoch": 1.1005693901351443, "grad_norm": 1.2734375, "learning_rate": 1.4143881926744236e-05, "loss": 0.4843, "step": 8022 }, { "epoch": 1.1007065925773478, "grad_norm": 1.25, "learning_rate": 1.4142568170936153e-05, "loss": 0.4709, "step": 8023 }, { "epoch": 1.1008437950195513, "grad_norm": 1.3203125, "learning_rate": 1.4141254328811936e-05, "loss": 0.4716, "step": 8024 }, { "epoch": 1.1009809974617548, "grad_norm": 1.28125, "learning_rate": 1.4139940400398962e-05, "loss": 0.4341, "step": 8025 }, { "epoch": 1.1011181999039583, "grad_norm": 1.203125, "learning_rate": 1.413862638572461e-05, "loss": 0.4587, "step": 8026 }, { "epoch": 1.1012554023461618, "grad_norm": 1.1953125, "learning_rate": 1.4137312284816255e-05, "loss": 0.4231, "step": 8027 }, { "epoch": 1.1013926047883653, "grad_norm": 1.2265625, "learning_rate": 1.4135998097701284e-05, "loss": 0.4998, "step": 8028 }, { "epoch": 1.1015298072305688, "grad_norm": 1.25, "learning_rate": 1.4134683824407077e-05, "loss": 0.4356, "step": 8029 }, { "epoch": 1.101667009672772, "grad_norm": 1.265625, "learning_rate": 1.4133369464961018e-05, "loss": 0.4391, "step": 8030 }, { "epoch": 1.1018042121149756, "grad_norm": 1.203125, "learning_rate": 1.4132055019390493e-05, "loss": 0.4603, "step": 8031 }, { "epoch": 1.101941414557179, "grad_norm": 1.2734375, "learning_rate": 1.4130740487722892e-05, "loss": 0.4742, "step": 8032 }, { "epoch": 1.1020786169993826, "grad_norm": 1.375, "learning_rate": 1.4129425869985604e-05, "loss": 0.5243, "step": 8033 }, { "epoch": 1.102215819441586, "grad_norm": 1.2421875, "learning_rate": 1.4128111166206022e-05, "loss": 0.4897, "step": 8034 }, { "epoch": 1.1023530218837896, "grad_norm": 1.2578125, "learning_rate": 1.4126796376411537e-05, "loss": 0.4335, "step": 8035 }, { "epoch": 1.102490224325993, "grad_norm": 1.171875, "learning_rate": 1.4125481500629547e-05, "loss": 0.398, "step": 8036 }, { "epoch": 1.1026274267681964, "grad_norm": 1.1875, "learning_rate": 1.412416653888745e-05, "loss": 0.4563, "step": 8037 }, { "epoch": 1.1027646292103999, "grad_norm": 1.2265625, "learning_rate": 1.4122851491212645e-05, "loss": 0.4163, "step": 8038 }, { "epoch": 1.1029018316526034, "grad_norm": 1.09375, "learning_rate": 1.412153635763253e-05, "loss": 0.3684, "step": 8039 }, { "epoch": 1.1030390340948069, "grad_norm": 1.2265625, "learning_rate": 1.4120221138174508e-05, "loss": 0.4364, "step": 8040 }, { "epoch": 1.1031762365370104, "grad_norm": 1.1875, "learning_rate": 1.4118905832865986e-05, "loss": 0.4138, "step": 8041 }, { "epoch": 1.1033134389792139, "grad_norm": 1.2890625, "learning_rate": 1.4117590441734368e-05, "loss": 0.4839, "step": 8042 }, { "epoch": 1.1034506414214174, "grad_norm": 1.21875, "learning_rate": 1.4116274964807062e-05, "loss": 0.4821, "step": 8043 }, { "epoch": 1.1035878438636209, "grad_norm": 1.3203125, "learning_rate": 1.411495940211148e-05, "loss": 0.5126, "step": 8044 }, { "epoch": 1.1037250463058244, "grad_norm": 1.203125, "learning_rate": 1.4113643753675033e-05, "loss": 0.369, "step": 8045 }, { "epoch": 1.1038622487480276, "grad_norm": 1.65625, "learning_rate": 1.411232801952513e-05, "loss": 0.4907, "step": 8046 }, { "epoch": 1.1039994511902311, "grad_norm": 1.15625, "learning_rate": 1.4111012199689193e-05, "loss": 0.4185, "step": 8047 }, { "epoch": 1.1041366536324346, "grad_norm": 1.2890625, "learning_rate": 1.4109696294194635e-05, "loss": 0.5071, "step": 8048 }, { "epoch": 1.1042738560746381, "grad_norm": 1.234375, "learning_rate": 1.4108380303068874e-05, "loss": 0.436, "step": 8049 }, { "epoch": 1.1044110585168416, "grad_norm": 1.28125, "learning_rate": 1.4107064226339334e-05, "loss": 0.4433, "step": 8050 }, { "epoch": 1.1045482609590451, "grad_norm": 1.2734375, "learning_rate": 1.4105748064033432e-05, "loss": 0.4914, "step": 8051 }, { "epoch": 1.1046854634012486, "grad_norm": 1.359375, "learning_rate": 1.4104431816178596e-05, "loss": 0.5252, "step": 8052 }, { "epoch": 1.104822665843452, "grad_norm": 1.2421875, "learning_rate": 1.410311548280225e-05, "loss": 0.4215, "step": 8053 }, { "epoch": 1.1049598682856554, "grad_norm": 1.2890625, "learning_rate": 1.4101799063931824e-05, "loss": 0.4729, "step": 8054 }, { "epoch": 1.105097070727859, "grad_norm": 1.2890625, "learning_rate": 1.4100482559594747e-05, "loss": 0.4472, "step": 8055 }, { "epoch": 1.1052342731700624, "grad_norm": 1.1875, "learning_rate": 1.4099165969818448e-05, "loss": 0.421, "step": 8056 }, { "epoch": 1.105371475612266, "grad_norm": 1.2578125, "learning_rate": 1.4097849294630361e-05, "loss": 0.4877, "step": 8057 }, { "epoch": 1.1055086780544694, "grad_norm": 1.140625, "learning_rate": 1.4096532534057921e-05, "loss": 0.363, "step": 8058 }, { "epoch": 1.105645880496673, "grad_norm": 1.203125, "learning_rate": 1.4095215688128568e-05, "loss": 0.4448, "step": 8059 }, { "epoch": 1.1057830829388764, "grad_norm": 1.296875, "learning_rate": 1.4093898756869733e-05, "loss": 0.4743, "step": 8060 }, { "epoch": 1.10592028538108, "grad_norm": 1.2578125, "learning_rate": 1.409258174030886e-05, "loss": 0.439, "step": 8061 }, { "epoch": 1.1060574878232832, "grad_norm": 1.1640625, "learning_rate": 1.409126463847339e-05, "loss": 0.3691, "step": 8062 }, { "epoch": 1.1061946902654867, "grad_norm": 1.28125, "learning_rate": 1.4089947451390767e-05, "loss": 0.4808, "step": 8063 }, { "epoch": 1.1063318927076902, "grad_norm": 1.3359375, "learning_rate": 1.4088630179088437e-05, "loss": 0.4534, "step": 8064 }, { "epoch": 1.1064690951498937, "grad_norm": 1.359375, "learning_rate": 1.4087312821593848e-05, "loss": 0.4961, "step": 8065 }, { "epoch": 1.1066062975920972, "grad_norm": 1.296875, "learning_rate": 1.4085995378934448e-05, "loss": 0.4798, "step": 8066 }, { "epoch": 1.1067435000343007, "grad_norm": 1.1875, "learning_rate": 1.4084677851137687e-05, "loss": 0.4625, "step": 8067 }, { "epoch": 1.1068807024765042, "grad_norm": 1.203125, "learning_rate": 1.4083360238231017e-05, "loss": 0.4493, "step": 8068 }, { "epoch": 1.1070179049187074, "grad_norm": 0.98046875, "learning_rate": 1.4082042540241893e-05, "loss": 0.3295, "step": 8069 }, { "epoch": 1.107155107360911, "grad_norm": 1.421875, "learning_rate": 1.4080724757197773e-05, "loss": 0.5021, "step": 8070 }, { "epoch": 1.1072923098031144, "grad_norm": 1.2109375, "learning_rate": 1.4079406889126113e-05, "loss": 0.4367, "step": 8071 }, { "epoch": 1.107429512245318, "grad_norm": 1.5625, "learning_rate": 1.4078088936054372e-05, "loss": 0.5143, "step": 8072 }, { "epoch": 1.1075667146875214, "grad_norm": 1.3359375, "learning_rate": 1.4076770898010013e-05, "loss": 0.5014, "step": 8073 }, { "epoch": 1.107703917129725, "grad_norm": 1.25, "learning_rate": 1.4075452775020498e-05, "loss": 0.4046, "step": 8074 }, { "epoch": 1.1078411195719284, "grad_norm": 1.390625, "learning_rate": 1.4074134567113296e-05, "loss": 0.5268, "step": 8075 }, { "epoch": 1.107978322014132, "grad_norm": 1.234375, "learning_rate": 1.4072816274315866e-05, "loss": 0.4397, "step": 8076 }, { "epoch": 1.1081155244563354, "grad_norm": 1.171875, "learning_rate": 1.4071497896655682e-05, "loss": 0.4296, "step": 8077 }, { "epoch": 1.1082527268985387, "grad_norm": 1.1796875, "learning_rate": 1.4070179434160209e-05, "loss": 0.4178, "step": 8078 }, { "epoch": 1.1083899293407422, "grad_norm": 1.21875, "learning_rate": 1.4068860886856925e-05, "loss": 0.4724, "step": 8079 }, { "epoch": 1.1085271317829457, "grad_norm": 1.2109375, "learning_rate": 1.4067542254773303e-05, "loss": 0.4118, "step": 8080 }, { "epoch": 1.1086643342251492, "grad_norm": 1.1875, "learning_rate": 1.4066223537936817e-05, "loss": 0.4253, "step": 8081 }, { "epoch": 1.1088015366673527, "grad_norm": 1.21875, "learning_rate": 1.4064904736374941e-05, "loss": 0.4101, "step": 8082 }, { "epoch": 1.1089387391095562, "grad_norm": 1.1875, "learning_rate": 1.4063585850115162e-05, "loss": 0.4037, "step": 8083 }, { "epoch": 1.1090759415517597, "grad_norm": 1.234375, "learning_rate": 1.4062266879184952e-05, "loss": 0.4326, "step": 8084 }, { "epoch": 1.109213143993963, "grad_norm": 1.1875, "learning_rate": 1.40609478236118e-05, "loss": 0.4497, "step": 8085 }, { "epoch": 1.1093503464361665, "grad_norm": 1.28125, "learning_rate": 1.4059628683423185e-05, "loss": 0.4514, "step": 8086 }, { "epoch": 1.10948754887837, "grad_norm": 1.2578125, "learning_rate": 1.40583094586466e-05, "loss": 0.4808, "step": 8087 }, { "epoch": 1.1096247513205735, "grad_norm": 1.34375, "learning_rate": 1.4056990149309524e-05, "loss": 0.5263, "step": 8088 }, { "epoch": 1.109761953762777, "grad_norm": 1.171875, "learning_rate": 1.4055670755439454e-05, "loss": 0.4205, "step": 8089 }, { "epoch": 1.1098991562049805, "grad_norm": 1.328125, "learning_rate": 1.405435127706388e-05, "loss": 0.4227, "step": 8090 }, { "epoch": 1.110036358647184, "grad_norm": 1.2734375, "learning_rate": 1.4053031714210294e-05, "loss": 0.4846, "step": 8091 }, { "epoch": 1.1101735610893875, "grad_norm": 1.2109375, "learning_rate": 1.4051712066906193e-05, "loss": 0.3812, "step": 8092 }, { "epoch": 1.110310763531591, "grad_norm": 1.15625, "learning_rate": 1.405039233517907e-05, "loss": 0.3249, "step": 8093 }, { "epoch": 1.1104479659737942, "grad_norm": 1.3203125, "learning_rate": 1.4049072519056424e-05, "loss": 0.4886, "step": 8094 }, { "epoch": 1.1105851684159977, "grad_norm": 1.09375, "learning_rate": 1.4047752618565757e-05, "loss": 0.346, "step": 8095 }, { "epoch": 1.1107223708582012, "grad_norm": 1.2265625, "learning_rate": 1.404643263373457e-05, "loss": 0.4365, "step": 8096 }, { "epoch": 1.1108595733004047, "grad_norm": 1.359375, "learning_rate": 1.4045112564590368e-05, "loss": 0.4997, "step": 8097 }, { "epoch": 1.1109967757426082, "grad_norm": 1.2578125, "learning_rate": 1.4043792411160656e-05, "loss": 0.4648, "step": 8098 }, { "epoch": 1.1111339781848117, "grad_norm": 1.171875, "learning_rate": 1.4042472173472939e-05, "loss": 0.4322, "step": 8099 }, { "epoch": 1.1112711806270152, "grad_norm": 1.171875, "learning_rate": 1.4041151851554729e-05, "loss": 0.4234, "step": 8100 }, { "epoch": 1.1114083830692185, "grad_norm": 1.1640625, "learning_rate": 1.4039831445433536e-05, "loss": 0.4107, "step": 8101 }, { "epoch": 1.111545585511422, "grad_norm": 1.2109375, "learning_rate": 1.4038510955136872e-05, "loss": 0.4375, "step": 8102 }, { "epoch": 1.1116827879536255, "grad_norm": 1.1484375, "learning_rate": 1.4037190380692249e-05, "loss": 0.4341, "step": 8103 }, { "epoch": 1.111819990395829, "grad_norm": 1.28125, "learning_rate": 1.4035869722127186e-05, "loss": 0.4699, "step": 8104 }, { "epoch": 1.1119571928380325, "grad_norm": 1.2734375, "learning_rate": 1.40345489794692e-05, "loss": 0.4778, "step": 8105 }, { "epoch": 1.112094395280236, "grad_norm": 1.40625, "learning_rate": 1.403322815274581e-05, "loss": 0.498, "step": 8106 }, { "epoch": 1.1122315977224395, "grad_norm": 1.15625, "learning_rate": 1.403190724198454e-05, "loss": 0.406, "step": 8107 }, { "epoch": 1.112368800164643, "grad_norm": 1.1640625, "learning_rate": 1.4030586247212907e-05, "loss": 0.3988, "step": 8108 }, { "epoch": 1.1125060026068465, "grad_norm": 1.328125, "learning_rate": 1.4029265168458443e-05, "loss": 0.4724, "step": 8109 }, { "epoch": 1.1126432050490498, "grad_norm": 1.3046875, "learning_rate": 1.402794400574867e-05, "loss": 0.4674, "step": 8110 }, { "epoch": 1.1127804074912533, "grad_norm": 1.3984375, "learning_rate": 1.4026622759111118e-05, "loss": 0.5032, "step": 8111 }, { "epoch": 1.1129176099334568, "grad_norm": 1.234375, "learning_rate": 1.4025301428573313e-05, "loss": 0.456, "step": 8112 }, { "epoch": 1.1130548123756603, "grad_norm": 1.2109375, "learning_rate": 1.4023980014162791e-05, "loss": 0.4431, "step": 8113 }, { "epoch": 1.1131920148178638, "grad_norm": 1.1484375, "learning_rate": 1.4022658515907084e-05, "loss": 0.3601, "step": 8114 }, { "epoch": 1.1133292172600673, "grad_norm": 1.3515625, "learning_rate": 1.4021336933833729e-05, "loss": 0.5552, "step": 8115 }, { "epoch": 1.1134664197022708, "grad_norm": 1.203125, "learning_rate": 1.402001526797026e-05, "loss": 0.4364, "step": 8116 }, { "epoch": 1.113603622144474, "grad_norm": 1.1015625, "learning_rate": 1.4018693518344222e-05, "loss": 0.3927, "step": 8117 }, { "epoch": 1.1137408245866776, "grad_norm": 1.3671875, "learning_rate": 1.4017371684983147e-05, "loss": 0.5642, "step": 8118 }, { "epoch": 1.113878027028881, "grad_norm": 1.2265625, "learning_rate": 1.4016049767914581e-05, "loss": 0.4385, "step": 8119 }, { "epoch": 1.1140152294710846, "grad_norm": 1.21875, "learning_rate": 1.401472776716607e-05, "loss": 0.4245, "step": 8120 }, { "epoch": 1.114152431913288, "grad_norm": 1.2265625, "learning_rate": 1.4013405682765155e-05, "loss": 0.4671, "step": 8121 }, { "epoch": 1.1142896343554916, "grad_norm": 1.21875, "learning_rate": 1.4012083514739387e-05, "loss": 0.4413, "step": 8122 }, { "epoch": 1.114426836797695, "grad_norm": 1.3125, "learning_rate": 1.4010761263116317e-05, "loss": 0.4987, "step": 8123 }, { "epoch": 1.1145640392398986, "grad_norm": 1.2421875, "learning_rate": 1.4009438927923488e-05, "loss": 0.458, "step": 8124 }, { "epoch": 1.114701241682102, "grad_norm": 1.140625, "learning_rate": 1.4008116509188463e-05, "loss": 0.4133, "step": 8125 }, { "epoch": 1.1148384441243053, "grad_norm": 1.1171875, "learning_rate": 1.4006794006938789e-05, "loss": 0.3696, "step": 8126 }, { "epoch": 1.1149756465665088, "grad_norm": 1.1796875, "learning_rate": 1.4005471421202026e-05, "loss": 0.4316, "step": 8127 }, { "epoch": 1.1151128490087123, "grad_norm": 1.390625, "learning_rate": 1.400414875200573e-05, "loss": 0.4836, "step": 8128 }, { "epoch": 1.1152500514509158, "grad_norm": 1.328125, "learning_rate": 1.4002825999377463e-05, "loss": 0.415, "step": 8129 }, { "epoch": 1.1153872538931193, "grad_norm": 1.25, "learning_rate": 1.4001503163344782e-05, "loss": 0.4471, "step": 8130 }, { "epoch": 1.1155244563353228, "grad_norm": 1.1953125, "learning_rate": 1.4000180243935254e-05, "loss": 0.4969, "step": 8131 }, { "epoch": 1.1156616587775263, "grad_norm": 1.1171875, "learning_rate": 1.399885724117644e-05, "loss": 0.4014, "step": 8132 }, { "epoch": 1.1157988612197296, "grad_norm": 1.375, "learning_rate": 1.3997534155095911e-05, "loss": 0.4993, "step": 8133 }, { "epoch": 1.115936063661933, "grad_norm": 1.2890625, "learning_rate": 1.3996210985721232e-05, "loss": 0.517, "step": 8134 }, { "epoch": 1.1160732661041366, "grad_norm": 1.078125, "learning_rate": 1.3994887733079977e-05, "loss": 0.3399, "step": 8135 }, { "epoch": 1.11621046854634, "grad_norm": 1.3125, "learning_rate": 1.3993564397199713e-05, "loss": 0.4978, "step": 8136 }, { "epoch": 1.1163476709885436, "grad_norm": 1.203125, "learning_rate": 1.3992240978108019e-05, "loss": 0.4515, "step": 8137 }, { "epoch": 1.116484873430747, "grad_norm": 1.3359375, "learning_rate": 1.3990917475832464e-05, "loss": 0.4665, "step": 8138 }, { "epoch": 1.1166220758729506, "grad_norm": 1.1796875, "learning_rate": 1.398959389040063e-05, "loss": 0.4221, "step": 8139 }, { "epoch": 1.116759278315154, "grad_norm": 1.28125, "learning_rate": 1.3988270221840093e-05, "loss": 0.4638, "step": 8140 }, { "epoch": 1.1168964807573576, "grad_norm": 1.09375, "learning_rate": 1.3986946470178434e-05, "loss": 0.3908, "step": 8141 }, { "epoch": 1.1170336831995609, "grad_norm": 1.171875, "learning_rate": 1.3985622635443235e-05, "loss": 0.4725, "step": 8142 }, { "epoch": 1.1171708856417644, "grad_norm": 1.2421875, "learning_rate": 1.3984298717662079e-05, "loss": 0.4635, "step": 8143 }, { "epoch": 1.1173080880839679, "grad_norm": 1.2109375, "learning_rate": 1.3982974716862556e-05, "loss": 0.4258, "step": 8144 }, { "epoch": 1.1174452905261714, "grad_norm": 1.3203125, "learning_rate": 1.398165063307225e-05, "loss": 0.4424, "step": 8145 }, { "epoch": 1.1175824929683749, "grad_norm": 1.3359375, "learning_rate": 1.398032646631875e-05, "loss": 0.5157, "step": 8146 }, { "epoch": 1.1177196954105784, "grad_norm": 1.1484375, "learning_rate": 1.3979002216629649e-05, "loss": 0.4228, "step": 8147 }, { "epoch": 1.1178568978527819, "grad_norm": 1.21875, "learning_rate": 1.3977677884032534e-05, "loss": 0.4828, "step": 8148 }, { "epoch": 1.1179941002949851, "grad_norm": 1.1875, "learning_rate": 1.3976353468555006e-05, "loss": 0.4199, "step": 8149 }, { "epoch": 1.1181313027371886, "grad_norm": 1.1640625, "learning_rate": 1.3975028970224658e-05, "loss": 0.4284, "step": 8150 }, { "epoch": 1.1182685051793921, "grad_norm": 1.1640625, "learning_rate": 1.3973704389069088e-05, "loss": 0.4119, "step": 8151 }, { "epoch": 1.1184057076215956, "grad_norm": 1.3359375, "learning_rate": 1.3972379725115896e-05, "loss": 0.4675, "step": 8152 }, { "epoch": 1.1185429100637991, "grad_norm": 2.015625, "learning_rate": 1.3971054978392682e-05, "loss": 0.3826, "step": 8153 }, { "epoch": 1.1186801125060026, "grad_norm": 1.1640625, "learning_rate": 1.396973014892705e-05, "loss": 0.4254, "step": 8154 }, { "epoch": 1.1188173149482061, "grad_norm": 1.15625, "learning_rate": 1.3968405236746605e-05, "loss": 0.3763, "step": 8155 }, { "epoch": 1.1189545173904096, "grad_norm": 1.25, "learning_rate": 1.3967080241878951e-05, "loss": 0.4661, "step": 8156 }, { "epoch": 1.1190917198326131, "grad_norm": 1.2421875, "learning_rate": 1.39657551643517e-05, "loss": 0.494, "step": 8157 }, { "epoch": 1.1192289222748164, "grad_norm": 1.34375, "learning_rate": 1.3964430004192457e-05, "loss": 0.4647, "step": 8158 }, { "epoch": 1.11936612471702, "grad_norm": 1.15625, "learning_rate": 1.396310476142884e-05, "loss": 0.4453, "step": 8159 }, { "epoch": 1.1195033271592234, "grad_norm": 1.296875, "learning_rate": 1.3961779436088453e-05, "loss": 0.5109, "step": 8160 }, { "epoch": 1.119640529601427, "grad_norm": 1.171875, "learning_rate": 1.396045402819892e-05, "loss": 0.3842, "step": 8161 }, { "epoch": 1.1197777320436304, "grad_norm": 1.28125, "learning_rate": 1.3959128537787854e-05, "loss": 0.515, "step": 8162 }, { "epoch": 1.119914934485834, "grad_norm": 1.1328125, "learning_rate": 1.3957802964882873e-05, "loss": 0.3882, "step": 8163 }, { "epoch": 1.1200521369280374, "grad_norm": 1.203125, "learning_rate": 1.39564773095116e-05, "loss": 0.4468, "step": 8164 }, { "epoch": 1.1201893393702407, "grad_norm": 1.2109375, "learning_rate": 1.3955151571701651e-05, "loss": 0.4538, "step": 8165 }, { "epoch": 1.1203265418124442, "grad_norm": 1.375, "learning_rate": 1.3953825751480655e-05, "loss": 0.5362, "step": 8166 }, { "epoch": 1.1204637442546477, "grad_norm": 1.125, "learning_rate": 1.3952499848876233e-05, "loss": 0.3486, "step": 8167 }, { "epoch": 1.1206009466968512, "grad_norm": 1.2890625, "learning_rate": 1.3951173863916017e-05, "loss": 0.4595, "step": 8168 }, { "epoch": 1.1207381491390547, "grad_norm": 1.1171875, "learning_rate": 1.3949847796627631e-05, "loss": 0.3949, "step": 8169 }, { "epoch": 1.1208753515812582, "grad_norm": 1.3125, "learning_rate": 1.3948521647038708e-05, "loss": 0.4858, "step": 8170 }, { "epoch": 1.1210125540234617, "grad_norm": 1.390625, "learning_rate": 1.3947195415176883e-05, "loss": 0.5677, "step": 8171 }, { "epoch": 1.1211497564656652, "grad_norm": 1.2890625, "learning_rate": 1.3945869101069787e-05, "loss": 0.5228, "step": 8172 }, { "epoch": 1.1212869589078687, "grad_norm": 1.15625, "learning_rate": 1.3944542704745053e-05, "loss": 0.4037, "step": 8173 }, { "epoch": 1.121424161350072, "grad_norm": 1.15625, "learning_rate": 1.3943216226230318e-05, "loss": 0.4175, "step": 8174 }, { "epoch": 1.1215613637922754, "grad_norm": 1.171875, "learning_rate": 1.3941889665553227e-05, "loss": 0.4265, "step": 8175 }, { "epoch": 1.121698566234479, "grad_norm": 1.234375, "learning_rate": 1.3940563022741415e-05, "loss": 0.4958, "step": 8176 }, { "epoch": 1.1218357686766824, "grad_norm": 1.1875, "learning_rate": 1.393923629782253e-05, "loss": 0.4524, "step": 8177 }, { "epoch": 1.121972971118886, "grad_norm": 1.15625, "learning_rate": 1.3937909490824209e-05, "loss": 0.4004, "step": 8178 }, { "epoch": 1.1221101735610894, "grad_norm": 1.2734375, "learning_rate": 1.3936582601774102e-05, "loss": 0.4763, "step": 8179 }, { "epoch": 1.122247376003293, "grad_norm": 1.1171875, "learning_rate": 1.3935255630699857e-05, "loss": 0.373, "step": 8180 }, { "epoch": 1.1223845784454962, "grad_norm": 1.171875, "learning_rate": 1.3933928577629123e-05, "loss": 0.397, "step": 8181 }, { "epoch": 1.1225217808876997, "grad_norm": 1.09375, "learning_rate": 1.3932601442589552e-05, "loss": 0.352, "step": 8182 }, { "epoch": 1.1226589833299032, "grad_norm": 1.15625, "learning_rate": 1.3931274225608795e-05, "loss": 0.4186, "step": 8183 }, { "epoch": 1.1227961857721067, "grad_norm": 1.234375, "learning_rate": 1.3929946926714503e-05, "loss": 0.4692, "step": 8184 }, { "epoch": 1.1229333882143102, "grad_norm": 1.2734375, "learning_rate": 1.3928619545934342e-05, "loss": 0.4781, "step": 8185 }, { "epoch": 1.1230705906565137, "grad_norm": 1.25, "learning_rate": 1.3927292083295957e-05, "loss": 0.43, "step": 8186 }, { "epoch": 1.1232077930987172, "grad_norm": 1.296875, "learning_rate": 1.3925964538827018e-05, "loss": 0.4968, "step": 8187 }, { "epoch": 1.1233449955409207, "grad_norm": 1.1171875, "learning_rate": 1.392463691255518e-05, "loss": 0.405, "step": 8188 }, { "epoch": 1.1234821979831242, "grad_norm": 1.2109375, "learning_rate": 1.3923309204508108e-05, "loss": 0.4534, "step": 8189 }, { "epoch": 1.1236194004253275, "grad_norm": 1.265625, "learning_rate": 1.3921981414713468e-05, "loss": 0.4841, "step": 8190 }, { "epoch": 1.123756602867531, "grad_norm": 1.296875, "learning_rate": 1.3920653543198927e-05, "loss": 0.454, "step": 8191 }, { "epoch": 1.1238938053097345, "grad_norm": 1.125, "learning_rate": 1.391932558999215e-05, "loss": 0.3844, "step": 8192 }, { "epoch": 1.124031007751938, "grad_norm": 1.40625, "learning_rate": 1.3917997555120807e-05, "loss": 0.4921, "step": 8193 }, { "epoch": 1.1241682101941415, "grad_norm": 1.2109375, "learning_rate": 1.3916669438612571e-05, "loss": 0.4791, "step": 8194 }, { "epoch": 1.124305412636345, "grad_norm": 1.296875, "learning_rate": 1.3915341240495115e-05, "loss": 0.4728, "step": 8195 }, { "epoch": 1.1244426150785485, "grad_norm": 1.21875, "learning_rate": 1.3914012960796113e-05, "loss": 0.4068, "step": 8196 }, { "epoch": 1.1245798175207518, "grad_norm": 1.28125, "learning_rate": 1.391268459954324e-05, "loss": 0.4339, "step": 8197 }, { "epoch": 1.1247170199629553, "grad_norm": 1.2265625, "learning_rate": 1.3911356156764177e-05, "loss": 0.4397, "step": 8198 }, { "epoch": 1.1248542224051588, "grad_norm": 1.28125, "learning_rate": 1.3910027632486604e-05, "loss": 0.4584, "step": 8199 }, { "epoch": 1.1249914248473623, "grad_norm": 1.28125, "learning_rate": 1.3908699026738204e-05, "loss": 0.4749, "step": 8200 }, { "epoch": 1.1251286272895658, "grad_norm": 1.203125, "learning_rate": 1.3907370339546655e-05, "loss": 0.4796, "step": 8201 }, { "epoch": 1.1252658297317693, "grad_norm": 1.3359375, "learning_rate": 1.3906041570939649e-05, "loss": 0.4964, "step": 8202 }, { "epoch": 1.1254030321739728, "grad_norm": 1.34375, "learning_rate": 1.3904712720944865e-05, "loss": 0.4981, "step": 8203 }, { "epoch": 1.1255402346161762, "grad_norm": 1.203125, "learning_rate": 1.3903383789589995e-05, "loss": 0.4303, "step": 8204 }, { "epoch": 1.1256774370583797, "grad_norm": 1.2890625, "learning_rate": 1.3902054776902728e-05, "loss": 0.4412, "step": 8205 }, { "epoch": 1.125814639500583, "grad_norm": 1.265625, "learning_rate": 1.390072568291076e-05, "loss": 0.478, "step": 8206 }, { "epoch": 1.1259518419427865, "grad_norm": 1.296875, "learning_rate": 1.3899396507641782e-05, "loss": 0.4334, "step": 8207 }, { "epoch": 1.12608904438499, "grad_norm": 1.390625, "learning_rate": 1.389806725112349e-05, "loss": 0.4431, "step": 8208 }, { "epoch": 1.1262262468271935, "grad_norm": 1.3671875, "learning_rate": 1.3896737913383577e-05, "loss": 0.549, "step": 8209 }, { "epoch": 1.126363449269397, "grad_norm": 1.3125, "learning_rate": 1.3895408494449746e-05, "loss": 0.5074, "step": 8210 }, { "epoch": 1.1265006517116005, "grad_norm": 1.2734375, "learning_rate": 1.3894078994349696e-05, "loss": 0.5062, "step": 8211 }, { "epoch": 1.126637854153804, "grad_norm": 1.2109375, "learning_rate": 1.3892749413111129e-05, "loss": 0.3918, "step": 8212 }, { "epoch": 1.1267750565960073, "grad_norm": 1.25, "learning_rate": 1.3891419750761745e-05, "loss": 0.4356, "step": 8213 }, { "epoch": 1.1269122590382108, "grad_norm": 1.2421875, "learning_rate": 1.3890090007329256e-05, "loss": 0.4523, "step": 8214 }, { "epoch": 1.1270494614804143, "grad_norm": 1.296875, "learning_rate": 1.3888760182841366e-05, "loss": 0.5217, "step": 8215 }, { "epoch": 1.1271866639226178, "grad_norm": 1.15625, "learning_rate": 1.3887430277325779e-05, "loss": 0.4212, "step": 8216 }, { "epoch": 1.1273238663648213, "grad_norm": 1.2109375, "learning_rate": 1.3886100290810217e-05, "loss": 0.4402, "step": 8217 }, { "epoch": 1.1274610688070248, "grad_norm": 1.234375, "learning_rate": 1.3884770223322382e-05, "loss": 0.4625, "step": 8218 }, { "epoch": 1.1275982712492283, "grad_norm": 1.234375, "learning_rate": 1.3883440074889989e-05, "loss": 0.4293, "step": 8219 }, { "epoch": 1.1277354736914318, "grad_norm": 1.2578125, "learning_rate": 1.3882109845540756e-05, "loss": 0.5235, "step": 8220 }, { "epoch": 1.1278726761336353, "grad_norm": 1.234375, "learning_rate": 1.3880779535302399e-05, "loss": 0.4848, "step": 8221 }, { "epoch": 1.1280098785758386, "grad_norm": 1.2578125, "learning_rate": 1.3879449144202637e-05, "loss": 0.4129, "step": 8222 }, { "epoch": 1.128147081018042, "grad_norm": 1.3984375, "learning_rate": 1.3878118672269192e-05, "loss": 0.4229, "step": 8223 }, { "epoch": 1.1282842834602456, "grad_norm": 1.2421875, "learning_rate": 1.3876788119529786e-05, "loss": 0.4728, "step": 8224 }, { "epoch": 1.128421485902449, "grad_norm": 1.1953125, "learning_rate": 1.3875457486012139e-05, "loss": 0.4361, "step": 8225 }, { "epoch": 1.1285586883446526, "grad_norm": 1.2265625, "learning_rate": 1.3874126771743983e-05, "loss": 0.4551, "step": 8226 }, { "epoch": 1.128695890786856, "grad_norm": 1.3828125, "learning_rate": 1.387279597675304e-05, "loss": 0.5022, "step": 8227 }, { "epoch": 1.1288330932290596, "grad_norm": 1.1796875, "learning_rate": 1.3871465101067041e-05, "loss": 0.4098, "step": 8228 }, { "epoch": 1.1289702956712628, "grad_norm": 1.2421875, "learning_rate": 1.3870134144713716e-05, "loss": 0.4013, "step": 8229 }, { "epoch": 1.1291074981134663, "grad_norm": 1.1015625, "learning_rate": 1.3868803107720797e-05, "loss": 0.3941, "step": 8230 }, { "epoch": 1.1292447005556698, "grad_norm": 1.1171875, "learning_rate": 1.3867471990116019e-05, "loss": 0.4071, "step": 8231 }, { "epoch": 1.1293819029978733, "grad_norm": 1.2109375, "learning_rate": 1.3866140791927119e-05, "loss": 0.4929, "step": 8232 }, { "epoch": 1.1295191054400768, "grad_norm": 1.3125, "learning_rate": 1.3864809513181832e-05, "loss": 0.4748, "step": 8233 }, { "epoch": 1.1296563078822803, "grad_norm": 1.2734375, "learning_rate": 1.3863478153907896e-05, "loss": 0.4855, "step": 8234 }, { "epoch": 1.1297935103244838, "grad_norm": 1.1953125, "learning_rate": 1.3862146714133056e-05, "loss": 0.442, "step": 8235 }, { "epoch": 1.1299307127666873, "grad_norm": 1.2421875, "learning_rate": 1.3860815193885051e-05, "loss": 0.4793, "step": 8236 }, { "epoch": 1.1300679152088908, "grad_norm": 1.328125, "learning_rate": 1.3859483593191627e-05, "loss": 0.5241, "step": 8237 }, { "epoch": 1.130205117651094, "grad_norm": 1.109375, "learning_rate": 1.3858151912080526e-05, "loss": 0.3978, "step": 8238 }, { "epoch": 1.1303423200932976, "grad_norm": 1.3125, "learning_rate": 1.3856820150579499e-05, "loss": 0.5092, "step": 8239 }, { "epoch": 1.130479522535501, "grad_norm": 1.2734375, "learning_rate": 1.3855488308716296e-05, "loss": 0.4666, "step": 8240 }, { "epoch": 1.1306167249777046, "grad_norm": 1.2109375, "learning_rate": 1.3854156386518662e-05, "loss": 0.3791, "step": 8241 }, { "epoch": 1.130753927419908, "grad_norm": 1.1796875, "learning_rate": 1.3852824384014358e-05, "loss": 0.3838, "step": 8242 }, { "epoch": 1.1308911298621116, "grad_norm": 1.25, "learning_rate": 1.3851492301231131e-05, "loss": 0.4897, "step": 8243 }, { "epoch": 1.131028332304315, "grad_norm": 1.28125, "learning_rate": 1.3850160138196738e-05, "loss": 0.412, "step": 8244 }, { "epoch": 1.1311655347465184, "grad_norm": 1.25, "learning_rate": 1.3848827894938941e-05, "loss": 0.4788, "step": 8245 }, { "epoch": 1.1313027371887219, "grad_norm": 1.28125, "learning_rate": 1.3847495571485495e-05, "loss": 0.4769, "step": 8246 }, { "epoch": 1.1314399396309254, "grad_norm": 1.2265625, "learning_rate": 1.3846163167864161e-05, "loss": 0.4588, "step": 8247 }, { "epoch": 1.1315771420731289, "grad_norm": 1.2109375, "learning_rate": 1.3844830684102701e-05, "loss": 0.4218, "step": 8248 }, { "epoch": 1.1317143445153324, "grad_norm": 1.2734375, "learning_rate": 1.3843498120228881e-05, "loss": 0.4588, "step": 8249 }, { "epoch": 1.1318515469575359, "grad_norm": 1.1875, "learning_rate": 1.3842165476270469e-05, "loss": 0.4507, "step": 8250 }, { "epoch": 1.1319887493997394, "grad_norm": 1.2109375, "learning_rate": 1.3840832752255224e-05, "loss": 0.462, "step": 8251 }, { "epoch": 1.1321259518419429, "grad_norm": 1.2265625, "learning_rate": 1.3839499948210924e-05, "loss": 0.4739, "step": 8252 }, { "epoch": 1.1322631542841464, "grad_norm": 1.1640625, "learning_rate": 1.3838167064165338e-05, "loss": 0.4472, "step": 8253 }, { "epoch": 1.1324003567263496, "grad_norm": 1.296875, "learning_rate": 1.3836834100146234e-05, "loss": 0.4491, "step": 8254 }, { "epoch": 1.1325375591685531, "grad_norm": 1.1953125, "learning_rate": 1.383550105618139e-05, "loss": 0.3902, "step": 8255 }, { "epoch": 1.1326747616107566, "grad_norm": 1.1171875, "learning_rate": 1.3834167932298584e-05, "loss": 0.416, "step": 8256 }, { "epoch": 1.1328119640529601, "grad_norm": 1.265625, "learning_rate": 1.3832834728525586e-05, "loss": 0.411, "step": 8257 }, { "epoch": 1.1329491664951636, "grad_norm": 1.1875, "learning_rate": 1.383150144489018e-05, "loss": 0.417, "step": 8258 }, { "epoch": 1.1330863689373671, "grad_norm": 1.3515625, "learning_rate": 1.3830168081420149e-05, "loss": 0.5053, "step": 8259 }, { "epoch": 1.1332235713795706, "grad_norm": 1.1328125, "learning_rate": 1.382883463814327e-05, "loss": 0.3983, "step": 8260 }, { "epoch": 1.133360773821774, "grad_norm": 1.2734375, "learning_rate": 1.3827501115087333e-05, "loss": 0.422, "step": 8261 }, { "epoch": 1.1334979762639774, "grad_norm": 1.1640625, "learning_rate": 1.382616751228012e-05, "loss": 0.4072, "step": 8262 }, { "epoch": 1.133635178706181, "grad_norm": 1.2421875, "learning_rate": 1.3824833829749418e-05, "loss": 0.4316, "step": 8263 }, { "epoch": 1.1337723811483844, "grad_norm": 1.2734375, "learning_rate": 1.382350006752302e-05, "loss": 0.4531, "step": 8264 }, { "epoch": 1.133909583590588, "grad_norm": 1.1875, "learning_rate": 1.382216622562871e-05, "loss": 0.4123, "step": 8265 }, { "epoch": 1.1340467860327914, "grad_norm": 1.203125, "learning_rate": 1.3820832304094287e-05, "loss": 0.4501, "step": 8266 }, { "epoch": 1.134183988474995, "grad_norm": 1.203125, "learning_rate": 1.3819498302947542e-05, "loss": 0.4054, "step": 8267 }, { "epoch": 1.1343211909171984, "grad_norm": 1.359375, "learning_rate": 1.3818164222216272e-05, "loss": 0.5183, "step": 8268 }, { "epoch": 1.134458393359402, "grad_norm": 1.15625, "learning_rate": 1.3816830061928275e-05, "loss": 0.3858, "step": 8269 }, { "epoch": 1.1345955958016052, "grad_norm": 1.4296875, "learning_rate": 1.3815495822111347e-05, "loss": 0.5368, "step": 8270 }, { "epoch": 1.1347327982438087, "grad_norm": 1.296875, "learning_rate": 1.3814161502793293e-05, "loss": 0.5264, "step": 8271 }, { "epoch": 1.1348700006860122, "grad_norm": 1.234375, "learning_rate": 1.3812827104001912e-05, "loss": 0.4491, "step": 8272 }, { "epoch": 1.1350072031282157, "grad_norm": 1.1953125, "learning_rate": 1.3811492625765007e-05, "loss": 0.3883, "step": 8273 }, { "epoch": 1.1351444055704192, "grad_norm": 1.109375, "learning_rate": 1.3810158068110386e-05, "loss": 0.3872, "step": 8274 }, { "epoch": 1.1352816080126227, "grad_norm": 1.125, "learning_rate": 1.3808823431065858e-05, "loss": 0.3936, "step": 8275 }, { "epoch": 1.1354188104548262, "grad_norm": 1.3125, "learning_rate": 1.380748871465923e-05, "loss": 0.4271, "step": 8276 }, { "epoch": 1.1355560128970295, "grad_norm": 1.15625, "learning_rate": 1.3806153918918312e-05, "loss": 0.4209, "step": 8277 }, { "epoch": 1.135693215339233, "grad_norm": 1.203125, "learning_rate": 1.3804819043870917e-05, "loss": 0.4534, "step": 8278 }, { "epoch": 1.1358304177814365, "grad_norm": 1.1796875, "learning_rate": 1.3803484089544861e-05, "loss": 0.3748, "step": 8279 }, { "epoch": 1.13596762022364, "grad_norm": 1.2890625, "learning_rate": 1.3802149055967956e-05, "loss": 0.4378, "step": 8280 }, { "epoch": 1.1361048226658435, "grad_norm": 1.1640625, "learning_rate": 1.3800813943168021e-05, "loss": 0.4211, "step": 8281 }, { "epoch": 1.136242025108047, "grad_norm": 1.3359375, "learning_rate": 1.3799478751172877e-05, "loss": 0.5202, "step": 8282 }, { "epoch": 1.1363792275502504, "grad_norm": 1.28125, "learning_rate": 1.3798143480010339e-05, "loss": 0.4466, "step": 8283 }, { "epoch": 1.136516429992454, "grad_norm": 1.21875, "learning_rate": 1.3796808129708234e-05, "loss": 0.4216, "step": 8284 }, { "epoch": 1.1366536324346574, "grad_norm": 1.125, "learning_rate": 1.3795472700294386e-05, "loss": 0.3965, "step": 8285 }, { "epoch": 1.1367908348768607, "grad_norm": 1.2265625, "learning_rate": 1.3794137191796616e-05, "loss": 0.4641, "step": 8286 }, { "epoch": 1.1369280373190642, "grad_norm": 1.1171875, "learning_rate": 1.3792801604242758e-05, "loss": 0.4051, "step": 8287 }, { "epoch": 1.1370652397612677, "grad_norm": 1.2109375, "learning_rate": 1.3791465937660635e-05, "loss": 0.458, "step": 8288 }, { "epoch": 1.1372024422034712, "grad_norm": 1.15625, "learning_rate": 1.379013019207808e-05, "loss": 0.4606, "step": 8289 }, { "epoch": 1.1373396446456747, "grad_norm": 1.2578125, "learning_rate": 1.3788794367522925e-05, "loss": 0.4701, "step": 8290 }, { "epoch": 1.1374768470878782, "grad_norm": 1.2265625, "learning_rate": 1.3787458464023005e-05, "loss": 0.472, "step": 8291 }, { "epoch": 1.1376140495300817, "grad_norm": 1.25, "learning_rate": 1.3786122481606153e-05, "loss": 0.4477, "step": 8292 }, { "epoch": 1.137751251972285, "grad_norm": 1.1953125, "learning_rate": 1.3784786420300202e-05, "loss": 0.4546, "step": 8293 }, { "epoch": 1.1378884544144885, "grad_norm": 1.1640625, "learning_rate": 1.3783450280132999e-05, "loss": 0.424, "step": 8294 }, { "epoch": 1.138025656856692, "grad_norm": 1.234375, "learning_rate": 1.3782114061132382e-05, "loss": 0.4542, "step": 8295 }, { "epoch": 1.1381628592988955, "grad_norm": 1.171875, "learning_rate": 1.378077776332619e-05, "loss": 0.3973, "step": 8296 }, { "epoch": 1.138300061741099, "grad_norm": 1.2734375, "learning_rate": 1.3779441386742267e-05, "loss": 0.4819, "step": 8297 }, { "epoch": 1.1384372641833025, "grad_norm": 1.328125, "learning_rate": 1.3778104931408464e-05, "loss": 0.4828, "step": 8298 }, { "epoch": 1.138574466625506, "grad_norm": 1.15625, "learning_rate": 1.377676839735262e-05, "loss": 0.407, "step": 8299 }, { "epoch": 1.1387116690677095, "grad_norm": 1.1640625, "learning_rate": 1.3775431784602589e-05, "loss": 0.4217, "step": 8300 }, { "epoch": 1.138848871509913, "grad_norm": 1.3671875, "learning_rate": 1.3774095093186217e-05, "loss": 0.5166, "step": 8301 }, { "epoch": 1.1389860739521163, "grad_norm": 1.25, "learning_rate": 1.3772758323131361e-05, "loss": 0.4174, "step": 8302 }, { "epoch": 1.1391232763943198, "grad_norm": 1.28125, "learning_rate": 1.3771421474465869e-05, "loss": 0.4871, "step": 8303 }, { "epoch": 1.1392604788365233, "grad_norm": 1.234375, "learning_rate": 1.3770084547217599e-05, "loss": 0.4627, "step": 8304 }, { "epoch": 1.1393976812787268, "grad_norm": 1.2109375, "learning_rate": 1.376874754141441e-05, "loss": 0.4525, "step": 8305 }, { "epoch": 1.1395348837209303, "grad_norm": 1.3125, "learning_rate": 1.3767410457084154e-05, "loss": 0.4708, "step": 8306 }, { "epoch": 1.1396720861631338, "grad_norm": 1.171875, "learning_rate": 1.3766073294254699e-05, "loss": 0.4145, "step": 8307 }, { "epoch": 1.1398092886053373, "grad_norm": 1.328125, "learning_rate": 1.3764736052953897e-05, "loss": 0.4808, "step": 8308 }, { "epoch": 1.1399464910475405, "grad_norm": 1.2109375, "learning_rate": 1.3763398733209621e-05, "loss": 0.4717, "step": 8309 }, { "epoch": 1.140083693489744, "grad_norm": 1.171875, "learning_rate": 1.376206133504973e-05, "loss": 0.4365, "step": 8310 }, { "epoch": 1.1402208959319475, "grad_norm": 1.3203125, "learning_rate": 1.376072385850209e-05, "loss": 0.4816, "step": 8311 }, { "epoch": 1.140358098374151, "grad_norm": 1.1796875, "learning_rate": 1.3759386303594573e-05, "loss": 0.4372, "step": 8312 }, { "epoch": 1.1404953008163545, "grad_norm": 1.296875, "learning_rate": 1.3758048670355048e-05, "loss": 0.465, "step": 8313 }, { "epoch": 1.140632503258558, "grad_norm": 1.3046875, "learning_rate": 1.3756710958811383e-05, "loss": 0.5035, "step": 8314 }, { "epoch": 1.1407697057007615, "grad_norm": 1.375, "learning_rate": 1.3755373168991455e-05, "loss": 0.4447, "step": 8315 }, { "epoch": 1.140906908142965, "grad_norm": 1.203125, "learning_rate": 1.375403530092314e-05, "loss": 0.4053, "step": 8316 }, { "epoch": 1.1410441105851685, "grad_norm": 1.234375, "learning_rate": 1.3752697354634307e-05, "loss": 0.3968, "step": 8317 }, { "epoch": 1.1411813130273718, "grad_norm": 1.265625, "learning_rate": 1.3751359330152841e-05, "loss": 0.4821, "step": 8318 }, { "epoch": 1.1413185154695753, "grad_norm": 1.2265625, "learning_rate": 1.3750021227506619e-05, "loss": 0.5134, "step": 8319 }, { "epoch": 1.1414557179117788, "grad_norm": 1.2109375, "learning_rate": 1.3748683046723521e-05, "loss": 0.473, "step": 8320 }, { "epoch": 1.1415929203539823, "grad_norm": 1.265625, "learning_rate": 1.3747344787831431e-05, "loss": 0.4366, "step": 8321 }, { "epoch": 1.1417301227961858, "grad_norm": 1.25, "learning_rate": 1.3746006450858236e-05, "loss": 0.4656, "step": 8322 }, { "epoch": 1.1418673252383893, "grad_norm": 1.21875, "learning_rate": 1.3744668035831818e-05, "loss": 0.4462, "step": 8323 }, { "epoch": 1.1420045276805928, "grad_norm": 1.1328125, "learning_rate": 1.3743329542780068e-05, "loss": 0.4028, "step": 8324 }, { "epoch": 1.142141730122796, "grad_norm": 1.28125, "learning_rate": 1.3741990971730872e-05, "loss": 0.4703, "step": 8325 }, { "epoch": 1.1422789325649996, "grad_norm": 1.171875, "learning_rate": 1.3740652322712125e-05, "loss": 0.4196, "step": 8326 }, { "epoch": 1.142416135007203, "grad_norm": 1.234375, "learning_rate": 1.3739313595751716e-05, "loss": 0.4344, "step": 8327 }, { "epoch": 1.1425533374494066, "grad_norm": 1.28125, "learning_rate": 1.373797479087754e-05, "loss": 0.6145, "step": 8328 }, { "epoch": 1.14269053989161, "grad_norm": 1.2734375, "learning_rate": 1.3736635908117493e-05, "loss": 0.4925, "step": 8329 }, { "epoch": 1.1428277423338136, "grad_norm": 1.2578125, "learning_rate": 1.3735296947499474e-05, "loss": 0.4241, "step": 8330 }, { "epoch": 1.142964944776017, "grad_norm": 1.2109375, "learning_rate": 1.373395790905138e-05, "loss": 0.3985, "step": 8331 }, { "epoch": 1.1431021472182206, "grad_norm": 1.1796875, "learning_rate": 1.3732618792801113e-05, "loss": 0.4734, "step": 8332 }, { "epoch": 1.143239349660424, "grad_norm": 1.390625, "learning_rate": 1.3731279598776578e-05, "loss": 0.5002, "step": 8333 }, { "epoch": 1.1433765521026273, "grad_norm": 1.1953125, "learning_rate": 1.3729940327005675e-05, "loss": 0.4494, "step": 8334 }, { "epoch": 1.1435137545448308, "grad_norm": 1.234375, "learning_rate": 1.3728600977516309e-05, "loss": 0.417, "step": 8335 }, { "epoch": 1.1436509569870343, "grad_norm": 1.234375, "learning_rate": 1.3727261550336388e-05, "loss": 0.4643, "step": 8336 }, { "epoch": 1.1437881594292378, "grad_norm": 1.2109375, "learning_rate": 1.3725922045493823e-05, "loss": 0.4562, "step": 8337 }, { "epoch": 1.1439253618714413, "grad_norm": 1.1953125, "learning_rate": 1.3724582463016523e-05, "loss": 0.4497, "step": 8338 }, { "epoch": 1.1440625643136448, "grad_norm": 1.125, "learning_rate": 1.37232428029324e-05, "loss": 0.3941, "step": 8339 }, { "epoch": 1.1441997667558483, "grad_norm": 1.1953125, "learning_rate": 1.3721903065269369e-05, "loss": 0.4138, "step": 8340 }, { "epoch": 1.1443369691980516, "grad_norm": 1.25, "learning_rate": 1.3720563250055343e-05, "loss": 0.439, "step": 8341 }, { "epoch": 1.1444741716402551, "grad_norm": 1.390625, "learning_rate": 1.371922335731824e-05, "loss": 0.4955, "step": 8342 }, { "epoch": 1.1446113740824586, "grad_norm": 1.2265625, "learning_rate": 1.371788338708598e-05, "loss": 0.4502, "step": 8343 }, { "epoch": 1.144748576524662, "grad_norm": 1.2265625, "learning_rate": 1.3716543339386479e-05, "loss": 0.4343, "step": 8344 }, { "epoch": 1.1448857789668656, "grad_norm": 1.2421875, "learning_rate": 1.3715203214247664e-05, "loss": 0.443, "step": 8345 }, { "epoch": 1.145022981409069, "grad_norm": 1.21875, "learning_rate": 1.3713863011697452e-05, "loss": 0.4507, "step": 8346 }, { "epoch": 1.1451601838512726, "grad_norm": 1.2265625, "learning_rate": 1.3712522731763776e-05, "loss": 0.4683, "step": 8347 }, { "epoch": 1.145297386293476, "grad_norm": 1.3515625, "learning_rate": 1.3711182374474556e-05, "loss": 0.483, "step": 8348 }, { "epoch": 1.1454345887356796, "grad_norm": 1.2421875, "learning_rate": 1.3709841939857724e-05, "loss": 0.49, "step": 8349 }, { "epoch": 1.1455717911778829, "grad_norm": 1.21875, "learning_rate": 1.3708501427941208e-05, "loss": 0.4292, "step": 8350 }, { "epoch": 1.1457089936200864, "grad_norm": 1.1953125, "learning_rate": 1.3707160838752938e-05, "loss": 0.4454, "step": 8351 }, { "epoch": 1.1458461960622899, "grad_norm": 1.25, "learning_rate": 1.3705820172320852e-05, "loss": 0.5017, "step": 8352 }, { "epoch": 1.1459833985044934, "grad_norm": 1.0546875, "learning_rate": 1.3704479428672878e-05, "loss": 0.355, "step": 8353 }, { "epoch": 1.1461206009466969, "grad_norm": 1.1953125, "learning_rate": 1.3703138607836957e-05, "loss": 0.408, "step": 8354 }, { "epoch": 1.1462578033889004, "grad_norm": 1.3203125, "learning_rate": 1.3701797709841025e-05, "loss": 0.502, "step": 8355 }, { "epoch": 1.1463950058311039, "grad_norm": 1.3125, "learning_rate": 1.370045673471302e-05, "loss": 0.5337, "step": 8356 }, { "epoch": 1.1465322082733072, "grad_norm": 1.2421875, "learning_rate": 1.369911568248089e-05, "loss": 0.4409, "step": 8357 }, { "epoch": 1.1466694107155107, "grad_norm": 1.265625, "learning_rate": 1.369777455317257e-05, "loss": 0.4529, "step": 8358 }, { "epoch": 1.1468066131577141, "grad_norm": 1.171875, "learning_rate": 1.3696433346816007e-05, "loss": 0.4383, "step": 8359 }, { "epoch": 1.1469438155999176, "grad_norm": 1.390625, "learning_rate": 1.3695092063439146e-05, "loss": 0.473, "step": 8360 }, { "epoch": 1.1470810180421211, "grad_norm": 1.1953125, "learning_rate": 1.369375070306994e-05, "loss": 0.4352, "step": 8361 }, { "epoch": 1.1472182204843246, "grad_norm": 1.296875, "learning_rate": 1.3692409265736325e-05, "loss": 0.4459, "step": 8362 }, { "epoch": 1.1473554229265281, "grad_norm": 1.203125, "learning_rate": 1.3691067751466267e-05, "loss": 0.4716, "step": 8363 }, { "epoch": 1.1474926253687316, "grad_norm": 1.140625, "learning_rate": 1.3689726160287709e-05, "loss": 0.412, "step": 8364 }, { "epoch": 1.1476298278109351, "grad_norm": 1.234375, "learning_rate": 1.3688384492228608e-05, "loss": 0.398, "step": 8365 }, { "epoch": 1.1477670302531384, "grad_norm": 1.171875, "learning_rate": 1.3687042747316918e-05, "loss": 0.4196, "step": 8366 }, { "epoch": 1.147904232695342, "grad_norm": 1.2890625, "learning_rate": 1.3685700925580598e-05, "loss": 0.4628, "step": 8367 }, { "epoch": 1.1480414351375454, "grad_norm": 1.2578125, "learning_rate": 1.3684359027047605e-05, "loss": 0.4889, "step": 8368 }, { "epoch": 1.148178637579749, "grad_norm": 1.2265625, "learning_rate": 1.36830170517459e-05, "loss": 0.3845, "step": 8369 }, { "epoch": 1.1483158400219524, "grad_norm": 1.25, "learning_rate": 1.3681674999703447e-05, "loss": 0.4486, "step": 8370 }, { "epoch": 1.148453042464156, "grad_norm": 1.234375, "learning_rate": 1.3680332870948205e-05, "loss": 0.4378, "step": 8371 }, { "epoch": 1.1485902449063594, "grad_norm": 1.125, "learning_rate": 1.3678990665508142e-05, "loss": 0.4264, "step": 8372 }, { "epoch": 1.1487274473485627, "grad_norm": 1.1640625, "learning_rate": 1.3677648383411224e-05, "loss": 0.3996, "step": 8373 }, { "epoch": 1.1488646497907662, "grad_norm": 1.3203125, "learning_rate": 1.367630602468542e-05, "loss": 0.4497, "step": 8374 }, { "epoch": 1.1490018522329697, "grad_norm": 1.2265625, "learning_rate": 1.36749635893587e-05, "loss": 0.4291, "step": 8375 }, { "epoch": 1.1491390546751732, "grad_norm": 1.25, "learning_rate": 1.3673621077459033e-05, "loss": 0.4579, "step": 8376 }, { "epoch": 1.1492762571173767, "grad_norm": 1.265625, "learning_rate": 1.3672278489014397e-05, "loss": 0.4783, "step": 8377 }, { "epoch": 1.1494134595595802, "grad_norm": 1.1640625, "learning_rate": 1.3670935824052763e-05, "loss": 0.4333, "step": 8378 }, { "epoch": 1.1495506620017837, "grad_norm": 1.234375, "learning_rate": 1.3669593082602109e-05, "loss": 0.4805, "step": 8379 }, { "epoch": 1.1496878644439872, "grad_norm": 1.2109375, "learning_rate": 1.3668250264690407e-05, "loss": 0.4931, "step": 8380 }, { "epoch": 1.1498250668861907, "grad_norm": 1.21875, "learning_rate": 1.3666907370345646e-05, "loss": 0.4337, "step": 8381 }, { "epoch": 1.149962269328394, "grad_norm": 1.34375, "learning_rate": 1.36655643995958e-05, "loss": 0.485, "step": 8382 }, { "epoch": 1.1500994717705975, "grad_norm": 1.28125, "learning_rate": 1.3664221352468854e-05, "loss": 0.4763, "step": 8383 }, { "epoch": 1.150236674212801, "grad_norm": 1.296875, "learning_rate": 1.3662878228992793e-05, "loss": 0.4227, "step": 8384 }, { "epoch": 1.1503738766550045, "grad_norm": 1.3515625, "learning_rate": 1.3661535029195603e-05, "loss": 0.5237, "step": 8385 }, { "epoch": 1.150511079097208, "grad_norm": 1.3203125, "learning_rate": 1.366019175310527e-05, "loss": 0.5057, "step": 8386 }, { "epoch": 1.1506482815394115, "grad_norm": 1.1796875, "learning_rate": 1.3658848400749785e-05, "loss": 0.4253, "step": 8387 }, { "epoch": 1.150785483981615, "grad_norm": 1.296875, "learning_rate": 1.3657504972157134e-05, "loss": 0.4606, "step": 8388 }, { "epoch": 1.1509226864238182, "grad_norm": 1.0703125, "learning_rate": 1.3656161467355316e-05, "loss": 0.3744, "step": 8389 }, { "epoch": 1.1510598888660217, "grad_norm": 1.28125, "learning_rate": 1.3654817886372315e-05, "loss": 0.4892, "step": 8390 }, { "epoch": 1.1511970913082252, "grad_norm": 1.28125, "learning_rate": 1.365347422923614e-05, "loss": 0.4654, "step": 8391 }, { "epoch": 1.1513342937504287, "grad_norm": 1.0859375, "learning_rate": 1.3652130495974775e-05, "loss": 0.3856, "step": 8392 }, { "epoch": 1.1514714961926322, "grad_norm": 1.203125, "learning_rate": 1.3650786686616227e-05, "loss": 0.4547, "step": 8393 }, { "epoch": 1.1516086986348357, "grad_norm": 1.234375, "learning_rate": 1.364944280118849e-05, "loss": 0.4444, "step": 8394 }, { "epoch": 1.1517459010770392, "grad_norm": 1.3828125, "learning_rate": 1.3648098839719572e-05, "loss": 0.5278, "step": 8395 }, { "epoch": 1.1518831035192427, "grad_norm": 1.1796875, "learning_rate": 1.3646754802237475e-05, "loss": 0.4251, "step": 8396 }, { "epoch": 1.1520203059614462, "grad_norm": 1.171875, "learning_rate": 1.3645410688770198e-05, "loss": 0.4059, "step": 8397 }, { "epoch": 1.1521575084036495, "grad_norm": 1.3125, "learning_rate": 1.3644066499345752e-05, "loss": 0.4878, "step": 8398 }, { "epoch": 1.152294710845853, "grad_norm": 1.203125, "learning_rate": 1.3642722233992144e-05, "loss": 0.4223, "step": 8399 }, { "epoch": 1.1524319132880565, "grad_norm": 1.15625, "learning_rate": 1.3641377892737386e-05, "loss": 0.3494, "step": 8400 }, { "epoch": 1.15256911573026, "grad_norm": 1.21875, "learning_rate": 1.3640033475609486e-05, "loss": 0.4352, "step": 8401 }, { "epoch": 1.1527063181724635, "grad_norm": 1.328125, "learning_rate": 1.3638688982636458e-05, "loss": 0.4867, "step": 8402 }, { "epoch": 1.152843520614667, "grad_norm": 1.3046875, "learning_rate": 1.3637344413846318e-05, "loss": 0.5112, "step": 8403 }, { "epoch": 1.1529807230568705, "grad_norm": 1.21875, "learning_rate": 1.3635999769267081e-05, "loss": 0.4162, "step": 8404 }, { "epoch": 1.1531179254990738, "grad_norm": 1.2109375, "learning_rate": 1.3634655048926763e-05, "loss": 0.4596, "step": 8405 }, { "epoch": 1.1532551279412773, "grad_norm": 1.375, "learning_rate": 1.3633310252853385e-05, "loss": 0.5134, "step": 8406 }, { "epoch": 1.1533923303834808, "grad_norm": 1.2421875, "learning_rate": 1.3631965381074965e-05, "loss": 0.5038, "step": 8407 }, { "epoch": 1.1535295328256843, "grad_norm": 1.203125, "learning_rate": 1.3630620433619528e-05, "loss": 0.4763, "step": 8408 }, { "epoch": 1.1536667352678878, "grad_norm": 1.21875, "learning_rate": 1.3629275410515096e-05, "loss": 0.4207, "step": 8409 }, { "epoch": 1.1538039377100913, "grad_norm": 1.1875, "learning_rate": 1.3627930311789696e-05, "loss": 0.3918, "step": 8410 }, { "epoch": 1.1539411401522948, "grad_norm": 1.1484375, "learning_rate": 1.3626585137471353e-05, "loss": 0.3939, "step": 8411 }, { "epoch": 1.1540783425944983, "grad_norm": 1.2578125, "learning_rate": 1.3625239887588098e-05, "loss": 0.4813, "step": 8412 }, { "epoch": 1.1542155450367018, "grad_norm": 1.203125, "learning_rate": 1.3623894562167958e-05, "loss": 0.4639, "step": 8413 }, { "epoch": 1.154352747478905, "grad_norm": 1.1796875, "learning_rate": 1.3622549161238969e-05, "loss": 0.423, "step": 8414 }, { "epoch": 1.1544899499211085, "grad_norm": 1.3046875, "learning_rate": 1.3621203684829164e-05, "loss": 0.4521, "step": 8415 }, { "epoch": 1.154627152363312, "grad_norm": 1.265625, "learning_rate": 1.3619858132966575e-05, "loss": 0.4397, "step": 8416 }, { "epoch": 1.1547643548055155, "grad_norm": 1.21875, "learning_rate": 1.3618512505679235e-05, "loss": 0.438, "step": 8417 }, { "epoch": 1.154901557247719, "grad_norm": 1.1328125, "learning_rate": 1.361716680299519e-05, "loss": 0.3796, "step": 8418 }, { "epoch": 1.1550387596899225, "grad_norm": 1.203125, "learning_rate": 1.3615821024942474e-05, "loss": 0.4081, "step": 8419 }, { "epoch": 1.155175962132126, "grad_norm": 1.2265625, "learning_rate": 1.361447517154913e-05, "loss": 0.453, "step": 8420 }, { "epoch": 1.1553131645743293, "grad_norm": 1.1640625, "learning_rate": 1.3613129242843202e-05, "loss": 0.4344, "step": 8421 }, { "epoch": 1.1554503670165328, "grad_norm": 1.1953125, "learning_rate": 1.3611783238852731e-05, "loss": 0.4585, "step": 8422 }, { "epoch": 1.1555875694587363, "grad_norm": 1.0390625, "learning_rate": 1.361043715960577e-05, "loss": 0.3287, "step": 8423 }, { "epoch": 1.1557247719009398, "grad_norm": 1.2109375, "learning_rate": 1.3609091005130357e-05, "loss": 0.4299, "step": 8424 }, { "epoch": 1.1558619743431433, "grad_norm": 1.2421875, "learning_rate": 1.3607744775454545e-05, "loss": 0.412, "step": 8425 }, { "epoch": 1.1559991767853468, "grad_norm": 1.2578125, "learning_rate": 1.3606398470606386e-05, "loss": 0.4653, "step": 8426 }, { "epoch": 1.1561363792275503, "grad_norm": 1.3515625, "learning_rate": 1.3605052090613931e-05, "loss": 0.4826, "step": 8427 }, { "epoch": 1.1562735816697538, "grad_norm": 1.2578125, "learning_rate": 1.3603705635505234e-05, "loss": 0.4886, "step": 8428 }, { "epoch": 1.1564107841119573, "grad_norm": 1.125, "learning_rate": 1.3602359105308347e-05, "loss": 0.3671, "step": 8429 }, { "epoch": 1.1565479865541606, "grad_norm": 1.203125, "learning_rate": 1.3601012500051333e-05, "loss": 0.3932, "step": 8430 }, { "epoch": 1.156685188996364, "grad_norm": 1.25, "learning_rate": 1.3599665819762246e-05, "loss": 0.4667, "step": 8431 }, { "epoch": 1.1568223914385676, "grad_norm": 1.2890625, "learning_rate": 1.3598319064469148e-05, "loss": 0.462, "step": 8432 }, { "epoch": 1.156959593880771, "grad_norm": 1.2109375, "learning_rate": 1.3596972234200099e-05, "loss": 0.4431, "step": 8433 }, { "epoch": 1.1570967963229746, "grad_norm": 1.1328125, "learning_rate": 1.3595625328983162e-05, "loss": 0.4371, "step": 8434 }, { "epoch": 1.157233998765178, "grad_norm": 1.3515625, "learning_rate": 1.3594278348846404e-05, "loss": 0.5156, "step": 8435 }, { "epoch": 1.1573712012073816, "grad_norm": 1.2265625, "learning_rate": 1.3592931293817886e-05, "loss": 0.4598, "step": 8436 }, { "epoch": 1.1575084036495848, "grad_norm": 1.2109375, "learning_rate": 1.3591584163925682e-05, "loss": 0.4504, "step": 8437 }, { "epoch": 1.1576456060917883, "grad_norm": 1.140625, "learning_rate": 1.3590236959197856e-05, "loss": 0.3928, "step": 8438 }, { "epoch": 1.1577828085339918, "grad_norm": 1.1484375, "learning_rate": 1.3588889679662482e-05, "loss": 0.3755, "step": 8439 }, { "epoch": 1.1579200109761953, "grad_norm": 1.28125, "learning_rate": 1.3587542325347633e-05, "loss": 0.4866, "step": 8440 }, { "epoch": 1.1580572134183988, "grad_norm": 1.25, "learning_rate": 1.3586194896281383e-05, "loss": 0.4487, "step": 8441 }, { "epoch": 1.1581944158606023, "grad_norm": 1.2734375, "learning_rate": 1.3584847392491804e-05, "loss": 0.4516, "step": 8442 }, { "epoch": 1.1583316183028058, "grad_norm": 1.1484375, "learning_rate": 1.3583499814006977e-05, "loss": 0.4057, "step": 8443 }, { "epoch": 1.1584688207450093, "grad_norm": 1.421875, "learning_rate": 1.358215216085498e-05, "loss": 0.4692, "step": 8444 }, { "epoch": 1.1586060231872128, "grad_norm": 1.234375, "learning_rate": 1.358080443306389e-05, "loss": 0.5052, "step": 8445 }, { "epoch": 1.1587432256294161, "grad_norm": 1.3125, "learning_rate": 1.3579456630661792e-05, "loss": 0.4838, "step": 8446 }, { "epoch": 1.1588804280716196, "grad_norm": 1.3203125, "learning_rate": 1.3578108753676769e-05, "loss": 0.5189, "step": 8447 }, { "epoch": 1.1590176305138231, "grad_norm": 1.1328125, "learning_rate": 1.3576760802136903e-05, "loss": 0.4419, "step": 8448 }, { "epoch": 1.1591548329560266, "grad_norm": 1.1953125, "learning_rate": 1.3575412776070285e-05, "loss": 0.4448, "step": 8449 }, { "epoch": 1.1592920353982301, "grad_norm": 1.0703125, "learning_rate": 1.3574064675505e-05, "loss": 0.3663, "step": 8450 }, { "epoch": 1.1594292378404336, "grad_norm": 1.15625, "learning_rate": 1.357271650046914e-05, "loss": 0.3757, "step": 8451 }, { "epoch": 1.1595664402826371, "grad_norm": 1.203125, "learning_rate": 1.3571368250990791e-05, "loss": 0.4405, "step": 8452 }, { "epoch": 1.1597036427248404, "grad_norm": 1.1796875, "learning_rate": 1.357001992709805e-05, "loss": 0.414, "step": 8453 }, { "epoch": 1.1598408451670439, "grad_norm": 1.3515625, "learning_rate": 1.356867152881901e-05, "loss": 0.5411, "step": 8454 }, { "epoch": 1.1599780476092474, "grad_norm": 1.28125, "learning_rate": 1.3567323056181768e-05, "loss": 0.4452, "step": 8455 }, { "epoch": 1.1601152500514509, "grad_norm": 1.171875, "learning_rate": 1.356597450921442e-05, "loss": 0.4034, "step": 8456 }, { "epoch": 1.1602524524936544, "grad_norm": 1.2265625, "learning_rate": 1.3564625887945064e-05, "loss": 0.4129, "step": 8457 }, { "epoch": 1.1603896549358579, "grad_norm": 1.21875, "learning_rate": 1.3563277192401804e-05, "loss": 0.4377, "step": 8458 }, { "epoch": 1.1605268573780614, "grad_norm": 1.2890625, "learning_rate": 1.3561928422612738e-05, "loss": 0.4914, "step": 8459 }, { "epoch": 1.1606640598202649, "grad_norm": 1.21875, "learning_rate": 1.356057957860597e-05, "loss": 0.4695, "step": 8460 }, { "epoch": 1.1608012622624684, "grad_norm": 1.1484375, "learning_rate": 1.3559230660409608e-05, "loss": 0.4201, "step": 8461 }, { "epoch": 1.1609384647046717, "grad_norm": 1.328125, "learning_rate": 1.3557881668051754e-05, "loss": 0.479, "step": 8462 }, { "epoch": 1.1610756671468752, "grad_norm": 1.2109375, "learning_rate": 1.355653260156052e-05, "loss": 0.449, "step": 8463 }, { "epoch": 1.1612128695890787, "grad_norm": 1.0625, "learning_rate": 1.3555183460964012e-05, "loss": 0.3456, "step": 8464 }, { "epoch": 1.1613500720312822, "grad_norm": 1.21875, "learning_rate": 1.3553834246290347e-05, "loss": 0.4632, "step": 8465 }, { "epoch": 1.1614872744734857, "grad_norm": 1.1015625, "learning_rate": 1.3552484957567634e-05, "loss": 0.4636, "step": 8466 }, { "epoch": 1.1616244769156892, "grad_norm": 1.2109375, "learning_rate": 1.3551135594823985e-05, "loss": 0.5026, "step": 8467 }, { "epoch": 1.1617616793578927, "grad_norm": 1.2890625, "learning_rate": 1.354978615808752e-05, "loss": 0.4632, "step": 8468 }, { "epoch": 1.161898881800096, "grad_norm": 1.265625, "learning_rate": 1.3548436647386356e-05, "loss": 0.4247, "step": 8469 }, { "epoch": 1.1620360842422994, "grad_norm": 1.234375, "learning_rate": 1.3547087062748608e-05, "loss": 0.4613, "step": 8470 }, { "epoch": 1.162173286684503, "grad_norm": 1.203125, "learning_rate": 1.3545737404202399e-05, "loss": 0.4177, "step": 8471 }, { "epoch": 1.1623104891267064, "grad_norm": 1.4609375, "learning_rate": 1.3544387671775852e-05, "loss": 0.5434, "step": 8472 }, { "epoch": 1.16244769156891, "grad_norm": 1.2578125, "learning_rate": 1.3543037865497088e-05, "loss": 0.5062, "step": 8473 }, { "epoch": 1.1625848940111134, "grad_norm": 1.28125, "learning_rate": 1.3541687985394236e-05, "loss": 0.4556, "step": 8474 }, { "epoch": 1.162722096453317, "grad_norm": 1.1640625, "learning_rate": 1.354033803149542e-05, "loss": 0.3959, "step": 8475 }, { "epoch": 1.1628592988955204, "grad_norm": 1.296875, "learning_rate": 1.3538988003828766e-05, "loss": 0.4954, "step": 8476 }, { "epoch": 1.162996501337724, "grad_norm": 1.25, "learning_rate": 1.3537637902422409e-05, "loss": 0.4538, "step": 8477 }, { "epoch": 1.1631337037799272, "grad_norm": 1.1484375, "learning_rate": 1.3536287727304478e-05, "loss": 0.4314, "step": 8478 }, { "epoch": 1.1632709062221307, "grad_norm": 1.2109375, "learning_rate": 1.3534937478503102e-05, "loss": 0.4595, "step": 8479 }, { "epoch": 1.1634081086643342, "grad_norm": 1.2421875, "learning_rate": 1.3533587156046422e-05, "loss": 0.4419, "step": 8480 }, { "epoch": 1.1635453111065377, "grad_norm": 1.3671875, "learning_rate": 1.3532236759962567e-05, "loss": 0.4771, "step": 8481 }, { "epoch": 1.1636825135487412, "grad_norm": 1.15625, "learning_rate": 1.353088629027968e-05, "loss": 0.4172, "step": 8482 }, { "epoch": 1.1638197159909447, "grad_norm": 1.2265625, "learning_rate": 1.3529535747025895e-05, "loss": 0.4673, "step": 8483 }, { "epoch": 1.1639569184331482, "grad_norm": 1.234375, "learning_rate": 1.3528185130229358e-05, "loss": 0.4772, "step": 8484 }, { "epoch": 1.1640941208753515, "grad_norm": 1.1953125, "learning_rate": 1.3526834439918206e-05, "loss": 0.4534, "step": 8485 }, { "epoch": 1.164231323317555, "grad_norm": 1.171875, "learning_rate": 1.3525483676120587e-05, "loss": 0.4606, "step": 8486 }, { "epoch": 1.1643685257597585, "grad_norm": 1.3046875, "learning_rate": 1.3524132838864642e-05, "loss": 0.4909, "step": 8487 }, { "epoch": 1.164505728201962, "grad_norm": 1.21875, "learning_rate": 1.3522781928178519e-05, "loss": 0.4469, "step": 8488 }, { "epoch": 1.1646429306441655, "grad_norm": 1.1796875, "learning_rate": 1.3521430944090366e-05, "loss": 0.4284, "step": 8489 }, { "epoch": 1.164780133086369, "grad_norm": 1.1796875, "learning_rate": 1.3520079886628333e-05, "loss": 0.4038, "step": 8490 }, { "epoch": 1.1649173355285725, "grad_norm": 1.109375, "learning_rate": 1.351872875582057e-05, "loss": 0.331, "step": 8491 }, { "epoch": 1.165054537970776, "grad_norm": 1.1328125, "learning_rate": 1.351737755169523e-05, "loss": 0.4092, "step": 8492 }, { "epoch": 1.1651917404129795, "grad_norm": 1.1484375, "learning_rate": 1.351602627428047e-05, "loss": 0.3852, "step": 8493 }, { "epoch": 1.1653289428551827, "grad_norm": 1.296875, "learning_rate": 1.3514674923604445e-05, "loss": 0.4787, "step": 8494 }, { "epoch": 1.1654661452973862, "grad_norm": 1.296875, "learning_rate": 1.3513323499695308e-05, "loss": 0.4239, "step": 8495 }, { "epoch": 1.1656033477395897, "grad_norm": 1.2109375, "learning_rate": 1.351197200258122e-05, "loss": 0.4745, "step": 8496 }, { "epoch": 1.1657405501817932, "grad_norm": 1.28125, "learning_rate": 1.3510620432290343e-05, "loss": 0.4346, "step": 8497 }, { "epoch": 1.1658777526239967, "grad_norm": 1.1875, "learning_rate": 1.3509268788850837e-05, "loss": 0.4278, "step": 8498 }, { "epoch": 1.1660149550662002, "grad_norm": 1.296875, "learning_rate": 1.3507917072290865e-05, "loss": 0.5375, "step": 8499 }, { "epoch": 1.1661521575084037, "grad_norm": 1.21875, "learning_rate": 1.3506565282638597e-05, "loss": 0.431, "step": 8500 }, { "epoch": 1.166289359950607, "grad_norm": 1.171875, "learning_rate": 1.3505213419922192e-05, "loss": 0.4339, "step": 8501 }, { "epoch": 1.1664265623928105, "grad_norm": 1.25, "learning_rate": 1.3503861484169823e-05, "loss": 0.4883, "step": 8502 }, { "epoch": 1.166563764835014, "grad_norm": 1.2109375, "learning_rate": 1.350250947540966e-05, "loss": 0.4637, "step": 8503 }, { "epoch": 1.1667009672772175, "grad_norm": 1.171875, "learning_rate": 1.3501157393669868e-05, "loss": 0.4096, "step": 8504 }, { "epoch": 1.166838169719421, "grad_norm": 1.125, "learning_rate": 1.3499805238978624e-05, "loss": 0.4311, "step": 8505 }, { "epoch": 1.1669753721616245, "grad_norm": 1.2578125, "learning_rate": 1.3498453011364102e-05, "loss": 0.4927, "step": 8506 }, { "epoch": 1.167112574603828, "grad_norm": 1.1171875, "learning_rate": 1.3497100710854476e-05, "loss": 0.3771, "step": 8507 }, { "epoch": 1.1672497770460315, "grad_norm": 1.1640625, "learning_rate": 1.3495748337477923e-05, "loss": 0.4686, "step": 8508 }, { "epoch": 1.167386979488235, "grad_norm": 1.1796875, "learning_rate": 1.3494395891262625e-05, "loss": 0.3982, "step": 8509 }, { "epoch": 1.1675241819304383, "grad_norm": 1.2890625, "learning_rate": 1.3493043372236755e-05, "loss": 0.5072, "step": 8510 }, { "epoch": 1.1676613843726418, "grad_norm": 1.2734375, "learning_rate": 1.3491690780428503e-05, "loss": 0.4778, "step": 8511 }, { "epoch": 1.1677985868148453, "grad_norm": 1.25, "learning_rate": 1.3490338115866048e-05, "loss": 0.4169, "step": 8512 }, { "epoch": 1.1679357892570488, "grad_norm": 1.203125, "learning_rate": 1.3488985378577574e-05, "loss": 0.417, "step": 8513 }, { "epoch": 1.1680729916992523, "grad_norm": 1.2109375, "learning_rate": 1.348763256859127e-05, "loss": 0.4408, "step": 8514 }, { "epoch": 1.1682101941414558, "grad_norm": 1.2265625, "learning_rate": 1.3486279685935316e-05, "loss": 0.4536, "step": 8515 }, { "epoch": 1.1683473965836593, "grad_norm": 1.2421875, "learning_rate": 1.3484926730637913e-05, "loss": 0.46, "step": 8516 }, { "epoch": 1.1684845990258625, "grad_norm": 1.203125, "learning_rate": 1.3483573702727241e-05, "loss": 0.3984, "step": 8517 }, { "epoch": 1.168621801468066, "grad_norm": 1.3828125, "learning_rate": 1.3482220602231499e-05, "loss": 0.4916, "step": 8518 }, { "epoch": 1.1687590039102695, "grad_norm": 1.140625, "learning_rate": 1.3480867429178878e-05, "loss": 0.3786, "step": 8519 }, { "epoch": 1.168896206352473, "grad_norm": 1.296875, "learning_rate": 1.3479514183597578e-05, "loss": 0.4949, "step": 8520 }, { "epoch": 1.1690334087946765, "grad_norm": 1.125, "learning_rate": 1.3478160865515785e-05, "loss": 0.3891, "step": 8521 }, { "epoch": 1.16917061123688, "grad_norm": 1.2109375, "learning_rate": 1.3476807474961708e-05, "loss": 0.4412, "step": 8522 }, { "epoch": 1.1693078136790835, "grad_norm": 1.234375, "learning_rate": 1.347545401196354e-05, "loss": 0.4544, "step": 8523 }, { "epoch": 1.169445016121287, "grad_norm": 1.2265625, "learning_rate": 1.3474100476549485e-05, "loss": 0.4897, "step": 8524 }, { "epoch": 1.1695822185634905, "grad_norm": 1.140625, "learning_rate": 1.3472746868747746e-05, "loss": 0.4022, "step": 8525 }, { "epoch": 1.1697194210056938, "grad_norm": 1.1484375, "learning_rate": 1.3471393188586526e-05, "loss": 0.4362, "step": 8526 }, { "epoch": 1.1698566234478973, "grad_norm": 1.2578125, "learning_rate": 1.3470039436094032e-05, "loss": 0.4321, "step": 8527 }, { "epoch": 1.1699938258901008, "grad_norm": 1.2578125, "learning_rate": 1.3468685611298468e-05, "loss": 0.4737, "step": 8528 }, { "epoch": 1.1701310283323043, "grad_norm": 1.140625, "learning_rate": 1.346733171422805e-05, "loss": 0.4075, "step": 8529 }, { "epoch": 1.1702682307745078, "grad_norm": 1.125, "learning_rate": 1.3465977744910982e-05, "loss": 0.4156, "step": 8530 }, { "epoch": 1.1704054332167113, "grad_norm": 1.171875, "learning_rate": 1.3464623703375481e-05, "loss": 0.4127, "step": 8531 }, { "epoch": 1.1705426356589148, "grad_norm": 1.2265625, "learning_rate": 1.3463269589649754e-05, "loss": 0.4419, "step": 8532 }, { "epoch": 1.170679838101118, "grad_norm": 1.203125, "learning_rate": 1.3461915403762019e-05, "loss": 0.4454, "step": 8533 }, { "epoch": 1.1708170405433216, "grad_norm": 1.1640625, "learning_rate": 1.3460561145740493e-05, "loss": 0.4098, "step": 8534 }, { "epoch": 1.170954242985525, "grad_norm": 1.265625, "learning_rate": 1.3459206815613393e-05, "loss": 0.5195, "step": 8535 }, { "epoch": 1.1710914454277286, "grad_norm": 1.2109375, "learning_rate": 1.3457852413408939e-05, "loss": 0.4367, "step": 8536 }, { "epoch": 1.171228647869932, "grad_norm": 1.2734375, "learning_rate": 1.3456497939155348e-05, "loss": 0.5134, "step": 8537 }, { "epoch": 1.1713658503121356, "grad_norm": 1.2109375, "learning_rate": 1.3455143392880849e-05, "loss": 0.4552, "step": 8538 }, { "epoch": 1.171503052754339, "grad_norm": 1.21875, "learning_rate": 1.3453788774613663e-05, "loss": 0.4364, "step": 8539 }, { "epoch": 1.1716402551965426, "grad_norm": 1.2421875, "learning_rate": 1.3452434084382015e-05, "loss": 0.4961, "step": 8540 }, { "epoch": 1.171777457638746, "grad_norm": 1.1796875, "learning_rate": 1.3451079322214128e-05, "loss": 0.434, "step": 8541 }, { "epoch": 1.1719146600809494, "grad_norm": 1.2734375, "learning_rate": 1.3449724488138238e-05, "loss": 0.4667, "step": 8542 }, { "epoch": 1.1720518625231529, "grad_norm": 1.296875, "learning_rate": 1.344836958218257e-05, "loss": 0.4894, "step": 8543 }, { "epoch": 1.1721890649653564, "grad_norm": 1.1796875, "learning_rate": 1.3447014604375355e-05, "loss": 0.4173, "step": 8544 }, { "epoch": 1.1723262674075599, "grad_norm": 1.1875, "learning_rate": 1.3445659554744827e-05, "loss": 0.4254, "step": 8545 }, { "epoch": 1.1724634698497634, "grad_norm": 1.234375, "learning_rate": 1.3444304433319222e-05, "loss": 0.4709, "step": 8546 }, { "epoch": 1.1726006722919669, "grad_norm": 1.2578125, "learning_rate": 1.3442949240126773e-05, "loss": 0.4716, "step": 8547 }, { "epoch": 1.1727378747341703, "grad_norm": 1.1953125, "learning_rate": 1.344159397519572e-05, "loss": 0.4026, "step": 8548 }, { "epoch": 1.1728750771763736, "grad_norm": 1.1015625, "learning_rate": 1.34402386385543e-05, "loss": 0.3985, "step": 8549 }, { "epoch": 1.1730122796185771, "grad_norm": 1.140625, "learning_rate": 1.3438883230230753e-05, "loss": 0.379, "step": 8550 }, { "epoch": 1.1731494820607806, "grad_norm": 1.2578125, "learning_rate": 1.3437527750253321e-05, "loss": 0.4776, "step": 8551 }, { "epoch": 1.1732866845029841, "grad_norm": 1.1875, "learning_rate": 1.3436172198650246e-05, "loss": 0.4014, "step": 8552 }, { "epoch": 1.1734238869451876, "grad_norm": 1.1953125, "learning_rate": 1.3434816575449779e-05, "loss": 0.4276, "step": 8553 }, { "epoch": 1.1735610893873911, "grad_norm": 1.171875, "learning_rate": 1.3433460880680159e-05, "loss": 0.4068, "step": 8554 }, { "epoch": 1.1736982918295946, "grad_norm": 1.234375, "learning_rate": 1.3432105114369636e-05, "loss": 0.4518, "step": 8555 }, { "epoch": 1.1738354942717981, "grad_norm": 1.3671875, "learning_rate": 1.3430749276546461e-05, "loss": 0.4991, "step": 8556 }, { "epoch": 1.1739726967140016, "grad_norm": 1.296875, "learning_rate": 1.3429393367238886e-05, "loss": 0.4495, "step": 8557 }, { "epoch": 1.174109899156205, "grad_norm": 1.375, "learning_rate": 1.3428037386475157e-05, "loss": 0.4559, "step": 8558 }, { "epoch": 1.1742471015984084, "grad_norm": 1.2421875, "learning_rate": 1.3426681334283534e-05, "loss": 0.4319, "step": 8559 }, { "epoch": 1.174384304040612, "grad_norm": 1.3203125, "learning_rate": 1.3425325210692268e-05, "loss": 0.4733, "step": 8560 }, { "epoch": 1.1745215064828154, "grad_norm": 1.21875, "learning_rate": 1.342396901572962e-05, "loss": 0.4757, "step": 8561 }, { "epoch": 1.174658708925019, "grad_norm": 1.265625, "learning_rate": 1.3422612749423844e-05, "loss": 0.3935, "step": 8562 }, { "epoch": 1.1747959113672224, "grad_norm": 1.2109375, "learning_rate": 1.34212564118032e-05, "loss": 0.4426, "step": 8563 }, { "epoch": 1.1749331138094259, "grad_norm": 1.15625, "learning_rate": 1.3419900002895953e-05, "loss": 0.4008, "step": 8564 }, { "epoch": 1.1750703162516292, "grad_norm": 1.171875, "learning_rate": 1.3418543522730366e-05, "loss": 0.4272, "step": 8565 }, { "epoch": 1.1752075186938327, "grad_norm": 1.2109375, "learning_rate": 1.3417186971334698e-05, "loss": 0.4679, "step": 8566 }, { "epoch": 1.1753447211360362, "grad_norm": 1.3125, "learning_rate": 1.3415830348737213e-05, "loss": 0.4802, "step": 8567 }, { "epoch": 1.1754819235782397, "grad_norm": 1.265625, "learning_rate": 1.3414473654966186e-05, "loss": 0.467, "step": 8568 }, { "epoch": 1.1756191260204432, "grad_norm": 1.2265625, "learning_rate": 1.341311689004988e-05, "loss": 0.4358, "step": 8569 }, { "epoch": 1.1757563284626467, "grad_norm": 1.2265625, "learning_rate": 1.341176005401657e-05, "loss": 0.4409, "step": 8570 }, { "epoch": 1.1758935309048502, "grad_norm": 1.25, "learning_rate": 1.341040314689452e-05, "loss": 0.4526, "step": 8571 }, { "epoch": 1.1760307333470537, "grad_norm": 1.125, "learning_rate": 1.340904616871201e-05, "loss": 0.3803, "step": 8572 }, { "epoch": 1.1761679357892572, "grad_norm": 1.203125, "learning_rate": 1.3407689119497311e-05, "loss": 0.4345, "step": 8573 }, { "epoch": 1.1763051382314604, "grad_norm": 1.234375, "learning_rate": 1.3406331999278704e-05, "loss": 0.4771, "step": 8574 }, { "epoch": 1.176442340673664, "grad_norm": 1.2109375, "learning_rate": 1.3404974808084456e-05, "loss": 0.4443, "step": 8575 }, { "epoch": 1.1765795431158674, "grad_norm": 1.1484375, "learning_rate": 1.3403617545942855e-05, "loss": 0.4476, "step": 8576 }, { "epoch": 1.176716745558071, "grad_norm": 1.25, "learning_rate": 1.340226021288218e-05, "loss": 0.4418, "step": 8577 }, { "epoch": 1.1768539480002744, "grad_norm": 1.265625, "learning_rate": 1.340090280893071e-05, "loss": 0.4699, "step": 8578 }, { "epoch": 1.176991150442478, "grad_norm": 1.3046875, "learning_rate": 1.3399545334116728e-05, "loss": 0.4301, "step": 8579 }, { "epoch": 1.1771283528846814, "grad_norm": 1.234375, "learning_rate": 1.3398187788468526e-05, "loss": 0.4384, "step": 8580 }, { "epoch": 1.1772655553268847, "grad_norm": 1.109375, "learning_rate": 1.3396830172014383e-05, "loss": 0.3638, "step": 8581 }, { "epoch": 1.1774027577690882, "grad_norm": 1.484375, "learning_rate": 1.3395472484782588e-05, "loss": 0.422, "step": 8582 }, { "epoch": 1.1775399602112917, "grad_norm": 1.40625, "learning_rate": 1.3394114726801433e-05, "loss": 0.5093, "step": 8583 }, { "epoch": 1.1776771626534952, "grad_norm": 1.1953125, "learning_rate": 1.3392756898099206e-05, "loss": 0.4697, "step": 8584 }, { "epoch": 1.1778143650956987, "grad_norm": 1.3203125, "learning_rate": 1.3391398998704201e-05, "loss": 0.511, "step": 8585 }, { "epoch": 1.1779515675379022, "grad_norm": 1.2734375, "learning_rate": 1.3390041028644709e-05, "loss": 0.4649, "step": 8586 }, { "epoch": 1.1780887699801057, "grad_norm": 1.2265625, "learning_rate": 1.3388682987949028e-05, "loss": 0.4489, "step": 8587 }, { "epoch": 1.1782259724223092, "grad_norm": 1.3203125, "learning_rate": 1.3387324876645455e-05, "loss": 0.5057, "step": 8588 }, { "epoch": 1.1783631748645127, "grad_norm": 1.1875, "learning_rate": 1.3385966694762287e-05, "loss": 0.4083, "step": 8589 }, { "epoch": 1.178500377306716, "grad_norm": 1.2421875, "learning_rate": 1.3384608442327824e-05, "loss": 0.4915, "step": 8590 }, { "epoch": 1.1786375797489195, "grad_norm": 1.1796875, "learning_rate": 1.3383250119370365e-05, "loss": 0.3891, "step": 8591 }, { "epoch": 1.178774782191123, "grad_norm": 1.15625, "learning_rate": 1.3381891725918217e-05, "loss": 0.4145, "step": 8592 }, { "epoch": 1.1789119846333265, "grad_norm": 1.15625, "learning_rate": 1.338053326199968e-05, "loss": 0.4178, "step": 8593 }, { "epoch": 1.17904918707553, "grad_norm": 1.2265625, "learning_rate": 1.337917472764306e-05, "loss": 0.4291, "step": 8594 }, { "epoch": 1.1791863895177335, "grad_norm": 1.1953125, "learning_rate": 1.3377816122876665e-05, "loss": 0.4253, "step": 8595 }, { "epoch": 1.179323591959937, "grad_norm": 1.203125, "learning_rate": 1.3376457447728804e-05, "loss": 0.4403, "step": 8596 }, { "epoch": 1.1794607944021402, "grad_norm": 1.1796875, "learning_rate": 1.3375098702227782e-05, "loss": 0.3786, "step": 8597 }, { "epoch": 1.1795979968443437, "grad_norm": 1.0234375, "learning_rate": 1.337373988640192e-05, "loss": 0.3536, "step": 8598 }, { "epoch": 1.1797351992865472, "grad_norm": 1.2578125, "learning_rate": 1.337238100027952e-05, "loss": 0.4741, "step": 8599 }, { "epoch": 1.1798724017287507, "grad_norm": 1.265625, "learning_rate": 1.3371022043888903e-05, "loss": 0.4333, "step": 8600 }, { "epoch": 1.1800096041709542, "grad_norm": 1.1875, "learning_rate": 1.3369663017258385e-05, "loss": 0.4116, "step": 8601 }, { "epoch": 1.1801468066131577, "grad_norm": 1.15625, "learning_rate": 1.336830392041628e-05, "loss": 0.4387, "step": 8602 }, { "epoch": 1.1802840090553612, "grad_norm": 1.21875, "learning_rate": 1.336694475339091e-05, "loss": 0.4496, "step": 8603 }, { "epoch": 1.1804212114975647, "grad_norm": 1.2109375, "learning_rate": 1.336558551621059e-05, "loss": 0.4388, "step": 8604 }, { "epoch": 1.1805584139397682, "grad_norm": 1.4609375, "learning_rate": 1.3364226208903644e-05, "loss": 0.4853, "step": 8605 }, { "epoch": 1.1806956163819715, "grad_norm": 1.1875, "learning_rate": 1.3362866831498398e-05, "loss": 0.4021, "step": 8606 }, { "epoch": 1.180832818824175, "grad_norm": 1.28125, "learning_rate": 1.3361507384023171e-05, "loss": 0.4516, "step": 8607 }, { "epoch": 1.1809700212663785, "grad_norm": 1.140625, "learning_rate": 1.3360147866506297e-05, "loss": 0.4159, "step": 8608 }, { "epoch": 1.181107223708582, "grad_norm": 1.046875, "learning_rate": 1.3358788278976094e-05, "loss": 0.3421, "step": 8609 }, { "epoch": 1.1812444261507855, "grad_norm": 1.234375, "learning_rate": 1.33574286214609e-05, "loss": 0.4212, "step": 8610 }, { "epoch": 1.181381628592989, "grad_norm": 1.203125, "learning_rate": 1.3356068893989038e-05, "loss": 0.4219, "step": 8611 }, { "epoch": 1.1815188310351925, "grad_norm": 1.1875, "learning_rate": 1.3354709096588843e-05, "loss": 0.44, "step": 8612 }, { "epoch": 1.1816560334773958, "grad_norm": 1.2890625, "learning_rate": 1.3353349229288648e-05, "loss": 0.4724, "step": 8613 }, { "epoch": 1.1817932359195993, "grad_norm": 1.203125, "learning_rate": 1.335198929211679e-05, "loss": 0.5059, "step": 8614 }, { "epoch": 1.1819304383618028, "grad_norm": 1.1640625, "learning_rate": 1.3350629285101596e-05, "loss": 0.4028, "step": 8615 }, { "epoch": 1.1820676408040063, "grad_norm": 1.3984375, "learning_rate": 1.3349269208271415e-05, "loss": 0.4917, "step": 8616 }, { "epoch": 1.1822048432462098, "grad_norm": 1.4921875, "learning_rate": 1.3347909061654582e-05, "loss": 0.4086, "step": 8617 }, { "epoch": 1.1823420456884133, "grad_norm": 1.2734375, "learning_rate": 1.3346548845279436e-05, "loss": 0.4365, "step": 8618 }, { "epoch": 1.1824792481306168, "grad_norm": 1.1328125, "learning_rate": 1.3345188559174322e-05, "loss": 0.4212, "step": 8619 }, { "epoch": 1.1826164505728203, "grad_norm": 1.1875, "learning_rate": 1.3343828203367578e-05, "loss": 0.4187, "step": 8620 }, { "epoch": 1.1827536530150238, "grad_norm": 1.09375, "learning_rate": 1.3342467777887555e-05, "loss": 0.358, "step": 8621 }, { "epoch": 1.182890855457227, "grad_norm": 1.2421875, "learning_rate": 1.3341107282762597e-05, "loss": 0.4635, "step": 8622 }, { "epoch": 1.1830280578994306, "grad_norm": 1.3046875, "learning_rate": 1.3339746718021049e-05, "loss": 0.4691, "step": 8623 }, { "epoch": 1.183165260341634, "grad_norm": 1.265625, "learning_rate": 1.3338386083691264e-05, "loss": 0.4912, "step": 8624 }, { "epoch": 1.1833024627838375, "grad_norm": 1.3671875, "learning_rate": 1.333702537980159e-05, "loss": 0.4922, "step": 8625 }, { "epoch": 1.183439665226041, "grad_norm": 1.2578125, "learning_rate": 1.3335664606380384e-05, "loss": 0.4441, "step": 8626 }, { "epoch": 1.1835768676682445, "grad_norm": 1.1875, "learning_rate": 1.3334303763455991e-05, "loss": 0.4018, "step": 8627 }, { "epoch": 1.183714070110448, "grad_norm": 1.2109375, "learning_rate": 1.3332942851056778e-05, "loss": 0.4355, "step": 8628 }, { "epoch": 1.1838512725526513, "grad_norm": 1.265625, "learning_rate": 1.333158186921109e-05, "loss": 0.4915, "step": 8629 }, { "epoch": 1.1839884749948548, "grad_norm": 1.2109375, "learning_rate": 1.333022081794729e-05, "loss": 0.4786, "step": 8630 }, { "epoch": 1.1841256774370583, "grad_norm": 1.2109375, "learning_rate": 1.332885969729374e-05, "loss": 0.4291, "step": 8631 }, { "epoch": 1.1842628798792618, "grad_norm": 1.2734375, "learning_rate": 1.3327498507278793e-05, "loss": 0.4508, "step": 8632 }, { "epoch": 1.1844000823214653, "grad_norm": 1.1484375, "learning_rate": 1.332613724793082e-05, "loss": 0.4185, "step": 8633 }, { "epoch": 1.1845372847636688, "grad_norm": 1.2421875, "learning_rate": 1.3324775919278176e-05, "loss": 0.4515, "step": 8634 }, { "epoch": 1.1846744872058723, "grad_norm": 1.0703125, "learning_rate": 1.3323414521349237e-05, "loss": 0.3284, "step": 8635 }, { "epoch": 1.1848116896480758, "grad_norm": 1.2421875, "learning_rate": 1.3322053054172362e-05, "loss": 0.4759, "step": 8636 }, { "epoch": 1.1849488920902793, "grad_norm": 1.265625, "learning_rate": 1.332069151777592e-05, "loss": 0.4436, "step": 8637 }, { "epoch": 1.1850860945324826, "grad_norm": 1.171875, "learning_rate": 1.331932991218828e-05, "loss": 0.4291, "step": 8638 }, { "epoch": 1.185223296974686, "grad_norm": 1.0859375, "learning_rate": 1.3317968237437815e-05, "loss": 0.351, "step": 8639 }, { "epoch": 1.1853604994168896, "grad_norm": 1.2578125, "learning_rate": 1.3316606493552897e-05, "loss": 0.4569, "step": 8640 }, { "epoch": 1.185497701859093, "grad_norm": 1.140625, "learning_rate": 1.3315244680561899e-05, "loss": 0.3813, "step": 8641 }, { "epoch": 1.1856349043012966, "grad_norm": 1.21875, "learning_rate": 1.3313882798493194e-05, "loss": 0.4547, "step": 8642 }, { "epoch": 1.1857721067435, "grad_norm": 1.25, "learning_rate": 1.3312520847375164e-05, "loss": 0.4308, "step": 8643 }, { "epoch": 1.1859093091857036, "grad_norm": 1.28125, "learning_rate": 1.3311158827236184e-05, "loss": 0.459, "step": 8644 }, { "epoch": 1.1860465116279069, "grad_norm": 1.1796875, "learning_rate": 1.3309796738104632e-05, "loss": 0.3986, "step": 8645 }, { "epoch": 1.1861837140701104, "grad_norm": 1.1328125, "learning_rate": 1.3308434580008893e-05, "loss": 0.3922, "step": 8646 }, { "epoch": 1.1863209165123139, "grad_norm": 1.28125, "learning_rate": 1.3307072352977349e-05, "loss": 0.4312, "step": 8647 }, { "epoch": 1.1864581189545174, "grad_norm": 1.2421875, "learning_rate": 1.330571005703838e-05, "loss": 0.382, "step": 8648 }, { "epoch": 1.1865953213967209, "grad_norm": 1.1875, "learning_rate": 1.3304347692220372e-05, "loss": 0.44, "step": 8649 }, { "epoch": 1.1867325238389244, "grad_norm": 1.2265625, "learning_rate": 1.3302985258551715e-05, "loss": 0.4892, "step": 8650 }, { "epoch": 1.1868697262811279, "grad_norm": 1.34375, "learning_rate": 1.3301622756060797e-05, "loss": 0.5246, "step": 8651 }, { "epoch": 1.1870069287233314, "grad_norm": 1.1640625, "learning_rate": 1.3300260184776003e-05, "loss": 0.4233, "step": 8652 }, { "epoch": 1.1871441311655349, "grad_norm": 1.2265625, "learning_rate": 1.3298897544725729e-05, "loss": 0.4608, "step": 8653 }, { "epoch": 1.1872813336077381, "grad_norm": 1.359375, "learning_rate": 1.3297534835938366e-05, "loss": 0.5101, "step": 8654 }, { "epoch": 1.1874185360499416, "grad_norm": 1.2421875, "learning_rate": 1.3296172058442309e-05, "loss": 0.4343, "step": 8655 }, { "epoch": 1.1875557384921451, "grad_norm": 1.3125, "learning_rate": 1.3294809212265948e-05, "loss": 0.5149, "step": 8656 }, { "epoch": 1.1876929409343486, "grad_norm": 1.1875, "learning_rate": 1.3293446297437686e-05, "loss": 0.3569, "step": 8657 }, { "epoch": 1.1878301433765521, "grad_norm": 1.1171875, "learning_rate": 1.329208331398592e-05, "loss": 0.3534, "step": 8658 }, { "epoch": 1.1879673458187556, "grad_norm": 1.2890625, "learning_rate": 1.3290720261939048e-05, "loss": 0.4842, "step": 8659 }, { "epoch": 1.1881045482609591, "grad_norm": 1.1875, "learning_rate": 1.328935714132547e-05, "loss": 0.3918, "step": 8660 }, { "epoch": 1.1882417507031624, "grad_norm": 1.296875, "learning_rate": 1.3287993952173592e-05, "loss": 0.4783, "step": 8661 }, { "epoch": 1.188378953145366, "grad_norm": 1.15625, "learning_rate": 1.3286630694511815e-05, "loss": 0.4615, "step": 8662 }, { "epoch": 1.1885161555875694, "grad_norm": 1.203125, "learning_rate": 1.3285267368368546e-05, "loss": 0.4133, "step": 8663 }, { "epoch": 1.188653358029773, "grad_norm": 1.0625, "learning_rate": 1.3283903973772195e-05, "loss": 0.3649, "step": 8664 }, { "epoch": 1.1887905604719764, "grad_norm": 1.2734375, "learning_rate": 1.328254051075116e-05, "loss": 0.4426, "step": 8665 }, { "epoch": 1.18892776291418, "grad_norm": 1.28125, "learning_rate": 1.3281176979333861e-05, "loss": 0.4797, "step": 8666 }, { "epoch": 1.1890649653563834, "grad_norm": 1.203125, "learning_rate": 1.3279813379548703e-05, "loss": 0.4404, "step": 8667 }, { "epoch": 1.189202167798587, "grad_norm": 1.25, "learning_rate": 1.3278449711424101e-05, "loss": 0.4466, "step": 8668 }, { "epoch": 1.1893393702407904, "grad_norm": 1.171875, "learning_rate": 1.3277085974988468e-05, "loss": 0.3866, "step": 8669 }, { "epoch": 1.1894765726829937, "grad_norm": 1.1875, "learning_rate": 1.3275722170270222e-05, "loss": 0.4019, "step": 8670 }, { "epoch": 1.1896137751251972, "grad_norm": 1.296875, "learning_rate": 1.3274358297297776e-05, "loss": 0.4528, "step": 8671 }, { "epoch": 1.1897509775674007, "grad_norm": 1.2421875, "learning_rate": 1.3272994356099551e-05, "loss": 0.4609, "step": 8672 }, { "epoch": 1.1898881800096042, "grad_norm": 1.265625, "learning_rate": 1.3271630346703966e-05, "loss": 0.428, "step": 8673 }, { "epoch": 1.1900253824518077, "grad_norm": 1.3828125, "learning_rate": 1.327026626913944e-05, "loss": 0.5119, "step": 8674 }, { "epoch": 1.1901625848940112, "grad_norm": 1.2109375, "learning_rate": 1.3268902123434398e-05, "loss": 0.4584, "step": 8675 }, { "epoch": 1.1902997873362147, "grad_norm": 1.3203125, "learning_rate": 1.3267537909617262e-05, "loss": 0.5302, "step": 8676 }, { "epoch": 1.190436989778418, "grad_norm": 1.2265625, "learning_rate": 1.3266173627716459e-05, "loss": 0.4436, "step": 8677 }, { "epoch": 1.1905741922206214, "grad_norm": 1.3359375, "learning_rate": 1.3264809277760411e-05, "loss": 0.4911, "step": 8678 }, { "epoch": 1.190711394662825, "grad_norm": 1.1953125, "learning_rate": 1.3263444859777555e-05, "loss": 0.4316, "step": 8679 }, { "epoch": 1.1908485971050284, "grad_norm": 1.296875, "learning_rate": 1.326208037379631e-05, "loss": 0.4604, "step": 8680 }, { "epoch": 1.190985799547232, "grad_norm": 1.1796875, "learning_rate": 1.3260715819845118e-05, "loss": 0.4184, "step": 8681 }, { "epoch": 1.1911230019894354, "grad_norm": 1.1640625, "learning_rate": 1.3259351197952404e-05, "loss": 0.4278, "step": 8682 }, { "epoch": 1.191260204431639, "grad_norm": 1.2109375, "learning_rate": 1.3257986508146602e-05, "loss": 0.3839, "step": 8683 }, { "epoch": 1.1913974068738424, "grad_norm": 1.25, "learning_rate": 1.3256621750456148e-05, "loss": 0.4368, "step": 8684 }, { "epoch": 1.191534609316046, "grad_norm": 1.1875, "learning_rate": 1.3255256924909481e-05, "loss": 0.4223, "step": 8685 }, { "epoch": 1.1916718117582492, "grad_norm": 1.1875, "learning_rate": 1.3253892031535037e-05, "loss": 0.4551, "step": 8686 }, { "epoch": 1.1918090142004527, "grad_norm": 1.171875, "learning_rate": 1.3252527070361256e-05, "loss": 0.4083, "step": 8687 }, { "epoch": 1.1919462166426562, "grad_norm": 1.21875, "learning_rate": 1.325116204141658e-05, "loss": 0.4679, "step": 8688 }, { "epoch": 1.1920834190848597, "grad_norm": 1.2421875, "learning_rate": 1.3249796944729445e-05, "loss": 0.4646, "step": 8689 }, { "epoch": 1.1922206215270632, "grad_norm": 1.2890625, "learning_rate": 1.3248431780328303e-05, "loss": 0.4792, "step": 8690 }, { "epoch": 1.1923578239692667, "grad_norm": 1.25, "learning_rate": 1.3247066548241597e-05, "loss": 0.4602, "step": 8691 }, { "epoch": 1.1924950264114702, "grad_norm": 1.296875, "learning_rate": 1.324570124849777e-05, "loss": 0.4338, "step": 8692 }, { "epoch": 1.1926322288536735, "grad_norm": 1.1484375, "learning_rate": 1.3244335881125272e-05, "loss": 0.3897, "step": 8693 }, { "epoch": 1.192769431295877, "grad_norm": 1.3046875, "learning_rate": 1.3242970446152551e-05, "loss": 0.4702, "step": 8694 }, { "epoch": 1.1929066337380805, "grad_norm": 1.21875, "learning_rate": 1.3241604943608062e-05, "loss": 0.4446, "step": 8695 }, { "epoch": 1.193043836180284, "grad_norm": 1.265625, "learning_rate": 1.3240239373520256e-05, "loss": 0.4947, "step": 8696 }, { "epoch": 1.1931810386224875, "grad_norm": 1.1796875, "learning_rate": 1.3238873735917582e-05, "loss": 0.4304, "step": 8697 }, { "epoch": 1.193318241064691, "grad_norm": 1.2265625, "learning_rate": 1.3237508030828496e-05, "loss": 0.4393, "step": 8698 }, { "epoch": 1.1934554435068945, "grad_norm": 1.203125, "learning_rate": 1.3236142258281457e-05, "loss": 0.4386, "step": 8699 }, { "epoch": 1.193592645949098, "grad_norm": 1.125, "learning_rate": 1.3234776418304922e-05, "loss": 0.3868, "step": 8700 }, { "epoch": 1.1937298483913015, "grad_norm": 1.28125, "learning_rate": 1.3233410510927353e-05, "loss": 0.5081, "step": 8701 }, { "epoch": 1.1938670508335048, "grad_norm": 1.2109375, "learning_rate": 1.3232044536177204e-05, "loss": 0.4507, "step": 8702 }, { "epoch": 1.1940042532757082, "grad_norm": 1.1875, "learning_rate": 1.3230678494082943e-05, "loss": 0.4441, "step": 8703 }, { "epoch": 1.1941414557179117, "grad_norm": 1.1953125, "learning_rate": 1.3229312384673028e-05, "loss": 0.4666, "step": 8704 }, { "epoch": 1.1942786581601152, "grad_norm": 1.2421875, "learning_rate": 1.3227946207975928e-05, "loss": 0.4596, "step": 8705 }, { "epoch": 1.1944158606023187, "grad_norm": 1.2265625, "learning_rate": 1.3226579964020107e-05, "loss": 0.4491, "step": 8706 }, { "epoch": 1.1945530630445222, "grad_norm": 1.296875, "learning_rate": 1.3225213652834034e-05, "loss": 0.4645, "step": 8707 }, { "epoch": 1.1946902654867257, "grad_norm": 1.234375, "learning_rate": 1.3223847274446177e-05, "loss": 0.4217, "step": 8708 }, { "epoch": 1.194827467928929, "grad_norm": 1.2265625, "learning_rate": 1.3222480828885012e-05, "loss": 0.4214, "step": 8709 }, { "epoch": 1.1949646703711325, "grad_norm": 1.21875, "learning_rate": 1.3221114316179e-05, "loss": 0.425, "step": 8710 }, { "epoch": 1.195101872813336, "grad_norm": 1.1796875, "learning_rate": 1.321974773635662e-05, "loss": 0.4246, "step": 8711 }, { "epoch": 1.1952390752555395, "grad_norm": 1.1796875, "learning_rate": 1.3218381089446349e-05, "loss": 0.4237, "step": 8712 }, { "epoch": 1.195376277697743, "grad_norm": 1.1796875, "learning_rate": 1.3217014375476655e-05, "loss": 0.428, "step": 8713 }, { "epoch": 1.1955134801399465, "grad_norm": 1.265625, "learning_rate": 1.3215647594476026e-05, "loss": 0.4311, "step": 8714 }, { "epoch": 1.19565068258215, "grad_norm": 1.15625, "learning_rate": 1.3214280746472933e-05, "loss": 0.4097, "step": 8715 }, { "epoch": 1.1957878850243535, "grad_norm": 1.359375, "learning_rate": 1.321291383149586e-05, "loss": 0.5327, "step": 8716 }, { "epoch": 1.195925087466557, "grad_norm": 1.15625, "learning_rate": 1.3211546849573285e-05, "loss": 0.394, "step": 8717 }, { "epoch": 1.1960622899087603, "grad_norm": 1.265625, "learning_rate": 1.3210179800733696e-05, "loss": 0.4289, "step": 8718 }, { "epoch": 1.1961994923509638, "grad_norm": 1.21875, "learning_rate": 1.3208812685005574e-05, "loss": 0.4343, "step": 8719 }, { "epoch": 1.1963366947931673, "grad_norm": 1.34375, "learning_rate": 1.3207445502417403e-05, "loss": 0.4937, "step": 8720 }, { "epoch": 1.1964738972353708, "grad_norm": 1.265625, "learning_rate": 1.3206078252997674e-05, "loss": 0.4513, "step": 8721 }, { "epoch": 1.1966110996775743, "grad_norm": 1.4609375, "learning_rate": 1.3204710936774873e-05, "loss": 0.5525, "step": 8722 }, { "epoch": 1.1967483021197778, "grad_norm": 1.15625, "learning_rate": 1.320334355377749e-05, "loss": 0.3763, "step": 8723 }, { "epoch": 1.1968855045619813, "grad_norm": 1.28125, "learning_rate": 1.320197610403402e-05, "loss": 0.4642, "step": 8724 }, { "epoch": 1.1970227070041846, "grad_norm": 1.2265625, "learning_rate": 1.320060858757295e-05, "loss": 0.4197, "step": 8725 }, { "epoch": 1.197159909446388, "grad_norm": 1.3046875, "learning_rate": 1.3199241004422779e-05, "loss": 0.5218, "step": 8726 }, { "epoch": 1.1972971118885916, "grad_norm": 1.1328125, "learning_rate": 1.3197873354612e-05, "loss": 0.3976, "step": 8727 }, { "epoch": 1.197434314330795, "grad_norm": 1.0703125, "learning_rate": 1.3196505638169112e-05, "loss": 0.3478, "step": 8728 }, { "epoch": 1.1975715167729986, "grad_norm": 1.3046875, "learning_rate": 1.3195137855122608e-05, "loss": 0.4872, "step": 8729 }, { "epoch": 1.197708719215202, "grad_norm": 1.171875, "learning_rate": 1.3193770005500995e-05, "loss": 0.4007, "step": 8730 }, { "epoch": 1.1978459216574056, "grad_norm": 1.140625, "learning_rate": 1.3192402089332768e-05, "loss": 0.4057, "step": 8731 }, { "epoch": 1.197983124099609, "grad_norm": 1.2421875, "learning_rate": 1.319103410664643e-05, "loss": 0.4821, "step": 8732 }, { "epoch": 1.1981203265418126, "grad_norm": 1.3828125, "learning_rate": 1.3189666057470488e-05, "loss": 0.4792, "step": 8733 }, { "epoch": 1.1982575289840158, "grad_norm": 1.2578125, "learning_rate": 1.3188297941833446e-05, "loss": 0.46, "step": 8734 }, { "epoch": 1.1983947314262193, "grad_norm": 1.234375, "learning_rate": 1.3186929759763812e-05, "loss": 0.4233, "step": 8735 }, { "epoch": 1.1985319338684228, "grad_norm": 1.2578125, "learning_rate": 1.318556151129009e-05, "loss": 0.5074, "step": 8736 }, { "epoch": 1.1986691363106263, "grad_norm": 1.1484375, "learning_rate": 1.3184193196440794e-05, "loss": 0.388, "step": 8737 }, { "epoch": 1.1988063387528298, "grad_norm": 1.34375, "learning_rate": 1.3182824815244428e-05, "loss": 0.5182, "step": 8738 }, { "epoch": 1.1989435411950333, "grad_norm": 1.2421875, "learning_rate": 1.318145636772951e-05, "loss": 0.4425, "step": 8739 }, { "epoch": 1.1990807436372368, "grad_norm": 1.3125, "learning_rate": 1.3180087853924555e-05, "loss": 0.4641, "step": 8740 }, { "epoch": 1.19921794607944, "grad_norm": 1.03125, "learning_rate": 1.3178719273858071e-05, "loss": 0.3924, "step": 8741 }, { "epoch": 1.1993551485216436, "grad_norm": 1.1875, "learning_rate": 1.3177350627558578e-05, "loss": 0.4317, "step": 8742 }, { "epoch": 1.199492350963847, "grad_norm": 1.1875, "learning_rate": 1.3175981915054596e-05, "loss": 0.4455, "step": 8743 }, { "epoch": 1.1996295534060506, "grad_norm": 1.25, "learning_rate": 1.3174613136374644e-05, "loss": 0.4102, "step": 8744 }, { "epoch": 1.199766755848254, "grad_norm": 1.21875, "learning_rate": 1.3173244291547236e-05, "loss": 0.4511, "step": 8745 }, { "epoch": 1.1999039582904576, "grad_norm": 1.2734375, "learning_rate": 1.3171875380600899e-05, "loss": 0.4444, "step": 8746 }, { "epoch": 1.200041160732661, "grad_norm": 1.203125, "learning_rate": 1.3170506403564154e-05, "loss": 0.3872, "step": 8747 }, { "epoch": 1.2001783631748646, "grad_norm": 1.375, "learning_rate": 1.3169137360465527e-05, "loss": 0.4462, "step": 8748 }, { "epoch": 1.200315565617068, "grad_norm": 1.2734375, "learning_rate": 1.3167768251333544e-05, "loss": 0.5059, "step": 8749 }, { "epoch": 1.2004527680592714, "grad_norm": 1.203125, "learning_rate": 1.3166399076196731e-05, "loss": 0.3833, "step": 8750 }, { "epoch": 1.2005899705014749, "grad_norm": 1.1640625, "learning_rate": 1.3165029835083617e-05, "loss": 0.4149, "step": 8751 }, { "epoch": 1.2007271729436784, "grad_norm": 1.2109375, "learning_rate": 1.3163660528022738e-05, "loss": 0.4331, "step": 8752 }, { "epoch": 1.2008643753858819, "grad_norm": 1.328125, "learning_rate": 1.3162291155042615e-05, "loss": 0.4718, "step": 8753 }, { "epoch": 1.2010015778280854, "grad_norm": 1.234375, "learning_rate": 1.3160921716171787e-05, "loss": 0.4303, "step": 8754 }, { "epoch": 1.2011387802702889, "grad_norm": 1.3046875, "learning_rate": 1.3159552211438786e-05, "loss": 0.5159, "step": 8755 }, { "epoch": 1.2012759827124924, "grad_norm": 1.25, "learning_rate": 1.3158182640872148e-05, "loss": 0.4342, "step": 8756 }, { "epoch": 1.2014131851546956, "grad_norm": 1.296875, "learning_rate": 1.3156813004500411e-05, "loss": 0.456, "step": 8757 }, { "epoch": 1.2015503875968991, "grad_norm": 1.125, "learning_rate": 1.3155443302352109e-05, "loss": 0.3956, "step": 8758 }, { "epoch": 1.2016875900391026, "grad_norm": 1.21875, "learning_rate": 1.3154073534455788e-05, "loss": 0.4731, "step": 8759 }, { "epoch": 1.2018247924813061, "grad_norm": 1.28125, "learning_rate": 1.3152703700839985e-05, "loss": 0.4786, "step": 8760 }, { "epoch": 1.2019619949235096, "grad_norm": 1.078125, "learning_rate": 1.3151333801533244e-05, "loss": 0.3735, "step": 8761 }, { "epoch": 1.2020991973657131, "grad_norm": 1.1796875, "learning_rate": 1.314996383656411e-05, "loss": 0.4567, "step": 8762 }, { "epoch": 1.2022363998079166, "grad_norm": 1.1796875, "learning_rate": 1.3148593805961125e-05, "loss": 0.4448, "step": 8763 }, { "epoch": 1.2023736022501201, "grad_norm": 1.25, "learning_rate": 1.3147223709752837e-05, "loss": 0.4351, "step": 8764 }, { "epoch": 1.2025108046923236, "grad_norm": 1.1171875, "learning_rate": 1.3145853547967793e-05, "loss": 0.3857, "step": 8765 }, { "epoch": 1.202648007134527, "grad_norm": 1.1796875, "learning_rate": 1.3144483320634542e-05, "loss": 0.4036, "step": 8766 }, { "epoch": 1.2027852095767304, "grad_norm": 1.1796875, "learning_rate": 1.3143113027781637e-05, "loss": 0.4013, "step": 8767 }, { "epoch": 1.202922412018934, "grad_norm": 1.203125, "learning_rate": 1.3141742669437628e-05, "loss": 0.4043, "step": 8768 }, { "epoch": 1.2030596144611374, "grad_norm": 1.328125, "learning_rate": 1.3140372245631068e-05, "loss": 0.5013, "step": 8769 }, { "epoch": 1.203196816903341, "grad_norm": 1.2734375, "learning_rate": 1.3139001756390512e-05, "loss": 0.489, "step": 8770 }, { "epoch": 1.2033340193455444, "grad_norm": 1.296875, "learning_rate": 1.3137631201744518e-05, "loss": 0.4681, "step": 8771 }, { "epoch": 1.203471221787748, "grad_norm": 1.28125, "learning_rate": 1.3136260581721644e-05, "loss": 0.3816, "step": 8772 }, { "epoch": 1.2036084242299512, "grad_norm": 1.25, "learning_rate": 1.3134889896350443e-05, "loss": 0.4319, "step": 8773 }, { "epoch": 1.2037456266721547, "grad_norm": 1.1640625, "learning_rate": 1.313351914565948e-05, "loss": 0.4145, "step": 8774 }, { "epoch": 1.2038828291143582, "grad_norm": 1.3125, "learning_rate": 1.3132148329677314e-05, "loss": 0.4883, "step": 8775 }, { "epoch": 1.2040200315565617, "grad_norm": 1.3125, "learning_rate": 1.313077744843251e-05, "loss": 0.4917, "step": 8776 }, { "epoch": 1.2041572339987652, "grad_norm": 1.2578125, "learning_rate": 1.3129406501953632e-05, "loss": 0.4587, "step": 8777 }, { "epoch": 1.2042944364409687, "grad_norm": 1.34375, "learning_rate": 1.3128035490269242e-05, "loss": 0.4604, "step": 8778 }, { "epoch": 1.2044316388831722, "grad_norm": 1.2421875, "learning_rate": 1.3126664413407912e-05, "loss": 0.4661, "step": 8779 }, { "epoch": 1.2045688413253757, "grad_norm": 1.28125, "learning_rate": 1.312529327139821e-05, "loss": 0.4825, "step": 8780 }, { "epoch": 1.2047060437675792, "grad_norm": 1.15625, "learning_rate": 1.31239220642687e-05, "loss": 0.4034, "step": 8781 }, { "epoch": 1.2048432462097824, "grad_norm": 1.1875, "learning_rate": 1.3122550792047956e-05, "loss": 0.4789, "step": 8782 }, { "epoch": 1.204980448651986, "grad_norm": 1.296875, "learning_rate": 1.3121179454764554e-05, "loss": 0.4596, "step": 8783 }, { "epoch": 1.2051176510941894, "grad_norm": 1.2421875, "learning_rate": 1.3119808052447063e-05, "loss": 0.5058, "step": 8784 }, { "epoch": 1.205254853536393, "grad_norm": 1.203125, "learning_rate": 1.3118436585124061e-05, "loss": 0.4371, "step": 8785 }, { "epoch": 1.2053920559785964, "grad_norm": 1.0703125, "learning_rate": 1.3117065052824123e-05, "loss": 0.357, "step": 8786 }, { "epoch": 1.2055292584208, "grad_norm": 1.21875, "learning_rate": 1.3115693455575827e-05, "loss": 0.4043, "step": 8787 }, { "epoch": 1.2056664608630034, "grad_norm": 1.171875, "learning_rate": 1.3114321793407749e-05, "loss": 0.3772, "step": 8788 }, { "epoch": 1.2058036633052067, "grad_norm": 1.15625, "learning_rate": 1.3112950066348478e-05, "loss": 0.4032, "step": 8789 }, { "epoch": 1.2059408657474102, "grad_norm": 1.2109375, "learning_rate": 1.3111578274426586e-05, "loss": 0.4204, "step": 8790 }, { "epoch": 1.2060780681896137, "grad_norm": 1.2265625, "learning_rate": 1.3110206417670664e-05, "loss": 0.4256, "step": 8791 }, { "epoch": 1.2062152706318172, "grad_norm": 1.3671875, "learning_rate": 1.3108834496109288e-05, "loss": 0.4904, "step": 8792 }, { "epoch": 1.2063524730740207, "grad_norm": 1.1953125, "learning_rate": 1.3107462509771054e-05, "loss": 0.4323, "step": 8793 }, { "epoch": 1.2064896755162242, "grad_norm": 1.3046875, "learning_rate": 1.3106090458684542e-05, "loss": 0.4764, "step": 8794 }, { "epoch": 1.2066268779584277, "grad_norm": 1.3125, "learning_rate": 1.3104718342878342e-05, "loss": 0.4768, "step": 8795 }, { "epoch": 1.2067640804006312, "grad_norm": 1.1875, "learning_rate": 1.3103346162381045e-05, "loss": 0.4431, "step": 8796 }, { "epoch": 1.2069012828428347, "grad_norm": 1.2265625, "learning_rate": 1.3101973917221242e-05, "loss": 0.4432, "step": 8797 }, { "epoch": 1.207038485285038, "grad_norm": 1.25, "learning_rate": 1.310060160742753e-05, "loss": 0.476, "step": 8798 }, { "epoch": 1.2071756877272415, "grad_norm": 1.296875, "learning_rate": 1.3099229233028494e-05, "loss": 0.4394, "step": 8799 }, { "epoch": 1.207312890169445, "grad_norm": 1.3125, "learning_rate": 1.3097856794052734e-05, "loss": 0.4585, "step": 8800 }, { "epoch": 1.2074500926116485, "grad_norm": 1.2109375, "learning_rate": 1.3096484290528849e-05, "loss": 0.4311, "step": 8801 }, { "epoch": 1.207587295053852, "grad_norm": 1.265625, "learning_rate": 1.3095111722485432e-05, "loss": 0.4608, "step": 8802 }, { "epoch": 1.2077244974960555, "grad_norm": 1.21875, "learning_rate": 1.3093739089951085e-05, "loss": 0.4382, "step": 8803 }, { "epoch": 1.207861699938259, "grad_norm": 1.203125, "learning_rate": 1.3092366392954409e-05, "loss": 0.4154, "step": 8804 }, { "epoch": 1.2079989023804623, "grad_norm": 1.1328125, "learning_rate": 1.3090993631524004e-05, "loss": 0.3858, "step": 8805 }, { "epoch": 1.2081361048226658, "grad_norm": 1.1875, "learning_rate": 1.308962080568848e-05, "loss": 0.4267, "step": 8806 }, { "epoch": 1.2082733072648693, "grad_norm": 1.2578125, "learning_rate": 1.3088247915476435e-05, "loss": 0.4363, "step": 8807 }, { "epoch": 1.2084105097070728, "grad_norm": 1.1875, "learning_rate": 1.3086874960916475e-05, "loss": 0.4308, "step": 8808 }, { "epoch": 1.2085477121492763, "grad_norm": 1.2734375, "learning_rate": 1.3085501942037211e-05, "loss": 0.5336, "step": 8809 }, { "epoch": 1.2086849145914798, "grad_norm": 1.2421875, "learning_rate": 1.3084128858867248e-05, "loss": 0.4398, "step": 8810 }, { "epoch": 1.2088221170336833, "grad_norm": 1.1796875, "learning_rate": 1.3082755711435198e-05, "loss": 0.4305, "step": 8811 }, { "epoch": 1.2089593194758868, "grad_norm": 1.3046875, "learning_rate": 1.3081382499769677e-05, "loss": 0.4706, "step": 8812 }, { "epoch": 1.2090965219180903, "grad_norm": 1.2578125, "learning_rate": 1.3080009223899288e-05, "loss": 0.471, "step": 8813 }, { "epoch": 1.2092337243602935, "grad_norm": 1.3671875, "learning_rate": 1.3078635883852652e-05, "loss": 0.5189, "step": 8814 }, { "epoch": 1.209370926802497, "grad_norm": 1.09375, "learning_rate": 1.3077262479658384e-05, "loss": 0.4029, "step": 8815 }, { "epoch": 1.2095081292447005, "grad_norm": 1.1796875, "learning_rate": 1.3075889011345098e-05, "loss": 0.4061, "step": 8816 }, { "epoch": 1.209645331686904, "grad_norm": 1.3515625, "learning_rate": 1.3074515478941417e-05, "loss": 0.4803, "step": 8817 }, { "epoch": 1.2097825341291075, "grad_norm": 1.25, "learning_rate": 1.3073141882475952e-05, "loss": 0.4642, "step": 8818 }, { "epoch": 1.209919736571311, "grad_norm": 1.21875, "learning_rate": 1.3071768221977332e-05, "loss": 0.4618, "step": 8819 }, { "epoch": 1.2100569390135145, "grad_norm": 1.296875, "learning_rate": 1.3070394497474174e-05, "loss": 0.5103, "step": 8820 }, { "epoch": 1.2101941414557178, "grad_norm": 1.3046875, "learning_rate": 1.3069020708995106e-05, "loss": 0.4231, "step": 8821 }, { "epoch": 1.2103313438979213, "grad_norm": 1.3046875, "learning_rate": 1.3067646856568747e-05, "loss": 0.4775, "step": 8822 }, { "epoch": 1.2104685463401248, "grad_norm": 1.1953125, "learning_rate": 1.306627294022373e-05, "loss": 0.456, "step": 8823 }, { "epoch": 1.2106057487823283, "grad_norm": 1.2578125, "learning_rate": 1.3064898959988677e-05, "loss": 0.4653, "step": 8824 }, { "epoch": 1.2107429512245318, "grad_norm": 1.203125, "learning_rate": 1.306352491589222e-05, "loss": 0.4422, "step": 8825 }, { "epoch": 1.2108801536667353, "grad_norm": 1.28125, "learning_rate": 1.3062150807962985e-05, "loss": 0.4117, "step": 8826 }, { "epoch": 1.2110173561089388, "grad_norm": 1.1796875, "learning_rate": 1.3060776636229608e-05, "loss": 0.4487, "step": 8827 }, { "epoch": 1.2111545585511423, "grad_norm": 1.1953125, "learning_rate": 1.305940240072072e-05, "loss": 0.4314, "step": 8828 }, { "epoch": 1.2112917609933458, "grad_norm": 1.2109375, "learning_rate": 1.3058028101464955e-05, "loss": 0.4227, "step": 8829 }, { "epoch": 1.211428963435549, "grad_norm": 1.4140625, "learning_rate": 1.3056653738490948e-05, "loss": 0.4704, "step": 8830 }, { "epoch": 1.2115661658777526, "grad_norm": 1.3125, "learning_rate": 1.3055279311827335e-05, "loss": 0.4965, "step": 8831 }, { "epoch": 1.211703368319956, "grad_norm": 1.15625, "learning_rate": 1.3053904821502756e-05, "loss": 0.4011, "step": 8832 }, { "epoch": 1.2118405707621596, "grad_norm": 1.4140625, "learning_rate": 1.3052530267545849e-05, "loss": 0.4238, "step": 8833 }, { "epoch": 1.211977773204363, "grad_norm": 1.3203125, "learning_rate": 1.3051155649985259e-05, "loss": 0.4902, "step": 8834 }, { "epoch": 1.2121149756465666, "grad_norm": 1.1875, "learning_rate": 1.3049780968849621e-05, "loss": 0.4117, "step": 8835 }, { "epoch": 1.21225217808877, "grad_norm": 1.3359375, "learning_rate": 1.3048406224167583e-05, "loss": 0.4924, "step": 8836 }, { "epoch": 1.2123893805309733, "grad_norm": 1.2734375, "learning_rate": 1.3047031415967788e-05, "loss": 0.4359, "step": 8837 }, { "epoch": 1.2125265829731768, "grad_norm": 1.25, "learning_rate": 1.3045656544278882e-05, "loss": 0.4381, "step": 8838 }, { "epoch": 1.2126637854153803, "grad_norm": 1.125, "learning_rate": 1.3044281609129515e-05, "loss": 0.4111, "step": 8839 }, { "epoch": 1.2128009878575838, "grad_norm": 1.2421875, "learning_rate": 1.3042906610548331e-05, "loss": 0.4199, "step": 8840 }, { "epoch": 1.2129381902997873, "grad_norm": 1.1171875, "learning_rate": 1.3041531548563983e-05, "loss": 0.3644, "step": 8841 }, { "epoch": 1.2130753927419908, "grad_norm": 1.359375, "learning_rate": 1.3040156423205122e-05, "loss": 0.4952, "step": 8842 }, { "epoch": 1.2132125951841943, "grad_norm": 1.203125, "learning_rate": 1.3038781234500403e-05, "loss": 0.4235, "step": 8843 }, { "epoch": 1.2133497976263978, "grad_norm": 1.3125, "learning_rate": 1.3037405982478478e-05, "loss": 0.4335, "step": 8844 }, { "epoch": 1.2134870000686013, "grad_norm": 1.203125, "learning_rate": 1.3036030667167995e-05, "loss": 0.4337, "step": 8845 }, { "epoch": 1.2136242025108046, "grad_norm": 1.1328125, "learning_rate": 1.3034655288597624e-05, "loss": 0.369, "step": 8846 }, { "epoch": 1.213761404953008, "grad_norm": 1.1484375, "learning_rate": 1.3033279846796014e-05, "loss": 0.4028, "step": 8847 }, { "epoch": 1.2138986073952116, "grad_norm": 1.265625, "learning_rate": 1.3031904341791825e-05, "loss": 0.4414, "step": 8848 }, { "epoch": 1.214035809837415, "grad_norm": 1.25, "learning_rate": 1.3030528773613722e-05, "loss": 0.4699, "step": 8849 }, { "epoch": 1.2141730122796186, "grad_norm": 1.1875, "learning_rate": 1.3029153142290363e-05, "loss": 0.4164, "step": 8850 }, { "epoch": 1.214310214721822, "grad_norm": 1.203125, "learning_rate": 1.3027777447850412e-05, "loss": 0.468, "step": 8851 }, { "epoch": 1.2144474171640256, "grad_norm": 1.1796875, "learning_rate": 1.3026401690322534e-05, "loss": 0.4276, "step": 8852 }, { "epoch": 1.2145846196062289, "grad_norm": 1.1171875, "learning_rate": 1.3025025869735392e-05, "loss": 0.3929, "step": 8853 }, { "epoch": 1.2147218220484324, "grad_norm": 1.3359375, "learning_rate": 1.3023649986117657e-05, "loss": 0.4892, "step": 8854 }, { "epoch": 1.2148590244906359, "grad_norm": 1.1171875, "learning_rate": 1.3022274039497998e-05, "loss": 0.3778, "step": 8855 }, { "epoch": 1.2149962269328394, "grad_norm": 1.2890625, "learning_rate": 1.302089802990508e-05, "loss": 0.4928, "step": 8856 }, { "epoch": 1.2151334293750429, "grad_norm": 1.15625, "learning_rate": 1.3019521957367576e-05, "loss": 0.3735, "step": 8857 }, { "epoch": 1.2152706318172464, "grad_norm": 1.1953125, "learning_rate": 1.3018145821914161e-05, "loss": 0.45, "step": 8858 }, { "epoch": 1.2154078342594499, "grad_norm": 1.1328125, "learning_rate": 1.3016769623573507e-05, "loss": 0.374, "step": 8859 }, { "epoch": 1.2155450367016534, "grad_norm": 1.2421875, "learning_rate": 1.301539336237429e-05, "loss": 0.4406, "step": 8860 }, { "epoch": 1.2156822391438569, "grad_norm": 1.109375, "learning_rate": 1.3014017038345184e-05, "loss": 0.3944, "step": 8861 }, { "epoch": 1.2158194415860601, "grad_norm": 1.3046875, "learning_rate": 1.3012640651514867e-05, "loss": 0.4822, "step": 8862 }, { "epoch": 1.2159566440282636, "grad_norm": 1.1796875, "learning_rate": 1.3011264201912018e-05, "loss": 0.4051, "step": 8863 }, { "epoch": 1.2160938464704671, "grad_norm": 1.28125, "learning_rate": 1.3009887689565318e-05, "loss": 0.4797, "step": 8864 }, { "epoch": 1.2162310489126706, "grad_norm": 1.1484375, "learning_rate": 1.3008511114503448e-05, "loss": 0.4183, "step": 8865 }, { "epoch": 1.2163682513548741, "grad_norm": 1.21875, "learning_rate": 1.3007134476755092e-05, "loss": 0.4321, "step": 8866 }, { "epoch": 1.2165054537970776, "grad_norm": 1.1640625, "learning_rate": 1.3005757776348935e-05, "loss": 0.3525, "step": 8867 }, { "epoch": 1.2166426562392811, "grad_norm": 1.2734375, "learning_rate": 1.300438101331366e-05, "loss": 0.4856, "step": 8868 }, { "epoch": 1.2167798586814844, "grad_norm": 1.15625, "learning_rate": 1.3003004187677957e-05, "loss": 0.4118, "step": 8869 }, { "epoch": 1.216917061123688, "grad_norm": 1.140625, "learning_rate": 1.300162729947051e-05, "loss": 0.3936, "step": 8870 }, { "epoch": 1.2170542635658914, "grad_norm": 1.1796875, "learning_rate": 1.300025034872001e-05, "loss": 0.4334, "step": 8871 }, { "epoch": 1.217191466008095, "grad_norm": 1.75, "learning_rate": 1.2998873335455147e-05, "loss": 0.5749, "step": 8872 }, { "epoch": 1.2173286684502984, "grad_norm": 1.1953125, "learning_rate": 1.2997496259704611e-05, "loss": 0.4217, "step": 8873 }, { "epoch": 1.217465870892502, "grad_norm": 1.1875, "learning_rate": 1.2996119121497101e-05, "loss": 0.4104, "step": 8874 }, { "epoch": 1.2176030733347054, "grad_norm": 1.2265625, "learning_rate": 1.299474192086131e-05, "loss": 0.4095, "step": 8875 }, { "epoch": 1.217740275776909, "grad_norm": 1.2890625, "learning_rate": 1.2993364657825929e-05, "loss": 0.5134, "step": 8876 }, { "epoch": 1.2178774782191124, "grad_norm": 1.1796875, "learning_rate": 1.299198733241966e-05, "loss": 0.4142, "step": 8877 }, { "epoch": 1.2180146806613157, "grad_norm": 1.296875, "learning_rate": 1.2990609944671203e-05, "loss": 0.4696, "step": 8878 }, { "epoch": 1.2181518831035192, "grad_norm": 1.203125, "learning_rate": 1.2989232494609252e-05, "loss": 0.4811, "step": 8879 }, { "epoch": 1.2182890855457227, "grad_norm": 1.3125, "learning_rate": 1.2987854982262512e-05, "loss": 0.4682, "step": 8880 }, { "epoch": 1.2184262879879262, "grad_norm": 1.265625, "learning_rate": 1.2986477407659682e-05, "loss": 0.4446, "step": 8881 }, { "epoch": 1.2185634904301297, "grad_norm": 1.1875, "learning_rate": 1.298509977082947e-05, "loss": 0.4226, "step": 8882 }, { "epoch": 1.2187006928723332, "grad_norm": 1.3203125, "learning_rate": 1.2983722071800577e-05, "loss": 0.4567, "step": 8883 }, { "epoch": 1.2188378953145367, "grad_norm": 1.28125, "learning_rate": 1.2982344310601713e-05, "loss": 0.4277, "step": 8884 }, { "epoch": 1.21897509775674, "grad_norm": 1.2421875, "learning_rate": 1.2980966487261583e-05, "loss": 0.4477, "step": 8885 }, { "epoch": 1.2191123001989435, "grad_norm": 1.2109375, "learning_rate": 1.2979588601808897e-05, "loss": 0.4148, "step": 8886 }, { "epoch": 1.219249502641147, "grad_norm": 1.296875, "learning_rate": 1.2978210654272365e-05, "loss": 0.5026, "step": 8887 }, { "epoch": 1.2193867050833505, "grad_norm": 1.28125, "learning_rate": 1.2976832644680701e-05, "loss": 0.4354, "step": 8888 }, { "epoch": 1.219523907525554, "grad_norm": 1.1640625, "learning_rate": 1.2975454573062611e-05, "loss": 0.3961, "step": 8889 }, { "epoch": 1.2196611099677575, "grad_norm": 1.375, "learning_rate": 1.2974076439446815e-05, "loss": 0.5283, "step": 8890 }, { "epoch": 1.219798312409961, "grad_norm": 1.1953125, "learning_rate": 1.2972698243862025e-05, "loss": 0.421, "step": 8891 }, { "epoch": 1.2199355148521644, "grad_norm": 1.1796875, "learning_rate": 1.297131998633696e-05, "loss": 0.3873, "step": 8892 }, { "epoch": 1.220072717294368, "grad_norm": 1.296875, "learning_rate": 1.2969941666900337e-05, "loss": 0.4767, "step": 8893 }, { "epoch": 1.2202099197365712, "grad_norm": 1.1953125, "learning_rate": 1.2968563285580874e-05, "loss": 0.4055, "step": 8894 }, { "epoch": 1.2203471221787747, "grad_norm": 1.234375, "learning_rate": 1.2967184842407292e-05, "loss": 0.4304, "step": 8895 }, { "epoch": 1.2204843246209782, "grad_norm": 1.1484375, "learning_rate": 1.2965806337408316e-05, "loss": 0.3988, "step": 8896 }, { "epoch": 1.2206215270631817, "grad_norm": 1.3359375, "learning_rate": 1.2964427770612664e-05, "loss": 0.4967, "step": 8897 }, { "epoch": 1.2207587295053852, "grad_norm": 1.265625, "learning_rate": 1.2963049142049062e-05, "loss": 0.4541, "step": 8898 }, { "epoch": 1.2208959319475887, "grad_norm": 1.3046875, "learning_rate": 1.2961670451746238e-05, "loss": 0.4478, "step": 8899 }, { "epoch": 1.2210331343897922, "grad_norm": 1.328125, "learning_rate": 1.2960291699732917e-05, "loss": 0.4789, "step": 8900 }, { "epoch": 1.2211703368319955, "grad_norm": 1.2734375, "learning_rate": 1.2958912886037825e-05, "loss": 0.4672, "step": 8901 }, { "epoch": 1.221307539274199, "grad_norm": 1.3515625, "learning_rate": 1.2957534010689697e-05, "loss": 0.5228, "step": 8902 }, { "epoch": 1.2214447417164025, "grad_norm": 1.234375, "learning_rate": 1.2956155073717259e-05, "loss": 0.446, "step": 8903 }, { "epoch": 1.221581944158606, "grad_norm": 1.234375, "learning_rate": 1.2954776075149245e-05, "loss": 0.3769, "step": 8904 }, { "epoch": 1.2217191466008095, "grad_norm": 1.1640625, "learning_rate": 1.2953397015014391e-05, "loss": 0.4127, "step": 8905 }, { "epoch": 1.221856349043013, "grad_norm": 1.2265625, "learning_rate": 1.2952017893341425e-05, "loss": 0.4152, "step": 8906 }, { "epoch": 1.2219935514852165, "grad_norm": 1.2265625, "learning_rate": 1.2950638710159087e-05, "loss": 0.4279, "step": 8907 }, { "epoch": 1.22213075392742, "grad_norm": 1.25, "learning_rate": 1.2949259465496114e-05, "loss": 0.4544, "step": 8908 }, { "epoch": 1.2222679563696235, "grad_norm": 1.1484375, "learning_rate": 1.2947880159381245e-05, "loss": 0.424, "step": 8909 }, { "epoch": 1.2224051588118268, "grad_norm": 1.2109375, "learning_rate": 1.2946500791843217e-05, "loss": 0.4563, "step": 8910 }, { "epoch": 1.2225423612540303, "grad_norm": 1.2734375, "learning_rate": 1.2945121362910775e-05, "loss": 0.4932, "step": 8911 }, { "epoch": 1.2226795636962338, "grad_norm": 1.125, "learning_rate": 1.2943741872612661e-05, "loss": 0.3917, "step": 8912 }, { "epoch": 1.2228167661384373, "grad_norm": 1.1484375, "learning_rate": 1.2942362320977612e-05, "loss": 0.3875, "step": 8913 }, { "epoch": 1.2229539685806408, "grad_norm": 1.203125, "learning_rate": 1.2940982708034384e-05, "loss": 0.4412, "step": 8914 }, { "epoch": 1.2230911710228443, "grad_norm": 1.4609375, "learning_rate": 1.2939603033811714e-05, "loss": 0.519, "step": 8915 }, { "epoch": 1.2232283734650478, "grad_norm": 1.0703125, "learning_rate": 1.2938223298338352e-05, "loss": 0.35, "step": 8916 }, { "epoch": 1.223365575907251, "grad_norm": 1.25, "learning_rate": 1.2936843501643046e-05, "loss": 0.5381, "step": 8917 }, { "epoch": 1.2235027783494545, "grad_norm": 1.296875, "learning_rate": 1.293546364375455e-05, "loss": 0.5187, "step": 8918 }, { "epoch": 1.223639980791658, "grad_norm": 1.2578125, "learning_rate": 1.2934083724701609e-05, "loss": 0.5013, "step": 8919 }, { "epoch": 1.2237771832338615, "grad_norm": 1.1953125, "learning_rate": 1.2932703744512979e-05, "loss": 0.4178, "step": 8920 }, { "epoch": 1.223914385676065, "grad_norm": 1.140625, "learning_rate": 1.2931323703217418e-05, "loss": 0.401, "step": 8921 }, { "epoch": 1.2240515881182685, "grad_norm": 1.1875, "learning_rate": 1.2929943600843676e-05, "loss": 0.4203, "step": 8922 }, { "epoch": 1.224188790560472, "grad_norm": 1.25, "learning_rate": 1.2928563437420505e-05, "loss": 0.4591, "step": 8923 }, { "epoch": 1.2243259930026755, "grad_norm": 1.203125, "learning_rate": 1.292718321297667e-05, "loss": 0.4084, "step": 8924 }, { "epoch": 1.224463195444879, "grad_norm": 1.2734375, "learning_rate": 1.2925802927540926e-05, "loss": 0.4493, "step": 8925 }, { "epoch": 1.2246003978870823, "grad_norm": 1.21875, "learning_rate": 1.2924422581142037e-05, "loss": 0.4371, "step": 8926 }, { "epoch": 1.2247376003292858, "grad_norm": 1.1953125, "learning_rate": 1.2923042173808762e-05, "loss": 0.4236, "step": 8927 }, { "epoch": 1.2248748027714893, "grad_norm": 1.109375, "learning_rate": 1.2921661705569865e-05, "loss": 0.4039, "step": 8928 }, { "epoch": 1.2250120052136928, "grad_norm": 1.2421875, "learning_rate": 1.2920281176454105e-05, "loss": 0.4283, "step": 8929 }, { "epoch": 1.2251492076558963, "grad_norm": 1.171875, "learning_rate": 1.2918900586490252e-05, "loss": 0.3844, "step": 8930 }, { "epoch": 1.2252864100980998, "grad_norm": 1.1484375, "learning_rate": 1.2917519935707075e-05, "loss": 0.3956, "step": 8931 }, { "epoch": 1.2254236125403033, "grad_norm": 1.203125, "learning_rate": 1.2916139224133336e-05, "loss": 0.4449, "step": 8932 }, { "epoch": 1.2255608149825066, "grad_norm": 1.2734375, "learning_rate": 1.2914758451797807e-05, "loss": 0.4342, "step": 8933 }, { "epoch": 1.22569801742471, "grad_norm": 1.328125, "learning_rate": 1.2913377618729255e-05, "loss": 0.4897, "step": 8934 }, { "epoch": 1.2258352198669136, "grad_norm": 1.125, "learning_rate": 1.2911996724956457e-05, "loss": 0.3496, "step": 8935 }, { "epoch": 1.225972422309117, "grad_norm": 1.15625, "learning_rate": 1.291061577050818e-05, "loss": 0.38, "step": 8936 }, { "epoch": 1.2261096247513206, "grad_norm": 1.3359375, "learning_rate": 1.2909234755413203e-05, "loss": 0.4704, "step": 8937 }, { "epoch": 1.226246827193524, "grad_norm": 1.2109375, "learning_rate": 1.2907853679700298e-05, "loss": 0.4388, "step": 8938 }, { "epoch": 1.2263840296357276, "grad_norm": 1.265625, "learning_rate": 1.2906472543398244e-05, "loss": 0.4916, "step": 8939 }, { "epoch": 1.226521232077931, "grad_norm": 1.1875, "learning_rate": 1.2905091346535819e-05, "loss": 0.386, "step": 8940 }, { "epoch": 1.2266584345201346, "grad_norm": 1.1015625, "learning_rate": 1.2903710089141797e-05, "loss": 0.391, "step": 8941 }, { "epoch": 1.2267956369623378, "grad_norm": 1.203125, "learning_rate": 1.2902328771244967e-05, "loss": 0.4113, "step": 8942 }, { "epoch": 1.2269328394045413, "grad_norm": 1.1875, "learning_rate": 1.2900947392874105e-05, "loss": 0.4401, "step": 8943 }, { "epoch": 1.2270700418467448, "grad_norm": 1.28125, "learning_rate": 1.2899565954057995e-05, "loss": 0.4309, "step": 8944 }, { "epoch": 1.2272072442889483, "grad_norm": 1.3828125, "learning_rate": 1.289818445482542e-05, "loss": 0.4914, "step": 8945 }, { "epoch": 1.2273444467311518, "grad_norm": 1.2890625, "learning_rate": 1.2896802895205167e-05, "loss": 0.3956, "step": 8946 }, { "epoch": 1.2274816491733553, "grad_norm": 1.3515625, "learning_rate": 1.2895421275226024e-05, "loss": 0.4711, "step": 8947 }, { "epoch": 1.2276188516155588, "grad_norm": 1.328125, "learning_rate": 1.2894039594916776e-05, "loss": 0.5, "step": 8948 }, { "epoch": 1.2277560540577621, "grad_norm": 1.2734375, "learning_rate": 1.2892657854306214e-05, "loss": 0.4906, "step": 8949 }, { "epoch": 1.2278932564999656, "grad_norm": 1.203125, "learning_rate": 1.289127605342313e-05, "loss": 0.4443, "step": 8950 }, { "epoch": 1.2280304589421691, "grad_norm": 1.1640625, "learning_rate": 1.288989419229631e-05, "loss": 0.4081, "step": 8951 }, { "epoch": 1.2281676613843726, "grad_norm": 1.46875, "learning_rate": 1.2888512270954552e-05, "loss": 0.4733, "step": 8952 }, { "epoch": 1.228304863826576, "grad_norm": 1.2578125, "learning_rate": 1.288713028942665e-05, "loss": 0.4767, "step": 8953 }, { "epoch": 1.2284420662687796, "grad_norm": 1.3125, "learning_rate": 1.2885748247741398e-05, "loss": 0.4645, "step": 8954 }, { "epoch": 1.228579268710983, "grad_norm": 1.1875, "learning_rate": 1.2884366145927594e-05, "loss": 0.4349, "step": 8955 }, { "epoch": 1.2287164711531866, "grad_norm": 1.21875, "learning_rate": 1.2882983984014033e-05, "loss": 0.4153, "step": 8956 }, { "epoch": 1.22885367359539, "grad_norm": 1.21875, "learning_rate": 1.2881601762029518e-05, "loss": 0.4908, "step": 8957 }, { "epoch": 1.2289908760375934, "grad_norm": 1.1875, "learning_rate": 1.2880219480002847e-05, "loss": 0.4356, "step": 8958 }, { "epoch": 1.2291280784797969, "grad_norm": 1.2578125, "learning_rate": 1.2878837137962825e-05, "loss": 0.5132, "step": 8959 }, { "epoch": 1.2292652809220004, "grad_norm": 1.3359375, "learning_rate": 1.2877454735938253e-05, "loss": 0.5248, "step": 8960 }, { "epoch": 1.2294024833642039, "grad_norm": 1.3125, "learning_rate": 1.2876072273957933e-05, "loss": 0.4911, "step": 8961 }, { "epoch": 1.2295396858064074, "grad_norm": 1.1953125, "learning_rate": 1.2874689752050673e-05, "loss": 0.4582, "step": 8962 }, { "epoch": 1.2296768882486109, "grad_norm": 1.171875, "learning_rate": 1.287330717024528e-05, "loss": 0.4278, "step": 8963 }, { "epoch": 1.2298140906908144, "grad_norm": 1.21875, "learning_rate": 1.287192452857056e-05, "loss": 0.4436, "step": 8964 }, { "epoch": 1.2299512931330177, "grad_norm": 1.15625, "learning_rate": 1.2870541827055323e-05, "loss": 0.4067, "step": 8965 }, { "epoch": 1.2300884955752212, "grad_norm": 1.234375, "learning_rate": 1.2869159065728383e-05, "loss": 0.427, "step": 8966 }, { "epoch": 1.2302256980174247, "grad_norm": 1.2578125, "learning_rate": 1.2867776244618548e-05, "loss": 0.4721, "step": 8967 }, { "epoch": 1.2303629004596282, "grad_norm": 1.3359375, "learning_rate": 1.286639336375463e-05, "loss": 0.4469, "step": 8968 }, { "epoch": 1.2305001029018316, "grad_norm": 1.234375, "learning_rate": 1.2865010423165445e-05, "loss": 0.4588, "step": 8969 }, { "epoch": 1.2306373053440351, "grad_norm": 1.2578125, "learning_rate": 1.286362742287981e-05, "loss": 0.4229, "step": 8970 }, { "epoch": 1.2307745077862386, "grad_norm": 1.1953125, "learning_rate": 1.2862244362926539e-05, "loss": 0.425, "step": 8971 }, { "epoch": 1.2309117102284421, "grad_norm": 1.1875, "learning_rate": 1.2860861243334452e-05, "loss": 0.4262, "step": 8972 }, { "epoch": 1.2310489126706456, "grad_norm": 1.1953125, "learning_rate": 1.2859478064132367e-05, "loss": 0.4325, "step": 8973 }, { "epoch": 1.231186115112849, "grad_norm": 1.328125, "learning_rate": 1.2858094825349104e-05, "loss": 0.4649, "step": 8974 }, { "epoch": 1.2313233175550524, "grad_norm": 1.25, "learning_rate": 1.2856711527013488e-05, "loss": 0.475, "step": 8975 }, { "epoch": 1.231460519997256, "grad_norm": 1.1640625, "learning_rate": 1.285532816915434e-05, "loss": 0.425, "step": 8976 }, { "epoch": 1.2315977224394594, "grad_norm": 1.1328125, "learning_rate": 1.285394475180048e-05, "loss": 0.4185, "step": 8977 }, { "epoch": 1.231734924881663, "grad_norm": 1.3125, "learning_rate": 1.285256127498074e-05, "loss": 0.4843, "step": 8978 }, { "epoch": 1.2318721273238664, "grad_norm": 1.125, "learning_rate": 1.285117773872394e-05, "loss": 0.3963, "step": 8979 }, { "epoch": 1.23200932976607, "grad_norm": 1.2265625, "learning_rate": 1.2849794143058913e-05, "loss": 0.4473, "step": 8980 }, { "epoch": 1.2321465322082732, "grad_norm": 1.3359375, "learning_rate": 1.2848410488014488e-05, "loss": 0.486, "step": 8981 }, { "epoch": 1.2322837346504767, "grad_norm": 1.28125, "learning_rate": 1.2847026773619496e-05, "loss": 0.5083, "step": 8982 }, { "epoch": 1.2324209370926802, "grad_norm": 1.125, "learning_rate": 1.2845642999902763e-05, "loss": 0.4237, "step": 8983 }, { "epoch": 1.2325581395348837, "grad_norm": 1.2109375, "learning_rate": 1.2844259166893127e-05, "loss": 0.4487, "step": 8984 }, { "epoch": 1.2326953419770872, "grad_norm": 1.234375, "learning_rate": 1.2842875274619422e-05, "loss": 0.4508, "step": 8985 }, { "epoch": 1.2328325444192907, "grad_norm": 1.234375, "learning_rate": 1.284149132311048e-05, "loss": 0.4604, "step": 8986 }, { "epoch": 1.2329697468614942, "grad_norm": 1.25, "learning_rate": 1.2840107312395143e-05, "loss": 0.4408, "step": 8987 }, { "epoch": 1.2331069493036977, "grad_norm": 1.203125, "learning_rate": 1.283872324250224e-05, "loss": 0.4677, "step": 8988 }, { "epoch": 1.2332441517459012, "grad_norm": 1.2421875, "learning_rate": 1.2837339113460615e-05, "loss": 0.4546, "step": 8989 }, { "epoch": 1.2333813541881045, "grad_norm": 1.2421875, "learning_rate": 1.2835954925299113e-05, "loss": 0.448, "step": 8990 }, { "epoch": 1.233518556630308, "grad_norm": 1.2265625, "learning_rate": 1.283457067804657e-05, "loss": 0.4666, "step": 8991 }, { "epoch": 1.2336557590725115, "grad_norm": 1.3359375, "learning_rate": 1.2833186371731828e-05, "loss": 0.4034, "step": 8992 }, { "epoch": 1.233792961514715, "grad_norm": 1.328125, "learning_rate": 1.2831802006383732e-05, "loss": 0.4472, "step": 8993 }, { "epoch": 1.2339301639569185, "grad_norm": 1.078125, "learning_rate": 1.2830417582031127e-05, "loss": 0.3613, "step": 8994 }, { "epoch": 1.234067366399122, "grad_norm": 1.3984375, "learning_rate": 1.2829033098702866e-05, "loss": 0.4876, "step": 8995 }, { "epoch": 1.2342045688413255, "grad_norm": 1.3828125, "learning_rate": 1.2827648556427785e-05, "loss": 0.5573, "step": 8996 }, { "epoch": 1.2343417712835287, "grad_norm": 1.34375, "learning_rate": 1.2826263955234742e-05, "loss": 0.4598, "step": 8997 }, { "epoch": 1.2344789737257322, "grad_norm": 1.453125, "learning_rate": 1.2824879295152578e-05, "loss": 0.5132, "step": 8998 }, { "epoch": 1.2346161761679357, "grad_norm": 1.40625, "learning_rate": 1.2823494576210156e-05, "loss": 0.4354, "step": 8999 }, { "epoch": 1.2347533786101392, "grad_norm": 1.2265625, "learning_rate": 1.282210979843632e-05, "loss": 0.4425, "step": 9000 }, { "epoch": 1.2348905810523427, "grad_norm": 1.1171875, "learning_rate": 1.2820724961859927e-05, "loss": 0.3542, "step": 9001 }, { "epoch": 1.2350277834945462, "grad_norm": 1.1484375, "learning_rate": 1.2819340066509829e-05, "loss": 0.4209, "step": 9002 }, { "epoch": 1.2351649859367497, "grad_norm": 1.1953125, "learning_rate": 1.2817955112414886e-05, "loss": 0.4224, "step": 9003 }, { "epoch": 1.2353021883789532, "grad_norm": 1.2109375, "learning_rate": 1.2816570099603958e-05, "loss": 0.463, "step": 9004 }, { "epoch": 1.2354393908211567, "grad_norm": 1.2421875, "learning_rate": 1.2815185028105894e-05, "loss": 0.4326, "step": 9005 }, { "epoch": 1.23557659326336, "grad_norm": 1.2578125, "learning_rate": 1.2813799897949561e-05, "loss": 0.4402, "step": 9006 }, { "epoch": 1.2357137957055635, "grad_norm": 1.1875, "learning_rate": 1.2812414709163821e-05, "loss": 0.4421, "step": 9007 }, { "epoch": 1.235850998147767, "grad_norm": 1.28125, "learning_rate": 1.281102946177753e-05, "loss": 0.4838, "step": 9008 }, { "epoch": 1.2359882005899705, "grad_norm": 1.265625, "learning_rate": 1.2809644155819558e-05, "loss": 0.482, "step": 9009 }, { "epoch": 1.236125403032174, "grad_norm": 1.1875, "learning_rate": 1.2808258791318766e-05, "loss": 0.4544, "step": 9010 }, { "epoch": 1.2362626054743775, "grad_norm": 1.3984375, "learning_rate": 1.2806873368304022e-05, "loss": 0.473, "step": 9011 }, { "epoch": 1.236399807916581, "grad_norm": 1.25, "learning_rate": 1.280548788680419e-05, "loss": 0.4872, "step": 9012 }, { "epoch": 1.2365370103587843, "grad_norm": 1.28125, "learning_rate": 1.2804102346848146e-05, "loss": 0.4501, "step": 9013 }, { "epoch": 1.2366742128009878, "grad_norm": 1.3671875, "learning_rate": 1.2802716748464752e-05, "loss": 0.4373, "step": 9014 }, { "epoch": 1.2368114152431913, "grad_norm": 1.3359375, "learning_rate": 1.2801331091682879e-05, "loss": 0.4894, "step": 9015 }, { "epoch": 1.2369486176853948, "grad_norm": 1.265625, "learning_rate": 1.2799945376531403e-05, "loss": 0.4936, "step": 9016 }, { "epoch": 1.2370858201275983, "grad_norm": 1.265625, "learning_rate": 1.2798559603039195e-05, "loss": 0.4244, "step": 9017 }, { "epoch": 1.2372230225698018, "grad_norm": 1.1640625, "learning_rate": 1.279717377123513e-05, "loss": 0.3438, "step": 9018 }, { "epoch": 1.2373602250120053, "grad_norm": 1.15625, "learning_rate": 1.2795787881148085e-05, "loss": 0.4139, "step": 9019 }, { "epoch": 1.2374974274542088, "grad_norm": 1.171875, "learning_rate": 1.2794401932806934e-05, "loss": 0.4746, "step": 9020 }, { "epoch": 1.2376346298964123, "grad_norm": 1.1953125, "learning_rate": 1.2793015926240559e-05, "loss": 0.4003, "step": 9021 }, { "epoch": 1.2377718323386155, "grad_norm": 1.203125, "learning_rate": 1.279162986147784e-05, "loss": 0.4485, "step": 9022 }, { "epoch": 1.237909034780819, "grad_norm": 1.15625, "learning_rate": 1.279024373854765e-05, "loss": 0.395, "step": 9023 }, { "epoch": 1.2380462372230225, "grad_norm": 1.28125, "learning_rate": 1.2788857557478877e-05, "loss": 0.4971, "step": 9024 }, { "epoch": 1.238183439665226, "grad_norm": 1.421875, "learning_rate": 1.2787471318300402e-05, "loss": 0.5416, "step": 9025 }, { "epoch": 1.2383206421074295, "grad_norm": 1.28125, "learning_rate": 1.278608502104111e-05, "loss": 0.4856, "step": 9026 }, { "epoch": 1.238457844549633, "grad_norm": 1.2578125, "learning_rate": 1.278469866572989e-05, "loss": 0.4536, "step": 9027 }, { "epoch": 1.2385950469918365, "grad_norm": 1.25, "learning_rate": 1.278331225239562e-05, "loss": 0.4734, "step": 9028 }, { "epoch": 1.2387322494340398, "grad_norm": 1.109375, "learning_rate": 1.2781925781067194e-05, "loss": 0.3437, "step": 9029 }, { "epoch": 1.2388694518762433, "grad_norm": 1.2109375, "learning_rate": 1.2780539251773502e-05, "loss": 0.4458, "step": 9030 }, { "epoch": 1.2390066543184468, "grad_norm": 1.2421875, "learning_rate": 1.2779152664543432e-05, "loss": 0.4428, "step": 9031 }, { "epoch": 1.2391438567606503, "grad_norm": 1.0625, "learning_rate": 1.2777766019405872e-05, "loss": 0.3093, "step": 9032 }, { "epoch": 1.2392810592028538, "grad_norm": 1.1953125, "learning_rate": 1.2776379316389723e-05, "loss": 0.432, "step": 9033 }, { "epoch": 1.2394182616450573, "grad_norm": 1.1015625, "learning_rate": 1.277499255552387e-05, "loss": 0.3907, "step": 9034 }, { "epoch": 1.2395554640872608, "grad_norm": 1.1796875, "learning_rate": 1.2773605736837213e-05, "loss": 0.3948, "step": 9035 }, { "epoch": 1.2396926665294643, "grad_norm": 1.28125, "learning_rate": 1.277221886035865e-05, "loss": 0.4684, "step": 9036 }, { "epoch": 1.2398298689716678, "grad_norm": 1.390625, "learning_rate": 1.2770831926117072e-05, "loss": 0.4818, "step": 9037 }, { "epoch": 1.239967071413871, "grad_norm": 1.15625, "learning_rate": 1.2769444934141386e-05, "loss": 0.4157, "step": 9038 }, { "epoch": 1.2401042738560746, "grad_norm": 1.21875, "learning_rate": 1.2768057884460485e-05, "loss": 0.4428, "step": 9039 }, { "epoch": 1.240241476298278, "grad_norm": 1.359375, "learning_rate": 1.2766670777103273e-05, "loss": 0.4564, "step": 9040 }, { "epoch": 1.2403786787404816, "grad_norm": 1.265625, "learning_rate": 1.276528361209865e-05, "loss": 0.4896, "step": 9041 }, { "epoch": 1.240515881182685, "grad_norm": 1.1640625, "learning_rate": 1.2763896389475525e-05, "loss": 0.4365, "step": 9042 }, { "epoch": 1.2406530836248886, "grad_norm": 1.40625, "learning_rate": 1.27625091092628e-05, "loss": 0.4914, "step": 9043 }, { "epoch": 1.240790286067092, "grad_norm": 1.3671875, "learning_rate": 1.2761121771489375e-05, "loss": 0.4641, "step": 9044 }, { "epoch": 1.2409274885092954, "grad_norm": 1.3125, "learning_rate": 1.2759734376184165e-05, "loss": 0.4578, "step": 9045 }, { "epoch": 1.2410646909514989, "grad_norm": 1.0859375, "learning_rate": 1.2758346923376075e-05, "loss": 0.373, "step": 9046 }, { "epoch": 1.2412018933937023, "grad_norm": 1.1328125, "learning_rate": 1.2756959413094014e-05, "loss": 0.4184, "step": 9047 }, { "epoch": 1.2413390958359058, "grad_norm": 1.171875, "learning_rate": 1.2755571845366894e-05, "loss": 0.4156, "step": 9048 }, { "epoch": 1.2414762982781093, "grad_norm": 1.2421875, "learning_rate": 1.2754184220223632e-05, "loss": 0.4189, "step": 9049 }, { "epoch": 1.2416135007203128, "grad_norm": 1.359375, "learning_rate": 1.2752796537693128e-05, "loss": 0.5331, "step": 9050 }, { "epoch": 1.2417507031625163, "grad_norm": 1.15625, "learning_rate": 1.2751408797804305e-05, "loss": 0.4285, "step": 9051 }, { "epoch": 1.2418879056047198, "grad_norm": 1.1015625, "learning_rate": 1.275002100058608e-05, "loss": 0.3724, "step": 9052 }, { "epoch": 1.2420251080469233, "grad_norm": 1.1796875, "learning_rate": 1.2748633146067366e-05, "loss": 0.4123, "step": 9053 }, { "epoch": 1.2421623104891266, "grad_norm": 1.25, "learning_rate": 1.2747245234277079e-05, "loss": 0.4238, "step": 9054 }, { "epoch": 1.2422995129313301, "grad_norm": 1.28125, "learning_rate": 1.2745857265244146e-05, "loss": 0.4158, "step": 9055 }, { "epoch": 1.2424367153735336, "grad_norm": 1.2265625, "learning_rate": 1.274446923899748e-05, "loss": 0.4474, "step": 9056 }, { "epoch": 1.2425739178157371, "grad_norm": 1.1953125, "learning_rate": 1.2743081155566005e-05, "loss": 0.421, "step": 9057 }, { "epoch": 1.2427111202579406, "grad_norm": 1.1796875, "learning_rate": 1.2741693014978642e-05, "loss": 0.4311, "step": 9058 }, { "epoch": 1.2428483227001441, "grad_norm": 1.1640625, "learning_rate": 1.2740304817264318e-05, "loss": 0.3748, "step": 9059 }, { "epoch": 1.2429855251423476, "grad_norm": 1.2265625, "learning_rate": 1.2738916562451952e-05, "loss": 0.4431, "step": 9060 }, { "epoch": 1.243122727584551, "grad_norm": 1.15625, "learning_rate": 1.2737528250570479e-05, "loss": 0.4224, "step": 9061 }, { "epoch": 1.2432599300267544, "grad_norm": 1.296875, "learning_rate": 1.2736139881648819e-05, "loss": 0.3916, "step": 9062 }, { "epoch": 1.2433971324689579, "grad_norm": 1.1875, "learning_rate": 1.2734751455715904e-05, "loss": 0.4337, "step": 9063 }, { "epoch": 1.2435343349111614, "grad_norm": 1.140625, "learning_rate": 1.2733362972800665e-05, "loss": 0.3805, "step": 9064 }, { "epoch": 1.2436715373533649, "grad_norm": 1.265625, "learning_rate": 1.2731974432932027e-05, "loss": 0.4008, "step": 9065 }, { "epoch": 1.2438087397955684, "grad_norm": 1.2734375, "learning_rate": 1.2730585836138932e-05, "loss": 0.4768, "step": 9066 }, { "epoch": 1.2439459422377719, "grad_norm": 1.1484375, "learning_rate": 1.2729197182450304e-05, "loss": 0.4653, "step": 9067 }, { "epoch": 1.2440831446799754, "grad_norm": 1.21875, "learning_rate": 1.272780847189508e-05, "loss": 0.4729, "step": 9068 }, { "epoch": 1.2442203471221789, "grad_norm": 1.3359375, "learning_rate": 1.2726419704502198e-05, "loss": 0.5158, "step": 9069 }, { "epoch": 1.2443575495643822, "grad_norm": 1.2265625, "learning_rate": 1.2725030880300593e-05, "loss": 0.4698, "step": 9070 }, { "epoch": 1.2444947520065857, "grad_norm": 1.3046875, "learning_rate": 1.2723641999319205e-05, "loss": 0.5013, "step": 9071 }, { "epoch": 1.2446319544487892, "grad_norm": 1.15625, "learning_rate": 1.2722253061586971e-05, "loss": 0.3939, "step": 9072 }, { "epoch": 1.2447691568909927, "grad_norm": 1.3515625, "learning_rate": 1.2720864067132832e-05, "loss": 0.4873, "step": 9073 }, { "epoch": 1.2449063593331962, "grad_norm": 1.265625, "learning_rate": 1.271947501598573e-05, "loss": 0.4557, "step": 9074 }, { "epoch": 1.2450435617753997, "grad_norm": 1.2109375, "learning_rate": 1.2718085908174611e-05, "loss": 0.4781, "step": 9075 }, { "epoch": 1.2451807642176032, "grad_norm": 1.3125, "learning_rate": 1.2716696743728413e-05, "loss": 0.4493, "step": 9076 }, { "epoch": 1.2453179666598064, "grad_norm": 1.3125, "learning_rate": 1.2715307522676084e-05, "loss": 0.479, "step": 9077 }, { "epoch": 1.24545516910201, "grad_norm": 1.203125, "learning_rate": 1.271391824504657e-05, "loss": 0.3695, "step": 9078 }, { "epoch": 1.2455923715442134, "grad_norm": 1.203125, "learning_rate": 1.271252891086882e-05, "loss": 0.3455, "step": 9079 }, { "epoch": 1.245729573986417, "grad_norm": 1.34375, "learning_rate": 1.271113952017178e-05, "loss": 0.5619, "step": 9080 }, { "epoch": 1.2458667764286204, "grad_norm": 1.2265625, "learning_rate": 1.2709750072984402e-05, "loss": 0.4268, "step": 9081 }, { "epoch": 1.246003978870824, "grad_norm": 1.234375, "learning_rate": 1.2708360569335637e-05, "loss": 0.4313, "step": 9082 }, { "epoch": 1.2461411813130274, "grad_norm": 1.359375, "learning_rate": 1.2706971009254436e-05, "loss": 0.4896, "step": 9083 }, { "epoch": 1.246278383755231, "grad_norm": 1.34375, "learning_rate": 1.2705581392769757e-05, "loss": 0.5512, "step": 9084 }, { "epoch": 1.2464155861974344, "grad_norm": 1.28125, "learning_rate": 1.2704191719910546e-05, "loss": 0.4752, "step": 9085 }, { "epoch": 1.2465527886396377, "grad_norm": 1.2734375, "learning_rate": 1.2702801990705763e-05, "loss": 0.4318, "step": 9086 }, { "epoch": 1.2466899910818412, "grad_norm": 1.296875, "learning_rate": 1.2701412205184366e-05, "loss": 0.4545, "step": 9087 }, { "epoch": 1.2468271935240447, "grad_norm": 1.2890625, "learning_rate": 1.2700022363375316e-05, "loss": 0.4278, "step": 9088 }, { "epoch": 1.2469643959662482, "grad_norm": 1.375, "learning_rate": 1.2698632465307565e-05, "loss": 0.4497, "step": 9089 }, { "epoch": 1.2471015984084517, "grad_norm": 1.0859375, "learning_rate": 1.2697242511010077e-05, "loss": 0.3827, "step": 9090 }, { "epoch": 1.2472388008506552, "grad_norm": 1.171875, "learning_rate": 1.2695852500511816e-05, "loss": 0.4144, "step": 9091 }, { "epoch": 1.2473760032928587, "grad_norm": 1.1875, "learning_rate": 1.2694462433841744e-05, "loss": 0.4694, "step": 9092 }, { "epoch": 1.247513205735062, "grad_norm": 1.265625, "learning_rate": 1.2693072311028822e-05, "loss": 0.4648, "step": 9093 }, { "epoch": 1.2476504081772655, "grad_norm": 1.3125, "learning_rate": 1.2691682132102015e-05, "loss": 0.4738, "step": 9094 }, { "epoch": 1.247787610619469, "grad_norm": 1.1484375, "learning_rate": 1.2690291897090292e-05, "loss": 0.4232, "step": 9095 }, { "epoch": 1.2479248130616725, "grad_norm": 1.2578125, "learning_rate": 1.2688901606022618e-05, "loss": 0.4577, "step": 9096 }, { "epoch": 1.248062015503876, "grad_norm": 1.2734375, "learning_rate": 1.2687511258927965e-05, "loss": 0.4627, "step": 9097 }, { "epoch": 1.2481992179460795, "grad_norm": 1.2421875, "learning_rate": 1.26861208558353e-05, "loss": 0.4493, "step": 9098 }, { "epoch": 1.248336420388283, "grad_norm": 1.25, "learning_rate": 1.2684730396773595e-05, "loss": 0.4344, "step": 9099 }, { "epoch": 1.2484736228304865, "grad_norm": 1.234375, "learning_rate": 1.2683339881771821e-05, "loss": 0.468, "step": 9100 }, { "epoch": 1.24861082527269, "grad_norm": 1.2109375, "learning_rate": 1.2681949310858954e-05, "loss": 0.4223, "step": 9101 }, { "epoch": 1.2487480277148932, "grad_norm": 1.296875, "learning_rate": 1.2680558684063966e-05, "loss": 0.4845, "step": 9102 }, { "epoch": 1.2488852301570967, "grad_norm": 1.2421875, "learning_rate": 1.2679168001415837e-05, "loss": 0.4609, "step": 9103 }, { "epoch": 1.2490224325993002, "grad_norm": 1.3125, "learning_rate": 1.2677777262943536e-05, "loss": 0.4823, "step": 9104 }, { "epoch": 1.2491596350415037, "grad_norm": 1.1796875, "learning_rate": 1.2676386468676045e-05, "loss": 0.3954, "step": 9105 }, { "epoch": 1.2492968374837072, "grad_norm": 1.1640625, "learning_rate": 1.2674995618642345e-05, "loss": 0.4083, "step": 9106 }, { "epoch": 1.2494340399259107, "grad_norm": 1.234375, "learning_rate": 1.2673604712871412e-05, "loss": 0.48, "step": 9107 }, { "epoch": 1.2495712423681142, "grad_norm": 1.0859375, "learning_rate": 1.2672213751392232e-05, "loss": 0.3986, "step": 9108 }, { "epoch": 1.2497084448103175, "grad_norm": 1.3046875, "learning_rate": 1.2670822734233786e-05, "loss": 0.4835, "step": 9109 }, { "epoch": 1.249845647252521, "grad_norm": 1.3046875, "learning_rate": 1.2669431661425056e-05, "loss": 0.4753, "step": 9110 }, { "epoch": 1.2499828496947245, "grad_norm": 1.2890625, "learning_rate": 1.2668040532995029e-05, "loss": 0.4789, "step": 9111 }, { "epoch": 1.250120052136928, "grad_norm": 1.2578125, "learning_rate": 1.2666649348972691e-05, "loss": 0.4833, "step": 9112 }, { "epoch": 1.2502572545791315, "grad_norm": 1.2421875, "learning_rate": 1.2665258109387026e-05, "loss": 0.5091, "step": 9113 }, { "epoch": 1.250394457021335, "grad_norm": 1.2578125, "learning_rate": 1.2663866814267027e-05, "loss": 0.47, "step": 9114 }, { "epoch": 1.2505316594635385, "grad_norm": 1.3203125, "learning_rate": 1.266247546364168e-05, "loss": 0.4413, "step": 9115 }, { "epoch": 1.250668861905742, "grad_norm": 1.1953125, "learning_rate": 1.266108405753998e-05, "loss": 0.4567, "step": 9116 }, { "epoch": 1.2508060643479455, "grad_norm": 1.1640625, "learning_rate": 1.2659692595990913e-05, "loss": 0.4471, "step": 9117 }, { "epoch": 1.250943266790149, "grad_norm": 1.1015625, "learning_rate": 1.2658301079023478e-05, "loss": 0.3853, "step": 9118 }, { "epoch": 1.2510804692323523, "grad_norm": 1.296875, "learning_rate": 1.2656909506666666e-05, "loss": 0.441, "step": 9119 }, { "epoch": 1.2512176716745558, "grad_norm": 1.1875, "learning_rate": 1.2655517878949473e-05, "loss": 0.4208, "step": 9120 }, { "epoch": 1.2513548741167593, "grad_norm": 1.171875, "learning_rate": 1.2654126195900893e-05, "loss": 0.4011, "step": 9121 }, { "epoch": 1.2514920765589628, "grad_norm": 1.1171875, "learning_rate": 1.2652734457549927e-05, "loss": 0.3781, "step": 9122 }, { "epoch": 1.2516292790011663, "grad_norm": 1.1015625, "learning_rate": 1.2651342663925571e-05, "loss": 0.3633, "step": 9123 }, { "epoch": 1.2517664814433698, "grad_norm": 1.1484375, "learning_rate": 1.2649950815056829e-05, "loss": 0.3668, "step": 9124 }, { "epoch": 1.251903683885573, "grad_norm": 1.2890625, "learning_rate": 1.2648558910972698e-05, "loss": 0.4442, "step": 9125 }, { "epoch": 1.2520408863277765, "grad_norm": 1.2421875, "learning_rate": 1.2647166951702183e-05, "loss": 0.4724, "step": 9126 }, { "epoch": 1.25217808876998, "grad_norm": 1.09375, "learning_rate": 1.2645774937274285e-05, "loss": 0.3744, "step": 9127 }, { "epoch": 1.2523152912121835, "grad_norm": 1.1953125, "learning_rate": 1.264438286771801e-05, "loss": 0.4616, "step": 9128 }, { "epoch": 1.252452493654387, "grad_norm": 1.1796875, "learning_rate": 1.2642990743062367e-05, "loss": 0.3983, "step": 9129 }, { "epoch": 1.2525896960965905, "grad_norm": 1.265625, "learning_rate": 1.2641598563336355e-05, "loss": 0.4664, "step": 9130 }, { "epoch": 1.252726898538794, "grad_norm": 1.2421875, "learning_rate": 1.2640206328568987e-05, "loss": 0.449, "step": 9131 }, { "epoch": 1.2528641009809975, "grad_norm": 1.3515625, "learning_rate": 1.2638814038789274e-05, "loss": 0.4795, "step": 9132 }, { "epoch": 1.253001303423201, "grad_norm": 1.3515625, "learning_rate": 1.263742169402622e-05, "loss": 0.4672, "step": 9133 }, { "epoch": 1.2531385058654045, "grad_norm": 1.203125, "learning_rate": 1.2636029294308843e-05, "loss": 0.445, "step": 9134 }, { "epoch": 1.2532757083076078, "grad_norm": 1.265625, "learning_rate": 1.263463683966615e-05, "loss": 0.4566, "step": 9135 }, { "epoch": 1.2534129107498113, "grad_norm": 1.3125, "learning_rate": 1.263324433012716e-05, "loss": 0.4464, "step": 9136 }, { "epoch": 1.2535501131920148, "grad_norm": 1.25, "learning_rate": 1.2631851765720886e-05, "loss": 0.4337, "step": 9137 }, { "epoch": 1.2536873156342183, "grad_norm": 1.2578125, "learning_rate": 1.2630459146476342e-05, "loss": 0.4495, "step": 9138 }, { "epoch": 1.2538245180764218, "grad_norm": 1.1796875, "learning_rate": 1.2629066472422546e-05, "loss": 0.3952, "step": 9139 }, { "epoch": 1.2539617205186253, "grad_norm": 1.2734375, "learning_rate": 1.2627673743588517e-05, "loss": 0.4741, "step": 9140 }, { "epoch": 1.2540989229608286, "grad_norm": 1.25, "learning_rate": 1.2626280960003275e-05, "loss": 0.4159, "step": 9141 }, { "epoch": 1.254236125403032, "grad_norm": 1.2421875, "learning_rate": 1.262488812169584e-05, "loss": 0.4211, "step": 9142 }, { "epoch": 1.2543733278452356, "grad_norm": 1.3515625, "learning_rate": 1.262349522869523e-05, "loss": 0.429, "step": 9143 }, { "epoch": 1.254510530287439, "grad_norm": 1.28125, "learning_rate": 1.2622102281030474e-05, "loss": 0.4743, "step": 9144 }, { "epoch": 1.2546477327296426, "grad_norm": 1.125, "learning_rate": 1.2620709278730595e-05, "loss": 0.3692, "step": 9145 }, { "epoch": 1.254784935171846, "grad_norm": 1.125, "learning_rate": 1.2619316221824615e-05, "loss": 0.3899, "step": 9146 }, { "epoch": 1.2549221376140496, "grad_norm": 1.203125, "learning_rate": 1.2617923110341562e-05, "loss": 0.4684, "step": 9147 }, { "epoch": 1.255059340056253, "grad_norm": 1.296875, "learning_rate": 1.2616529944310463e-05, "loss": 0.4738, "step": 9148 }, { "epoch": 1.2551965424984566, "grad_norm": 1.25, "learning_rate": 1.2615136723760343e-05, "loss": 0.429, "step": 9149 }, { "epoch": 1.25533374494066, "grad_norm": 1.234375, "learning_rate": 1.2613743448720239e-05, "loss": 0.355, "step": 9150 }, { "epoch": 1.2554709473828634, "grad_norm": 1.3125, "learning_rate": 1.2612350119219179e-05, "loss": 0.437, "step": 9151 }, { "epoch": 1.2556081498250669, "grad_norm": 1.2421875, "learning_rate": 1.2610956735286192e-05, "loss": 0.4155, "step": 9152 }, { "epoch": 1.2557453522672704, "grad_norm": 1.1953125, "learning_rate": 1.2609563296950316e-05, "loss": 0.4145, "step": 9153 }, { "epoch": 1.2558825547094739, "grad_norm": 1.296875, "learning_rate": 1.2608169804240578e-05, "loss": 0.4765, "step": 9154 }, { "epoch": 1.2560197571516774, "grad_norm": 1.203125, "learning_rate": 1.2606776257186022e-05, "loss": 0.4174, "step": 9155 }, { "epoch": 1.2561569595938809, "grad_norm": 1.1796875, "learning_rate": 1.2605382655815678e-05, "loss": 0.4756, "step": 9156 }, { "epoch": 1.2562941620360841, "grad_norm": 1.1953125, "learning_rate": 1.2603989000158588e-05, "loss": 0.4577, "step": 9157 }, { "epoch": 1.2564313644782876, "grad_norm": 1.265625, "learning_rate": 1.2602595290243784e-05, "loss": 0.4628, "step": 9158 }, { "epoch": 1.2565685669204911, "grad_norm": 1.203125, "learning_rate": 1.2601201526100315e-05, "loss": 0.4475, "step": 9159 }, { "epoch": 1.2567057693626946, "grad_norm": 1.265625, "learning_rate": 1.2599807707757217e-05, "loss": 0.46, "step": 9160 }, { "epoch": 1.2568429718048981, "grad_norm": 1.3125, "learning_rate": 1.2598413835243531e-05, "loss": 0.4509, "step": 9161 }, { "epoch": 1.2569801742471016, "grad_norm": 1.25, "learning_rate": 1.2597019908588302e-05, "loss": 0.4446, "step": 9162 }, { "epoch": 1.2571173766893051, "grad_norm": 1.2734375, "learning_rate": 1.2595625927820572e-05, "loss": 0.4606, "step": 9163 }, { "epoch": 1.2572545791315086, "grad_norm": 1.1640625, "learning_rate": 1.259423189296939e-05, "loss": 0.4001, "step": 9164 }, { "epoch": 1.2573917815737121, "grad_norm": 1.34375, "learning_rate": 1.2592837804063805e-05, "loss": 0.4924, "step": 9165 }, { "epoch": 1.2575289840159156, "grad_norm": 1.2734375, "learning_rate": 1.2591443661132858e-05, "loss": 0.5233, "step": 9166 }, { "epoch": 1.257666186458119, "grad_norm": 1.2109375, "learning_rate": 1.2590049464205602e-05, "loss": 0.4546, "step": 9167 }, { "epoch": 1.2578033889003224, "grad_norm": 1.203125, "learning_rate": 1.2588655213311086e-05, "loss": 0.468, "step": 9168 }, { "epoch": 1.257940591342526, "grad_norm": 1.1953125, "learning_rate": 1.258726090847836e-05, "loss": 0.4447, "step": 9169 }, { "epoch": 1.2580777937847294, "grad_norm": 1.34375, "learning_rate": 1.2585866549736478e-05, "loss": 0.4983, "step": 9170 }, { "epoch": 1.258214996226933, "grad_norm": 1.2890625, "learning_rate": 1.2584472137114491e-05, "loss": 0.4484, "step": 9171 }, { "epoch": 1.2583521986691364, "grad_norm": 1.3515625, "learning_rate": 1.2583077670641458e-05, "loss": 0.4667, "step": 9172 }, { "epoch": 1.2584894011113397, "grad_norm": 1.140625, "learning_rate": 1.2581683150346432e-05, "loss": 0.4219, "step": 9173 }, { "epoch": 1.2586266035535432, "grad_norm": 1.203125, "learning_rate": 1.258028857625847e-05, "loss": 0.431, "step": 9174 }, { "epoch": 1.2587638059957467, "grad_norm": 1.2109375, "learning_rate": 1.257889394840663e-05, "loss": 0.4781, "step": 9175 }, { "epoch": 1.2589010084379502, "grad_norm": 1.2890625, "learning_rate": 1.2577499266819968e-05, "loss": 0.464, "step": 9176 }, { "epoch": 1.2590382108801537, "grad_norm": 1.328125, "learning_rate": 1.257610453152755e-05, "loss": 0.4925, "step": 9177 }, { "epoch": 1.2591754133223572, "grad_norm": 1.1875, "learning_rate": 1.2574709742558431e-05, "loss": 0.4114, "step": 9178 }, { "epoch": 1.2593126157645607, "grad_norm": 1.203125, "learning_rate": 1.2573314899941678e-05, "loss": 0.44, "step": 9179 }, { "epoch": 1.2594498182067642, "grad_norm": 1.203125, "learning_rate": 1.257192000370635e-05, "loss": 0.4325, "step": 9180 }, { "epoch": 1.2595870206489677, "grad_norm": 1.203125, "learning_rate": 1.2570525053881517e-05, "loss": 0.4217, "step": 9181 }, { "epoch": 1.2597242230911712, "grad_norm": 1.3203125, "learning_rate": 1.2569130050496245e-05, "loss": 0.4656, "step": 9182 }, { "epoch": 1.2598614255333744, "grad_norm": 1.265625, "learning_rate": 1.2567734993579594e-05, "loss": 0.4608, "step": 9183 }, { "epoch": 1.259998627975578, "grad_norm": 1.25, "learning_rate": 1.2566339883160636e-05, "loss": 0.4684, "step": 9184 }, { "epoch": 1.2601358304177814, "grad_norm": 1.2421875, "learning_rate": 1.2564944719268442e-05, "loss": 0.4894, "step": 9185 }, { "epoch": 1.260273032859985, "grad_norm": 1.296875, "learning_rate": 1.2563549501932077e-05, "loss": 0.4811, "step": 9186 }, { "epoch": 1.2604102353021884, "grad_norm": 1.21875, "learning_rate": 1.2562154231180617e-05, "loss": 0.453, "step": 9187 }, { "epoch": 1.260547437744392, "grad_norm": 1.2578125, "learning_rate": 1.2560758907043133e-05, "loss": 0.4786, "step": 9188 }, { "epoch": 1.2606846401865952, "grad_norm": 1.21875, "learning_rate": 1.2559363529548697e-05, "loss": 0.4241, "step": 9189 }, { "epoch": 1.2608218426287987, "grad_norm": 1.25, "learning_rate": 1.2557968098726387e-05, "loss": 0.4598, "step": 9190 }, { "epoch": 1.2609590450710022, "grad_norm": 1.1875, "learning_rate": 1.2556572614605278e-05, "loss": 0.4128, "step": 9191 }, { "epoch": 1.2610962475132057, "grad_norm": 1.09375, "learning_rate": 1.2555177077214444e-05, "loss": 0.3647, "step": 9192 }, { "epoch": 1.2612334499554092, "grad_norm": 1.359375, "learning_rate": 1.2553781486582964e-05, "loss": 0.5188, "step": 9193 }, { "epoch": 1.2613706523976127, "grad_norm": 1.203125, "learning_rate": 1.2552385842739916e-05, "loss": 0.4453, "step": 9194 }, { "epoch": 1.2615078548398162, "grad_norm": 1.2421875, "learning_rate": 1.2550990145714383e-05, "loss": 0.4737, "step": 9195 }, { "epoch": 1.2616450572820197, "grad_norm": 1.171875, "learning_rate": 1.2549594395535444e-05, "loss": 0.4337, "step": 9196 }, { "epoch": 1.2617822597242232, "grad_norm": 1.3046875, "learning_rate": 1.2548198592232183e-05, "loss": 0.4524, "step": 9197 }, { "epoch": 1.2619194621664267, "grad_norm": 1.1953125, "learning_rate": 1.2546802735833683e-05, "loss": 0.4343, "step": 9198 }, { "epoch": 1.26205666460863, "grad_norm": 1.2421875, "learning_rate": 1.254540682636903e-05, "loss": 0.4327, "step": 9199 }, { "epoch": 1.2621938670508335, "grad_norm": 1.2265625, "learning_rate": 1.2544010863867306e-05, "loss": 0.4637, "step": 9200 }, { "epoch": 1.262331069493037, "grad_norm": 1.3515625, "learning_rate": 1.2542614848357601e-05, "loss": 0.4423, "step": 9201 }, { "epoch": 1.2624682719352405, "grad_norm": 1.3125, "learning_rate": 1.2541218779869002e-05, "loss": 0.444, "step": 9202 }, { "epoch": 1.262605474377444, "grad_norm": 1.234375, "learning_rate": 1.2539822658430596e-05, "loss": 0.4347, "step": 9203 }, { "epoch": 1.2627426768196475, "grad_norm": 1.1875, "learning_rate": 1.2538426484071475e-05, "loss": 0.401, "step": 9204 }, { "epoch": 1.2628798792618507, "grad_norm": 1.3203125, "learning_rate": 1.2537030256820733e-05, "loss": 0.4927, "step": 9205 }, { "epoch": 1.2630170817040542, "grad_norm": 1.2421875, "learning_rate": 1.2535633976707455e-05, "loss": 0.4337, "step": 9206 }, { "epoch": 1.2631542841462577, "grad_norm": 1.234375, "learning_rate": 1.2534237643760743e-05, "loss": 0.4525, "step": 9207 }, { "epoch": 1.2632914865884612, "grad_norm": 1.28125, "learning_rate": 1.2532841258009687e-05, "loss": 0.4659, "step": 9208 }, { "epoch": 1.2634286890306647, "grad_norm": 1.1796875, "learning_rate": 1.2531444819483382e-05, "loss": 0.3849, "step": 9209 }, { "epoch": 1.2635658914728682, "grad_norm": 1.3046875, "learning_rate": 1.2530048328210927e-05, "loss": 0.4581, "step": 9210 }, { "epoch": 1.2637030939150717, "grad_norm": 1.1484375, "learning_rate": 1.2528651784221417e-05, "loss": 0.3665, "step": 9211 }, { "epoch": 1.2638402963572752, "grad_norm": 1.2578125, "learning_rate": 1.2527255187543955e-05, "loss": 0.4082, "step": 9212 }, { "epoch": 1.2639774987994787, "grad_norm": 1.265625, "learning_rate": 1.2525858538207633e-05, "loss": 0.4682, "step": 9213 }, { "epoch": 1.2641147012416822, "grad_norm": 1.2265625, "learning_rate": 1.2524461836241563e-05, "loss": 0.4567, "step": 9214 }, { "epoch": 1.2642519036838855, "grad_norm": 1.2421875, "learning_rate": 1.2523065081674842e-05, "loss": 0.4221, "step": 9215 }, { "epoch": 1.264389106126089, "grad_norm": 1.1640625, "learning_rate": 1.252166827453657e-05, "loss": 0.3666, "step": 9216 }, { "epoch": 1.2645263085682925, "grad_norm": 1.1328125, "learning_rate": 1.2520271414855858e-05, "loss": 0.3954, "step": 9217 }, { "epoch": 1.264663511010496, "grad_norm": 1.21875, "learning_rate": 1.2518874502661806e-05, "loss": 0.4451, "step": 9218 }, { "epoch": 1.2648007134526995, "grad_norm": 1.25, "learning_rate": 1.2517477537983527e-05, "loss": 0.4481, "step": 9219 }, { "epoch": 1.264937915894903, "grad_norm": 1.296875, "learning_rate": 1.251608052085012e-05, "loss": 0.5021, "step": 9220 }, { "epoch": 1.2650751183371063, "grad_norm": 1.1328125, "learning_rate": 1.2514683451290697e-05, "loss": 0.3598, "step": 9221 }, { "epoch": 1.2652123207793098, "grad_norm": 1.171875, "learning_rate": 1.2513286329334371e-05, "loss": 0.4321, "step": 9222 }, { "epoch": 1.2653495232215133, "grad_norm": 1.15625, "learning_rate": 1.2511889155010251e-05, "loss": 0.4096, "step": 9223 }, { "epoch": 1.2654867256637168, "grad_norm": 1.234375, "learning_rate": 1.2510491928347448e-05, "loss": 0.5039, "step": 9224 }, { "epoch": 1.2656239281059203, "grad_norm": 1.25, "learning_rate": 1.2509094649375077e-05, "loss": 0.454, "step": 9225 }, { "epoch": 1.2657611305481238, "grad_norm": 1.1484375, "learning_rate": 1.2507697318122252e-05, "loss": 0.4185, "step": 9226 }, { "epoch": 1.2658983329903273, "grad_norm": 1.4140625, "learning_rate": 1.2506299934618088e-05, "loss": 0.5558, "step": 9227 }, { "epoch": 1.2660355354325308, "grad_norm": 1.0859375, "learning_rate": 1.2504902498891704e-05, "loss": 0.3484, "step": 9228 }, { "epoch": 1.2661727378747343, "grad_norm": 1.25, "learning_rate": 1.250350501097221e-05, "loss": 0.4765, "step": 9229 }, { "epoch": 1.2663099403169378, "grad_norm": 1.2109375, "learning_rate": 1.2502107470888732e-05, "loss": 0.4658, "step": 9230 }, { "epoch": 1.266447142759141, "grad_norm": 1.2265625, "learning_rate": 1.2500709878670385e-05, "loss": 0.375, "step": 9231 }, { "epoch": 1.2665843452013446, "grad_norm": 1.140625, "learning_rate": 1.2499312234346292e-05, "loss": 0.3977, "step": 9232 }, { "epoch": 1.266721547643548, "grad_norm": 1.265625, "learning_rate": 1.2497914537945578e-05, "loss": 0.4515, "step": 9233 }, { "epoch": 1.2668587500857516, "grad_norm": 1.34375, "learning_rate": 1.2496516789497357e-05, "loss": 0.4988, "step": 9234 }, { "epoch": 1.266995952527955, "grad_norm": 1.1875, "learning_rate": 1.2495118989030763e-05, "loss": 0.3778, "step": 9235 }, { "epoch": 1.2671331549701585, "grad_norm": 1.265625, "learning_rate": 1.2493721136574916e-05, "loss": 0.5042, "step": 9236 }, { "epoch": 1.2672703574123618, "grad_norm": 1.234375, "learning_rate": 1.2492323232158943e-05, "loss": 0.4651, "step": 9237 }, { "epoch": 1.2674075598545653, "grad_norm": 1.3125, "learning_rate": 1.2490925275811969e-05, "loss": 0.5761, "step": 9238 }, { "epoch": 1.2675447622967688, "grad_norm": 1.328125, "learning_rate": 1.2489527267563126e-05, "loss": 0.4868, "step": 9239 }, { "epoch": 1.2676819647389723, "grad_norm": 1.1328125, "learning_rate": 1.2488129207441541e-05, "loss": 0.3941, "step": 9240 }, { "epoch": 1.2678191671811758, "grad_norm": 1.21875, "learning_rate": 1.2486731095476348e-05, "loss": 0.4358, "step": 9241 }, { "epoch": 1.2679563696233793, "grad_norm": 1.3203125, "learning_rate": 1.2485332931696673e-05, "loss": 0.4818, "step": 9242 }, { "epoch": 1.2680935720655828, "grad_norm": 1.25, "learning_rate": 1.2483934716131655e-05, "loss": 0.4877, "step": 9243 }, { "epoch": 1.2682307745077863, "grad_norm": 1.328125, "learning_rate": 1.248253644881042e-05, "loss": 0.4701, "step": 9244 }, { "epoch": 1.2683679769499898, "grad_norm": 1.1015625, "learning_rate": 1.248113812976211e-05, "loss": 0.3412, "step": 9245 }, { "epoch": 1.2685051793921933, "grad_norm": 1.296875, "learning_rate": 1.2479739759015862e-05, "loss": 0.4474, "step": 9246 }, { "epoch": 1.2686423818343966, "grad_norm": 1.359375, "learning_rate": 1.2478341336600803e-05, "loss": 0.5236, "step": 9247 }, { "epoch": 1.2687795842766, "grad_norm": 1.234375, "learning_rate": 1.247694286254608e-05, "loss": 0.44, "step": 9248 }, { "epoch": 1.2689167867188036, "grad_norm": 1.21875, "learning_rate": 1.2475544336880829e-05, "loss": 0.4306, "step": 9249 }, { "epoch": 1.269053989161007, "grad_norm": 1.2578125, "learning_rate": 1.2474145759634188e-05, "loss": 0.4811, "step": 9250 }, { "epoch": 1.2691911916032106, "grad_norm": 1.2265625, "learning_rate": 1.2472747130835305e-05, "loss": 0.4281, "step": 9251 }, { "epoch": 1.269328394045414, "grad_norm": 1.1640625, "learning_rate": 1.2471348450513316e-05, "loss": 0.4001, "step": 9252 }, { "epoch": 1.2694655964876174, "grad_norm": 1.1484375, "learning_rate": 1.2469949718697369e-05, "loss": 0.3982, "step": 9253 }, { "epoch": 1.2696027989298209, "grad_norm": 1.171875, "learning_rate": 1.2468550935416606e-05, "loss": 0.4906, "step": 9254 }, { "epoch": 1.2697400013720244, "grad_norm": 1.296875, "learning_rate": 1.2467152100700171e-05, "loss": 0.4724, "step": 9255 }, { "epoch": 1.2698772038142279, "grad_norm": 1.1875, "learning_rate": 1.2465753214577211e-05, "loss": 0.3921, "step": 9256 }, { "epoch": 1.2700144062564314, "grad_norm": 1.265625, "learning_rate": 1.2464354277076877e-05, "loss": 0.4547, "step": 9257 }, { "epoch": 1.2701516086986349, "grad_norm": 1.1953125, "learning_rate": 1.2462955288228318e-05, "loss": 0.4787, "step": 9258 }, { "epoch": 1.2702888111408384, "grad_norm": 1.296875, "learning_rate": 1.2461556248060677e-05, "loss": 0.5032, "step": 9259 }, { "epoch": 1.2704260135830419, "grad_norm": 1.21875, "learning_rate": 1.2460157156603114e-05, "loss": 0.4643, "step": 9260 }, { "epoch": 1.2705632160252454, "grad_norm": 1.1953125, "learning_rate": 1.2458758013884776e-05, "loss": 0.428, "step": 9261 }, { "epoch": 1.2707004184674489, "grad_norm": 1.1953125, "learning_rate": 1.2457358819934817e-05, "loss": 0.4274, "step": 9262 }, { "epoch": 1.2708376209096521, "grad_norm": 1.2578125, "learning_rate": 1.2455959574782392e-05, "loss": 0.4858, "step": 9263 }, { "epoch": 1.2709748233518556, "grad_norm": 1.2109375, "learning_rate": 1.2454560278456654e-05, "loss": 0.3958, "step": 9264 }, { "epoch": 1.2711120257940591, "grad_norm": 1.28125, "learning_rate": 1.2453160930986759e-05, "loss": 0.4256, "step": 9265 }, { "epoch": 1.2712492282362626, "grad_norm": 1.3125, "learning_rate": 1.2451761532401868e-05, "loss": 0.494, "step": 9266 }, { "epoch": 1.2713864306784661, "grad_norm": 1.3671875, "learning_rate": 1.2450362082731136e-05, "loss": 0.5021, "step": 9267 }, { "epoch": 1.2715236331206696, "grad_norm": 1.2421875, "learning_rate": 1.2448962582003722e-05, "loss": 0.4303, "step": 9268 }, { "epoch": 1.271660835562873, "grad_norm": 1.2734375, "learning_rate": 1.244756303024879e-05, "loss": 0.5068, "step": 9269 }, { "epoch": 1.2717980380050764, "grad_norm": 1.21875, "learning_rate": 1.2446163427495499e-05, "loss": 0.4376, "step": 9270 }, { "epoch": 1.27193524044728, "grad_norm": 1.2421875, "learning_rate": 1.2444763773773017e-05, "loss": 0.4446, "step": 9271 }, { "epoch": 1.2720724428894834, "grad_norm": 1.203125, "learning_rate": 1.2443364069110499e-05, "loss": 0.4365, "step": 9272 }, { "epoch": 1.272209645331687, "grad_norm": 1.171875, "learning_rate": 1.2441964313537113e-05, "loss": 0.4392, "step": 9273 }, { "epoch": 1.2723468477738904, "grad_norm": 1.265625, "learning_rate": 1.244056450708203e-05, "loss": 0.461, "step": 9274 }, { "epoch": 1.272484050216094, "grad_norm": 1.3125, "learning_rate": 1.2439164649774411e-05, "loss": 0.5118, "step": 9275 }, { "epoch": 1.2726212526582974, "grad_norm": 1.1640625, "learning_rate": 1.2437764741643424e-05, "loss": 0.4369, "step": 9276 }, { "epoch": 1.272758455100501, "grad_norm": 1.3203125, "learning_rate": 1.243636478271824e-05, "loss": 0.4836, "step": 9277 }, { "epoch": 1.2728956575427044, "grad_norm": 1.2421875, "learning_rate": 1.243496477302803e-05, "loss": 0.4655, "step": 9278 }, { "epoch": 1.2730328599849077, "grad_norm": 1.1640625, "learning_rate": 1.2433564712601961e-05, "loss": 0.405, "step": 9279 }, { "epoch": 1.2731700624271112, "grad_norm": 1.328125, "learning_rate": 1.243216460146921e-05, "loss": 0.4769, "step": 9280 }, { "epoch": 1.2733072648693147, "grad_norm": 1.2265625, "learning_rate": 1.2430764439658951e-05, "loss": 0.4597, "step": 9281 }, { "epoch": 1.2734444673115182, "grad_norm": 1.2265625, "learning_rate": 1.2429364227200354e-05, "loss": 0.4798, "step": 9282 }, { "epoch": 1.2735816697537217, "grad_norm": 1.109375, "learning_rate": 1.2427963964122597e-05, "loss": 0.3877, "step": 9283 }, { "epoch": 1.2737188721959252, "grad_norm": 1.125, "learning_rate": 1.2426563650454853e-05, "loss": 0.3981, "step": 9284 }, { "epoch": 1.2738560746381284, "grad_norm": 1.28125, "learning_rate": 1.24251632862263e-05, "loss": 0.4586, "step": 9285 }, { "epoch": 1.273993277080332, "grad_norm": 1.21875, "learning_rate": 1.2423762871466121e-05, "loss": 0.3659, "step": 9286 }, { "epoch": 1.2741304795225354, "grad_norm": 1.1796875, "learning_rate": 1.2422362406203494e-05, "loss": 0.379, "step": 9287 }, { "epoch": 1.274267681964739, "grad_norm": 1.171875, "learning_rate": 1.2420961890467598e-05, "loss": 0.4222, "step": 9288 }, { "epoch": 1.2744048844069424, "grad_norm": 1.28125, "learning_rate": 1.2419561324287615e-05, "loss": 0.4731, "step": 9289 }, { "epoch": 1.274542086849146, "grad_norm": 1.2109375, "learning_rate": 1.241816070769273e-05, "loss": 0.4207, "step": 9290 }, { "epoch": 1.2746792892913494, "grad_norm": 1.296875, "learning_rate": 1.2416760040712122e-05, "loss": 0.4643, "step": 9291 }, { "epoch": 1.274816491733553, "grad_norm": 1.2890625, "learning_rate": 1.241535932337498e-05, "loss": 0.4786, "step": 9292 }, { "epoch": 1.2749536941757564, "grad_norm": 1.2734375, "learning_rate": 1.2413958555710488e-05, "loss": 0.4537, "step": 9293 }, { "epoch": 1.27509089661796, "grad_norm": 1.2421875, "learning_rate": 1.2412557737747833e-05, "loss": 0.4683, "step": 9294 }, { "epoch": 1.2752280990601632, "grad_norm": 1.1953125, "learning_rate": 1.2411156869516204e-05, "loss": 0.4007, "step": 9295 }, { "epoch": 1.2753653015023667, "grad_norm": 1.1640625, "learning_rate": 1.2409755951044791e-05, "loss": 0.4159, "step": 9296 }, { "epoch": 1.2755025039445702, "grad_norm": 1.3203125, "learning_rate": 1.240835498236278e-05, "loss": 0.4954, "step": 9297 }, { "epoch": 1.2756397063867737, "grad_norm": 1.5859375, "learning_rate": 1.2406953963499366e-05, "loss": 0.5889, "step": 9298 }, { "epoch": 1.2757769088289772, "grad_norm": 1.1015625, "learning_rate": 1.240555289448374e-05, "loss": 0.3551, "step": 9299 }, { "epoch": 1.2759141112711807, "grad_norm": 1.296875, "learning_rate": 1.2404151775345096e-05, "loss": 0.4866, "step": 9300 }, { "epoch": 1.276051313713384, "grad_norm": 1.1953125, "learning_rate": 1.2402750606112626e-05, "loss": 0.3749, "step": 9301 }, { "epoch": 1.2761885161555875, "grad_norm": 1.2578125, "learning_rate": 1.2401349386815527e-05, "loss": 0.47, "step": 9302 }, { "epoch": 1.276325718597791, "grad_norm": 1.2109375, "learning_rate": 1.2399948117482995e-05, "loss": 0.4376, "step": 9303 }, { "epoch": 1.2764629210399945, "grad_norm": 1.1171875, "learning_rate": 1.2398546798144226e-05, "loss": 0.3692, "step": 9304 }, { "epoch": 1.276600123482198, "grad_norm": 1.2421875, "learning_rate": 1.239714542882842e-05, "loss": 0.4489, "step": 9305 }, { "epoch": 1.2767373259244015, "grad_norm": 1.1484375, "learning_rate": 1.2395744009564777e-05, "loss": 0.3982, "step": 9306 }, { "epoch": 1.276874528366605, "grad_norm": 1.3203125, "learning_rate": 1.2394342540382498e-05, "loss": 0.4635, "step": 9307 }, { "epoch": 1.2770117308088085, "grad_norm": 1.2421875, "learning_rate": 1.2392941021310783e-05, "loss": 0.4578, "step": 9308 }, { "epoch": 1.277148933251012, "grad_norm": 1.203125, "learning_rate": 1.2391539452378833e-05, "loss": 0.4014, "step": 9309 }, { "epoch": 1.2772861356932155, "grad_norm": 1.2421875, "learning_rate": 1.2390137833615854e-05, "loss": 0.4188, "step": 9310 }, { "epoch": 1.2774233381354188, "grad_norm": 1.1875, "learning_rate": 1.2388736165051052e-05, "loss": 0.4123, "step": 9311 }, { "epoch": 1.2775605405776223, "grad_norm": 1.2265625, "learning_rate": 1.2387334446713628e-05, "loss": 0.4353, "step": 9312 }, { "epoch": 1.2776977430198257, "grad_norm": 1.2109375, "learning_rate": 1.2385932678632793e-05, "loss": 0.4621, "step": 9313 }, { "epoch": 1.2778349454620292, "grad_norm": 1.2578125, "learning_rate": 1.2384530860837753e-05, "loss": 0.4487, "step": 9314 }, { "epoch": 1.2779721479042327, "grad_norm": 1.265625, "learning_rate": 1.2383128993357716e-05, "loss": 0.4221, "step": 9315 }, { "epoch": 1.2781093503464362, "grad_norm": 1.3671875, "learning_rate": 1.2381727076221897e-05, "loss": 0.4238, "step": 9316 }, { "epoch": 1.2782465527886395, "grad_norm": 1.234375, "learning_rate": 1.23803251094595e-05, "loss": 0.4396, "step": 9317 }, { "epoch": 1.278383755230843, "grad_norm": 1.34375, "learning_rate": 1.237892309309974e-05, "loss": 0.5097, "step": 9318 }, { "epoch": 1.2785209576730465, "grad_norm": 1.1484375, "learning_rate": 1.237752102717183e-05, "loss": 0.4101, "step": 9319 }, { "epoch": 1.27865816011525, "grad_norm": 1.1796875, "learning_rate": 1.2376118911704984e-05, "loss": 0.3989, "step": 9320 }, { "epoch": 1.2787953625574535, "grad_norm": 1.3046875, "learning_rate": 1.2374716746728416e-05, "loss": 0.4613, "step": 9321 }, { "epoch": 1.278932564999657, "grad_norm": 1.25, "learning_rate": 1.2373314532271344e-05, "loss": 0.4593, "step": 9322 }, { "epoch": 1.2790697674418605, "grad_norm": 1.203125, "learning_rate": 1.2371912268362983e-05, "loss": 0.454, "step": 9323 }, { "epoch": 1.279206969884064, "grad_norm": 1.1953125, "learning_rate": 1.237050995503255e-05, "loss": 0.4282, "step": 9324 }, { "epoch": 1.2793441723262675, "grad_norm": 1.2890625, "learning_rate": 1.2369107592309272e-05, "loss": 0.4805, "step": 9325 }, { "epoch": 1.279481374768471, "grad_norm": 1.2109375, "learning_rate": 1.2367705180222359e-05, "loss": 0.4623, "step": 9326 }, { "epoch": 1.2796185772106743, "grad_norm": 1.1015625, "learning_rate": 1.2366302718801037e-05, "loss": 0.3863, "step": 9327 }, { "epoch": 1.2797557796528778, "grad_norm": 1.2109375, "learning_rate": 1.2364900208074527e-05, "loss": 0.4612, "step": 9328 }, { "epoch": 1.2798929820950813, "grad_norm": 1.3046875, "learning_rate": 1.2363497648072053e-05, "loss": 0.4578, "step": 9329 }, { "epoch": 1.2800301845372848, "grad_norm": 1.296875, "learning_rate": 1.2362095038822841e-05, "loss": 0.4971, "step": 9330 }, { "epoch": 1.2801673869794883, "grad_norm": 1.234375, "learning_rate": 1.2360692380356111e-05, "loss": 0.4384, "step": 9331 }, { "epoch": 1.2803045894216918, "grad_norm": 1.71875, "learning_rate": 1.2359289672701095e-05, "loss": 0.499, "step": 9332 }, { "epoch": 1.280441791863895, "grad_norm": 1.1953125, "learning_rate": 1.2357886915887023e-05, "loss": 0.4313, "step": 9333 }, { "epoch": 1.2805789943060986, "grad_norm": 1.2734375, "learning_rate": 1.2356484109943114e-05, "loss": 0.4543, "step": 9334 }, { "epoch": 1.280716196748302, "grad_norm": 1.2109375, "learning_rate": 1.2355081254898602e-05, "loss": 0.4576, "step": 9335 }, { "epoch": 1.2808533991905056, "grad_norm": 1.171875, "learning_rate": 1.2353678350782718e-05, "loss": 0.4098, "step": 9336 }, { "epoch": 1.280990601632709, "grad_norm": 1.2421875, "learning_rate": 1.2352275397624693e-05, "loss": 0.4812, "step": 9337 }, { "epoch": 1.2811278040749126, "grad_norm": 1.2109375, "learning_rate": 1.235087239545376e-05, "loss": 0.4342, "step": 9338 }, { "epoch": 1.281265006517116, "grad_norm": 1.28125, "learning_rate": 1.2349469344299148e-05, "loss": 0.4709, "step": 9339 }, { "epoch": 1.2814022089593196, "grad_norm": 1.1484375, "learning_rate": 1.2348066244190098e-05, "loss": 0.3882, "step": 9340 }, { "epoch": 1.281539411401523, "grad_norm": 1.328125, "learning_rate": 1.2346663095155843e-05, "loss": 0.5312, "step": 9341 }, { "epoch": 1.2816766138437266, "grad_norm": 1.1640625, "learning_rate": 1.2345259897225619e-05, "loss": 0.4482, "step": 9342 }, { "epoch": 1.2818138162859298, "grad_norm": 1.296875, "learning_rate": 1.2343856650428662e-05, "loss": 0.4599, "step": 9343 }, { "epoch": 1.2819510187281333, "grad_norm": 1.203125, "learning_rate": 1.2342453354794218e-05, "loss": 0.465, "step": 9344 }, { "epoch": 1.2820882211703368, "grad_norm": 1.203125, "learning_rate": 1.2341050010351518e-05, "loss": 0.4248, "step": 9345 }, { "epoch": 1.2822254236125403, "grad_norm": 1.25, "learning_rate": 1.2339646617129807e-05, "loss": 0.4456, "step": 9346 }, { "epoch": 1.2823626260547438, "grad_norm": 1.2890625, "learning_rate": 1.2338243175158322e-05, "loss": 0.4383, "step": 9347 }, { "epoch": 1.2824998284969473, "grad_norm": 1.1640625, "learning_rate": 1.233683968446631e-05, "loss": 0.3788, "step": 9348 }, { "epoch": 1.2826370309391506, "grad_norm": 1.3203125, "learning_rate": 1.2335436145083016e-05, "loss": 0.4173, "step": 9349 }, { "epoch": 1.282774233381354, "grad_norm": 1.03125, "learning_rate": 1.233403255703768e-05, "loss": 0.3313, "step": 9350 }, { "epoch": 1.2829114358235576, "grad_norm": 1.2109375, "learning_rate": 1.2332628920359552e-05, "loss": 0.4494, "step": 9351 }, { "epoch": 1.283048638265761, "grad_norm": 1.2578125, "learning_rate": 1.2331225235077877e-05, "loss": 0.4633, "step": 9352 }, { "epoch": 1.2831858407079646, "grad_norm": 1.1484375, "learning_rate": 1.2329821501221903e-05, "loss": 0.3712, "step": 9353 }, { "epoch": 1.283323043150168, "grad_norm": 1.2109375, "learning_rate": 1.2328417718820877e-05, "loss": 0.4172, "step": 9354 }, { "epoch": 1.2834602455923716, "grad_norm": 1.265625, "learning_rate": 1.2327013887904048e-05, "loss": 0.5187, "step": 9355 }, { "epoch": 1.283597448034575, "grad_norm": 1.25, "learning_rate": 1.2325610008500672e-05, "loss": 0.488, "step": 9356 }, { "epoch": 1.2837346504767786, "grad_norm": 1.25, "learning_rate": 1.2324206080639996e-05, "loss": 0.4953, "step": 9357 }, { "epoch": 1.283871852918982, "grad_norm": 1.203125, "learning_rate": 1.2322802104351275e-05, "loss": 0.4162, "step": 9358 }, { "epoch": 1.2840090553611854, "grad_norm": 1.3203125, "learning_rate": 1.2321398079663762e-05, "loss": 0.4595, "step": 9359 }, { "epoch": 1.2841462578033889, "grad_norm": 1.1796875, "learning_rate": 1.2319994006606712e-05, "loss": 0.416, "step": 9360 }, { "epoch": 1.2842834602455924, "grad_norm": 1.21875, "learning_rate": 1.2318589885209382e-05, "loss": 0.4454, "step": 9361 }, { "epoch": 1.2844206626877959, "grad_norm": 1.15625, "learning_rate": 1.2317185715501026e-05, "loss": 0.4041, "step": 9362 }, { "epoch": 1.2845578651299994, "grad_norm": 1.1484375, "learning_rate": 1.2315781497510903e-05, "loss": 0.3592, "step": 9363 }, { "epoch": 1.2846950675722029, "grad_norm": 1.234375, "learning_rate": 1.2314377231268273e-05, "loss": 0.4754, "step": 9364 }, { "epoch": 1.2848322700144061, "grad_norm": 1.09375, "learning_rate": 1.2312972916802394e-05, "loss": 0.3926, "step": 9365 }, { "epoch": 1.2849694724566096, "grad_norm": 1.1484375, "learning_rate": 1.2311568554142529e-05, "loss": 0.4172, "step": 9366 }, { "epoch": 1.2851066748988131, "grad_norm": 1.28125, "learning_rate": 1.2310164143317937e-05, "loss": 0.4933, "step": 9367 }, { "epoch": 1.2852438773410166, "grad_norm": 1.2265625, "learning_rate": 1.2308759684357882e-05, "loss": 0.4249, "step": 9368 }, { "epoch": 1.2853810797832201, "grad_norm": 1.2109375, "learning_rate": 1.2307355177291631e-05, "loss": 0.4894, "step": 9369 }, { "epoch": 1.2855182822254236, "grad_norm": 1.3046875, "learning_rate": 1.2305950622148448e-05, "loss": 0.4837, "step": 9370 }, { "epoch": 1.2856554846676271, "grad_norm": 1.28125, "learning_rate": 1.2304546018957595e-05, "loss": 0.462, "step": 9371 }, { "epoch": 1.2857926871098306, "grad_norm": 1.109375, "learning_rate": 1.2303141367748339e-05, "loss": 0.3581, "step": 9372 }, { "epoch": 1.2859298895520341, "grad_norm": 1.3046875, "learning_rate": 1.230173666854995e-05, "loss": 0.4654, "step": 9373 }, { "epoch": 1.2860670919942376, "grad_norm": 1.171875, "learning_rate": 1.2300331921391698e-05, "loss": 0.3969, "step": 9374 }, { "epoch": 1.286204294436441, "grad_norm": 1.140625, "learning_rate": 1.229892712630285e-05, "loss": 0.3795, "step": 9375 }, { "epoch": 1.2863414968786444, "grad_norm": 1.28125, "learning_rate": 1.2297522283312679e-05, "loss": 0.458, "step": 9376 }, { "epoch": 1.286478699320848, "grad_norm": 1.2578125, "learning_rate": 1.2296117392450457e-05, "loss": 0.4676, "step": 9377 }, { "epoch": 1.2866159017630514, "grad_norm": 1.21875, "learning_rate": 1.2294712453745457e-05, "loss": 0.4327, "step": 9378 }, { "epoch": 1.286753104205255, "grad_norm": 1.2265625, "learning_rate": 1.2293307467226949e-05, "loss": 0.389, "step": 9379 }, { "epoch": 1.2868903066474584, "grad_norm": 1.21875, "learning_rate": 1.2291902432924212e-05, "loss": 0.4321, "step": 9380 }, { "epoch": 1.2870275090896617, "grad_norm": 1.3515625, "learning_rate": 1.229049735086652e-05, "loss": 0.491, "step": 9381 }, { "epoch": 1.2871647115318652, "grad_norm": 1.1171875, "learning_rate": 1.228909222108315e-05, "loss": 0.399, "step": 9382 }, { "epoch": 1.2873019139740687, "grad_norm": 1.2109375, "learning_rate": 1.228768704360338e-05, "loss": 0.4737, "step": 9383 }, { "epoch": 1.2874391164162722, "grad_norm": 1.2578125, "learning_rate": 1.228628181845649e-05, "loss": 0.4362, "step": 9384 }, { "epoch": 1.2875763188584757, "grad_norm": 1.1953125, "learning_rate": 1.2284876545671758e-05, "loss": 0.4107, "step": 9385 }, { "epoch": 1.2877135213006792, "grad_norm": 1.2421875, "learning_rate": 1.2283471225278464e-05, "loss": 0.4552, "step": 9386 }, { "epoch": 1.2878507237428827, "grad_norm": 1.1875, "learning_rate": 1.2282065857305898e-05, "loss": 0.4141, "step": 9387 }, { "epoch": 1.2879879261850862, "grad_norm": 1.1796875, "learning_rate": 1.228066044178333e-05, "loss": 0.3392, "step": 9388 }, { "epoch": 1.2881251286272897, "grad_norm": 1.2265625, "learning_rate": 1.2279254978740052e-05, "loss": 0.4425, "step": 9389 }, { "epoch": 1.2882623310694932, "grad_norm": 1.1484375, "learning_rate": 1.2277849468205344e-05, "loss": 0.3749, "step": 9390 }, { "epoch": 1.2883995335116964, "grad_norm": 1.1875, "learning_rate": 1.2276443910208499e-05, "loss": 0.419, "step": 9391 }, { "epoch": 1.2885367359539, "grad_norm": 1.1953125, "learning_rate": 1.2275038304778796e-05, "loss": 0.3929, "step": 9392 }, { "epoch": 1.2886739383961034, "grad_norm": 1.171875, "learning_rate": 1.2273632651945526e-05, "loss": 0.3676, "step": 9393 }, { "epoch": 1.288811140838307, "grad_norm": 1.171875, "learning_rate": 1.227222695173798e-05, "loss": 0.4546, "step": 9394 }, { "epoch": 1.2889483432805104, "grad_norm": 1.3984375, "learning_rate": 1.227082120418544e-05, "loss": 0.5541, "step": 9395 }, { "epoch": 1.289085545722714, "grad_norm": 1.140625, "learning_rate": 1.2269415409317207e-05, "loss": 0.3923, "step": 9396 }, { "epoch": 1.2892227481649172, "grad_norm": 1.28125, "learning_rate": 1.2268009567162568e-05, "loss": 0.4522, "step": 9397 }, { "epoch": 1.2893599506071207, "grad_norm": 1.1875, "learning_rate": 1.2266603677750814e-05, "loss": 0.4436, "step": 9398 }, { "epoch": 1.2894971530493242, "grad_norm": 1.2890625, "learning_rate": 1.226519774111124e-05, "loss": 0.4817, "step": 9399 }, { "epoch": 1.2896343554915277, "grad_norm": 1.125, "learning_rate": 1.2263791757273138e-05, "loss": 0.3945, "step": 9400 }, { "epoch": 1.2897715579337312, "grad_norm": 1.359375, "learning_rate": 1.2262385726265809e-05, "loss": 0.4599, "step": 9401 }, { "epoch": 1.2899087603759347, "grad_norm": 1.3125, "learning_rate": 1.2260979648118545e-05, "loss": 0.4541, "step": 9402 }, { "epoch": 1.2900459628181382, "grad_norm": 1.125, "learning_rate": 1.2259573522860646e-05, "loss": 0.4309, "step": 9403 }, { "epoch": 1.2901831652603417, "grad_norm": 1.21875, "learning_rate": 1.225816735052141e-05, "loss": 0.4506, "step": 9404 }, { "epoch": 1.2903203677025452, "grad_norm": 1.25, "learning_rate": 1.2256761131130139e-05, "loss": 0.4625, "step": 9405 }, { "epoch": 1.2904575701447487, "grad_norm": 1.2734375, "learning_rate": 1.2255354864716131e-05, "loss": 0.4623, "step": 9406 }, { "epoch": 1.290594772586952, "grad_norm": 1.140625, "learning_rate": 1.2253948551308686e-05, "loss": 0.3796, "step": 9407 }, { "epoch": 1.2907319750291555, "grad_norm": 1.1640625, "learning_rate": 1.2252542190937106e-05, "loss": 0.3996, "step": 9408 }, { "epoch": 1.290869177471359, "grad_norm": 1.1484375, "learning_rate": 1.22511357836307e-05, "loss": 0.3917, "step": 9409 }, { "epoch": 1.2910063799135625, "grad_norm": 1.21875, "learning_rate": 1.2249729329418767e-05, "loss": 0.4108, "step": 9410 }, { "epoch": 1.291143582355766, "grad_norm": 1.234375, "learning_rate": 1.2248322828330615e-05, "loss": 0.4363, "step": 9411 }, { "epoch": 1.2912807847979695, "grad_norm": 1.3046875, "learning_rate": 1.224691628039555e-05, "loss": 0.5026, "step": 9412 }, { "epoch": 1.2914179872401728, "grad_norm": 1.21875, "learning_rate": 1.2245509685642875e-05, "loss": 0.4239, "step": 9413 }, { "epoch": 1.2915551896823763, "grad_norm": 1.2734375, "learning_rate": 1.2244103044101907e-05, "loss": 0.478, "step": 9414 }, { "epoch": 1.2916923921245798, "grad_norm": 1.359375, "learning_rate": 1.2242696355801952e-05, "loss": 0.4403, "step": 9415 }, { "epoch": 1.2918295945667833, "grad_norm": 1.3203125, "learning_rate": 1.2241289620772316e-05, "loss": 0.5126, "step": 9416 }, { "epoch": 1.2919667970089868, "grad_norm": 1.296875, "learning_rate": 1.2239882839042317e-05, "loss": 0.468, "step": 9417 }, { "epoch": 1.2921039994511903, "grad_norm": 1.25, "learning_rate": 1.2238476010641259e-05, "loss": 0.4253, "step": 9418 }, { "epoch": 1.2922412018933938, "grad_norm": 1.328125, "learning_rate": 1.223706913559846e-05, "loss": 0.4535, "step": 9419 }, { "epoch": 1.2923784043355973, "grad_norm": 1.390625, "learning_rate": 1.2235662213943238e-05, "loss": 0.5561, "step": 9420 }, { "epoch": 1.2925156067778008, "grad_norm": 1.2109375, "learning_rate": 1.2234255245704901e-05, "loss": 0.43, "step": 9421 }, { "epoch": 1.2926528092200043, "grad_norm": 1.2734375, "learning_rate": 1.2232848230912768e-05, "loss": 0.5193, "step": 9422 }, { "epoch": 1.2927900116622075, "grad_norm": 1.15625, "learning_rate": 1.223144116959616e-05, "loss": 0.3951, "step": 9423 }, { "epoch": 1.292927214104411, "grad_norm": 1.203125, "learning_rate": 1.223003406178439e-05, "loss": 0.4462, "step": 9424 }, { "epoch": 1.2930644165466145, "grad_norm": 1.2421875, "learning_rate": 1.2228626907506778e-05, "loss": 0.4649, "step": 9425 }, { "epoch": 1.293201618988818, "grad_norm": 1.2421875, "learning_rate": 1.2227219706792644e-05, "loss": 0.4502, "step": 9426 }, { "epoch": 1.2933388214310215, "grad_norm": 1.265625, "learning_rate": 1.222581245967131e-05, "loss": 0.5118, "step": 9427 }, { "epoch": 1.293476023873225, "grad_norm": 1.15625, "learning_rate": 1.2224405166172096e-05, "loss": 0.4237, "step": 9428 }, { "epoch": 1.2936132263154283, "grad_norm": 1.1953125, "learning_rate": 1.2222997826324325e-05, "loss": 0.4474, "step": 9429 }, { "epoch": 1.2937504287576318, "grad_norm": 1.328125, "learning_rate": 1.2221590440157325e-05, "loss": 0.4499, "step": 9430 }, { "epoch": 1.2938876311998353, "grad_norm": 1.234375, "learning_rate": 1.2220183007700418e-05, "loss": 0.4478, "step": 9431 }, { "epoch": 1.2940248336420388, "grad_norm": 1.2109375, "learning_rate": 1.2218775528982932e-05, "loss": 0.4658, "step": 9432 }, { "epoch": 1.2941620360842423, "grad_norm": 1.2265625, "learning_rate": 1.2217368004034188e-05, "loss": 0.4505, "step": 9433 }, { "epoch": 1.2942992385264458, "grad_norm": 1.25, "learning_rate": 1.2215960432883518e-05, "loss": 0.4641, "step": 9434 }, { "epoch": 1.2944364409686493, "grad_norm": 1.40625, "learning_rate": 1.2214552815560249e-05, "loss": 0.5498, "step": 9435 }, { "epoch": 1.2945736434108528, "grad_norm": 1.1640625, "learning_rate": 1.2213145152093714e-05, "loss": 0.4184, "step": 9436 }, { "epoch": 1.2947108458530563, "grad_norm": 1.3125, "learning_rate": 1.2211737442513239e-05, "loss": 0.5251, "step": 9437 }, { "epoch": 1.2948480482952598, "grad_norm": 1.1953125, "learning_rate": 1.221032968684816e-05, "loss": 0.4221, "step": 9438 }, { "epoch": 1.294985250737463, "grad_norm": 1.1015625, "learning_rate": 1.2208921885127805e-05, "loss": 0.3602, "step": 9439 }, { "epoch": 1.2951224531796666, "grad_norm": 1.265625, "learning_rate": 1.2207514037381512e-05, "loss": 0.4707, "step": 9440 }, { "epoch": 1.29525965562187, "grad_norm": 1.265625, "learning_rate": 1.2206106143638614e-05, "loss": 0.4773, "step": 9441 }, { "epoch": 1.2953968580640736, "grad_norm": 1.203125, "learning_rate": 1.2204698203928445e-05, "loss": 0.4127, "step": 9442 }, { "epoch": 1.295534060506277, "grad_norm": 1.234375, "learning_rate": 1.220329021828034e-05, "loss": 0.461, "step": 9443 }, { "epoch": 1.2956712629484806, "grad_norm": 1.1484375, "learning_rate": 1.220188218672364e-05, "loss": 0.371, "step": 9444 }, { "epoch": 1.2958084653906838, "grad_norm": 1.25, "learning_rate": 1.220047410928768e-05, "loss": 0.4177, "step": 9445 }, { "epoch": 1.2959456678328873, "grad_norm": 1.3125, "learning_rate": 1.2199065986001803e-05, "loss": 0.5167, "step": 9446 }, { "epoch": 1.2960828702750908, "grad_norm": 1.2734375, "learning_rate": 1.2197657816895348e-05, "loss": 0.4874, "step": 9447 }, { "epoch": 1.2962200727172943, "grad_norm": 1.1796875, "learning_rate": 1.2196249601997652e-05, "loss": 0.4107, "step": 9448 }, { "epoch": 1.2963572751594978, "grad_norm": 1.28125, "learning_rate": 1.2194841341338063e-05, "loss": 0.4481, "step": 9449 }, { "epoch": 1.2964944776017013, "grad_norm": 1.203125, "learning_rate": 1.2193433034945923e-05, "loss": 0.4569, "step": 9450 }, { "epoch": 1.2966316800439048, "grad_norm": 1.2578125, "learning_rate": 1.2192024682850574e-05, "loss": 0.4573, "step": 9451 }, { "epoch": 1.2967688824861083, "grad_norm": 1.296875, "learning_rate": 1.2190616285081358e-05, "loss": 0.5088, "step": 9452 }, { "epoch": 1.2969060849283118, "grad_norm": 1.21875, "learning_rate": 1.2189207841667628e-05, "loss": 0.4433, "step": 9453 }, { "epoch": 1.2970432873705153, "grad_norm": 1.375, "learning_rate": 1.2187799352638723e-05, "loss": 0.5146, "step": 9454 }, { "epoch": 1.2971804898127186, "grad_norm": 1.1484375, "learning_rate": 1.2186390818023997e-05, "loss": 0.4217, "step": 9455 }, { "epoch": 1.297317692254922, "grad_norm": 1.171875, "learning_rate": 1.2184982237852798e-05, "loss": 0.4138, "step": 9456 }, { "epoch": 1.2974548946971256, "grad_norm": 1.3125, "learning_rate": 1.2183573612154473e-05, "loss": 0.482, "step": 9457 }, { "epoch": 1.297592097139329, "grad_norm": 1.1953125, "learning_rate": 1.2182164940958374e-05, "loss": 0.4574, "step": 9458 }, { "epoch": 1.2977292995815326, "grad_norm": 1.3125, "learning_rate": 1.2180756224293853e-05, "loss": 0.4567, "step": 9459 }, { "epoch": 1.297866502023736, "grad_norm": 1.421875, "learning_rate": 1.2179347462190266e-05, "loss": 0.5039, "step": 9460 }, { "epoch": 1.2980037044659394, "grad_norm": 1.2421875, "learning_rate": 1.217793865467696e-05, "loss": 0.4191, "step": 9461 }, { "epoch": 1.2981409069081429, "grad_norm": 1.09375, "learning_rate": 1.217652980178329e-05, "loss": 0.3969, "step": 9462 }, { "epoch": 1.2982781093503464, "grad_norm": 1.2265625, "learning_rate": 1.2175120903538615e-05, "loss": 0.4975, "step": 9463 }, { "epoch": 1.2984153117925499, "grad_norm": 1.1640625, "learning_rate": 1.2173711959972292e-05, "loss": 0.4227, "step": 9464 }, { "epoch": 1.2985525142347534, "grad_norm": 1.2265625, "learning_rate": 1.2172302971113675e-05, "loss": 0.4623, "step": 9465 }, { "epoch": 1.2986897166769569, "grad_norm": 1.1953125, "learning_rate": 1.2170893936992124e-05, "loss": 0.3962, "step": 9466 }, { "epoch": 1.2988269191191604, "grad_norm": 1.1953125, "learning_rate": 1.2169484857636996e-05, "loss": 0.4757, "step": 9467 }, { "epoch": 1.2989641215613639, "grad_norm": 1.1875, "learning_rate": 1.2168075733077656e-05, "loss": 0.4134, "step": 9468 }, { "epoch": 1.2991013240035674, "grad_norm": 1.1875, "learning_rate": 1.2166666563343462e-05, "loss": 0.4095, "step": 9469 }, { "epoch": 1.2992385264457709, "grad_norm": 1.1953125, "learning_rate": 1.2165257348463772e-05, "loss": 0.414, "step": 9470 }, { "epoch": 1.2993757288879741, "grad_norm": 1.265625, "learning_rate": 1.2163848088467953e-05, "loss": 0.4536, "step": 9471 }, { "epoch": 1.2995129313301776, "grad_norm": 1.1328125, "learning_rate": 1.2162438783385372e-05, "loss": 0.3659, "step": 9472 }, { "epoch": 1.2996501337723811, "grad_norm": 1.25, "learning_rate": 1.2161029433245388e-05, "loss": 0.4778, "step": 9473 }, { "epoch": 1.2997873362145846, "grad_norm": 1.2421875, "learning_rate": 1.2159620038077371e-05, "loss": 0.447, "step": 9474 }, { "epoch": 1.2999245386567881, "grad_norm": 1.0625, "learning_rate": 1.2158210597910685e-05, "loss": 0.3562, "step": 9475 }, { "epoch": 1.3000617410989916, "grad_norm": 1.1640625, "learning_rate": 1.2156801112774698e-05, "loss": 0.382, "step": 9476 }, { "epoch": 1.300198943541195, "grad_norm": 1.2421875, "learning_rate": 1.2155391582698783e-05, "loss": 0.4257, "step": 9477 }, { "epoch": 1.3003361459833984, "grad_norm": 1.265625, "learning_rate": 1.2153982007712301e-05, "loss": 0.4308, "step": 9478 }, { "epoch": 1.300473348425602, "grad_norm": 1.2421875, "learning_rate": 1.215257238784463e-05, "loss": 0.4615, "step": 9479 }, { "epoch": 1.3006105508678054, "grad_norm": 1.265625, "learning_rate": 1.2151162723125137e-05, "loss": 0.4669, "step": 9480 }, { "epoch": 1.300747753310009, "grad_norm": 1.2578125, "learning_rate": 1.2149753013583196e-05, "loss": 0.4231, "step": 9481 }, { "epoch": 1.3008849557522124, "grad_norm": 1.265625, "learning_rate": 1.214834325924818e-05, "loss": 0.4612, "step": 9482 }, { "epoch": 1.301022158194416, "grad_norm": 1.2578125, "learning_rate": 1.2146933460149464e-05, "loss": 0.483, "step": 9483 }, { "epoch": 1.3011593606366194, "grad_norm": 1.3203125, "learning_rate": 1.2145523616316421e-05, "loss": 0.4764, "step": 9484 }, { "epoch": 1.301296563078823, "grad_norm": 1.1796875, "learning_rate": 1.214411372777843e-05, "loss": 0.4311, "step": 9485 }, { "epoch": 1.3014337655210264, "grad_norm": 1.265625, "learning_rate": 1.2142703794564867e-05, "loss": 0.4452, "step": 9486 }, { "epoch": 1.3015709679632297, "grad_norm": 1.2578125, "learning_rate": 1.2141293816705108e-05, "loss": 0.4408, "step": 9487 }, { "epoch": 1.3017081704054332, "grad_norm": 1.2578125, "learning_rate": 1.2139883794228533e-05, "loss": 0.42, "step": 9488 }, { "epoch": 1.3018453728476367, "grad_norm": 1.15625, "learning_rate": 1.2138473727164521e-05, "loss": 0.4467, "step": 9489 }, { "epoch": 1.3019825752898402, "grad_norm": 1.3125, "learning_rate": 1.2137063615542454e-05, "loss": 0.5311, "step": 9490 }, { "epoch": 1.3021197777320437, "grad_norm": 1.1484375, "learning_rate": 1.2135653459391717e-05, "loss": 0.435, "step": 9491 }, { "epoch": 1.3022569801742472, "grad_norm": 1.1015625, "learning_rate": 1.2134243258741685e-05, "loss": 0.395, "step": 9492 }, { "epoch": 1.3023941826164505, "grad_norm": 1.1953125, "learning_rate": 1.2132833013621745e-05, "loss": 0.4128, "step": 9493 }, { "epoch": 1.302531385058654, "grad_norm": 1.1953125, "learning_rate": 1.2131422724061284e-05, "loss": 0.4213, "step": 9494 }, { "epoch": 1.3026685875008575, "grad_norm": 1.171875, "learning_rate": 1.2130012390089686e-05, "loss": 0.4543, "step": 9495 }, { "epoch": 1.302805789943061, "grad_norm": 1.078125, "learning_rate": 1.2128602011736333e-05, "loss": 0.3673, "step": 9496 }, { "epoch": 1.3029429923852645, "grad_norm": 1.1875, "learning_rate": 1.2127191589030613e-05, "loss": 0.4073, "step": 9497 }, { "epoch": 1.303080194827468, "grad_norm": 1.15625, "learning_rate": 1.2125781122001921e-05, "loss": 0.4034, "step": 9498 }, { "epoch": 1.3032173972696715, "grad_norm": 1.1796875, "learning_rate": 1.2124370610679642e-05, "loss": 0.4193, "step": 9499 }, { "epoch": 1.303354599711875, "grad_norm": 1.359375, "learning_rate": 1.2122960055093162e-05, "loss": 0.5025, "step": 9500 }, { "epoch": 1.3034918021540784, "grad_norm": 1.3125, "learning_rate": 1.2121549455271878e-05, "loss": 0.431, "step": 9501 }, { "epoch": 1.303629004596282, "grad_norm": 1.359375, "learning_rate": 1.2120138811245177e-05, "loss": 0.4998, "step": 9502 }, { "epoch": 1.3037662070384852, "grad_norm": 1.3984375, "learning_rate": 1.2118728123042459e-05, "loss": 0.512, "step": 9503 }, { "epoch": 1.3039034094806887, "grad_norm": 1.2890625, "learning_rate": 1.2117317390693108e-05, "loss": 0.5239, "step": 9504 }, { "epoch": 1.3040406119228922, "grad_norm": 1.1171875, "learning_rate": 1.2115906614226525e-05, "loss": 0.3793, "step": 9505 }, { "epoch": 1.3041778143650957, "grad_norm": 1.21875, "learning_rate": 1.2114495793672103e-05, "loss": 0.492, "step": 9506 }, { "epoch": 1.3043150168072992, "grad_norm": 1.3203125, "learning_rate": 1.211308492905924e-05, "loss": 0.5002, "step": 9507 }, { "epoch": 1.3044522192495027, "grad_norm": 1.2734375, "learning_rate": 1.211167402041733e-05, "loss": 0.4908, "step": 9508 }, { "epoch": 1.304589421691706, "grad_norm": 1.21875, "learning_rate": 1.2110263067775774e-05, "loss": 0.4595, "step": 9509 }, { "epoch": 1.3047266241339095, "grad_norm": 1.28125, "learning_rate": 1.210885207116397e-05, "loss": 0.47, "step": 9510 }, { "epoch": 1.304863826576113, "grad_norm": 1.2421875, "learning_rate": 1.210744103061132e-05, "loss": 0.4891, "step": 9511 }, { "epoch": 1.3050010290183165, "grad_norm": 1.234375, "learning_rate": 1.2106029946147223e-05, "loss": 0.4756, "step": 9512 }, { "epoch": 1.30513823146052, "grad_norm": 1.265625, "learning_rate": 1.2104618817801083e-05, "loss": 0.4414, "step": 9513 }, { "epoch": 1.3052754339027235, "grad_norm": 1.109375, "learning_rate": 1.2103207645602302e-05, "loss": 0.3924, "step": 9514 }, { "epoch": 1.305412636344927, "grad_norm": 1.171875, "learning_rate": 1.2101796429580281e-05, "loss": 0.4126, "step": 9515 }, { "epoch": 1.3055498387871305, "grad_norm": 1.15625, "learning_rate": 1.2100385169764428e-05, "loss": 0.425, "step": 9516 }, { "epoch": 1.305687041229334, "grad_norm": 1.2265625, "learning_rate": 1.2098973866184147e-05, "loss": 0.446, "step": 9517 }, { "epoch": 1.3058242436715375, "grad_norm": 1.3671875, "learning_rate": 1.2097562518868845e-05, "loss": 0.5067, "step": 9518 }, { "epoch": 1.3059614461137408, "grad_norm": 1.28125, "learning_rate": 1.2096151127847928e-05, "loss": 0.4844, "step": 9519 }, { "epoch": 1.3060986485559443, "grad_norm": 1.2109375, "learning_rate": 1.2094739693150808e-05, "loss": 0.4408, "step": 9520 }, { "epoch": 1.3062358509981478, "grad_norm": 1.28125, "learning_rate": 1.2093328214806888e-05, "loss": 0.4434, "step": 9521 }, { "epoch": 1.3063730534403513, "grad_norm": 1.2421875, "learning_rate": 1.2091916692845587e-05, "loss": 0.5004, "step": 9522 }, { "epoch": 1.3065102558825548, "grad_norm": 1.2421875, "learning_rate": 1.2090505127296307e-05, "loss": 0.4252, "step": 9523 }, { "epoch": 1.3066474583247583, "grad_norm": 1.1796875, "learning_rate": 1.2089093518188464e-05, "loss": 0.4296, "step": 9524 }, { "epoch": 1.3067846607669615, "grad_norm": 1.1328125, "learning_rate": 1.2087681865551469e-05, "loss": 0.4031, "step": 9525 }, { "epoch": 1.306921863209165, "grad_norm": 1.2734375, "learning_rate": 1.208627016941474e-05, "loss": 0.489, "step": 9526 }, { "epoch": 1.3070590656513685, "grad_norm": 1.2421875, "learning_rate": 1.2084858429807686e-05, "loss": 0.4214, "step": 9527 }, { "epoch": 1.307196268093572, "grad_norm": 1.1796875, "learning_rate": 1.2083446646759726e-05, "loss": 0.4267, "step": 9528 }, { "epoch": 1.3073334705357755, "grad_norm": 1.234375, "learning_rate": 1.2082034820300278e-05, "loss": 0.4227, "step": 9529 }, { "epoch": 1.307470672977979, "grad_norm": 1.046875, "learning_rate": 1.2080622950458756e-05, "loss": 0.3534, "step": 9530 }, { "epoch": 1.3076078754201825, "grad_norm": 1.390625, "learning_rate": 1.2079211037264582e-05, "loss": 0.5426, "step": 9531 }, { "epoch": 1.307745077862386, "grad_norm": 1.2109375, "learning_rate": 1.207779908074717e-05, "loss": 0.3653, "step": 9532 }, { "epoch": 1.3078822803045895, "grad_norm": 1.2578125, "learning_rate": 1.2076387080935942e-05, "loss": 0.4725, "step": 9533 }, { "epoch": 1.308019482746793, "grad_norm": 1.3203125, "learning_rate": 1.2074975037860322e-05, "loss": 0.4584, "step": 9534 }, { "epoch": 1.3081566851889963, "grad_norm": 1.25, "learning_rate": 1.2073562951549727e-05, "loss": 0.4758, "step": 9535 }, { "epoch": 1.3082938876311998, "grad_norm": 1.359375, "learning_rate": 1.2072150822033584e-05, "loss": 0.4781, "step": 9536 }, { "epoch": 1.3084310900734033, "grad_norm": 1.203125, "learning_rate": 1.2070738649341314e-05, "loss": 0.4053, "step": 9537 }, { "epoch": 1.3085682925156068, "grad_norm": 1.296875, "learning_rate": 1.2069326433502342e-05, "loss": 0.4644, "step": 9538 }, { "epoch": 1.3087054949578103, "grad_norm": 1.1640625, "learning_rate": 1.2067914174546097e-05, "loss": 0.4092, "step": 9539 }, { "epoch": 1.3088426974000138, "grad_norm": 1.2578125, "learning_rate": 1.2066501872502001e-05, "loss": 0.4247, "step": 9540 }, { "epoch": 1.308979899842217, "grad_norm": 1.1015625, "learning_rate": 1.206508952739948e-05, "loss": 0.3515, "step": 9541 }, { "epoch": 1.3091171022844206, "grad_norm": 1.2265625, "learning_rate": 1.2063677139267968e-05, "loss": 0.4554, "step": 9542 }, { "epoch": 1.309254304726624, "grad_norm": 1.25, "learning_rate": 1.206226470813689e-05, "loss": 0.443, "step": 9543 }, { "epoch": 1.3093915071688276, "grad_norm": 1.5, "learning_rate": 1.2060852234035676e-05, "loss": 0.4822, "step": 9544 }, { "epoch": 1.309528709611031, "grad_norm": 1.171875, "learning_rate": 1.2059439716993757e-05, "loss": 0.4502, "step": 9545 }, { "epoch": 1.3096659120532346, "grad_norm": 1.2890625, "learning_rate": 1.2058027157040568e-05, "loss": 0.4531, "step": 9546 }, { "epoch": 1.309803114495438, "grad_norm": 1.2890625, "learning_rate": 1.2056614554205537e-05, "loss": 0.4595, "step": 9547 }, { "epoch": 1.3099403169376416, "grad_norm": 1.171875, "learning_rate": 1.2055201908518104e-05, "loss": 0.3932, "step": 9548 }, { "epoch": 1.310077519379845, "grad_norm": 1.265625, "learning_rate": 1.2053789220007695e-05, "loss": 0.4486, "step": 9549 }, { "epoch": 1.3102147218220486, "grad_norm": 1.1796875, "learning_rate": 1.2052376488703747e-05, "loss": 0.4147, "step": 9550 }, { "epoch": 1.3103519242642518, "grad_norm": 1.2734375, "learning_rate": 1.2050963714635701e-05, "loss": 0.4607, "step": 9551 }, { "epoch": 1.3104891267064553, "grad_norm": 1.3125, "learning_rate": 1.2049550897832993e-05, "loss": 0.4776, "step": 9552 }, { "epoch": 1.3106263291486588, "grad_norm": 1.3203125, "learning_rate": 1.2048138038325059e-05, "loss": 0.4624, "step": 9553 }, { "epoch": 1.3107635315908623, "grad_norm": 1.2109375, "learning_rate": 1.2046725136141337e-05, "loss": 0.409, "step": 9554 }, { "epoch": 1.3109007340330658, "grad_norm": 1.0859375, "learning_rate": 1.2045312191311269e-05, "loss": 0.3518, "step": 9555 }, { "epoch": 1.3110379364752693, "grad_norm": 1.2265625, "learning_rate": 1.2043899203864295e-05, "loss": 0.441, "step": 9556 }, { "epoch": 1.3111751389174726, "grad_norm": 1.2109375, "learning_rate": 1.2042486173829859e-05, "loss": 0.5013, "step": 9557 }, { "epoch": 1.3113123413596761, "grad_norm": 1.2109375, "learning_rate": 1.20410731012374e-05, "loss": 0.4675, "step": 9558 }, { "epoch": 1.3114495438018796, "grad_norm": 1.125, "learning_rate": 1.2039659986116359e-05, "loss": 0.3782, "step": 9559 }, { "epoch": 1.3115867462440831, "grad_norm": 1.1953125, "learning_rate": 1.2038246828496187e-05, "loss": 0.4835, "step": 9560 }, { "epoch": 1.3117239486862866, "grad_norm": 1.21875, "learning_rate": 1.2036833628406324e-05, "loss": 0.4537, "step": 9561 }, { "epoch": 1.31186115112849, "grad_norm": 1.2578125, "learning_rate": 1.2035420385876215e-05, "loss": 0.4906, "step": 9562 }, { "epoch": 1.3119983535706936, "grad_norm": 1.171875, "learning_rate": 1.2034007100935312e-05, "loss": 0.4541, "step": 9563 }, { "epoch": 1.312135556012897, "grad_norm": 1.2734375, "learning_rate": 1.2032593773613065e-05, "loss": 0.4598, "step": 9564 }, { "epoch": 1.3122727584551006, "grad_norm": 1.1875, "learning_rate": 1.203118040393891e-05, "loss": 0.4049, "step": 9565 }, { "epoch": 1.312409960897304, "grad_norm": 1.3046875, "learning_rate": 1.2029766991942307e-05, "loss": 0.4175, "step": 9566 }, { "epoch": 1.3125471633395074, "grad_norm": 1.1953125, "learning_rate": 1.2028353537652706e-05, "loss": 0.4374, "step": 9567 }, { "epoch": 1.3126843657817109, "grad_norm": 1.1875, "learning_rate": 1.2026940041099553e-05, "loss": 0.4057, "step": 9568 }, { "epoch": 1.3128215682239144, "grad_norm": 1.2265625, "learning_rate": 1.2025526502312303e-05, "loss": 0.4553, "step": 9569 }, { "epoch": 1.3129587706661179, "grad_norm": 1.21875, "learning_rate": 1.202411292132041e-05, "loss": 0.4267, "step": 9570 }, { "epoch": 1.3130959731083214, "grad_norm": 1.3125, "learning_rate": 1.2022699298153326e-05, "loss": 0.5035, "step": 9571 }, { "epoch": 1.3132331755505249, "grad_norm": 1.3203125, "learning_rate": 1.2021285632840507e-05, "loss": 0.4899, "step": 9572 }, { "epoch": 1.3133703779927282, "grad_norm": 1.203125, "learning_rate": 1.201987192541141e-05, "loss": 0.4635, "step": 9573 }, { "epoch": 1.3135075804349317, "grad_norm": 1.3046875, "learning_rate": 1.2018458175895489e-05, "loss": 0.4539, "step": 9574 }, { "epoch": 1.3136447828771352, "grad_norm": 1.171875, "learning_rate": 1.2017044384322203e-05, "loss": 0.4428, "step": 9575 }, { "epoch": 1.3137819853193387, "grad_norm": 1.2421875, "learning_rate": 1.2015630550721011e-05, "loss": 0.4589, "step": 9576 }, { "epoch": 1.3139191877615422, "grad_norm": 1.2734375, "learning_rate": 1.2014216675121368e-05, "loss": 0.4174, "step": 9577 }, { "epoch": 1.3140563902037457, "grad_norm": 1.1953125, "learning_rate": 1.2012802757552739e-05, "loss": 0.4378, "step": 9578 }, { "epoch": 1.3141935926459491, "grad_norm": 1.2890625, "learning_rate": 1.2011388798044584e-05, "loss": 0.4234, "step": 9579 }, { "epoch": 1.3143307950881526, "grad_norm": 1.234375, "learning_rate": 1.200997479662636e-05, "loss": 0.4363, "step": 9580 }, { "epoch": 1.3144679975303561, "grad_norm": 1.1875, "learning_rate": 1.2008560753327537e-05, "loss": 0.4354, "step": 9581 }, { "epoch": 1.3146051999725596, "grad_norm": 1.171875, "learning_rate": 1.2007146668177571e-05, "loss": 0.3783, "step": 9582 }, { "epoch": 1.314742402414763, "grad_norm": 1.15625, "learning_rate": 1.2005732541205932e-05, "loss": 0.4102, "step": 9583 }, { "epoch": 1.3148796048569664, "grad_norm": 1.3203125, "learning_rate": 1.2004318372442085e-05, "loss": 0.4418, "step": 9584 }, { "epoch": 1.31501680729917, "grad_norm": 1.171875, "learning_rate": 1.2002904161915497e-05, "loss": 0.4235, "step": 9585 }, { "epoch": 1.3151540097413734, "grad_norm": 1.1640625, "learning_rate": 1.2001489909655628e-05, "loss": 0.4295, "step": 9586 }, { "epoch": 1.315291212183577, "grad_norm": 1.15625, "learning_rate": 1.2000075615691953e-05, "loss": 0.3808, "step": 9587 }, { "epoch": 1.3154284146257804, "grad_norm": 1.078125, "learning_rate": 1.1998661280053938e-05, "loss": 0.3801, "step": 9588 }, { "epoch": 1.3155656170679837, "grad_norm": 1.109375, "learning_rate": 1.1997246902771054e-05, "loss": 0.3532, "step": 9589 }, { "epoch": 1.3157028195101872, "grad_norm": 1.1875, "learning_rate": 1.1995832483872769e-05, "loss": 0.4191, "step": 9590 }, { "epoch": 1.3158400219523907, "grad_norm": 1.265625, "learning_rate": 1.1994418023388556e-05, "loss": 0.4908, "step": 9591 }, { "epoch": 1.3159772243945942, "grad_norm": 1.1953125, "learning_rate": 1.199300352134789e-05, "loss": 0.4537, "step": 9592 }, { "epoch": 1.3161144268367977, "grad_norm": 1.25, "learning_rate": 1.1991588977780243e-05, "loss": 0.4003, "step": 9593 }, { "epoch": 1.3162516292790012, "grad_norm": 1.203125, "learning_rate": 1.199017439271508e-05, "loss": 0.4114, "step": 9594 }, { "epoch": 1.3163888317212047, "grad_norm": 1.1875, "learning_rate": 1.198875976618189e-05, "loss": 0.4141, "step": 9595 }, { "epoch": 1.3165260341634082, "grad_norm": 1.3046875, "learning_rate": 1.1987345098210139e-05, "loss": 0.477, "step": 9596 }, { "epoch": 1.3166632366056117, "grad_norm": 1.2734375, "learning_rate": 1.1985930388829306e-05, "loss": 0.4802, "step": 9597 }, { "epoch": 1.3168004390478152, "grad_norm": 1.3046875, "learning_rate": 1.198451563806887e-05, "loss": 0.4746, "step": 9598 }, { "epoch": 1.3169376414900185, "grad_norm": 1.15625, "learning_rate": 1.1983100845958307e-05, "loss": 0.4121, "step": 9599 }, { "epoch": 1.317074843932222, "grad_norm": 1.390625, "learning_rate": 1.1981686012527096e-05, "loss": 0.5142, "step": 9600 }, { "epoch": 1.3172120463744255, "grad_norm": 1.234375, "learning_rate": 1.198027113780472e-05, "loss": 0.3993, "step": 9601 }, { "epoch": 1.317349248816629, "grad_norm": 1.25, "learning_rate": 1.197885622182066e-05, "loss": 0.4429, "step": 9602 }, { "epoch": 1.3174864512588325, "grad_norm": 1.21875, "learning_rate": 1.1977441264604394e-05, "loss": 0.4545, "step": 9603 }, { "epoch": 1.317623653701036, "grad_norm": 1.2734375, "learning_rate": 1.1976026266185407e-05, "loss": 0.499, "step": 9604 }, { "epoch": 1.3177608561432392, "grad_norm": 1.203125, "learning_rate": 1.1974611226593183e-05, "loss": 0.4303, "step": 9605 }, { "epoch": 1.3178980585854427, "grad_norm": 1.109375, "learning_rate": 1.1973196145857201e-05, "loss": 0.3697, "step": 9606 }, { "epoch": 1.3180352610276462, "grad_norm": 1.1953125, "learning_rate": 1.1971781024006954e-05, "loss": 0.4327, "step": 9607 }, { "epoch": 1.3181724634698497, "grad_norm": 1.171875, "learning_rate": 1.1970365861071924e-05, "loss": 0.412, "step": 9608 }, { "epoch": 1.3183096659120532, "grad_norm": 1.296875, "learning_rate": 1.1968950657081599e-05, "loss": 0.4598, "step": 9609 }, { "epoch": 1.3184468683542567, "grad_norm": 1.21875, "learning_rate": 1.1967535412065465e-05, "loss": 0.4315, "step": 9610 }, { "epoch": 1.3185840707964602, "grad_norm": 1.2265625, "learning_rate": 1.1966120126053014e-05, "loss": 0.4377, "step": 9611 }, { "epoch": 1.3187212732386637, "grad_norm": 1.25, "learning_rate": 1.196470479907373e-05, "loss": 0.4486, "step": 9612 }, { "epoch": 1.3188584756808672, "grad_norm": 1.21875, "learning_rate": 1.1963289431157108e-05, "loss": 0.4629, "step": 9613 }, { "epoch": 1.3189956781230707, "grad_norm": 1.109375, "learning_rate": 1.1961874022332636e-05, "loss": 0.3764, "step": 9614 }, { "epoch": 1.319132880565274, "grad_norm": 1.296875, "learning_rate": 1.1960458572629807e-05, "loss": 0.487, "step": 9615 }, { "epoch": 1.3192700830074775, "grad_norm": 1.265625, "learning_rate": 1.1959043082078117e-05, "loss": 0.4507, "step": 9616 }, { "epoch": 1.319407285449681, "grad_norm": 1.2265625, "learning_rate": 1.1957627550707054e-05, "loss": 0.3856, "step": 9617 }, { "epoch": 1.3195444878918845, "grad_norm": 1.2734375, "learning_rate": 1.1956211978546118e-05, "loss": 0.4523, "step": 9618 }, { "epoch": 1.319681690334088, "grad_norm": 1.2890625, "learning_rate": 1.1954796365624803e-05, "loss": 0.4608, "step": 9619 }, { "epoch": 1.3198188927762915, "grad_norm": 1.1171875, "learning_rate": 1.1953380711972602e-05, "loss": 0.4163, "step": 9620 }, { "epoch": 1.3199560952184948, "grad_norm": 1.25, "learning_rate": 1.1951965017619013e-05, "loss": 0.4574, "step": 9621 }, { "epoch": 1.3200932976606983, "grad_norm": 1.328125, "learning_rate": 1.195054928259354e-05, "loss": 0.459, "step": 9622 }, { "epoch": 1.3202305001029018, "grad_norm": 1.328125, "learning_rate": 1.1949133506925674e-05, "loss": 0.46, "step": 9623 }, { "epoch": 1.3203677025451053, "grad_norm": 1.1953125, "learning_rate": 1.1947717690644918e-05, "loss": 0.4156, "step": 9624 }, { "epoch": 1.3205049049873088, "grad_norm": 1.2734375, "learning_rate": 1.1946301833780773e-05, "loss": 0.4727, "step": 9625 }, { "epoch": 1.3206421074295123, "grad_norm": 1.109375, "learning_rate": 1.1944885936362738e-05, "loss": 0.3831, "step": 9626 }, { "epoch": 1.3207793098717158, "grad_norm": 1.171875, "learning_rate": 1.1943469998420319e-05, "loss": 0.3826, "step": 9627 }, { "epoch": 1.3209165123139193, "grad_norm": 1.28125, "learning_rate": 1.1942054019983014e-05, "loss": 0.4688, "step": 9628 }, { "epoch": 1.3210537147561228, "grad_norm": 1.4296875, "learning_rate": 1.194063800108033e-05, "loss": 0.5391, "step": 9629 }, { "epoch": 1.3211909171983263, "grad_norm": 1.1875, "learning_rate": 1.1939221941741775e-05, "loss": 0.3521, "step": 9630 }, { "epoch": 1.3213281196405295, "grad_norm": 1.1484375, "learning_rate": 1.1937805841996848e-05, "loss": 0.4393, "step": 9631 }, { "epoch": 1.321465322082733, "grad_norm": 1.1015625, "learning_rate": 1.1936389701875057e-05, "loss": 0.4266, "step": 9632 }, { "epoch": 1.3216025245249365, "grad_norm": 1.265625, "learning_rate": 1.1934973521405913e-05, "loss": 0.4642, "step": 9633 }, { "epoch": 1.32173972696714, "grad_norm": 1.0859375, "learning_rate": 1.1933557300618921e-05, "loss": 0.3701, "step": 9634 }, { "epoch": 1.3218769294093435, "grad_norm": 1.234375, "learning_rate": 1.193214103954359e-05, "loss": 0.4583, "step": 9635 }, { "epoch": 1.322014131851547, "grad_norm": 1.328125, "learning_rate": 1.1930724738209429e-05, "loss": 0.511, "step": 9636 }, { "epoch": 1.3221513342937503, "grad_norm": 1.203125, "learning_rate": 1.192930839664595e-05, "loss": 0.4249, "step": 9637 }, { "epoch": 1.3222885367359538, "grad_norm": 1.0234375, "learning_rate": 1.192789201488267e-05, "loss": 0.3405, "step": 9638 }, { "epoch": 1.3224257391781573, "grad_norm": 1.1484375, "learning_rate": 1.1926475592949092e-05, "loss": 0.3797, "step": 9639 }, { "epoch": 1.3225629416203608, "grad_norm": 1.109375, "learning_rate": 1.192505913087473e-05, "loss": 0.3913, "step": 9640 }, { "epoch": 1.3227001440625643, "grad_norm": 1.28125, "learning_rate": 1.1923642628689103e-05, "loss": 0.4421, "step": 9641 }, { "epoch": 1.3228373465047678, "grad_norm": 1.3125, "learning_rate": 1.1922226086421722e-05, "loss": 0.4359, "step": 9642 }, { "epoch": 1.3229745489469713, "grad_norm": 1.2890625, "learning_rate": 1.1920809504102106e-05, "loss": 0.4488, "step": 9643 }, { "epoch": 1.3231117513891748, "grad_norm": 1.2734375, "learning_rate": 1.1919392881759768e-05, "loss": 0.435, "step": 9644 }, { "epoch": 1.3232489538313783, "grad_norm": 1.140625, "learning_rate": 1.1917976219424228e-05, "loss": 0.4025, "step": 9645 }, { "epoch": 1.3233861562735818, "grad_norm": 1.1953125, "learning_rate": 1.1916559517125002e-05, "loss": 0.4032, "step": 9646 }, { "epoch": 1.323523358715785, "grad_norm": 1.359375, "learning_rate": 1.1915142774891612e-05, "loss": 0.5114, "step": 9647 }, { "epoch": 1.3236605611579886, "grad_norm": 1.28125, "learning_rate": 1.1913725992753572e-05, "loss": 0.4913, "step": 9648 }, { "epoch": 1.323797763600192, "grad_norm": 1.21875, "learning_rate": 1.1912309170740408e-05, "loss": 0.3789, "step": 9649 }, { "epoch": 1.3239349660423956, "grad_norm": 1.171875, "learning_rate": 1.1910892308881641e-05, "loss": 0.4313, "step": 9650 }, { "epoch": 1.324072168484599, "grad_norm": 1.4375, "learning_rate": 1.190947540720679e-05, "loss": 0.5138, "step": 9651 }, { "epoch": 1.3242093709268026, "grad_norm": 1.1953125, "learning_rate": 1.190805846574538e-05, "loss": 0.3956, "step": 9652 }, { "epoch": 1.3243465733690059, "grad_norm": 1.109375, "learning_rate": 1.1906641484526935e-05, "loss": 0.3689, "step": 9653 }, { "epoch": 1.3244837758112094, "grad_norm": 1.1875, "learning_rate": 1.1905224463580983e-05, "loss": 0.4279, "step": 9654 }, { "epoch": 1.3246209782534129, "grad_norm": 1.2109375, "learning_rate": 1.1903807402937043e-05, "loss": 0.5365, "step": 9655 }, { "epoch": 1.3247581806956163, "grad_norm": 1.1953125, "learning_rate": 1.1902390302624647e-05, "loss": 0.4498, "step": 9656 }, { "epoch": 1.3248953831378198, "grad_norm": 1.265625, "learning_rate": 1.1900973162673317e-05, "loss": 0.4838, "step": 9657 }, { "epoch": 1.3250325855800233, "grad_norm": 1.1875, "learning_rate": 1.1899555983112586e-05, "loss": 0.4164, "step": 9658 }, { "epoch": 1.3251697880222268, "grad_norm": 1.0703125, "learning_rate": 1.1898138763971981e-05, "loss": 0.3311, "step": 9659 }, { "epoch": 1.3253069904644303, "grad_norm": 1.1328125, "learning_rate": 1.189672150528103e-05, "loss": 0.3984, "step": 9660 }, { "epoch": 1.3254441929066338, "grad_norm": 1.140625, "learning_rate": 1.1895304207069268e-05, "loss": 0.3988, "step": 9661 }, { "epoch": 1.3255813953488373, "grad_norm": 1.140625, "learning_rate": 1.1893886869366221e-05, "loss": 0.4045, "step": 9662 }, { "epoch": 1.3257185977910406, "grad_norm": 1.2734375, "learning_rate": 1.1892469492201424e-05, "loss": 0.469, "step": 9663 }, { "epoch": 1.3258558002332441, "grad_norm": 1.28125, "learning_rate": 1.1891052075604412e-05, "loss": 0.4845, "step": 9664 }, { "epoch": 1.3259930026754476, "grad_norm": 1.25, "learning_rate": 1.1889634619604715e-05, "loss": 0.4239, "step": 9665 }, { "epoch": 1.3261302051176511, "grad_norm": 1.28125, "learning_rate": 1.1888217124231872e-05, "loss": 0.4648, "step": 9666 }, { "epoch": 1.3262674075598546, "grad_norm": 1.2265625, "learning_rate": 1.1886799589515412e-05, "loss": 0.4583, "step": 9667 }, { "epoch": 1.3264046100020581, "grad_norm": 1.3515625, "learning_rate": 1.1885382015484876e-05, "loss": 0.4881, "step": 9668 }, { "epoch": 1.3265418124442614, "grad_norm": 1.2578125, "learning_rate": 1.1883964402169802e-05, "loss": 0.4367, "step": 9669 }, { "epoch": 1.326679014886465, "grad_norm": 1.1953125, "learning_rate": 1.1882546749599724e-05, "loss": 0.4336, "step": 9670 }, { "epoch": 1.3268162173286684, "grad_norm": 1.1171875, "learning_rate": 1.1881129057804184e-05, "loss": 0.3537, "step": 9671 }, { "epoch": 1.326953419770872, "grad_norm": 1.3671875, "learning_rate": 1.187971132681272e-05, "loss": 0.5037, "step": 9672 }, { "epoch": 1.3270906222130754, "grad_norm": 1.375, "learning_rate": 1.1878293556654874e-05, "loss": 0.4918, "step": 9673 }, { "epoch": 1.3272278246552789, "grad_norm": 1.1484375, "learning_rate": 1.1876875747360183e-05, "loss": 0.4414, "step": 9674 }, { "epoch": 1.3273650270974824, "grad_norm": 1.2265625, "learning_rate": 1.1875457898958195e-05, "loss": 0.3645, "step": 9675 }, { "epoch": 1.3275022295396859, "grad_norm": 1.234375, "learning_rate": 1.187404001147845e-05, "loss": 0.4642, "step": 9676 }, { "epoch": 1.3276394319818894, "grad_norm": 1.171875, "learning_rate": 1.1872622084950488e-05, "loss": 0.4236, "step": 9677 }, { "epoch": 1.3277766344240929, "grad_norm": 1.1953125, "learning_rate": 1.1871204119403861e-05, "loss": 0.4379, "step": 9678 }, { "epoch": 1.3279138368662962, "grad_norm": 1.2109375, "learning_rate": 1.1869786114868108e-05, "loss": 0.4419, "step": 9679 }, { "epoch": 1.3280510393084997, "grad_norm": 1.1484375, "learning_rate": 1.1868368071372777e-05, "loss": 0.4186, "step": 9680 }, { "epoch": 1.3281882417507032, "grad_norm": 1.2734375, "learning_rate": 1.1866949988947418e-05, "loss": 0.4748, "step": 9681 }, { "epoch": 1.3283254441929067, "grad_norm": 1.3125, "learning_rate": 1.1865531867621574e-05, "loss": 0.4944, "step": 9682 }, { "epoch": 1.3284626466351102, "grad_norm": 1.1953125, "learning_rate": 1.1864113707424797e-05, "loss": 0.4166, "step": 9683 }, { "epoch": 1.3285998490773137, "grad_norm": 1.265625, "learning_rate": 1.1862695508386633e-05, "loss": 0.4817, "step": 9684 }, { "epoch": 1.328737051519517, "grad_norm": 1.25, "learning_rate": 1.1861277270536633e-05, "loss": 0.4171, "step": 9685 }, { "epoch": 1.3288742539617204, "grad_norm": 1.3828125, "learning_rate": 1.185985899390435e-05, "loss": 0.5178, "step": 9686 }, { "epoch": 1.329011456403924, "grad_norm": 1.21875, "learning_rate": 1.1858440678519335e-05, "loss": 0.4388, "step": 9687 }, { "epoch": 1.3291486588461274, "grad_norm": 1.3359375, "learning_rate": 1.1857022324411139e-05, "loss": 0.4711, "step": 9688 }, { "epoch": 1.329285861288331, "grad_norm": 1.1328125, "learning_rate": 1.1855603931609316e-05, "loss": 0.3849, "step": 9689 }, { "epoch": 1.3294230637305344, "grad_norm": 1.1953125, "learning_rate": 1.1854185500143422e-05, "loss": 0.4433, "step": 9690 }, { "epoch": 1.329560266172738, "grad_norm": 1.1796875, "learning_rate": 1.1852767030043011e-05, "loss": 0.445, "step": 9691 }, { "epoch": 1.3296974686149414, "grad_norm": 1.2578125, "learning_rate": 1.185134852133764e-05, "loss": 0.4097, "step": 9692 }, { "epoch": 1.329834671057145, "grad_norm": 1.140625, "learning_rate": 1.1849929974056862e-05, "loss": 0.3862, "step": 9693 }, { "epoch": 1.3299718734993484, "grad_norm": 1.140625, "learning_rate": 1.1848511388230235e-05, "loss": 0.3984, "step": 9694 }, { "epoch": 1.3301090759415517, "grad_norm": 1.2109375, "learning_rate": 1.184709276388732e-05, "loss": 0.4752, "step": 9695 }, { "epoch": 1.3302462783837552, "grad_norm": 1.3125, "learning_rate": 1.1845674101057676e-05, "loss": 0.4588, "step": 9696 }, { "epoch": 1.3303834808259587, "grad_norm": 1.2734375, "learning_rate": 1.1844255399770858e-05, "loss": 0.4518, "step": 9697 }, { "epoch": 1.3305206832681622, "grad_norm": 1.3671875, "learning_rate": 1.1842836660056434e-05, "loss": 0.4994, "step": 9698 }, { "epoch": 1.3306578857103657, "grad_norm": 1.21875, "learning_rate": 1.184141788194396e-05, "loss": 0.4533, "step": 9699 }, { "epoch": 1.3307950881525692, "grad_norm": 1.25, "learning_rate": 1.1839999065463e-05, "loss": 0.4441, "step": 9700 }, { "epoch": 1.3309322905947725, "grad_norm": 1.2109375, "learning_rate": 1.1838580210643115e-05, "loss": 0.4542, "step": 9701 }, { "epoch": 1.331069493036976, "grad_norm": 1.25, "learning_rate": 1.1837161317513872e-05, "loss": 0.475, "step": 9702 }, { "epoch": 1.3312066954791795, "grad_norm": 1.1875, "learning_rate": 1.1835742386104833e-05, "loss": 0.4359, "step": 9703 }, { "epoch": 1.331343897921383, "grad_norm": 1.2265625, "learning_rate": 1.1834323416445565e-05, "loss": 0.4775, "step": 9704 }, { "epoch": 1.3314811003635865, "grad_norm": 1.2421875, "learning_rate": 1.1832904408565635e-05, "loss": 0.4598, "step": 9705 }, { "epoch": 1.33161830280579, "grad_norm": 1.140625, "learning_rate": 1.1831485362494606e-05, "loss": 0.426, "step": 9706 }, { "epoch": 1.3317555052479935, "grad_norm": 1.265625, "learning_rate": 1.1830066278262052e-05, "loss": 0.4596, "step": 9707 }, { "epoch": 1.331892707690197, "grad_norm": 1.1484375, "learning_rate": 1.1828647155897533e-05, "loss": 0.4381, "step": 9708 }, { "epoch": 1.3320299101324005, "grad_norm": 1.3671875, "learning_rate": 1.1827227995430627e-05, "loss": 0.5441, "step": 9709 }, { "epoch": 1.332167112574604, "grad_norm": 1.203125, "learning_rate": 1.18258087968909e-05, "loss": 0.4465, "step": 9710 }, { "epoch": 1.3323043150168072, "grad_norm": 1.09375, "learning_rate": 1.1824389560307925e-05, "loss": 0.4085, "step": 9711 }, { "epoch": 1.3324415174590107, "grad_norm": 1.2578125, "learning_rate": 1.182297028571127e-05, "loss": 0.3632, "step": 9712 }, { "epoch": 1.3325787199012142, "grad_norm": 1.2734375, "learning_rate": 1.1821550973130509e-05, "loss": 0.4613, "step": 9713 }, { "epoch": 1.3327159223434177, "grad_norm": 1.1171875, "learning_rate": 1.1820131622595219e-05, "loss": 0.3453, "step": 9714 }, { "epoch": 1.3328531247856212, "grad_norm": 1.3359375, "learning_rate": 1.1818712234134967e-05, "loss": 0.4867, "step": 9715 }, { "epoch": 1.3329903272278247, "grad_norm": 1.234375, "learning_rate": 1.1817292807779337e-05, "loss": 0.4337, "step": 9716 }, { "epoch": 1.333127529670028, "grad_norm": 1.1953125, "learning_rate": 1.1815873343557898e-05, "loss": 0.4238, "step": 9717 }, { "epoch": 1.3332647321122315, "grad_norm": 1.203125, "learning_rate": 1.1814453841500228e-05, "loss": 0.4353, "step": 9718 }, { "epoch": 1.333401934554435, "grad_norm": 1.203125, "learning_rate": 1.1813034301635907e-05, "loss": 0.3464, "step": 9719 }, { "epoch": 1.3335391369966385, "grad_norm": 1.25, "learning_rate": 1.1811614723994507e-05, "loss": 0.413, "step": 9720 }, { "epoch": 1.333676339438842, "grad_norm": 1.2734375, "learning_rate": 1.1810195108605613e-05, "loss": 0.4539, "step": 9721 }, { "epoch": 1.3338135418810455, "grad_norm": 1.109375, "learning_rate": 1.1808775455498802e-05, "loss": 0.3814, "step": 9722 }, { "epoch": 1.333950744323249, "grad_norm": 1.296875, "learning_rate": 1.1807355764703652e-05, "loss": 0.4947, "step": 9723 }, { "epoch": 1.3340879467654525, "grad_norm": 1.234375, "learning_rate": 1.1805936036249749e-05, "loss": 0.4811, "step": 9724 }, { "epoch": 1.334225149207656, "grad_norm": 1.1953125, "learning_rate": 1.1804516270166675e-05, "loss": 0.4046, "step": 9725 }, { "epoch": 1.3343623516498595, "grad_norm": 1.1875, "learning_rate": 1.1803096466484007e-05, "loss": 0.4462, "step": 9726 }, { "epoch": 1.3344995540920628, "grad_norm": 1.2578125, "learning_rate": 1.1801676625231338e-05, "loss": 0.5057, "step": 9727 }, { "epoch": 1.3346367565342663, "grad_norm": 1.3359375, "learning_rate": 1.1800256746438241e-05, "loss": 0.4968, "step": 9728 }, { "epoch": 1.3347739589764698, "grad_norm": 1.3515625, "learning_rate": 1.1798836830134311e-05, "loss": 0.5024, "step": 9729 }, { "epoch": 1.3349111614186733, "grad_norm": 1.390625, "learning_rate": 1.1797416876349126e-05, "loss": 0.5017, "step": 9730 }, { "epoch": 1.3350483638608768, "grad_norm": 1.1171875, "learning_rate": 1.1795996885112278e-05, "loss": 0.3702, "step": 9731 }, { "epoch": 1.3351855663030803, "grad_norm": 1.3671875, "learning_rate": 1.1794576856453353e-05, "loss": 0.5251, "step": 9732 }, { "epoch": 1.3353227687452836, "grad_norm": 1.1328125, "learning_rate": 1.1793156790401938e-05, "loss": 0.3933, "step": 9733 }, { "epoch": 1.335459971187487, "grad_norm": 1.25, "learning_rate": 1.1791736686987623e-05, "loss": 0.4306, "step": 9734 }, { "epoch": 1.3355971736296905, "grad_norm": 1.1796875, "learning_rate": 1.1790316546240001e-05, "loss": 0.4219, "step": 9735 }, { "epoch": 1.335734376071894, "grad_norm": 1.15625, "learning_rate": 1.1788896368188658e-05, "loss": 0.4049, "step": 9736 }, { "epoch": 1.3358715785140975, "grad_norm": 1.296875, "learning_rate": 1.1787476152863187e-05, "loss": 0.5074, "step": 9737 }, { "epoch": 1.336008780956301, "grad_norm": 1.25, "learning_rate": 1.178605590029318e-05, "loss": 0.4662, "step": 9738 }, { "epoch": 1.3361459833985045, "grad_norm": 1.1953125, "learning_rate": 1.178463561050823e-05, "loss": 0.3927, "step": 9739 }, { "epoch": 1.336283185840708, "grad_norm": 1.2578125, "learning_rate": 1.1783215283537932e-05, "loss": 0.4577, "step": 9740 }, { "epoch": 1.3364203882829115, "grad_norm": 1.21875, "learning_rate": 1.1781794919411877e-05, "loss": 0.4227, "step": 9741 }, { "epoch": 1.336557590725115, "grad_norm": 1.234375, "learning_rate": 1.1780374518159663e-05, "loss": 0.4628, "step": 9742 }, { "epoch": 1.3366947931673183, "grad_norm": 1.234375, "learning_rate": 1.1778954079810888e-05, "loss": 0.4751, "step": 9743 }, { "epoch": 1.3368319956095218, "grad_norm": 1.1796875, "learning_rate": 1.1777533604395142e-05, "loss": 0.3702, "step": 9744 }, { "epoch": 1.3369691980517253, "grad_norm": 1.3203125, "learning_rate": 1.177611309194203e-05, "loss": 0.5183, "step": 9745 }, { "epoch": 1.3371064004939288, "grad_norm": 1.296875, "learning_rate": 1.1774692542481147e-05, "loss": 0.4834, "step": 9746 }, { "epoch": 1.3372436029361323, "grad_norm": 1.3046875, "learning_rate": 1.1773271956042092e-05, "loss": 0.502, "step": 9747 }, { "epoch": 1.3373808053783358, "grad_norm": 1.2265625, "learning_rate": 1.1771851332654464e-05, "loss": 0.4559, "step": 9748 }, { "epoch": 1.337518007820539, "grad_norm": 1.21875, "learning_rate": 1.1770430672347866e-05, "loss": 0.4329, "step": 9749 }, { "epoch": 1.3376552102627426, "grad_norm": 1.1953125, "learning_rate": 1.1769009975151896e-05, "loss": 0.4073, "step": 9750 }, { "epoch": 1.337792412704946, "grad_norm": 1.0234375, "learning_rate": 1.176758924109616e-05, "loss": 0.3315, "step": 9751 }, { "epoch": 1.3379296151471496, "grad_norm": 1.265625, "learning_rate": 1.176616847021026e-05, "loss": 0.4385, "step": 9752 }, { "epoch": 1.338066817589353, "grad_norm": 1.34375, "learning_rate": 1.1764747662523799e-05, "loss": 0.49, "step": 9753 }, { "epoch": 1.3382040200315566, "grad_norm": 1.234375, "learning_rate": 1.1763326818066383e-05, "loss": 0.4483, "step": 9754 }, { "epoch": 1.33834122247376, "grad_norm": 1.0625, "learning_rate": 1.1761905936867615e-05, "loss": 0.3478, "step": 9755 }, { "epoch": 1.3384784249159636, "grad_norm": 1.2734375, "learning_rate": 1.1760485018957101e-05, "loss": 0.5139, "step": 9756 }, { "epoch": 1.338615627358167, "grad_norm": 1.1953125, "learning_rate": 1.175906406436445e-05, "loss": 0.4183, "step": 9757 }, { "epoch": 1.3387528298003706, "grad_norm": 1.28125, "learning_rate": 1.1757643073119265e-05, "loss": 0.4774, "step": 9758 }, { "epoch": 1.3388900322425739, "grad_norm": 1.2109375, "learning_rate": 1.175622204525116e-05, "loss": 0.4509, "step": 9759 }, { "epoch": 1.3390272346847774, "grad_norm": 1.375, "learning_rate": 1.1754800980789742e-05, "loss": 0.439, "step": 9760 }, { "epoch": 1.3391644371269809, "grad_norm": 1.2578125, "learning_rate": 1.1753379879764619e-05, "loss": 0.4592, "step": 9761 }, { "epoch": 1.3393016395691844, "grad_norm": 1.328125, "learning_rate": 1.1751958742205405e-05, "loss": 0.5003, "step": 9762 }, { "epoch": 1.3394388420113879, "grad_norm": 1.3046875, "learning_rate": 1.1750537568141708e-05, "loss": 0.4596, "step": 9763 }, { "epoch": 1.3395760444535914, "grad_norm": 1.296875, "learning_rate": 1.1749116357603142e-05, "loss": 0.5015, "step": 9764 }, { "epoch": 1.3397132468957946, "grad_norm": 1.140625, "learning_rate": 1.1747695110619322e-05, "loss": 0.3831, "step": 9765 }, { "epoch": 1.3398504493379981, "grad_norm": 1.28125, "learning_rate": 1.1746273827219855e-05, "loss": 0.4713, "step": 9766 }, { "epoch": 1.3399876517802016, "grad_norm": 1.2890625, "learning_rate": 1.174485250743436e-05, "loss": 0.4513, "step": 9767 }, { "epoch": 1.3401248542224051, "grad_norm": 1.3984375, "learning_rate": 1.1743431151292453e-05, "loss": 0.5033, "step": 9768 }, { "epoch": 1.3402620566646086, "grad_norm": 1.1484375, "learning_rate": 1.174200975882375e-05, "loss": 0.3945, "step": 9769 }, { "epoch": 1.3403992591068121, "grad_norm": 1.296875, "learning_rate": 1.1740588330057863e-05, "loss": 0.4351, "step": 9770 }, { "epoch": 1.3405364615490156, "grad_norm": 1.2265625, "learning_rate": 1.1739166865024415e-05, "loss": 0.4225, "step": 9771 }, { "epoch": 1.3406736639912191, "grad_norm": 1.28125, "learning_rate": 1.1737745363753023e-05, "loss": 0.4918, "step": 9772 }, { "epoch": 1.3408108664334226, "grad_norm": 1.375, "learning_rate": 1.1736323826273304e-05, "loss": 0.4932, "step": 9773 }, { "epoch": 1.3409480688756261, "grad_norm": 1.1875, "learning_rate": 1.1734902252614877e-05, "loss": 0.4281, "step": 9774 }, { "epoch": 1.3410852713178294, "grad_norm": 1.265625, "learning_rate": 1.1733480642807365e-05, "loss": 0.4937, "step": 9775 }, { "epoch": 1.341222473760033, "grad_norm": 1.2734375, "learning_rate": 1.1732058996880386e-05, "loss": 0.4383, "step": 9776 }, { "epoch": 1.3413596762022364, "grad_norm": 1.21875, "learning_rate": 1.1730637314863567e-05, "loss": 0.4635, "step": 9777 }, { "epoch": 1.34149687864444, "grad_norm": 1.1484375, "learning_rate": 1.1729215596786527e-05, "loss": 0.4028, "step": 9778 }, { "epoch": 1.3416340810866434, "grad_norm": 1.3046875, "learning_rate": 1.172779384267889e-05, "loss": 0.5278, "step": 9779 }, { "epoch": 1.341771283528847, "grad_norm": 1.1328125, "learning_rate": 1.1726372052570283e-05, "loss": 0.3738, "step": 9780 }, { "epoch": 1.3419084859710502, "grad_norm": 1.2109375, "learning_rate": 1.1724950226490325e-05, "loss": 0.4557, "step": 9781 }, { "epoch": 1.3420456884132537, "grad_norm": 1.25, "learning_rate": 1.1723528364468645e-05, "loss": 0.4576, "step": 9782 }, { "epoch": 1.3421828908554572, "grad_norm": 1.203125, "learning_rate": 1.1722106466534872e-05, "loss": 0.4832, "step": 9783 }, { "epoch": 1.3423200932976607, "grad_norm": 1.203125, "learning_rate": 1.1720684532718632e-05, "loss": 0.4142, "step": 9784 }, { "epoch": 1.3424572957398642, "grad_norm": 1.25, "learning_rate": 1.171926256304955e-05, "loss": 0.4682, "step": 9785 }, { "epoch": 1.3425944981820677, "grad_norm": 1.1640625, "learning_rate": 1.1717840557557255e-05, "loss": 0.3754, "step": 9786 }, { "epoch": 1.3427317006242712, "grad_norm": 1.15625, "learning_rate": 1.171641851627138e-05, "loss": 0.413, "step": 9787 }, { "epoch": 1.3428689030664747, "grad_norm": 1.1796875, "learning_rate": 1.1714996439221552e-05, "loss": 0.4125, "step": 9788 }, { "epoch": 1.3430061055086782, "grad_norm": 1.2109375, "learning_rate": 1.1713574326437405e-05, "loss": 0.4575, "step": 9789 }, { "epoch": 1.3431433079508817, "grad_norm": 1.15625, "learning_rate": 1.1712152177948568e-05, "loss": 0.4229, "step": 9790 }, { "epoch": 1.343280510393085, "grad_norm": 1.2890625, "learning_rate": 1.1710729993784674e-05, "loss": 0.4326, "step": 9791 }, { "epoch": 1.3434177128352884, "grad_norm": 1.40625, "learning_rate": 1.1709307773975355e-05, "loss": 0.5376, "step": 9792 }, { "epoch": 1.343554915277492, "grad_norm": 1.375, "learning_rate": 1.1707885518550249e-05, "loss": 0.4845, "step": 9793 }, { "epoch": 1.3436921177196954, "grad_norm": 1.15625, "learning_rate": 1.1706463227538985e-05, "loss": 0.4084, "step": 9794 }, { "epoch": 1.343829320161899, "grad_norm": 1.2265625, "learning_rate": 1.1705040900971202e-05, "loss": 0.425, "step": 9795 }, { "epoch": 1.3439665226041024, "grad_norm": 1.1640625, "learning_rate": 1.1703618538876537e-05, "loss": 0.4176, "step": 9796 }, { "epoch": 1.3441037250463057, "grad_norm": 1.15625, "learning_rate": 1.1702196141284625e-05, "loss": 0.4023, "step": 9797 }, { "epoch": 1.3442409274885092, "grad_norm": 1.2890625, "learning_rate": 1.1700773708225104e-05, "loss": 0.488, "step": 9798 }, { "epoch": 1.3443781299307127, "grad_norm": 1.28125, "learning_rate": 1.1699351239727611e-05, "loss": 0.479, "step": 9799 }, { "epoch": 1.3445153323729162, "grad_norm": 1.28125, "learning_rate": 1.169792873582179e-05, "loss": 0.4868, "step": 9800 }, { "epoch": 1.3446525348151197, "grad_norm": 1.3046875, "learning_rate": 1.1696506196537275e-05, "loss": 0.4655, "step": 9801 }, { "epoch": 1.3447897372573232, "grad_norm": 1.2109375, "learning_rate": 1.1695083621903709e-05, "loss": 0.4361, "step": 9802 }, { "epoch": 1.3449269396995267, "grad_norm": 1.3046875, "learning_rate": 1.1693661011950735e-05, "loss": 0.4544, "step": 9803 }, { "epoch": 1.3450641421417302, "grad_norm": 1.1015625, "learning_rate": 1.169223836670799e-05, "loss": 0.2806, "step": 9804 }, { "epoch": 1.3452013445839337, "grad_norm": 1.25, "learning_rate": 1.1690815686205122e-05, "loss": 0.4612, "step": 9805 }, { "epoch": 1.3453385470261372, "grad_norm": 1.3203125, "learning_rate": 1.1689392970471772e-05, "loss": 0.4628, "step": 9806 }, { "epoch": 1.3454757494683405, "grad_norm": 1.2421875, "learning_rate": 1.1687970219537584e-05, "loss": 0.4734, "step": 9807 }, { "epoch": 1.345612951910544, "grad_norm": 1.15625, "learning_rate": 1.1686547433432205e-05, "loss": 0.3799, "step": 9808 }, { "epoch": 1.3457501543527475, "grad_norm": 1.296875, "learning_rate": 1.1685124612185278e-05, "loss": 0.5072, "step": 9809 }, { "epoch": 1.345887356794951, "grad_norm": 1.359375, "learning_rate": 1.1683701755826453e-05, "loss": 0.4811, "step": 9810 }, { "epoch": 1.3460245592371545, "grad_norm": 1.265625, "learning_rate": 1.1682278864385372e-05, "loss": 0.4495, "step": 9811 }, { "epoch": 1.346161761679358, "grad_norm": 1.1953125, "learning_rate": 1.1680855937891689e-05, "loss": 0.4047, "step": 9812 }, { "epoch": 1.3462989641215612, "grad_norm": 1.1484375, "learning_rate": 1.1679432976375049e-05, "loss": 0.4021, "step": 9813 }, { "epoch": 1.3464361665637647, "grad_norm": 1.15625, "learning_rate": 1.1678009979865102e-05, "loss": 0.4483, "step": 9814 }, { "epoch": 1.3465733690059682, "grad_norm": 1.2890625, "learning_rate": 1.1676586948391496e-05, "loss": 0.492, "step": 9815 }, { "epoch": 1.3467105714481717, "grad_norm": 1.1796875, "learning_rate": 1.1675163881983887e-05, "loss": 0.4389, "step": 9816 }, { "epoch": 1.3468477738903752, "grad_norm": 1.1875, "learning_rate": 1.1673740780671923e-05, "loss": 0.4243, "step": 9817 }, { "epoch": 1.3469849763325787, "grad_norm": 1.2265625, "learning_rate": 1.1672317644485257e-05, "loss": 0.444, "step": 9818 }, { "epoch": 1.3471221787747822, "grad_norm": 1.3203125, "learning_rate": 1.1670894473453537e-05, "loss": 0.5128, "step": 9819 }, { "epoch": 1.3472593812169857, "grad_norm": 1.2890625, "learning_rate": 1.1669471267606427e-05, "loss": 0.4645, "step": 9820 }, { "epoch": 1.3473965836591892, "grad_norm": 1.265625, "learning_rate": 1.1668048026973573e-05, "loss": 0.5135, "step": 9821 }, { "epoch": 1.3475337861013927, "grad_norm": 1.1875, "learning_rate": 1.1666624751584635e-05, "loss": 0.429, "step": 9822 }, { "epoch": 1.347670988543596, "grad_norm": 1.2109375, "learning_rate": 1.1665201441469267e-05, "loss": 0.4883, "step": 9823 }, { "epoch": 1.3478081909857995, "grad_norm": 1.2734375, "learning_rate": 1.1663778096657123e-05, "loss": 0.4613, "step": 9824 }, { "epoch": 1.347945393428003, "grad_norm": 1.2421875, "learning_rate": 1.1662354717177868e-05, "loss": 0.4557, "step": 9825 }, { "epoch": 1.3480825958702065, "grad_norm": 1.3046875, "learning_rate": 1.1660931303061153e-05, "loss": 0.4982, "step": 9826 }, { "epoch": 1.34821979831241, "grad_norm": 1.28125, "learning_rate": 1.1659507854336638e-05, "loss": 0.4677, "step": 9827 }, { "epoch": 1.3483570007546135, "grad_norm": 1.359375, "learning_rate": 1.1658084371033984e-05, "loss": 0.5284, "step": 9828 }, { "epoch": 1.3484942031968168, "grad_norm": 1.3125, "learning_rate": 1.1656660853182849e-05, "loss": 0.5275, "step": 9829 }, { "epoch": 1.3486314056390203, "grad_norm": 1.25, "learning_rate": 1.1655237300812898e-05, "loss": 0.4846, "step": 9830 }, { "epoch": 1.3487686080812238, "grad_norm": 1.2109375, "learning_rate": 1.165381371395379e-05, "loss": 0.439, "step": 9831 }, { "epoch": 1.3489058105234273, "grad_norm": 1.2578125, "learning_rate": 1.1652390092635187e-05, "loss": 0.4456, "step": 9832 }, { "epoch": 1.3490430129656308, "grad_norm": 1.0078125, "learning_rate": 1.1650966436886753e-05, "loss": 0.3254, "step": 9833 }, { "epoch": 1.3491802154078343, "grad_norm": 1.2265625, "learning_rate": 1.1649542746738153e-05, "loss": 0.4664, "step": 9834 }, { "epoch": 1.3493174178500378, "grad_norm": 1.2109375, "learning_rate": 1.164811902221905e-05, "loss": 0.4563, "step": 9835 }, { "epoch": 1.3494546202922413, "grad_norm": 1.2890625, "learning_rate": 1.1646695263359107e-05, "loss": 0.4871, "step": 9836 }, { "epoch": 1.3495918227344448, "grad_norm": 1.2890625, "learning_rate": 1.1645271470187995e-05, "loss": 0.4632, "step": 9837 }, { "epoch": 1.3497290251766483, "grad_norm": 1.2265625, "learning_rate": 1.1643847642735377e-05, "loss": 0.4203, "step": 9838 }, { "epoch": 1.3498662276188516, "grad_norm": 1.1875, "learning_rate": 1.1642423781030922e-05, "loss": 0.4358, "step": 9839 }, { "epoch": 1.350003430061055, "grad_norm": 1.328125, "learning_rate": 1.1640999885104298e-05, "loss": 0.4884, "step": 9840 }, { "epoch": 1.3501406325032586, "grad_norm": 1.1953125, "learning_rate": 1.1639575954985171e-05, "loss": 0.4782, "step": 9841 }, { "epoch": 1.350277834945462, "grad_norm": 1.1640625, "learning_rate": 1.1638151990703217e-05, "loss": 0.4063, "step": 9842 }, { "epoch": 1.3504150373876656, "grad_norm": 1.234375, "learning_rate": 1.1636727992288104e-05, "loss": 0.4173, "step": 9843 }, { "epoch": 1.350552239829869, "grad_norm": 1.265625, "learning_rate": 1.1635303959769497e-05, "loss": 0.4152, "step": 9844 }, { "epoch": 1.3506894422720723, "grad_norm": 1.1796875, "learning_rate": 1.1633879893177074e-05, "loss": 0.4441, "step": 9845 }, { "epoch": 1.3508266447142758, "grad_norm": 1.21875, "learning_rate": 1.1632455792540505e-05, "loss": 0.4708, "step": 9846 }, { "epoch": 1.3509638471564793, "grad_norm": 1.2109375, "learning_rate": 1.1631031657889463e-05, "loss": 0.4069, "step": 9847 }, { "epoch": 1.3511010495986828, "grad_norm": 1.265625, "learning_rate": 1.1629607489253622e-05, "loss": 0.4427, "step": 9848 }, { "epoch": 1.3512382520408863, "grad_norm": 1.1484375, "learning_rate": 1.1628183286662658e-05, "loss": 0.3815, "step": 9849 }, { "epoch": 1.3513754544830898, "grad_norm": 1.1796875, "learning_rate": 1.1626759050146244e-05, "loss": 0.4354, "step": 9850 }, { "epoch": 1.3515126569252933, "grad_norm": 1.2578125, "learning_rate": 1.1625334779734059e-05, "loss": 0.4496, "step": 9851 }, { "epoch": 1.3516498593674968, "grad_norm": 1.2265625, "learning_rate": 1.1623910475455778e-05, "loss": 0.4475, "step": 9852 }, { "epoch": 1.3517870618097003, "grad_norm": 1.1953125, "learning_rate": 1.1622486137341076e-05, "loss": 0.4415, "step": 9853 }, { "epoch": 1.3519242642519038, "grad_norm": 1.265625, "learning_rate": 1.1621061765419635e-05, "loss": 0.4343, "step": 9854 }, { "epoch": 1.352061466694107, "grad_norm": 1.2734375, "learning_rate": 1.161963735972113e-05, "loss": 0.4732, "step": 9855 }, { "epoch": 1.3521986691363106, "grad_norm": 1.328125, "learning_rate": 1.1618212920275242e-05, "loss": 0.5203, "step": 9856 }, { "epoch": 1.352335871578514, "grad_norm": 1.2265625, "learning_rate": 1.1616788447111653e-05, "loss": 0.4513, "step": 9857 }, { "epoch": 1.3524730740207176, "grad_norm": 1.296875, "learning_rate": 1.1615363940260042e-05, "loss": 0.4584, "step": 9858 }, { "epoch": 1.352610276462921, "grad_norm": 1.21875, "learning_rate": 1.1613939399750092e-05, "loss": 0.4623, "step": 9859 }, { "epoch": 1.3527474789051246, "grad_norm": 1.25, "learning_rate": 1.161251482561148e-05, "loss": 0.4574, "step": 9860 }, { "epoch": 1.3528846813473279, "grad_norm": 1.2421875, "learning_rate": 1.1611090217873897e-05, "loss": 0.4867, "step": 9861 }, { "epoch": 1.3530218837895314, "grad_norm": 1.3046875, "learning_rate": 1.1609665576567024e-05, "loss": 0.5002, "step": 9862 }, { "epoch": 1.3531590862317349, "grad_norm": 1.2734375, "learning_rate": 1.1608240901720541e-05, "loss": 0.5119, "step": 9863 }, { "epoch": 1.3532962886739384, "grad_norm": 1.1875, "learning_rate": 1.1606816193364138e-05, "loss": 0.3975, "step": 9864 }, { "epoch": 1.3534334911161419, "grad_norm": 1.1484375, "learning_rate": 1.1605391451527499e-05, "loss": 0.3858, "step": 9865 }, { "epoch": 1.3535706935583454, "grad_norm": 1.21875, "learning_rate": 1.160396667624031e-05, "loss": 0.4345, "step": 9866 }, { "epoch": 1.3537078960005489, "grad_norm": 1.3046875, "learning_rate": 1.1602541867532258e-05, "loss": 0.5029, "step": 9867 }, { "epoch": 1.3538450984427524, "grad_norm": 1.3125, "learning_rate": 1.1601117025433033e-05, "loss": 0.4562, "step": 9868 }, { "epoch": 1.3539823008849559, "grad_norm": 1.15625, "learning_rate": 1.159969214997232e-05, "loss": 0.4076, "step": 9869 }, { "epoch": 1.3541195033271594, "grad_norm": 1.1875, "learning_rate": 1.159826724117981e-05, "loss": 0.5023, "step": 9870 }, { "epoch": 1.3542567057693626, "grad_norm": 1.2109375, "learning_rate": 1.1596842299085198e-05, "loss": 0.4733, "step": 9871 }, { "epoch": 1.3543939082115661, "grad_norm": 1.21875, "learning_rate": 1.1595417323718168e-05, "loss": 0.4344, "step": 9872 }, { "epoch": 1.3545311106537696, "grad_norm": 1.2734375, "learning_rate": 1.159399231510841e-05, "loss": 0.4037, "step": 9873 }, { "epoch": 1.3546683130959731, "grad_norm": 1.1953125, "learning_rate": 1.1592567273285622e-05, "loss": 0.4392, "step": 9874 }, { "epoch": 1.3548055155381766, "grad_norm": 1.203125, "learning_rate": 1.1591142198279493e-05, "loss": 0.437, "step": 9875 }, { "epoch": 1.3549427179803801, "grad_norm": 1.2265625, "learning_rate": 1.1589717090119716e-05, "loss": 0.4127, "step": 9876 }, { "epoch": 1.3550799204225834, "grad_norm": 1.125, "learning_rate": 1.158829194883599e-05, "loss": 0.3801, "step": 9877 }, { "epoch": 1.355217122864787, "grad_norm": 1.15625, "learning_rate": 1.1586866774458003e-05, "loss": 0.4114, "step": 9878 }, { "epoch": 1.3553543253069904, "grad_norm": 1.34375, "learning_rate": 1.1585441567015459e-05, "loss": 0.486, "step": 9879 }, { "epoch": 1.355491527749194, "grad_norm": 1.28125, "learning_rate": 1.1584016326538043e-05, "loss": 0.4368, "step": 9880 }, { "epoch": 1.3556287301913974, "grad_norm": 1.234375, "learning_rate": 1.158259105305546e-05, "loss": 0.4286, "step": 9881 }, { "epoch": 1.355765932633601, "grad_norm": 1.203125, "learning_rate": 1.1581165746597404e-05, "loss": 0.4283, "step": 9882 }, { "epoch": 1.3559031350758044, "grad_norm": 1.1640625, "learning_rate": 1.1579740407193578e-05, "loss": 0.4387, "step": 9883 }, { "epoch": 1.356040337518008, "grad_norm": 1.3515625, "learning_rate": 1.1578315034873675e-05, "loss": 0.4625, "step": 9884 }, { "epoch": 1.3561775399602114, "grad_norm": 1.2265625, "learning_rate": 1.1576889629667396e-05, "loss": 0.4436, "step": 9885 }, { "epoch": 1.356314742402415, "grad_norm": 1.2109375, "learning_rate": 1.1575464191604444e-05, "loss": 0.4241, "step": 9886 }, { "epoch": 1.3564519448446182, "grad_norm": 1.1796875, "learning_rate": 1.1574038720714518e-05, "loss": 0.4116, "step": 9887 }, { "epoch": 1.3565891472868217, "grad_norm": 1.3046875, "learning_rate": 1.1572613217027323e-05, "loss": 0.486, "step": 9888 }, { "epoch": 1.3567263497290252, "grad_norm": 1.2265625, "learning_rate": 1.1571187680572555e-05, "loss": 0.4436, "step": 9889 }, { "epoch": 1.3568635521712287, "grad_norm": 1.2734375, "learning_rate": 1.1569762111379921e-05, "loss": 0.4446, "step": 9890 }, { "epoch": 1.3570007546134322, "grad_norm": 1.2734375, "learning_rate": 1.1568336509479123e-05, "loss": 0.4945, "step": 9891 }, { "epoch": 1.3571379570556357, "grad_norm": 1.3203125, "learning_rate": 1.1566910874899865e-05, "loss": 0.4589, "step": 9892 }, { "epoch": 1.357275159497839, "grad_norm": 1.28125, "learning_rate": 1.1565485207671859e-05, "loss": 0.4869, "step": 9893 }, { "epoch": 1.3574123619400424, "grad_norm": 1.25, "learning_rate": 1.1564059507824801e-05, "loss": 0.4505, "step": 9894 }, { "epoch": 1.357549564382246, "grad_norm": 1.15625, "learning_rate": 1.15626337753884e-05, "loss": 0.3642, "step": 9895 }, { "epoch": 1.3576867668244494, "grad_norm": 1.3359375, "learning_rate": 1.1561208010392368e-05, "loss": 0.4779, "step": 9896 }, { "epoch": 1.357823969266653, "grad_norm": 1.265625, "learning_rate": 1.1559782212866413e-05, "loss": 0.5014, "step": 9897 }, { "epoch": 1.3579611717088564, "grad_norm": 1.3125, "learning_rate": 1.1558356382840234e-05, "loss": 0.5669, "step": 9898 }, { "epoch": 1.35809837415106, "grad_norm": 1.28125, "learning_rate": 1.1556930520343547e-05, "loss": 0.4652, "step": 9899 }, { "epoch": 1.3582355765932634, "grad_norm": 1.203125, "learning_rate": 1.1555504625406064e-05, "loss": 0.4216, "step": 9900 }, { "epoch": 1.358372779035467, "grad_norm": 1.21875, "learning_rate": 1.1554078698057489e-05, "loss": 0.429, "step": 9901 }, { "epoch": 1.3585099814776704, "grad_norm": 1.2109375, "learning_rate": 1.1552652738327538e-05, "loss": 0.4307, "step": 9902 }, { "epoch": 1.3586471839198737, "grad_norm": 1.21875, "learning_rate": 1.1551226746245923e-05, "loss": 0.4338, "step": 9903 }, { "epoch": 1.3587843863620772, "grad_norm": 1.1328125, "learning_rate": 1.1549800721842353e-05, "loss": 0.3672, "step": 9904 }, { "epoch": 1.3589215888042807, "grad_norm": 1.25, "learning_rate": 1.1548374665146547e-05, "loss": 0.4848, "step": 9905 }, { "epoch": 1.3590587912464842, "grad_norm": 1.3203125, "learning_rate": 1.154694857618821e-05, "loss": 0.4615, "step": 9906 }, { "epoch": 1.3591959936886877, "grad_norm": 1.1015625, "learning_rate": 1.1545522454997065e-05, "loss": 0.3976, "step": 9907 }, { "epoch": 1.3593331961308912, "grad_norm": 1.2578125, "learning_rate": 1.154409630160282e-05, "loss": 0.4479, "step": 9908 }, { "epoch": 1.3594703985730945, "grad_norm": 1.171875, "learning_rate": 1.15426701160352e-05, "loss": 0.4283, "step": 9909 }, { "epoch": 1.359607601015298, "grad_norm": 1.3046875, "learning_rate": 1.1541243898323913e-05, "loss": 0.4468, "step": 9910 }, { "epoch": 1.3597448034575015, "grad_norm": 1.234375, "learning_rate": 1.153981764849868e-05, "loss": 0.4316, "step": 9911 }, { "epoch": 1.359882005899705, "grad_norm": 1.203125, "learning_rate": 1.1538391366589219e-05, "loss": 0.4233, "step": 9912 }, { "epoch": 1.3600192083419085, "grad_norm": 1.1171875, "learning_rate": 1.1536965052625246e-05, "loss": 0.3985, "step": 9913 }, { "epoch": 1.360156410784112, "grad_norm": 1.1796875, "learning_rate": 1.1535538706636483e-05, "loss": 0.3892, "step": 9914 }, { "epoch": 1.3602936132263155, "grad_norm": 1.1875, "learning_rate": 1.1534112328652649e-05, "loss": 0.4448, "step": 9915 }, { "epoch": 1.360430815668519, "grad_norm": 1.328125, "learning_rate": 1.1532685918703466e-05, "loss": 0.4411, "step": 9916 }, { "epoch": 1.3605680181107225, "grad_norm": 1.2578125, "learning_rate": 1.1531259476818653e-05, "loss": 0.4481, "step": 9917 }, { "epoch": 1.360705220552926, "grad_norm": 1.296875, "learning_rate": 1.152983300302793e-05, "loss": 0.4396, "step": 9918 }, { "epoch": 1.3608424229951293, "grad_norm": 1.1953125, "learning_rate": 1.1528406497361024e-05, "loss": 0.3967, "step": 9919 }, { "epoch": 1.3609796254373328, "grad_norm": 1.25, "learning_rate": 1.1526979959847657e-05, "loss": 0.4596, "step": 9920 }, { "epoch": 1.3611168278795363, "grad_norm": 1.2421875, "learning_rate": 1.1525553390517552e-05, "loss": 0.4781, "step": 9921 }, { "epoch": 1.3612540303217397, "grad_norm": 1.28125, "learning_rate": 1.1524126789400434e-05, "loss": 0.4835, "step": 9922 }, { "epoch": 1.3613912327639432, "grad_norm": 1.2890625, "learning_rate": 1.1522700156526028e-05, "loss": 0.4813, "step": 9923 }, { "epoch": 1.3615284352061467, "grad_norm": 1.3359375, "learning_rate": 1.1521273491924061e-05, "loss": 0.4301, "step": 9924 }, { "epoch": 1.36166563764835, "grad_norm": 1.40625, "learning_rate": 1.1519846795624256e-05, "loss": 0.4851, "step": 9925 }, { "epoch": 1.3618028400905535, "grad_norm": 1.25, "learning_rate": 1.1518420067656344e-05, "loss": 0.4596, "step": 9926 }, { "epoch": 1.361940042532757, "grad_norm": 1.171875, "learning_rate": 1.1516993308050053e-05, "loss": 0.4416, "step": 9927 }, { "epoch": 1.3620772449749605, "grad_norm": 1.21875, "learning_rate": 1.1515566516835105e-05, "loss": 0.4196, "step": 9928 }, { "epoch": 1.362214447417164, "grad_norm": 1.2265625, "learning_rate": 1.151413969404124e-05, "loss": 0.4506, "step": 9929 }, { "epoch": 1.3623516498593675, "grad_norm": 1.1796875, "learning_rate": 1.151271283969818e-05, "loss": 0.3834, "step": 9930 }, { "epoch": 1.362488852301571, "grad_norm": 1.171875, "learning_rate": 1.1511285953835655e-05, "loss": 0.382, "step": 9931 }, { "epoch": 1.3626260547437745, "grad_norm": 1.3828125, "learning_rate": 1.1509859036483402e-05, "loss": 0.483, "step": 9932 }, { "epoch": 1.362763257185978, "grad_norm": 1.1953125, "learning_rate": 1.1508432087671148e-05, "loss": 0.42, "step": 9933 }, { "epoch": 1.3629004596281815, "grad_norm": 1.453125, "learning_rate": 1.1507005107428628e-05, "loss": 0.4698, "step": 9934 }, { "epoch": 1.3630376620703848, "grad_norm": 1.3671875, "learning_rate": 1.1505578095785573e-05, "loss": 0.4528, "step": 9935 }, { "epoch": 1.3631748645125883, "grad_norm": 1.265625, "learning_rate": 1.1504151052771716e-05, "loss": 0.4227, "step": 9936 }, { "epoch": 1.3633120669547918, "grad_norm": 1.3125, "learning_rate": 1.1502723978416794e-05, "loss": 0.5265, "step": 9937 }, { "epoch": 1.3634492693969953, "grad_norm": 1.2890625, "learning_rate": 1.1501296872750543e-05, "loss": 0.4647, "step": 9938 }, { "epoch": 1.3635864718391988, "grad_norm": 1.1328125, "learning_rate": 1.1499869735802695e-05, "loss": 0.4246, "step": 9939 }, { "epoch": 1.3637236742814023, "grad_norm": 1.265625, "learning_rate": 1.1498442567602988e-05, "loss": 0.4499, "step": 9940 }, { "epoch": 1.3638608767236056, "grad_norm": 1.2109375, "learning_rate": 1.1497015368181161e-05, "loss": 0.4565, "step": 9941 }, { "epoch": 1.363998079165809, "grad_norm": 1.203125, "learning_rate": 1.149558813756695e-05, "loss": 0.4034, "step": 9942 }, { "epoch": 1.3641352816080126, "grad_norm": 1.2890625, "learning_rate": 1.1494160875790092e-05, "loss": 0.4763, "step": 9943 }, { "epoch": 1.364272484050216, "grad_norm": 1.234375, "learning_rate": 1.1492733582880328e-05, "loss": 0.455, "step": 9944 }, { "epoch": 1.3644096864924196, "grad_norm": 1.1875, "learning_rate": 1.1491306258867396e-05, "loss": 0.4303, "step": 9945 }, { "epoch": 1.364546888934623, "grad_norm": 1.3359375, "learning_rate": 1.1489878903781037e-05, "loss": 0.4585, "step": 9946 }, { "epoch": 1.3646840913768266, "grad_norm": 1.2265625, "learning_rate": 1.1488451517650994e-05, "loss": 0.4516, "step": 9947 }, { "epoch": 1.36482129381903, "grad_norm": 1.296875, "learning_rate": 1.1487024100507008e-05, "loss": 0.4873, "step": 9948 }, { "epoch": 1.3649584962612336, "grad_norm": 1.1953125, "learning_rate": 1.1485596652378814e-05, "loss": 0.4267, "step": 9949 }, { "epoch": 1.365095698703437, "grad_norm": 1.2734375, "learning_rate": 1.1484169173296168e-05, "loss": 0.4788, "step": 9950 }, { "epoch": 1.3652329011456403, "grad_norm": 1.3046875, "learning_rate": 1.14827416632888e-05, "loss": 0.4816, "step": 9951 }, { "epoch": 1.3653701035878438, "grad_norm": 1.203125, "learning_rate": 1.1481314122386464e-05, "loss": 0.4393, "step": 9952 }, { "epoch": 1.3655073060300473, "grad_norm": 1.2578125, "learning_rate": 1.1479886550618899e-05, "loss": 0.4848, "step": 9953 }, { "epoch": 1.3656445084722508, "grad_norm": 1.078125, "learning_rate": 1.1478458948015853e-05, "loss": 0.3455, "step": 9954 }, { "epoch": 1.3657817109144543, "grad_norm": 1.2265625, "learning_rate": 1.1477031314607073e-05, "loss": 0.4412, "step": 9955 }, { "epoch": 1.3659189133566578, "grad_norm": 1.1640625, "learning_rate": 1.1475603650422303e-05, "loss": 0.4166, "step": 9956 }, { "epoch": 1.366056115798861, "grad_norm": 1.1796875, "learning_rate": 1.147417595549129e-05, "loss": 0.3994, "step": 9957 }, { "epoch": 1.3661933182410646, "grad_norm": 1.2109375, "learning_rate": 1.1472748229843789e-05, "loss": 0.4331, "step": 9958 }, { "epoch": 1.366330520683268, "grad_norm": 1.2734375, "learning_rate": 1.1471320473509542e-05, "loss": 0.4706, "step": 9959 }, { "epoch": 1.3664677231254716, "grad_norm": 1.34375, "learning_rate": 1.1469892686518296e-05, "loss": 0.483, "step": 9960 }, { "epoch": 1.366604925567675, "grad_norm": 1.3125, "learning_rate": 1.1468464868899806e-05, "loss": 0.4064, "step": 9961 }, { "epoch": 1.3667421280098786, "grad_norm": 1.1640625, "learning_rate": 1.1467037020683823e-05, "loss": 0.3843, "step": 9962 }, { "epoch": 1.366879330452082, "grad_norm": 1.203125, "learning_rate": 1.1465609141900094e-05, "loss": 0.4316, "step": 9963 }, { "epoch": 1.3670165328942856, "grad_norm": 1.2890625, "learning_rate": 1.1464181232578374e-05, "loss": 0.4555, "step": 9964 }, { "epoch": 1.367153735336489, "grad_norm": 1.1953125, "learning_rate": 1.1462753292748415e-05, "loss": 0.4271, "step": 9965 }, { "epoch": 1.3672909377786926, "grad_norm": 1.28125, "learning_rate": 1.1461325322439968e-05, "loss": 0.4888, "step": 9966 }, { "epoch": 1.3674281402208959, "grad_norm": 1.2578125, "learning_rate": 1.145989732168279e-05, "loss": 0.4761, "step": 9967 }, { "epoch": 1.3675653426630994, "grad_norm": 1.2109375, "learning_rate": 1.1458469290506633e-05, "loss": 0.4352, "step": 9968 }, { "epoch": 1.3677025451053029, "grad_norm": 1.21875, "learning_rate": 1.1457041228941255e-05, "loss": 0.4528, "step": 9969 }, { "epoch": 1.3678397475475064, "grad_norm": 1.25, "learning_rate": 1.1455613137016408e-05, "loss": 0.4867, "step": 9970 }, { "epoch": 1.3679769499897099, "grad_norm": 1.28125, "learning_rate": 1.145418501476185e-05, "loss": 0.4993, "step": 9971 }, { "epoch": 1.3681141524319134, "grad_norm": 1.3046875, "learning_rate": 1.1452756862207335e-05, "loss": 0.4672, "step": 9972 }, { "epoch": 1.3682513548741166, "grad_norm": 1.1875, "learning_rate": 1.1451328679382625e-05, "loss": 0.4104, "step": 9973 }, { "epoch": 1.3683885573163201, "grad_norm": 1.0, "learning_rate": 1.1449900466317476e-05, "loss": 0.2667, "step": 9974 }, { "epoch": 1.3685257597585236, "grad_norm": 1.25, "learning_rate": 1.1448472223041646e-05, "loss": 0.429, "step": 9975 }, { "epoch": 1.3686629622007271, "grad_norm": 1.359375, "learning_rate": 1.1447043949584896e-05, "loss": 0.4458, "step": 9976 }, { "epoch": 1.3688001646429306, "grad_norm": 1.3671875, "learning_rate": 1.1445615645976989e-05, "loss": 0.502, "step": 9977 }, { "epoch": 1.3689373670851341, "grad_norm": 1.1484375, "learning_rate": 1.1444187312247679e-05, "loss": 0.392, "step": 9978 }, { "epoch": 1.3690745695273376, "grad_norm": 1.2578125, "learning_rate": 1.1442758948426731e-05, "loss": 0.4434, "step": 9979 }, { "epoch": 1.3692117719695411, "grad_norm": 1.1796875, "learning_rate": 1.1441330554543908e-05, "loss": 0.4381, "step": 9980 }, { "epoch": 1.3693489744117446, "grad_norm": 1.2578125, "learning_rate": 1.1439902130628968e-05, "loss": 0.4431, "step": 9981 }, { "epoch": 1.3694861768539481, "grad_norm": 1.21875, "learning_rate": 1.143847367671168e-05, "loss": 0.4553, "step": 9982 }, { "epoch": 1.3696233792961514, "grad_norm": 1.21875, "learning_rate": 1.1437045192821803e-05, "loss": 0.4241, "step": 9983 }, { "epoch": 1.369760581738355, "grad_norm": 1.3046875, "learning_rate": 1.1435616678989103e-05, "loss": 0.4701, "step": 9984 }, { "epoch": 1.3698977841805584, "grad_norm": 1.2421875, "learning_rate": 1.1434188135243347e-05, "loss": 0.5107, "step": 9985 }, { "epoch": 1.370034986622762, "grad_norm": 1.1640625, "learning_rate": 1.1432759561614299e-05, "loss": 0.4184, "step": 9986 }, { "epoch": 1.3701721890649654, "grad_norm": 1.2890625, "learning_rate": 1.1431330958131727e-05, "loss": 0.4322, "step": 9987 }, { "epoch": 1.370309391507169, "grad_norm": 1.34375, "learning_rate": 1.1429902324825395e-05, "loss": 0.5061, "step": 9988 }, { "epoch": 1.3704465939493722, "grad_norm": 1.2109375, "learning_rate": 1.1428473661725073e-05, "loss": 0.436, "step": 9989 }, { "epoch": 1.3705837963915757, "grad_norm": 1.015625, "learning_rate": 1.1427044968860525e-05, "loss": 0.2965, "step": 9990 }, { "epoch": 1.3707209988337792, "grad_norm": 1.2421875, "learning_rate": 1.1425616246261524e-05, "loss": 0.474, "step": 9991 }, { "epoch": 1.3708582012759827, "grad_norm": 1.1640625, "learning_rate": 1.1424187493957838e-05, "loss": 0.3793, "step": 9992 }, { "epoch": 1.3709954037181862, "grad_norm": 1.2109375, "learning_rate": 1.1422758711979241e-05, "loss": 0.3469, "step": 9993 }, { "epoch": 1.3711326061603897, "grad_norm": 1.125, "learning_rate": 1.1421329900355497e-05, "loss": 0.3735, "step": 9994 }, { "epoch": 1.3712698086025932, "grad_norm": 1.21875, "learning_rate": 1.1419901059116383e-05, "loss": 0.4879, "step": 9995 }, { "epoch": 1.3714070110447967, "grad_norm": 1.1015625, "learning_rate": 1.1418472188291664e-05, "loss": 0.395, "step": 9996 }, { "epoch": 1.3715442134870002, "grad_norm": 1.265625, "learning_rate": 1.141704328791112e-05, "loss": 0.4653, "step": 9997 }, { "epoch": 1.3716814159292037, "grad_norm": 1.1484375, "learning_rate": 1.141561435800452e-05, "loss": 0.4446, "step": 9998 }, { "epoch": 1.371818618371407, "grad_norm": 1.1796875, "learning_rate": 1.1414185398601639e-05, "loss": 0.4057, "step": 9999 }, { "epoch": 1.3719558208136104, "grad_norm": 1.34375, "learning_rate": 1.1412756409732249e-05, "loss": 0.4882, "step": 10000 }, { "epoch": 1.372093023255814, "grad_norm": 1.2890625, "learning_rate": 1.1411327391426128e-05, "loss": 0.4651, "step": 10001 }, { "epoch": 1.3722302256980174, "grad_norm": 1.265625, "learning_rate": 1.1409898343713048e-05, "loss": 0.4946, "step": 10002 }, { "epoch": 1.372367428140221, "grad_norm": 1.1875, "learning_rate": 1.140846926662279e-05, "loss": 0.428, "step": 10003 }, { "epoch": 1.3725046305824244, "grad_norm": 1.3046875, "learning_rate": 1.1407040160185132e-05, "loss": 0.5082, "step": 10004 }, { "epoch": 1.3726418330246277, "grad_norm": 1.2890625, "learning_rate": 1.1405611024429843e-05, "loss": 0.4756, "step": 10005 }, { "epoch": 1.3727790354668312, "grad_norm": 1.203125, "learning_rate": 1.1404181859386707e-05, "loss": 0.4301, "step": 10006 }, { "epoch": 1.3729162379090347, "grad_norm": 1.2578125, "learning_rate": 1.1402752665085502e-05, "loss": 0.4758, "step": 10007 }, { "epoch": 1.3730534403512382, "grad_norm": 1.296875, "learning_rate": 1.1401323441556005e-05, "loss": 0.4585, "step": 10008 }, { "epoch": 1.3731906427934417, "grad_norm": 1.2109375, "learning_rate": 1.1399894188827997e-05, "loss": 0.4332, "step": 10009 }, { "epoch": 1.3733278452356452, "grad_norm": 1.2890625, "learning_rate": 1.139846490693126e-05, "loss": 0.4752, "step": 10010 }, { "epoch": 1.3734650476778487, "grad_norm": 1.3359375, "learning_rate": 1.1397035595895576e-05, "loss": 0.4906, "step": 10011 }, { "epoch": 1.3736022501200522, "grad_norm": 1.21875, "learning_rate": 1.1395606255750722e-05, "loss": 0.4176, "step": 10012 }, { "epoch": 1.3737394525622557, "grad_norm": 1.28125, "learning_rate": 1.1394176886526486e-05, "loss": 0.4437, "step": 10013 }, { "epoch": 1.3738766550044592, "grad_norm": 1.15625, "learning_rate": 1.1392747488252648e-05, "loss": 0.4373, "step": 10014 }, { "epoch": 1.3740138574466625, "grad_norm": 1.359375, "learning_rate": 1.1391318060958987e-05, "loss": 0.4922, "step": 10015 }, { "epoch": 1.374151059888866, "grad_norm": 1.15625, "learning_rate": 1.1389888604675294e-05, "loss": 0.3775, "step": 10016 }, { "epoch": 1.3742882623310695, "grad_norm": 1.2265625, "learning_rate": 1.138845911943135e-05, "loss": 0.4359, "step": 10017 }, { "epoch": 1.374425464773273, "grad_norm": 1.390625, "learning_rate": 1.1387029605256942e-05, "loss": 0.4865, "step": 10018 }, { "epoch": 1.3745626672154765, "grad_norm": 1.1640625, "learning_rate": 1.1385600062181856e-05, "loss": 0.4281, "step": 10019 }, { "epoch": 1.37469986965768, "grad_norm": 1.21875, "learning_rate": 1.1384170490235878e-05, "loss": 0.4406, "step": 10020 }, { "epoch": 1.3748370720998833, "grad_norm": 1.0625, "learning_rate": 1.1382740889448796e-05, "loss": 0.3797, "step": 10021 }, { "epoch": 1.3749742745420868, "grad_norm": 1.3125, "learning_rate": 1.1381311259850393e-05, "loss": 0.44, "step": 10022 }, { "epoch": 1.3751114769842903, "grad_norm": 1.2265625, "learning_rate": 1.1379881601470463e-05, "loss": 0.4662, "step": 10023 }, { "epoch": 1.3752486794264938, "grad_norm": 1.2265625, "learning_rate": 1.1378451914338796e-05, "loss": 0.4373, "step": 10024 }, { "epoch": 1.3753858818686973, "grad_norm": 1.203125, "learning_rate": 1.1377022198485174e-05, "loss": 0.4673, "step": 10025 }, { "epoch": 1.3755230843109008, "grad_norm": 1.15625, "learning_rate": 1.1375592453939393e-05, "loss": 0.4029, "step": 10026 }, { "epoch": 1.3756602867531043, "grad_norm": 1.46875, "learning_rate": 1.1374162680731245e-05, "loss": 0.4981, "step": 10027 }, { "epoch": 1.3757974891953078, "grad_norm": 1.1484375, "learning_rate": 1.1372732878890513e-05, "loss": 0.4077, "step": 10028 }, { "epoch": 1.3759346916375113, "grad_norm": 1.328125, "learning_rate": 1.1371303048446997e-05, "loss": 0.4701, "step": 10029 }, { "epoch": 1.3760718940797148, "grad_norm": 1.3203125, "learning_rate": 1.1369873189430488e-05, "loss": 0.4557, "step": 10030 }, { "epoch": 1.376209096521918, "grad_norm": 1.1875, "learning_rate": 1.1368443301870778e-05, "loss": 0.412, "step": 10031 }, { "epoch": 1.3763462989641215, "grad_norm": 1.21875, "learning_rate": 1.136701338579766e-05, "loss": 0.4019, "step": 10032 }, { "epoch": 1.376483501406325, "grad_norm": 1.1328125, "learning_rate": 1.1365583441240928e-05, "loss": 0.3952, "step": 10033 }, { "epoch": 1.3766207038485285, "grad_norm": 1.234375, "learning_rate": 1.1364153468230379e-05, "loss": 0.4757, "step": 10034 }, { "epoch": 1.376757906290732, "grad_norm": 1.3671875, "learning_rate": 1.1362723466795807e-05, "loss": 0.4958, "step": 10035 }, { "epoch": 1.3768951087329355, "grad_norm": 1.3046875, "learning_rate": 1.1361293436967006e-05, "loss": 0.4222, "step": 10036 }, { "epoch": 1.3770323111751388, "grad_norm": 1.234375, "learning_rate": 1.1359863378773776e-05, "loss": 0.438, "step": 10037 }, { "epoch": 1.3771695136173423, "grad_norm": 1.140625, "learning_rate": 1.1358433292245916e-05, "loss": 0.3797, "step": 10038 }, { "epoch": 1.3773067160595458, "grad_norm": 1.203125, "learning_rate": 1.1357003177413219e-05, "loss": 0.4612, "step": 10039 }, { "epoch": 1.3774439185017493, "grad_norm": 1.109375, "learning_rate": 1.1355573034305488e-05, "loss": 0.3976, "step": 10040 }, { "epoch": 1.3775811209439528, "grad_norm": 1.359375, "learning_rate": 1.1354142862952515e-05, "loss": 0.4585, "step": 10041 }, { "epoch": 1.3777183233861563, "grad_norm": 1.2734375, "learning_rate": 1.1352712663384107e-05, "loss": 0.4744, "step": 10042 }, { "epoch": 1.3778555258283598, "grad_norm": 1.4375, "learning_rate": 1.135128243563006e-05, "loss": 0.4191, "step": 10043 }, { "epoch": 1.3779927282705633, "grad_norm": 1.3203125, "learning_rate": 1.1349852179720178e-05, "loss": 0.4396, "step": 10044 }, { "epoch": 1.3781299307127668, "grad_norm": 1.3359375, "learning_rate": 1.1348421895684259e-05, "loss": 0.4984, "step": 10045 }, { "epoch": 1.3782671331549703, "grad_norm": 1.25, "learning_rate": 1.1346991583552104e-05, "loss": 0.4505, "step": 10046 }, { "epoch": 1.3784043355971736, "grad_norm": 1.1484375, "learning_rate": 1.134556124335352e-05, "loss": 0.4038, "step": 10047 }, { "epoch": 1.378541538039377, "grad_norm": 1.15625, "learning_rate": 1.1344130875118309e-05, "loss": 0.4213, "step": 10048 }, { "epoch": 1.3786787404815806, "grad_norm": 1.1640625, "learning_rate": 1.1342700478876274e-05, "loss": 0.4053, "step": 10049 }, { "epoch": 1.378815942923784, "grad_norm": 1.3203125, "learning_rate": 1.1341270054657216e-05, "loss": 0.4945, "step": 10050 }, { "epoch": 1.3789531453659876, "grad_norm": 1.25, "learning_rate": 1.1339839602490945e-05, "loss": 0.4589, "step": 10051 }, { "epoch": 1.379090347808191, "grad_norm": 1.328125, "learning_rate": 1.1338409122407263e-05, "loss": 0.4757, "step": 10052 }, { "epoch": 1.3792275502503943, "grad_norm": 1.265625, "learning_rate": 1.1336978614435979e-05, "loss": 0.4492, "step": 10053 }, { "epoch": 1.3793647526925978, "grad_norm": 1.234375, "learning_rate": 1.1335548078606897e-05, "loss": 0.448, "step": 10054 }, { "epoch": 1.3795019551348013, "grad_norm": 1.3671875, "learning_rate": 1.1334117514949825e-05, "loss": 0.4339, "step": 10055 }, { "epoch": 1.3796391575770048, "grad_norm": 1.2578125, "learning_rate": 1.1332686923494572e-05, "loss": 0.4618, "step": 10056 }, { "epoch": 1.3797763600192083, "grad_norm": 1.4296875, "learning_rate": 1.1331256304270944e-05, "loss": 0.5228, "step": 10057 }, { "epoch": 1.3799135624614118, "grad_norm": 1.28125, "learning_rate": 1.1329825657308753e-05, "loss": 0.4226, "step": 10058 }, { "epoch": 1.3800507649036153, "grad_norm": 1.2734375, "learning_rate": 1.1328394982637803e-05, "loss": 0.4242, "step": 10059 }, { "epoch": 1.3801879673458188, "grad_norm": 1.4296875, "learning_rate": 1.1326964280287912e-05, "loss": 0.5113, "step": 10060 }, { "epoch": 1.3803251697880223, "grad_norm": 1.28125, "learning_rate": 1.1325533550288883e-05, "loss": 0.4584, "step": 10061 }, { "epoch": 1.3804623722302258, "grad_norm": 1.3046875, "learning_rate": 1.1324102792670531e-05, "loss": 0.4385, "step": 10062 }, { "epoch": 1.380599574672429, "grad_norm": 1.2421875, "learning_rate": 1.1322672007462668e-05, "loss": 0.45, "step": 10063 }, { "epoch": 1.3807367771146326, "grad_norm": 1.203125, "learning_rate": 1.1321241194695105e-05, "loss": 0.421, "step": 10064 }, { "epoch": 1.380873979556836, "grad_norm": 1.1796875, "learning_rate": 1.1319810354397657e-05, "loss": 0.4348, "step": 10065 }, { "epoch": 1.3810111819990396, "grad_norm": 1.3671875, "learning_rate": 1.1318379486600137e-05, "loss": 0.5299, "step": 10066 }, { "epoch": 1.381148384441243, "grad_norm": 1.1796875, "learning_rate": 1.131694859133236e-05, "loss": 0.4449, "step": 10067 }, { "epoch": 1.3812855868834466, "grad_norm": 1.1953125, "learning_rate": 1.1315517668624136e-05, "loss": 0.4015, "step": 10068 }, { "epoch": 1.3814227893256499, "grad_norm": 1.234375, "learning_rate": 1.1314086718505283e-05, "loss": 0.4693, "step": 10069 }, { "epoch": 1.3815599917678534, "grad_norm": 1.125, "learning_rate": 1.1312655741005618e-05, "loss": 0.395, "step": 10070 }, { "epoch": 1.3816971942100569, "grad_norm": 1.2734375, "learning_rate": 1.1311224736154957e-05, "loss": 0.4817, "step": 10071 }, { "epoch": 1.3818343966522604, "grad_norm": 1.171875, "learning_rate": 1.1309793703983117e-05, "loss": 0.4284, "step": 10072 }, { "epoch": 1.3819715990944639, "grad_norm": 1.140625, "learning_rate": 1.1308362644519915e-05, "loss": 0.3819, "step": 10073 }, { "epoch": 1.3821088015366674, "grad_norm": 1.140625, "learning_rate": 1.1306931557795167e-05, "loss": 0.4087, "step": 10074 }, { "epoch": 1.3822460039788709, "grad_norm": 1.2265625, "learning_rate": 1.1305500443838697e-05, "loss": 0.427, "step": 10075 }, { "epoch": 1.3823832064210744, "grad_norm": 1.2578125, "learning_rate": 1.1304069302680321e-05, "loss": 0.4658, "step": 10076 }, { "epoch": 1.3825204088632779, "grad_norm": 1.2421875, "learning_rate": 1.1302638134349857e-05, "loss": 0.4811, "step": 10077 }, { "epoch": 1.3826576113054814, "grad_norm": 1.2421875, "learning_rate": 1.1301206938877127e-05, "loss": 0.4636, "step": 10078 }, { "epoch": 1.3827948137476846, "grad_norm": 1.28125, "learning_rate": 1.1299775716291955e-05, "loss": 0.4661, "step": 10079 }, { "epoch": 1.3829320161898881, "grad_norm": 1.1328125, "learning_rate": 1.1298344466624156e-05, "loss": 0.4086, "step": 10080 }, { "epoch": 1.3830692186320916, "grad_norm": 1.1796875, "learning_rate": 1.1296913189903558e-05, "loss": 0.4046, "step": 10081 }, { "epoch": 1.3832064210742951, "grad_norm": 1.3515625, "learning_rate": 1.129548188615998e-05, "loss": 0.4828, "step": 10082 }, { "epoch": 1.3833436235164986, "grad_norm": 1.1484375, "learning_rate": 1.1294050555423248e-05, "loss": 0.3969, "step": 10083 }, { "epoch": 1.3834808259587021, "grad_norm": 1.3515625, "learning_rate": 1.1292619197723185e-05, "loss": 0.459, "step": 10084 }, { "epoch": 1.3836180284009054, "grad_norm": 1.21875, "learning_rate": 1.1291187813089615e-05, "loss": 0.4564, "step": 10085 }, { "epoch": 1.383755230843109, "grad_norm": 1.28125, "learning_rate": 1.1289756401552362e-05, "loss": 0.4682, "step": 10086 }, { "epoch": 1.3838924332853124, "grad_norm": 1.1953125, "learning_rate": 1.1288324963141251e-05, "loss": 0.4328, "step": 10087 }, { "epoch": 1.384029635727516, "grad_norm": 1.203125, "learning_rate": 1.1286893497886108e-05, "loss": 0.4361, "step": 10088 }, { "epoch": 1.3841668381697194, "grad_norm": 1.203125, "learning_rate": 1.1285462005816763e-05, "loss": 0.4175, "step": 10089 }, { "epoch": 1.384304040611923, "grad_norm": 1.3359375, "learning_rate": 1.1284030486963037e-05, "loss": 0.505, "step": 10090 }, { "epoch": 1.3844412430541264, "grad_norm": 1.203125, "learning_rate": 1.1282598941354766e-05, "loss": 0.4359, "step": 10091 }, { "epoch": 1.38457844549633, "grad_norm": 1.1796875, "learning_rate": 1.128116736902177e-05, "loss": 0.392, "step": 10092 }, { "epoch": 1.3847156479385334, "grad_norm": 1.2421875, "learning_rate": 1.1279735769993883e-05, "loss": 0.4537, "step": 10093 }, { "epoch": 1.384852850380737, "grad_norm": 1.25, "learning_rate": 1.1278304144300934e-05, "loss": 0.446, "step": 10094 }, { "epoch": 1.3849900528229402, "grad_norm": 1.1015625, "learning_rate": 1.1276872491972749e-05, "loss": 0.3205, "step": 10095 }, { "epoch": 1.3851272552651437, "grad_norm": 1.359375, "learning_rate": 1.127544081303916e-05, "loss": 0.4816, "step": 10096 }, { "epoch": 1.3852644577073472, "grad_norm": 1.171875, "learning_rate": 1.1274009107530001e-05, "loss": 0.3924, "step": 10097 }, { "epoch": 1.3854016601495507, "grad_norm": 1.3046875, "learning_rate": 1.1272577375475103e-05, "loss": 0.4737, "step": 10098 }, { "epoch": 1.3855388625917542, "grad_norm": 1.2890625, "learning_rate": 1.1271145616904293e-05, "loss": 0.4791, "step": 10099 }, { "epoch": 1.3856760650339577, "grad_norm": 1.296875, "learning_rate": 1.126971383184741e-05, "loss": 0.4726, "step": 10100 }, { "epoch": 1.385813267476161, "grad_norm": 1.1484375, "learning_rate": 1.1268282020334284e-05, "loss": 0.3894, "step": 10101 }, { "epoch": 1.3859504699183645, "grad_norm": 1.234375, "learning_rate": 1.1266850182394749e-05, "loss": 0.4716, "step": 10102 }, { "epoch": 1.386087672360568, "grad_norm": 1.2265625, "learning_rate": 1.1265418318058642e-05, "loss": 0.4589, "step": 10103 }, { "epoch": 1.3862248748027715, "grad_norm": 1.2265625, "learning_rate": 1.1263986427355794e-05, "loss": 0.4439, "step": 10104 }, { "epoch": 1.386362077244975, "grad_norm": 1.3515625, "learning_rate": 1.1262554510316041e-05, "loss": 0.5272, "step": 10105 }, { "epoch": 1.3864992796871785, "grad_norm": 1.2421875, "learning_rate": 1.126112256696922e-05, "loss": 0.4309, "step": 10106 }, { "epoch": 1.386636482129382, "grad_norm": 1.171875, "learning_rate": 1.1259690597345169e-05, "loss": 0.4111, "step": 10107 }, { "epoch": 1.3867736845715855, "grad_norm": 1.3125, "learning_rate": 1.1258258601473722e-05, "loss": 0.4631, "step": 10108 }, { "epoch": 1.386910887013789, "grad_norm": 1.3515625, "learning_rate": 1.1256826579384718e-05, "loss": 0.5019, "step": 10109 }, { "epoch": 1.3870480894559925, "grad_norm": 1.1640625, "learning_rate": 1.1255394531107997e-05, "loss": 0.3529, "step": 10110 }, { "epoch": 1.3871852918981957, "grad_norm": 1.1328125, "learning_rate": 1.1253962456673397e-05, "loss": 0.4085, "step": 10111 }, { "epoch": 1.3873224943403992, "grad_norm": 1.265625, "learning_rate": 1.1252530356110755e-05, "loss": 0.4761, "step": 10112 }, { "epoch": 1.3874596967826027, "grad_norm": 1.234375, "learning_rate": 1.125109822944991e-05, "loss": 0.4409, "step": 10113 }, { "epoch": 1.3875968992248062, "grad_norm": 1.3125, "learning_rate": 1.1249666076720706e-05, "loss": 0.4693, "step": 10114 }, { "epoch": 1.3877341016670097, "grad_norm": 1.1328125, "learning_rate": 1.1248233897952986e-05, "loss": 0.3979, "step": 10115 }, { "epoch": 1.3878713041092132, "grad_norm": 1.2421875, "learning_rate": 1.1246801693176582e-05, "loss": 0.4632, "step": 10116 }, { "epoch": 1.3880085065514165, "grad_norm": 1.265625, "learning_rate": 1.1245369462421343e-05, "loss": 0.4451, "step": 10117 }, { "epoch": 1.38814570899362, "grad_norm": 1.375, "learning_rate": 1.124393720571711e-05, "loss": 0.4969, "step": 10118 }, { "epoch": 1.3882829114358235, "grad_norm": 1.1640625, "learning_rate": 1.1242504923093728e-05, "loss": 0.4326, "step": 10119 }, { "epoch": 1.388420113878027, "grad_norm": 1.25, "learning_rate": 1.1241072614581042e-05, "loss": 0.4572, "step": 10120 }, { "epoch": 1.3885573163202305, "grad_norm": 1.1328125, "learning_rate": 1.1239640280208888e-05, "loss": 0.3397, "step": 10121 }, { "epoch": 1.388694518762434, "grad_norm": 1.234375, "learning_rate": 1.1238207920007115e-05, "loss": 0.4351, "step": 10122 }, { "epoch": 1.3888317212046375, "grad_norm": 1.09375, "learning_rate": 1.1236775534005571e-05, "loss": 0.3716, "step": 10123 }, { "epoch": 1.388968923646841, "grad_norm": 1.2421875, "learning_rate": 1.12353431222341e-05, "loss": 0.4278, "step": 10124 }, { "epoch": 1.3891061260890445, "grad_norm": 1.171875, "learning_rate": 1.1233910684722544e-05, "loss": 0.3848, "step": 10125 }, { "epoch": 1.389243328531248, "grad_norm": 1.296875, "learning_rate": 1.1232478221500758e-05, "loss": 0.4719, "step": 10126 }, { "epoch": 1.3893805309734513, "grad_norm": 1.125, "learning_rate": 1.1231045732598582e-05, "loss": 0.3846, "step": 10127 }, { "epoch": 1.3895177334156548, "grad_norm": 1.1953125, "learning_rate": 1.122961321804587e-05, "loss": 0.4565, "step": 10128 }, { "epoch": 1.3896549358578583, "grad_norm": 1.28125, "learning_rate": 1.1228180677872467e-05, "loss": 0.4754, "step": 10129 }, { "epoch": 1.3897921383000618, "grad_norm": 1.234375, "learning_rate": 1.122674811210822e-05, "loss": 0.4543, "step": 10130 }, { "epoch": 1.3899293407422653, "grad_norm": 1.2109375, "learning_rate": 1.1225315520782981e-05, "loss": 0.4369, "step": 10131 }, { "epoch": 1.3900665431844688, "grad_norm": 1.1640625, "learning_rate": 1.1223882903926602e-05, "loss": 0.3724, "step": 10132 }, { "epoch": 1.390203745626672, "grad_norm": 1.2578125, "learning_rate": 1.1222450261568929e-05, "loss": 0.4802, "step": 10133 }, { "epoch": 1.3903409480688755, "grad_norm": 1.21875, "learning_rate": 1.1221017593739816e-05, "loss": 0.4751, "step": 10134 }, { "epoch": 1.390478150511079, "grad_norm": 1.28125, "learning_rate": 1.1219584900469116e-05, "loss": 0.4423, "step": 10135 }, { "epoch": 1.3906153529532825, "grad_norm": 1.2109375, "learning_rate": 1.1218152181786676e-05, "loss": 0.4, "step": 10136 }, { "epoch": 1.390752555395486, "grad_norm": 1.1640625, "learning_rate": 1.1216719437722357e-05, "loss": 0.4053, "step": 10137 }, { "epoch": 1.3908897578376895, "grad_norm": 1.328125, "learning_rate": 1.1215286668306003e-05, "loss": 0.4772, "step": 10138 }, { "epoch": 1.391026960279893, "grad_norm": 1.203125, "learning_rate": 1.121385387356747e-05, "loss": 0.3935, "step": 10139 }, { "epoch": 1.3911641627220965, "grad_norm": 1.2734375, "learning_rate": 1.1212421053536618e-05, "loss": 0.4733, "step": 10140 }, { "epoch": 1.3913013651643, "grad_norm": 1.40625, "learning_rate": 1.1210988208243294e-05, "loss": 0.4806, "step": 10141 }, { "epoch": 1.3914385676065035, "grad_norm": 1.2578125, "learning_rate": 1.120955533771736e-05, "loss": 0.4586, "step": 10142 }, { "epoch": 1.3915757700487068, "grad_norm": 1.3046875, "learning_rate": 1.1208122441988669e-05, "loss": 0.487, "step": 10143 }, { "epoch": 1.3917129724909103, "grad_norm": 1.140625, "learning_rate": 1.1206689521087077e-05, "loss": 0.3691, "step": 10144 }, { "epoch": 1.3918501749331138, "grad_norm": 1.53125, "learning_rate": 1.120525657504244e-05, "loss": 0.5736, "step": 10145 }, { "epoch": 1.3919873773753173, "grad_norm": 1.203125, "learning_rate": 1.1203823603884619e-05, "loss": 0.4293, "step": 10146 }, { "epoch": 1.3921245798175208, "grad_norm": 1.3515625, "learning_rate": 1.1202390607643468e-05, "loss": 0.4734, "step": 10147 }, { "epoch": 1.3922617822597243, "grad_norm": 1.359375, "learning_rate": 1.1200957586348848e-05, "loss": 0.493, "step": 10148 }, { "epoch": 1.3923989847019276, "grad_norm": 1.0859375, "learning_rate": 1.1199524540030615e-05, "loss": 0.388, "step": 10149 }, { "epoch": 1.392536187144131, "grad_norm": 1.1796875, "learning_rate": 1.1198091468718633e-05, "loss": 0.4006, "step": 10150 }, { "epoch": 1.3926733895863346, "grad_norm": 1.109375, "learning_rate": 1.1196658372442756e-05, "loss": 0.3988, "step": 10151 }, { "epoch": 1.392810592028538, "grad_norm": 1.203125, "learning_rate": 1.1195225251232851e-05, "loss": 0.421, "step": 10152 }, { "epoch": 1.3929477944707416, "grad_norm": 1.3125, "learning_rate": 1.1193792105118775e-05, "loss": 0.4799, "step": 10153 }, { "epoch": 1.393084996912945, "grad_norm": 1.203125, "learning_rate": 1.1192358934130392e-05, "loss": 0.4556, "step": 10154 }, { "epoch": 1.3932221993551486, "grad_norm": 1.2109375, "learning_rate": 1.119092573829756e-05, "loss": 0.4795, "step": 10155 }, { "epoch": 1.393359401797352, "grad_norm": 1.28125, "learning_rate": 1.1189492517650147e-05, "loss": 0.5088, "step": 10156 }, { "epoch": 1.3934966042395556, "grad_norm": 1.171875, "learning_rate": 1.1188059272218015e-05, "loss": 0.3779, "step": 10157 }, { "epoch": 1.393633806681759, "grad_norm": 1.171875, "learning_rate": 1.1186626002031022e-05, "loss": 0.4291, "step": 10158 }, { "epoch": 1.3937710091239623, "grad_norm": 1.1171875, "learning_rate": 1.118519270711904e-05, "loss": 0.4174, "step": 10159 }, { "epoch": 1.3939082115661658, "grad_norm": 1.2109375, "learning_rate": 1.1183759387511928e-05, "loss": 0.4355, "step": 10160 }, { "epoch": 1.3940454140083693, "grad_norm": 1.21875, "learning_rate": 1.1182326043239554e-05, "loss": 0.4662, "step": 10161 }, { "epoch": 1.3941826164505728, "grad_norm": 1.1015625, "learning_rate": 1.118089267433178e-05, "loss": 0.3731, "step": 10162 }, { "epoch": 1.3943198188927763, "grad_norm": 1.2265625, "learning_rate": 1.117945928081848e-05, "loss": 0.4406, "step": 10163 }, { "epoch": 1.3944570213349798, "grad_norm": 1.21875, "learning_rate": 1.1178025862729513e-05, "loss": 0.4079, "step": 10164 }, { "epoch": 1.3945942237771831, "grad_norm": 1.203125, "learning_rate": 1.1176592420094752e-05, "loss": 0.4417, "step": 10165 }, { "epoch": 1.3947314262193866, "grad_norm": 1.25, "learning_rate": 1.117515895294406e-05, "loss": 0.4152, "step": 10166 }, { "epoch": 1.3948686286615901, "grad_norm": 1.1953125, "learning_rate": 1.1173725461307305e-05, "loss": 0.4768, "step": 10167 }, { "epoch": 1.3950058311037936, "grad_norm": 1.234375, "learning_rate": 1.1172291945214359e-05, "loss": 0.4313, "step": 10168 }, { "epoch": 1.3951430335459971, "grad_norm": 1.2734375, "learning_rate": 1.1170858404695093e-05, "loss": 0.4637, "step": 10169 }, { "epoch": 1.3952802359882006, "grad_norm": 1.234375, "learning_rate": 1.116942483977937e-05, "loss": 0.4271, "step": 10170 }, { "epoch": 1.3954174384304041, "grad_norm": 1.21875, "learning_rate": 1.1167991250497068e-05, "loss": 0.4228, "step": 10171 }, { "epoch": 1.3955546408726076, "grad_norm": 1.171875, "learning_rate": 1.1166557636878052e-05, "loss": 0.4232, "step": 10172 }, { "epoch": 1.395691843314811, "grad_norm": 1.1484375, "learning_rate": 1.1165123998952196e-05, "loss": 0.3844, "step": 10173 }, { "epoch": 1.3958290457570146, "grad_norm": 1.1640625, "learning_rate": 1.1163690336749373e-05, "loss": 0.4432, "step": 10174 }, { "epoch": 1.3959662481992179, "grad_norm": 1.1796875, "learning_rate": 1.1162256650299451e-05, "loss": 0.4181, "step": 10175 }, { "epoch": 1.3961034506414214, "grad_norm": 1.2578125, "learning_rate": 1.1160822939632307e-05, "loss": 0.3911, "step": 10176 }, { "epoch": 1.3962406530836249, "grad_norm": 1.265625, "learning_rate": 1.1159389204777813e-05, "loss": 0.4016, "step": 10177 }, { "epoch": 1.3963778555258284, "grad_norm": 1.2421875, "learning_rate": 1.1157955445765843e-05, "loss": 0.4764, "step": 10178 }, { "epoch": 1.3965150579680319, "grad_norm": 1.296875, "learning_rate": 1.115652166262627e-05, "loss": 0.4929, "step": 10179 }, { "epoch": 1.3966522604102354, "grad_norm": 1.1875, "learning_rate": 1.1155087855388968e-05, "loss": 0.4188, "step": 10180 }, { "epoch": 1.3967894628524387, "grad_norm": 1.2734375, "learning_rate": 1.1153654024083817e-05, "loss": 0.4646, "step": 10181 }, { "epoch": 1.3969266652946422, "grad_norm": 1.1796875, "learning_rate": 1.115222016874069e-05, "loss": 0.4235, "step": 10182 }, { "epoch": 1.3970638677368457, "grad_norm": 1.453125, "learning_rate": 1.1150786289389465e-05, "loss": 0.5471, "step": 10183 }, { "epoch": 1.3972010701790492, "grad_norm": 1.1796875, "learning_rate": 1.1149352386060015e-05, "loss": 0.4048, "step": 10184 }, { "epoch": 1.3973382726212527, "grad_norm": 1.234375, "learning_rate": 1.114791845878222e-05, "loss": 0.3745, "step": 10185 }, { "epoch": 1.3974754750634562, "grad_norm": 1.03125, "learning_rate": 1.1146484507585958e-05, "loss": 0.3465, "step": 10186 }, { "epoch": 1.3976126775056597, "grad_norm": 1.1796875, "learning_rate": 1.1145050532501105e-05, "loss": 0.3946, "step": 10187 }, { "epoch": 1.3977498799478631, "grad_norm": 1.2890625, "learning_rate": 1.1143616533557544e-05, "loss": 0.4896, "step": 10188 }, { "epoch": 1.3978870823900666, "grad_norm": 1.1796875, "learning_rate": 1.114218251078515e-05, "loss": 0.3935, "step": 10189 }, { "epoch": 1.3980242848322701, "grad_norm": 1.15625, "learning_rate": 1.1140748464213806e-05, "loss": 0.4083, "step": 10190 }, { "epoch": 1.3981614872744734, "grad_norm": 1.2109375, "learning_rate": 1.1139314393873392e-05, "loss": 0.4802, "step": 10191 }, { "epoch": 1.398298689716677, "grad_norm": 1.2421875, "learning_rate": 1.1137880299793789e-05, "loss": 0.4663, "step": 10192 }, { "epoch": 1.3984358921588804, "grad_norm": 1.296875, "learning_rate": 1.1136446182004876e-05, "loss": 0.4025, "step": 10193 }, { "epoch": 1.398573094601084, "grad_norm": 1.2421875, "learning_rate": 1.1135012040536534e-05, "loss": 0.5167, "step": 10194 }, { "epoch": 1.3987102970432874, "grad_norm": 1.2265625, "learning_rate": 1.1133577875418651e-05, "loss": 0.4588, "step": 10195 }, { "epoch": 1.398847499485491, "grad_norm": 1.28125, "learning_rate": 1.1132143686681105e-05, "loss": 0.4956, "step": 10196 }, { "epoch": 1.3989847019276942, "grad_norm": 1.328125, "learning_rate": 1.113070947435378e-05, "loss": 0.5392, "step": 10197 }, { "epoch": 1.3991219043698977, "grad_norm": 1.2578125, "learning_rate": 1.1129275238466561e-05, "loss": 0.4142, "step": 10198 }, { "epoch": 1.3992591068121012, "grad_norm": 1.234375, "learning_rate": 1.1127840979049334e-05, "loss": 0.4669, "step": 10199 }, { "epoch": 1.3993963092543047, "grad_norm": 1.2265625, "learning_rate": 1.1126406696131982e-05, "loss": 0.4486, "step": 10200 }, { "epoch": 1.3995335116965082, "grad_norm": 1.1796875, "learning_rate": 1.1124972389744387e-05, "loss": 0.4281, "step": 10201 }, { "epoch": 1.3996707141387117, "grad_norm": 1.3046875, "learning_rate": 1.112353805991644e-05, "loss": 0.4698, "step": 10202 }, { "epoch": 1.3998079165809152, "grad_norm": 1.2734375, "learning_rate": 1.1122103706678022e-05, "loss": 0.4279, "step": 10203 }, { "epoch": 1.3999451190231187, "grad_norm": 1.2109375, "learning_rate": 1.1120669330059024e-05, "loss": 0.4323, "step": 10204 }, { "epoch": 1.4000823214653222, "grad_norm": 1.3046875, "learning_rate": 1.111923493008933e-05, "loss": 0.486, "step": 10205 }, { "epoch": 1.4002195239075257, "grad_norm": 1.1796875, "learning_rate": 1.111780050679883e-05, "loss": 0.4403, "step": 10206 }, { "epoch": 1.400356726349729, "grad_norm": 1.171875, "learning_rate": 1.1116366060217413e-05, "loss": 0.3996, "step": 10207 }, { "epoch": 1.4004939287919325, "grad_norm": 1.1484375, "learning_rate": 1.1114931590374967e-05, "loss": 0.4274, "step": 10208 }, { "epoch": 1.400631131234136, "grad_norm": 1.1953125, "learning_rate": 1.1113497097301378e-05, "loss": 0.4385, "step": 10209 }, { "epoch": 1.4007683336763395, "grad_norm": 1.3515625, "learning_rate": 1.1112062581026542e-05, "loss": 0.4908, "step": 10210 }, { "epoch": 1.400905536118543, "grad_norm": 1.265625, "learning_rate": 1.1110628041580342e-05, "loss": 0.4681, "step": 10211 }, { "epoch": 1.4010427385607465, "grad_norm": 1.171875, "learning_rate": 1.1109193478992672e-05, "loss": 0.3749, "step": 10212 }, { "epoch": 1.4011799410029497, "grad_norm": 1.2734375, "learning_rate": 1.1107758893293425e-05, "loss": 0.438, "step": 10213 }, { "epoch": 1.4013171434451532, "grad_norm": 1.296875, "learning_rate": 1.1106324284512488e-05, "loss": 0.5093, "step": 10214 }, { "epoch": 1.4014543458873567, "grad_norm": 1.2734375, "learning_rate": 1.1104889652679758e-05, "loss": 0.5008, "step": 10215 }, { "epoch": 1.4015915483295602, "grad_norm": 1.2578125, "learning_rate": 1.1103454997825123e-05, "loss": 0.4482, "step": 10216 }, { "epoch": 1.4017287507717637, "grad_norm": 1.2265625, "learning_rate": 1.110202031997848e-05, "loss": 0.3884, "step": 10217 }, { "epoch": 1.4018659532139672, "grad_norm": 1.265625, "learning_rate": 1.110058561916972e-05, "loss": 0.4947, "step": 10218 }, { "epoch": 1.4020031556561707, "grad_norm": 1.2578125, "learning_rate": 1.1099150895428739e-05, "loss": 0.4198, "step": 10219 }, { "epoch": 1.4021403580983742, "grad_norm": 1.1328125, "learning_rate": 1.109771614878543e-05, "loss": 0.3895, "step": 10220 }, { "epoch": 1.4022775605405777, "grad_norm": 1.40625, "learning_rate": 1.1096281379269684e-05, "loss": 0.4854, "step": 10221 }, { "epoch": 1.4024147629827812, "grad_norm": 1.2421875, "learning_rate": 1.1094846586911403e-05, "loss": 0.406, "step": 10222 }, { "epoch": 1.4025519654249845, "grad_norm": 1.15625, "learning_rate": 1.1093411771740481e-05, "loss": 0.4337, "step": 10223 }, { "epoch": 1.402689167867188, "grad_norm": 1.171875, "learning_rate": 1.1091976933786811e-05, "loss": 0.4033, "step": 10224 }, { "epoch": 1.4028263703093915, "grad_norm": 1.296875, "learning_rate": 1.1090542073080297e-05, "loss": 0.4599, "step": 10225 }, { "epoch": 1.402963572751595, "grad_norm": 1.2578125, "learning_rate": 1.1089107189650829e-05, "loss": 0.4968, "step": 10226 }, { "epoch": 1.4031007751937985, "grad_norm": 1.2578125, "learning_rate": 1.1087672283528308e-05, "loss": 0.492, "step": 10227 }, { "epoch": 1.403237977636002, "grad_norm": 1.2109375, "learning_rate": 1.1086237354742635e-05, "loss": 0.4494, "step": 10228 }, { "epoch": 1.4033751800782053, "grad_norm": 1.1875, "learning_rate": 1.1084802403323703e-05, "loss": 0.4834, "step": 10229 }, { "epoch": 1.4035123825204088, "grad_norm": 1.09375, "learning_rate": 1.108336742930141e-05, "loss": 0.364, "step": 10230 }, { "epoch": 1.4036495849626123, "grad_norm": 1.25, "learning_rate": 1.1081932432705664e-05, "loss": 0.477, "step": 10231 }, { "epoch": 1.4037867874048158, "grad_norm": 1.3515625, "learning_rate": 1.108049741356636e-05, "loss": 0.488, "step": 10232 }, { "epoch": 1.4039239898470193, "grad_norm": 1.1484375, "learning_rate": 1.1079062371913398e-05, "loss": 0.3959, "step": 10233 }, { "epoch": 1.4040611922892228, "grad_norm": 1.2890625, "learning_rate": 1.107762730777668e-05, "loss": 0.5056, "step": 10234 }, { "epoch": 1.4041983947314263, "grad_norm": 1.234375, "learning_rate": 1.1076192221186109e-05, "loss": 0.4391, "step": 10235 }, { "epoch": 1.4043355971736298, "grad_norm": 1.234375, "learning_rate": 1.1074757112171586e-05, "loss": 0.4503, "step": 10236 }, { "epoch": 1.4044727996158333, "grad_norm": 1.3515625, "learning_rate": 1.1073321980763012e-05, "loss": 0.5346, "step": 10237 }, { "epoch": 1.4046100020580368, "grad_norm": 1.2734375, "learning_rate": 1.1071886826990293e-05, "loss": 0.4205, "step": 10238 }, { "epoch": 1.40474720450024, "grad_norm": 1.2578125, "learning_rate": 1.107045165088333e-05, "loss": 0.4623, "step": 10239 }, { "epoch": 1.4048844069424435, "grad_norm": 1.203125, "learning_rate": 1.1069016452472026e-05, "loss": 0.4394, "step": 10240 }, { "epoch": 1.405021609384647, "grad_norm": 1.3046875, "learning_rate": 1.1067581231786289e-05, "loss": 0.5239, "step": 10241 }, { "epoch": 1.4051588118268505, "grad_norm": 1.3046875, "learning_rate": 1.106614598885602e-05, "loss": 0.4929, "step": 10242 }, { "epoch": 1.405296014269054, "grad_norm": 1.2734375, "learning_rate": 1.1064710723711125e-05, "loss": 0.4885, "step": 10243 }, { "epoch": 1.4054332167112575, "grad_norm": 1.234375, "learning_rate": 1.1063275436381513e-05, "loss": 0.4672, "step": 10244 }, { "epoch": 1.4055704191534608, "grad_norm": 1.2265625, "learning_rate": 1.1061840126897088e-05, "loss": 0.4792, "step": 10245 }, { "epoch": 1.4057076215956643, "grad_norm": 1.203125, "learning_rate": 1.1060404795287756e-05, "loss": 0.4341, "step": 10246 }, { "epoch": 1.4058448240378678, "grad_norm": 1.2890625, "learning_rate": 1.1058969441583424e-05, "loss": 0.4808, "step": 10247 }, { "epoch": 1.4059820264800713, "grad_norm": 1.1484375, "learning_rate": 1.1057534065814e-05, "loss": 0.4323, "step": 10248 }, { "epoch": 1.4061192289222748, "grad_norm": 1.1328125, "learning_rate": 1.1056098668009393e-05, "loss": 0.3847, "step": 10249 }, { "epoch": 1.4062564313644783, "grad_norm": 1.1953125, "learning_rate": 1.105466324819951e-05, "loss": 0.4089, "step": 10250 }, { "epoch": 1.4063936338066818, "grad_norm": 1.2890625, "learning_rate": 1.1053227806414262e-05, "loss": 0.4712, "step": 10251 }, { "epoch": 1.4065308362488853, "grad_norm": 1.1953125, "learning_rate": 1.1051792342683557e-05, "loss": 0.442, "step": 10252 }, { "epoch": 1.4066680386910888, "grad_norm": 1.2421875, "learning_rate": 1.1050356857037304e-05, "loss": 0.381, "step": 10253 }, { "epoch": 1.4068052411332923, "grad_norm": 1.2109375, "learning_rate": 1.1048921349505414e-05, "loss": 0.4194, "step": 10254 }, { "epoch": 1.4069424435754956, "grad_norm": 1.25, "learning_rate": 1.10474858201178e-05, "loss": 0.4225, "step": 10255 }, { "epoch": 1.407079646017699, "grad_norm": 1.3515625, "learning_rate": 1.1046050268904369e-05, "loss": 0.4699, "step": 10256 }, { "epoch": 1.4072168484599026, "grad_norm": 1.1875, "learning_rate": 1.1044614695895037e-05, "loss": 0.4398, "step": 10257 }, { "epoch": 1.407354050902106, "grad_norm": 1.171875, "learning_rate": 1.1043179101119714e-05, "loss": 0.4144, "step": 10258 }, { "epoch": 1.4074912533443096, "grad_norm": 1.15625, "learning_rate": 1.1041743484608314e-05, "loss": 0.3542, "step": 10259 }, { "epoch": 1.407628455786513, "grad_norm": 1.265625, "learning_rate": 1.1040307846390745e-05, "loss": 0.4426, "step": 10260 }, { "epoch": 1.4077656582287164, "grad_norm": 1.3515625, "learning_rate": 1.1038872186496928e-05, "loss": 0.4983, "step": 10261 }, { "epoch": 1.4079028606709199, "grad_norm": 1.265625, "learning_rate": 1.1037436504956771e-05, "loss": 0.4459, "step": 10262 }, { "epoch": 1.4080400631131234, "grad_norm": 1.28125, "learning_rate": 1.1036000801800191e-05, "loss": 0.4801, "step": 10263 }, { "epoch": 1.4081772655553269, "grad_norm": 1.2109375, "learning_rate": 1.1034565077057105e-05, "loss": 0.4165, "step": 10264 }, { "epoch": 1.4083144679975304, "grad_norm": 1.40625, "learning_rate": 1.1033129330757423e-05, "loss": 0.4945, "step": 10265 }, { "epoch": 1.4084516704397338, "grad_norm": 1.1484375, "learning_rate": 1.1031693562931065e-05, "loss": 0.3787, "step": 10266 }, { "epoch": 1.4085888728819373, "grad_norm": 1.2265625, "learning_rate": 1.1030257773607946e-05, "loss": 0.4114, "step": 10267 }, { "epoch": 1.4087260753241408, "grad_norm": 1.2109375, "learning_rate": 1.1028821962817981e-05, "loss": 0.4391, "step": 10268 }, { "epoch": 1.4088632777663443, "grad_norm": 1.1875, "learning_rate": 1.1027386130591088e-05, "loss": 0.3999, "step": 10269 }, { "epoch": 1.4090004802085478, "grad_norm": 1.34375, "learning_rate": 1.1025950276957187e-05, "loss": 0.5361, "step": 10270 }, { "epoch": 1.4091376826507511, "grad_norm": 1.1640625, "learning_rate": 1.1024514401946194e-05, "loss": 0.4066, "step": 10271 }, { "epoch": 1.4092748850929546, "grad_norm": 1.2890625, "learning_rate": 1.1023078505588028e-05, "loss": 0.4347, "step": 10272 }, { "epoch": 1.4094120875351581, "grad_norm": 1.3203125, "learning_rate": 1.1021642587912609e-05, "loss": 0.4162, "step": 10273 }, { "epoch": 1.4095492899773616, "grad_norm": 1.25, "learning_rate": 1.1020206648949852e-05, "loss": 0.4535, "step": 10274 }, { "epoch": 1.4096864924195651, "grad_norm": 1.1875, "learning_rate": 1.101877068872968e-05, "loss": 0.3981, "step": 10275 }, { "epoch": 1.4098236948617686, "grad_norm": 1.171875, "learning_rate": 1.1017334707282013e-05, "loss": 0.4005, "step": 10276 }, { "epoch": 1.409960897303972, "grad_norm": 1.3125, "learning_rate": 1.1015898704636772e-05, "loss": 0.4791, "step": 10277 }, { "epoch": 1.4100980997461754, "grad_norm": 1.234375, "learning_rate": 1.1014462680823877e-05, "loss": 0.4623, "step": 10278 }, { "epoch": 1.410235302188379, "grad_norm": 1.2734375, "learning_rate": 1.101302663587325e-05, "loss": 0.4584, "step": 10279 }, { "epoch": 1.4103725046305824, "grad_norm": 1.34375, "learning_rate": 1.1011590569814812e-05, "loss": 0.5227, "step": 10280 }, { "epoch": 1.410509707072786, "grad_norm": 1.171875, "learning_rate": 1.101015448267849e-05, "loss": 0.4075, "step": 10281 }, { "epoch": 1.4106469095149894, "grad_norm": 1.265625, "learning_rate": 1.10087183744942e-05, "loss": 0.4495, "step": 10282 }, { "epoch": 1.4107841119571929, "grad_norm": 1.2578125, "learning_rate": 1.1007282245291871e-05, "loss": 0.4584, "step": 10283 }, { "epoch": 1.4109213143993964, "grad_norm": 1.2265625, "learning_rate": 1.1005846095101422e-05, "loss": 0.4152, "step": 10284 }, { "epoch": 1.4110585168415999, "grad_norm": 1.2265625, "learning_rate": 1.100440992395278e-05, "loss": 0.4959, "step": 10285 }, { "epoch": 1.4111957192838034, "grad_norm": 1.234375, "learning_rate": 1.100297373187587e-05, "loss": 0.4499, "step": 10286 }, { "epoch": 1.4113329217260067, "grad_norm": 1.1484375, "learning_rate": 1.1001537518900616e-05, "loss": 0.3813, "step": 10287 }, { "epoch": 1.4114701241682102, "grad_norm": 1.2890625, "learning_rate": 1.1000101285056944e-05, "loss": 0.4998, "step": 10288 }, { "epoch": 1.4116073266104137, "grad_norm": 1.1640625, "learning_rate": 1.0998665030374777e-05, "loss": 0.4915, "step": 10289 }, { "epoch": 1.4117445290526172, "grad_norm": 1.265625, "learning_rate": 1.099722875488405e-05, "loss": 0.4093, "step": 10290 }, { "epoch": 1.4118817314948207, "grad_norm": 1.203125, "learning_rate": 1.099579245861468e-05, "loss": 0.4376, "step": 10291 }, { "epoch": 1.4120189339370242, "grad_norm": 1.2734375, "learning_rate": 1.0994356141596598e-05, "loss": 0.4118, "step": 10292 }, { "epoch": 1.4121561363792274, "grad_norm": 1.390625, "learning_rate": 1.0992919803859732e-05, "loss": 0.5458, "step": 10293 }, { "epoch": 1.412293338821431, "grad_norm": 1.1796875, "learning_rate": 1.0991483445434008e-05, "loss": 0.446, "step": 10294 }, { "epoch": 1.4124305412636344, "grad_norm": 1.234375, "learning_rate": 1.0990047066349358e-05, "loss": 0.4709, "step": 10295 }, { "epoch": 1.412567743705838, "grad_norm": 1.2109375, "learning_rate": 1.0988610666635708e-05, "loss": 0.4675, "step": 10296 }, { "epoch": 1.4127049461480414, "grad_norm": 1.2578125, "learning_rate": 1.098717424632299e-05, "loss": 0.4492, "step": 10297 }, { "epoch": 1.412842148590245, "grad_norm": 1.203125, "learning_rate": 1.0985737805441133e-05, "loss": 0.4413, "step": 10298 }, { "epoch": 1.4129793510324484, "grad_norm": 1.1328125, "learning_rate": 1.0984301344020065e-05, "loss": 0.3779, "step": 10299 }, { "epoch": 1.413116553474652, "grad_norm": 1.28125, "learning_rate": 1.098286486208972e-05, "loss": 0.4923, "step": 10300 }, { "epoch": 1.4132537559168554, "grad_norm": 1.3359375, "learning_rate": 1.0981428359680025e-05, "loss": 0.5117, "step": 10301 }, { "epoch": 1.413390958359059, "grad_norm": 1.234375, "learning_rate": 1.0979991836820913e-05, "loss": 0.4631, "step": 10302 }, { "epoch": 1.4135281608012622, "grad_norm": 1.1484375, "learning_rate": 1.097855529354232e-05, "loss": 0.3701, "step": 10303 }, { "epoch": 1.4136653632434657, "grad_norm": 1.21875, "learning_rate": 1.0977118729874172e-05, "loss": 0.4172, "step": 10304 }, { "epoch": 1.4138025656856692, "grad_norm": 1.328125, "learning_rate": 1.0975682145846407e-05, "loss": 0.4806, "step": 10305 }, { "epoch": 1.4139397681278727, "grad_norm": 1.2421875, "learning_rate": 1.0974245541488955e-05, "loss": 0.4542, "step": 10306 }, { "epoch": 1.4140769705700762, "grad_norm": 1.3125, "learning_rate": 1.0972808916831755e-05, "loss": 0.484, "step": 10307 }, { "epoch": 1.4142141730122797, "grad_norm": 1.25, "learning_rate": 1.0971372271904732e-05, "loss": 0.4656, "step": 10308 }, { "epoch": 1.414351375454483, "grad_norm": 1.28125, "learning_rate": 1.0969935606737825e-05, "loss": 0.4692, "step": 10309 }, { "epoch": 1.4144885778966865, "grad_norm": 1.1953125, "learning_rate": 1.096849892136097e-05, "loss": 0.4154, "step": 10310 }, { "epoch": 1.41462578033889, "grad_norm": 1.25, "learning_rate": 1.0967062215804103e-05, "loss": 0.4479, "step": 10311 }, { "epoch": 1.4147629827810935, "grad_norm": 1.3125, "learning_rate": 1.0965625490097159e-05, "loss": 0.4959, "step": 10312 }, { "epoch": 1.414900185223297, "grad_norm": 1.25, "learning_rate": 1.096418874427007e-05, "loss": 0.5001, "step": 10313 }, { "epoch": 1.4150373876655005, "grad_norm": 1.4140625, "learning_rate": 1.0962751978352779e-05, "loss": 0.5007, "step": 10314 }, { "epoch": 1.415174590107704, "grad_norm": 1.140625, "learning_rate": 1.0961315192375217e-05, "loss": 0.408, "step": 10315 }, { "epoch": 1.4153117925499075, "grad_norm": 1.2421875, "learning_rate": 1.095987838636733e-05, "loss": 0.4566, "step": 10316 }, { "epoch": 1.415448994992111, "grad_norm": 1.1640625, "learning_rate": 1.0958441560359046e-05, "loss": 0.4908, "step": 10317 }, { "epoch": 1.4155861974343145, "grad_norm": 1.265625, "learning_rate": 1.0957004714380311e-05, "loss": 0.4394, "step": 10318 }, { "epoch": 1.4157233998765177, "grad_norm": 1.1875, "learning_rate": 1.0955567848461057e-05, "loss": 0.4038, "step": 10319 }, { "epoch": 1.4158606023187212, "grad_norm": 1.3203125, "learning_rate": 1.0954130962631226e-05, "loss": 0.4642, "step": 10320 }, { "epoch": 1.4159978047609247, "grad_norm": 1.234375, "learning_rate": 1.0952694056920758e-05, "loss": 0.4312, "step": 10321 }, { "epoch": 1.4161350072031282, "grad_norm": 1.21875, "learning_rate": 1.0951257131359594e-05, "loss": 0.4044, "step": 10322 }, { "epoch": 1.4162722096453317, "grad_norm": 1.3359375, "learning_rate": 1.0949820185977674e-05, "loss": 0.4859, "step": 10323 }, { "epoch": 1.4164094120875352, "grad_norm": 1.2265625, "learning_rate": 1.0948383220804935e-05, "loss": 0.3822, "step": 10324 }, { "epoch": 1.4165466145297385, "grad_norm": 1.171875, "learning_rate": 1.0946946235871323e-05, "loss": 0.4383, "step": 10325 }, { "epoch": 1.416683816971942, "grad_norm": 1.2421875, "learning_rate": 1.0945509231206777e-05, "loss": 0.4327, "step": 10326 }, { "epoch": 1.4168210194141455, "grad_norm": 1.265625, "learning_rate": 1.094407220684124e-05, "loss": 0.4119, "step": 10327 }, { "epoch": 1.416958221856349, "grad_norm": 1.2265625, "learning_rate": 1.0942635162804654e-05, "loss": 0.4086, "step": 10328 }, { "epoch": 1.4170954242985525, "grad_norm": 1.15625, "learning_rate": 1.0941198099126959e-05, "loss": 0.4298, "step": 10329 }, { "epoch": 1.417232626740756, "grad_norm": 1.3046875, "learning_rate": 1.0939761015838105e-05, "loss": 0.4955, "step": 10330 }, { "epoch": 1.4173698291829595, "grad_norm": 1.1875, "learning_rate": 1.0938323912968028e-05, "loss": 0.4131, "step": 10331 }, { "epoch": 1.417507031625163, "grad_norm": 1.2421875, "learning_rate": 1.0936886790546677e-05, "loss": 0.4701, "step": 10332 }, { "epoch": 1.4176442340673665, "grad_norm": 1.1875, "learning_rate": 1.0935449648603993e-05, "loss": 0.4283, "step": 10333 }, { "epoch": 1.41778143650957, "grad_norm": 1.25, "learning_rate": 1.0934012487169926e-05, "loss": 0.4466, "step": 10334 }, { "epoch": 1.4179186389517733, "grad_norm": 1.2421875, "learning_rate": 1.0932575306274418e-05, "loss": 0.4118, "step": 10335 }, { "epoch": 1.4180558413939768, "grad_norm": 1.2109375, "learning_rate": 1.0931138105947415e-05, "loss": 0.4581, "step": 10336 }, { "epoch": 1.4181930438361803, "grad_norm": 1.2109375, "learning_rate": 1.092970088621886e-05, "loss": 0.4164, "step": 10337 }, { "epoch": 1.4183302462783838, "grad_norm": 1.375, "learning_rate": 1.0928263647118701e-05, "loss": 0.4851, "step": 10338 }, { "epoch": 1.4184674487205873, "grad_norm": 1.2109375, "learning_rate": 1.092682638867689e-05, "loss": 0.4076, "step": 10339 }, { "epoch": 1.4186046511627908, "grad_norm": 1.2421875, "learning_rate": 1.0925389110923368e-05, "loss": 0.4792, "step": 10340 }, { "epoch": 1.418741853604994, "grad_norm": 1.171875, "learning_rate": 1.0923951813888087e-05, "loss": 0.4588, "step": 10341 }, { "epoch": 1.4188790560471976, "grad_norm": 1.203125, "learning_rate": 1.0922514497600991e-05, "loss": 0.3942, "step": 10342 }, { "epoch": 1.419016258489401, "grad_norm": 1.203125, "learning_rate": 1.0921077162092031e-05, "loss": 0.4168, "step": 10343 }, { "epoch": 1.4191534609316045, "grad_norm": 1.203125, "learning_rate": 1.0919639807391158e-05, "loss": 0.4353, "step": 10344 }, { "epoch": 1.419290663373808, "grad_norm": 1.3125, "learning_rate": 1.0918202433528317e-05, "loss": 0.4278, "step": 10345 }, { "epoch": 1.4194278658160115, "grad_norm": 1.2109375, "learning_rate": 1.0916765040533458e-05, "loss": 0.4418, "step": 10346 }, { "epoch": 1.419565068258215, "grad_norm": 1.3203125, "learning_rate": 1.0915327628436535e-05, "loss": 0.4443, "step": 10347 }, { "epoch": 1.4197022707004185, "grad_norm": 1.3828125, "learning_rate": 1.0913890197267494e-05, "loss": 0.4843, "step": 10348 }, { "epoch": 1.419839473142622, "grad_norm": 1.234375, "learning_rate": 1.0912452747056288e-05, "loss": 0.4431, "step": 10349 }, { "epoch": 1.4199766755848255, "grad_norm": 1.28125, "learning_rate": 1.0911015277832868e-05, "loss": 0.483, "step": 10350 }, { "epoch": 1.4201138780270288, "grad_norm": 1.3515625, "learning_rate": 1.0909577789627183e-05, "loss": 0.4961, "step": 10351 }, { "epoch": 1.4202510804692323, "grad_norm": 1.2578125, "learning_rate": 1.0908140282469195e-05, "loss": 0.4385, "step": 10352 }, { "epoch": 1.4203882829114358, "grad_norm": 1.2265625, "learning_rate": 1.0906702756388845e-05, "loss": 0.4151, "step": 10353 }, { "epoch": 1.4205254853536393, "grad_norm": 1.2421875, "learning_rate": 1.090526521141609e-05, "loss": 0.4471, "step": 10354 }, { "epoch": 1.4206626877958428, "grad_norm": 1.125, "learning_rate": 1.0903827647580882e-05, "loss": 0.4188, "step": 10355 }, { "epoch": 1.4207998902380463, "grad_norm": 1.171875, "learning_rate": 1.0902390064913178e-05, "loss": 0.4112, "step": 10356 }, { "epoch": 1.4209370926802496, "grad_norm": 1.1640625, "learning_rate": 1.0900952463442927e-05, "loss": 0.3742, "step": 10357 }, { "epoch": 1.421074295122453, "grad_norm": 1.3515625, "learning_rate": 1.0899514843200087e-05, "loss": 0.4606, "step": 10358 }, { "epoch": 1.4212114975646566, "grad_norm": 1.265625, "learning_rate": 1.0898077204214614e-05, "loss": 0.4656, "step": 10359 }, { "epoch": 1.42134870000686, "grad_norm": 1.234375, "learning_rate": 1.089663954651646e-05, "loss": 0.443, "step": 10360 }, { "epoch": 1.4214859024490636, "grad_norm": 1.2421875, "learning_rate": 1.0895201870135586e-05, "loss": 0.4501, "step": 10361 }, { "epoch": 1.421623104891267, "grad_norm": 1.15625, "learning_rate": 1.089376417510194e-05, "loss": 0.3992, "step": 10362 }, { "epoch": 1.4217603073334706, "grad_norm": 1.234375, "learning_rate": 1.0892326461445484e-05, "loss": 0.4306, "step": 10363 }, { "epoch": 1.421897509775674, "grad_norm": 1.1640625, "learning_rate": 1.0890888729196171e-05, "loss": 0.4149, "step": 10364 }, { "epoch": 1.4220347122178776, "grad_norm": 1.109375, "learning_rate": 1.0889450978383962e-05, "loss": 0.3611, "step": 10365 }, { "epoch": 1.422171914660081, "grad_norm": 1.21875, "learning_rate": 1.088801320903881e-05, "loss": 0.4406, "step": 10366 }, { "epoch": 1.4223091171022844, "grad_norm": 1.1640625, "learning_rate": 1.0886575421190677e-05, "loss": 0.4027, "step": 10367 }, { "epoch": 1.4224463195444879, "grad_norm": 1.2734375, "learning_rate": 1.088513761486952e-05, "loss": 0.4774, "step": 10368 }, { "epoch": 1.4225835219866914, "grad_norm": 1.21875, "learning_rate": 1.0883699790105297e-05, "loss": 0.4619, "step": 10369 }, { "epoch": 1.4227207244288949, "grad_norm": 1.2109375, "learning_rate": 1.0882261946927972e-05, "loss": 0.4746, "step": 10370 }, { "epoch": 1.4228579268710984, "grad_norm": 1.1875, "learning_rate": 1.0880824085367496e-05, "loss": 0.4055, "step": 10371 }, { "epoch": 1.4229951293133019, "grad_norm": 1.0703125, "learning_rate": 1.0879386205453833e-05, "loss": 0.3832, "step": 10372 }, { "epoch": 1.4231323317555051, "grad_norm": 1.2265625, "learning_rate": 1.0877948307216943e-05, "loss": 0.4462, "step": 10373 }, { "epoch": 1.4232695341977086, "grad_norm": 1.265625, "learning_rate": 1.0876510390686787e-05, "loss": 0.4333, "step": 10374 }, { "epoch": 1.4234067366399121, "grad_norm": 1.125, "learning_rate": 1.0875072455893328e-05, "loss": 0.3992, "step": 10375 }, { "epoch": 1.4235439390821156, "grad_norm": 1.3046875, "learning_rate": 1.0873634502866524e-05, "loss": 0.461, "step": 10376 }, { "epoch": 1.4236811415243191, "grad_norm": 1.265625, "learning_rate": 1.0872196531636336e-05, "loss": 0.4452, "step": 10377 }, { "epoch": 1.4238183439665226, "grad_norm": 1.265625, "learning_rate": 1.0870758542232728e-05, "loss": 0.4402, "step": 10378 }, { "epoch": 1.4239555464087261, "grad_norm": 1.3125, "learning_rate": 1.0869320534685663e-05, "loss": 0.4755, "step": 10379 }, { "epoch": 1.4240927488509296, "grad_norm": 1.25, "learning_rate": 1.0867882509025106e-05, "loss": 0.4332, "step": 10380 }, { "epoch": 1.4242299512931331, "grad_norm": 1.375, "learning_rate": 1.0866444465281014e-05, "loss": 0.541, "step": 10381 }, { "epoch": 1.4243671537353366, "grad_norm": 1.171875, "learning_rate": 1.0865006403483357e-05, "loss": 0.4582, "step": 10382 }, { "epoch": 1.42450435617754, "grad_norm": 1.1796875, "learning_rate": 1.0863568323662092e-05, "loss": 0.3976, "step": 10383 }, { "epoch": 1.4246415586197434, "grad_norm": 1.1875, "learning_rate": 1.086213022584719e-05, "loss": 0.4056, "step": 10384 }, { "epoch": 1.424778761061947, "grad_norm": 1.3515625, "learning_rate": 1.0860692110068613e-05, "loss": 0.536, "step": 10385 }, { "epoch": 1.4249159635041504, "grad_norm": 1.203125, "learning_rate": 1.0859253976356326e-05, "loss": 0.3847, "step": 10386 }, { "epoch": 1.425053165946354, "grad_norm": 1.140625, "learning_rate": 1.0857815824740296e-05, "loss": 0.3651, "step": 10387 }, { "epoch": 1.4251903683885574, "grad_norm": 1.3125, "learning_rate": 1.0856377655250487e-05, "loss": 0.4898, "step": 10388 }, { "epoch": 1.4253275708307607, "grad_norm": 1.28125, "learning_rate": 1.085493946791687e-05, "loss": 0.5078, "step": 10389 }, { "epoch": 1.4254647732729642, "grad_norm": 1.265625, "learning_rate": 1.0853501262769402e-05, "loss": 0.4616, "step": 10390 }, { "epoch": 1.4256019757151677, "grad_norm": 1.234375, "learning_rate": 1.085206303983806e-05, "loss": 0.4606, "step": 10391 }, { "epoch": 1.4257391781573712, "grad_norm": 1.1640625, "learning_rate": 1.0850624799152802e-05, "loss": 0.4224, "step": 10392 }, { "epoch": 1.4258763805995747, "grad_norm": 1.234375, "learning_rate": 1.0849186540743604e-05, "loss": 0.4398, "step": 10393 }, { "epoch": 1.4260135830417782, "grad_norm": 1.359375, "learning_rate": 1.0847748264640429e-05, "loss": 0.4601, "step": 10394 }, { "epoch": 1.4261507854839817, "grad_norm": 1.2890625, "learning_rate": 1.084630997087325e-05, "loss": 0.4553, "step": 10395 }, { "epoch": 1.4262879879261852, "grad_norm": 1.1484375, "learning_rate": 1.0844871659472032e-05, "loss": 0.4231, "step": 10396 }, { "epoch": 1.4264251903683887, "grad_norm": 1.3046875, "learning_rate": 1.0843433330466745e-05, "loss": 0.4777, "step": 10397 }, { "epoch": 1.4265623928105922, "grad_norm": 1.3046875, "learning_rate": 1.084199498388736e-05, "loss": 0.4688, "step": 10398 }, { "epoch": 1.4266995952527954, "grad_norm": 1.2578125, "learning_rate": 1.0840556619763847e-05, "loss": 0.4584, "step": 10399 }, { "epoch": 1.426836797694999, "grad_norm": 1.1171875, "learning_rate": 1.0839118238126174e-05, "loss": 0.3955, "step": 10400 }, { "epoch": 1.4269740001372024, "grad_norm": 1.1640625, "learning_rate": 1.0837679839004312e-05, "loss": 0.4195, "step": 10401 }, { "epoch": 1.427111202579406, "grad_norm": 1.203125, "learning_rate": 1.0836241422428233e-05, "loss": 0.4051, "step": 10402 }, { "epoch": 1.4272484050216094, "grad_norm": 1.2734375, "learning_rate": 1.0834802988427909e-05, "loss": 0.452, "step": 10403 }, { "epoch": 1.427385607463813, "grad_norm": 1.1796875, "learning_rate": 1.0833364537033309e-05, "loss": 0.4321, "step": 10404 }, { "epoch": 1.4275228099060162, "grad_norm": 1.203125, "learning_rate": 1.083192606827441e-05, "loss": 0.4252, "step": 10405 }, { "epoch": 1.4276600123482197, "grad_norm": 1.296875, "learning_rate": 1.0830487582181181e-05, "loss": 0.4705, "step": 10406 }, { "epoch": 1.4277972147904232, "grad_norm": 1.234375, "learning_rate": 1.0829049078783595e-05, "loss": 0.4655, "step": 10407 }, { "epoch": 1.4279344172326267, "grad_norm": 1.375, "learning_rate": 1.0827610558111625e-05, "loss": 0.4498, "step": 10408 }, { "epoch": 1.4280716196748302, "grad_norm": 1.3203125, "learning_rate": 1.0826172020195247e-05, "loss": 0.4925, "step": 10409 }, { "epoch": 1.4282088221170337, "grad_norm": 1.3125, "learning_rate": 1.0824733465064433e-05, "loss": 0.4605, "step": 10410 }, { "epoch": 1.4283460245592372, "grad_norm": 1.25, "learning_rate": 1.0823294892749157e-05, "loss": 0.427, "step": 10411 }, { "epoch": 1.4284832270014407, "grad_norm": 1.2109375, "learning_rate": 1.0821856303279391e-05, "loss": 0.4095, "step": 10412 }, { "epoch": 1.4286204294436442, "grad_norm": 1.203125, "learning_rate": 1.0820417696685116e-05, "loss": 0.4369, "step": 10413 }, { "epoch": 1.4287576318858477, "grad_norm": 1.265625, "learning_rate": 1.0818979072996305e-05, "loss": 0.4781, "step": 10414 }, { "epoch": 1.428894834328051, "grad_norm": 1.09375, "learning_rate": 1.0817540432242934e-05, "loss": 0.3865, "step": 10415 }, { "epoch": 1.4290320367702545, "grad_norm": 1.2890625, "learning_rate": 1.0816101774454979e-05, "loss": 0.4832, "step": 10416 }, { "epoch": 1.429169239212458, "grad_norm": 1.2421875, "learning_rate": 1.0814663099662412e-05, "loss": 0.4636, "step": 10417 }, { "epoch": 1.4293064416546615, "grad_norm": 1.1015625, "learning_rate": 1.0813224407895215e-05, "loss": 0.356, "step": 10418 }, { "epoch": 1.429443644096865, "grad_norm": 1.25, "learning_rate": 1.0811785699183364e-05, "loss": 0.464, "step": 10419 }, { "epoch": 1.4295808465390685, "grad_norm": 1.28125, "learning_rate": 1.0810346973556835e-05, "loss": 0.4466, "step": 10420 }, { "epoch": 1.4297180489812717, "grad_norm": 1.2265625, "learning_rate": 1.080890823104561e-05, "loss": 0.4356, "step": 10421 }, { "epoch": 1.4298552514234752, "grad_norm": 1.28125, "learning_rate": 1.0807469471679661e-05, "loss": 0.4618, "step": 10422 }, { "epoch": 1.4299924538656787, "grad_norm": 1.140625, "learning_rate": 1.0806030695488973e-05, "loss": 0.3907, "step": 10423 }, { "epoch": 1.4301296563078822, "grad_norm": 1.28125, "learning_rate": 1.0804591902503519e-05, "loss": 0.4723, "step": 10424 }, { "epoch": 1.4302668587500857, "grad_norm": 1.21875, "learning_rate": 1.080315309275328e-05, "loss": 0.4361, "step": 10425 }, { "epoch": 1.4304040611922892, "grad_norm": 1.265625, "learning_rate": 1.0801714266268238e-05, "loss": 0.4766, "step": 10426 }, { "epoch": 1.4305412636344927, "grad_norm": 1.3359375, "learning_rate": 1.080027542307837e-05, "loss": 0.5047, "step": 10427 }, { "epoch": 1.4306784660766962, "grad_norm": 1.28125, "learning_rate": 1.079883656321366e-05, "loss": 0.4878, "step": 10428 }, { "epoch": 1.4308156685188997, "grad_norm": 1.1640625, "learning_rate": 1.0797397686704084e-05, "loss": 0.395, "step": 10429 }, { "epoch": 1.4309528709611032, "grad_norm": 1.3125, "learning_rate": 1.0795958793579623e-05, "loss": 0.4665, "step": 10430 }, { "epoch": 1.4310900734033065, "grad_norm": 1.1484375, "learning_rate": 1.0794519883870264e-05, "loss": 0.4103, "step": 10431 }, { "epoch": 1.43122727584551, "grad_norm": 1.359375, "learning_rate": 1.0793080957605989e-05, "loss": 0.4867, "step": 10432 }, { "epoch": 1.4313644782877135, "grad_norm": 1.1796875, "learning_rate": 1.079164201481677e-05, "loss": 0.3959, "step": 10433 }, { "epoch": 1.431501680729917, "grad_norm": 1.2890625, "learning_rate": 1.07902030555326e-05, "loss": 0.4684, "step": 10434 }, { "epoch": 1.4316388831721205, "grad_norm": 1.21875, "learning_rate": 1.0788764079783454e-05, "loss": 0.4516, "step": 10435 }, { "epoch": 1.431776085614324, "grad_norm": 1.2890625, "learning_rate": 1.0787325087599318e-05, "loss": 0.4803, "step": 10436 }, { "epoch": 1.4319132880565273, "grad_norm": 1.15625, "learning_rate": 1.0785886079010177e-05, "loss": 0.3119, "step": 10437 }, { "epoch": 1.4320504904987308, "grad_norm": 1.2109375, "learning_rate": 1.0784447054046014e-05, "loss": 0.4288, "step": 10438 }, { "epoch": 1.4321876929409343, "grad_norm": 1.3203125, "learning_rate": 1.0783008012736811e-05, "loss": 0.4919, "step": 10439 }, { "epoch": 1.4323248953831378, "grad_norm": 1.2265625, "learning_rate": 1.0781568955112555e-05, "loss": 0.4721, "step": 10440 }, { "epoch": 1.4324620978253413, "grad_norm": 1.2265625, "learning_rate": 1.078012988120323e-05, "loss": 0.3986, "step": 10441 }, { "epoch": 1.4325993002675448, "grad_norm": 1.25, "learning_rate": 1.077869079103882e-05, "loss": 0.401, "step": 10442 }, { "epoch": 1.4327365027097483, "grad_norm": 1.1875, "learning_rate": 1.0777251684649313e-05, "loss": 0.4182, "step": 10443 }, { "epoch": 1.4328737051519518, "grad_norm": 1.3125, "learning_rate": 1.0775812562064691e-05, "loss": 0.4601, "step": 10444 }, { "epoch": 1.4330109075941553, "grad_norm": 1.265625, "learning_rate": 1.0774373423314943e-05, "loss": 0.4376, "step": 10445 }, { "epoch": 1.4331481100363588, "grad_norm": 1.2265625, "learning_rate": 1.0772934268430055e-05, "loss": 0.4394, "step": 10446 }, { "epoch": 1.433285312478562, "grad_norm": 1.21875, "learning_rate": 1.0771495097440012e-05, "loss": 0.4764, "step": 10447 }, { "epoch": 1.4334225149207656, "grad_norm": 1.2578125, "learning_rate": 1.0770055910374802e-05, "loss": 0.431, "step": 10448 }, { "epoch": 1.433559717362969, "grad_norm": 1.2109375, "learning_rate": 1.0768616707264414e-05, "loss": 0.3708, "step": 10449 }, { "epoch": 1.4336969198051726, "grad_norm": 1.1953125, "learning_rate": 1.0767177488138836e-05, "loss": 0.4278, "step": 10450 }, { "epoch": 1.433834122247376, "grad_norm": 1.2421875, "learning_rate": 1.0765738253028055e-05, "loss": 0.448, "step": 10451 }, { "epoch": 1.4339713246895796, "grad_norm": 1.0703125, "learning_rate": 1.0764299001962057e-05, "loss": 0.3559, "step": 10452 }, { "epoch": 1.4341085271317828, "grad_norm": 1.2265625, "learning_rate": 1.0762859734970833e-05, "loss": 0.4465, "step": 10453 }, { "epoch": 1.4342457295739863, "grad_norm": 1.2109375, "learning_rate": 1.0761420452084374e-05, "loss": 0.4721, "step": 10454 }, { "epoch": 1.4343829320161898, "grad_norm": 1.2734375, "learning_rate": 1.0759981153332665e-05, "loss": 0.4351, "step": 10455 }, { "epoch": 1.4345201344583933, "grad_norm": 1.1953125, "learning_rate": 1.0758541838745701e-05, "loss": 0.4291, "step": 10456 }, { "epoch": 1.4346573369005968, "grad_norm": 1.359375, "learning_rate": 1.075710250835347e-05, "loss": 0.4601, "step": 10457 }, { "epoch": 1.4347945393428003, "grad_norm": 1.2109375, "learning_rate": 1.0755663162185959e-05, "loss": 0.4122, "step": 10458 }, { "epoch": 1.4349317417850038, "grad_norm": 1.1875, "learning_rate": 1.0754223800273163e-05, "loss": 0.444, "step": 10459 }, { "epoch": 1.4350689442272073, "grad_norm": 1.234375, "learning_rate": 1.0752784422645074e-05, "loss": 0.4487, "step": 10460 }, { "epoch": 1.4352061466694108, "grad_norm": 1.1875, "learning_rate": 1.0751345029331678e-05, "loss": 0.4197, "step": 10461 }, { "epoch": 1.4353433491116143, "grad_norm": 1.296875, "learning_rate": 1.0749905620362972e-05, "loss": 0.3901, "step": 10462 }, { "epoch": 1.4354805515538176, "grad_norm": 1.1953125, "learning_rate": 1.0748466195768946e-05, "loss": 0.4166, "step": 10463 }, { "epoch": 1.435617753996021, "grad_norm": 1.1875, "learning_rate": 1.0747026755579591e-05, "loss": 0.4505, "step": 10464 }, { "epoch": 1.4357549564382246, "grad_norm": 1.234375, "learning_rate": 1.0745587299824902e-05, "loss": 0.44, "step": 10465 }, { "epoch": 1.435892158880428, "grad_norm": 1.2421875, "learning_rate": 1.074414782853487e-05, "loss": 0.4253, "step": 10466 }, { "epoch": 1.4360293613226316, "grad_norm": 1.28125, "learning_rate": 1.0742708341739491e-05, "loss": 0.4857, "step": 10467 }, { "epoch": 1.436166563764835, "grad_norm": 1.25, "learning_rate": 1.0741268839468757e-05, "loss": 0.4872, "step": 10468 }, { "epoch": 1.4363037662070384, "grad_norm": 1.1796875, "learning_rate": 1.0739829321752666e-05, "loss": 0.4108, "step": 10469 }, { "epoch": 1.4364409686492419, "grad_norm": 1.2578125, "learning_rate": 1.0738389788621205e-05, "loss": 0.4721, "step": 10470 }, { "epoch": 1.4365781710914454, "grad_norm": 1.234375, "learning_rate": 1.0736950240104371e-05, "loss": 0.3978, "step": 10471 }, { "epoch": 1.4367153735336489, "grad_norm": 1.2734375, "learning_rate": 1.0735510676232162e-05, "loss": 0.4237, "step": 10472 }, { "epoch": 1.4368525759758524, "grad_norm": 1.140625, "learning_rate": 1.073407109703457e-05, "loss": 0.4021, "step": 10473 }, { "epoch": 1.4369897784180559, "grad_norm": 1.2578125, "learning_rate": 1.0732631502541594e-05, "loss": 0.4805, "step": 10474 }, { "epoch": 1.4371269808602594, "grad_norm": 1.2265625, "learning_rate": 1.0731191892783228e-05, "loss": 0.4296, "step": 10475 }, { "epoch": 1.4372641833024629, "grad_norm": 1.234375, "learning_rate": 1.0729752267789469e-05, "loss": 0.4397, "step": 10476 }, { "epoch": 1.4374013857446664, "grad_norm": 1.234375, "learning_rate": 1.0728312627590314e-05, "loss": 0.4731, "step": 10477 }, { "epoch": 1.4375385881868699, "grad_norm": 1.1796875, "learning_rate": 1.0726872972215756e-05, "loss": 0.4537, "step": 10478 }, { "epoch": 1.4376757906290731, "grad_norm": 1.1484375, "learning_rate": 1.0725433301695796e-05, "loss": 0.4033, "step": 10479 }, { "epoch": 1.4378129930712766, "grad_norm": 1.2109375, "learning_rate": 1.072399361606043e-05, "loss": 0.4303, "step": 10480 }, { "epoch": 1.4379501955134801, "grad_norm": 1.15625, "learning_rate": 1.0722553915339657e-05, "loss": 0.4347, "step": 10481 }, { "epoch": 1.4380873979556836, "grad_norm": 1.2734375, "learning_rate": 1.0721114199563476e-05, "loss": 0.4691, "step": 10482 }, { "epoch": 1.4382246003978871, "grad_norm": 1.28125, "learning_rate": 1.0719674468761883e-05, "loss": 0.4339, "step": 10483 }, { "epoch": 1.4383618028400906, "grad_norm": 1.171875, "learning_rate": 1.0718234722964877e-05, "loss": 0.4275, "step": 10484 }, { "epoch": 1.438499005282294, "grad_norm": 1.3125, "learning_rate": 1.071679496220246e-05, "loss": 0.4516, "step": 10485 }, { "epoch": 1.4386362077244974, "grad_norm": 1.1875, "learning_rate": 1.071535518650463e-05, "loss": 0.4276, "step": 10486 }, { "epoch": 1.438773410166701, "grad_norm": 1.2109375, "learning_rate": 1.0713915395901384e-05, "loss": 0.4196, "step": 10487 }, { "epoch": 1.4389106126089044, "grad_norm": 1.1640625, "learning_rate": 1.0712475590422725e-05, "loss": 0.3941, "step": 10488 }, { "epoch": 1.439047815051108, "grad_norm": 1.171875, "learning_rate": 1.071103577009865e-05, "loss": 0.4291, "step": 10489 }, { "epoch": 1.4391850174933114, "grad_norm": 1.2578125, "learning_rate": 1.0709595934959166e-05, "loss": 0.4385, "step": 10490 }, { "epoch": 1.439322219935515, "grad_norm": 1.296875, "learning_rate": 1.070815608503427e-05, "loss": 0.4975, "step": 10491 }, { "epoch": 1.4394594223777184, "grad_norm": 1.234375, "learning_rate": 1.070671622035396e-05, "loss": 0.4328, "step": 10492 }, { "epoch": 1.439596624819922, "grad_norm": 1.296875, "learning_rate": 1.0705276340948242e-05, "loss": 0.5031, "step": 10493 }, { "epoch": 1.4397338272621254, "grad_norm": 1.296875, "learning_rate": 1.0703836446847118e-05, "loss": 0.4409, "step": 10494 }, { "epoch": 1.4398710297043287, "grad_norm": 1.265625, "learning_rate": 1.0702396538080587e-05, "loss": 0.4376, "step": 10495 }, { "epoch": 1.4400082321465322, "grad_norm": 1.2890625, "learning_rate": 1.0700956614678656e-05, "loss": 0.4425, "step": 10496 }, { "epoch": 1.4401454345887357, "grad_norm": 1.265625, "learning_rate": 1.0699516676671325e-05, "loss": 0.4289, "step": 10497 }, { "epoch": 1.4402826370309392, "grad_norm": 1.390625, "learning_rate": 1.0698076724088595e-05, "loss": 0.4749, "step": 10498 }, { "epoch": 1.4404198394731427, "grad_norm": 1.3046875, "learning_rate": 1.0696636756960472e-05, "loss": 0.4881, "step": 10499 }, { "epoch": 1.4405570419153462, "grad_norm": 1.203125, "learning_rate": 1.0695196775316961e-05, "loss": 0.4558, "step": 10500 }, { "epoch": 1.4406942443575494, "grad_norm": 1.265625, "learning_rate": 1.0693756779188063e-05, "loss": 0.4447, "step": 10501 }, { "epoch": 1.440831446799753, "grad_norm": 1.2578125, "learning_rate": 1.0692316768603783e-05, "loss": 0.4558, "step": 10502 }, { "epoch": 1.4409686492419564, "grad_norm": 1.3203125, "learning_rate": 1.069087674359413e-05, "loss": 0.5294, "step": 10503 }, { "epoch": 1.44110585168416, "grad_norm": 1.171875, "learning_rate": 1.0689436704189102e-05, "loss": 0.4075, "step": 10504 }, { "epoch": 1.4412430541263634, "grad_norm": 1.0859375, "learning_rate": 1.068799665041871e-05, "loss": 0.366, "step": 10505 }, { "epoch": 1.441380256568567, "grad_norm": 1.234375, "learning_rate": 1.0686556582312954e-05, "loss": 0.4458, "step": 10506 }, { "epoch": 1.4415174590107704, "grad_norm": 1.1328125, "learning_rate": 1.0685116499901846e-05, "loss": 0.3895, "step": 10507 }, { "epoch": 1.441654661452974, "grad_norm": 1.1171875, "learning_rate": 1.0683676403215388e-05, "loss": 0.4223, "step": 10508 }, { "epoch": 1.4417918638951774, "grad_norm": 1.3046875, "learning_rate": 1.0682236292283585e-05, "loss": 0.4994, "step": 10509 }, { "epoch": 1.441929066337381, "grad_norm": 1.28125, "learning_rate": 1.068079616713645e-05, "loss": 0.4185, "step": 10510 }, { "epoch": 1.4420662687795842, "grad_norm": 1.3359375, "learning_rate": 1.0679356027803981e-05, "loss": 0.5175, "step": 10511 }, { "epoch": 1.4422034712217877, "grad_norm": 1.171875, "learning_rate": 1.0677915874316194e-05, "loss": 0.4277, "step": 10512 }, { "epoch": 1.4423406736639912, "grad_norm": 1.28125, "learning_rate": 1.0676475706703091e-05, "loss": 0.4932, "step": 10513 }, { "epoch": 1.4424778761061947, "grad_norm": 1.25, "learning_rate": 1.0675035524994686e-05, "loss": 0.4483, "step": 10514 }, { "epoch": 1.4426150785483982, "grad_norm": 1.1015625, "learning_rate": 1.0673595329220978e-05, "loss": 0.4094, "step": 10515 }, { "epoch": 1.4427522809906017, "grad_norm": 1.203125, "learning_rate": 1.0672155119411982e-05, "loss": 0.4096, "step": 10516 }, { "epoch": 1.442889483432805, "grad_norm": 1.1328125, "learning_rate": 1.0670714895597704e-05, "loss": 0.3861, "step": 10517 }, { "epoch": 1.4430266858750085, "grad_norm": 1.234375, "learning_rate": 1.0669274657808157e-05, "loss": 0.456, "step": 10518 }, { "epoch": 1.443163888317212, "grad_norm": 1.1328125, "learning_rate": 1.0667834406073346e-05, "loss": 0.4297, "step": 10519 }, { "epoch": 1.4433010907594155, "grad_norm": 1.21875, "learning_rate": 1.0666394140423282e-05, "loss": 0.4573, "step": 10520 }, { "epoch": 1.443438293201619, "grad_norm": 1.2421875, "learning_rate": 1.0664953860887974e-05, "loss": 0.4487, "step": 10521 }, { "epoch": 1.4435754956438225, "grad_norm": 1.1640625, "learning_rate": 1.0663513567497437e-05, "loss": 0.4032, "step": 10522 }, { "epoch": 1.443712698086026, "grad_norm": 1.2578125, "learning_rate": 1.0662073260281677e-05, "loss": 0.4579, "step": 10523 }, { "epoch": 1.4438499005282295, "grad_norm": 1.2734375, "learning_rate": 1.0660632939270703e-05, "loss": 0.4892, "step": 10524 }, { "epoch": 1.443987102970433, "grad_norm": 1.375, "learning_rate": 1.065919260449453e-05, "loss": 0.5067, "step": 10525 }, { "epoch": 1.4441243054126365, "grad_norm": 1.15625, "learning_rate": 1.0657752255983169e-05, "loss": 0.4278, "step": 10526 }, { "epoch": 1.4442615078548398, "grad_norm": 1.265625, "learning_rate": 1.0656311893766628e-05, "loss": 0.4714, "step": 10527 }, { "epoch": 1.4443987102970433, "grad_norm": 1.2734375, "learning_rate": 1.0654871517874925e-05, "loss": 0.4578, "step": 10528 }, { "epoch": 1.4445359127392468, "grad_norm": 1.265625, "learning_rate": 1.0653431128338068e-05, "loss": 0.4603, "step": 10529 }, { "epoch": 1.4446731151814503, "grad_norm": 1.21875, "learning_rate": 1.065199072518607e-05, "loss": 0.4424, "step": 10530 }, { "epoch": 1.4448103176236538, "grad_norm": 1.2890625, "learning_rate": 1.0650550308448946e-05, "loss": 0.4654, "step": 10531 }, { "epoch": 1.4449475200658572, "grad_norm": 1.25, "learning_rate": 1.0649109878156704e-05, "loss": 0.3998, "step": 10532 }, { "epoch": 1.4450847225080605, "grad_norm": 1.4140625, "learning_rate": 1.0647669434339362e-05, "loss": 0.5003, "step": 10533 }, { "epoch": 1.445221924950264, "grad_norm": 1.1796875, "learning_rate": 1.0646228977026934e-05, "loss": 0.3725, "step": 10534 }, { "epoch": 1.4453591273924675, "grad_norm": 1.203125, "learning_rate": 1.0644788506249428e-05, "loss": 0.4158, "step": 10535 }, { "epoch": 1.445496329834671, "grad_norm": 1.2265625, "learning_rate": 1.0643348022036866e-05, "loss": 0.4532, "step": 10536 }, { "epoch": 1.4456335322768745, "grad_norm": 1.3515625, "learning_rate": 1.0641907524419258e-05, "loss": 0.5244, "step": 10537 }, { "epoch": 1.445770734719078, "grad_norm": 1.15625, "learning_rate": 1.0640467013426618e-05, "loss": 0.4642, "step": 10538 }, { "epoch": 1.4459079371612815, "grad_norm": 1.359375, "learning_rate": 1.0639026489088966e-05, "loss": 0.5156, "step": 10539 }, { "epoch": 1.446045139603485, "grad_norm": 1.1953125, "learning_rate": 1.0637585951436313e-05, "loss": 0.4122, "step": 10540 }, { "epoch": 1.4461823420456885, "grad_norm": 1.234375, "learning_rate": 1.0636145400498675e-05, "loss": 0.4698, "step": 10541 }, { "epoch": 1.446319544487892, "grad_norm": 1.2109375, "learning_rate": 1.0634704836306067e-05, "loss": 0.4472, "step": 10542 }, { "epoch": 1.4464567469300953, "grad_norm": 1.0390625, "learning_rate": 1.0633264258888509e-05, "loss": 0.3601, "step": 10543 }, { "epoch": 1.4465939493722988, "grad_norm": 1.2890625, "learning_rate": 1.0631823668276014e-05, "loss": 0.4773, "step": 10544 }, { "epoch": 1.4467311518145023, "grad_norm": 1.21875, "learning_rate": 1.06303830644986e-05, "loss": 0.4532, "step": 10545 }, { "epoch": 1.4468683542567058, "grad_norm": 1.2734375, "learning_rate": 1.0628942447586281e-05, "loss": 0.4017, "step": 10546 }, { "epoch": 1.4470055566989093, "grad_norm": 1.265625, "learning_rate": 1.062750181756908e-05, "loss": 0.447, "step": 10547 }, { "epoch": 1.4471427591411128, "grad_norm": 1.3046875, "learning_rate": 1.0626061174477011e-05, "loss": 0.5153, "step": 10548 }, { "epoch": 1.447279961583316, "grad_norm": 1.3046875, "learning_rate": 1.0624620518340092e-05, "loss": 0.499, "step": 10549 }, { "epoch": 1.4474171640255196, "grad_norm": 1.1328125, "learning_rate": 1.0623179849188342e-05, "loss": 0.3537, "step": 10550 }, { "epoch": 1.447554366467723, "grad_norm": 1.234375, "learning_rate": 1.0621739167051779e-05, "loss": 0.4289, "step": 10551 }, { "epoch": 1.4476915689099266, "grad_norm": 1.375, "learning_rate": 1.062029847196042e-05, "loss": 0.4856, "step": 10552 }, { "epoch": 1.44782877135213, "grad_norm": 1.2734375, "learning_rate": 1.0618857763944285e-05, "loss": 0.397, "step": 10553 }, { "epoch": 1.4479659737943336, "grad_norm": 1.2734375, "learning_rate": 1.0617417043033393e-05, "loss": 0.4348, "step": 10554 }, { "epoch": 1.448103176236537, "grad_norm": 1.2265625, "learning_rate": 1.0615976309257766e-05, "loss": 0.4754, "step": 10555 }, { "epoch": 1.4482403786787406, "grad_norm": 1.234375, "learning_rate": 1.0614535562647416e-05, "loss": 0.4579, "step": 10556 }, { "epoch": 1.448377581120944, "grad_norm": 1.1953125, "learning_rate": 1.0613094803232372e-05, "loss": 0.4318, "step": 10557 }, { "epoch": 1.4485147835631476, "grad_norm": 1.15625, "learning_rate": 1.061165403104265e-05, "loss": 0.4373, "step": 10558 }, { "epoch": 1.4486519860053508, "grad_norm": 1.3671875, "learning_rate": 1.0610213246108273e-05, "loss": 0.5084, "step": 10559 }, { "epoch": 1.4487891884475543, "grad_norm": 1.2421875, "learning_rate": 1.0608772448459259e-05, "loss": 0.4209, "step": 10560 }, { "epoch": 1.4489263908897578, "grad_norm": 1.2578125, "learning_rate": 1.060733163812563e-05, "loss": 0.4614, "step": 10561 }, { "epoch": 1.4490635933319613, "grad_norm": 1.3125, "learning_rate": 1.0605890815137402e-05, "loss": 0.4652, "step": 10562 }, { "epoch": 1.4492007957741648, "grad_norm": 1.234375, "learning_rate": 1.0604449979524606e-05, "loss": 0.4569, "step": 10563 }, { "epoch": 1.4493379982163683, "grad_norm": 1.203125, "learning_rate": 1.0603009131317259e-05, "loss": 0.3619, "step": 10564 }, { "epoch": 1.4494752006585716, "grad_norm": 1.3125, "learning_rate": 1.0601568270545382e-05, "loss": 0.4522, "step": 10565 }, { "epoch": 1.449612403100775, "grad_norm": 1.25, "learning_rate": 1.0600127397239e-05, "loss": 0.4337, "step": 10566 }, { "epoch": 1.4497496055429786, "grad_norm": 1.109375, "learning_rate": 1.0598686511428135e-05, "loss": 0.3859, "step": 10567 }, { "epoch": 1.449886807985182, "grad_norm": 1.1171875, "learning_rate": 1.0597245613142807e-05, "loss": 0.367, "step": 10568 }, { "epoch": 1.4500240104273856, "grad_norm": 1.203125, "learning_rate": 1.0595804702413044e-05, "loss": 0.4068, "step": 10569 }, { "epoch": 1.450161212869589, "grad_norm": 1.2421875, "learning_rate": 1.0594363779268863e-05, "loss": 0.4815, "step": 10570 }, { "epoch": 1.4502984153117926, "grad_norm": 1.2421875, "learning_rate": 1.0592922843740295e-05, "loss": 0.4172, "step": 10571 }, { "epoch": 1.450435617753996, "grad_norm": 1.140625, "learning_rate": 1.0591481895857358e-05, "loss": 0.3932, "step": 10572 }, { "epoch": 1.4505728201961996, "grad_norm": 1.21875, "learning_rate": 1.059004093565008e-05, "loss": 0.4407, "step": 10573 }, { "epoch": 1.450710022638403, "grad_norm": 1.3046875, "learning_rate": 1.058859996314848e-05, "loss": 0.4224, "step": 10574 }, { "epoch": 1.4508472250806064, "grad_norm": 1.171875, "learning_rate": 1.058715897838259e-05, "loss": 0.4301, "step": 10575 }, { "epoch": 1.4509844275228099, "grad_norm": 1.21875, "learning_rate": 1.0585717981382431e-05, "loss": 0.4253, "step": 10576 }, { "epoch": 1.4511216299650134, "grad_norm": 1.2890625, "learning_rate": 1.0584276972178027e-05, "loss": 0.4579, "step": 10577 }, { "epoch": 1.4512588324072169, "grad_norm": 1.2578125, "learning_rate": 1.0582835950799407e-05, "loss": 0.4476, "step": 10578 }, { "epoch": 1.4513960348494204, "grad_norm": 1.2421875, "learning_rate": 1.058139491727659e-05, "loss": 0.493, "step": 10579 }, { "epoch": 1.4515332372916239, "grad_norm": 1.265625, "learning_rate": 1.057995387163961e-05, "loss": 0.433, "step": 10580 }, { "epoch": 1.4516704397338271, "grad_norm": 1.1328125, "learning_rate": 1.0578512813918489e-05, "loss": 0.3791, "step": 10581 }, { "epoch": 1.4518076421760306, "grad_norm": 1.1875, "learning_rate": 1.0577071744143252e-05, "loss": 0.4473, "step": 10582 }, { "epoch": 1.4519448446182341, "grad_norm": 1.2265625, "learning_rate": 1.057563066234393e-05, "loss": 0.4611, "step": 10583 }, { "epoch": 1.4520820470604376, "grad_norm": 1.2890625, "learning_rate": 1.0574189568550548e-05, "loss": 0.4564, "step": 10584 }, { "epoch": 1.4522192495026411, "grad_norm": 1.265625, "learning_rate": 1.0572748462793135e-05, "loss": 0.4145, "step": 10585 }, { "epoch": 1.4523564519448446, "grad_norm": 1.8515625, "learning_rate": 1.0571307345101711e-05, "loss": 0.5768, "step": 10586 }, { "epoch": 1.4524936543870481, "grad_norm": 1.2421875, "learning_rate": 1.0569866215506312e-05, "loss": 0.4372, "step": 10587 }, { "epoch": 1.4526308568292516, "grad_norm": 1.2265625, "learning_rate": 1.0568425074036962e-05, "loss": 0.3997, "step": 10588 }, { "epoch": 1.4527680592714551, "grad_norm": 1.3125, "learning_rate": 1.056698392072369e-05, "loss": 0.4921, "step": 10589 }, { "epoch": 1.4529052617136586, "grad_norm": 1.359375, "learning_rate": 1.0565542755596526e-05, "loss": 0.4967, "step": 10590 }, { "epoch": 1.453042464155862, "grad_norm": 1.234375, "learning_rate": 1.0564101578685495e-05, "loss": 0.3945, "step": 10591 }, { "epoch": 1.4531796665980654, "grad_norm": 1.3359375, "learning_rate": 1.0562660390020627e-05, "loss": 0.5209, "step": 10592 }, { "epoch": 1.453316869040269, "grad_norm": 1.2890625, "learning_rate": 1.0561219189631958e-05, "loss": 0.4989, "step": 10593 }, { "epoch": 1.4534540714824724, "grad_norm": 1.234375, "learning_rate": 1.0559777977549505e-05, "loss": 0.4658, "step": 10594 }, { "epoch": 1.453591273924676, "grad_norm": 1.1953125, "learning_rate": 1.0558336753803307e-05, "loss": 0.4055, "step": 10595 }, { "epoch": 1.4537284763668794, "grad_norm": 1.3046875, "learning_rate": 1.0556895518423392e-05, "loss": 0.4738, "step": 10596 }, { "epoch": 1.4538656788090827, "grad_norm": 1.2890625, "learning_rate": 1.0555454271439787e-05, "loss": 0.4804, "step": 10597 }, { "epoch": 1.4540028812512862, "grad_norm": 1.28125, "learning_rate": 1.0554013012882527e-05, "loss": 0.4424, "step": 10598 }, { "epoch": 1.4541400836934897, "grad_norm": 1.28125, "learning_rate": 1.0552571742781638e-05, "loss": 0.4498, "step": 10599 }, { "epoch": 1.4542772861356932, "grad_norm": 1.234375, "learning_rate": 1.0551130461167154e-05, "loss": 0.4327, "step": 10600 }, { "epoch": 1.4544144885778967, "grad_norm": 1.265625, "learning_rate": 1.0549689168069106e-05, "loss": 0.4481, "step": 10601 }, { "epoch": 1.4545516910201002, "grad_norm": 1.28125, "learning_rate": 1.0548247863517525e-05, "loss": 0.4801, "step": 10602 }, { "epoch": 1.4546888934623037, "grad_norm": 1.2578125, "learning_rate": 1.0546806547542441e-05, "loss": 0.4743, "step": 10603 }, { "epoch": 1.4548260959045072, "grad_norm": 1.3515625, "learning_rate": 1.0545365220173887e-05, "loss": 0.502, "step": 10604 }, { "epoch": 1.4549632983467107, "grad_norm": 1.3046875, "learning_rate": 1.0543923881441896e-05, "loss": 0.4759, "step": 10605 }, { "epoch": 1.4551005007889142, "grad_norm": 1.140625, "learning_rate": 1.0542482531376499e-05, "loss": 0.3857, "step": 10606 }, { "epoch": 1.4552377032311175, "grad_norm": 1.328125, "learning_rate": 1.0541041170007729e-05, "loss": 0.488, "step": 10607 }, { "epoch": 1.455374905673321, "grad_norm": 1.328125, "learning_rate": 1.0539599797365616e-05, "loss": 0.4573, "step": 10608 }, { "epoch": 1.4555121081155245, "grad_norm": 1.1953125, "learning_rate": 1.0538158413480197e-05, "loss": 0.4838, "step": 10609 }, { "epoch": 1.455649310557728, "grad_norm": 1.1796875, "learning_rate": 1.0536717018381504e-05, "loss": 0.4317, "step": 10610 }, { "epoch": 1.4557865129999314, "grad_norm": 1.171875, "learning_rate": 1.053527561209957e-05, "loss": 0.436, "step": 10611 }, { "epoch": 1.455923715442135, "grad_norm": 1.4375, "learning_rate": 1.053383419466443e-05, "loss": 0.5048, "step": 10612 }, { "epoch": 1.4560609178843382, "grad_norm": 1.2265625, "learning_rate": 1.0532392766106114e-05, "loss": 0.4337, "step": 10613 }, { "epoch": 1.4561981203265417, "grad_norm": 1.15625, "learning_rate": 1.0530951326454662e-05, "loss": 0.4456, "step": 10614 }, { "epoch": 1.4563353227687452, "grad_norm": 1.2890625, "learning_rate": 1.0529509875740103e-05, "loss": 0.49, "step": 10615 }, { "epoch": 1.4564725252109487, "grad_norm": 1.1875, "learning_rate": 1.0528068413992477e-05, "loss": 0.4368, "step": 10616 }, { "epoch": 1.4566097276531522, "grad_norm": 1.3671875, "learning_rate": 1.0526626941241811e-05, "loss": 0.4787, "step": 10617 }, { "epoch": 1.4567469300953557, "grad_norm": 1.0078125, "learning_rate": 1.0525185457518148e-05, "loss": 0.3326, "step": 10618 }, { "epoch": 1.4568841325375592, "grad_norm": 1.140625, "learning_rate": 1.0523743962851521e-05, "loss": 0.4436, "step": 10619 }, { "epoch": 1.4570213349797627, "grad_norm": 1.296875, "learning_rate": 1.0522302457271963e-05, "loss": 0.4903, "step": 10620 }, { "epoch": 1.4571585374219662, "grad_norm": 1.2578125, "learning_rate": 1.0520860940809512e-05, "loss": 0.359, "step": 10621 }, { "epoch": 1.4572957398641697, "grad_norm": 1.1796875, "learning_rate": 1.0519419413494205e-05, "loss": 0.4247, "step": 10622 }, { "epoch": 1.457432942306373, "grad_norm": 1.359375, "learning_rate": 1.0517977875356074e-05, "loss": 0.5157, "step": 10623 }, { "epoch": 1.4575701447485765, "grad_norm": 1.2265625, "learning_rate": 1.051653632642516e-05, "loss": 0.4528, "step": 10624 }, { "epoch": 1.45770734719078, "grad_norm": 1.21875, "learning_rate": 1.0515094766731497e-05, "loss": 0.4561, "step": 10625 }, { "epoch": 1.4578445496329835, "grad_norm": 1.3046875, "learning_rate": 1.0513653196305122e-05, "loss": 0.4567, "step": 10626 }, { "epoch": 1.457981752075187, "grad_norm": 1.3125, "learning_rate": 1.0512211615176073e-05, "loss": 0.5131, "step": 10627 }, { "epoch": 1.4581189545173905, "grad_norm": 1.140625, "learning_rate": 1.0510770023374388e-05, "loss": 0.4211, "step": 10628 }, { "epoch": 1.4582561569595938, "grad_norm": 1.203125, "learning_rate": 1.0509328420930105e-05, "loss": 0.4529, "step": 10629 }, { "epoch": 1.4583933594017973, "grad_norm": 1.25, "learning_rate": 1.050788680787326e-05, "loss": 0.4865, "step": 10630 }, { "epoch": 1.4585305618440008, "grad_norm": 1.2734375, "learning_rate": 1.050644518423389e-05, "loss": 0.4286, "step": 10631 }, { "epoch": 1.4586677642862043, "grad_norm": 1.296875, "learning_rate": 1.0505003550042034e-05, "loss": 0.4788, "step": 10632 }, { "epoch": 1.4588049667284078, "grad_norm": 1.234375, "learning_rate": 1.0503561905327732e-05, "loss": 0.4807, "step": 10633 }, { "epoch": 1.4589421691706113, "grad_norm": 1.21875, "learning_rate": 1.0502120250121023e-05, "loss": 0.4238, "step": 10634 }, { "epoch": 1.4590793716128148, "grad_norm": 1.09375, "learning_rate": 1.0500678584451945e-05, "loss": 0.3349, "step": 10635 }, { "epoch": 1.4592165740550183, "grad_norm": 1.1015625, "learning_rate": 1.0499236908350534e-05, "loss": 0.3729, "step": 10636 }, { "epoch": 1.4593537764972218, "grad_norm": 1.1875, "learning_rate": 1.0497795221846834e-05, "loss": 0.417, "step": 10637 }, { "epoch": 1.4594909789394253, "grad_norm": 1.1953125, "learning_rate": 1.0496353524970883e-05, "loss": 0.4351, "step": 10638 }, { "epoch": 1.4596281813816285, "grad_norm": 1.25, "learning_rate": 1.049491181775272e-05, "loss": 0.4353, "step": 10639 }, { "epoch": 1.459765383823832, "grad_norm": 1.3046875, "learning_rate": 1.0493470100222385e-05, "loss": 0.4293, "step": 10640 }, { "epoch": 1.4599025862660355, "grad_norm": 1.3359375, "learning_rate": 1.049202837240992e-05, "loss": 0.4802, "step": 10641 }, { "epoch": 1.460039788708239, "grad_norm": 1.1875, "learning_rate": 1.049058663434536e-05, "loss": 0.3542, "step": 10642 }, { "epoch": 1.4601769911504425, "grad_norm": 1.296875, "learning_rate": 1.0489144886058753e-05, "loss": 0.4833, "step": 10643 }, { "epoch": 1.460314193592646, "grad_norm": 1.296875, "learning_rate": 1.0487703127580136e-05, "loss": 0.3974, "step": 10644 }, { "epoch": 1.4604513960348493, "grad_norm": 1.2265625, "learning_rate": 1.0486261358939547e-05, "loss": 0.4366, "step": 10645 }, { "epoch": 1.4605885984770528, "grad_norm": 1.1953125, "learning_rate": 1.0484819580167033e-05, "loss": 0.4268, "step": 10646 }, { "epoch": 1.4607258009192563, "grad_norm": 1.1171875, "learning_rate": 1.0483377791292635e-05, "loss": 0.4187, "step": 10647 }, { "epoch": 1.4608630033614598, "grad_norm": 1.1484375, "learning_rate": 1.0481935992346392e-05, "loss": 0.4094, "step": 10648 }, { "epoch": 1.4610002058036633, "grad_norm": 1.28125, "learning_rate": 1.0480494183358345e-05, "loss": 0.507, "step": 10649 }, { "epoch": 1.4611374082458668, "grad_norm": 1.21875, "learning_rate": 1.0479052364358537e-05, "loss": 0.4622, "step": 10650 }, { "epoch": 1.4612746106880703, "grad_norm": 1.359375, "learning_rate": 1.0477610535377012e-05, "loss": 0.5015, "step": 10651 }, { "epoch": 1.4614118131302738, "grad_norm": 1.328125, "learning_rate": 1.0476168696443812e-05, "loss": 0.4346, "step": 10652 }, { "epoch": 1.4615490155724773, "grad_norm": 1.171875, "learning_rate": 1.0474726847588978e-05, "loss": 0.3978, "step": 10653 }, { "epoch": 1.4616862180146808, "grad_norm": 1.140625, "learning_rate": 1.0473284988842556e-05, "loss": 0.3991, "step": 10654 }, { "epoch": 1.461823420456884, "grad_norm": 1.2890625, "learning_rate": 1.0471843120234583e-05, "loss": 0.4854, "step": 10655 }, { "epoch": 1.4619606228990876, "grad_norm": 1.265625, "learning_rate": 1.0470401241795114e-05, "loss": 0.4731, "step": 10656 }, { "epoch": 1.462097825341291, "grad_norm": 1.140625, "learning_rate": 1.046895935355418e-05, "loss": 0.3767, "step": 10657 }, { "epoch": 1.4622350277834946, "grad_norm": 1.1484375, "learning_rate": 1.046751745554183e-05, "loss": 0.4054, "step": 10658 }, { "epoch": 1.462372230225698, "grad_norm": 1.234375, "learning_rate": 1.0466075547788107e-05, "loss": 0.4015, "step": 10659 }, { "epoch": 1.4625094326679016, "grad_norm": 1.296875, "learning_rate": 1.0464633630323057e-05, "loss": 0.4738, "step": 10660 }, { "epoch": 1.4626466351101048, "grad_norm": 1.265625, "learning_rate": 1.0463191703176723e-05, "loss": 0.4603, "step": 10661 }, { "epoch": 1.4627838375523083, "grad_norm": 1.2578125, "learning_rate": 1.046174976637915e-05, "loss": 0.4572, "step": 10662 }, { "epoch": 1.4629210399945118, "grad_norm": 1.359375, "learning_rate": 1.0460307819960383e-05, "loss": 0.4947, "step": 10663 }, { "epoch": 1.4630582424367153, "grad_norm": 1.2109375, "learning_rate": 1.0458865863950467e-05, "loss": 0.4098, "step": 10664 }, { "epoch": 1.4631954448789188, "grad_norm": 1.234375, "learning_rate": 1.0457423898379445e-05, "loss": 0.4392, "step": 10665 }, { "epoch": 1.4633326473211223, "grad_norm": 1.3125, "learning_rate": 1.0455981923277365e-05, "loss": 0.4887, "step": 10666 }, { "epoch": 1.4634698497633258, "grad_norm": 1.28125, "learning_rate": 1.0454539938674273e-05, "loss": 0.4915, "step": 10667 }, { "epoch": 1.4636070522055293, "grad_norm": 1.2890625, "learning_rate": 1.045309794460021e-05, "loss": 0.435, "step": 10668 }, { "epoch": 1.4637442546477328, "grad_norm": 1.3046875, "learning_rate": 1.0451655941085226e-05, "loss": 0.4923, "step": 10669 }, { "epoch": 1.4638814570899363, "grad_norm": 1.3046875, "learning_rate": 1.0450213928159365e-05, "loss": 0.5138, "step": 10670 }, { "epoch": 1.4640186595321396, "grad_norm": 1.296875, "learning_rate": 1.0448771905852677e-05, "loss": 0.4438, "step": 10671 }, { "epoch": 1.464155861974343, "grad_norm": 1.1875, "learning_rate": 1.0447329874195205e-05, "loss": 0.4513, "step": 10672 }, { "epoch": 1.4642930644165466, "grad_norm": 1.296875, "learning_rate": 1.0445887833216997e-05, "loss": 0.4736, "step": 10673 }, { "epoch": 1.46443026685875, "grad_norm": 1.34375, "learning_rate": 1.0444445782948099e-05, "loss": 0.5098, "step": 10674 }, { "epoch": 1.4645674693009536, "grad_norm": 1.2265625, "learning_rate": 1.0443003723418563e-05, "loss": 0.5247, "step": 10675 }, { "epoch": 1.464704671743157, "grad_norm": 1.125, "learning_rate": 1.0441561654658428e-05, "loss": 0.3756, "step": 10676 }, { "epoch": 1.4648418741853604, "grad_norm": 1.25, "learning_rate": 1.0440119576697748e-05, "loss": 0.4544, "step": 10677 }, { "epoch": 1.4649790766275639, "grad_norm": 1.265625, "learning_rate": 1.0438677489566567e-05, "loss": 0.4733, "step": 10678 }, { "epoch": 1.4651162790697674, "grad_norm": 1.2578125, "learning_rate": 1.0437235393294934e-05, "loss": 0.4539, "step": 10679 }, { "epoch": 1.4652534815119709, "grad_norm": 1.1640625, "learning_rate": 1.0435793287912896e-05, "loss": 0.4223, "step": 10680 }, { "epoch": 1.4653906839541744, "grad_norm": 1.25, "learning_rate": 1.0434351173450503e-05, "loss": 0.431, "step": 10681 }, { "epoch": 1.4655278863963779, "grad_norm": 1.1953125, "learning_rate": 1.0432909049937804e-05, "loss": 0.4238, "step": 10682 }, { "epoch": 1.4656650888385814, "grad_norm": 1.171875, "learning_rate": 1.0431466917404848e-05, "loss": 0.4046, "step": 10683 }, { "epoch": 1.4658022912807849, "grad_norm": 1.0546875, "learning_rate": 1.0430024775881684e-05, "loss": 0.3524, "step": 10684 }, { "epoch": 1.4659394937229884, "grad_norm": 1.1484375, "learning_rate": 1.0428582625398357e-05, "loss": 0.4356, "step": 10685 }, { "epoch": 1.4660766961651919, "grad_norm": 1.328125, "learning_rate": 1.0427140465984917e-05, "loss": 0.5304, "step": 10686 }, { "epoch": 1.4662138986073951, "grad_norm": 1.265625, "learning_rate": 1.0425698297671419e-05, "loss": 0.43, "step": 10687 }, { "epoch": 1.4663511010495986, "grad_norm": 1.234375, "learning_rate": 1.0424256120487905e-05, "loss": 0.4974, "step": 10688 }, { "epoch": 1.4664883034918021, "grad_norm": 1.1953125, "learning_rate": 1.042281393446443e-05, "loss": 0.3958, "step": 10689 }, { "epoch": 1.4666255059340056, "grad_norm": 1.125, "learning_rate": 1.0421371739631044e-05, "loss": 0.4118, "step": 10690 }, { "epoch": 1.4667627083762091, "grad_norm": 1.2109375, "learning_rate": 1.0419929536017791e-05, "loss": 0.4244, "step": 10691 }, { "epoch": 1.4668999108184126, "grad_norm": 1.15625, "learning_rate": 1.0418487323654731e-05, "loss": 0.435, "step": 10692 }, { "epoch": 1.467037113260616, "grad_norm": 1.3203125, "learning_rate": 1.0417045102571907e-05, "loss": 0.4462, "step": 10693 }, { "epoch": 1.4671743157028194, "grad_norm": 1.2890625, "learning_rate": 1.041560287279937e-05, "loss": 0.4632, "step": 10694 }, { "epoch": 1.467311518145023, "grad_norm": 1.25, "learning_rate": 1.0414160634367177e-05, "loss": 0.444, "step": 10695 }, { "epoch": 1.4674487205872264, "grad_norm": 1.1640625, "learning_rate": 1.0412718387305372e-05, "loss": 0.4235, "step": 10696 }, { "epoch": 1.46758592302943, "grad_norm": 1.328125, "learning_rate": 1.0411276131644008e-05, "loss": 0.4915, "step": 10697 }, { "epoch": 1.4677231254716334, "grad_norm": 1.234375, "learning_rate": 1.040983386741314e-05, "loss": 0.4629, "step": 10698 }, { "epoch": 1.467860327913837, "grad_norm": 1.2421875, "learning_rate": 1.0408391594642815e-05, "loss": 0.4588, "step": 10699 }, { "epoch": 1.4679975303560404, "grad_norm": 1.2421875, "learning_rate": 1.0406949313363089e-05, "loss": 0.4583, "step": 10700 }, { "epoch": 1.468134732798244, "grad_norm": 1.2265625, "learning_rate": 1.0405507023604011e-05, "loss": 0.4688, "step": 10701 }, { "epoch": 1.4682719352404474, "grad_norm": 1.3046875, "learning_rate": 1.0404064725395632e-05, "loss": 0.4965, "step": 10702 }, { "epoch": 1.4684091376826507, "grad_norm": 1.234375, "learning_rate": 1.0402622418768006e-05, "loss": 0.4512, "step": 10703 }, { "epoch": 1.4685463401248542, "grad_norm": 1.2578125, "learning_rate": 1.0401180103751187e-05, "loss": 0.3848, "step": 10704 }, { "epoch": 1.4686835425670577, "grad_norm": 1.1953125, "learning_rate": 1.0399737780375228e-05, "loss": 0.4392, "step": 10705 }, { "epoch": 1.4688207450092612, "grad_norm": 1.1484375, "learning_rate": 1.0398295448670175e-05, "loss": 0.438, "step": 10706 }, { "epoch": 1.4689579474514647, "grad_norm": 1.1328125, "learning_rate": 1.0396853108666089e-05, "loss": 0.4085, "step": 10707 }, { "epoch": 1.4690951498936682, "grad_norm": 1.1953125, "learning_rate": 1.039541076039302e-05, "loss": 0.4295, "step": 10708 }, { "epoch": 1.4692323523358715, "grad_norm": 1.3203125, "learning_rate": 1.0393968403881023e-05, "loss": 0.4947, "step": 10709 }, { "epoch": 1.469369554778075, "grad_norm": 1.2578125, "learning_rate": 1.0392526039160146e-05, "loss": 0.4592, "step": 10710 }, { "epoch": 1.4695067572202785, "grad_norm": 1.296875, "learning_rate": 1.039108366626045e-05, "loss": 0.4763, "step": 10711 }, { "epoch": 1.469643959662482, "grad_norm": 1.2265625, "learning_rate": 1.0389641285211982e-05, "loss": 0.4478, "step": 10712 }, { "epoch": 1.4697811621046855, "grad_norm": 1.21875, "learning_rate": 1.03881988960448e-05, "loss": 0.4407, "step": 10713 }, { "epoch": 1.469918364546889, "grad_norm": 1.2578125, "learning_rate": 1.038675649878896e-05, "loss": 0.4538, "step": 10714 }, { "epoch": 1.4700555669890925, "grad_norm": 1.2109375, "learning_rate": 1.0385314093474513e-05, "loss": 0.4585, "step": 10715 }, { "epoch": 1.470192769431296, "grad_norm": 1.2265625, "learning_rate": 1.0383871680131512e-05, "loss": 0.4687, "step": 10716 }, { "epoch": 1.4703299718734995, "grad_norm": 1.2265625, "learning_rate": 1.0382429258790017e-05, "loss": 0.425, "step": 10717 }, { "epoch": 1.470467174315703, "grad_norm": 1.40625, "learning_rate": 1.038098682948008e-05, "loss": 0.5048, "step": 10718 }, { "epoch": 1.4706043767579062, "grad_norm": 1.1484375, "learning_rate": 1.0379544392231755e-05, "loss": 0.4104, "step": 10719 }, { "epoch": 1.4707415792001097, "grad_norm": 1.28125, "learning_rate": 1.0378101947075096e-05, "loss": 0.4787, "step": 10720 }, { "epoch": 1.4708787816423132, "grad_norm": 1.09375, "learning_rate": 1.0376659494040162e-05, "loss": 0.3801, "step": 10721 }, { "epoch": 1.4710159840845167, "grad_norm": 1.28125, "learning_rate": 1.037521703315701e-05, "loss": 0.4756, "step": 10722 }, { "epoch": 1.4711531865267202, "grad_norm": 1.1875, "learning_rate": 1.037377456445569e-05, "loss": 0.4297, "step": 10723 }, { "epoch": 1.4712903889689237, "grad_norm": 1.1875, "learning_rate": 1.0372332087966259e-05, "loss": 0.4198, "step": 10724 }, { "epoch": 1.471427591411127, "grad_norm": 1.28125, "learning_rate": 1.0370889603718775e-05, "loss": 0.4198, "step": 10725 }, { "epoch": 1.4715647938533305, "grad_norm": 1.1875, "learning_rate": 1.0369447111743293e-05, "loss": 0.4299, "step": 10726 }, { "epoch": 1.471701996295534, "grad_norm": 1.3203125, "learning_rate": 1.0368004612069872e-05, "loss": 0.4718, "step": 10727 }, { "epoch": 1.4718391987377375, "grad_norm": 1.234375, "learning_rate": 1.0366562104728566e-05, "loss": 0.4104, "step": 10728 }, { "epoch": 1.471976401179941, "grad_norm": 1.140625, "learning_rate": 1.0365119589749433e-05, "loss": 0.3797, "step": 10729 }, { "epoch": 1.4721136036221445, "grad_norm": 1.2734375, "learning_rate": 1.0363677067162526e-05, "loss": 0.4844, "step": 10730 }, { "epoch": 1.472250806064348, "grad_norm": 1.28125, "learning_rate": 1.0362234536997906e-05, "loss": 0.465, "step": 10731 }, { "epoch": 1.4723880085065515, "grad_norm": 1.2421875, "learning_rate": 1.0360791999285627e-05, "loss": 0.4379, "step": 10732 }, { "epoch": 1.472525210948755, "grad_norm": 1.328125, "learning_rate": 1.035934945405575e-05, "loss": 0.49, "step": 10733 }, { "epoch": 1.4726624133909585, "grad_norm": 1.234375, "learning_rate": 1.0357906901338331e-05, "loss": 0.4625, "step": 10734 }, { "epoch": 1.4727996158331618, "grad_norm": 1.2421875, "learning_rate": 1.0356464341163424e-05, "loss": 0.4477, "step": 10735 }, { "epoch": 1.4729368182753653, "grad_norm": 1.28125, "learning_rate": 1.0355021773561092e-05, "loss": 0.4738, "step": 10736 }, { "epoch": 1.4730740207175688, "grad_norm": 1.0078125, "learning_rate": 1.0353579198561392e-05, "loss": 0.3055, "step": 10737 }, { "epoch": 1.4732112231597723, "grad_norm": 1.203125, "learning_rate": 1.0352136616194379e-05, "loss": 0.4007, "step": 10738 }, { "epoch": 1.4733484256019758, "grad_norm": 1.25, "learning_rate": 1.0350694026490113e-05, "loss": 0.4492, "step": 10739 }, { "epoch": 1.4734856280441793, "grad_norm": 1.2421875, "learning_rate": 1.0349251429478652e-05, "loss": 0.4588, "step": 10740 }, { "epoch": 1.4736228304863825, "grad_norm": 1.375, "learning_rate": 1.0347808825190058e-05, "loss": 0.4528, "step": 10741 }, { "epoch": 1.473760032928586, "grad_norm": 1.1953125, "learning_rate": 1.0346366213654381e-05, "loss": 0.4128, "step": 10742 }, { "epoch": 1.4738972353707895, "grad_norm": 1.1875, "learning_rate": 1.034492359490169e-05, "loss": 0.4303, "step": 10743 }, { "epoch": 1.474034437812993, "grad_norm": 1.2109375, "learning_rate": 1.0343480968962036e-05, "loss": 0.4208, "step": 10744 }, { "epoch": 1.4741716402551965, "grad_norm": 1.390625, "learning_rate": 1.0342038335865483e-05, "loss": 0.4869, "step": 10745 }, { "epoch": 1.4743088426974, "grad_norm": 1.1953125, "learning_rate": 1.034059569564209e-05, "loss": 0.4242, "step": 10746 }, { "epoch": 1.4744460451396035, "grad_norm": 1.25, "learning_rate": 1.0339153048321916e-05, "loss": 0.4703, "step": 10747 }, { "epoch": 1.474583247581807, "grad_norm": 1.1875, "learning_rate": 1.0337710393935013e-05, "loss": 0.4111, "step": 10748 }, { "epoch": 1.4747204500240105, "grad_norm": 1.265625, "learning_rate": 1.0336267732511453e-05, "loss": 0.4643, "step": 10749 }, { "epoch": 1.474857652466214, "grad_norm": 1.2578125, "learning_rate": 1.0334825064081288e-05, "loss": 0.4423, "step": 10750 }, { "epoch": 1.4749948549084173, "grad_norm": 1.21875, "learning_rate": 1.033338238867458e-05, "loss": 0.4662, "step": 10751 }, { "epoch": 1.4751320573506208, "grad_norm": 1.1796875, "learning_rate": 1.033193970632139e-05, "loss": 0.4158, "step": 10752 }, { "epoch": 1.4752692597928243, "grad_norm": 1.140625, "learning_rate": 1.0330497017051777e-05, "loss": 0.3622, "step": 10753 }, { "epoch": 1.4754064622350278, "grad_norm": 1.1953125, "learning_rate": 1.0329054320895801e-05, "loss": 0.4264, "step": 10754 }, { "epoch": 1.4755436646772313, "grad_norm": 1.2265625, "learning_rate": 1.0327611617883526e-05, "loss": 0.4559, "step": 10755 }, { "epoch": 1.4756808671194348, "grad_norm": 1.1640625, "learning_rate": 1.0326168908045009e-05, "loss": 0.381, "step": 10756 }, { "epoch": 1.475818069561638, "grad_norm": 1.21875, "learning_rate": 1.0324726191410312e-05, "loss": 0.4489, "step": 10757 }, { "epoch": 1.4759552720038416, "grad_norm": 1.2734375, "learning_rate": 1.0323283468009495e-05, "loss": 0.4518, "step": 10758 }, { "epoch": 1.476092474446045, "grad_norm": 1.234375, "learning_rate": 1.032184073787262e-05, "loss": 0.4687, "step": 10759 }, { "epoch": 1.4762296768882486, "grad_norm": 1.109375, "learning_rate": 1.032039800102975e-05, "loss": 0.3681, "step": 10760 }, { "epoch": 1.476366879330452, "grad_norm": 1.21875, "learning_rate": 1.0318955257510946e-05, "loss": 0.3944, "step": 10761 }, { "epoch": 1.4765040817726556, "grad_norm": 1.171875, "learning_rate": 1.0317512507346266e-05, "loss": 0.4039, "step": 10762 }, { "epoch": 1.476641284214859, "grad_norm": 1.1640625, "learning_rate": 1.0316069750565776e-05, "loss": 0.3953, "step": 10763 }, { "epoch": 1.4767784866570626, "grad_norm": 1.1796875, "learning_rate": 1.0314626987199535e-05, "loss": 0.4613, "step": 10764 }, { "epoch": 1.476915689099266, "grad_norm": 1.3046875, "learning_rate": 1.0313184217277604e-05, "loss": 0.4643, "step": 10765 }, { "epoch": 1.4770528915414696, "grad_norm": 1.34375, "learning_rate": 1.031174144083005e-05, "loss": 0.4851, "step": 10766 }, { "epoch": 1.4771900939836728, "grad_norm": 1.1953125, "learning_rate": 1.031029865788693e-05, "loss": 0.3973, "step": 10767 }, { "epoch": 1.4773272964258763, "grad_norm": 1.3203125, "learning_rate": 1.030885586847831e-05, "loss": 0.4994, "step": 10768 }, { "epoch": 1.4774644988680798, "grad_norm": 1.296875, "learning_rate": 1.0307413072634249e-05, "loss": 0.4645, "step": 10769 }, { "epoch": 1.4776017013102833, "grad_norm": 1.25, "learning_rate": 1.0305970270384813e-05, "loss": 0.4972, "step": 10770 }, { "epoch": 1.4777389037524868, "grad_norm": 1.2421875, "learning_rate": 1.0304527461760065e-05, "loss": 0.4369, "step": 10771 }, { "epoch": 1.4778761061946903, "grad_norm": 1.1328125, "learning_rate": 1.0303084646790067e-05, "loss": 0.3539, "step": 10772 }, { "epoch": 1.4780133086368936, "grad_norm": 1.2265625, "learning_rate": 1.030164182550488e-05, "loss": 0.4472, "step": 10773 }, { "epoch": 1.4781505110790971, "grad_norm": 1.203125, "learning_rate": 1.030019899793457e-05, "loss": 0.4186, "step": 10774 }, { "epoch": 1.4782877135213006, "grad_norm": 1.2265625, "learning_rate": 1.0298756164109196e-05, "loss": 0.4197, "step": 10775 }, { "epoch": 1.4784249159635041, "grad_norm": 1.3359375, "learning_rate": 1.0297313324058827e-05, "loss": 0.4458, "step": 10776 }, { "epoch": 1.4785621184057076, "grad_norm": 1.28125, "learning_rate": 1.0295870477813524e-05, "loss": 0.454, "step": 10777 }, { "epoch": 1.4786993208479111, "grad_norm": 1.2421875, "learning_rate": 1.029442762540335e-05, "loss": 0.4874, "step": 10778 }, { "epoch": 1.4788365232901146, "grad_norm": 1.2734375, "learning_rate": 1.0292984766858367e-05, "loss": 0.5186, "step": 10779 }, { "epoch": 1.4789737257323181, "grad_norm": 1.25, "learning_rate": 1.0291541902208646e-05, "loss": 0.4458, "step": 10780 }, { "epoch": 1.4791109281745216, "grad_norm": 1.171875, "learning_rate": 1.0290099031484244e-05, "loss": 0.4149, "step": 10781 }, { "epoch": 1.479248130616725, "grad_norm": 1.0859375, "learning_rate": 1.0288656154715231e-05, "loss": 0.3731, "step": 10782 }, { "epoch": 1.4793853330589284, "grad_norm": 1.2734375, "learning_rate": 1.0287213271931667e-05, "loss": 0.4677, "step": 10783 }, { "epoch": 1.4795225355011319, "grad_norm": 1.1953125, "learning_rate": 1.0285770383163616e-05, "loss": 0.4037, "step": 10784 }, { "epoch": 1.4796597379433354, "grad_norm": 1.21875, "learning_rate": 1.0284327488441144e-05, "loss": 0.4655, "step": 10785 }, { "epoch": 1.4797969403855389, "grad_norm": 1.203125, "learning_rate": 1.0282884587794315e-05, "loss": 0.4695, "step": 10786 }, { "epoch": 1.4799341428277424, "grad_norm": 1.2578125, "learning_rate": 1.0281441681253195e-05, "loss": 0.4448, "step": 10787 }, { "epoch": 1.4800713452699459, "grad_norm": 1.3203125, "learning_rate": 1.027999876884785e-05, "loss": 0.4943, "step": 10788 }, { "epoch": 1.4802085477121492, "grad_norm": 1.109375, "learning_rate": 1.0278555850608343e-05, "loss": 0.4042, "step": 10789 }, { "epoch": 1.4803457501543527, "grad_norm": 1.09375, "learning_rate": 1.0277112926564738e-05, "loss": 0.351, "step": 10790 }, { "epoch": 1.4804829525965562, "grad_norm": 1.0703125, "learning_rate": 1.0275669996747106e-05, "loss": 0.3344, "step": 10791 }, { "epoch": 1.4806201550387597, "grad_norm": 1.28125, "learning_rate": 1.0274227061185506e-05, "loss": 0.4813, "step": 10792 }, { "epoch": 1.4807573574809632, "grad_norm": 1.3125, "learning_rate": 1.0272784119910008e-05, "loss": 0.4845, "step": 10793 }, { "epoch": 1.4808945599231667, "grad_norm": 1.234375, "learning_rate": 1.0271341172950675e-05, "loss": 0.4637, "step": 10794 }, { "epoch": 1.4810317623653702, "grad_norm": 1.3359375, "learning_rate": 1.0269898220337574e-05, "loss": 0.464, "step": 10795 }, { "epoch": 1.4811689648075737, "grad_norm": 1.328125, "learning_rate": 1.026845526210077e-05, "loss": 0.5025, "step": 10796 }, { "epoch": 1.4813061672497772, "grad_norm": 1.1328125, "learning_rate": 1.026701229827033e-05, "loss": 0.3685, "step": 10797 }, { "epoch": 1.4814433696919806, "grad_norm": 1.1953125, "learning_rate": 1.026556932887632e-05, "loss": 0.381, "step": 10798 }, { "epoch": 1.481580572134184, "grad_norm": 1.15625, "learning_rate": 1.0264126353948804e-05, "loss": 0.3895, "step": 10799 }, { "epoch": 1.4817177745763874, "grad_norm": 1.3671875, "learning_rate": 1.0262683373517855e-05, "loss": 0.5305, "step": 10800 }, { "epoch": 1.481854977018591, "grad_norm": 1.1640625, "learning_rate": 1.0261240387613529e-05, "loss": 0.3643, "step": 10801 }, { "epoch": 1.4819921794607944, "grad_norm": 1.328125, "learning_rate": 1.0259797396265902e-05, "loss": 0.4569, "step": 10802 }, { "epoch": 1.482129381902998, "grad_norm": 1.0859375, "learning_rate": 1.0258354399505037e-05, "loss": 0.3658, "step": 10803 }, { "epoch": 1.4822665843452014, "grad_norm": 1.1875, "learning_rate": 1.0256911397361001e-05, "loss": 0.4102, "step": 10804 }, { "epoch": 1.4824037867874047, "grad_norm": 1.265625, "learning_rate": 1.0255468389863859e-05, "loss": 0.4247, "step": 10805 }, { "epoch": 1.4825409892296082, "grad_norm": 1.21875, "learning_rate": 1.0254025377043681e-05, "loss": 0.4934, "step": 10806 }, { "epoch": 1.4826781916718117, "grad_norm": 1.3046875, "learning_rate": 1.0252582358930535e-05, "loss": 0.4353, "step": 10807 }, { "epoch": 1.4828153941140152, "grad_norm": 1.265625, "learning_rate": 1.0251139335554488e-05, "loss": 0.4701, "step": 10808 }, { "epoch": 1.4829525965562187, "grad_norm": 1.2265625, "learning_rate": 1.0249696306945602e-05, "loss": 0.4416, "step": 10809 }, { "epoch": 1.4830897989984222, "grad_norm": 1.328125, "learning_rate": 1.024825327313395e-05, "loss": 0.5412, "step": 10810 }, { "epoch": 1.4832270014406257, "grad_norm": 1.3203125, "learning_rate": 1.0246810234149597e-05, "loss": 0.4436, "step": 10811 }, { "epoch": 1.4833642038828292, "grad_norm": 1.2578125, "learning_rate": 1.0245367190022612e-05, "loss": 0.468, "step": 10812 }, { "epoch": 1.4835014063250327, "grad_norm": 1.265625, "learning_rate": 1.0243924140783064e-05, "loss": 0.4848, "step": 10813 }, { "epoch": 1.4836386087672362, "grad_norm": 1.1484375, "learning_rate": 1.0242481086461015e-05, "loss": 0.396, "step": 10814 }, { "epoch": 1.4837758112094395, "grad_norm": 1.265625, "learning_rate": 1.0241038027086543e-05, "loss": 0.4309, "step": 10815 }, { "epoch": 1.483913013651643, "grad_norm": 1.21875, "learning_rate": 1.0239594962689707e-05, "loss": 0.4583, "step": 10816 }, { "epoch": 1.4840502160938465, "grad_norm": 1.1796875, "learning_rate": 1.0238151893300583e-05, "loss": 0.4073, "step": 10817 }, { "epoch": 1.48418741853605, "grad_norm": 1.140625, "learning_rate": 1.0236708818949233e-05, "loss": 0.3924, "step": 10818 }, { "epoch": 1.4843246209782535, "grad_norm": 1.3125, "learning_rate": 1.0235265739665725e-05, "loss": 0.4778, "step": 10819 }, { "epoch": 1.484461823420457, "grad_norm": 1.1875, "learning_rate": 1.0233822655480133e-05, "loss": 0.421, "step": 10820 }, { "epoch": 1.4845990258626602, "grad_norm": 1.359375, "learning_rate": 1.023237956642252e-05, "loss": 0.4533, "step": 10821 }, { "epoch": 1.4847362283048637, "grad_norm": 1.2890625, "learning_rate": 1.0230936472522961e-05, "loss": 0.4676, "step": 10822 }, { "epoch": 1.4848734307470672, "grad_norm": 1.3125, "learning_rate": 1.0229493373811518e-05, "loss": 0.4611, "step": 10823 }, { "epoch": 1.4850106331892707, "grad_norm": 1.234375, "learning_rate": 1.0228050270318266e-05, "loss": 0.4837, "step": 10824 }, { "epoch": 1.4851478356314742, "grad_norm": 1.2734375, "learning_rate": 1.0226607162073272e-05, "loss": 0.4671, "step": 10825 }, { "epoch": 1.4852850380736777, "grad_norm": 1.1875, "learning_rate": 1.0225164049106604e-05, "loss": 0.43, "step": 10826 }, { "epoch": 1.4854222405158812, "grad_norm": 1.21875, "learning_rate": 1.022372093144833e-05, "loss": 0.4962, "step": 10827 }, { "epoch": 1.4855594429580847, "grad_norm": 1.3203125, "learning_rate": 1.0222277809128525e-05, "loss": 0.4661, "step": 10828 }, { "epoch": 1.4856966454002882, "grad_norm": 1.34375, "learning_rate": 1.0220834682177253e-05, "loss": 0.4861, "step": 10829 }, { "epoch": 1.4858338478424917, "grad_norm": 1.265625, "learning_rate": 1.0219391550624582e-05, "loss": 0.467, "step": 10830 }, { "epoch": 1.485971050284695, "grad_norm": 1.3046875, "learning_rate": 1.0217948414500588e-05, "loss": 0.467, "step": 10831 }, { "epoch": 1.4861082527268985, "grad_norm": 1.203125, "learning_rate": 1.0216505273835335e-05, "loss": 0.3903, "step": 10832 }, { "epoch": 1.486245455169102, "grad_norm": 1.2734375, "learning_rate": 1.02150621286589e-05, "loss": 0.468, "step": 10833 }, { "epoch": 1.4863826576113055, "grad_norm": 1.1796875, "learning_rate": 1.0213618979001345e-05, "loss": 0.4969, "step": 10834 }, { "epoch": 1.486519860053509, "grad_norm": 1.2265625, "learning_rate": 1.0212175824892745e-05, "loss": 0.4823, "step": 10835 }, { "epoch": 1.4866570624957125, "grad_norm": 1.3046875, "learning_rate": 1.0210732666363167e-05, "loss": 0.4721, "step": 10836 }, { "epoch": 1.4867942649379158, "grad_norm": 1.2421875, "learning_rate": 1.0209289503442683e-05, "loss": 0.48, "step": 10837 }, { "epoch": 1.4869314673801193, "grad_norm": 1.265625, "learning_rate": 1.0207846336161364e-05, "loss": 0.4481, "step": 10838 }, { "epoch": 1.4870686698223228, "grad_norm": 1.234375, "learning_rate": 1.0206403164549278e-05, "loss": 0.4771, "step": 10839 }, { "epoch": 1.4872058722645263, "grad_norm": 1.1484375, "learning_rate": 1.0204959988636498e-05, "loss": 0.4335, "step": 10840 }, { "epoch": 1.4873430747067298, "grad_norm": 1.2265625, "learning_rate": 1.0203516808453094e-05, "loss": 0.5077, "step": 10841 }, { "epoch": 1.4874802771489333, "grad_norm": 1.2890625, "learning_rate": 1.0202073624029135e-05, "loss": 0.489, "step": 10842 }, { "epoch": 1.4876174795911368, "grad_norm": 1.21875, "learning_rate": 1.0200630435394692e-05, "loss": 0.4695, "step": 10843 }, { "epoch": 1.4877546820333403, "grad_norm": 1.3125, "learning_rate": 1.0199187242579836e-05, "loss": 0.4256, "step": 10844 }, { "epoch": 1.4878918844755438, "grad_norm": 1.3984375, "learning_rate": 1.0197744045614641e-05, "loss": 0.462, "step": 10845 }, { "epoch": 1.4880290869177473, "grad_norm": 1.2265625, "learning_rate": 1.0196300844529175e-05, "loss": 0.4599, "step": 10846 }, { "epoch": 1.4881662893599505, "grad_norm": 1.203125, "learning_rate": 1.0194857639353508e-05, "loss": 0.4076, "step": 10847 }, { "epoch": 1.488303491802154, "grad_norm": 1.2578125, "learning_rate": 1.0193414430117714e-05, "loss": 0.4507, "step": 10848 }, { "epoch": 1.4884406942443575, "grad_norm": 1.28125, "learning_rate": 1.0191971216851862e-05, "loss": 0.4517, "step": 10849 }, { "epoch": 1.488577896686561, "grad_norm": 1.234375, "learning_rate": 1.0190527999586025e-05, "loss": 0.4602, "step": 10850 }, { "epoch": 1.4887150991287645, "grad_norm": 1.3203125, "learning_rate": 1.0189084778350273e-05, "loss": 0.4723, "step": 10851 }, { "epoch": 1.488852301570968, "grad_norm": 1.15625, "learning_rate": 1.018764155317468e-05, "loss": 0.4341, "step": 10852 }, { "epoch": 1.4889895040131713, "grad_norm": 1.0859375, "learning_rate": 1.0186198324089316e-05, "loss": 0.3736, "step": 10853 }, { "epoch": 1.4891267064553748, "grad_norm": 1.2734375, "learning_rate": 1.0184755091124251e-05, "loss": 0.4315, "step": 10854 }, { "epoch": 1.4892639088975783, "grad_norm": 1.328125, "learning_rate": 1.0183311854309561e-05, "loss": 0.4616, "step": 10855 }, { "epoch": 1.4894011113397818, "grad_norm": 1.2109375, "learning_rate": 1.0181868613675312e-05, "loss": 0.5033, "step": 10856 }, { "epoch": 1.4895383137819853, "grad_norm": 1.21875, "learning_rate": 1.018042536925158e-05, "loss": 0.4342, "step": 10857 }, { "epoch": 1.4896755162241888, "grad_norm": 1.3203125, "learning_rate": 1.0178982121068436e-05, "loss": 0.4361, "step": 10858 }, { "epoch": 1.4898127186663923, "grad_norm": 1.1796875, "learning_rate": 1.0177538869155955e-05, "loss": 0.4439, "step": 10859 }, { "epoch": 1.4899499211085958, "grad_norm": 1.359375, "learning_rate": 1.01760956135442e-05, "loss": 0.5105, "step": 10860 }, { "epoch": 1.4900871235507993, "grad_norm": 1.265625, "learning_rate": 1.0174652354263254e-05, "loss": 0.4504, "step": 10861 }, { "epoch": 1.4902243259930028, "grad_norm": 1.15625, "learning_rate": 1.0173209091343188e-05, "loss": 0.3501, "step": 10862 }, { "epoch": 1.490361528435206, "grad_norm": 1.1484375, "learning_rate": 1.0171765824814066e-05, "loss": 0.404, "step": 10863 }, { "epoch": 1.4904987308774096, "grad_norm": 1.3359375, "learning_rate": 1.0170322554705967e-05, "loss": 0.4811, "step": 10864 }, { "epoch": 1.490635933319613, "grad_norm": 1.2265625, "learning_rate": 1.0168879281048962e-05, "loss": 0.414, "step": 10865 }, { "epoch": 1.4907731357618166, "grad_norm": 1.3671875, "learning_rate": 1.0167436003873126e-05, "loss": 0.4944, "step": 10866 }, { "epoch": 1.49091033820402, "grad_norm": 1.375, "learning_rate": 1.0165992723208527e-05, "loss": 0.4765, "step": 10867 }, { "epoch": 1.4910475406462236, "grad_norm": 1.296875, "learning_rate": 1.016454943908524e-05, "loss": 0.4727, "step": 10868 }, { "epoch": 1.4911847430884269, "grad_norm": 1.328125, "learning_rate": 1.016310615153334e-05, "loss": 0.4984, "step": 10869 }, { "epoch": 1.4913219455306304, "grad_norm": 1.1796875, "learning_rate": 1.0161662860582895e-05, "loss": 0.4093, "step": 10870 }, { "epoch": 1.4914591479728339, "grad_norm": 1.203125, "learning_rate": 1.0160219566263985e-05, "loss": 0.4267, "step": 10871 }, { "epoch": 1.4915963504150374, "grad_norm": 1.203125, "learning_rate": 1.0158776268606676e-05, "loss": 0.3595, "step": 10872 }, { "epoch": 1.4917335528572409, "grad_norm": 1.3203125, "learning_rate": 1.0157332967641045e-05, "loss": 0.4845, "step": 10873 }, { "epoch": 1.4918707552994444, "grad_norm": 1.296875, "learning_rate": 1.0155889663397162e-05, "loss": 0.5229, "step": 10874 }, { "epoch": 1.4920079577416479, "grad_norm": 1.1953125, "learning_rate": 1.0154446355905106e-05, "loss": 0.406, "step": 10875 }, { "epoch": 1.4921451601838513, "grad_norm": 1.2109375, "learning_rate": 1.0153003045194945e-05, "loss": 0.4352, "step": 10876 }, { "epoch": 1.4922823626260548, "grad_norm": 1.2734375, "learning_rate": 1.0151559731296755e-05, "loss": 0.4852, "step": 10877 }, { "epoch": 1.4924195650682583, "grad_norm": 1.234375, "learning_rate": 1.0150116414240606e-05, "loss": 0.4623, "step": 10878 }, { "epoch": 1.4925567675104616, "grad_norm": 1.28125, "learning_rate": 1.0148673094056578e-05, "loss": 0.4464, "step": 10879 }, { "epoch": 1.4926939699526651, "grad_norm": 1.2890625, "learning_rate": 1.0147229770774738e-05, "loss": 0.465, "step": 10880 }, { "epoch": 1.4928311723948686, "grad_norm": 1.1875, "learning_rate": 1.0145786444425164e-05, "loss": 0.4358, "step": 10881 }, { "epoch": 1.4929683748370721, "grad_norm": 1.1796875, "learning_rate": 1.0144343115037924e-05, "loss": 0.4221, "step": 10882 }, { "epoch": 1.4931055772792756, "grad_norm": 1.2734375, "learning_rate": 1.0142899782643098e-05, "loss": 0.4678, "step": 10883 }, { "epoch": 1.4932427797214791, "grad_norm": 1.1875, "learning_rate": 1.014145644727076e-05, "loss": 0.4256, "step": 10884 }, { "epoch": 1.4933799821636824, "grad_norm": 1.2890625, "learning_rate": 1.0140013108950977e-05, "loss": 0.4154, "step": 10885 }, { "epoch": 1.493517184605886, "grad_norm": 1.1640625, "learning_rate": 1.0138569767713831e-05, "loss": 0.3907, "step": 10886 }, { "epoch": 1.4936543870480894, "grad_norm": 1.1875, "learning_rate": 1.0137126423589389e-05, "loss": 0.4184, "step": 10887 }, { "epoch": 1.493791589490293, "grad_norm": 1.234375, "learning_rate": 1.0135683076607734e-05, "loss": 0.4798, "step": 10888 }, { "epoch": 1.4939287919324964, "grad_norm": 1.1875, "learning_rate": 1.013423972679893e-05, "loss": 0.4017, "step": 10889 }, { "epoch": 1.4940659943747, "grad_norm": 1.296875, "learning_rate": 1.0132796374193057e-05, "loss": 0.4759, "step": 10890 }, { "epoch": 1.4942031968169034, "grad_norm": 1.1953125, "learning_rate": 1.0131353018820187e-05, "loss": 0.3998, "step": 10891 }, { "epoch": 1.4943403992591069, "grad_norm": 1.203125, "learning_rate": 1.0129909660710397e-05, "loss": 0.4257, "step": 10892 }, { "epoch": 1.4944776017013104, "grad_norm": 1.171875, "learning_rate": 1.0128466299893758e-05, "loss": 0.4311, "step": 10893 }, { "epoch": 1.4946148041435139, "grad_norm": 1.3125, "learning_rate": 1.0127022936400346e-05, "loss": 0.4944, "step": 10894 }, { "epoch": 1.4947520065857172, "grad_norm": 1.2734375, "learning_rate": 1.0125579570260238e-05, "loss": 0.5002, "step": 10895 }, { "epoch": 1.4948892090279207, "grad_norm": 1.1875, "learning_rate": 1.0124136201503506e-05, "loss": 0.4349, "step": 10896 }, { "epoch": 1.4950264114701242, "grad_norm": 1.0859375, "learning_rate": 1.0122692830160224e-05, "loss": 0.3516, "step": 10897 }, { "epoch": 1.4951636139123277, "grad_norm": 1.1953125, "learning_rate": 1.0121249456260466e-05, "loss": 0.4285, "step": 10898 }, { "epoch": 1.4953008163545312, "grad_norm": 1.2578125, "learning_rate": 1.0119806079834311e-05, "loss": 0.4338, "step": 10899 }, { "epoch": 1.4954380187967347, "grad_norm": 1.1640625, "learning_rate": 1.0118362700911828e-05, "loss": 0.4272, "step": 10900 }, { "epoch": 1.495575221238938, "grad_norm": 1.140625, "learning_rate": 1.0116919319523094e-05, "loss": 0.4171, "step": 10901 }, { "epoch": 1.4957124236811414, "grad_norm": 1.234375, "learning_rate": 1.0115475935698188e-05, "loss": 0.4655, "step": 10902 }, { "epoch": 1.495849626123345, "grad_norm": 1.1953125, "learning_rate": 1.0114032549467177e-05, "loss": 0.3964, "step": 10903 }, { "epoch": 1.4959868285655484, "grad_norm": 1.1484375, "learning_rate": 1.011258916086014e-05, "loss": 0.3977, "step": 10904 }, { "epoch": 1.496124031007752, "grad_norm": 1.28125, "learning_rate": 1.0111145769907155e-05, "loss": 0.4775, "step": 10905 }, { "epoch": 1.4962612334499554, "grad_norm": 1.2265625, "learning_rate": 1.0109702376638294e-05, "loss": 0.4604, "step": 10906 }, { "epoch": 1.496398435892159, "grad_norm": 1.3203125, "learning_rate": 1.0108258981083634e-05, "loss": 0.4996, "step": 10907 }, { "epoch": 1.4965356383343624, "grad_norm": 1.2578125, "learning_rate": 1.0106815583273246e-05, "loss": 0.4772, "step": 10908 }, { "epoch": 1.496672840776566, "grad_norm": 1.3046875, "learning_rate": 1.0105372183237208e-05, "loss": 0.5107, "step": 10909 }, { "epoch": 1.4968100432187694, "grad_norm": 1.2578125, "learning_rate": 1.0103928781005594e-05, "loss": 0.4248, "step": 10910 }, { "epoch": 1.4969472456609727, "grad_norm": 1.21875, "learning_rate": 1.0102485376608481e-05, "loss": 0.4192, "step": 10911 }, { "epoch": 1.4970844481031762, "grad_norm": 1.234375, "learning_rate": 1.0101041970075944e-05, "loss": 0.4611, "step": 10912 }, { "epoch": 1.4972216505453797, "grad_norm": 1.2890625, "learning_rate": 1.0099598561438057e-05, "loss": 0.509, "step": 10913 }, { "epoch": 1.4973588529875832, "grad_norm": 1.359375, "learning_rate": 1.0098155150724898e-05, "loss": 0.5176, "step": 10914 }, { "epoch": 1.4974960554297867, "grad_norm": 1.3046875, "learning_rate": 1.0096711737966538e-05, "loss": 0.488, "step": 10915 }, { "epoch": 1.4976332578719902, "grad_norm": 1.2734375, "learning_rate": 1.009526832319306e-05, "loss": 0.4631, "step": 10916 }, { "epoch": 1.4977704603141935, "grad_norm": 1.2265625, "learning_rate": 1.0093824906434533e-05, "loss": 0.4333, "step": 10917 }, { "epoch": 1.497907662756397, "grad_norm": 1.21875, "learning_rate": 1.0092381487721032e-05, "loss": 0.4746, "step": 10918 }, { "epoch": 1.4980448651986005, "grad_norm": 1.109375, "learning_rate": 1.0090938067082637e-05, "loss": 0.3617, "step": 10919 }, { "epoch": 1.498182067640804, "grad_norm": 1.1171875, "learning_rate": 1.008949464454942e-05, "loss": 0.3464, "step": 10920 }, { "epoch": 1.4983192700830075, "grad_norm": 1.3046875, "learning_rate": 1.0088051220151461e-05, "loss": 0.4922, "step": 10921 }, { "epoch": 1.498456472525211, "grad_norm": 1.171875, "learning_rate": 1.008660779391883e-05, "loss": 0.4154, "step": 10922 }, { "epoch": 1.4985936749674145, "grad_norm": 1.2578125, "learning_rate": 1.0085164365881609e-05, "loss": 0.4111, "step": 10923 }, { "epoch": 1.498730877409618, "grad_norm": 1.234375, "learning_rate": 1.008372093606987e-05, "loss": 0.4041, "step": 10924 }, { "epoch": 1.4988680798518215, "grad_norm": 1.390625, "learning_rate": 1.008227750451369e-05, "loss": 0.5458, "step": 10925 }, { "epoch": 1.499005282294025, "grad_norm": 1.203125, "learning_rate": 1.0080834071243141e-05, "loss": 0.4328, "step": 10926 }, { "epoch": 1.4991424847362282, "grad_norm": 1.21875, "learning_rate": 1.0079390636288304e-05, "loss": 0.4418, "step": 10927 }, { "epoch": 1.4992796871784317, "grad_norm": 1.1640625, "learning_rate": 1.0077947199679255e-05, "loss": 0.4039, "step": 10928 }, { "epoch": 1.4994168896206352, "grad_norm": 1.1953125, "learning_rate": 1.0076503761446067e-05, "loss": 0.3935, "step": 10929 }, { "epoch": 1.4995540920628387, "grad_norm": 1.2265625, "learning_rate": 1.0075060321618818e-05, "loss": 0.476, "step": 10930 }, { "epoch": 1.4996912945050422, "grad_norm": 1.2109375, "learning_rate": 1.0073616880227584e-05, "loss": 0.4585, "step": 10931 }, { "epoch": 1.4998284969472457, "grad_norm": 1.1796875, "learning_rate": 1.0072173437302439e-05, "loss": 0.4364, "step": 10932 }, { "epoch": 1.4998284969472457, "eval_loss": 1.7029352188110352, "eval_runtime": 118.5064, "eval_samples_per_second": 1.426, "eval_steps_per_second": 0.717, "step": 10932 } ], "logging_steps": 1, "max_steps": 21864, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 3644, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.2259192839580156e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }