{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6981, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014324595330181923, "grad_norm": 17.07872200012207, "learning_rate": 0.0, "loss": 3.3203, "step": 1 }, { "epoch": 0.00028649190660363845, "grad_norm": 12.444620132446289, "learning_rate": 9.523809523809526e-07, "loss": 3.3132, "step": 2 }, { "epoch": 0.0004297378599054577, "grad_norm": 14.081363677978516, "learning_rate": 1.9047619047619051e-06, "loss": 3.0484, "step": 3 }, { "epoch": 0.0005729838132072769, "grad_norm": 17.641916275024414, "learning_rate": 2.8571428571428573e-06, "loss": 3.3318, "step": 4 }, { "epoch": 0.0007162297665090961, "grad_norm": 22.040523529052734, "learning_rate": 3.8095238095238102e-06, "loss": 3.4548, "step": 5 }, { "epoch": 0.0008594757198109154, "grad_norm": 14.864448547363281, "learning_rate": 4.7619047619047615e-06, "loss": 3.187, "step": 6 }, { "epoch": 0.0010027216731127346, "grad_norm": 13.00040054321289, "learning_rate": 5.7142857142857145e-06, "loss": 3.161, "step": 7 }, { "epoch": 0.0011459676264145538, "grad_norm": 14.418370246887207, "learning_rate": 6.666666666666667e-06, "loss": 3.2755, "step": 8 }, { "epoch": 0.001289213579716373, "grad_norm": 15.671940803527832, "learning_rate": 7.6190476190476205e-06, "loss": 3.1799, "step": 9 }, { "epoch": 0.0014324595330181922, "grad_norm": 12.775418281555176, "learning_rate": 8.571428571428573e-06, "loss": 3.2224, "step": 10 }, { "epoch": 0.0015757054863200115, "grad_norm": 15.23670482635498, "learning_rate": 9.523809523809523e-06, "loss": 3.2249, "step": 11 }, { "epoch": 0.0017189514396218307, "grad_norm": 17.242197036743164, "learning_rate": 1.0476190476190477e-05, "loss": 3.2994, "step": 12 }, { "epoch": 0.00186219739292365, "grad_norm": 9.63542366027832, "learning_rate": 1.1428571428571429e-05, "loss": 3.056, "step": 13 }, { "epoch": 0.0020054433462254693, "grad_norm": 15.193946838378906, "learning_rate": 1.2380952380952381e-05, "loss": 3.3298, "step": 14 }, { "epoch": 0.0021486892995272885, "grad_norm": 7.0082173347473145, "learning_rate": 1.3333333333333333e-05, "loss": 3.0659, "step": 15 }, { "epoch": 0.0022919352528291076, "grad_norm": 12.726550102233887, "learning_rate": 1.4285714285714285e-05, "loss": 2.8165, "step": 16 }, { "epoch": 0.002435181206130927, "grad_norm": 8.23282241821289, "learning_rate": 1.5238095238095241e-05, "loss": 2.8992, "step": 17 }, { "epoch": 0.002578427159432746, "grad_norm": 6.917903900146484, "learning_rate": 1.6190476190476193e-05, "loss": 2.7731, "step": 18 }, { "epoch": 0.002721673112734565, "grad_norm": 7.510035514831543, "learning_rate": 1.7142857142857145e-05, "loss": 3.0933, "step": 19 }, { "epoch": 0.0028649190660363843, "grad_norm": 5.700948238372803, "learning_rate": 1.8095238095238094e-05, "loss": 2.8212, "step": 20 }, { "epoch": 0.0030081650193382035, "grad_norm": 7.009974479675293, "learning_rate": 1.9047619047619046e-05, "loss": 2.7315, "step": 21 }, { "epoch": 0.003151410972640023, "grad_norm": 11.565633773803711, "learning_rate": 2e-05, "loss": 3.1823, "step": 22 }, { "epoch": 0.0032946569259418423, "grad_norm": 6.763265132904053, "learning_rate": 2.0952380952380954e-05, "loss": 2.8644, "step": 23 }, { "epoch": 0.0034379028792436614, "grad_norm": 6.666884422302246, "learning_rate": 2.1904761904761906e-05, "loss": 2.6397, "step": 24 }, { "epoch": 0.0035811488325454806, "grad_norm": 8.2684326171875, "learning_rate": 2.2857142857142858e-05, "loss": 2.8554, "step": 25 }, { "epoch": 0.0037243947858473, "grad_norm": 5.980471134185791, "learning_rate": 2.380952380952381e-05, "loss": 3.2534, "step": 26 }, { "epoch": 0.003867640739149119, "grad_norm": 4.946630001068115, "learning_rate": 2.4761904761904762e-05, "loss": 2.6832, "step": 27 }, { "epoch": 0.004010886692450939, "grad_norm": 7.3297810554504395, "learning_rate": 2.5714285714285714e-05, "loss": 2.8198, "step": 28 }, { "epoch": 0.004154132645752757, "grad_norm": 5.921866416931152, "learning_rate": 2.6666666666666667e-05, "loss": 2.4808, "step": 29 }, { "epoch": 0.004297378599054577, "grad_norm": 4.322668075561523, "learning_rate": 2.7619047619047622e-05, "loss": 2.6248, "step": 30 }, { "epoch": 0.004440624552356396, "grad_norm": 3.2872703075408936, "learning_rate": 2.857142857142857e-05, "loss": 2.7196, "step": 31 }, { "epoch": 0.004583870505658215, "grad_norm": 3.8503293991088867, "learning_rate": 2.9523809523809526e-05, "loss": 2.7443, "step": 32 }, { "epoch": 0.004727116458960034, "grad_norm": 4.385834693908691, "learning_rate": 3.0476190476190482e-05, "loss": 2.7645, "step": 33 }, { "epoch": 0.004870362412261854, "grad_norm": 3.6277992725372314, "learning_rate": 3.142857142857143e-05, "loss": 2.7599, "step": 34 }, { "epoch": 0.005013608365563673, "grad_norm": 3.9347822666168213, "learning_rate": 3.2380952380952386e-05, "loss": 2.8341, "step": 35 }, { "epoch": 0.005156854318865492, "grad_norm": 3.5991530418395996, "learning_rate": 3.3333333333333335e-05, "loss": 2.5607, "step": 36 }, { "epoch": 0.0053001002721673116, "grad_norm": 3.8151729106903076, "learning_rate": 3.428571428571429e-05, "loss": 2.5943, "step": 37 }, { "epoch": 0.00544334622546913, "grad_norm": 3.480487585067749, "learning_rate": 3.523809523809524e-05, "loss": 2.388, "step": 38 }, { "epoch": 0.00558659217877095, "grad_norm": 3.683131217956543, "learning_rate": 3.619047619047619e-05, "loss": 2.5571, "step": 39 }, { "epoch": 0.005729838132072769, "grad_norm": 3.602438449859619, "learning_rate": 3.7142857142857143e-05, "loss": 2.5571, "step": 40 }, { "epoch": 0.005873084085374588, "grad_norm": 3.1322181224823, "learning_rate": 3.809523809523809e-05, "loss": 2.5278, "step": 41 }, { "epoch": 0.006016330038676407, "grad_norm": 2.8120577335357666, "learning_rate": 3.904761904761905e-05, "loss": 2.2805, "step": 42 }, { "epoch": 0.006159575991978227, "grad_norm": 2.3309707641601562, "learning_rate": 4e-05, "loss": 2.5755, "step": 43 }, { "epoch": 0.006302821945280046, "grad_norm": 2.506972551345825, "learning_rate": 4.095238095238095e-05, "loss": 2.5206, "step": 44 }, { "epoch": 0.006446067898581865, "grad_norm": 3.444685935974121, "learning_rate": 4.190476190476191e-05, "loss": 2.5743, "step": 45 }, { "epoch": 0.0065893138518836845, "grad_norm": 4.068913459777832, "learning_rate": 4.2857142857142856e-05, "loss": 2.5254, "step": 46 }, { "epoch": 0.006732559805185503, "grad_norm": 2.835662603378296, "learning_rate": 4.380952380952381e-05, "loss": 2.3845, "step": 47 }, { "epoch": 0.006875805758487323, "grad_norm": 3.6947576999664307, "learning_rate": 4.476190476190477e-05, "loss": 2.5177, "step": 48 }, { "epoch": 0.007019051711789142, "grad_norm": 2.6243436336517334, "learning_rate": 4.5714285714285716e-05, "loss": 2.3541, "step": 49 }, { "epoch": 0.007162297665090961, "grad_norm": 2.635598659515381, "learning_rate": 4.666666666666667e-05, "loss": 2.3371, "step": 50 }, { "epoch": 0.00730554361839278, "grad_norm": 2.48020076751709, "learning_rate": 4.761904761904762e-05, "loss": 2.3019, "step": 51 }, { "epoch": 0.0074487895716946, "grad_norm": 2.0743110179901123, "learning_rate": 4.8571428571428576e-05, "loss": 2.5482, "step": 52 }, { "epoch": 0.007592035524996419, "grad_norm": 3.0455386638641357, "learning_rate": 4.9523809523809525e-05, "loss": 2.4514, "step": 53 }, { "epoch": 0.007735281478298238, "grad_norm": 1.9184397459030151, "learning_rate": 5.047619047619048e-05, "loss": 2.1701, "step": 54 }, { "epoch": 0.007878527431600058, "grad_norm": 3.2046592235565186, "learning_rate": 5.142857142857143e-05, "loss": 2.5301, "step": 55 }, { "epoch": 0.008021773384901877, "grad_norm": 2.512943983078003, "learning_rate": 5.2380952380952384e-05, "loss": 2.4511, "step": 56 }, { "epoch": 0.008165019338203695, "grad_norm": 2.1332807540893555, "learning_rate": 5.333333333333333e-05, "loss": 2.2251, "step": 57 }, { "epoch": 0.008308265291505515, "grad_norm": 2.215681552886963, "learning_rate": 5.428571428571428e-05, "loss": 2.3543, "step": 58 }, { "epoch": 0.008451511244807334, "grad_norm": 2.4583051204681396, "learning_rate": 5.5238095238095244e-05, "loss": 2.4101, "step": 59 }, { "epoch": 0.008594757198109154, "grad_norm": 1.9365713596343994, "learning_rate": 5.619047619047619e-05, "loss": 2.3006, "step": 60 }, { "epoch": 0.008738003151410973, "grad_norm": 2.1359055042266846, "learning_rate": 5.714285714285714e-05, "loss": 2.2496, "step": 61 }, { "epoch": 0.008881249104712791, "grad_norm": 2.4271366596221924, "learning_rate": 5.8095238095238104e-05, "loss": 2.4346, "step": 62 }, { "epoch": 0.009024495058014611, "grad_norm": 1.7826251983642578, "learning_rate": 5.904761904761905e-05, "loss": 2.1877, "step": 63 }, { "epoch": 0.00916774101131643, "grad_norm": 1.519139289855957, "learning_rate": 6e-05, "loss": 2.253, "step": 64 }, { "epoch": 0.00931098696461825, "grad_norm": 2.1480679512023926, "learning_rate": 6.0952380952380964e-05, "loss": 2.2472, "step": 65 }, { "epoch": 0.009454232917920068, "grad_norm": 2.6887569427490234, "learning_rate": 6.19047619047619e-05, "loss": 2.2535, "step": 66 }, { "epoch": 0.009597478871221888, "grad_norm": 1.7213101387023926, "learning_rate": 6.285714285714286e-05, "loss": 2.1591, "step": 67 }, { "epoch": 0.009740724824523707, "grad_norm": 1.7249000072479248, "learning_rate": 6.38095238095238e-05, "loss": 2.2263, "step": 68 }, { "epoch": 0.009883970777825527, "grad_norm": 1.932397723197937, "learning_rate": 6.476190476190477e-05, "loss": 2.4243, "step": 69 }, { "epoch": 0.010027216731127346, "grad_norm": 1.5131607055664062, "learning_rate": 6.571428571428571e-05, "loss": 2.1577, "step": 70 }, { "epoch": 0.010170462684429164, "grad_norm": 1.9068949222564697, "learning_rate": 6.666666666666667e-05, "loss": 2.4197, "step": 71 }, { "epoch": 0.010313708637730984, "grad_norm": 2.0715880393981934, "learning_rate": 6.761904761904763e-05, "loss": 2.582, "step": 72 }, { "epoch": 0.010456954591032804, "grad_norm": 2.10426926612854, "learning_rate": 6.857142857142858e-05, "loss": 2.4402, "step": 73 }, { "epoch": 0.010600200544334623, "grad_norm": 2.1137704849243164, "learning_rate": 6.952380952380952e-05, "loss": 2.2273, "step": 74 }, { "epoch": 0.010743446497636441, "grad_norm": 2.619250535964966, "learning_rate": 7.047619047619048e-05, "loss": 2.2086, "step": 75 }, { "epoch": 0.01088669245093826, "grad_norm": 1.8961827754974365, "learning_rate": 7.142857142857143e-05, "loss": 2.2338, "step": 76 }, { "epoch": 0.01102993840424008, "grad_norm": 1.794726848602295, "learning_rate": 7.238095238095238e-05, "loss": 2.1932, "step": 77 }, { "epoch": 0.0111731843575419, "grad_norm": 1.6158698797225952, "learning_rate": 7.333333333333333e-05, "loss": 2.2294, "step": 78 }, { "epoch": 0.01131643031084372, "grad_norm": 1.7154897451400757, "learning_rate": 7.428571428571429e-05, "loss": 2.3147, "step": 79 }, { "epoch": 0.011459676264145537, "grad_norm": 2.0792758464813232, "learning_rate": 7.523809523809524e-05, "loss": 2.2899, "step": 80 }, { "epoch": 0.011602922217447357, "grad_norm": 2.077021360397339, "learning_rate": 7.619047619047618e-05, "loss": 2.5762, "step": 81 }, { "epoch": 0.011746168170749176, "grad_norm": 2.1197731494903564, "learning_rate": 7.714285714285715e-05, "loss": 2.3633, "step": 82 }, { "epoch": 0.011889414124050996, "grad_norm": 1.853774905204773, "learning_rate": 7.80952380952381e-05, "loss": 2.2655, "step": 83 }, { "epoch": 0.012032660077352814, "grad_norm": 2.1580569744110107, "learning_rate": 7.904761904761905e-05, "loss": 2.3382, "step": 84 }, { "epoch": 0.012175906030654634, "grad_norm": 2.0754215717315674, "learning_rate": 8e-05, "loss": 2.2528, "step": 85 }, { "epoch": 0.012319151983956453, "grad_norm": 1.9672143459320068, "learning_rate": 8.095238095238096e-05, "loss": 2.3245, "step": 86 }, { "epoch": 0.012462397937258273, "grad_norm": 1.9062166213989258, "learning_rate": 8.19047619047619e-05, "loss": 2.2921, "step": 87 }, { "epoch": 0.012605643890560092, "grad_norm": 2.117784023284912, "learning_rate": 8.285714285714287e-05, "loss": 2.274, "step": 88 }, { "epoch": 0.01274888984386191, "grad_norm": 1.9252371788024902, "learning_rate": 8.380952380952382e-05, "loss": 2.0558, "step": 89 }, { "epoch": 0.01289213579716373, "grad_norm": 1.6491607427597046, "learning_rate": 8.476190476190477e-05, "loss": 2.2269, "step": 90 }, { "epoch": 0.01303538175046555, "grad_norm": 1.709864854812622, "learning_rate": 8.571428571428571e-05, "loss": 2.151, "step": 91 }, { "epoch": 0.013178627703767369, "grad_norm": 1.9329763650894165, "learning_rate": 8.666666666666667e-05, "loss": 1.9822, "step": 92 }, { "epoch": 0.013321873657069187, "grad_norm": 1.588519811630249, "learning_rate": 8.761904761904762e-05, "loss": 2.2678, "step": 93 }, { "epoch": 0.013465119610371007, "grad_norm": 1.70687735080719, "learning_rate": 8.857142857142857e-05, "loss": 2.348, "step": 94 }, { "epoch": 0.013608365563672826, "grad_norm": 2.211210250854492, "learning_rate": 8.952380952380953e-05, "loss": 2.1171, "step": 95 }, { "epoch": 0.013751611516974646, "grad_norm": 1.9821035861968994, "learning_rate": 9.047619047619048e-05, "loss": 2.0035, "step": 96 }, { "epoch": 0.013894857470276465, "grad_norm": 1.7123980522155762, "learning_rate": 9.142857142857143e-05, "loss": 2.35, "step": 97 }, { "epoch": 0.014038103423578283, "grad_norm": 1.2838419675827026, "learning_rate": 9.238095238095239e-05, "loss": 2.1924, "step": 98 }, { "epoch": 0.014181349376880103, "grad_norm": 1.5693804025650024, "learning_rate": 9.333333333333334e-05, "loss": 2.0903, "step": 99 }, { "epoch": 0.014324595330181922, "grad_norm": 2.047297239303589, "learning_rate": 9.428571428571429e-05, "loss": 2.2399, "step": 100 }, { "epoch": 0.014467841283483742, "grad_norm": 2.0838024616241455, "learning_rate": 9.523809523809524e-05, "loss": 2.3008, "step": 101 }, { "epoch": 0.01461108723678556, "grad_norm": 1.6792353391647339, "learning_rate": 9.61904761904762e-05, "loss": 2.1405, "step": 102 }, { "epoch": 0.01475433319008738, "grad_norm": 1.8295334577560425, "learning_rate": 9.714285714285715e-05, "loss": 2.3061, "step": 103 }, { "epoch": 0.0148975791433892, "grad_norm": 1.4387637376785278, "learning_rate": 9.80952380952381e-05, "loss": 2.0905, "step": 104 }, { "epoch": 0.015040825096691019, "grad_norm": 1.6112143993377686, "learning_rate": 9.904761904761905e-05, "loss": 2.2882, "step": 105 }, { "epoch": 0.015184071049992838, "grad_norm": 2.2519917488098145, "learning_rate": 0.0001, "loss": 2.216, "step": 106 }, { "epoch": 0.015327317003294656, "grad_norm": 2.1588332653045654, "learning_rate": 0.00010095238095238096, "loss": 2.2333, "step": 107 }, { "epoch": 0.015470562956596476, "grad_norm": 2.039283514022827, "learning_rate": 0.0001019047619047619, "loss": 2.2741, "step": 108 }, { "epoch": 0.015613808909898295, "grad_norm": 2.334714412689209, "learning_rate": 0.00010285714285714286, "loss": 2.419, "step": 109 }, { "epoch": 0.015757054863200115, "grad_norm": 1.6153100728988647, "learning_rate": 0.00010380952380952383, "loss": 2.231, "step": 110 }, { "epoch": 0.015900300816501935, "grad_norm": 2.0350120067596436, "learning_rate": 0.00010476190476190477, "loss": 2.2334, "step": 111 }, { "epoch": 0.016043546769803754, "grad_norm": 2.0163400173187256, "learning_rate": 0.00010571428571428572, "loss": 2.1534, "step": 112 }, { "epoch": 0.016186792723105574, "grad_norm": 1.9854036569595337, "learning_rate": 0.00010666666666666667, "loss": 2.3384, "step": 113 }, { "epoch": 0.01633003867640739, "grad_norm": 1.900425910949707, "learning_rate": 0.00010761904761904762, "loss": 2.0472, "step": 114 }, { "epoch": 0.01647328462970921, "grad_norm": 1.5657649040222168, "learning_rate": 0.00010857142857142856, "loss": 1.9941, "step": 115 }, { "epoch": 0.01661653058301103, "grad_norm": 2.3009397983551025, "learning_rate": 0.00010952380952380953, "loss": 2.1056, "step": 116 }, { "epoch": 0.01675977653631285, "grad_norm": 1.8707588911056519, "learning_rate": 0.00011047619047619049, "loss": 2.1639, "step": 117 }, { "epoch": 0.01690302248961467, "grad_norm": 1.5526729822158813, "learning_rate": 0.00011142857142857144, "loss": 2.1142, "step": 118 }, { "epoch": 0.017046268442916488, "grad_norm": 1.468161702156067, "learning_rate": 0.00011238095238095239, "loss": 2.3557, "step": 119 }, { "epoch": 0.017189514396218308, "grad_norm": 2.3663737773895264, "learning_rate": 0.00011333333333333334, "loss": 2.3407, "step": 120 }, { "epoch": 0.017332760349520127, "grad_norm": 1.9272948503494263, "learning_rate": 0.00011428571428571428, "loss": 2.0577, "step": 121 }, { "epoch": 0.017476006302821947, "grad_norm": 1.9185744524002075, "learning_rate": 0.00011523809523809524, "loss": 2.2176, "step": 122 }, { "epoch": 0.017619252256123763, "grad_norm": 1.755556583404541, "learning_rate": 0.00011619047619047621, "loss": 2.1527, "step": 123 }, { "epoch": 0.017762498209425583, "grad_norm": 1.9412447214126587, "learning_rate": 0.00011714285714285715, "loss": 2.0891, "step": 124 }, { "epoch": 0.017905744162727402, "grad_norm": 1.9787492752075195, "learning_rate": 0.0001180952380952381, "loss": 2.2772, "step": 125 }, { "epoch": 0.018048990116029222, "grad_norm": 2.441361427307129, "learning_rate": 0.00011904761904761905, "loss": 2.2762, "step": 126 }, { "epoch": 0.01819223606933104, "grad_norm": 1.6774814128875732, "learning_rate": 0.00012, "loss": 2.1973, "step": 127 }, { "epoch": 0.01833548202263286, "grad_norm": 1.5562336444854736, "learning_rate": 0.00012095238095238095, "loss": 2.1191, "step": 128 }, { "epoch": 0.01847872797593468, "grad_norm": 1.5988961458206177, "learning_rate": 0.00012190476190476193, "loss": 2.0806, "step": 129 }, { "epoch": 0.0186219739292365, "grad_norm": 1.959429383277893, "learning_rate": 0.00012285714285714287, "loss": 2.2117, "step": 130 }, { "epoch": 0.01876521988253832, "grad_norm": 2.1283791065216064, "learning_rate": 0.0001238095238095238, "loss": 2.0358, "step": 131 }, { "epoch": 0.018908465835840136, "grad_norm": 1.5216805934906006, "learning_rate": 0.00012476190476190478, "loss": 2.069, "step": 132 }, { "epoch": 0.019051711789141956, "grad_norm": 1.5348018407821655, "learning_rate": 0.00012571428571428572, "loss": 2.2106, "step": 133 }, { "epoch": 0.019194957742443775, "grad_norm": 1.6625076532363892, "learning_rate": 0.00012666666666666666, "loss": 2.0635, "step": 134 }, { "epoch": 0.019338203695745595, "grad_norm": 1.7094850540161133, "learning_rate": 0.0001276190476190476, "loss": 2.2911, "step": 135 }, { "epoch": 0.019481449649047414, "grad_norm": 2.108917236328125, "learning_rate": 0.00012857142857142858, "loss": 2.2598, "step": 136 }, { "epoch": 0.019624695602349234, "grad_norm": 1.285107970237732, "learning_rate": 0.00012952380952380954, "loss": 2.0118, "step": 137 }, { "epoch": 0.019767941555651054, "grad_norm": 1.588222622871399, "learning_rate": 0.0001304761904761905, "loss": 2.0765, "step": 138 }, { "epoch": 0.019911187508952873, "grad_norm": 1.740833044052124, "learning_rate": 0.00013142857142857143, "loss": 2.3131, "step": 139 }, { "epoch": 0.020054433462254693, "grad_norm": 1.5425759553909302, "learning_rate": 0.00013238095238095237, "loss": 2.3766, "step": 140 }, { "epoch": 0.02019767941555651, "grad_norm": 1.820640206336975, "learning_rate": 0.00013333333333333334, "loss": 2.2642, "step": 141 }, { "epoch": 0.02034092536885833, "grad_norm": 2.1920790672302246, "learning_rate": 0.00013428571428571428, "loss": 2.084, "step": 142 }, { "epoch": 0.020484171322160148, "grad_norm": 1.5781718492507935, "learning_rate": 0.00013523809523809525, "loss": 2.2659, "step": 143 }, { "epoch": 0.020627417275461968, "grad_norm": 1.8226157426834106, "learning_rate": 0.0001361904761904762, "loss": 2.2298, "step": 144 }, { "epoch": 0.020770663228763787, "grad_norm": 2.2049548625946045, "learning_rate": 0.00013714285714285716, "loss": 2.2777, "step": 145 }, { "epoch": 0.020913909182065607, "grad_norm": 1.7138323783874512, "learning_rate": 0.0001380952380952381, "loss": 1.9627, "step": 146 }, { "epoch": 0.021057155135367427, "grad_norm": 2.1737287044525146, "learning_rate": 0.00013904761904761905, "loss": 2.3337, "step": 147 }, { "epoch": 0.021200401088669246, "grad_norm": 1.5966408252716064, "learning_rate": 0.00014, "loss": 2.247, "step": 148 }, { "epoch": 0.021343647041971066, "grad_norm": 1.5897079706192017, "learning_rate": 0.00014095238095238096, "loss": 2.1142, "step": 149 }, { "epoch": 0.021486892995272882, "grad_norm": 1.828383445739746, "learning_rate": 0.00014190476190476193, "loss": 2.1913, "step": 150 }, { "epoch": 0.0216301389485747, "grad_norm": 1.374778389930725, "learning_rate": 0.00014285714285714287, "loss": 2.0038, "step": 151 }, { "epoch": 0.02177338490187652, "grad_norm": 1.390619158744812, "learning_rate": 0.0001438095238095238, "loss": 2.2203, "step": 152 }, { "epoch": 0.02191663085517834, "grad_norm": 1.8672500848770142, "learning_rate": 0.00014476190476190475, "loss": 2.021, "step": 153 }, { "epoch": 0.02205987680848016, "grad_norm": 1.2887027263641357, "learning_rate": 0.00014571428571428572, "loss": 2.0757, "step": 154 }, { "epoch": 0.02220312276178198, "grad_norm": 2.0224878787994385, "learning_rate": 0.00014666666666666666, "loss": 2.1881, "step": 155 }, { "epoch": 0.0223463687150838, "grad_norm": 2.22458815574646, "learning_rate": 0.00014761904761904763, "loss": 2.1733, "step": 156 }, { "epoch": 0.02248961466838562, "grad_norm": 1.7837990522384644, "learning_rate": 0.00014857142857142857, "loss": 2.0505, "step": 157 }, { "epoch": 0.02263286062168744, "grad_norm": 2.088524580001831, "learning_rate": 0.00014952380952380954, "loss": 2.0601, "step": 158 }, { "epoch": 0.022776106574989255, "grad_norm": 1.7616703510284424, "learning_rate": 0.00015047619047619048, "loss": 2.1004, "step": 159 }, { "epoch": 0.022919352528291075, "grad_norm": 2.0035274028778076, "learning_rate": 0.00015142857142857143, "loss": 1.941, "step": 160 }, { "epoch": 0.023062598481592894, "grad_norm": 2.067079544067383, "learning_rate": 0.00015238095238095237, "loss": 2.0153, "step": 161 }, { "epoch": 0.023205844434894714, "grad_norm": 1.6726689338684082, "learning_rate": 0.00015333333333333334, "loss": 2.0973, "step": 162 }, { "epoch": 0.023349090388196533, "grad_norm": 1.9336814880371094, "learning_rate": 0.0001542857142857143, "loss": 2.1694, "step": 163 }, { "epoch": 0.023492336341498353, "grad_norm": 2.0434844493865967, "learning_rate": 0.00015523809523809525, "loss": 2.1513, "step": 164 }, { "epoch": 0.023635582294800173, "grad_norm": 2.5834715366363525, "learning_rate": 0.0001561904761904762, "loss": 2.1062, "step": 165 }, { "epoch": 0.023778828248101992, "grad_norm": 1.5724657773971558, "learning_rate": 0.00015714285714285716, "loss": 2.1132, "step": 166 }, { "epoch": 0.023922074201403812, "grad_norm": 1.7696533203125, "learning_rate": 0.0001580952380952381, "loss": 2.1059, "step": 167 }, { "epoch": 0.024065320154705628, "grad_norm": 1.6792616844177246, "learning_rate": 0.00015904761904761904, "loss": 2.0247, "step": 168 }, { "epoch": 0.024208566108007448, "grad_norm": 1.5773078203201294, "learning_rate": 0.00016, "loss": 2.0776, "step": 169 }, { "epoch": 0.024351812061309267, "grad_norm": 1.8438524007797241, "learning_rate": 0.00016095238095238096, "loss": 2.2405, "step": 170 }, { "epoch": 0.024495058014611087, "grad_norm": 2.2779531478881836, "learning_rate": 0.00016190476190476192, "loss": 2.3006, "step": 171 }, { "epoch": 0.024638303967912906, "grad_norm": 1.5605627298355103, "learning_rate": 0.00016285714285714287, "loss": 1.9487, "step": 172 }, { "epoch": 0.024781549921214726, "grad_norm": 1.4041372537612915, "learning_rate": 0.0001638095238095238, "loss": 2.339, "step": 173 }, { "epoch": 0.024924795874516546, "grad_norm": 1.8062151670455933, "learning_rate": 0.00016476190476190475, "loss": 1.8319, "step": 174 }, { "epoch": 0.025068041827818365, "grad_norm": 1.762485146522522, "learning_rate": 0.00016571428571428575, "loss": 2.0949, "step": 175 }, { "epoch": 0.025211287781120185, "grad_norm": 1.8863420486450195, "learning_rate": 0.0001666666666666667, "loss": 2.1156, "step": 176 }, { "epoch": 0.025354533734422, "grad_norm": 1.7349575757980347, "learning_rate": 0.00016761904761904763, "loss": 2.1684, "step": 177 }, { "epoch": 0.02549777968772382, "grad_norm": 1.3953487873077393, "learning_rate": 0.00016857142857142857, "loss": 2.2788, "step": 178 }, { "epoch": 0.02564102564102564, "grad_norm": 1.5261420011520386, "learning_rate": 0.00016952380952380954, "loss": 1.8818, "step": 179 }, { "epoch": 0.02578427159432746, "grad_norm": 1.3842451572418213, "learning_rate": 0.00017047619047619048, "loss": 2.257, "step": 180 }, { "epoch": 0.02592751754762928, "grad_norm": 1.3190925121307373, "learning_rate": 0.00017142857142857143, "loss": 2.2254, "step": 181 }, { "epoch": 0.0260707635009311, "grad_norm": 1.8696284294128418, "learning_rate": 0.0001723809523809524, "loss": 2.279, "step": 182 }, { "epoch": 0.02621400945423292, "grad_norm": 2.319302797317505, "learning_rate": 0.00017333333333333334, "loss": 2.3203, "step": 183 }, { "epoch": 0.026357255407534738, "grad_norm": 1.8303583860397339, "learning_rate": 0.0001742857142857143, "loss": 2.2154, "step": 184 }, { "epoch": 0.026500501360836558, "grad_norm": 1.6571789979934692, "learning_rate": 0.00017523809523809525, "loss": 2.126, "step": 185 }, { "epoch": 0.026643747314138374, "grad_norm": 1.532020092010498, "learning_rate": 0.0001761904761904762, "loss": 1.9924, "step": 186 }, { "epoch": 0.026786993267440194, "grad_norm": 1.9024659395217896, "learning_rate": 0.00017714285714285713, "loss": 2.1025, "step": 187 }, { "epoch": 0.026930239220742013, "grad_norm": 1.9406988620758057, "learning_rate": 0.0001780952380952381, "loss": 2.166, "step": 188 }, { "epoch": 0.027073485174043833, "grad_norm": 1.9504728317260742, "learning_rate": 0.00017904761904761907, "loss": 2.1566, "step": 189 }, { "epoch": 0.027216731127345652, "grad_norm": 1.6800481081008911, "learning_rate": 0.00018, "loss": 2.1965, "step": 190 }, { "epoch": 0.027359977080647472, "grad_norm": 1.30282461643219, "learning_rate": 0.00018095238095238095, "loss": 2.0646, "step": 191 }, { "epoch": 0.02750322303394929, "grad_norm": 1.5442649126052856, "learning_rate": 0.00018190476190476192, "loss": 2.0342, "step": 192 }, { "epoch": 0.02764646898725111, "grad_norm": 1.4691842794418335, "learning_rate": 0.00018285714285714286, "loss": 1.9152, "step": 193 }, { "epoch": 0.02778971494055293, "grad_norm": 1.412746548652649, "learning_rate": 0.0001838095238095238, "loss": 2.1865, "step": 194 }, { "epoch": 0.027932960893854747, "grad_norm": 1.389622449874878, "learning_rate": 0.00018476190476190478, "loss": 2.0969, "step": 195 }, { "epoch": 0.028076206847156567, "grad_norm": 1.5035760402679443, "learning_rate": 0.00018571428571428572, "loss": 1.932, "step": 196 }, { "epoch": 0.028219452800458386, "grad_norm": 1.5785410404205322, "learning_rate": 0.0001866666666666667, "loss": 2.342, "step": 197 }, { "epoch": 0.028362698753760206, "grad_norm": 1.6420410871505737, "learning_rate": 0.00018761904761904763, "loss": 2.2316, "step": 198 }, { "epoch": 0.028505944707062025, "grad_norm": 1.3329194784164429, "learning_rate": 0.00018857142857142857, "loss": 2.0453, "step": 199 }, { "epoch": 0.028649190660363845, "grad_norm": 1.605850338935852, "learning_rate": 0.0001895238095238095, "loss": 2.1962, "step": 200 }, { "epoch": 0.028792436613665665, "grad_norm": 1.3607438802719116, "learning_rate": 0.00019047619047619048, "loss": 2.0482, "step": 201 }, { "epoch": 0.028935682566967484, "grad_norm": 1.7939872741699219, "learning_rate": 0.00019142857142857145, "loss": 2.1492, "step": 202 }, { "epoch": 0.029078928520269304, "grad_norm": 1.7994794845581055, "learning_rate": 0.0001923809523809524, "loss": 1.991, "step": 203 }, { "epoch": 0.02922217447357112, "grad_norm": 1.3572405576705933, "learning_rate": 0.00019333333333333333, "loss": 2.1157, "step": 204 }, { "epoch": 0.02936542042687294, "grad_norm": 1.5422847270965576, "learning_rate": 0.0001942857142857143, "loss": 2.1926, "step": 205 }, { "epoch": 0.02950866638017476, "grad_norm": 1.6855864524841309, "learning_rate": 0.00019523809523809525, "loss": 2.017, "step": 206 }, { "epoch": 0.02965191233347658, "grad_norm": 1.9175734519958496, "learning_rate": 0.0001961904761904762, "loss": 2.0882, "step": 207 }, { "epoch": 0.0297951582867784, "grad_norm": 1.7112526893615723, "learning_rate": 0.00019714285714285716, "loss": 2.1923, "step": 208 }, { "epoch": 0.029938404240080218, "grad_norm": 1.597528100013733, "learning_rate": 0.0001980952380952381, "loss": 2.0914, "step": 209 }, { "epoch": 0.030081650193382038, "grad_norm": 1.4300575256347656, "learning_rate": 0.00019904761904761907, "loss": 2.1496, "step": 210 }, { "epoch": 0.030224896146683857, "grad_norm": 1.9462852478027344, "learning_rate": 0.0002, "loss": 2.053, "step": 211 }, { "epoch": 0.030368142099985677, "grad_norm": 1.4876902103424072, "learning_rate": 0.00019999998923623733, "loss": 2.0478, "step": 212 }, { "epoch": 0.030511388053287496, "grad_norm": 1.7003942728042603, "learning_rate": 0.00019999995694495155, "loss": 1.9805, "step": 213 }, { "epoch": 0.030654634006589312, "grad_norm": 1.4573678970336914, "learning_rate": 0.00019999990312614968, "loss": 2.1118, "step": 214 }, { "epoch": 0.030797879959891132, "grad_norm": 1.4337831735610962, "learning_rate": 0.00019999982777984328, "loss": 2.2406, "step": 215 }, { "epoch": 0.03094112591319295, "grad_norm": 1.3579963445663452, "learning_rate": 0.00019999973090604857, "loss": 2.1225, "step": 216 }, { "epoch": 0.03108437186649477, "grad_norm": 1.3236494064331055, "learning_rate": 0.0001999996125047864, "loss": 1.8691, "step": 217 }, { "epoch": 0.03122761781979659, "grad_norm": 2.1528525352478027, "learning_rate": 0.00019999947257608226, "loss": 2.0453, "step": 218 }, { "epoch": 0.03137086377309841, "grad_norm": 1.5716359615325928, "learning_rate": 0.0001999993111199663, "loss": 2.2127, "step": 219 }, { "epoch": 0.03151410972640023, "grad_norm": 1.4546295404434204, "learning_rate": 0.00019999912813647321, "loss": 2.1366, "step": 220 }, { "epoch": 0.03165735567970205, "grad_norm": 1.2222797870635986, "learning_rate": 0.00019999892362564244, "loss": 2.0532, "step": 221 }, { "epoch": 0.03180060163300387, "grad_norm": 1.2081308364868164, "learning_rate": 0.00019999869758751803, "loss": 1.9721, "step": 222 }, { "epoch": 0.03194384758630569, "grad_norm": 1.42484450340271, "learning_rate": 0.00019999845002214862, "loss": 2.0889, "step": 223 }, { "epoch": 0.03208709353960751, "grad_norm": 1.4059746265411377, "learning_rate": 0.00019999818092958745, "loss": 2.0902, "step": 224 }, { "epoch": 0.03223033949290933, "grad_norm": 1.8382090330123901, "learning_rate": 0.00019999789030989256, "loss": 2.0476, "step": 225 }, { "epoch": 0.03237358544621115, "grad_norm": 1.6440504789352417, "learning_rate": 0.00019999757816312639, "loss": 2.1482, "step": 226 }, { "epoch": 0.03251683139951296, "grad_norm": 1.642638087272644, "learning_rate": 0.0001999972444893562, "loss": 2.2172, "step": 227 }, { "epoch": 0.03266007735281478, "grad_norm": 1.5059553384780884, "learning_rate": 0.00019999688928865387, "loss": 2.1989, "step": 228 }, { "epoch": 0.0328033233061166, "grad_norm": 1.300182819366455, "learning_rate": 0.00019999651256109578, "loss": 2.231, "step": 229 }, { "epoch": 0.03294656925941842, "grad_norm": 2.031682252883911, "learning_rate": 0.00019999611430676306, "loss": 2.1164, "step": 230 }, { "epoch": 0.03308981521272024, "grad_norm": 1.5730478763580322, "learning_rate": 0.00019999569452574148, "loss": 2.2195, "step": 231 }, { "epoch": 0.03323306116602206, "grad_norm": 1.8631510734558105, "learning_rate": 0.00019999525321812134, "loss": 1.9845, "step": 232 }, { "epoch": 0.03337630711932388, "grad_norm": 1.4764221906661987, "learning_rate": 0.0001999947903839977, "loss": 1.9758, "step": 233 }, { "epoch": 0.0335195530726257, "grad_norm": 1.5872505903244019, "learning_rate": 0.00019999430602347018, "loss": 2.0329, "step": 234 }, { "epoch": 0.03366279902592752, "grad_norm": 1.189752221107483, "learning_rate": 0.00019999380013664304, "loss": 2.0802, "step": 235 }, { "epoch": 0.03380604497922934, "grad_norm": 1.7900701761245728, "learning_rate": 0.0001999932727236252, "loss": 1.9835, "step": 236 }, { "epoch": 0.033949290932531156, "grad_norm": 1.450388789176941, "learning_rate": 0.00019999272378453019, "loss": 1.925, "step": 237 }, { "epoch": 0.034092536885832976, "grad_norm": 1.4623603820800781, "learning_rate": 0.0001999921533194762, "loss": 2.1969, "step": 238 }, { "epoch": 0.034235782839134796, "grad_norm": 1.14110267162323, "learning_rate": 0.00019999156132858597, "loss": 1.8988, "step": 239 }, { "epoch": 0.034379028792436615, "grad_norm": 1.701717495918274, "learning_rate": 0.000199990947811987, "loss": 2.198, "step": 240 }, { "epoch": 0.034522274745738435, "grad_norm": 1.4082845449447632, "learning_rate": 0.00019999031276981142, "loss": 2.1777, "step": 241 }, { "epoch": 0.034665520699040255, "grad_norm": 1.1076407432556152, "learning_rate": 0.00019998965620219585, "loss": 1.8761, "step": 242 }, { "epoch": 0.034808766652342074, "grad_norm": 1.7932696342468262, "learning_rate": 0.00019998897810928162, "loss": 2.3157, "step": 243 }, { "epoch": 0.034952012605643894, "grad_norm": 1.6360929012298584, "learning_rate": 0.0001999882784912148, "loss": 2.0558, "step": 244 }, { "epoch": 0.035095258558945706, "grad_norm": 1.2197916507720947, "learning_rate": 0.00019998755734814593, "loss": 1.9071, "step": 245 }, { "epoch": 0.035238504512247526, "grad_norm": 1.3521648645401, "learning_rate": 0.00019998681468023027, "loss": 2.0842, "step": 246 }, { "epoch": 0.035381750465549346, "grad_norm": 1.6105707883834839, "learning_rate": 0.00019998605048762768, "loss": 2.1477, "step": 247 }, { "epoch": 0.035524996418851165, "grad_norm": 1.3070355653762817, "learning_rate": 0.0001999852647705027, "loss": 2.2225, "step": 248 }, { "epoch": 0.035668242372152985, "grad_norm": 1.6671980619430542, "learning_rate": 0.0001999844575290245, "loss": 2.2229, "step": 249 }, { "epoch": 0.035811488325454804, "grad_norm": 1.5772150754928589, "learning_rate": 0.00019998362876336677, "loss": 2.2144, "step": 250 }, { "epoch": 0.035954734278756624, "grad_norm": 1.5078805685043335, "learning_rate": 0.00019998277847370802, "loss": 1.9953, "step": 251 }, { "epoch": 0.036097980232058444, "grad_norm": 1.424390196800232, "learning_rate": 0.00019998190666023122, "loss": 2.1057, "step": 252 }, { "epoch": 0.03624122618536026, "grad_norm": 1.2690606117248535, "learning_rate": 0.00019998101332312408, "loss": 2.0265, "step": 253 }, { "epoch": 0.03638447213866208, "grad_norm": 1.2818070650100708, "learning_rate": 0.00019998009846257896, "loss": 2.1688, "step": 254 }, { "epoch": 0.0365277180919639, "grad_norm": 1.3130383491516113, "learning_rate": 0.00019997916207879275, "loss": 2.0528, "step": 255 }, { "epoch": 0.03667096404526572, "grad_norm": 1.242950439453125, "learning_rate": 0.00019997820417196703, "loss": 1.9179, "step": 256 }, { "epoch": 0.03681420999856754, "grad_norm": 1.6066879034042358, "learning_rate": 0.00019997722474230804, "loss": 2.1527, "step": 257 }, { "epoch": 0.03695745595186936, "grad_norm": 1.7110648155212402, "learning_rate": 0.00019997622379002661, "loss": 2.0529, "step": 258 }, { "epoch": 0.03710070190517118, "grad_norm": 2.226452589035034, "learning_rate": 0.00019997520131533823, "loss": 2.4725, "step": 259 }, { "epoch": 0.037243947858473, "grad_norm": 1.449493408203125, "learning_rate": 0.00019997415731846298, "loss": 1.9713, "step": 260 }, { "epoch": 0.03738719381177482, "grad_norm": 1.6020697355270386, "learning_rate": 0.0001999730917996257, "loss": 2.2761, "step": 261 }, { "epoch": 0.03753043976507664, "grad_norm": 1.558585524559021, "learning_rate": 0.00019997200475905565, "loss": 2.0281, "step": 262 }, { "epoch": 0.03767368571837845, "grad_norm": 1.5056793689727783, "learning_rate": 0.00019997089619698693, "loss": 2.1021, "step": 263 }, { "epoch": 0.03781693167168027, "grad_norm": 1.675189733505249, "learning_rate": 0.00019996976611365812, "loss": 1.9848, "step": 264 }, { "epoch": 0.03796017762498209, "grad_norm": 1.744429588317871, "learning_rate": 0.00019996861450931257, "loss": 2.164, "step": 265 }, { "epoch": 0.03810342357828391, "grad_norm": 1.3687845468521118, "learning_rate": 0.00019996744138419813, "loss": 2.0527, "step": 266 }, { "epoch": 0.03824666953158573, "grad_norm": 1.4210408926010132, "learning_rate": 0.00019996624673856737, "loss": 2.0662, "step": 267 }, { "epoch": 0.03838991548488755, "grad_norm": 1.3032699823379517, "learning_rate": 0.00019996503057267745, "loss": 2.1742, "step": 268 }, { "epoch": 0.03853316143818937, "grad_norm": 1.5257645845413208, "learning_rate": 0.00019996379288679026, "loss": 2.0335, "step": 269 }, { "epoch": 0.03867640739149119, "grad_norm": 1.4737271070480347, "learning_rate": 0.00019996253368117212, "loss": 2.0711, "step": 270 }, { "epoch": 0.03881965334479301, "grad_norm": 1.5205200910568237, "learning_rate": 0.00019996125295609421, "loss": 2.0227, "step": 271 }, { "epoch": 0.03896289929809483, "grad_norm": 1.5012180805206299, "learning_rate": 0.0001999599507118322, "loss": 2.0223, "step": 272 }, { "epoch": 0.03910614525139665, "grad_norm": 1.2219935655593872, "learning_rate": 0.00019995862694866635, "loss": 2.0794, "step": 273 }, { "epoch": 0.03924939120469847, "grad_norm": 1.8646619319915771, "learning_rate": 0.00019995728166688178, "loss": 2.0646, "step": 274 }, { "epoch": 0.03939263715800029, "grad_norm": 1.589030385017395, "learning_rate": 0.00019995591486676801, "loss": 2.0444, "step": 275 }, { "epoch": 0.03953588311130211, "grad_norm": 1.6086562871932983, "learning_rate": 0.0001999545265486193, "loss": 2.097, "step": 276 }, { "epoch": 0.03967912906460393, "grad_norm": 1.3394397497177124, "learning_rate": 0.0001999531167127345, "loss": 1.9718, "step": 277 }, { "epoch": 0.039822375017905746, "grad_norm": 1.250309944152832, "learning_rate": 0.00019995168535941716, "loss": 1.9709, "step": 278 }, { "epoch": 0.039965620971207566, "grad_norm": 1.4585522413253784, "learning_rate": 0.00019995023248897535, "loss": 1.9048, "step": 279 }, { "epoch": 0.040108866924509386, "grad_norm": 1.2257181406021118, "learning_rate": 0.00019994875810172186, "loss": 2.0774, "step": 280 }, { "epoch": 0.040252112877811205, "grad_norm": 1.4613111019134521, "learning_rate": 0.00019994726219797412, "loss": 1.8333, "step": 281 }, { "epoch": 0.04039535883111302, "grad_norm": 1.3776381015777588, "learning_rate": 0.00019994574477805415, "loss": 2.0558, "step": 282 }, { "epoch": 0.04053860478441484, "grad_norm": 2.0125038623809814, "learning_rate": 0.0001999442058422886, "loss": 2.4184, "step": 283 }, { "epoch": 0.04068185073771666, "grad_norm": 1.1807944774627686, "learning_rate": 0.00019994264539100875, "loss": 1.9329, "step": 284 }, { "epoch": 0.04082509669101848, "grad_norm": 1.3612524271011353, "learning_rate": 0.00019994106342455053, "loss": 1.9494, "step": 285 }, { "epoch": 0.040968342644320296, "grad_norm": 1.6259981393814087, "learning_rate": 0.00019993945994325454, "loss": 2.0858, "step": 286 }, { "epoch": 0.041111588597622116, "grad_norm": 1.2712697982788086, "learning_rate": 0.00019993783494746592, "loss": 1.8944, "step": 287 }, { "epoch": 0.041254834550923936, "grad_norm": 1.5849056243896484, "learning_rate": 0.00019993618843753454, "loss": 2.0241, "step": 288 }, { "epoch": 0.041398080504225755, "grad_norm": 1.6149200201034546, "learning_rate": 0.00019993452041381483, "loss": 2.1524, "step": 289 }, { "epoch": 0.041541326457527575, "grad_norm": 1.3977165222167969, "learning_rate": 0.00019993283087666583, "loss": 2.0648, "step": 290 }, { "epoch": 0.041684572410829394, "grad_norm": 1.6440380811691284, "learning_rate": 0.0001999311198264513, "loss": 2.1419, "step": 291 }, { "epoch": 0.041827818364131214, "grad_norm": 1.3889896869659424, "learning_rate": 0.00019992938726353963, "loss": 2.289, "step": 292 }, { "epoch": 0.041971064317433034, "grad_norm": 1.6291149854660034, "learning_rate": 0.0001999276331883037, "loss": 2.1108, "step": 293 }, { "epoch": 0.04211431027073485, "grad_norm": 1.3153551816940308, "learning_rate": 0.0001999258576011212, "loss": 1.9377, "step": 294 }, { "epoch": 0.04225755622403667, "grad_norm": 1.5975871086120605, "learning_rate": 0.00019992406050237433, "loss": 1.8804, "step": 295 }, { "epoch": 0.04240080217733849, "grad_norm": 1.5043078660964966, "learning_rate": 0.00019992224189244997, "loss": 1.9086, "step": 296 }, { "epoch": 0.04254404813064031, "grad_norm": 1.3906041383743286, "learning_rate": 0.00019992040177173963, "loss": 2.1771, "step": 297 }, { "epoch": 0.04268729408394213, "grad_norm": 1.5055556297302246, "learning_rate": 0.00019991854014063943, "loss": 1.9426, "step": 298 }, { "epoch": 0.04283054003724395, "grad_norm": 1.4108178615570068, "learning_rate": 0.00019991665699955013, "loss": 2.1007, "step": 299 }, { "epoch": 0.042973785990545764, "grad_norm": 1.230557918548584, "learning_rate": 0.00019991475234887714, "loss": 1.8892, "step": 300 }, { "epoch": 0.043117031943847584, "grad_norm": 1.3338364362716675, "learning_rate": 0.0001999128261890305, "loss": 1.9198, "step": 301 }, { "epoch": 0.0432602778971494, "grad_norm": 1.3945890665054321, "learning_rate": 0.00019991087852042479, "loss": 1.9348, "step": 302 }, { "epoch": 0.04340352385045122, "grad_norm": 1.355621337890625, "learning_rate": 0.00019990890934347937, "loss": 1.9451, "step": 303 }, { "epoch": 0.04354676980375304, "grad_norm": 1.6459373235702515, "learning_rate": 0.00019990691865861814, "loss": 2.108, "step": 304 }, { "epoch": 0.04369001575705486, "grad_norm": 1.2340909242630005, "learning_rate": 0.0001999049064662696, "loss": 1.9682, "step": 305 }, { "epoch": 0.04383326171035668, "grad_norm": 1.5531057119369507, "learning_rate": 0.00019990287276686698, "loss": 2.3723, "step": 306 }, { "epoch": 0.0439765076636585, "grad_norm": 1.6007899045944214, "learning_rate": 0.00019990081756084808, "loss": 1.9459, "step": 307 }, { "epoch": 0.04411975361696032, "grad_norm": 1.3618367910385132, "learning_rate": 0.00019989874084865532, "loss": 2.0814, "step": 308 }, { "epoch": 0.04426299957026214, "grad_norm": 1.4074528217315674, "learning_rate": 0.00019989664263073573, "loss": 1.9987, "step": 309 }, { "epoch": 0.04440624552356396, "grad_norm": 1.596379280090332, "learning_rate": 0.00019989452290754107, "loss": 1.9317, "step": 310 }, { "epoch": 0.04454949147686578, "grad_norm": 1.6090009212493896, "learning_rate": 0.00019989238167952765, "loss": 2.1037, "step": 311 }, { "epoch": 0.0446927374301676, "grad_norm": 1.4958434104919434, "learning_rate": 0.00019989021894715637, "loss": 2.1303, "step": 312 }, { "epoch": 0.04483598338346942, "grad_norm": 1.3297470808029175, "learning_rate": 0.00019988803471089286, "loss": 2.1547, "step": 313 }, { "epoch": 0.04497922933677124, "grad_norm": 1.6264489889144897, "learning_rate": 0.00019988582897120734, "loss": 2.0452, "step": 314 }, { "epoch": 0.04512247529007306, "grad_norm": 1.5363062620162964, "learning_rate": 0.0001998836017285746, "loss": 1.9883, "step": 315 }, { "epoch": 0.04526572124337488, "grad_norm": 1.557101845741272, "learning_rate": 0.00019988135298347416, "loss": 1.7807, "step": 316 }, { "epoch": 0.0454089671966767, "grad_norm": 1.3252801895141602, "learning_rate": 0.00019987908273639011, "loss": 2.1036, "step": 317 }, { "epoch": 0.04555221314997851, "grad_norm": 1.2484028339385986, "learning_rate": 0.00019987679098781115, "loss": 1.9304, "step": 318 }, { "epoch": 0.04569545910328033, "grad_norm": 1.432595133781433, "learning_rate": 0.00019987447773823068, "loss": 1.9563, "step": 319 }, { "epoch": 0.04583870505658215, "grad_norm": 1.3906108140945435, "learning_rate": 0.00019987214298814666, "loss": 2.0004, "step": 320 }, { "epoch": 0.04598195100988397, "grad_norm": 1.4368585348129272, "learning_rate": 0.00019986978673806172, "loss": 1.9703, "step": 321 }, { "epoch": 0.04612519696318579, "grad_norm": 1.2342629432678223, "learning_rate": 0.00019986740898848306, "loss": 2.1163, "step": 322 }, { "epoch": 0.04626844291648761, "grad_norm": 1.3849512338638306, "learning_rate": 0.0001998650097399226, "loss": 1.9343, "step": 323 }, { "epoch": 0.04641168886978943, "grad_norm": 1.447033166885376, "learning_rate": 0.0001998625889928968, "loss": 2.0728, "step": 324 }, { "epoch": 0.04655493482309125, "grad_norm": 1.067741870880127, "learning_rate": 0.00019986014674792683, "loss": 2.0603, "step": 325 }, { "epoch": 0.04669818077639307, "grad_norm": 1.157702088356018, "learning_rate": 0.0001998576830055384, "loss": 1.7821, "step": 326 }, { "epoch": 0.046841426729694886, "grad_norm": 1.234670877456665, "learning_rate": 0.0001998551977662619, "loss": 1.8081, "step": 327 }, { "epoch": 0.046984672682996706, "grad_norm": 1.4706523418426514, "learning_rate": 0.00019985269103063238, "loss": 2.0863, "step": 328 }, { "epoch": 0.047127918636298526, "grad_norm": 1.6353185176849365, "learning_rate": 0.00019985016279918942, "loss": 1.9967, "step": 329 }, { "epoch": 0.047271164589600345, "grad_norm": 1.2527613639831543, "learning_rate": 0.00019984761307247735, "loss": 1.904, "step": 330 }, { "epoch": 0.047414410542902165, "grad_norm": 1.3356518745422363, "learning_rate": 0.00019984504185104502, "loss": 2.0357, "step": 331 }, { "epoch": 0.047557656496203984, "grad_norm": 1.5131295919418335, "learning_rate": 0.00019984244913544597, "loss": 2.0039, "step": 332 }, { "epoch": 0.047700902449505804, "grad_norm": 1.4638311862945557, "learning_rate": 0.00019983983492623833, "loss": 2.0443, "step": 333 }, { "epoch": 0.047844148402807624, "grad_norm": 1.2566871643066406, "learning_rate": 0.00019983719922398488, "loss": 2.2218, "step": 334 }, { "epoch": 0.04798739435610944, "grad_norm": 1.0978515148162842, "learning_rate": 0.00019983454202925302, "loss": 1.8873, "step": 335 }, { "epoch": 0.048130640309411256, "grad_norm": 1.3893675804138184, "learning_rate": 0.0001998318633426148, "loss": 1.9128, "step": 336 }, { "epoch": 0.048273886262713075, "grad_norm": 1.5992467403411865, "learning_rate": 0.0001998291631646468, "loss": 1.9888, "step": 337 }, { "epoch": 0.048417132216014895, "grad_norm": 1.32235586643219, "learning_rate": 0.00019982644149593043, "loss": 1.7996, "step": 338 }, { "epoch": 0.048560378169316715, "grad_norm": 1.8973851203918457, "learning_rate": 0.0001998236983370515, "loss": 2.3643, "step": 339 }, { "epoch": 0.048703624122618534, "grad_norm": 1.165830373764038, "learning_rate": 0.00019982093368860055, "loss": 1.8418, "step": 340 }, { "epoch": 0.048846870075920354, "grad_norm": 1.4823572635650635, "learning_rate": 0.0001998181475511728, "loss": 2.1028, "step": 341 }, { "epoch": 0.048990116029222174, "grad_norm": 1.3446413278579712, "learning_rate": 0.000199815339925368, "loss": 1.9097, "step": 342 }, { "epoch": 0.04913336198252399, "grad_norm": 1.3959591388702393, "learning_rate": 0.00019981251081179052, "loss": 1.8651, "step": 343 }, { "epoch": 0.04927660793582581, "grad_norm": 1.1690489053726196, "learning_rate": 0.00019980966021104947, "loss": 1.9266, "step": 344 }, { "epoch": 0.04941985388912763, "grad_norm": 1.3153574466705322, "learning_rate": 0.00019980678812375847, "loss": 2.0484, "step": 345 }, { "epoch": 0.04956309984242945, "grad_norm": 1.2163996696472168, "learning_rate": 0.00019980389455053583, "loss": 1.7331, "step": 346 }, { "epoch": 0.04970634579573127, "grad_norm": 1.445618987083435, "learning_rate": 0.00019980097949200444, "loss": 2.1082, "step": 347 }, { "epoch": 0.04984959174903309, "grad_norm": 1.2888035774230957, "learning_rate": 0.00019979804294879188, "loss": 1.9496, "step": 348 }, { "epoch": 0.04999283770233491, "grad_norm": 1.1476936340332031, "learning_rate": 0.00019979508492153026, "loss": 1.947, "step": 349 }, { "epoch": 0.05013608365563673, "grad_norm": 1.763843297958374, "learning_rate": 0.00019979210541085644, "loss": 2.1944, "step": 350 }, { "epoch": 0.05027932960893855, "grad_norm": 1.307909369468689, "learning_rate": 0.00019978910441741175, "loss": 2.1688, "step": 351 }, { "epoch": 0.05042257556224037, "grad_norm": 1.4061204195022583, "learning_rate": 0.00019978608194184228, "loss": 2.0543, "step": 352 }, { "epoch": 0.05056582151554219, "grad_norm": 1.554964542388916, "learning_rate": 0.00019978303798479875, "loss": 1.9538, "step": 353 }, { "epoch": 0.050709067468844, "grad_norm": 1.3116655349731445, "learning_rate": 0.00019977997254693632, "loss": 2.0414, "step": 354 }, { "epoch": 0.05085231342214582, "grad_norm": 1.3545162677764893, "learning_rate": 0.00019977688562891502, "loss": 2.1962, "step": 355 }, { "epoch": 0.05099555937544764, "grad_norm": 1.5842483043670654, "learning_rate": 0.00019977377723139932, "loss": 2.0936, "step": 356 }, { "epoch": 0.05113880532874946, "grad_norm": 1.5763037204742432, "learning_rate": 0.00019977064735505836, "loss": 2.0445, "step": 357 }, { "epoch": 0.05128205128205128, "grad_norm": 1.499285340309143, "learning_rate": 0.00019976749600056603, "loss": 2.0224, "step": 358 }, { "epoch": 0.0514252972353531, "grad_norm": 1.6249871253967285, "learning_rate": 0.00019976432316860067, "loss": 2.191, "step": 359 }, { "epoch": 0.05156854318865492, "grad_norm": 1.2954195737838745, "learning_rate": 0.00019976112885984525, "loss": 1.8544, "step": 360 }, { "epoch": 0.05171178914195674, "grad_norm": 1.5711109638214111, "learning_rate": 0.00019975791307498756, "loss": 2.2776, "step": 361 }, { "epoch": 0.05185503509525856, "grad_norm": 1.373634696006775, "learning_rate": 0.0001997546758147198, "loss": 1.7861, "step": 362 }, { "epoch": 0.05199828104856038, "grad_norm": 1.4684334993362427, "learning_rate": 0.00019975141707973882, "loss": 2.0559, "step": 363 }, { "epoch": 0.0521415270018622, "grad_norm": 1.498948097229004, "learning_rate": 0.00019974813687074626, "loss": 1.9949, "step": 364 }, { "epoch": 0.05228477295516402, "grad_norm": 1.411465048789978, "learning_rate": 0.0001997448351884482, "loss": 2.3365, "step": 365 }, { "epoch": 0.05242801890846584, "grad_norm": 1.2386773824691772, "learning_rate": 0.00019974151203355545, "loss": 1.7988, "step": 366 }, { "epoch": 0.05257126486176766, "grad_norm": 1.1888538599014282, "learning_rate": 0.00019973816740678335, "loss": 1.9336, "step": 367 }, { "epoch": 0.052714510815069476, "grad_norm": 1.5913686752319336, "learning_rate": 0.00019973480130885195, "loss": 2.0819, "step": 368 }, { "epoch": 0.052857756768371296, "grad_norm": 1.3778491020202637, "learning_rate": 0.00019973141374048588, "loss": 1.8411, "step": 369 }, { "epoch": 0.053001002721673116, "grad_norm": 1.1902939081192017, "learning_rate": 0.0001997280047024144, "loss": 1.8307, "step": 370 }, { "epoch": 0.053144248674974935, "grad_norm": 1.4414849281311035, "learning_rate": 0.00019972457419537143, "loss": 2.0446, "step": 371 }, { "epoch": 0.05328749462827675, "grad_norm": 1.3872461318969727, "learning_rate": 0.0001997211222200954, "loss": 2.0007, "step": 372 }, { "epoch": 0.05343074058157857, "grad_norm": 1.3155354261398315, "learning_rate": 0.0001997176487773295, "loss": 2.0912, "step": 373 }, { "epoch": 0.05357398653488039, "grad_norm": 1.5374528169631958, "learning_rate": 0.0001997141538678214, "loss": 1.9541, "step": 374 }, { "epoch": 0.05371723248818221, "grad_norm": 1.3710609674453735, "learning_rate": 0.00019971063749232352, "loss": 2.1359, "step": 375 }, { "epoch": 0.053860478441484026, "grad_norm": 1.547144889831543, "learning_rate": 0.0001997070996515929, "loss": 1.9254, "step": 376 }, { "epoch": 0.054003724394785846, "grad_norm": 1.3555607795715332, "learning_rate": 0.00019970354034639103, "loss": 1.9206, "step": 377 }, { "epoch": 0.054146970348087665, "grad_norm": 1.2106049060821533, "learning_rate": 0.00019969995957748422, "loss": 1.935, "step": 378 }, { "epoch": 0.054290216301389485, "grad_norm": 1.5011792182922363, "learning_rate": 0.0001996963573456433, "loss": 2.1391, "step": 379 }, { "epoch": 0.054433462254691305, "grad_norm": 1.2954236268997192, "learning_rate": 0.0001996927336516438, "loss": 2.1518, "step": 380 }, { "epoch": 0.054576708207993124, "grad_norm": 1.3048149347305298, "learning_rate": 0.00019968908849626567, "loss": 2.0662, "step": 381 }, { "epoch": 0.054719954161294944, "grad_norm": 1.3198808431625366, "learning_rate": 0.00019968542188029376, "loss": 1.9644, "step": 382 }, { "epoch": 0.054863200114596763, "grad_norm": 1.377397894859314, "learning_rate": 0.00019968173380451734, "loss": 1.8829, "step": 383 }, { "epoch": 0.05500644606789858, "grad_norm": 1.548907995223999, "learning_rate": 0.00019967802426973039, "loss": 2.1388, "step": 384 }, { "epoch": 0.0551496920212004, "grad_norm": 1.2217864990234375, "learning_rate": 0.00019967429327673143, "loss": 1.9758, "step": 385 }, { "epoch": 0.05529293797450222, "grad_norm": 1.2895621061325073, "learning_rate": 0.0001996705408263237, "loss": 2.1857, "step": 386 }, { "epoch": 0.05543618392780404, "grad_norm": 2.0282673835754395, "learning_rate": 0.00019966676691931498, "loss": 2.223, "step": 387 }, { "epoch": 0.05557942988110586, "grad_norm": 1.4661885499954224, "learning_rate": 0.00019966297155651775, "loss": 2.0125, "step": 388 }, { "epoch": 0.05572267583440768, "grad_norm": 1.6159390211105347, "learning_rate": 0.00019965915473874898, "loss": 2.1099, "step": 389 }, { "epoch": 0.055865921787709494, "grad_norm": 1.3449915647506714, "learning_rate": 0.0001996553164668304, "loss": 2.1298, "step": 390 }, { "epoch": 0.05600916774101131, "grad_norm": 1.3918635845184326, "learning_rate": 0.00019965145674158825, "loss": 2.1353, "step": 391 }, { "epoch": 0.05615241369431313, "grad_norm": 1.2471503019332886, "learning_rate": 0.00019964757556385348, "loss": 2.0842, "step": 392 }, { "epoch": 0.05629565964761495, "grad_norm": 1.369918942451477, "learning_rate": 0.00019964367293446154, "loss": 2.1243, "step": 393 }, { "epoch": 0.05643890560091677, "grad_norm": 1.4609460830688477, "learning_rate": 0.00019963974885425266, "loss": 1.9133, "step": 394 }, { "epoch": 0.05658215155421859, "grad_norm": 1.5046788454055786, "learning_rate": 0.00019963580332407155, "loss": 1.7329, "step": 395 }, { "epoch": 0.05672539750752041, "grad_norm": 1.3716177940368652, "learning_rate": 0.00019963183634476756, "loss": 1.9633, "step": 396 }, { "epoch": 0.05686864346082223, "grad_norm": 1.2313588857650757, "learning_rate": 0.00019962784791719473, "loss": 1.7349, "step": 397 }, { "epoch": 0.05701188941412405, "grad_norm": 1.2433929443359375, "learning_rate": 0.00019962383804221164, "loss": 2.0327, "step": 398 }, { "epoch": 0.05715513536742587, "grad_norm": 1.5789649486541748, "learning_rate": 0.00019961980672068155, "loss": 1.984, "step": 399 }, { "epoch": 0.05729838132072769, "grad_norm": 1.5980504751205444, "learning_rate": 0.00019961575395347224, "loss": 2.1896, "step": 400 }, { "epoch": 0.05744162727402951, "grad_norm": 1.5471700429916382, "learning_rate": 0.00019961167974145625, "loss": 1.8995, "step": 401 }, { "epoch": 0.05758487322733133, "grad_norm": 1.2064976692199707, "learning_rate": 0.0001996075840855106, "loss": 1.8129, "step": 402 }, { "epoch": 0.05772811918063315, "grad_norm": 1.3381381034851074, "learning_rate": 0.000199603466986517, "loss": 1.967, "step": 403 }, { "epoch": 0.05787136513393497, "grad_norm": 1.542614221572876, "learning_rate": 0.00019959932844536177, "loss": 1.8058, "step": 404 }, { "epoch": 0.05801461108723679, "grad_norm": 1.2417548894882202, "learning_rate": 0.0001995951684629358, "loss": 1.9628, "step": 405 }, { "epoch": 0.05815785704053861, "grad_norm": 1.6852108240127563, "learning_rate": 0.0001995909870401347, "loss": 1.9207, "step": 406 }, { "epoch": 0.05830110299384043, "grad_norm": 1.2398333549499512, "learning_rate": 0.00019958678417785858, "loss": 1.8877, "step": 407 }, { "epoch": 0.05844434894714224, "grad_norm": 1.0941513776779175, "learning_rate": 0.00019958255987701217, "loss": 2.0225, "step": 408 }, { "epoch": 0.05858759490044406, "grad_norm": 1.5106207132339478, "learning_rate": 0.00019957831413850493, "loss": 1.9624, "step": 409 }, { "epoch": 0.05873084085374588, "grad_norm": 1.5055174827575684, "learning_rate": 0.00019957404696325086, "loss": 1.9741, "step": 410 }, { "epoch": 0.0588740868070477, "grad_norm": 1.2165216207504272, "learning_rate": 0.00019956975835216852, "loss": 2.1074, "step": 411 }, { "epoch": 0.05901733276034952, "grad_norm": 1.3454694747924805, "learning_rate": 0.0001995654483061812, "loss": 1.9755, "step": 412 }, { "epoch": 0.05916057871365134, "grad_norm": 1.620636224746704, "learning_rate": 0.00019956111682621673, "loss": 1.913, "step": 413 }, { "epoch": 0.05930382466695316, "grad_norm": 1.2214117050170898, "learning_rate": 0.00019955676391320752, "loss": 2.1167, "step": 414 }, { "epoch": 0.05944707062025498, "grad_norm": 1.1916916370391846, "learning_rate": 0.00019955238956809073, "loss": 2.1619, "step": 415 }, { "epoch": 0.0595903165735568, "grad_norm": 1.2380868196487427, "learning_rate": 0.00019954799379180797, "loss": 2.0448, "step": 416 }, { "epoch": 0.059733562526858616, "grad_norm": 1.2401046752929688, "learning_rate": 0.0001995435765853056, "loss": 1.9667, "step": 417 }, { "epoch": 0.059876808480160436, "grad_norm": 1.4161618947982788, "learning_rate": 0.00019953913794953451, "loss": 1.8608, "step": 418 }, { "epoch": 0.060020054433462255, "grad_norm": 1.4902164936065674, "learning_rate": 0.0001995346778854502, "loss": 2.0256, "step": 419 }, { "epoch": 0.060163300386764075, "grad_norm": 1.6481677293777466, "learning_rate": 0.00019953019639401288, "loss": 2.104, "step": 420 }, { "epoch": 0.060306546340065895, "grad_norm": 1.4211974143981934, "learning_rate": 0.00019952569347618727, "loss": 2.0294, "step": 421 }, { "epoch": 0.060449792293367714, "grad_norm": 1.4712406396865845, "learning_rate": 0.0001995211691329427, "loss": 2.1101, "step": 422 }, { "epoch": 0.060593038246669534, "grad_norm": 1.6464601755142212, "learning_rate": 0.00019951662336525322, "loss": 2.0296, "step": 423 }, { "epoch": 0.060736284199971353, "grad_norm": 1.44418203830719, "learning_rate": 0.00019951205617409735, "loss": 2.1663, "step": 424 }, { "epoch": 0.06087953015327317, "grad_norm": 1.3139228820800781, "learning_rate": 0.00019950746756045834, "loss": 2.1475, "step": 425 }, { "epoch": 0.06102277610657499, "grad_norm": 1.5912656784057617, "learning_rate": 0.00019950285752532397, "loss": 2.1121, "step": 426 }, { "epoch": 0.061166022059876805, "grad_norm": 1.4179151058197021, "learning_rate": 0.00019949822606968673, "loss": 1.9011, "step": 427 }, { "epoch": 0.061309268013178625, "grad_norm": 1.2277412414550781, "learning_rate": 0.0001994935731945436, "loss": 2.0042, "step": 428 }, { "epoch": 0.061452513966480445, "grad_norm": 1.328521490097046, "learning_rate": 0.00019948889890089623, "loss": 2.0258, "step": 429 }, { "epoch": 0.061595759919782264, "grad_norm": 1.693434238433838, "learning_rate": 0.00019948420318975093, "loss": 1.9883, "step": 430 }, { "epoch": 0.061739005873084084, "grad_norm": 1.3324311971664429, "learning_rate": 0.0001994794860621185, "loss": 2.0259, "step": 431 }, { "epoch": 0.0618822518263859, "grad_norm": 1.3343366384506226, "learning_rate": 0.00019947474751901448, "loss": 2.1525, "step": 432 }, { "epoch": 0.06202549777968772, "grad_norm": 1.3561005592346191, "learning_rate": 0.0001994699875614589, "loss": 1.8517, "step": 433 }, { "epoch": 0.06216874373298954, "grad_norm": 1.1823476552963257, "learning_rate": 0.00019946520619047652, "loss": 1.98, "step": 434 }, { "epoch": 0.06231198968629136, "grad_norm": 1.4545029401779175, "learning_rate": 0.00019946040340709665, "loss": 1.9867, "step": 435 }, { "epoch": 0.06245523563959318, "grad_norm": 1.332228183746338, "learning_rate": 0.00019945557921235318, "loss": 1.9919, "step": 436 }, { "epoch": 0.062598481592895, "grad_norm": 1.2009073495864868, "learning_rate": 0.00019945073360728462, "loss": 1.9498, "step": 437 }, { "epoch": 0.06274172754619682, "grad_norm": 1.24563467502594, "learning_rate": 0.00019944586659293416, "loss": 2.0434, "step": 438 }, { "epoch": 0.06288497349949863, "grad_norm": 1.6802427768707275, "learning_rate": 0.00019944097817034952, "loss": 2.0288, "step": 439 }, { "epoch": 0.06302821945280046, "grad_norm": 1.318039059638977, "learning_rate": 0.00019943606834058305, "loss": 1.8909, "step": 440 }, { "epoch": 0.06317146540610227, "grad_norm": 1.7798895835876465, "learning_rate": 0.00019943113710469177, "loss": 1.9639, "step": 441 }, { "epoch": 0.0633147113594041, "grad_norm": 1.559804916381836, "learning_rate": 0.00019942618446373717, "loss": 2.1037, "step": 442 }, { "epoch": 0.06345795731270591, "grad_norm": 1.5088804960250854, "learning_rate": 0.0001994212104187855, "loss": 1.8103, "step": 443 }, { "epoch": 0.06360120326600774, "grad_norm": 1.470672369003296, "learning_rate": 0.00019941621497090748, "loss": 1.8882, "step": 444 }, { "epoch": 0.06374444921930955, "grad_norm": 1.4069653749465942, "learning_rate": 0.0001994111981211786, "loss": 2.1634, "step": 445 }, { "epoch": 0.06388769517261138, "grad_norm": 1.339611530303955, "learning_rate": 0.00019940615987067877, "loss": 1.9897, "step": 446 }, { "epoch": 0.06403094112591319, "grad_norm": 1.2784814834594727, "learning_rate": 0.00019940110022049264, "loss": 1.9671, "step": 447 }, { "epoch": 0.06417418707921502, "grad_norm": 1.4795504808425903, "learning_rate": 0.00019939601917170943, "loss": 2.0881, "step": 448 }, { "epoch": 0.06431743303251683, "grad_norm": 1.3208190202713013, "learning_rate": 0.00019939091672542295, "loss": 2.0966, "step": 449 }, { "epoch": 0.06446067898581866, "grad_norm": 1.4620670080184937, "learning_rate": 0.00019938579288273167, "loss": 1.9524, "step": 450 }, { "epoch": 0.06460392493912047, "grad_norm": 1.6208335161209106, "learning_rate": 0.0001993806476447386, "loss": 1.9457, "step": 451 }, { "epoch": 0.0647471708924223, "grad_norm": 1.6700886487960815, "learning_rate": 0.00019937548101255139, "loss": 1.847, "step": 452 }, { "epoch": 0.06489041684572411, "grad_norm": 1.1986421346664429, "learning_rate": 0.00019937029298728226, "loss": 1.7493, "step": 453 }, { "epoch": 0.06503366279902592, "grad_norm": 1.657135248184204, "learning_rate": 0.00019936508357004806, "loss": 1.9111, "step": 454 }, { "epoch": 0.06517690875232775, "grad_norm": 1.4031460285186768, "learning_rate": 0.0001993598527619703, "loss": 1.7746, "step": 455 }, { "epoch": 0.06532015470562956, "grad_norm": 1.2285127639770508, "learning_rate": 0.000199354600564175, "loss": 1.8288, "step": 456 }, { "epoch": 0.06546340065893139, "grad_norm": 1.522947907447815, "learning_rate": 0.00019934932697779288, "loss": 2.1844, "step": 457 }, { "epoch": 0.0656066466122332, "grad_norm": 1.5217342376708984, "learning_rate": 0.00019934403200395914, "loss": 1.882, "step": 458 }, { "epoch": 0.06574989256553503, "grad_norm": 1.5375550985336304, "learning_rate": 0.0001993387156438137, "loss": 1.7221, "step": 459 }, { "epoch": 0.06589313851883684, "grad_norm": 1.298375129699707, "learning_rate": 0.000199333377898501, "loss": 1.9242, "step": 460 }, { "epoch": 0.06603638447213867, "grad_norm": 1.7595490217208862, "learning_rate": 0.00019932801876917022, "loss": 2.0323, "step": 461 }, { "epoch": 0.06617963042544048, "grad_norm": 1.4374971389770508, "learning_rate": 0.00019932263825697498, "loss": 1.9913, "step": 462 }, { "epoch": 0.0663228763787423, "grad_norm": 1.5165716409683228, "learning_rate": 0.00019931723636307353, "loss": 2.0238, "step": 463 }, { "epoch": 0.06646612233204412, "grad_norm": 2.313573122024536, "learning_rate": 0.00019931181308862885, "loss": 2.0214, "step": 464 }, { "epoch": 0.06660936828534594, "grad_norm": 1.250164270401001, "learning_rate": 0.00019930636843480838, "loss": 1.9533, "step": 465 }, { "epoch": 0.06675261423864776, "grad_norm": 1.1751495599746704, "learning_rate": 0.00019930090240278422, "loss": 1.9515, "step": 466 }, { "epoch": 0.06689586019194958, "grad_norm": 1.307032823562622, "learning_rate": 0.0001992954149937331, "loss": 1.8049, "step": 467 }, { "epoch": 0.0670391061452514, "grad_norm": 1.2220652103424072, "learning_rate": 0.00019928990620883633, "loss": 1.9115, "step": 468 }, { "epoch": 0.06718235209855322, "grad_norm": 1.2635343074798584, "learning_rate": 0.0001992843760492798, "loss": 1.9745, "step": 469 }, { "epoch": 0.06732559805185503, "grad_norm": 1.3601716756820679, "learning_rate": 0.00019927882451625402, "loss": 2.0441, "step": 470 }, { "epoch": 0.06746884400515686, "grad_norm": 1.3189035654067993, "learning_rate": 0.00019927325161095408, "loss": 2.0821, "step": 471 }, { "epoch": 0.06761208995845867, "grad_norm": 1.3500027656555176, "learning_rate": 0.0001992676573345797, "loss": 1.8935, "step": 472 }, { "epoch": 0.06775533591176049, "grad_norm": 1.1705610752105713, "learning_rate": 0.00019926204168833515, "loss": 1.9297, "step": 473 }, { "epoch": 0.06789858186506231, "grad_norm": 1.4547010660171509, "learning_rate": 0.0001992564046734294, "loss": 2.0614, "step": 474 }, { "epoch": 0.06804182781836413, "grad_norm": 1.535499930381775, "learning_rate": 0.000199250746291076, "loss": 1.824, "step": 475 }, { "epoch": 0.06818507377166595, "grad_norm": 1.2382667064666748, "learning_rate": 0.00019924506654249292, "loss": 1.8132, "step": 476 }, { "epoch": 0.06832831972496776, "grad_norm": 1.3612666130065918, "learning_rate": 0.00019923936542890297, "loss": 2.0766, "step": 477 }, { "epoch": 0.06847156567826959, "grad_norm": 1.6234214305877686, "learning_rate": 0.00019923364295153343, "loss": 1.8553, "step": 478 }, { "epoch": 0.0686148116315714, "grad_norm": 1.401366114616394, "learning_rate": 0.00019922789911161622, "loss": 1.994, "step": 479 }, { "epoch": 0.06875805758487323, "grad_norm": 1.385826826095581, "learning_rate": 0.00019922213391038786, "loss": 1.9807, "step": 480 }, { "epoch": 0.06890130353817504, "grad_norm": 1.2016290426254272, "learning_rate": 0.00019921634734908943, "loss": 1.8815, "step": 481 }, { "epoch": 0.06904454949147687, "grad_norm": 1.5417659282684326, "learning_rate": 0.00019921053942896665, "loss": 1.9502, "step": 482 }, { "epoch": 0.06918779544477868, "grad_norm": 1.5243892669677734, "learning_rate": 0.0001992047101512698, "loss": 2.0321, "step": 483 }, { "epoch": 0.06933104139808051, "grad_norm": 1.6552860736846924, "learning_rate": 0.00019919885951725374, "loss": 1.8776, "step": 484 }, { "epoch": 0.06947428735138232, "grad_norm": 1.693177342414856, "learning_rate": 0.00019919298752817807, "loss": 2.0605, "step": 485 }, { "epoch": 0.06961753330468415, "grad_norm": 1.5228900909423828, "learning_rate": 0.0001991870941853068, "loss": 1.6713, "step": 486 }, { "epoch": 0.06976077925798596, "grad_norm": 1.225235104560852, "learning_rate": 0.00019918117948990868, "loss": 2.006, "step": 487 }, { "epoch": 0.06990402521128779, "grad_norm": 1.6665676832199097, "learning_rate": 0.00019917524344325696, "loss": 1.8929, "step": 488 }, { "epoch": 0.0700472711645896, "grad_norm": 1.4340968132019043, "learning_rate": 0.00019916928604662952, "loss": 2.1841, "step": 489 }, { "epoch": 0.07019051711789141, "grad_norm": 1.5390084981918335, "learning_rate": 0.0001991633073013089, "loss": 1.9178, "step": 490 }, { "epoch": 0.07033376307119324, "grad_norm": 1.2651180028915405, "learning_rate": 0.00019915730720858208, "loss": 1.9811, "step": 491 }, { "epoch": 0.07047700902449505, "grad_norm": 1.4839897155761719, "learning_rate": 0.00019915128576974077, "loss": 2.0874, "step": 492 }, { "epoch": 0.07062025497779688, "grad_norm": 2.026418685913086, "learning_rate": 0.00019914524298608128, "loss": 2.0269, "step": 493 }, { "epoch": 0.07076350093109869, "grad_norm": 1.4041849374771118, "learning_rate": 0.00019913917885890442, "loss": 1.7953, "step": 494 }, { "epoch": 0.07090674688440052, "grad_norm": 1.6482614278793335, "learning_rate": 0.00019913309338951568, "loss": 1.9059, "step": 495 }, { "epoch": 0.07104999283770233, "grad_norm": 1.6024208068847656, "learning_rate": 0.00019912698657922507, "loss": 1.8433, "step": 496 }, { "epoch": 0.07119323879100416, "grad_norm": 1.2326537370681763, "learning_rate": 0.0001991208584293473, "loss": 1.837, "step": 497 }, { "epoch": 0.07133648474430597, "grad_norm": 1.1278340816497803, "learning_rate": 0.00019911470894120155, "loss": 1.989, "step": 498 }, { "epoch": 0.0714797306976078, "grad_norm": 1.349207878112793, "learning_rate": 0.0001991085381161117, "loss": 1.9829, "step": 499 }, { "epoch": 0.07162297665090961, "grad_norm": 1.4765079021453857, "learning_rate": 0.0001991023459554061, "loss": 1.9638, "step": 500 }, { "epoch": 0.07176622260421144, "grad_norm": 1.4788472652435303, "learning_rate": 0.00019909613246041788, "loss": 1.9042, "step": 501 }, { "epoch": 0.07190946855751325, "grad_norm": 1.5731797218322754, "learning_rate": 0.00019908989763248454, "loss": 1.9095, "step": 502 }, { "epoch": 0.07205271451081507, "grad_norm": 1.2606579065322876, "learning_rate": 0.00019908364147294835, "loss": 1.7536, "step": 503 }, { "epoch": 0.07219596046411689, "grad_norm": 1.251291036605835, "learning_rate": 0.00019907736398315607, "loss": 1.9551, "step": 504 }, { "epoch": 0.07233920641741871, "grad_norm": 1.0975549221038818, "learning_rate": 0.00019907106516445913, "loss": 1.9077, "step": 505 }, { "epoch": 0.07248245237072053, "grad_norm": 1.2490005493164062, "learning_rate": 0.00019906474501821348, "loss": 1.785, "step": 506 }, { "epoch": 0.07262569832402235, "grad_norm": 1.2707229852676392, "learning_rate": 0.00019905840354577972, "loss": 1.8963, "step": 507 }, { "epoch": 0.07276894427732417, "grad_norm": 1.333234190940857, "learning_rate": 0.00019905204074852298, "loss": 1.8579, "step": 508 }, { "epoch": 0.07291219023062598, "grad_norm": 1.1848360300064087, "learning_rate": 0.000199045656627813, "loss": 1.6882, "step": 509 }, { "epoch": 0.0730554361839278, "grad_norm": 1.6569358110427856, "learning_rate": 0.00019903925118502417, "loss": 1.8705, "step": 510 }, { "epoch": 0.07319868213722962, "grad_norm": 1.1256681680679321, "learning_rate": 0.0001990328244215354, "loss": 1.9893, "step": 511 }, { "epoch": 0.07334192809053144, "grad_norm": 1.3366971015930176, "learning_rate": 0.00019902637633873021, "loss": 1.8577, "step": 512 }, { "epoch": 0.07348517404383326, "grad_norm": 1.3253220319747925, "learning_rate": 0.00019901990693799672, "loss": 2.1919, "step": 513 }, { "epoch": 0.07362841999713508, "grad_norm": 1.6851608753204346, "learning_rate": 0.00019901341622072764, "loss": 1.6948, "step": 514 }, { "epoch": 0.0737716659504369, "grad_norm": 1.4687020778656006, "learning_rate": 0.00019900690418832022, "loss": 1.8024, "step": 515 }, { "epoch": 0.07391491190373872, "grad_norm": 1.5015839338302612, "learning_rate": 0.00019900037084217637, "loss": 2.0096, "step": 516 }, { "epoch": 0.07405815785704054, "grad_norm": 1.3085094690322876, "learning_rate": 0.0001989938161837026, "loss": 1.9152, "step": 517 }, { "epoch": 0.07420140381034236, "grad_norm": 1.6648415327072144, "learning_rate": 0.0001989872402143099, "loss": 2.0185, "step": 518 }, { "epoch": 0.07434464976364417, "grad_norm": 1.1659842729568481, "learning_rate": 0.00019898064293541392, "loss": 1.9548, "step": 519 }, { "epoch": 0.074487895716946, "grad_norm": 1.765710711479187, "learning_rate": 0.0001989740243484349, "loss": 1.8557, "step": 520 }, { "epoch": 0.07463114167024781, "grad_norm": 1.6128560304641724, "learning_rate": 0.00019896738445479768, "loss": 1.9519, "step": 521 }, { "epoch": 0.07477438762354964, "grad_norm": 1.596352458000183, "learning_rate": 0.00019896072325593166, "loss": 1.8127, "step": 522 }, { "epoch": 0.07491763357685145, "grad_norm": 1.475137710571289, "learning_rate": 0.0001989540407532708, "loss": 1.8512, "step": 523 }, { "epoch": 0.07506087953015328, "grad_norm": 1.5763697624206543, "learning_rate": 0.00019894733694825374, "loss": 1.7669, "step": 524 }, { "epoch": 0.07520412548345509, "grad_norm": 1.620430588722229, "learning_rate": 0.00019894061184232357, "loss": 2.1306, "step": 525 }, { "epoch": 0.0753473714367569, "grad_norm": 1.470253586769104, "learning_rate": 0.00019893386543692806, "loss": 1.9267, "step": 526 }, { "epoch": 0.07549061739005873, "grad_norm": 1.4225101470947266, "learning_rate": 0.00019892709773351958, "loss": 1.7245, "step": 527 }, { "epoch": 0.07563386334336054, "grad_norm": 1.5447077751159668, "learning_rate": 0.00019892030873355497, "loss": 1.8163, "step": 528 }, { "epoch": 0.07577710929666237, "grad_norm": 1.3347465991973877, "learning_rate": 0.00019891349843849585, "loss": 1.9, "step": 529 }, { "epoch": 0.07592035524996418, "grad_norm": 1.5933501720428467, "learning_rate": 0.0001989066668498082, "loss": 2.0117, "step": 530 }, { "epoch": 0.07606360120326601, "grad_norm": 1.260048508644104, "learning_rate": 0.00019889981396896278, "loss": 1.6273, "step": 531 }, { "epoch": 0.07620684715656782, "grad_norm": 1.3954657316207886, "learning_rate": 0.00019889293979743477, "loss": 1.8269, "step": 532 }, { "epoch": 0.07635009310986965, "grad_norm": 1.7805372476577759, "learning_rate": 0.00019888604433670405, "loss": 2.0946, "step": 533 }, { "epoch": 0.07649333906317146, "grad_norm": 1.489526629447937, "learning_rate": 0.00019887912758825504, "loss": 1.859, "step": 534 }, { "epoch": 0.07663658501647329, "grad_norm": 1.2721531391143799, "learning_rate": 0.00019887218955357675, "loss": 1.8398, "step": 535 }, { "epoch": 0.0767798309697751, "grad_norm": 1.2705563306808472, "learning_rate": 0.00019886523023416274, "loss": 1.9455, "step": 536 }, { "epoch": 0.07692307692307693, "grad_norm": 1.668686866760254, "learning_rate": 0.00019885824963151118, "loss": 1.985, "step": 537 }, { "epoch": 0.07706632287637874, "grad_norm": 1.633049488067627, "learning_rate": 0.00019885124774712487, "loss": 1.8412, "step": 538 }, { "epoch": 0.07720956882968057, "grad_norm": 1.5006086826324463, "learning_rate": 0.00019884422458251109, "loss": 1.8652, "step": 539 }, { "epoch": 0.07735281478298238, "grad_norm": 1.7487094402313232, "learning_rate": 0.00019883718013918178, "loss": 1.951, "step": 540 }, { "epoch": 0.0774960607362842, "grad_norm": 1.7116528749465942, "learning_rate": 0.00019883011441865342, "loss": 1.6665, "step": 541 }, { "epoch": 0.07763930668958602, "grad_norm": 1.8973088264465332, "learning_rate": 0.0001988230274224471, "loss": 1.8049, "step": 542 }, { "epoch": 0.07778255264288784, "grad_norm": 1.6456761360168457, "learning_rate": 0.00019881591915208845, "loss": 1.8446, "step": 543 }, { "epoch": 0.07792579859618966, "grad_norm": 1.4625601768493652, "learning_rate": 0.00019880878960910772, "loss": 1.8848, "step": 544 }, { "epoch": 0.07806904454949147, "grad_norm": 1.8032500743865967, "learning_rate": 0.00019880163879503973, "loss": 2.1432, "step": 545 }, { "epoch": 0.0782122905027933, "grad_norm": 1.427456259727478, "learning_rate": 0.00019879446671142387, "loss": 1.8321, "step": 546 }, { "epoch": 0.07835553645609511, "grad_norm": 1.3966703414916992, "learning_rate": 0.00019878727335980413, "loss": 1.9034, "step": 547 }, { "epoch": 0.07849878240939694, "grad_norm": 1.3583844900131226, "learning_rate": 0.000198780058741729, "loss": 1.9344, "step": 548 }, { "epoch": 0.07864202836269875, "grad_norm": 1.497982382774353, "learning_rate": 0.0001987728228587517, "loss": 1.8587, "step": 549 }, { "epoch": 0.07878527431600058, "grad_norm": 1.5875831842422485, "learning_rate": 0.00019876556571242986, "loss": 1.8476, "step": 550 }, { "epoch": 0.07892852026930239, "grad_norm": 1.4853816032409668, "learning_rate": 0.0001987582873043258, "loss": 1.907, "step": 551 }, { "epoch": 0.07907176622260421, "grad_norm": 1.4317355155944824, "learning_rate": 0.00019875098763600635, "loss": 1.7035, "step": 552 }, { "epoch": 0.07921501217590603, "grad_norm": 1.4166377782821655, "learning_rate": 0.00019874366670904299, "loss": 1.9747, "step": 553 }, { "epoch": 0.07935825812920785, "grad_norm": 1.7570289373397827, "learning_rate": 0.00019873632452501168, "loss": 1.7895, "step": 554 }, { "epoch": 0.07950150408250967, "grad_norm": 1.395645260810852, "learning_rate": 0.00019872896108549308, "loss": 1.9733, "step": 555 }, { "epoch": 0.07964475003581149, "grad_norm": 2.197361946105957, "learning_rate": 0.00019872157639207232, "loss": 1.9965, "step": 556 }, { "epoch": 0.0797879959891133, "grad_norm": 1.3623532056808472, "learning_rate": 0.00019871417044633913, "loss": 1.7679, "step": 557 }, { "epoch": 0.07993124194241513, "grad_norm": 1.8133429288864136, "learning_rate": 0.00019870674324988781, "loss": 1.8795, "step": 558 }, { "epoch": 0.08007448789571694, "grad_norm": 1.5363651514053345, "learning_rate": 0.00019869929480431734, "loss": 1.9855, "step": 559 }, { "epoch": 0.08021773384901877, "grad_norm": 1.8445954322814941, "learning_rate": 0.00019869182511123113, "loss": 1.9694, "step": 560 }, { "epoch": 0.08036097980232058, "grad_norm": 1.2648767232894897, "learning_rate": 0.00019868433417223716, "loss": 1.7517, "step": 561 }, { "epoch": 0.08050422575562241, "grad_norm": 1.5396267175674438, "learning_rate": 0.00019867682198894816, "loss": 1.8148, "step": 562 }, { "epoch": 0.08064747170892422, "grad_norm": 1.5066369771957397, "learning_rate": 0.00019866928856298119, "loss": 1.8024, "step": 563 }, { "epoch": 0.08079071766222604, "grad_norm": 1.432725191116333, "learning_rate": 0.00019866173389595813, "loss": 1.9704, "step": 564 }, { "epoch": 0.08093396361552786, "grad_norm": 1.9395262002944946, "learning_rate": 0.00019865415798950525, "loss": 1.9467, "step": 565 }, { "epoch": 0.08107720956882968, "grad_norm": 1.7762507200241089, "learning_rate": 0.00019864656084525345, "loss": 1.7086, "step": 566 }, { "epoch": 0.0812204555221315, "grad_norm": 1.4609636068344116, "learning_rate": 0.00019863894246483822, "loss": 2.0004, "step": 567 }, { "epoch": 0.08136370147543331, "grad_norm": 1.2922967672348022, "learning_rate": 0.00019863130284989965, "loss": 1.6907, "step": 568 }, { "epoch": 0.08150694742873514, "grad_norm": 1.382042407989502, "learning_rate": 0.0001986236420020823, "loss": 1.6902, "step": 569 }, { "epoch": 0.08165019338203695, "grad_norm": 1.6791597604751587, "learning_rate": 0.00019861595992303537, "loss": 1.9861, "step": 570 }, { "epoch": 0.08179343933533878, "grad_norm": 1.4134072065353394, "learning_rate": 0.00019860825661441266, "loss": 1.7097, "step": 571 }, { "epoch": 0.08193668528864059, "grad_norm": 1.5742466449737549, "learning_rate": 0.00019860053207787246, "loss": 1.8282, "step": 572 }, { "epoch": 0.08207993124194242, "grad_norm": 1.2867913246154785, "learning_rate": 0.0001985927863150777, "loss": 1.6376, "step": 573 }, { "epoch": 0.08222317719524423, "grad_norm": 1.512536644935608, "learning_rate": 0.0001985850193276958, "loss": 1.8297, "step": 574 }, { "epoch": 0.08236642314854606, "grad_norm": 1.7403769493103027, "learning_rate": 0.0001985772311173989, "loss": 1.9454, "step": 575 }, { "epoch": 0.08250966910184787, "grad_norm": 1.563878059387207, "learning_rate": 0.00019856942168586353, "loss": 1.6628, "step": 576 }, { "epoch": 0.0826529150551497, "grad_norm": 1.405603051185608, "learning_rate": 0.00019856159103477086, "loss": 1.953, "step": 577 }, { "epoch": 0.08279616100845151, "grad_norm": 1.4249982833862305, "learning_rate": 0.00019855373916580668, "loss": 1.7132, "step": 578 }, { "epoch": 0.08293940696175334, "grad_norm": 1.4292669296264648, "learning_rate": 0.00019854586608066127, "loss": 1.8521, "step": 579 }, { "epoch": 0.08308265291505515, "grad_norm": 1.8385111093521118, "learning_rate": 0.00019853797178102952, "loss": 1.7276, "step": 580 }, { "epoch": 0.08322589886835696, "grad_norm": 1.5136661529541016, "learning_rate": 0.0001985300562686109, "loss": 1.8693, "step": 581 }, { "epoch": 0.08336914482165879, "grad_norm": 1.5073760747909546, "learning_rate": 0.00019852211954510943, "loss": 1.8093, "step": 582 }, { "epoch": 0.0835123907749606, "grad_norm": 1.8011831045150757, "learning_rate": 0.0001985141616122336, "loss": 1.7947, "step": 583 }, { "epoch": 0.08365563672826243, "grad_norm": 1.6022714376449585, "learning_rate": 0.00019850618247169667, "loss": 1.6716, "step": 584 }, { "epoch": 0.08379888268156424, "grad_norm": 1.5719233751296997, "learning_rate": 0.0001984981821252163, "loss": 1.8268, "step": 585 }, { "epoch": 0.08394212863486607, "grad_norm": 1.644780158996582, "learning_rate": 0.00019849016057451476, "loss": 1.9281, "step": 586 }, { "epoch": 0.08408537458816788, "grad_norm": 1.7939538955688477, "learning_rate": 0.00019848211782131888, "loss": 2.1111, "step": 587 }, { "epoch": 0.0842286205414697, "grad_norm": 1.3756718635559082, "learning_rate": 0.00019847405386736014, "loss": 1.7286, "step": 588 }, { "epoch": 0.08437186649477152, "grad_norm": 1.5947803258895874, "learning_rate": 0.00019846596871437441, "loss": 1.9275, "step": 589 }, { "epoch": 0.08451511244807335, "grad_norm": 1.6748199462890625, "learning_rate": 0.00019845786236410227, "loss": 1.8367, "step": 590 }, { "epoch": 0.08465835840137516, "grad_norm": 1.541049838066101, "learning_rate": 0.00019844973481828886, "loss": 1.6547, "step": 591 }, { "epoch": 0.08480160435467698, "grad_norm": 1.5490299463272095, "learning_rate": 0.00019844158607868376, "loss": 2.0083, "step": 592 }, { "epoch": 0.0849448503079788, "grad_norm": 1.6812397241592407, "learning_rate": 0.00019843341614704125, "loss": 2.0063, "step": 593 }, { "epoch": 0.08508809626128062, "grad_norm": 1.2974023818969727, "learning_rate": 0.00019842522502512008, "loss": 1.9684, "step": 594 }, { "epoch": 0.08523134221458244, "grad_norm": 1.8313188552856445, "learning_rate": 0.0001984170127146836, "loss": 1.9563, "step": 595 }, { "epoch": 0.08537458816788426, "grad_norm": 1.3798177242279053, "learning_rate": 0.00019840877921749973, "loss": 1.7836, "step": 596 }, { "epoch": 0.08551783412118608, "grad_norm": 1.7076380252838135, "learning_rate": 0.00019840052453534094, "loss": 1.8817, "step": 597 }, { "epoch": 0.0856610800744879, "grad_norm": 1.4497733116149902, "learning_rate": 0.00019839224866998424, "loss": 1.9776, "step": 598 }, { "epoch": 0.08580432602778972, "grad_norm": 1.3002909421920776, "learning_rate": 0.00019838395162321125, "loss": 1.8919, "step": 599 }, { "epoch": 0.08594757198109153, "grad_norm": 1.7706894874572754, "learning_rate": 0.0001983756333968081, "loss": 1.7648, "step": 600 }, { "epoch": 0.08609081793439335, "grad_norm": 2.081761360168457, "learning_rate": 0.0001983672939925655, "loss": 1.83, "step": 601 }, { "epoch": 0.08623406388769517, "grad_norm": 1.5996431112289429, "learning_rate": 0.00019835893341227872, "loss": 1.6994, "step": 602 }, { "epoch": 0.086377309840997, "grad_norm": 1.6949021816253662, "learning_rate": 0.00019835055165774756, "loss": 1.8734, "step": 603 }, { "epoch": 0.0865205557942988, "grad_norm": 1.7539021968841553, "learning_rate": 0.00019834214873077643, "loss": 1.9698, "step": 604 }, { "epoch": 0.08666380174760063, "grad_norm": 1.9941868782043457, "learning_rate": 0.00019833372463317427, "loss": 1.7233, "step": 605 }, { "epoch": 0.08680704770090245, "grad_norm": 1.683844804763794, "learning_rate": 0.0001983252793667546, "loss": 1.7536, "step": 606 }, { "epoch": 0.08695029365420427, "grad_norm": 1.5519018173217773, "learning_rate": 0.00019831681293333545, "loss": 1.7444, "step": 607 }, { "epoch": 0.08709353960750608, "grad_norm": 2.322967290878296, "learning_rate": 0.00019830832533473939, "loss": 1.816, "step": 608 }, { "epoch": 0.08723678556080791, "grad_norm": 1.3005996942520142, "learning_rate": 0.0001982998165727937, "loss": 1.6581, "step": 609 }, { "epoch": 0.08738003151410972, "grad_norm": 1.5435669422149658, "learning_rate": 0.00019829128664933002, "loss": 1.7816, "step": 610 }, { "epoch": 0.08752327746741155, "grad_norm": 1.3896721601486206, "learning_rate": 0.00019828273556618466, "loss": 1.8953, "step": 611 }, { "epoch": 0.08766652342071336, "grad_norm": 1.994359016418457, "learning_rate": 0.00019827416332519844, "loss": 1.8527, "step": 612 }, { "epoch": 0.08780976937401519, "grad_norm": 1.9261572360992432, "learning_rate": 0.00019826556992821678, "loss": 1.8019, "step": 613 }, { "epoch": 0.087953015327317, "grad_norm": 1.5855371952056885, "learning_rate": 0.0001982569553770896, "loss": 1.8227, "step": 614 }, { "epoch": 0.08809626128061883, "grad_norm": 2.102388381958008, "learning_rate": 0.0001982483196736714, "loss": 1.7842, "step": 615 }, { "epoch": 0.08823950723392064, "grad_norm": 1.91208815574646, "learning_rate": 0.00019823966281982128, "loss": 1.8305, "step": 616 }, { "epoch": 0.08838275318722245, "grad_norm": 1.5928481817245483, "learning_rate": 0.00019823098481740276, "loss": 1.8916, "step": 617 }, { "epoch": 0.08852599914052428, "grad_norm": 2.083204746246338, "learning_rate": 0.0001982222856682841, "loss": 1.8967, "step": 618 }, { "epoch": 0.0886692450938261, "grad_norm": 1.43557608127594, "learning_rate": 0.0001982135653743379, "loss": 1.8346, "step": 619 }, { "epoch": 0.08881249104712792, "grad_norm": 1.3750650882720947, "learning_rate": 0.00019820482393744152, "loss": 1.6437, "step": 620 }, { "epoch": 0.08895573700042973, "grad_norm": 1.6617361307144165, "learning_rate": 0.00019819606135947676, "loss": 1.7574, "step": 621 }, { "epoch": 0.08909898295373156, "grad_norm": 1.420037865638733, "learning_rate": 0.00019818727764232992, "loss": 1.7199, "step": 622 }, { "epoch": 0.08924222890703337, "grad_norm": 1.6648025512695312, "learning_rate": 0.00019817847278789198, "loss": 2.0163, "step": 623 }, { "epoch": 0.0893854748603352, "grad_norm": 1.573229432106018, "learning_rate": 0.00019816964679805838, "loss": 1.7876, "step": 624 }, { "epoch": 0.08952872081363701, "grad_norm": 1.6348079442977905, "learning_rate": 0.00019816079967472915, "loss": 1.5708, "step": 625 }, { "epoch": 0.08967196676693884, "grad_norm": 1.4463605880737305, "learning_rate": 0.00019815193141980886, "loss": 1.7944, "step": 626 }, { "epoch": 0.08981521272024065, "grad_norm": 1.8186712265014648, "learning_rate": 0.0001981430420352066, "loss": 1.7724, "step": 627 }, { "epoch": 0.08995845867354248, "grad_norm": 1.2448248863220215, "learning_rate": 0.0001981341315228361, "loss": 1.8221, "step": 628 }, { "epoch": 0.09010170462684429, "grad_norm": 1.3949155807495117, "learning_rate": 0.00019812519988461548, "loss": 1.8245, "step": 629 }, { "epoch": 0.09024495058014612, "grad_norm": 1.7242192029953003, "learning_rate": 0.00019811624712246756, "loss": 1.7219, "step": 630 }, { "epoch": 0.09038819653344793, "grad_norm": 1.4282169342041016, "learning_rate": 0.00019810727323831966, "loss": 1.7889, "step": 631 }, { "epoch": 0.09053144248674976, "grad_norm": 2.1040520668029785, "learning_rate": 0.00019809827823410358, "loss": 1.6694, "step": 632 }, { "epoch": 0.09067468844005157, "grad_norm": 1.4820910692214966, "learning_rate": 0.00019808926211175573, "loss": 1.6882, "step": 633 }, { "epoch": 0.0908179343933534, "grad_norm": 2.084611654281616, "learning_rate": 0.0001980802248732171, "loss": 1.8569, "step": 634 }, { "epoch": 0.09096118034665521, "grad_norm": 1.386694073677063, "learning_rate": 0.0001980711665204332, "loss": 1.7554, "step": 635 }, { "epoch": 0.09110442629995702, "grad_norm": 1.708006501197815, "learning_rate": 0.00019806208705535402, "loss": 1.8797, "step": 636 }, { "epoch": 0.09124767225325885, "grad_norm": 1.9858362674713135, "learning_rate": 0.0001980529864799341, "loss": 1.8476, "step": 637 }, { "epoch": 0.09139091820656066, "grad_norm": 1.624764084815979, "learning_rate": 0.0001980438647961327, "loss": 1.7315, "step": 638 }, { "epoch": 0.09153416415986249, "grad_norm": 1.708223819732666, "learning_rate": 0.00019803472200591337, "loss": 1.7988, "step": 639 }, { "epoch": 0.0916774101131643, "grad_norm": 1.260899305343628, "learning_rate": 0.00019802555811124437, "loss": 1.6443, "step": 640 }, { "epoch": 0.09182065606646612, "grad_norm": 1.6228735446929932, "learning_rate": 0.00019801637311409848, "loss": 1.84, "step": 641 }, { "epoch": 0.09196390201976794, "grad_norm": 1.7184464931488037, "learning_rate": 0.000198007167016453, "loss": 1.9908, "step": 642 }, { "epoch": 0.09210714797306976, "grad_norm": 1.3809646368026733, "learning_rate": 0.00019799793982028976, "loss": 1.6932, "step": 643 }, { "epoch": 0.09225039392637158, "grad_norm": 1.562656283378601, "learning_rate": 0.00019798869152759513, "loss": 1.587, "step": 644 }, { "epoch": 0.0923936398796734, "grad_norm": 1.4949400424957275, "learning_rate": 0.00019797942214036009, "loss": 1.7536, "step": 645 }, { "epoch": 0.09253688583297522, "grad_norm": 1.9296308755874634, "learning_rate": 0.00019797013166058004, "loss": 1.8761, "step": 646 }, { "epoch": 0.09268013178627704, "grad_norm": 1.821153998374939, "learning_rate": 0.00019796082009025505, "loss": 1.7696, "step": 647 }, { "epoch": 0.09282337773957886, "grad_norm": 2.160893440246582, "learning_rate": 0.00019795148743138966, "loss": 1.8523, "step": 648 }, { "epoch": 0.09296662369288068, "grad_norm": 1.7494330406188965, "learning_rate": 0.00019794213368599294, "loss": 1.7692, "step": 649 }, { "epoch": 0.0931098696461825, "grad_norm": 2.3378031253814697, "learning_rate": 0.0001979327588560785, "loss": 1.8139, "step": 650 }, { "epoch": 0.09325311559948432, "grad_norm": 1.8240364789962769, "learning_rate": 0.00019792336294366457, "loss": 1.768, "step": 651 }, { "epoch": 0.09339636155278613, "grad_norm": 1.8711587190628052, "learning_rate": 0.00019791394595077382, "loss": 1.7597, "step": 652 }, { "epoch": 0.09353960750608795, "grad_norm": 1.6669682264328003, "learning_rate": 0.0001979045078794335, "loss": 1.8608, "step": 653 }, { "epoch": 0.09368285345938977, "grad_norm": 1.5728895664215088, "learning_rate": 0.00019789504873167537, "loss": 1.7904, "step": 654 }, { "epoch": 0.09382609941269159, "grad_norm": 2.1707494258880615, "learning_rate": 0.0001978855685095358, "loss": 1.8941, "step": 655 }, { "epoch": 0.09396934536599341, "grad_norm": 1.6794214248657227, "learning_rate": 0.0001978760672150556, "loss": 1.9251, "step": 656 }, { "epoch": 0.09411259131929522, "grad_norm": 1.9527090787887573, "learning_rate": 0.0001978665448502802, "loss": 1.5873, "step": 657 }, { "epoch": 0.09425583727259705, "grad_norm": 1.6207743883132935, "learning_rate": 0.00019785700141725953, "loss": 1.7273, "step": 658 }, { "epoch": 0.09439908322589886, "grad_norm": 1.5756882429122925, "learning_rate": 0.00019784743691804804, "loss": 1.6424, "step": 659 }, { "epoch": 0.09454232917920069, "grad_norm": 1.1030863523483276, "learning_rate": 0.0001978378513547047, "loss": 1.7495, "step": 660 }, { "epoch": 0.0946855751325025, "grad_norm": 1.3622251749038696, "learning_rate": 0.00019782824472929308, "loss": 1.7736, "step": 661 }, { "epoch": 0.09482882108580433, "grad_norm": 1.5522910356521606, "learning_rate": 0.00019781861704388124, "loss": 1.6102, "step": 662 }, { "epoch": 0.09497206703910614, "grad_norm": 1.5790303945541382, "learning_rate": 0.0001978089683005418, "loss": 1.6427, "step": 663 }, { "epoch": 0.09511531299240797, "grad_norm": 1.869668960571289, "learning_rate": 0.00019779929850135189, "loss": 1.893, "step": 664 }, { "epoch": 0.09525855894570978, "grad_norm": 1.2680106163024902, "learning_rate": 0.00019778960764839316, "loss": 1.6168, "step": 665 }, { "epoch": 0.09540180489901161, "grad_norm": 1.5408239364624023, "learning_rate": 0.00019777989574375183, "loss": 1.8076, "step": 666 }, { "epoch": 0.09554505085231342, "grad_norm": 1.904329538345337, "learning_rate": 0.0001977701627895186, "loss": 1.6122, "step": 667 }, { "epoch": 0.09568829680561525, "grad_norm": 1.8531043529510498, "learning_rate": 0.00019776040878778875, "loss": 1.5994, "step": 668 }, { "epoch": 0.09583154275891706, "grad_norm": 1.6195244789123535, "learning_rate": 0.00019775063374066212, "loss": 1.7338, "step": 669 }, { "epoch": 0.09597478871221889, "grad_norm": 1.6500298976898193, "learning_rate": 0.000197740837650243, "loss": 1.6755, "step": 670 }, { "epoch": 0.0961180346655207, "grad_norm": 1.9808924198150635, "learning_rate": 0.00019773102051864018, "loss": 1.6546, "step": 671 }, { "epoch": 0.09626128061882251, "grad_norm": 2.1945536136627197, "learning_rate": 0.00019772118234796717, "loss": 1.7222, "step": 672 }, { "epoch": 0.09640452657212434, "grad_norm": 1.8495655059814453, "learning_rate": 0.0001977113231403418, "loss": 1.7317, "step": 673 }, { "epoch": 0.09654777252542615, "grad_norm": 1.6014138460159302, "learning_rate": 0.00019770144289788655, "loss": 1.5159, "step": 674 }, { "epoch": 0.09669101847872798, "grad_norm": 1.4743225574493408, "learning_rate": 0.00019769154162272839, "loss": 1.8782, "step": 675 }, { "epoch": 0.09683426443202979, "grad_norm": 1.4953423738479614, "learning_rate": 0.0001976816193169988, "loss": 1.8579, "step": 676 }, { "epoch": 0.09697751038533162, "grad_norm": 1.2342016696929932, "learning_rate": 0.0001976716759828338, "loss": 1.8547, "step": 677 }, { "epoch": 0.09712075633863343, "grad_norm": 1.7234338521957397, "learning_rate": 0.00019766171162237397, "loss": 1.8031, "step": 678 }, { "epoch": 0.09726400229193526, "grad_norm": 1.773073434829712, "learning_rate": 0.00019765172623776437, "loss": 1.7907, "step": 679 }, { "epoch": 0.09740724824523707, "grad_norm": 1.4676114320755005, "learning_rate": 0.00019764171983115462, "loss": 1.7955, "step": 680 }, { "epoch": 0.0975504941985389, "grad_norm": 2.1274361610412598, "learning_rate": 0.00019763169240469885, "loss": 2.0114, "step": 681 }, { "epoch": 0.09769374015184071, "grad_norm": 1.4914871454238892, "learning_rate": 0.00019762164396055573, "loss": 1.9642, "step": 682 }, { "epoch": 0.09783698610514253, "grad_norm": 1.4457696676254272, "learning_rate": 0.00019761157450088842, "loss": 1.7404, "step": 683 }, { "epoch": 0.09798023205844435, "grad_norm": 1.5062708854675293, "learning_rate": 0.0001976014840278646, "loss": 1.5659, "step": 684 }, { "epoch": 0.09812347801174617, "grad_norm": 1.810644507408142, "learning_rate": 0.00019759137254365657, "loss": 1.5031, "step": 685 }, { "epoch": 0.09826672396504799, "grad_norm": 2.0730068683624268, "learning_rate": 0.000197581240050441, "loss": 1.7493, "step": 686 }, { "epoch": 0.09840996991834981, "grad_norm": 1.33100163936615, "learning_rate": 0.00019757108655039924, "loss": 1.5771, "step": 687 }, { "epoch": 0.09855321587165163, "grad_norm": 1.6414936780929565, "learning_rate": 0.00019756091204571708, "loss": 1.8274, "step": 688 }, { "epoch": 0.09869646182495345, "grad_norm": 1.9333001375198364, "learning_rate": 0.00019755071653858476, "loss": 1.6505, "step": 689 }, { "epoch": 0.09883970777825526, "grad_norm": 1.6460649967193604, "learning_rate": 0.00019754050003119723, "loss": 1.8473, "step": 690 }, { "epoch": 0.09898295373155708, "grad_norm": 1.3726948499679565, "learning_rate": 0.00019753026252575375, "loss": 1.8689, "step": 691 }, { "epoch": 0.0991261996848589, "grad_norm": 1.579671859741211, "learning_rate": 0.00019752000402445825, "loss": 1.529, "step": 692 }, { "epoch": 0.09926944563816072, "grad_norm": 1.297569990158081, "learning_rate": 0.00019750972452951918, "loss": 1.7489, "step": 693 }, { "epoch": 0.09941269159146254, "grad_norm": 1.8538092374801636, "learning_rate": 0.00019749942404314935, "loss": 1.673, "step": 694 }, { "epoch": 0.09955593754476436, "grad_norm": 1.5851699113845825, "learning_rate": 0.00019748910256756628, "loss": 1.7389, "step": 695 }, { "epoch": 0.09969918349806618, "grad_norm": 1.2902235984802246, "learning_rate": 0.00019747876010499192, "loss": 1.698, "step": 696 }, { "epoch": 0.099842429451368, "grad_norm": 1.3676519393920898, "learning_rate": 0.0001974683966576527, "loss": 1.7666, "step": 697 }, { "epoch": 0.09998567540466982, "grad_norm": 1.6822603940963745, "learning_rate": 0.00019745801222777968, "loss": 1.576, "step": 698 }, { "epoch": 0.10012892135797163, "grad_norm": 1.6453901529312134, "learning_rate": 0.00019744760681760832, "loss": 1.7159, "step": 699 }, { "epoch": 0.10027216731127346, "grad_norm": 1.5896878242492676, "learning_rate": 0.0001974371804293787, "loss": 1.738, "step": 700 }, { "epoch": 0.10041541326457527, "grad_norm": 1.7985727787017822, "learning_rate": 0.0001974267330653353, "loss": 1.8406, "step": 701 }, { "epoch": 0.1005586592178771, "grad_norm": 1.7758680582046509, "learning_rate": 0.00019741626472772722, "loss": 1.8078, "step": 702 }, { "epoch": 0.10070190517117891, "grad_norm": 1.8356214761734009, "learning_rate": 0.000197405775418808, "loss": 1.8122, "step": 703 }, { "epoch": 0.10084515112448074, "grad_norm": 2.069838047027588, "learning_rate": 0.00019739526514083578, "loss": 1.6782, "step": 704 }, { "epoch": 0.10098839707778255, "grad_norm": 1.7264612913131714, "learning_rate": 0.00019738473389607314, "loss": 1.6891, "step": 705 }, { "epoch": 0.10113164303108438, "grad_norm": 1.6699116230010986, "learning_rate": 0.00019737418168678714, "loss": 1.6091, "step": 706 }, { "epoch": 0.10127488898438619, "grad_norm": 1.566804051399231, "learning_rate": 0.0001973636085152495, "loss": 1.8726, "step": 707 }, { "epoch": 0.101418134937688, "grad_norm": 1.8234835863113403, "learning_rate": 0.00019735301438373633, "loss": 1.7642, "step": 708 }, { "epoch": 0.10156138089098983, "grad_norm": 1.442474603652954, "learning_rate": 0.00019734239929452825, "loss": 2.0408, "step": 709 }, { "epoch": 0.10170462684429164, "grad_norm": 1.305235743522644, "learning_rate": 0.00019733176324991046, "loss": 1.855, "step": 710 }, { "epoch": 0.10184787279759347, "grad_norm": 1.2246805429458618, "learning_rate": 0.00019732110625217262, "loss": 1.6049, "step": 711 }, { "epoch": 0.10199111875089528, "grad_norm": 1.5693334341049194, "learning_rate": 0.00019731042830360896, "loss": 1.6744, "step": 712 }, { "epoch": 0.10213436470419711, "grad_norm": 1.8136435747146606, "learning_rate": 0.0001972997294065181, "loss": 1.7744, "step": 713 }, { "epoch": 0.10227761065749892, "grad_norm": 1.5060064792633057, "learning_rate": 0.0001972890095632033, "loss": 1.7055, "step": 714 }, { "epoch": 0.10242085661080075, "grad_norm": 1.3698856830596924, "learning_rate": 0.00019727826877597232, "loss": 1.7072, "step": 715 }, { "epoch": 0.10256410256410256, "grad_norm": 1.987074613571167, "learning_rate": 0.0001972675070471373, "loss": 1.712, "step": 716 }, { "epoch": 0.10270734851740439, "grad_norm": 1.5495303869247437, "learning_rate": 0.000197256724379015, "loss": 1.7371, "step": 717 }, { "epoch": 0.1028505944707062, "grad_norm": 1.2801169157028198, "learning_rate": 0.0001972459207739267, "loss": 1.7591, "step": 718 }, { "epoch": 0.10299384042400803, "grad_norm": 1.606809377670288, "learning_rate": 0.00019723509623419808, "loss": 1.7298, "step": 719 }, { "epoch": 0.10313708637730984, "grad_norm": 1.4142528772354126, "learning_rate": 0.00019722425076215946, "loss": 1.6494, "step": 720 }, { "epoch": 0.10328033233061167, "grad_norm": 1.254608154296875, "learning_rate": 0.00019721338436014558, "loss": 1.8597, "step": 721 }, { "epoch": 0.10342357828391348, "grad_norm": 1.6490896940231323, "learning_rate": 0.00019720249703049573, "loss": 1.6433, "step": 722 }, { "epoch": 0.1035668242372153, "grad_norm": 1.5009382963180542, "learning_rate": 0.0001971915887755536, "loss": 1.5715, "step": 723 }, { "epoch": 0.10371007019051712, "grad_norm": 1.2261353731155396, "learning_rate": 0.00019718065959766756, "loss": 1.6734, "step": 724 }, { "epoch": 0.10385331614381894, "grad_norm": 1.2172801494598389, "learning_rate": 0.00019716970949919035, "loss": 1.7203, "step": 725 }, { "epoch": 0.10399656209712076, "grad_norm": 1.3454374074935913, "learning_rate": 0.00019715873848247928, "loss": 1.6532, "step": 726 }, { "epoch": 0.10413980805042257, "grad_norm": 1.5467625856399536, "learning_rate": 0.0001971477465498961, "loss": 1.6718, "step": 727 }, { "epoch": 0.1042830540037244, "grad_norm": 1.5804749727249146, "learning_rate": 0.00019713673370380712, "loss": 1.6108, "step": 728 }, { "epoch": 0.10442629995702621, "grad_norm": 1.5299850702285767, "learning_rate": 0.00019712569994658315, "loss": 1.4396, "step": 729 }, { "epoch": 0.10456954591032804, "grad_norm": 1.2803105115890503, "learning_rate": 0.00019711464528059946, "loss": 1.7147, "step": 730 }, { "epoch": 0.10471279186362985, "grad_norm": 1.6970678567886353, "learning_rate": 0.00019710356970823587, "loss": 1.7287, "step": 731 }, { "epoch": 0.10485603781693167, "grad_norm": 1.1644597053527832, "learning_rate": 0.00019709247323187662, "loss": 1.7064, "step": 732 }, { "epoch": 0.10499928377023349, "grad_norm": 1.5943645238876343, "learning_rate": 0.0001970813558539106, "loss": 1.7171, "step": 733 }, { "epoch": 0.10514252972353531, "grad_norm": 1.348835825920105, "learning_rate": 0.00019707021757673103, "loss": 1.6029, "step": 734 }, { "epoch": 0.10528577567683713, "grad_norm": 1.4635934829711914, "learning_rate": 0.00019705905840273572, "loss": 1.7004, "step": 735 }, { "epoch": 0.10542902163013895, "grad_norm": 1.2927683591842651, "learning_rate": 0.00019704787833432698, "loss": 1.9932, "step": 736 }, { "epoch": 0.10557226758344077, "grad_norm": 1.78449285030365, "learning_rate": 0.00019703667737391162, "loss": 1.7306, "step": 737 }, { "epoch": 0.10571551353674259, "grad_norm": 1.8855433464050293, "learning_rate": 0.00019702545552390089, "loss": 1.9031, "step": 738 }, { "epoch": 0.1058587594900444, "grad_norm": 1.2628839015960693, "learning_rate": 0.00019701421278671058, "loss": 1.7967, "step": 739 }, { "epoch": 0.10600200544334623, "grad_norm": 1.3372970819473267, "learning_rate": 0.000197002949164761, "loss": 1.602, "step": 740 }, { "epoch": 0.10614525139664804, "grad_norm": 1.7041925191879272, "learning_rate": 0.00019699166466047692, "loss": 1.9064, "step": 741 }, { "epoch": 0.10628849734994987, "grad_norm": 1.346119999885559, "learning_rate": 0.0001969803592762876, "loss": 1.8142, "step": 742 }, { "epoch": 0.10643174330325168, "grad_norm": 1.6607774496078491, "learning_rate": 0.0001969690330146268, "loss": 1.8994, "step": 743 }, { "epoch": 0.1065749892565535, "grad_norm": 1.774286150932312, "learning_rate": 0.00019695768587793286, "loss": 1.8676, "step": 744 }, { "epoch": 0.10671823520985532, "grad_norm": 1.3368016481399536, "learning_rate": 0.00019694631786864843, "loss": 1.7106, "step": 745 }, { "epoch": 0.10686148116315713, "grad_norm": 1.9259347915649414, "learning_rate": 0.00019693492898922084, "loss": 1.7263, "step": 746 }, { "epoch": 0.10700472711645896, "grad_norm": 1.8786557912826538, "learning_rate": 0.00019692351924210176, "loss": 1.6849, "step": 747 }, { "epoch": 0.10714797306976077, "grad_norm": 1.7448755502700806, "learning_rate": 0.00019691208862974752, "loss": 1.7431, "step": 748 }, { "epoch": 0.1072912190230626, "grad_norm": 1.5958060026168823, "learning_rate": 0.0001969006371546188, "loss": 1.5711, "step": 749 }, { "epoch": 0.10743446497636441, "grad_norm": 1.277031660079956, "learning_rate": 0.0001968891648191808, "loss": 1.73, "step": 750 }, { "epoch": 0.10757771092966624, "grad_norm": 1.4704946279525757, "learning_rate": 0.00019687767162590328, "loss": 1.7419, "step": 751 }, { "epoch": 0.10772095688296805, "grad_norm": 1.2916069030761719, "learning_rate": 0.00019686615757726034, "loss": 1.7143, "step": 752 }, { "epoch": 0.10786420283626988, "grad_norm": 1.5522805452346802, "learning_rate": 0.0001968546226757308, "loss": 1.7327, "step": 753 }, { "epoch": 0.10800744878957169, "grad_norm": 1.3881850242614746, "learning_rate": 0.00019684306692379776, "loss": 1.5627, "step": 754 }, { "epoch": 0.10815069474287352, "grad_norm": 1.4550009965896606, "learning_rate": 0.0001968314903239489, "loss": 1.6797, "step": 755 }, { "epoch": 0.10829394069617533, "grad_norm": 1.7874292135238647, "learning_rate": 0.00019681989287867636, "loss": 1.7466, "step": 756 }, { "epoch": 0.10843718664947716, "grad_norm": 1.4580813646316528, "learning_rate": 0.00019680827459047685, "loss": 1.6753, "step": 757 }, { "epoch": 0.10858043260277897, "grad_norm": 1.342289686203003, "learning_rate": 0.00019679663546185144, "loss": 1.8325, "step": 758 }, { "epoch": 0.1087236785560808, "grad_norm": 1.9250755310058594, "learning_rate": 0.00019678497549530574, "loss": 1.7228, "step": 759 }, { "epoch": 0.10886692450938261, "grad_norm": 1.4190378189086914, "learning_rate": 0.0001967732946933499, "loss": 1.6921, "step": 760 }, { "epoch": 0.10901017046268444, "grad_norm": 1.6499125957489014, "learning_rate": 0.00019676159305849846, "loss": 1.7999, "step": 761 }, { "epoch": 0.10915341641598625, "grad_norm": 1.6628186702728271, "learning_rate": 0.0001967498705932705, "loss": 1.7987, "step": 762 }, { "epoch": 0.10929666236928806, "grad_norm": 1.4736112356185913, "learning_rate": 0.0001967381273001896, "loss": 1.626, "step": 763 }, { "epoch": 0.10943990832258989, "grad_norm": 1.4702813625335693, "learning_rate": 0.00019672636318178381, "loss": 1.7792, "step": 764 }, { "epoch": 0.1095831542758917, "grad_norm": 1.8236439228057861, "learning_rate": 0.0001967145782405856, "loss": 1.7363, "step": 765 }, { "epoch": 0.10972640022919353, "grad_norm": 1.2456893920898438, "learning_rate": 0.00019670277247913205, "loss": 1.6081, "step": 766 }, { "epoch": 0.10986964618249534, "grad_norm": 1.7825539112091064, "learning_rate": 0.00019669094589996457, "loss": 1.5565, "step": 767 }, { "epoch": 0.11001289213579717, "grad_norm": 1.441588044166565, "learning_rate": 0.00019667909850562917, "loss": 1.5394, "step": 768 }, { "epoch": 0.11015613808909898, "grad_norm": 1.2529128789901733, "learning_rate": 0.00019666723029867632, "loss": 1.7932, "step": 769 }, { "epoch": 0.1102993840424008, "grad_norm": 1.7631900310516357, "learning_rate": 0.00019665534128166092, "loss": 1.7974, "step": 770 }, { "epoch": 0.11044262999570262, "grad_norm": 2.2570300102233887, "learning_rate": 0.0001966434314571424, "loss": 1.8064, "step": 771 }, { "epoch": 0.11058587594900444, "grad_norm": 1.6106598377227783, "learning_rate": 0.00019663150082768462, "loss": 1.6633, "step": 772 }, { "epoch": 0.11072912190230626, "grad_norm": 1.2818262577056885, "learning_rate": 0.000196619549395856, "loss": 1.4161, "step": 773 }, { "epoch": 0.11087236785560808, "grad_norm": 1.3702994585037231, "learning_rate": 0.00019660757716422932, "loss": 1.7935, "step": 774 }, { "epoch": 0.1110156138089099, "grad_norm": 1.9527215957641602, "learning_rate": 0.00019659558413538198, "loss": 1.912, "step": 775 }, { "epoch": 0.11115885976221172, "grad_norm": 1.2086849212646484, "learning_rate": 0.0001965835703118957, "loss": 1.7713, "step": 776 }, { "epoch": 0.11130210571551354, "grad_norm": 1.5319490432739258, "learning_rate": 0.00019657153569635683, "loss": 1.9257, "step": 777 }, { "epoch": 0.11144535166881536, "grad_norm": 1.898750901222229, "learning_rate": 0.0001965594802913561, "loss": 1.685, "step": 778 }, { "epoch": 0.11158859762211717, "grad_norm": 1.5422533750534058, "learning_rate": 0.00019654740409948872, "loss": 1.7039, "step": 779 }, { "epoch": 0.11173184357541899, "grad_norm": 1.49237859249115, "learning_rate": 0.00019653530712335443, "loss": 1.8051, "step": 780 }, { "epoch": 0.11187508952872081, "grad_norm": 1.3994147777557373, "learning_rate": 0.00019652318936555743, "loss": 1.5716, "step": 781 }, { "epoch": 0.11201833548202263, "grad_norm": 1.3747738599777222, "learning_rate": 0.00019651105082870628, "loss": 1.6163, "step": 782 }, { "epoch": 0.11216158143532445, "grad_norm": 2.0043458938598633, "learning_rate": 0.00019649889151541417, "loss": 1.7238, "step": 783 }, { "epoch": 0.11230482738862627, "grad_norm": 1.4104177951812744, "learning_rate": 0.00019648671142829876, "loss": 1.7578, "step": 784 }, { "epoch": 0.11244807334192809, "grad_norm": 1.3834949731826782, "learning_rate": 0.000196474510569982, "loss": 1.5151, "step": 785 }, { "epoch": 0.1125913192952299, "grad_norm": 1.402267336845398, "learning_rate": 0.0001964622889430905, "loss": 1.8032, "step": 786 }, { "epoch": 0.11273456524853173, "grad_norm": 1.367239236831665, "learning_rate": 0.00019645004655025522, "loss": 1.8894, "step": 787 }, { "epoch": 0.11287781120183354, "grad_norm": 1.663748860359192, "learning_rate": 0.00019643778339411175, "loss": 1.7245, "step": 788 }, { "epoch": 0.11302105715513537, "grad_norm": 1.497519612312317, "learning_rate": 0.00019642549947729992, "loss": 1.5935, "step": 789 }, { "epoch": 0.11316430310843718, "grad_norm": 1.1501355171203613, "learning_rate": 0.00019641319480246424, "loss": 1.6902, "step": 790 }, { "epoch": 0.11330754906173901, "grad_norm": 1.303268551826477, "learning_rate": 0.0001964008693722536, "loss": 1.458, "step": 791 }, { "epoch": 0.11345079501504082, "grad_norm": 1.8508501052856445, "learning_rate": 0.0001963885231893213, "loss": 1.9419, "step": 792 }, { "epoch": 0.11359404096834265, "grad_norm": 1.630737066268921, "learning_rate": 0.00019637615625632524, "loss": 1.8513, "step": 793 }, { "epoch": 0.11373728692164446, "grad_norm": 1.3402734994888306, "learning_rate": 0.00019636376857592764, "loss": 1.8409, "step": 794 }, { "epoch": 0.11388053287494629, "grad_norm": 1.8030574321746826, "learning_rate": 0.00019635136015079533, "loss": 1.711, "step": 795 }, { "epoch": 0.1140237788282481, "grad_norm": 1.4685052633285522, "learning_rate": 0.00019633893098359951, "loss": 1.5852, "step": 796 }, { "epoch": 0.11416702478154993, "grad_norm": 1.2346967458724976, "learning_rate": 0.00019632648107701585, "loss": 1.8133, "step": 797 }, { "epoch": 0.11431027073485174, "grad_norm": 1.6928762197494507, "learning_rate": 0.00019631401043372447, "loss": 1.6101, "step": 798 }, { "epoch": 0.11445351668815355, "grad_norm": 1.311968445777893, "learning_rate": 0.00019630151905641012, "loss": 1.5881, "step": 799 }, { "epoch": 0.11459676264145538, "grad_norm": 1.5631657838821411, "learning_rate": 0.00019628900694776177, "loss": 1.7071, "step": 800 }, { "epoch": 0.11474000859475719, "grad_norm": 1.8374607563018799, "learning_rate": 0.00019627647411047305, "loss": 1.8186, "step": 801 }, { "epoch": 0.11488325454805902, "grad_norm": 1.5641448497772217, "learning_rate": 0.0001962639205472419, "loss": 1.7584, "step": 802 }, { "epoch": 0.11502650050136083, "grad_norm": 1.2980116605758667, "learning_rate": 0.00019625134626077083, "loss": 1.7779, "step": 803 }, { "epoch": 0.11516974645466266, "grad_norm": 1.503077507019043, "learning_rate": 0.00019623875125376674, "loss": 1.7195, "step": 804 }, { "epoch": 0.11531299240796447, "grad_norm": 1.5456128120422363, "learning_rate": 0.00019622613552894107, "loss": 1.6583, "step": 805 }, { "epoch": 0.1154562383612663, "grad_norm": 1.3264356851577759, "learning_rate": 0.0001962134990890096, "loss": 1.7716, "step": 806 }, { "epoch": 0.11559948431456811, "grad_norm": 1.964059829711914, "learning_rate": 0.00019620084193669275, "loss": 1.4807, "step": 807 }, { "epoch": 0.11574273026786994, "grad_norm": 1.4878560304641724, "learning_rate": 0.00019618816407471519, "loss": 1.6133, "step": 808 }, { "epoch": 0.11588597622117175, "grad_norm": 1.2516502141952515, "learning_rate": 0.00019617546550580622, "loss": 1.6847, "step": 809 }, { "epoch": 0.11602922217447358, "grad_norm": 1.4682655334472656, "learning_rate": 0.0001961627462326995, "loss": 1.4748, "step": 810 }, { "epoch": 0.11617246812777539, "grad_norm": 1.5775532722473145, "learning_rate": 0.00019615000625813314, "loss": 1.7997, "step": 811 }, { "epoch": 0.11631571408107721, "grad_norm": 1.522460699081421, "learning_rate": 0.0001961372455848498, "loss": 1.5879, "step": 812 }, { "epoch": 0.11645896003437903, "grad_norm": 1.695204734802246, "learning_rate": 0.0001961244642155965, "loss": 1.6329, "step": 813 }, { "epoch": 0.11660220598768085, "grad_norm": 1.32692289352417, "learning_rate": 0.0001961116621531248, "loss": 1.6465, "step": 814 }, { "epoch": 0.11674545194098267, "grad_norm": 1.479142427444458, "learning_rate": 0.00019609883940019057, "loss": 1.6318, "step": 815 }, { "epoch": 0.11688869789428448, "grad_norm": 1.5152790546417236, "learning_rate": 0.00019608599595955436, "loss": 1.7847, "step": 816 }, { "epoch": 0.1170319438475863, "grad_norm": 1.5613727569580078, "learning_rate": 0.00019607313183398094, "loss": 1.7614, "step": 817 }, { "epoch": 0.11717518980088812, "grad_norm": 1.3511062860488892, "learning_rate": 0.0001960602470262397, "loss": 1.7262, "step": 818 }, { "epoch": 0.11731843575418995, "grad_norm": 1.5462993383407593, "learning_rate": 0.00019604734153910437, "loss": 1.6684, "step": 819 }, { "epoch": 0.11746168170749176, "grad_norm": 1.625539779663086, "learning_rate": 0.00019603441537535324, "loss": 1.7509, "step": 820 }, { "epoch": 0.11760492766079358, "grad_norm": 1.6381545066833496, "learning_rate": 0.00019602146853776894, "loss": 1.8444, "step": 821 }, { "epoch": 0.1177481736140954, "grad_norm": 1.2968568801879883, "learning_rate": 0.00019600850102913865, "loss": 1.6877, "step": 822 }, { "epoch": 0.11789141956739722, "grad_norm": 1.4398295879364014, "learning_rate": 0.00019599551285225393, "loss": 1.6437, "step": 823 }, { "epoch": 0.11803466552069904, "grad_norm": 1.7400752305984497, "learning_rate": 0.00019598250400991077, "loss": 1.7911, "step": 824 }, { "epoch": 0.11817791147400086, "grad_norm": 1.8015711307525635, "learning_rate": 0.00019596947450490975, "loss": 1.6992, "step": 825 }, { "epoch": 0.11832115742730268, "grad_norm": 1.5374224185943604, "learning_rate": 0.0001959564243400557, "loss": 1.6953, "step": 826 }, { "epoch": 0.1184644033806045, "grad_norm": 1.3744714260101318, "learning_rate": 0.00019594335351815807, "loss": 1.5789, "step": 827 }, { "epoch": 0.11860764933390631, "grad_norm": 2.015181541442871, "learning_rate": 0.00019593026204203066, "loss": 1.6549, "step": 828 }, { "epoch": 0.11875089528720814, "grad_norm": 1.6133536100387573, "learning_rate": 0.0001959171499144917, "loss": 1.7005, "step": 829 }, { "epoch": 0.11889414124050995, "grad_norm": 1.6305330991744995, "learning_rate": 0.00019590401713836397, "loss": 1.621, "step": 830 }, { "epoch": 0.11903738719381178, "grad_norm": 1.504027247428894, "learning_rate": 0.0001958908637164746, "loss": 1.7492, "step": 831 }, { "epoch": 0.1191806331471136, "grad_norm": 1.4346352815628052, "learning_rate": 0.0001958776896516552, "loss": 1.6971, "step": 832 }, { "epoch": 0.11932387910041542, "grad_norm": 1.4614126682281494, "learning_rate": 0.00019586449494674187, "loss": 1.5235, "step": 833 }, { "epoch": 0.11946712505371723, "grad_norm": 1.557592749595642, "learning_rate": 0.000195851279604575, "loss": 1.6026, "step": 834 }, { "epoch": 0.11961037100701905, "grad_norm": 1.587651014328003, "learning_rate": 0.0001958380436279996, "loss": 1.6651, "step": 835 }, { "epoch": 0.11975361696032087, "grad_norm": 1.58087158203125, "learning_rate": 0.000195824787019865, "loss": 1.5143, "step": 836 }, { "epoch": 0.11989686291362268, "grad_norm": 1.273144006729126, "learning_rate": 0.0001958115097830251, "loss": 1.5331, "step": 837 }, { "epoch": 0.12004010886692451, "grad_norm": 1.759933590888977, "learning_rate": 0.0001957982119203381, "loss": 1.6638, "step": 838 }, { "epoch": 0.12018335482022632, "grad_norm": 1.5463939905166626, "learning_rate": 0.0001957848934346667, "loss": 1.8055, "step": 839 }, { "epoch": 0.12032660077352815, "grad_norm": 1.8380411863327026, "learning_rate": 0.00019577155432887804, "loss": 1.6704, "step": 840 }, { "epoch": 0.12046984672682996, "grad_norm": 1.2270687818527222, "learning_rate": 0.00019575819460584373, "loss": 1.6812, "step": 841 }, { "epoch": 0.12061309268013179, "grad_norm": 1.6814182996749878, "learning_rate": 0.00019574481426843976, "loss": 1.4969, "step": 842 }, { "epoch": 0.1207563386334336, "grad_norm": 1.5508201122283936, "learning_rate": 0.0001957314133195466, "loss": 1.6531, "step": 843 }, { "epoch": 0.12089958458673543, "grad_norm": 1.7305281162261963, "learning_rate": 0.00019571799176204912, "loss": 1.857, "step": 844 }, { "epoch": 0.12104283054003724, "grad_norm": 1.395186185836792, "learning_rate": 0.00019570454959883668, "loss": 1.7079, "step": 845 }, { "epoch": 0.12118607649333907, "grad_norm": 1.3091259002685547, "learning_rate": 0.00019569108683280303, "loss": 1.6031, "step": 846 }, { "epoch": 0.12132932244664088, "grad_norm": 1.341686725616455, "learning_rate": 0.00019567760346684638, "loss": 1.3075, "step": 847 }, { "epoch": 0.12147256839994271, "grad_norm": 1.3981155157089233, "learning_rate": 0.00019566409950386935, "loss": 1.8879, "step": 848 }, { "epoch": 0.12161581435324452, "grad_norm": 1.5670076608657837, "learning_rate": 0.000195650574946779, "loss": 1.6899, "step": 849 }, { "epoch": 0.12175906030654635, "grad_norm": 1.4896048307418823, "learning_rate": 0.00019563702979848686, "loss": 1.7236, "step": 850 }, { "epoch": 0.12190230625984816, "grad_norm": 1.258460521697998, "learning_rate": 0.00019562346406190888, "loss": 1.6889, "step": 851 }, { "epoch": 0.12204555221314999, "grad_norm": 1.6992498636245728, "learning_rate": 0.00019560987773996536, "loss": 1.7578, "step": 852 }, { "epoch": 0.1221887981664518, "grad_norm": 1.6377817392349243, "learning_rate": 0.00019559627083558115, "loss": 1.7459, "step": 853 }, { "epoch": 0.12233204411975361, "grad_norm": 1.5157989263534546, "learning_rate": 0.00019558264335168548, "loss": 1.6657, "step": 854 }, { "epoch": 0.12247529007305544, "grad_norm": 1.3764675855636597, "learning_rate": 0.000195568995291212, "loss": 1.6585, "step": 855 }, { "epoch": 0.12261853602635725, "grad_norm": 1.3570427894592285, "learning_rate": 0.00019555532665709878, "loss": 1.6414, "step": 856 }, { "epoch": 0.12276178197965908, "grad_norm": 1.4844766855239868, "learning_rate": 0.0001955416374522884, "loss": 1.745, "step": 857 }, { "epoch": 0.12290502793296089, "grad_norm": 1.3481073379516602, "learning_rate": 0.00019552792767972771, "loss": 1.61, "step": 858 }, { "epoch": 0.12304827388626272, "grad_norm": 1.2660897970199585, "learning_rate": 0.00019551419734236818, "loss": 1.6613, "step": 859 }, { "epoch": 0.12319151983956453, "grad_norm": 1.4173600673675537, "learning_rate": 0.00019550044644316557, "loss": 1.632, "step": 860 }, { "epoch": 0.12333476579286635, "grad_norm": 1.3973641395568848, "learning_rate": 0.0001954866749850801, "loss": 1.4701, "step": 861 }, { "epoch": 0.12347801174616817, "grad_norm": 1.2421332597732544, "learning_rate": 0.0001954728829710764, "loss": 1.5154, "step": 862 }, { "epoch": 0.12362125769947, "grad_norm": 1.5635004043579102, "learning_rate": 0.00019545907040412363, "loss": 1.6637, "step": 863 }, { "epoch": 0.1237645036527718, "grad_norm": 1.5483280420303345, "learning_rate": 0.00019544523728719525, "loss": 1.6362, "step": 864 }, { "epoch": 0.12390774960607363, "grad_norm": 1.4203495979309082, "learning_rate": 0.00019543138362326917, "loss": 1.6175, "step": 865 }, { "epoch": 0.12405099555937545, "grad_norm": 1.3820923566818237, "learning_rate": 0.00019541750941532774, "loss": 1.5147, "step": 866 }, { "epoch": 0.12419424151267727, "grad_norm": 1.4948850870132446, "learning_rate": 0.00019540361466635777, "loss": 1.8273, "step": 867 }, { "epoch": 0.12433748746597909, "grad_norm": 1.2597991228103638, "learning_rate": 0.00019538969937935044, "loss": 1.6709, "step": 868 }, { "epoch": 0.12448073341928091, "grad_norm": 1.3481260538101196, "learning_rate": 0.00019537576355730134, "loss": 1.5585, "step": 869 }, { "epoch": 0.12462397937258272, "grad_norm": 1.4097731113433838, "learning_rate": 0.00019536180720321054, "loss": 1.7845, "step": 870 }, { "epoch": 0.12476722532588454, "grad_norm": 1.5789159536361694, "learning_rate": 0.00019534783032008248, "loss": 1.7004, "step": 871 }, { "epoch": 0.12491047127918636, "grad_norm": 1.355206847190857, "learning_rate": 0.00019533383291092606, "loss": 1.6142, "step": 872 }, { "epoch": 0.12505371723248818, "grad_norm": 1.5390346050262451, "learning_rate": 0.00019531981497875454, "loss": 1.5849, "step": 873 }, { "epoch": 0.12519696318579, "grad_norm": 1.2994829416275024, "learning_rate": 0.00019530577652658568, "loss": 1.6995, "step": 874 }, { "epoch": 0.12534020913909183, "grad_norm": 1.3114550113677979, "learning_rate": 0.00019529171755744158, "loss": 1.6445, "step": 875 }, { "epoch": 0.12548345509239364, "grad_norm": 1.5998321771621704, "learning_rate": 0.00019527763807434878, "loss": 1.7977, "step": 876 }, { "epoch": 0.12562670104569545, "grad_norm": 1.2786026000976562, "learning_rate": 0.00019526353808033825, "loss": 1.5183, "step": 877 }, { "epoch": 0.12576994699899727, "grad_norm": 1.9640566110610962, "learning_rate": 0.0001952494175784454, "loss": 1.6211, "step": 878 }, { "epoch": 0.1259131929522991, "grad_norm": 1.085138201713562, "learning_rate": 0.00019523527657171, "loss": 1.5403, "step": 879 }, { "epoch": 0.12605643890560092, "grad_norm": 1.5732018947601318, "learning_rate": 0.00019522111506317625, "loss": 1.5265, "step": 880 }, { "epoch": 0.12619968485890273, "grad_norm": 1.538540005683899, "learning_rate": 0.00019520693305589282, "loss": 1.4834, "step": 881 }, { "epoch": 0.12634293081220455, "grad_norm": 1.5261553525924683, "learning_rate": 0.00019519273055291266, "loss": 1.4261, "step": 882 }, { "epoch": 0.1264861767655064, "grad_norm": 1.5756278038024902, "learning_rate": 0.0001951785075572933, "loss": 1.6371, "step": 883 }, { "epoch": 0.1266294227188082, "grad_norm": 1.1240745782852173, "learning_rate": 0.00019516426407209652, "loss": 1.6103, "step": 884 }, { "epoch": 0.12677266867211, "grad_norm": 1.4535131454467773, "learning_rate": 0.0001951500001003887, "loss": 1.7135, "step": 885 }, { "epoch": 0.12691591462541182, "grad_norm": 1.5299814939498901, "learning_rate": 0.0001951357156452404, "loss": 1.7885, "step": 886 }, { "epoch": 0.12705916057871366, "grad_norm": 1.7502105236053467, "learning_rate": 0.00019512141070972678, "loss": 1.6142, "step": 887 }, { "epoch": 0.12720240653201548, "grad_norm": 1.7939033508300781, "learning_rate": 0.00019510708529692735, "loss": 1.4853, "step": 888 }, { "epoch": 0.1273456524853173, "grad_norm": 1.4003231525421143, "learning_rate": 0.00019509273940992596, "loss": 1.661, "step": 889 }, { "epoch": 0.1274888984386191, "grad_norm": 1.538064956665039, "learning_rate": 0.00019507837305181096, "loss": 1.8178, "step": 890 }, { "epoch": 0.12763214439192092, "grad_norm": 1.3666563034057617, "learning_rate": 0.00019506398622567509, "loss": 1.5423, "step": 891 }, { "epoch": 0.12777539034522276, "grad_norm": 1.386530876159668, "learning_rate": 0.00019504957893461545, "loss": 1.6558, "step": 892 }, { "epoch": 0.12791863629852457, "grad_norm": 1.5850868225097656, "learning_rate": 0.00019503515118173353, "loss": 1.793, "step": 893 }, { "epoch": 0.12806188225182638, "grad_norm": 1.5066699981689453, "learning_rate": 0.00019502070297013538, "loss": 1.8138, "step": 894 }, { "epoch": 0.1282051282051282, "grad_norm": 1.35176682472229, "learning_rate": 0.0001950062343029312, "loss": 1.786, "step": 895 }, { "epoch": 0.12834837415843003, "grad_norm": 1.6995621919631958, "learning_rate": 0.00019499174518323588, "loss": 1.5731, "step": 896 }, { "epoch": 0.12849162011173185, "grad_norm": 1.4979966878890991, "learning_rate": 0.0001949772356141685, "loss": 1.5357, "step": 897 }, { "epoch": 0.12863486606503366, "grad_norm": 1.404828429222107, "learning_rate": 0.0001949627055988526, "loss": 1.5116, "step": 898 }, { "epoch": 0.12877811201833547, "grad_norm": 1.318796157836914, "learning_rate": 0.00019494815514041613, "loss": 1.6719, "step": 899 }, { "epoch": 0.1289213579716373, "grad_norm": 1.4717530012130737, "learning_rate": 0.00019493358424199148, "loss": 1.7167, "step": 900 }, { "epoch": 0.12906460392493913, "grad_norm": 1.4654228687286377, "learning_rate": 0.00019491899290671535, "loss": 1.6247, "step": 901 }, { "epoch": 0.12920784987824094, "grad_norm": 1.2905997037887573, "learning_rate": 0.00019490438113772896, "loss": 1.6019, "step": 902 }, { "epoch": 0.12935109583154275, "grad_norm": 1.6492799520492554, "learning_rate": 0.00019488974893817784, "loss": 1.8544, "step": 903 }, { "epoch": 0.1294943417848446, "grad_norm": 1.655245304107666, "learning_rate": 0.00019487509631121192, "loss": 1.4752, "step": 904 }, { "epoch": 0.1296375877381464, "grad_norm": 1.2696119546890259, "learning_rate": 0.00019486042325998556, "loss": 1.4508, "step": 905 }, { "epoch": 0.12978083369144822, "grad_norm": 1.6683545112609863, "learning_rate": 0.0001948457297876575, "loss": 1.6142, "step": 906 }, { "epoch": 0.12992407964475003, "grad_norm": 1.3680920600891113, "learning_rate": 0.00019483101589739084, "loss": 1.574, "step": 907 }, { "epoch": 0.13006732559805184, "grad_norm": 1.2326867580413818, "learning_rate": 0.00019481628159235322, "loss": 1.6057, "step": 908 }, { "epoch": 0.13021057155135368, "grad_norm": 1.1549794673919678, "learning_rate": 0.0001948015268757165, "loss": 1.7952, "step": 909 }, { "epoch": 0.1303538175046555, "grad_norm": 1.5406399965286255, "learning_rate": 0.00019478675175065702, "loss": 1.6213, "step": 910 }, { "epoch": 0.1304970634579573, "grad_norm": 1.6765726804733276, "learning_rate": 0.0001947719562203555, "loss": 1.6014, "step": 911 }, { "epoch": 0.13064030941125912, "grad_norm": 1.32000732421875, "learning_rate": 0.00019475714028799703, "loss": 1.6371, "step": 912 }, { "epoch": 0.13078355536456096, "grad_norm": 1.6403682231903076, "learning_rate": 0.0001947423039567711, "loss": 1.7579, "step": 913 }, { "epoch": 0.13092680131786277, "grad_norm": 1.3830195665359497, "learning_rate": 0.0001947274472298717, "loss": 1.6199, "step": 914 }, { "epoch": 0.13107004727116459, "grad_norm": 1.4727109670639038, "learning_rate": 0.00019471257011049702, "loss": 1.535, "step": 915 }, { "epoch": 0.1312132932244664, "grad_norm": 1.4898260831832886, "learning_rate": 0.00019469767260184975, "loss": 1.6533, "step": 916 }, { "epoch": 0.13135653917776824, "grad_norm": 1.3684073686599731, "learning_rate": 0.000194682754707137, "loss": 1.5232, "step": 917 }, { "epoch": 0.13149978513107005, "grad_norm": 1.3937976360321045, "learning_rate": 0.0001946678164295702, "loss": 1.5752, "step": 918 }, { "epoch": 0.13164303108437186, "grad_norm": 1.380321741104126, "learning_rate": 0.0001946528577723652, "loss": 1.6235, "step": 919 }, { "epoch": 0.13178627703767368, "grad_norm": 1.8011161088943481, "learning_rate": 0.00019463787873874217, "loss": 1.8215, "step": 920 }, { "epoch": 0.13192952299097552, "grad_norm": 1.7107772827148438, "learning_rate": 0.0001946228793319258, "loss": 1.6096, "step": 921 }, { "epoch": 0.13207276894427733, "grad_norm": 1.350441813468933, "learning_rate": 0.00019460785955514504, "loss": 1.5305, "step": 922 }, { "epoch": 0.13221601489757914, "grad_norm": 1.3562066555023193, "learning_rate": 0.00019459281941163332, "loss": 1.3447, "step": 923 }, { "epoch": 0.13235926085088096, "grad_norm": 1.2562601566314697, "learning_rate": 0.00019457775890462838, "loss": 1.6173, "step": 924 }, { "epoch": 0.13250250680418277, "grad_norm": 1.3229228258132935, "learning_rate": 0.00019456267803737243, "loss": 1.7514, "step": 925 }, { "epoch": 0.1326457527574846, "grad_norm": 1.3482604026794434, "learning_rate": 0.0001945475768131119, "loss": 1.7536, "step": 926 }, { "epoch": 0.13278899871078642, "grad_norm": 1.6788184642791748, "learning_rate": 0.00019453245523509777, "loss": 1.6746, "step": 927 }, { "epoch": 0.13293224466408823, "grad_norm": 1.146547794342041, "learning_rate": 0.0001945173133065854, "loss": 1.6844, "step": 928 }, { "epoch": 0.13307549061739005, "grad_norm": 1.5112667083740234, "learning_rate": 0.00019450215103083437, "loss": 1.5235, "step": 929 }, { "epoch": 0.1332187365706919, "grad_norm": 1.7696430683135986, "learning_rate": 0.0001944869684111088, "loss": 1.7597, "step": 930 }, { "epoch": 0.1333619825239937, "grad_norm": 1.3771663904190063, "learning_rate": 0.00019447176545067711, "loss": 1.5269, "step": 931 }, { "epoch": 0.1335052284772955, "grad_norm": 1.187925100326538, "learning_rate": 0.00019445654215281214, "loss": 1.6062, "step": 932 }, { "epoch": 0.13364847443059732, "grad_norm": 1.2776521444320679, "learning_rate": 0.0001944412985207911, "loss": 1.6094, "step": 933 }, { "epoch": 0.13379172038389917, "grad_norm": 1.6938817501068115, "learning_rate": 0.0001944260345578955, "loss": 1.7232, "step": 934 }, { "epoch": 0.13393496633720098, "grad_norm": 1.3913626670837402, "learning_rate": 0.00019441075026741138, "loss": 1.7062, "step": 935 }, { "epoch": 0.1340782122905028, "grad_norm": 1.5622707605361938, "learning_rate": 0.00019439544565262904, "loss": 1.7202, "step": 936 }, { "epoch": 0.1342214582438046, "grad_norm": 1.4457072019577026, "learning_rate": 0.00019438012071684314, "loss": 1.6776, "step": 937 }, { "epoch": 0.13436470419710644, "grad_norm": 1.3128763437271118, "learning_rate": 0.0001943647754633528, "loss": 1.6889, "step": 938 }, { "epoch": 0.13450795015040826, "grad_norm": 1.381349802017212, "learning_rate": 0.0001943494098954615, "loss": 1.7132, "step": 939 }, { "epoch": 0.13465119610371007, "grad_norm": 1.2013156414031982, "learning_rate": 0.000194334024016477, "loss": 1.6364, "step": 940 }, { "epoch": 0.13479444205701188, "grad_norm": 1.1933903694152832, "learning_rate": 0.00019431861782971156, "loss": 1.666, "step": 941 }, { "epoch": 0.13493768801031372, "grad_norm": 1.4307141304016113, "learning_rate": 0.0001943031913384817, "loss": 1.8255, "step": 942 }, { "epoch": 0.13508093396361553, "grad_norm": 1.6470608711242676, "learning_rate": 0.00019428774454610843, "loss": 1.6506, "step": 943 }, { "epoch": 0.13522417991691735, "grad_norm": 1.7180609703063965, "learning_rate": 0.000194272277455917, "loss": 1.732, "step": 944 }, { "epoch": 0.13536742587021916, "grad_norm": 1.2828840017318726, "learning_rate": 0.0001942567900712371, "loss": 1.6025, "step": 945 }, { "epoch": 0.13551067182352097, "grad_norm": 1.6159284114837646, "learning_rate": 0.00019424128239540277, "loss": 1.6597, "step": 946 }, { "epoch": 0.1356539177768228, "grad_norm": 1.1628026962280273, "learning_rate": 0.0001942257544317525, "loss": 1.8775, "step": 947 }, { "epoch": 0.13579716373012463, "grad_norm": 1.4651850461959839, "learning_rate": 0.00019421020618362898, "loss": 1.6985, "step": 948 }, { "epoch": 0.13594040968342644, "grad_norm": 1.3251550197601318, "learning_rate": 0.00019419463765437943, "loss": 1.5467, "step": 949 }, { "epoch": 0.13608365563672825, "grad_norm": 1.433849811553955, "learning_rate": 0.00019417904884735533, "loss": 1.511, "step": 950 }, { "epoch": 0.1362269015900301, "grad_norm": 1.5445812940597534, "learning_rate": 0.00019416343976591261, "loss": 1.4708, "step": 951 }, { "epoch": 0.1363701475433319, "grad_norm": 1.6535338163375854, "learning_rate": 0.0001941478104134115, "loss": 1.6613, "step": 952 }, { "epoch": 0.13651339349663372, "grad_norm": 1.5296074151992798, "learning_rate": 0.00019413216079321654, "loss": 1.5847, "step": 953 }, { "epoch": 0.13665663944993553, "grad_norm": 1.3820561170578003, "learning_rate": 0.00019411649090869684, "loss": 1.6084, "step": 954 }, { "epoch": 0.13679988540323737, "grad_norm": 1.7438321113586426, "learning_rate": 0.00019410080076322564, "loss": 1.5868, "step": 955 }, { "epoch": 0.13694313135653918, "grad_norm": 1.090944766998291, "learning_rate": 0.00019408509036018066, "loss": 1.733, "step": 956 }, { "epoch": 0.137086377309841, "grad_norm": 1.11934494972229, "learning_rate": 0.00019406935970294397, "loss": 1.6123, "step": 957 }, { "epoch": 0.1372296232631428, "grad_norm": 1.1867879629135132, "learning_rate": 0.00019405360879490202, "loss": 1.6464, "step": 958 }, { "epoch": 0.13737286921644465, "grad_norm": 1.5631729364395142, "learning_rate": 0.00019403783763944556, "loss": 1.6699, "step": 959 }, { "epoch": 0.13751611516974646, "grad_norm": 1.6658815145492554, "learning_rate": 0.0001940220462399697, "loss": 1.711, "step": 960 }, { "epoch": 0.13765936112304827, "grad_norm": 1.3660664558410645, "learning_rate": 0.000194006234599874, "loss": 1.5602, "step": 961 }, { "epoch": 0.1378026070763501, "grad_norm": 1.699874758720398, "learning_rate": 0.00019399040272256225, "loss": 1.5976, "step": 962 }, { "epoch": 0.1379458530296519, "grad_norm": 1.3316453695297241, "learning_rate": 0.00019397455061144272, "loss": 1.4881, "step": 963 }, { "epoch": 0.13808909898295374, "grad_norm": 1.207200050354004, "learning_rate": 0.00019395867826992795, "loss": 1.806, "step": 964 }, { "epoch": 0.13823234493625555, "grad_norm": 1.202599048614502, "learning_rate": 0.00019394278570143488, "loss": 1.6282, "step": 965 }, { "epoch": 0.13837559088955736, "grad_norm": 1.7298444509506226, "learning_rate": 0.00019392687290938475, "loss": 1.5611, "step": 966 }, { "epoch": 0.13851883684285918, "grad_norm": 1.230285406112671, "learning_rate": 0.00019391093989720322, "loss": 1.6841, "step": 967 }, { "epoch": 0.13866208279616102, "grad_norm": 1.3954623937606812, "learning_rate": 0.00019389498666832025, "loss": 1.7756, "step": 968 }, { "epoch": 0.13880532874946283, "grad_norm": 1.4035662412643433, "learning_rate": 0.0001938790132261702, "loss": 1.7445, "step": 969 }, { "epoch": 0.13894857470276464, "grad_norm": 1.6769047975540161, "learning_rate": 0.00019386301957419172, "loss": 1.4716, "step": 970 }, { "epoch": 0.13909182065606646, "grad_norm": 1.46638822555542, "learning_rate": 0.00019384700571582793, "loss": 1.8324, "step": 971 }, { "epoch": 0.1392350666093683, "grad_norm": 1.3066847324371338, "learning_rate": 0.00019383097165452613, "loss": 1.8041, "step": 972 }, { "epoch": 0.1393783125626701, "grad_norm": 1.7001136541366577, "learning_rate": 0.0001938149173937381, "loss": 1.7542, "step": 973 }, { "epoch": 0.13952155851597192, "grad_norm": 1.4689810276031494, "learning_rate": 0.0001937988429369199, "loss": 1.8392, "step": 974 }, { "epoch": 0.13966480446927373, "grad_norm": 1.5190823078155518, "learning_rate": 0.000193782748287532, "loss": 1.6861, "step": 975 }, { "epoch": 0.13980805042257557, "grad_norm": 1.3374345302581787, "learning_rate": 0.00019376663344903913, "loss": 1.4986, "step": 976 }, { "epoch": 0.1399512963758774, "grad_norm": 1.3624929189682007, "learning_rate": 0.00019375049842491047, "loss": 1.6484, "step": 977 }, { "epoch": 0.1400945423291792, "grad_norm": 1.1864464282989502, "learning_rate": 0.00019373434321861942, "loss": 1.6878, "step": 978 }, { "epoch": 0.140237788282481, "grad_norm": 1.3731497526168823, "learning_rate": 0.00019371816783364388, "loss": 1.5349, "step": 979 }, { "epoch": 0.14038103423578283, "grad_norm": 1.232944369316101, "learning_rate": 0.00019370197227346596, "loss": 1.5453, "step": 980 }, { "epoch": 0.14052428018908467, "grad_norm": 1.293986439704895, "learning_rate": 0.00019368575654157217, "loss": 1.5605, "step": 981 }, { "epoch": 0.14066752614238648, "grad_norm": 1.5975431203842163, "learning_rate": 0.00019366952064145334, "loss": 1.6435, "step": 982 }, { "epoch": 0.1408107720956883, "grad_norm": 1.2800720930099487, "learning_rate": 0.00019365326457660472, "loss": 1.6251, "step": 983 }, { "epoch": 0.1409540180489901, "grad_norm": 1.3393703699111938, "learning_rate": 0.00019363698835052576, "loss": 1.7513, "step": 984 }, { "epoch": 0.14109726400229194, "grad_norm": 1.6636801958084106, "learning_rate": 0.00019362069196672037, "loss": 1.6304, "step": 985 }, { "epoch": 0.14124050995559376, "grad_norm": 1.5316323041915894, "learning_rate": 0.00019360437542869676, "loss": 1.5975, "step": 986 }, { "epoch": 0.14138375590889557, "grad_norm": 1.4087072610855103, "learning_rate": 0.00019358803873996747, "loss": 1.5366, "step": 987 }, { "epoch": 0.14152700186219738, "grad_norm": 1.1620088815689087, "learning_rate": 0.00019357168190404936, "loss": 1.6894, "step": 988 }, { "epoch": 0.14167024781549922, "grad_norm": 1.3791073560714722, "learning_rate": 0.0001935553049244637, "loss": 1.7392, "step": 989 }, { "epoch": 0.14181349376880104, "grad_norm": 1.6591752767562866, "learning_rate": 0.00019353890780473602, "loss": 1.5439, "step": 990 }, { "epoch": 0.14195673972210285, "grad_norm": 1.6009260416030884, "learning_rate": 0.00019352249054839624, "loss": 1.6955, "step": 991 }, { "epoch": 0.14209998567540466, "grad_norm": 1.2773163318634033, "learning_rate": 0.00019350605315897852, "loss": 1.467, "step": 992 }, { "epoch": 0.1422432316287065, "grad_norm": 1.2984821796417236, "learning_rate": 0.00019348959564002152, "loss": 1.7157, "step": 993 }, { "epoch": 0.14238647758200831, "grad_norm": 1.357259750366211, "learning_rate": 0.00019347311799506803, "loss": 1.7353, "step": 994 }, { "epoch": 0.14252972353531013, "grad_norm": 1.2685633897781372, "learning_rate": 0.0001934566202276654, "loss": 1.6591, "step": 995 }, { "epoch": 0.14267296948861194, "grad_norm": 1.2923705577850342, "learning_rate": 0.00019344010234136508, "loss": 1.6316, "step": 996 }, { "epoch": 0.14281621544191378, "grad_norm": 1.4283264875411987, "learning_rate": 0.00019342356433972303, "loss": 1.8922, "step": 997 }, { "epoch": 0.1429594613952156, "grad_norm": 1.7292664051055908, "learning_rate": 0.00019340700622629946, "loss": 1.7035, "step": 998 }, { "epoch": 0.1431027073485174, "grad_norm": 1.114280104637146, "learning_rate": 0.00019339042800465889, "loss": 1.5319, "step": 999 }, { "epoch": 0.14324595330181922, "grad_norm": 1.433469533920288, "learning_rate": 0.00019337382967837024, "loss": 1.56, "step": 1000 }, { "epoch": 0.14338919925512103, "grad_norm": 1.18865168094635, "learning_rate": 0.0001933572112510067, "loss": 1.6652, "step": 1001 }, { "epoch": 0.14353244520842287, "grad_norm": 1.522809386253357, "learning_rate": 0.0001933405727261458, "loss": 1.4638, "step": 1002 }, { "epoch": 0.14367569116172468, "grad_norm": 1.3776298761367798, "learning_rate": 0.0001933239141073694, "loss": 1.4991, "step": 1003 }, { "epoch": 0.1438189371150265, "grad_norm": 1.2137130498886108, "learning_rate": 0.00019330723539826375, "loss": 1.5591, "step": 1004 }, { "epoch": 0.1439621830683283, "grad_norm": 1.5983413457870483, "learning_rate": 0.00019329053660241928, "loss": 1.7367, "step": 1005 }, { "epoch": 0.14410542902163015, "grad_norm": 1.2168375253677368, "learning_rate": 0.00019327381772343087, "loss": 1.5848, "step": 1006 }, { "epoch": 0.14424867497493196, "grad_norm": 1.6772574186325073, "learning_rate": 0.00019325707876489766, "loss": 1.5304, "step": 1007 }, { "epoch": 0.14439192092823377, "grad_norm": 1.205828070640564, "learning_rate": 0.00019324031973042317, "loss": 1.6687, "step": 1008 }, { "epoch": 0.1445351668815356, "grad_norm": 1.347359299659729, "learning_rate": 0.00019322354062361517, "loss": 1.7297, "step": 1009 }, { "epoch": 0.14467841283483743, "grad_norm": 1.1474287509918213, "learning_rate": 0.0001932067414480858, "loss": 1.6692, "step": 1010 }, { "epoch": 0.14482165878813924, "grad_norm": 1.5905922651290894, "learning_rate": 0.00019318992220745149, "loss": 1.6037, "step": 1011 }, { "epoch": 0.14496490474144105, "grad_norm": 1.495343804359436, "learning_rate": 0.00019317308290533306, "loss": 1.3346, "step": 1012 }, { "epoch": 0.14510815069474287, "grad_norm": 1.5528923273086548, "learning_rate": 0.00019315622354535553, "loss": 1.5895, "step": 1013 }, { "epoch": 0.1452513966480447, "grad_norm": 1.5690736770629883, "learning_rate": 0.00019313934413114832, "loss": 1.5037, "step": 1014 }, { "epoch": 0.14539464260134652, "grad_norm": 1.3213329315185547, "learning_rate": 0.00019312244466634517, "loss": 1.707, "step": 1015 }, { "epoch": 0.14553788855464833, "grad_norm": 1.4903837442398071, "learning_rate": 0.0001931055251545841, "loss": 1.7707, "step": 1016 }, { "epoch": 0.14568113450795014, "grad_norm": 1.3928664922714233, "learning_rate": 0.00019308858559950748, "loss": 1.6099, "step": 1017 }, { "epoch": 0.14582438046125196, "grad_norm": 1.3110967874526978, "learning_rate": 0.00019307162600476195, "loss": 1.4615, "step": 1018 }, { "epoch": 0.1459676264145538, "grad_norm": 1.3422695398330688, "learning_rate": 0.00019305464637399853, "loss": 1.6469, "step": 1019 }, { "epoch": 0.1461108723678556, "grad_norm": 1.3224635124206543, "learning_rate": 0.00019303764671087245, "loss": 1.5305, "step": 1020 }, { "epoch": 0.14625411832115742, "grad_norm": 1.5960010290145874, "learning_rate": 0.0001930206270190434, "loss": 1.5957, "step": 1021 }, { "epoch": 0.14639736427445924, "grad_norm": 1.560904622077942, "learning_rate": 0.00019300358730217526, "loss": 1.6573, "step": 1022 }, { "epoch": 0.14654061022776108, "grad_norm": 1.2874823808670044, "learning_rate": 0.0001929865275639362, "loss": 1.6412, "step": 1023 }, { "epoch": 0.1466838561810629, "grad_norm": 1.1874332427978516, "learning_rate": 0.00019296944780799885, "loss": 1.6104, "step": 1024 }, { "epoch": 0.1468271021343647, "grad_norm": 1.3832107782363892, "learning_rate": 0.00019295234803804004, "loss": 1.5944, "step": 1025 }, { "epoch": 0.1469703480876665, "grad_norm": 1.4702212810516357, "learning_rate": 0.0001929352282577409, "loss": 1.6766, "step": 1026 }, { "epoch": 0.14711359404096835, "grad_norm": 1.3950883150100708, "learning_rate": 0.0001929180884707869, "loss": 1.5534, "step": 1027 }, { "epoch": 0.14725683999427017, "grad_norm": 1.2149051427841187, "learning_rate": 0.0001929009286808678, "loss": 1.6351, "step": 1028 }, { "epoch": 0.14740008594757198, "grad_norm": 1.470154881477356, "learning_rate": 0.0001928837488916777, "loss": 1.8191, "step": 1029 }, { "epoch": 0.1475433319008738, "grad_norm": 1.2702248096466064, "learning_rate": 0.00019286654910691503, "loss": 1.5896, "step": 1030 }, { "epoch": 0.14768657785417563, "grad_norm": 1.364892840385437, "learning_rate": 0.0001928493293302824, "loss": 1.5507, "step": 1031 }, { "epoch": 0.14782982380747745, "grad_norm": 1.3552056550979614, "learning_rate": 0.0001928320895654868, "loss": 1.6162, "step": 1032 }, { "epoch": 0.14797306976077926, "grad_norm": 1.320356011390686, "learning_rate": 0.00019281482981623957, "loss": 1.7633, "step": 1033 }, { "epoch": 0.14811631571408107, "grad_norm": 1.2614949941635132, "learning_rate": 0.0001927975500862563, "loss": 1.402, "step": 1034 }, { "epoch": 0.14825956166738288, "grad_norm": 1.0605757236480713, "learning_rate": 0.00019278025037925689, "loss": 1.7286, "step": 1035 }, { "epoch": 0.14840280762068472, "grad_norm": 1.4198863506317139, "learning_rate": 0.00019276293069896548, "loss": 1.7611, "step": 1036 }, { "epoch": 0.14854605357398654, "grad_norm": 1.5028280019760132, "learning_rate": 0.00019274559104911067, "loss": 1.6216, "step": 1037 }, { "epoch": 0.14868929952728835, "grad_norm": 1.3857834339141846, "learning_rate": 0.0001927282314334252, "loss": 1.6809, "step": 1038 }, { "epoch": 0.14883254548059016, "grad_norm": 1.5174531936645508, "learning_rate": 0.00019271085185564615, "loss": 1.7131, "step": 1039 }, { "epoch": 0.148975791433892, "grad_norm": 1.47689688205719, "learning_rate": 0.00019269345231951493, "loss": 1.5616, "step": 1040 }, { "epoch": 0.14911903738719381, "grad_norm": 1.4529578685760498, "learning_rate": 0.00019267603282877724, "loss": 1.5127, "step": 1041 }, { "epoch": 0.14926228334049563, "grad_norm": 1.4539470672607422, "learning_rate": 0.00019265859338718304, "loss": 1.6635, "step": 1042 }, { "epoch": 0.14940552929379744, "grad_norm": 1.2897837162017822, "learning_rate": 0.00019264113399848664, "loss": 1.6854, "step": 1043 }, { "epoch": 0.14954877524709928, "grad_norm": 1.045975685119629, "learning_rate": 0.0001926236546664466, "loss": 1.636, "step": 1044 }, { "epoch": 0.1496920212004011, "grad_norm": 1.3277157545089722, "learning_rate": 0.00019260615539482584, "loss": 1.6928, "step": 1045 }, { "epoch": 0.1498352671537029, "grad_norm": 1.6697393655776978, "learning_rate": 0.0001925886361873914, "loss": 1.7401, "step": 1046 }, { "epoch": 0.14997851310700472, "grad_norm": 1.3728654384613037, "learning_rate": 0.00019257109704791484, "loss": 1.7717, "step": 1047 }, { "epoch": 0.15012175906030656, "grad_norm": 1.5748082399368286, "learning_rate": 0.00019255353798017184, "loss": 1.5119, "step": 1048 }, { "epoch": 0.15026500501360837, "grad_norm": 1.2474201917648315, "learning_rate": 0.00019253595898794247, "loss": 1.5075, "step": 1049 }, { "epoch": 0.15040825096691018, "grad_norm": 1.0984222888946533, "learning_rate": 0.00019251836007501102, "loss": 1.5575, "step": 1050 }, { "epoch": 0.150551496920212, "grad_norm": 1.2190009355545044, "learning_rate": 0.00019250074124516618, "loss": 1.5892, "step": 1051 }, { "epoch": 0.1506947428735138, "grad_norm": 1.3655880689620972, "learning_rate": 0.00019248310250220073, "loss": 1.7195, "step": 1052 }, { "epoch": 0.15083798882681565, "grad_norm": 1.4146318435668945, "learning_rate": 0.0001924654438499119, "loss": 1.4434, "step": 1053 }, { "epoch": 0.15098123478011746, "grad_norm": 1.2266039848327637, "learning_rate": 0.00019244776529210122, "loss": 1.6555, "step": 1054 }, { "epoch": 0.15112448073341928, "grad_norm": 1.497471570968628, "learning_rate": 0.00019243006683257439, "loss": 1.6901, "step": 1055 }, { "epoch": 0.1512677266867211, "grad_norm": 1.252934455871582, "learning_rate": 0.00019241234847514144, "loss": 1.6138, "step": 1056 }, { "epoch": 0.15141097264002293, "grad_norm": 1.7987240552902222, "learning_rate": 0.00019239461022361676, "loss": 1.6302, "step": 1057 }, { "epoch": 0.15155421859332474, "grad_norm": 1.4514118432998657, "learning_rate": 0.00019237685208181886, "loss": 1.9393, "step": 1058 }, { "epoch": 0.15169746454662655, "grad_norm": 1.4311445951461792, "learning_rate": 0.0001923590740535707, "loss": 1.6419, "step": 1059 }, { "epoch": 0.15184071049992837, "grad_norm": 1.366348385810852, "learning_rate": 0.00019234127614269943, "loss": 1.6692, "step": 1060 }, { "epoch": 0.1519839564532302, "grad_norm": 1.4392712116241455, "learning_rate": 0.00019232345835303648, "loss": 1.5465, "step": 1061 }, { "epoch": 0.15212720240653202, "grad_norm": 1.5436354875564575, "learning_rate": 0.0001923056206884176, "loss": 1.7787, "step": 1062 }, { "epoch": 0.15227044835983383, "grad_norm": 1.5093073844909668, "learning_rate": 0.0001922877631526828, "loss": 1.49, "step": 1063 }, { "epoch": 0.15241369431313564, "grad_norm": 1.5289547443389893, "learning_rate": 0.0001922698857496764, "loss": 1.7708, "step": 1064 }, { "epoch": 0.15255694026643749, "grad_norm": 1.3712234497070312, "learning_rate": 0.0001922519884832469, "loss": 1.5721, "step": 1065 }, { "epoch": 0.1527001862197393, "grad_norm": 1.5805515050888062, "learning_rate": 0.00019223407135724713, "loss": 1.6024, "step": 1066 }, { "epoch": 0.1528434321730411, "grad_norm": 1.6870808601379395, "learning_rate": 0.00019221613437553426, "loss": 1.7993, "step": 1067 }, { "epoch": 0.15298667812634292, "grad_norm": 1.2619612216949463, "learning_rate": 0.00019219817754196966, "loss": 1.7692, "step": 1068 }, { "epoch": 0.15312992407964476, "grad_norm": 1.2510889768600464, "learning_rate": 0.00019218020086041898, "loss": 1.5239, "step": 1069 }, { "epoch": 0.15327317003294658, "grad_norm": 1.080594778060913, "learning_rate": 0.00019216220433475214, "loss": 1.6541, "step": 1070 }, { "epoch": 0.1534164159862484, "grad_norm": 1.387591004371643, "learning_rate": 0.0001921441879688434, "loss": 1.4627, "step": 1071 }, { "epoch": 0.1535596619395502, "grad_norm": 1.726069688796997, "learning_rate": 0.00019212615176657116, "loss": 1.6504, "step": 1072 }, { "epoch": 0.15370290789285201, "grad_norm": 1.229512095451355, "learning_rate": 0.00019210809573181825, "loss": 1.5672, "step": 1073 }, { "epoch": 0.15384615384615385, "grad_norm": 1.0011974573135376, "learning_rate": 0.00019209001986847163, "loss": 1.4367, "step": 1074 }, { "epoch": 0.15398939979945567, "grad_norm": 1.3893342018127441, "learning_rate": 0.00019207192418042266, "loss": 1.7764, "step": 1075 }, { "epoch": 0.15413264575275748, "grad_norm": 1.1153842210769653, "learning_rate": 0.00019205380867156677, "loss": 1.8684, "step": 1076 }, { "epoch": 0.1542758917060593, "grad_norm": 1.322856068611145, "learning_rate": 0.0001920356733458039, "loss": 1.6349, "step": 1077 }, { "epoch": 0.15441913765936113, "grad_norm": 1.235565185546875, "learning_rate": 0.00019201751820703807, "loss": 1.5923, "step": 1078 }, { "epoch": 0.15456238361266295, "grad_norm": 1.6908771991729736, "learning_rate": 0.00019199934325917766, "loss": 1.5258, "step": 1079 }, { "epoch": 0.15470562956596476, "grad_norm": 1.65554940700531, "learning_rate": 0.00019198114850613524, "loss": 1.6399, "step": 1080 }, { "epoch": 0.15484887551926657, "grad_norm": 1.1040687561035156, "learning_rate": 0.00019196293395182777, "loss": 1.7762, "step": 1081 }, { "epoch": 0.1549921214725684, "grad_norm": 1.2426106929779053, "learning_rate": 0.0001919446996001763, "loss": 1.7969, "step": 1082 }, { "epoch": 0.15513536742587022, "grad_norm": 1.290367841720581, "learning_rate": 0.00019192644545510635, "loss": 1.8039, "step": 1083 }, { "epoch": 0.15527861337917204, "grad_norm": 1.5014369487762451, "learning_rate": 0.0001919081715205475, "loss": 1.5574, "step": 1084 }, { "epoch": 0.15542185933247385, "grad_norm": 1.3923765420913696, "learning_rate": 0.00019188987780043365, "loss": 1.5518, "step": 1085 }, { "epoch": 0.1555651052857757, "grad_norm": 1.1949843168258667, "learning_rate": 0.00019187156429870307, "loss": 1.5505, "step": 1086 }, { "epoch": 0.1557083512390775, "grad_norm": 1.4897551536560059, "learning_rate": 0.00019185323101929814, "loss": 1.5782, "step": 1087 }, { "epoch": 0.15585159719237932, "grad_norm": 1.3129485845565796, "learning_rate": 0.0001918348779661656, "loss": 1.5843, "step": 1088 }, { "epoch": 0.15599484314568113, "grad_norm": 1.2399946451187134, "learning_rate": 0.00019181650514325641, "loss": 1.7298, "step": 1089 }, { "epoch": 0.15613808909898294, "grad_norm": 1.1018502712249756, "learning_rate": 0.00019179811255452575, "loss": 1.6082, "step": 1090 }, { "epoch": 0.15628133505228478, "grad_norm": 1.2920575141906738, "learning_rate": 0.00019177970020393313, "loss": 1.5963, "step": 1091 }, { "epoch": 0.1564245810055866, "grad_norm": 1.3606441020965576, "learning_rate": 0.00019176126809544223, "loss": 1.6317, "step": 1092 }, { "epoch": 0.1565678269588884, "grad_norm": 1.9767169952392578, "learning_rate": 0.00019174281623302107, "loss": 1.6372, "step": 1093 }, { "epoch": 0.15671107291219022, "grad_norm": 1.5381182432174683, "learning_rate": 0.0001917243446206418, "loss": 1.4984, "step": 1094 }, { "epoch": 0.15685431886549206, "grad_norm": 1.060970425605774, "learning_rate": 0.00019170585326228103, "loss": 1.635, "step": 1095 }, { "epoch": 0.15699756481879387, "grad_norm": 1.327100396156311, "learning_rate": 0.00019168734216191936, "loss": 1.509, "step": 1096 }, { "epoch": 0.15714081077209568, "grad_norm": 1.2250781059265137, "learning_rate": 0.0001916688113235419, "loss": 1.5696, "step": 1097 }, { "epoch": 0.1572840567253975, "grad_norm": 1.2987079620361328, "learning_rate": 0.00019165026075113777, "loss": 1.4361, "step": 1098 }, { "epoch": 0.15742730267869934, "grad_norm": 1.2638452053070068, "learning_rate": 0.0001916316904487005, "loss": 1.5452, "step": 1099 }, { "epoch": 0.15757054863200115, "grad_norm": 1.1070082187652588, "learning_rate": 0.00019161310042022778, "loss": 1.6651, "step": 1100 }, { "epoch": 0.15771379458530296, "grad_norm": 1.296685814857483, "learning_rate": 0.00019159449066972163, "loss": 1.7363, "step": 1101 }, { "epoch": 0.15785704053860478, "grad_norm": 1.2389682531356812, "learning_rate": 0.00019157586120118828, "loss": 1.5189, "step": 1102 }, { "epoch": 0.15800028649190662, "grad_norm": 1.4720712900161743, "learning_rate": 0.00019155721201863816, "loss": 1.6173, "step": 1103 }, { "epoch": 0.15814353244520843, "grad_norm": 1.3605849742889404, "learning_rate": 0.00019153854312608593, "loss": 1.5762, "step": 1104 }, { "epoch": 0.15828677839851024, "grad_norm": 1.4007539749145508, "learning_rate": 0.00019151985452755063, "loss": 1.7202, "step": 1105 }, { "epoch": 0.15843002435181205, "grad_norm": 1.312222957611084, "learning_rate": 0.00019150114622705543, "loss": 1.5817, "step": 1106 }, { "epoch": 0.15857327030511387, "grad_norm": 1.4316692352294922, "learning_rate": 0.0001914824182286277, "loss": 1.6759, "step": 1107 }, { "epoch": 0.1587165162584157, "grad_norm": 1.1199148893356323, "learning_rate": 0.00019146367053629918, "loss": 1.5129, "step": 1108 }, { "epoch": 0.15885976221171752, "grad_norm": 1.3151581287384033, "learning_rate": 0.0001914449031541058, "loss": 1.507, "step": 1109 }, { "epoch": 0.15900300816501933, "grad_norm": 1.3532556295394897, "learning_rate": 0.00019142611608608765, "loss": 1.5678, "step": 1110 }, { "epoch": 0.15914625411832115, "grad_norm": 1.2562410831451416, "learning_rate": 0.00019140730933628916, "loss": 1.5945, "step": 1111 }, { "epoch": 0.15928950007162299, "grad_norm": 1.5938620567321777, "learning_rate": 0.00019138848290875898, "loss": 1.4882, "step": 1112 }, { "epoch": 0.1594327460249248, "grad_norm": 1.3952393531799316, "learning_rate": 0.00019136963680754988, "loss": 1.6717, "step": 1113 }, { "epoch": 0.1595759919782266, "grad_norm": 1.3570115566253662, "learning_rate": 0.00019135077103671908, "loss": 1.6381, "step": 1114 }, { "epoch": 0.15971923793152842, "grad_norm": 1.3998174667358398, "learning_rate": 0.00019133188560032784, "loss": 1.5213, "step": 1115 }, { "epoch": 0.15986248388483026, "grad_norm": 1.1035337448120117, "learning_rate": 0.00019131298050244173, "loss": 1.5471, "step": 1116 }, { "epoch": 0.16000572983813208, "grad_norm": 1.1831141710281372, "learning_rate": 0.00019129405574713057, "loss": 1.5006, "step": 1117 }, { "epoch": 0.1601489757914339, "grad_norm": 1.282888412475586, "learning_rate": 0.0001912751113384684, "loss": 1.5999, "step": 1118 }, { "epoch": 0.1602922217447357, "grad_norm": 1.3090945482254028, "learning_rate": 0.00019125614728053344, "loss": 1.5101, "step": 1119 }, { "epoch": 0.16043546769803754, "grad_norm": 1.290527582168579, "learning_rate": 0.0001912371635774082, "loss": 1.559, "step": 1120 }, { "epoch": 0.16057871365133936, "grad_norm": 1.2799506187438965, "learning_rate": 0.00019121816023317948, "loss": 1.4486, "step": 1121 }, { "epoch": 0.16072195960464117, "grad_norm": 1.693171739578247, "learning_rate": 0.0001911991372519381, "loss": 1.5881, "step": 1122 }, { "epoch": 0.16086520555794298, "grad_norm": 1.204098105430603, "learning_rate": 0.0001911800946377793, "loss": 1.4233, "step": 1123 }, { "epoch": 0.16100845151124482, "grad_norm": 1.3100093603134155, "learning_rate": 0.0001911610323948025, "loss": 1.4994, "step": 1124 }, { "epoch": 0.16115169746454663, "grad_norm": 1.049872875213623, "learning_rate": 0.0001911419505271113, "loss": 1.714, "step": 1125 }, { "epoch": 0.16129494341784845, "grad_norm": 1.3493924140930176, "learning_rate": 0.0001911228490388136, "loss": 1.5788, "step": 1126 }, { "epoch": 0.16143818937115026, "grad_norm": 0.9671481847763062, "learning_rate": 0.0001911037279340214, "loss": 1.75, "step": 1127 }, { "epoch": 0.16158143532445207, "grad_norm": 1.0375068187713623, "learning_rate": 0.00019108458721685105, "loss": 1.5562, "step": 1128 }, { "epoch": 0.1617246812777539, "grad_norm": 1.0071982145309448, "learning_rate": 0.00019106542689142306, "loss": 1.6495, "step": 1129 }, { "epoch": 0.16186792723105572, "grad_norm": 1.5138421058654785, "learning_rate": 0.0001910462469618622, "loss": 1.5734, "step": 1130 }, { "epoch": 0.16201117318435754, "grad_norm": 1.1565243005752563, "learning_rate": 0.0001910270474322974, "loss": 1.5991, "step": 1131 }, { "epoch": 0.16215441913765935, "grad_norm": 1.1317607164382935, "learning_rate": 0.00019100782830686188, "loss": 1.4963, "step": 1132 }, { "epoch": 0.1622976650909612, "grad_norm": 1.418850064277649, "learning_rate": 0.000190988589589693, "loss": 1.7298, "step": 1133 }, { "epoch": 0.162440911044263, "grad_norm": 1.1677254438400269, "learning_rate": 0.00019096933128493238, "loss": 1.6786, "step": 1134 }, { "epoch": 0.16258415699756482, "grad_norm": 1.2567205429077148, "learning_rate": 0.0001909500533967259, "loss": 1.4873, "step": 1135 }, { "epoch": 0.16272740295086663, "grad_norm": 1.387148380279541, "learning_rate": 0.00019093075592922358, "loss": 1.5934, "step": 1136 }, { "epoch": 0.16287064890416847, "grad_norm": 1.2823207378387451, "learning_rate": 0.0001909114388865797, "loss": 1.6544, "step": 1137 }, { "epoch": 0.16301389485747028, "grad_norm": 1.4455963373184204, "learning_rate": 0.00019089210227295276, "loss": 1.6599, "step": 1138 }, { "epoch": 0.1631571408107721, "grad_norm": 1.2512375116348267, "learning_rate": 0.0001908727460925054, "loss": 1.3639, "step": 1139 }, { "epoch": 0.1633003867640739, "grad_norm": 1.1785351037979126, "learning_rate": 0.00019085337034940457, "loss": 1.6829, "step": 1140 }, { "epoch": 0.16344363271737575, "grad_norm": 1.1674830913543701, "learning_rate": 0.00019083397504782138, "loss": 1.6855, "step": 1141 }, { "epoch": 0.16358687867067756, "grad_norm": 1.2480065822601318, "learning_rate": 0.00019081456019193112, "loss": 1.4826, "step": 1142 }, { "epoch": 0.16373012462397937, "grad_norm": 1.3124099969863892, "learning_rate": 0.00019079512578591337, "loss": 1.6589, "step": 1143 }, { "epoch": 0.16387337057728119, "grad_norm": 1.1940853595733643, "learning_rate": 0.0001907756718339519, "loss": 1.654, "step": 1144 }, { "epoch": 0.164016616530583, "grad_norm": 1.024263858795166, "learning_rate": 0.0001907561983402346, "loss": 1.6325, "step": 1145 }, { "epoch": 0.16415986248388484, "grad_norm": 1.1584320068359375, "learning_rate": 0.0001907367053089537, "loss": 1.5977, "step": 1146 }, { "epoch": 0.16430310843718665, "grad_norm": 1.0498390197753906, "learning_rate": 0.00019071719274430554, "loss": 1.5509, "step": 1147 }, { "epoch": 0.16444635439048846, "grad_norm": 1.3356209993362427, "learning_rate": 0.00019069766065049067, "loss": 1.5498, "step": 1148 }, { "epoch": 0.16458960034379028, "grad_norm": 1.485255479812622, "learning_rate": 0.00019067810903171384, "loss": 1.5769, "step": 1149 }, { "epoch": 0.16473284629709212, "grad_norm": 1.454628825187683, "learning_rate": 0.0001906585378921841, "loss": 1.6471, "step": 1150 }, { "epoch": 0.16487609225039393, "grad_norm": 1.3714594841003418, "learning_rate": 0.0001906389472361146, "loss": 1.5501, "step": 1151 }, { "epoch": 0.16501933820369574, "grad_norm": 1.7393662929534912, "learning_rate": 0.00019061933706772274, "loss": 1.423, "step": 1152 }, { "epoch": 0.16516258415699756, "grad_norm": 1.060095191001892, "learning_rate": 0.0001905997073912301, "loss": 1.5219, "step": 1153 }, { "epoch": 0.1653058301102994, "grad_norm": 1.3549896478652954, "learning_rate": 0.00019058005821086244, "loss": 1.6104, "step": 1154 }, { "epoch": 0.1654490760636012, "grad_norm": 1.3918348550796509, "learning_rate": 0.00019056038953084973, "loss": 1.6572, "step": 1155 }, { "epoch": 0.16559232201690302, "grad_norm": 1.4463690519332886, "learning_rate": 0.00019054070135542618, "loss": 1.6054, "step": 1156 }, { "epoch": 0.16573556797020483, "grad_norm": 1.142948031425476, "learning_rate": 0.00019052099368883018, "loss": 1.5054, "step": 1157 }, { "epoch": 0.16587881392350667, "grad_norm": 1.4111652374267578, "learning_rate": 0.00019050126653530426, "loss": 1.6335, "step": 1158 }, { "epoch": 0.1660220598768085, "grad_norm": 1.8660470247268677, "learning_rate": 0.00019048151989909523, "loss": 1.5976, "step": 1159 }, { "epoch": 0.1661653058301103, "grad_norm": 2.259783983230591, "learning_rate": 0.00019046175378445405, "loss": 1.7887, "step": 1160 }, { "epoch": 0.1663085517834121, "grad_norm": 1.33748197555542, "learning_rate": 0.00019044196819563588, "loss": 1.4558, "step": 1161 }, { "epoch": 0.16645179773671392, "grad_norm": 1.414290189743042, "learning_rate": 0.0001904221631369, "loss": 1.7088, "step": 1162 }, { "epoch": 0.16659504369001576, "grad_norm": 1.4009716510772705, "learning_rate": 0.00019040233861251002, "loss": 1.4561, "step": 1163 }, { "epoch": 0.16673828964331758, "grad_norm": 1.019525170326233, "learning_rate": 0.00019038249462673365, "loss": 1.5078, "step": 1164 }, { "epoch": 0.1668815355966194, "grad_norm": 1.4304003715515137, "learning_rate": 0.0001903626311838428, "loss": 1.5986, "step": 1165 }, { "epoch": 0.1670247815499212, "grad_norm": 1.4599138498306274, "learning_rate": 0.00019034274828811358, "loss": 1.3979, "step": 1166 }, { "epoch": 0.16716802750322304, "grad_norm": 1.3102240562438965, "learning_rate": 0.0001903228459438263, "loss": 1.5667, "step": 1167 }, { "epoch": 0.16731127345652486, "grad_norm": 1.3250577449798584, "learning_rate": 0.00019030292415526544, "loss": 1.4317, "step": 1168 }, { "epoch": 0.16745451940982667, "grad_norm": 1.3943833112716675, "learning_rate": 0.00019028298292671965, "loss": 1.47, "step": 1169 }, { "epoch": 0.16759776536312848, "grad_norm": 1.32235848903656, "learning_rate": 0.0001902630222624818, "loss": 1.8173, "step": 1170 }, { "epoch": 0.16774101131643032, "grad_norm": 1.4043816328048706, "learning_rate": 0.00019024304216684888, "loss": 1.5382, "step": 1171 }, { "epoch": 0.16788425726973213, "grad_norm": 1.140218734741211, "learning_rate": 0.00019022304264412217, "loss": 1.5429, "step": 1172 }, { "epoch": 0.16802750322303395, "grad_norm": 1.278874397277832, "learning_rate": 0.00019020302369860708, "loss": 1.6018, "step": 1173 }, { "epoch": 0.16817074917633576, "grad_norm": 1.3329445123672485, "learning_rate": 0.00019018298533461314, "loss": 1.7903, "step": 1174 }, { "epoch": 0.1683139951296376, "grad_norm": 1.3933483362197876, "learning_rate": 0.00019016292755645418, "loss": 1.5939, "step": 1175 }, { "epoch": 0.1684572410829394, "grad_norm": 1.2334198951721191, "learning_rate": 0.00019014285036844804, "loss": 1.5718, "step": 1176 }, { "epoch": 0.16860048703624123, "grad_norm": 1.3225997686386108, "learning_rate": 0.00019012275377491695, "loss": 1.7461, "step": 1177 }, { "epoch": 0.16874373298954304, "grad_norm": 1.2648080587387085, "learning_rate": 0.00019010263778018716, "loss": 1.5045, "step": 1178 }, { "epoch": 0.16888697894284485, "grad_norm": 1.5840142965316772, "learning_rate": 0.00019008250238858914, "loss": 1.6079, "step": 1179 }, { "epoch": 0.1690302248961467, "grad_norm": 1.3549392223358154, "learning_rate": 0.00019006234760445754, "loss": 1.6425, "step": 1180 }, { "epoch": 0.1691734708494485, "grad_norm": 1.4406667947769165, "learning_rate": 0.0001900421734321312, "loss": 1.6561, "step": 1181 }, { "epoch": 0.16931671680275032, "grad_norm": 1.625773549079895, "learning_rate": 0.00019002197987595313, "loss": 1.7253, "step": 1182 }, { "epoch": 0.16945996275605213, "grad_norm": 1.225946307182312, "learning_rate": 0.00019000176694027049, "loss": 1.5984, "step": 1183 }, { "epoch": 0.16960320870935397, "grad_norm": 1.3628190755844116, "learning_rate": 0.00018998153462943462, "loss": 1.5338, "step": 1184 }, { "epoch": 0.16974645466265578, "grad_norm": 1.2427383661270142, "learning_rate": 0.00018996128294780106, "loss": 1.6973, "step": 1185 }, { "epoch": 0.1698897006159576, "grad_norm": 1.2896844148635864, "learning_rate": 0.00018994101189972944, "loss": 1.8172, "step": 1186 }, { "epoch": 0.1700329465692594, "grad_norm": 1.388316035270691, "learning_rate": 0.00018992072148958368, "loss": 1.6139, "step": 1187 }, { "epoch": 0.17017619252256125, "grad_norm": 1.3267853260040283, "learning_rate": 0.00018990041172173178, "loss": 1.6635, "step": 1188 }, { "epoch": 0.17031943847586306, "grad_norm": 1.6138681173324585, "learning_rate": 0.00018988008260054591, "loss": 1.6435, "step": 1189 }, { "epoch": 0.17046268442916487, "grad_norm": 1.1469848155975342, "learning_rate": 0.00018985973413040245, "loss": 1.6849, "step": 1190 }, { "epoch": 0.1706059303824667, "grad_norm": 1.3248859643936157, "learning_rate": 0.00018983936631568194, "loss": 1.5674, "step": 1191 }, { "epoch": 0.17074917633576853, "grad_norm": 1.5334855318069458, "learning_rate": 0.000189818979160769, "loss": 1.7072, "step": 1192 }, { "epoch": 0.17089242228907034, "grad_norm": 1.283549427986145, "learning_rate": 0.00018979857267005255, "loss": 1.5593, "step": 1193 }, { "epoch": 0.17103566824237215, "grad_norm": 1.2195879220962524, "learning_rate": 0.00018977814684792557, "loss": 1.5845, "step": 1194 }, { "epoch": 0.17117891419567396, "grad_norm": 1.0253046751022339, "learning_rate": 0.0001897577016987852, "loss": 1.3568, "step": 1195 }, { "epoch": 0.1713221601489758, "grad_norm": 1.143478274345398, "learning_rate": 0.00018973723722703285, "loss": 1.6699, "step": 1196 }, { "epoch": 0.17146540610227762, "grad_norm": 1.6317315101623535, "learning_rate": 0.00018971675343707397, "loss": 1.6397, "step": 1197 }, { "epoch": 0.17160865205557943, "grad_norm": 1.4558167457580566, "learning_rate": 0.00018969625033331822, "loss": 1.6215, "step": 1198 }, { "epoch": 0.17175189800888124, "grad_norm": 1.8541502952575684, "learning_rate": 0.0001896757279201794, "loss": 1.7016, "step": 1199 }, { "epoch": 0.17189514396218306, "grad_norm": 1.4308134317398071, "learning_rate": 0.00018965518620207549, "loss": 1.5507, "step": 1200 }, { "epoch": 0.1720383899154849, "grad_norm": 1.4074689149856567, "learning_rate": 0.00018963462518342862, "loss": 1.5436, "step": 1201 }, { "epoch": 0.1721816358687867, "grad_norm": 1.3150405883789062, "learning_rate": 0.00018961404486866508, "loss": 1.5772, "step": 1202 }, { "epoch": 0.17232488182208852, "grad_norm": 1.2344485521316528, "learning_rate": 0.00018959344526221525, "loss": 1.5256, "step": 1203 }, { "epoch": 0.17246812777539033, "grad_norm": 1.349897027015686, "learning_rate": 0.00018957282636851376, "loss": 1.7211, "step": 1204 }, { "epoch": 0.17261137372869217, "grad_norm": 1.405430793762207, "learning_rate": 0.00018955218819199937, "loss": 1.6493, "step": 1205 }, { "epoch": 0.172754619681994, "grad_norm": 1.3404686450958252, "learning_rate": 0.00018953153073711487, "loss": 1.6018, "step": 1206 }, { "epoch": 0.1728978656352958, "grad_norm": 1.3996638059616089, "learning_rate": 0.0001895108540083074, "loss": 1.7137, "step": 1207 }, { "epoch": 0.1730411115885976, "grad_norm": 1.2229081392288208, "learning_rate": 0.0001894901580100281, "loss": 1.6565, "step": 1208 }, { "epoch": 0.17318435754189945, "grad_norm": 1.4207645654678345, "learning_rate": 0.00018946944274673234, "loss": 1.6564, "step": 1209 }, { "epoch": 0.17332760349520127, "grad_norm": 1.238709568977356, "learning_rate": 0.00018944870822287956, "loss": 1.5874, "step": 1210 }, { "epoch": 0.17347084944850308, "grad_norm": 1.3521292209625244, "learning_rate": 0.00018942795444293342, "loss": 1.6006, "step": 1211 }, { "epoch": 0.1736140954018049, "grad_norm": 1.3679680824279785, "learning_rate": 0.00018940718141136168, "loss": 1.5365, "step": 1212 }, { "epoch": 0.17375734135510673, "grad_norm": 1.139381766319275, "learning_rate": 0.0001893863891326363, "loss": 1.4694, "step": 1213 }, { "epoch": 0.17390058730840854, "grad_norm": 1.249692440032959, "learning_rate": 0.00018936557761123327, "loss": 1.584, "step": 1214 }, { "epoch": 0.17404383326171036, "grad_norm": 1.5437959432601929, "learning_rate": 0.00018934474685163285, "loss": 1.5373, "step": 1215 }, { "epoch": 0.17418707921501217, "grad_norm": 1.1334469318389893, "learning_rate": 0.00018932389685831936, "loss": 1.704, "step": 1216 }, { "epoch": 0.17433032516831398, "grad_norm": 1.3460679054260254, "learning_rate": 0.00018930302763578132, "loss": 1.6909, "step": 1217 }, { "epoch": 0.17447357112161582, "grad_norm": 1.271440863609314, "learning_rate": 0.0001892821391885113, "loss": 1.5452, "step": 1218 }, { "epoch": 0.17461681707491764, "grad_norm": 1.1553233861923218, "learning_rate": 0.00018926123152100615, "loss": 1.665, "step": 1219 }, { "epoch": 0.17476006302821945, "grad_norm": 1.3136087656021118, "learning_rate": 0.0001892403046377667, "loss": 1.3904, "step": 1220 }, { "epoch": 0.17490330898152126, "grad_norm": 1.141589879989624, "learning_rate": 0.00018921935854329802, "loss": 1.5523, "step": 1221 }, { "epoch": 0.1750465549348231, "grad_norm": 1.6217595338821411, "learning_rate": 0.00018919839324210927, "loss": 1.6887, "step": 1222 }, { "epoch": 0.1751898008881249, "grad_norm": 1.266294002532959, "learning_rate": 0.00018917740873871378, "loss": 1.5216, "step": 1223 }, { "epoch": 0.17533304684142673, "grad_norm": 1.7049514055252075, "learning_rate": 0.000189156405037629, "loss": 1.6104, "step": 1224 }, { "epoch": 0.17547629279472854, "grad_norm": 1.541986346244812, "learning_rate": 0.0001891353821433765, "loss": 1.4442, "step": 1225 }, { "epoch": 0.17561953874803038, "grad_norm": 1.446729063987732, "learning_rate": 0.00018911434006048196, "loss": 1.4614, "step": 1226 }, { "epoch": 0.1757627847013322, "grad_norm": 1.0549607276916504, "learning_rate": 0.00018909327879347524, "loss": 1.5738, "step": 1227 }, { "epoch": 0.175906030654634, "grad_norm": 1.3403862714767456, "learning_rate": 0.00018907219834689033, "loss": 1.6185, "step": 1228 }, { "epoch": 0.17604927660793582, "grad_norm": 1.7199970483779907, "learning_rate": 0.00018905109872526532, "loss": 1.502, "step": 1229 }, { "epoch": 0.17619252256123766, "grad_norm": 1.0753248929977417, "learning_rate": 0.0001890299799331424, "loss": 1.4956, "step": 1230 }, { "epoch": 0.17633576851453947, "grad_norm": 1.5027984380722046, "learning_rate": 0.00018900884197506796, "loss": 1.5793, "step": 1231 }, { "epoch": 0.17647901446784128, "grad_norm": 1.3917884826660156, "learning_rate": 0.00018898768485559248, "loss": 1.553, "step": 1232 }, { "epoch": 0.1766222604211431, "grad_norm": 1.4231101274490356, "learning_rate": 0.00018896650857927054, "loss": 1.58, "step": 1233 }, { "epoch": 0.1767655063744449, "grad_norm": 1.5742706060409546, "learning_rate": 0.00018894531315066088, "loss": 1.363, "step": 1234 }, { "epoch": 0.17690875232774675, "grad_norm": 1.369739294052124, "learning_rate": 0.00018892409857432636, "loss": 1.6174, "step": 1235 }, { "epoch": 0.17705199828104856, "grad_norm": 1.125423550605774, "learning_rate": 0.00018890286485483395, "loss": 1.6589, "step": 1236 }, { "epoch": 0.17719524423435037, "grad_norm": 1.5110818147659302, "learning_rate": 0.00018888161199675474, "loss": 1.4631, "step": 1237 }, { "epoch": 0.1773384901876522, "grad_norm": 1.57328200340271, "learning_rate": 0.00018886034000466391, "loss": 1.3517, "step": 1238 }, { "epoch": 0.17748173614095403, "grad_norm": 1.2364228963851929, "learning_rate": 0.0001888390488831409, "loss": 1.9052, "step": 1239 }, { "epoch": 0.17762498209425584, "grad_norm": 1.0857590436935425, "learning_rate": 0.00018881773863676905, "loss": 1.4701, "step": 1240 }, { "epoch": 0.17776822804755765, "grad_norm": 1.065874695777893, "learning_rate": 0.00018879640927013598, "loss": 1.6432, "step": 1241 }, { "epoch": 0.17791147400085947, "grad_norm": 1.3527491092681885, "learning_rate": 0.00018877506078783337, "loss": 1.4062, "step": 1242 }, { "epoch": 0.1780547199541613, "grad_norm": 1.3648526668548584, "learning_rate": 0.000188753693194457, "loss": 1.6736, "step": 1243 }, { "epoch": 0.17819796590746312, "grad_norm": 1.3594846725463867, "learning_rate": 0.0001887323064946068, "loss": 1.8158, "step": 1244 }, { "epoch": 0.17834121186076493, "grad_norm": 1.193901538848877, "learning_rate": 0.00018871090069288678, "loss": 1.5239, "step": 1245 }, { "epoch": 0.17848445781406674, "grad_norm": 1.1142951250076294, "learning_rate": 0.0001886894757939051, "loss": 1.6189, "step": 1246 }, { "epoch": 0.17862770376736858, "grad_norm": 1.1740015745162964, "learning_rate": 0.00018866803180227402, "loss": 1.6688, "step": 1247 }, { "epoch": 0.1787709497206704, "grad_norm": 1.470294713973999, "learning_rate": 0.00018864656872260985, "loss": 1.4777, "step": 1248 }, { "epoch": 0.1789141956739722, "grad_norm": 1.260516881942749, "learning_rate": 0.00018862508655953316, "loss": 1.7868, "step": 1249 }, { "epoch": 0.17905744162727402, "grad_norm": 1.2214590311050415, "learning_rate": 0.0001886035853176684, "loss": 1.5263, "step": 1250 }, { "epoch": 0.17920068758057586, "grad_norm": 1.4747414588928223, "learning_rate": 0.00018858206500164436, "loss": 1.6624, "step": 1251 }, { "epoch": 0.17934393353387768, "grad_norm": 0.9780392050743103, "learning_rate": 0.00018856052561609378, "loss": 1.663, "step": 1252 }, { "epoch": 0.1794871794871795, "grad_norm": 1.3245917558670044, "learning_rate": 0.00018853896716565358, "loss": 1.5287, "step": 1253 }, { "epoch": 0.1796304254404813, "grad_norm": 1.2611510753631592, "learning_rate": 0.00018851738965496476, "loss": 1.603, "step": 1254 }, { "epoch": 0.1797736713937831, "grad_norm": 1.3821632862091064, "learning_rate": 0.00018849579308867238, "loss": 1.584, "step": 1255 }, { "epoch": 0.17991691734708495, "grad_norm": 1.3047230243682861, "learning_rate": 0.00018847417747142568, "loss": 1.538, "step": 1256 }, { "epoch": 0.18006016330038677, "grad_norm": 1.4552621841430664, "learning_rate": 0.00018845254280787797, "loss": 1.588, "step": 1257 }, { "epoch": 0.18020340925368858, "grad_norm": 1.2241252660751343, "learning_rate": 0.00018843088910268664, "loss": 1.6529, "step": 1258 }, { "epoch": 0.1803466552069904, "grad_norm": 1.7683545351028442, "learning_rate": 0.00018840921636051325, "loss": 1.5621, "step": 1259 }, { "epoch": 0.18048990116029223, "grad_norm": 1.5617848634719849, "learning_rate": 0.00018838752458602334, "loss": 1.5785, "step": 1260 }, { "epoch": 0.18063314711359404, "grad_norm": 1.4345194101333618, "learning_rate": 0.00018836581378388665, "loss": 1.6482, "step": 1261 }, { "epoch": 0.18077639306689586, "grad_norm": 1.1633356809616089, "learning_rate": 0.00018834408395877693, "loss": 1.585, "step": 1262 }, { "epoch": 0.18091963902019767, "grad_norm": 1.0452786684036255, "learning_rate": 0.00018832233511537216, "loss": 1.555, "step": 1263 }, { "epoch": 0.1810628849734995, "grad_norm": 1.1625540256500244, "learning_rate": 0.00018830056725835424, "loss": 1.4642, "step": 1264 }, { "epoch": 0.18120613092680132, "grad_norm": 0.9701222777366638, "learning_rate": 0.00018827878039240933, "loss": 1.6051, "step": 1265 }, { "epoch": 0.18134937688010314, "grad_norm": 0.9942775964736938, "learning_rate": 0.00018825697452222754, "loss": 1.5155, "step": 1266 }, { "epoch": 0.18149262283340495, "grad_norm": 1.3580447435379028, "learning_rate": 0.00018823514965250317, "loss": 1.4882, "step": 1267 }, { "epoch": 0.1816358687867068, "grad_norm": 1.5448790788650513, "learning_rate": 0.00018821330578793453, "loss": 1.6674, "step": 1268 }, { "epoch": 0.1817791147400086, "grad_norm": 1.298673152923584, "learning_rate": 0.0001881914429332241, "loss": 1.7395, "step": 1269 }, { "epoch": 0.18192236069331041, "grad_norm": 1.4411338567733765, "learning_rate": 0.00018816956109307843, "loss": 1.5537, "step": 1270 }, { "epoch": 0.18206560664661223, "grad_norm": 1.4277161359786987, "learning_rate": 0.0001881476602722081, "loss": 1.4551, "step": 1271 }, { "epoch": 0.18220885259991404, "grad_norm": 1.1127568483352661, "learning_rate": 0.00018812574047532782, "loss": 1.5008, "step": 1272 }, { "epoch": 0.18235209855321588, "grad_norm": 1.179653286933899, "learning_rate": 0.00018810380170715643, "loss": 1.5082, "step": 1273 }, { "epoch": 0.1824953445065177, "grad_norm": 1.0704222917556763, "learning_rate": 0.00018808184397241674, "loss": 1.6778, "step": 1274 }, { "epoch": 0.1826385904598195, "grad_norm": 1.095610499382019, "learning_rate": 0.0001880598672758357, "loss": 1.379, "step": 1275 }, { "epoch": 0.18278183641312132, "grad_norm": 1.3067362308502197, "learning_rate": 0.00018803787162214442, "loss": 1.6067, "step": 1276 }, { "epoch": 0.18292508236642316, "grad_norm": 1.6524701118469238, "learning_rate": 0.00018801585701607793, "loss": 1.796, "step": 1277 }, { "epoch": 0.18306832831972497, "grad_norm": 1.4314591884613037, "learning_rate": 0.00018799382346237553, "loss": 1.6398, "step": 1278 }, { "epoch": 0.18321157427302678, "grad_norm": 1.6322757005691528, "learning_rate": 0.0001879717709657804, "loss": 1.6413, "step": 1279 }, { "epoch": 0.1833548202263286, "grad_norm": 1.080710530281067, "learning_rate": 0.00018794969953104, "loss": 1.5053, "step": 1280 }, { "epoch": 0.18349806617963044, "grad_norm": 1.5241643190383911, "learning_rate": 0.00018792760916290567, "loss": 1.661, "step": 1281 }, { "epoch": 0.18364131213293225, "grad_norm": 1.599449872970581, "learning_rate": 0.00018790549986613298, "loss": 1.688, "step": 1282 }, { "epoch": 0.18378455808623406, "grad_norm": 1.409179449081421, "learning_rate": 0.0001878833716454815, "loss": 1.623, "step": 1283 }, { "epoch": 0.18392780403953587, "grad_norm": 1.19838285446167, "learning_rate": 0.00018786122450571485, "loss": 1.5355, "step": 1284 }, { "epoch": 0.18407104999283772, "grad_norm": 1.4844355583190918, "learning_rate": 0.0001878390584516008, "loss": 1.7239, "step": 1285 }, { "epoch": 0.18421429594613953, "grad_norm": 1.0305694341659546, "learning_rate": 0.00018781687348791114, "loss": 1.4403, "step": 1286 }, { "epoch": 0.18435754189944134, "grad_norm": 1.455285668373108, "learning_rate": 0.00018779466961942176, "loss": 1.5038, "step": 1287 }, { "epoch": 0.18450078785274315, "grad_norm": 1.3533012866973877, "learning_rate": 0.00018777244685091259, "loss": 1.4664, "step": 1288 }, { "epoch": 0.18464403380604497, "grad_norm": 1.110084056854248, "learning_rate": 0.00018775020518716761, "loss": 1.4808, "step": 1289 }, { "epoch": 0.1847872797593468, "grad_norm": 1.1757829189300537, "learning_rate": 0.00018772794463297498, "loss": 1.6743, "step": 1290 }, { "epoch": 0.18493052571264862, "grad_norm": 1.0400776863098145, "learning_rate": 0.00018770566519312677, "loss": 1.5445, "step": 1291 }, { "epoch": 0.18507377166595043, "grad_norm": 1.2498466968536377, "learning_rate": 0.00018768336687241926, "loss": 1.6843, "step": 1292 }, { "epoch": 0.18521701761925224, "grad_norm": 1.3689961433410645, "learning_rate": 0.00018766104967565266, "loss": 1.65, "step": 1293 }, { "epoch": 0.18536026357255408, "grad_norm": 1.4129729270935059, "learning_rate": 0.00018763871360763136, "loss": 1.6829, "step": 1294 }, { "epoch": 0.1855035095258559, "grad_norm": 1.3198117017745972, "learning_rate": 0.00018761635867316372, "loss": 1.6143, "step": 1295 }, { "epoch": 0.1856467554791577, "grad_norm": 1.3007780313491821, "learning_rate": 0.0001875939848770622, "loss": 1.6274, "step": 1296 }, { "epoch": 0.18579000143245952, "grad_norm": 1.2682918310165405, "learning_rate": 0.0001875715922241434, "loss": 1.5893, "step": 1297 }, { "epoch": 0.18593324738576136, "grad_norm": 1.3636759519577026, "learning_rate": 0.00018754918071922782, "loss": 1.7061, "step": 1298 }, { "epoch": 0.18607649333906318, "grad_norm": 1.5074098110198975, "learning_rate": 0.00018752675036714015, "loss": 1.4694, "step": 1299 }, { "epoch": 0.186219739292365, "grad_norm": 1.0680893659591675, "learning_rate": 0.00018750430117270913, "loss": 1.6128, "step": 1300 }, { "epoch": 0.1863629852456668, "grad_norm": 1.302860140800476, "learning_rate": 0.00018748183314076737, "loss": 1.6252, "step": 1301 }, { "epoch": 0.18650623119896864, "grad_norm": 1.3957186937332153, "learning_rate": 0.00018745934627615186, "loss": 1.4627, "step": 1302 }, { "epoch": 0.18664947715227045, "grad_norm": 1.319977879524231, "learning_rate": 0.0001874368405837033, "loss": 1.4813, "step": 1303 }, { "epoch": 0.18679272310557227, "grad_norm": 1.1989517211914062, "learning_rate": 0.00018741431606826672, "loss": 1.6607, "step": 1304 }, { "epoch": 0.18693596905887408, "grad_norm": 1.2207152843475342, "learning_rate": 0.0001873917727346911, "loss": 1.5185, "step": 1305 }, { "epoch": 0.1870792150121759, "grad_norm": 1.1995621919631958, "learning_rate": 0.00018736921058782935, "loss": 1.495, "step": 1306 }, { "epoch": 0.18722246096547773, "grad_norm": 1.5264108180999756, "learning_rate": 0.00018734662963253867, "loss": 1.5067, "step": 1307 }, { "epoch": 0.18736570691877955, "grad_norm": 1.152520775794983, "learning_rate": 0.0001873240298736801, "loss": 1.51, "step": 1308 }, { "epoch": 0.18750895287208136, "grad_norm": 1.3869556188583374, "learning_rate": 0.00018730141131611882, "loss": 1.4964, "step": 1309 }, { "epoch": 0.18765219882538317, "grad_norm": 1.205184817314148, "learning_rate": 0.00018727877396472408, "loss": 1.4647, "step": 1310 }, { "epoch": 0.187795444778685, "grad_norm": 1.2313976287841797, "learning_rate": 0.00018725611782436911, "loss": 1.3627, "step": 1311 }, { "epoch": 0.18793869073198682, "grad_norm": 1.202388048171997, "learning_rate": 0.00018723344289993122, "loss": 1.5974, "step": 1312 }, { "epoch": 0.18808193668528864, "grad_norm": 1.336032748222351, "learning_rate": 0.00018721074919629177, "loss": 1.4878, "step": 1313 }, { "epoch": 0.18822518263859045, "grad_norm": 1.2324628829956055, "learning_rate": 0.00018718803671833616, "loss": 1.533, "step": 1314 }, { "epoch": 0.1883684285918923, "grad_norm": 1.105347752571106, "learning_rate": 0.0001871653054709538, "loss": 1.6977, "step": 1315 }, { "epoch": 0.1885116745451941, "grad_norm": 1.3929126262664795, "learning_rate": 0.0001871425554590382, "loss": 1.4521, "step": 1316 }, { "epoch": 0.18865492049849591, "grad_norm": 1.5176773071289062, "learning_rate": 0.00018711978668748685, "loss": 1.594, "step": 1317 }, { "epoch": 0.18879816645179773, "grad_norm": 1.2566306591033936, "learning_rate": 0.00018709699916120127, "loss": 1.5335, "step": 1318 }, { "epoch": 0.18894141240509957, "grad_norm": 1.239752173423767, "learning_rate": 0.00018707419288508713, "loss": 1.4487, "step": 1319 }, { "epoch": 0.18908465835840138, "grad_norm": 1.6874043941497803, "learning_rate": 0.00018705136786405398, "loss": 1.4876, "step": 1320 }, { "epoch": 0.1892279043117032, "grad_norm": 1.2177658081054688, "learning_rate": 0.00018702852410301554, "loss": 1.6854, "step": 1321 }, { "epoch": 0.189371150265005, "grad_norm": 1.0874955654144287, "learning_rate": 0.00018700566160688946, "loss": 1.6169, "step": 1322 }, { "epoch": 0.18951439621830685, "grad_norm": 1.414839506149292, "learning_rate": 0.00018698278038059752, "loss": 1.4785, "step": 1323 }, { "epoch": 0.18965764217160866, "grad_norm": 1.2484415769577026, "learning_rate": 0.00018695988042906542, "loss": 1.5669, "step": 1324 }, { "epoch": 0.18980088812491047, "grad_norm": 1.1560161113739014, "learning_rate": 0.00018693696175722303, "loss": 1.6042, "step": 1325 }, { "epoch": 0.18994413407821228, "grad_norm": 1.2718194723129272, "learning_rate": 0.00018691402437000408, "loss": 1.5422, "step": 1326 }, { "epoch": 0.1900873800315141, "grad_norm": 1.4425206184387207, "learning_rate": 0.0001868910682723465, "loss": 1.5792, "step": 1327 }, { "epoch": 0.19023062598481594, "grad_norm": 1.1723393201828003, "learning_rate": 0.00018686809346919213, "loss": 1.6503, "step": 1328 }, { "epoch": 0.19037387193811775, "grad_norm": 1.3499562740325928, "learning_rate": 0.0001868450999654869, "loss": 1.7573, "step": 1329 }, { "epoch": 0.19051711789141956, "grad_norm": 1.4633843898773193, "learning_rate": 0.00018682208776618072, "loss": 1.5316, "step": 1330 }, { "epoch": 0.19066036384472138, "grad_norm": 1.2080957889556885, "learning_rate": 0.00018679905687622758, "loss": 1.52, "step": 1331 }, { "epoch": 0.19080360979802322, "grad_norm": 1.3475323915481567, "learning_rate": 0.0001867760073005854, "loss": 1.6821, "step": 1332 }, { "epoch": 0.19094685575132503, "grad_norm": 1.3870445489883423, "learning_rate": 0.00018675293904421624, "loss": 1.3968, "step": 1333 }, { "epoch": 0.19109010170462684, "grad_norm": 1.4150516986846924, "learning_rate": 0.0001867298521120861, "loss": 1.6493, "step": 1334 }, { "epoch": 0.19123334765792865, "grad_norm": 1.1244345903396606, "learning_rate": 0.00018670674650916506, "loss": 1.6517, "step": 1335 }, { "epoch": 0.1913765936112305, "grad_norm": 1.3144078254699707, "learning_rate": 0.0001866836222404271, "loss": 1.6679, "step": 1336 }, { "epoch": 0.1915198395645323, "grad_norm": 0.9928476810455322, "learning_rate": 0.0001866604793108504, "loss": 1.4902, "step": 1337 }, { "epoch": 0.19166308551783412, "grad_norm": 1.242979645729065, "learning_rate": 0.00018663731772541702, "loss": 1.6062, "step": 1338 }, { "epoch": 0.19180633147113593, "grad_norm": 1.1237986087799072, "learning_rate": 0.00018661413748911304, "loss": 1.3077, "step": 1339 }, { "epoch": 0.19194957742443777, "grad_norm": 1.1328160762786865, "learning_rate": 0.00018659093860692866, "loss": 1.4462, "step": 1340 }, { "epoch": 0.19209282337773959, "grad_norm": 1.2164851427078247, "learning_rate": 0.000186567721083858, "loss": 1.6855, "step": 1341 }, { "epoch": 0.1922360693310414, "grad_norm": 1.2371037006378174, "learning_rate": 0.00018654448492489917, "loss": 1.5788, "step": 1342 }, { "epoch": 0.1923793152843432, "grad_norm": 1.3131842613220215, "learning_rate": 0.0001865212301350544, "loss": 1.5088, "step": 1343 }, { "epoch": 0.19252256123764502, "grad_norm": 1.1550123691558838, "learning_rate": 0.00018649795671932986, "loss": 1.5024, "step": 1344 }, { "epoch": 0.19266580719094686, "grad_norm": 1.4322195053100586, "learning_rate": 0.0001864746646827357, "loss": 1.4668, "step": 1345 }, { "epoch": 0.19280905314424868, "grad_norm": 1.469759464263916, "learning_rate": 0.00018645135403028617, "loss": 1.6173, "step": 1346 }, { "epoch": 0.1929522990975505, "grad_norm": 1.3037060499191284, "learning_rate": 0.00018642802476699944, "loss": 1.4538, "step": 1347 }, { "epoch": 0.1930955450508523, "grad_norm": 1.096132755279541, "learning_rate": 0.00018640467689789775, "loss": 1.807, "step": 1348 }, { "epoch": 0.19323879100415414, "grad_norm": 1.375309944152832, "learning_rate": 0.00018638131042800733, "loss": 1.5423, "step": 1349 }, { "epoch": 0.19338203695745596, "grad_norm": 1.1808770895004272, "learning_rate": 0.00018635792536235836, "loss": 1.6205, "step": 1350 }, { "epoch": 0.19352528291075777, "grad_norm": 1.064618706703186, "learning_rate": 0.00018633452170598508, "loss": 1.3178, "step": 1351 }, { "epoch": 0.19366852886405958, "grad_norm": 1.2102824449539185, "learning_rate": 0.00018631109946392574, "loss": 1.3562, "step": 1352 }, { "epoch": 0.19381177481736142, "grad_norm": 1.4303596019744873, "learning_rate": 0.00018628765864122255, "loss": 1.5097, "step": 1353 }, { "epoch": 0.19395502077066323, "grad_norm": 1.4360798597335815, "learning_rate": 0.00018626419924292173, "loss": 1.6851, "step": 1354 }, { "epoch": 0.19409826672396505, "grad_norm": 1.3655837774276733, "learning_rate": 0.00018624072127407351, "loss": 1.6359, "step": 1355 }, { "epoch": 0.19424151267726686, "grad_norm": 1.1922723054885864, "learning_rate": 0.00018621722473973216, "loss": 1.5103, "step": 1356 }, { "epoch": 0.1943847586305687, "grad_norm": 1.491030216217041, "learning_rate": 0.00018619370964495586, "loss": 1.5473, "step": 1357 }, { "epoch": 0.1945280045838705, "grad_norm": 1.8464454412460327, "learning_rate": 0.00018617017599480682, "loss": 1.4654, "step": 1358 }, { "epoch": 0.19467125053717232, "grad_norm": 1.2090373039245605, "learning_rate": 0.00018614662379435129, "loss": 1.6454, "step": 1359 }, { "epoch": 0.19481449649047414, "grad_norm": 1.1983674764633179, "learning_rate": 0.0001861230530486594, "loss": 1.5215, "step": 1360 }, { "epoch": 0.19495774244377595, "grad_norm": 1.207505702972412, "learning_rate": 0.00018609946376280548, "loss": 1.6056, "step": 1361 }, { "epoch": 0.1951009883970778, "grad_norm": 1.3699766397476196, "learning_rate": 0.0001860758559418676, "loss": 1.5147, "step": 1362 }, { "epoch": 0.1952442343503796, "grad_norm": 1.1752371788024902, "learning_rate": 0.000186052229590928, "loss": 1.595, "step": 1363 }, { "epoch": 0.19538748030368142, "grad_norm": 1.3725600242614746, "learning_rate": 0.00018602858471507283, "loss": 1.4904, "step": 1364 }, { "epoch": 0.19553072625698323, "grad_norm": 1.1825995445251465, "learning_rate": 0.00018600492131939225, "loss": 1.4723, "step": 1365 }, { "epoch": 0.19567397221028507, "grad_norm": 1.0944961309432983, "learning_rate": 0.00018598123940898037, "loss": 1.7333, "step": 1366 }, { "epoch": 0.19581721816358688, "grad_norm": 1.216032862663269, "learning_rate": 0.00018595753898893537, "loss": 1.505, "step": 1367 }, { "epoch": 0.1959604641168887, "grad_norm": 1.5602285861968994, "learning_rate": 0.00018593382006435935, "loss": 1.4779, "step": 1368 }, { "epoch": 0.1961037100701905, "grad_norm": 1.3048970699310303, "learning_rate": 0.0001859100826403584, "loss": 1.2623, "step": 1369 }, { "epoch": 0.19624695602349235, "grad_norm": 1.774187684059143, "learning_rate": 0.00018588632672204264, "loss": 1.6226, "step": 1370 }, { "epoch": 0.19639020197679416, "grad_norm": 1.2090622186660767, "learning_rate": 0.00018586255231452605, "loss": 1.3823, "step": 1371 }, { "epoch": 0.19653344793009597, "grad_norm": 1.2232331037521362, "learning_rate": 0.0001858387594229267, "loss": 1.7424, "step": 1372 }, { "epoch": 0.19667669388339779, "grad_norm": 1.278077244758606, "learning_rate": 0.00018581494805236667, "loss": 1.4362, "step": 1373 }, { "epoch": 0.19681993983669963, "grad_norm": 1.2518329620361328, "learning_rate": 0.00018579111820797185, "loss": 1.5339, "step": 1374 }, { "epoch": 0.19696318579000144, "grad_norm": 1.1202784776687622, "learning_rate": 0.00018576726989487233, "loss": 1.5043, "step": 1375 }, { "epoch": 0.19710643174330325, "grad_norm": 1.3669075965881348, "learning_rate": 0.00018574340311820203, "loss": 1.6675, "step": 1376 }, { "epoch": 0.19724967769660506, "grad_norm": 1.0630940198898315, "learning_rate": 0.00018571951788309883, "loss": 1.4834, "step": 1377 }, { "epoch": 0.1973929236499069, "grad_norm": 1.0558778047561646, "learning_rate": 0.00018569561419470466, "loss": 1.8214, "step": 1378 }, { "epoch": 0.19753616960320872, "grad_norm": 1.340063452720642, "learning_rate": 0.00018567169205816538, "loss": 1.5403, "step": 1379 }, { "epoch": 0.19767941555651053, "grad_norm": 1.3751434087753296, "learning_rate": 0.00018564775147863086, "loss": 1.3535, "step": 1380 }, { "epoch": 0.19782266150981234, "grad_norm": 1.607695460319519, "learning_rate": 0.0001856237924612549, "loss": 1.522, "step": 1381 }, { "epoch": 0.19796590746311415, "grad_norm": 1.485645055770874, "learning_rate": 0.00018559981501119525, "loss": 1.8324, "step": 1382 }, { "epoch": 0.198109153416416, "grad_norm": 1.254434585571289, "learning_rate": 0.00018557581913361372, "loss": 1.4599, "step": 1383 }, { "epoch": 0.1982523993697178, "grad_norm": 1.3116018772125244, "learning_rate": 0.000185551804833676, "loss": 1.4589, "step": 1384 }, { "epoch": 0.19839564532301962, "grad_norm": 1.1751677989959717, "learning_rate": 0.00018552777211655182, "loss": 1.6581, "step": 1385 }, { "epoch": 0.19853889127632143, "grad_norm": 1.2518548965454102, "learning_rate": 0.00018550372098741474, "loss": 1.5799, "step": 1386 }, { "epoch": 0.19868213722962327, "grad_norm": 1.0701404809951782, "learning_rate": 0.00018547965145144244, "loss": 1.6265, "step": 1387 }, { "epoch": 0.1988253831829251, "grad_norm": 1.1967848539352417, "learning_rate": 0.00018545556351381643, "loss": 1.5453, "step": 1388 }, { "epoch": 0.1989686291362269, "grad_norm": 1.0598925352096558, "learning_rate": 0.00018543145717972234, "loss": 1.2879, "step": 1389 }, { "epoch": 0.1991118750895287, "grad_norm": 1.2547351121902466, "learning_rate": 0.00018540733245434962, "loss": 1.5942, "step": 1390 }, { "epoch": 0.19925512104283055, "grad_norm": 1.359018087387085, "learning_rate": 0.00018538318934289172, "loss": 1.5967, "step": 1391 }, { "epoch": 0.19939836699613236, "grad_norm": 1.3531522750854492, "learning_rate": 0.00018535902785054605, "loss": 1.6998, "step": 1392 }, { "epoch": 0.19954161294943418, "grad_norm": 1.4051594734191895, "learning_rate": 0.00018533484798251398, "loss": 1.5047, "step": 1393 }, { "epoch": 0.199684858902736, "grad_norm": 1.3112071752548218, "learning_rate": 0.00018531064974400087, "loss": 1.5499, "step": 1394 }, { "epoch": 0.19982810485603783, "grad_norm": 1.2580560445785522, "learning_rate": 0.000185286433140216, "loss": 1.5583, "step": 1395 }, { "epoch": 0.19997135080933964, "grad_norm": 1.6397414207458496, "learning_rate": 0.00018526219817637256, "loss": 1.7045, "step": 1396 }, { "epoch": 0.20011459676264146, "grad_norm": 1.1508907079696655, "learning_rate": 0.00018523794485768774, "loss": 1.7343, "step": 1397 }, { "epoch": 0.20025784271594327, "grad_norm": 1.1869248151779175, "learning_rate": 0.00018521367318938275, "loss": 1.7179, "step": 1398 }, { "epoch": 0.20040108866924508, "grad_norm": 1.2818737030029297, "learning_rate": 0.00018518938317668262, "loss": 1.6175, "step": 1399 }, { "epoch": 0.20054433462254692, "grad_norm": 1.1448330879211426, "learning_rate": 0.00018516507482481637, "loss": 1.6389, "step": 1400 }, { "epoch": 0.20068758057584873, "grad_norm": 1.2073677778244019, "learning_rate": 0.00018514074813901705, "loss": 1.5017, "step": 1401 }, { "epoch": 0.20083082652915055, "grad_norm": 1.501707911491394, "learning_rate": 0.00018511640312452156, "loss": 1.3922, "step": 1402 }, { "epoch": 0.20097407248245236, "grad_norm": 1.1832395792007446, "learning_rate": 0.0001850920397865708, "loss": 1.5244, "step": 1403 }, { "epoch": 0.2011173184357542, "grad_norm": 1.102522373199463, "learning_rate": 0.00018506765813040954, "loss": 1.4628, "step": 1404 }, { "epoch": 0.201260564389056, "grad_norm": 1.4263315200805664, "learning_rate": 0.00018504325816128662, "loss": 1.5894, "step": 1405 }, { "epoch": 0.20140381034235783, "grad_norm": 1.1822935342788696, "learning_rate": 0.00018501883988445466, "loss": 1.6237, "step": 1406 }, { "epoch": 0.20154705629565964, "grad_norm": 1.2093781232833862, "learning_rate": 0.00018499440330517039, "loss": 1.5436, "step": 1407 }, { "epoch": 0.20169030224896148, "grad_norm": 1.2069730758666992, "learning_rate": 0.00018496994842869438, "loss": 1.579, "step": 1408 }, { "epoch": 0.2018335482022633, "grad_norm": 1.0482884645462036, "learning_rate": 0.00018494547526029114, "loss": 1.6636, "step": 1409 }, { "epoch": 0.2019767941555651, "grad_norm": 1.1818374395370483, "learning_rate": 0.00018492098380522916, "loss": 1.6744, "step": 1410 }, { "epoch": 0.20212004010886692, "grad_norm": 1.8392456769943237, "learning_rate": 0.0001848964740687808, "loss": 1.6356, "step": 1411 }, { "epoch": 0.20226328606216876, "grad_norm": 1.1773076057434082, "learning_rate": 0.00018487194605622248, "loss": 1.7481, "step": 1412 }, { "epoch": 0.20240653201547057, "grad_norm": 0.96749347448349, "learning_rate": 0.00018484739977283444, "loss": 1.6481, "step": 1413 }, { "epoch": 0.20254977796877238, "grad_norm": 1.4052890539169312, "learning_rate": 0.00018482283522390085, "loss": 1.486, "step": 1414 }, { "epoch": 0.2026930239220742, "grad_norm": 1.329801082611084, "learning_rate": 0.00018479825241470986, "loss": 1.6224, "step": 1415 }, { "epoch": 0.202836269875376, "grad_norm": 1.041306734085083, "learning_rate": 0.00018477365135055357, "loss": 1.5605, "step": 1416 }, { "epoch": 0.20297951582867785, "grad_norm": 1.2327864170074463, "learning_rate": 0.00018474903203672796, "loss": 1.7122, "step": 1417 }, { "epoch": 0.20312276178197966, "grad_norm": 1.0725963115692139, "learning_rate": 0.00018472439447853297, "loss": 1.5483, "step": 1418 }, { "epoch": 0.20326600773528147, "grad_norm": 1.2916926145553589, "learning_rate": 0.00018469973868127246, "loss": 1.5684, "step": 1419 }, { "epoch": 0.20340925368858329, "grad_norm": 1.137190818786621, "learning_rate": 0.0001846750646502542, "loss": 1.4168, "step": 1420 }, { "epoch": 0.20355249964188513, "grad_norm": 1.351855993270874, "learning_rate": 0.0001846503723907899, "loss": 1.7641, "step": 1421 }, { "epoch": 0.20369574559518694, "grad_norm": 1.2285610437393188, "learning_rate": 0.00018462566190819522, "loss": 1.7989, "step": 1422 }, { "epoch": 0.20383899154848875, "grad_norm": 1.5112128257751465, "learning_rate": 0.00018460093320778968, "loss": 1.5881, "step": 1423 }, { "epoch": 0.20398223750179056, "grad_norm": 1.2674046754837036, "learning_rate": 0.00018457618629489673, "loss": 1.5889, "step": 1424 }, { "epoch": 0.2041254834550924, "grad_norm": 1.2024331092834473, "learning_rate": 0.00018455142117484386, "loss": 1.8703, "step": 1425 }, { "epoch": 0.20426872940839422, "grad_norm": 1.217358112335205, "learning_rate": 0.0001845266378529623, "loss": 1.5985, "step": 1426 }, { "epoch": 0.20441197536169603, "grad_norm": 1.5263713598251343, "learning_rate": 0.00018450183633458733, "loss": 1.8882, "step": 1427 }, { "epoch": 0.20455522131499784, "grad_norm": 1.003667950630188, "learning_rate": 0.0001844770166250581, "loss": 1.5212, "step": 1428 }, { "epoch": 0.20469846726829968, "grad_norm": 1.2690109014511108, "learning_rate": 0.00018445217872971767, "loss": 1.6261, "step": 1429 }, { "epoch": 0.2048417132216015, "grad_norm": 1.008750319480896, "learning_rate": 0.000184427322653913, "loss": 1.478, "step": 1430 }, { "epoch": 0.2049849591749033, "grad_norm": 1.1880033016204834, "learning_rate": 0.00018440244840299506, "loss": 1.4072, "step": 1431 }, { "epoch": 0.20512820512820512, "grad_norm": 1.5475003719329834, "learning_rate": 0.00018437755598231856, "loss": 1.4973, "step": 1432 }, { "epoch": 0.20527145108150693, "grad_norm": 1.1574499607086182, "learning_rate": 0.00018435264539724234, "loss": 1.5492, "step": 1433 }, { "epoch": 0.20541469703480877, "grad_norm": 1.3186848163604736, "learning_rate": 0.00018432771665312893, "loss": 1.4195, "step": 1434 }, { "epoch": 0.2055579429881106, "grad_norm": 1.1808315515518188, "learning_rate": 0.0001843027697553449, "loss": 1.6309, "step": 1435 }, { "epoch": 0.2057011889414124, "grad_norm": 1.3311676979064941, "learning_rate": 0.00018427780470926073, "loss": 1.5881, "step": 1436 }, { "epoch": 0.2058444348947142, "grad_norm": 1.3893495798110962, "learning_rate": 0.00018425282152025076, "loss": 1.3887, "step": 1437 }, { "epoch": 0.20598768084801605, "grad_norm": 1.5669349431991577, "learning_rate": 0.00018422782019369323, "loss": 1.4602, "step": 1438 }, { "epoch": 0.20613092680131787, "grad_norm": 1.0069324970245361, "learning_rate": 0.0001842028007349704, "loss": 1.8855, "step": 1439 }, { "epoch": 0.20627417275461968, "grad_norm": 1.1586416959762573, "learning_rate": 0.0001841777631494682, "loss": 1.4954, "step": 1440 }, { "epoch": 0.2064174187079215, "grad_norm": 1.324857473373413, "learning_rate": 0.00018415270744257667, "loss": 1.5312, "step": 1441 }, { "epoch": 0.20656066466122333, "grad_norm": 1.468889832496643, "learning_rate": 0.0001841276336196897, "loss": 1.516, "step": 1442 }, { "epoch": 0.20670391061452514, "grad_norm": 1.3050689697265625, "learning_rate": 0.00018410254168620504, "loss": 1.4701, "step": 1443 }, { "epoch": 0.20684715656782696, "grad_norm": 1.337196707725525, "learning_rate": 0.00018407743164752438, "loss": 1.4604, "step": 1444 }, { "epoch": 0.20699040252112877, "grad_norm": 1.1995974779129028, "learning_rate": 0.0001840523035090533, "loss": 1.6387, "step": 1445 }, { "epoch": 0.2071336484744306, "grad_norm": 1.3773281574249268, "learning_rate": 0.0001840271572762012, "loss": 1.5073, "step": 1446 }, { "epoch": 0.20727689442773242, "grad_norm": 1.3792911767959595, "learning_rate": 0.00018400199295438152, "loss": 1.4108, "step": 1447 }, { "epoch": 0.20742014038103423, "grad_norm": 1.5256664752960205, "learning_rate": 0.00018397681054901146, "loss": 1.5577, "step": 1448 }, { "epoch": 0.20756338633433605, "grad_norm": 1.257270336151123, "learning_rate": 0.0001839516100655122, "loss": 1.514, "step": 1449 }, { "epoch": 0.2077066322876379, "grad_norm": 1.4502323865890503, "learning_rate": 0.0001839263915093088, "loss": 1.5809, "step": 1450 }, { "epoch": 0.2078498782409397, "grad_norm": 1.4171514511108398, "learning_rate": 0.00018390115488583014, "loss": 1.6163, "step": 1451 }, { "epoch": 0.2079931241942415, "grad_norm": 1.5732930898666382, "learning_rate": 0.00018387590020050904, "loss": 1.5341, "step": 1452 }, { "epoch": 0.20813637014754333, "grad_norm": 1.4738789796829224, "learning_rate": 0.00018385062745878225, "loss": 1.5455, "step": 1453 }, { "epoch": 0.20827961610084514, "grad_norm": 1.3400347232818604, "learning_rate": 0.0001838253366660904, "loss": 1.661, "step": 1454 }, { "epoch": 0.20842286205414698, "grad_norm": 1.1181697845458984, "learning_rate": 0.00018380002782787783, "loss": 1.5108, "step": 1455 }, { "epoch": 0.2085661080074488, "grad_norm": 1.2964807748794556, "learning_rate": 0.00018377470094959307, "loss": 1.7491, "step": 1456 }, { "epoch": 0.2087093539607506, "grad_norm": 1.07728111743927, "learning_rate": 0.00018374935603668826, "loss": 1.7404, "step": 1457 }, { "epoch": 0.20885259991405242, "grad_norm": 1.277836561203003, "learning_rate": 0.00018372399309461962, "loss": 1.6554, "step": 1458 }, { "epoch": 0.20899584586735426, "grad_norm": 1.2218456268310547, "learning_rate": 0.00018369861212884706, "loss": 1.5702, "step": 1459 }, { "epoch": 0.20913909182065607, "grad_norm": 1.1546441316604614, "learning_rate": 0.00018367321314483452, "loss": 1.6073, "step": 1460 }, { "epoch": 0.20928233777395788, "grad_norm": 1.1945067644119263, "learning_rate": 0.0001836477961480498, "loss": 1.7721, "step": 1461 }, { "epoch": 0.2094255837272597, "grad_norm": 1.376602053642273, "learning_rate": 0.00018362236114396457, "loss": 1.4378, "step": 1462 }, { "epoch": 0.20956882968056154, "grad_norm": 1.144270420074463, "learning_rate": 0.00018359690813805427, "loss": 1.5511, "step": 1463 }, { "epoch": 0.20971207563386335, "grad_norm": 1.2560991048812866, "learning_rate": 0.00018357143713579837, "loss": 1.4585, "step": 1464 }, { "epoch": 0.20985532158716516, "grad_norm": 1.1994050741195679, "learning_rate": 0.0001835459481426801, "loss": 1.5581, "step": 1465 }, { "epoch": 0.20999856754046697, "grad_norm": 1.117190957069397, "learning_rate": 0.00018352044116418668, "loss": 1.5889, "step": 1466 }, { "epoch": 0.21014181349376881, "grad_norm": 1.4148699045181274, "learning_rate": 0.00018349491620580906, "loss": 1.4453, "step": 1467 }, { "epoch": 0.21028505944707063, "grad_norm": 1.1372407674789429, "learning_rate": 0.00018346937327304216, "loss": 1.4362, "step": 1468 }, { "epoch": 0.21042830540037244, "grad_norm": 1.087860107421875, "learning_rate": 0.00018344381237138472, "loss": 1.6302, "step": 1469 }, { "epoch": 0.21057155135367425, "grad_norm": 1.18148672580719, "learning_rate": 0.00018341823350633942, "loss": 1.6436, "step": 1470 }, { "epoch": 0.21071479730697606, "grad_norm": 1.1798086166381836, "learning_rate": 0.00018339263668341275, "loss": 1.6784, "step": 1471 }, { "epoch": 0.2108580432602779, "grad_norm": 1.1287727355957031, "learning_rate": 0.00018336702190811498, "loss": 1.5693, "step": 1472 }, { "epoch": 0.21100128921357972, "grad_norm": 1.2252769470214844, "learning_rate": 0.00018334138918596046, "loss": 1.5764, "step": 1473 }, { "epoch": 0.21114453516688153, "grad_norm": 1.0599393844604492, "learning_rate": 0.00018331573852246722, "loss": 1.4944, "step": 1474 }, { "epoch": 0.21128778112018334, "grad_norm": 1.0087907314300537, "learning_rate": 0.00018329006992315723, "loss": 1.4305, "step": 1475 }, { "epoch": 0.21143102707348518, "grad_norm": 1.403327465057373, "learning_rate": 0.00018326438339355628, "loss": 1.3829, "step": 1476 }, { "epoch": 0.211574273026787, "grad_norm": 1.25113844871521, "learning_rate": 0.00018323867893919405, "loss": 1.6329, "step": 1477 }, { "epoch": 0.2117175189800888, "grad_norm": 1.2075749635696411, "learning_rate": 0.00018321295656560408, "loss": 1.5226, "step": 1478 }, { "epoch": 0.21186076493339062, "grad_norm": 1.202713131904602, "learning_rate": 0.00018318721627832377, "loss": 1.5277, "step": 1479 }, { "epoch": 0.21200401088669246, "grad_norm": 1.574428915977478, "learning_rate": 0.0001831614580828944, "loss": 1.5132, "step": 1480 }, { "epoch": 0.21214725683999427, "grad_norm": 1.2128314971923828, "learning_rate": 0.000183135681984861, "loss": 1.7146, "step": 1481 }, { "epoch": 0.2122905027932961, "grad_norm": 1.147371768951416, "learning_rate": 0.00018310988798977255, "loss": 1.3821, "step": 1482 }, { "epoch": 0.2124337487465979, "grad_norm": 1.1978446245193481, "learning_rate": 0.00018308407610318183, "loss": 1.5223, "step": 1483 }, { "epoch": 0.21257699469989974, "grad_norm": 1.2576225996017456, "learning_rate": 0.00018305824633064557, "loss": 1.4436, "step": 1484 }, { "epoch": 0.21272024065320155, "grad_norm": 1.2156585454940796, "learning_rate": 0.00018303239867772426, "loss": 1.6271, "step": 1485 }, { "epoch": 0.21286348660650337, "grad_norm": 1.5877586603164673, "learning_rate": 0.00018300653314998224, "loss": 1.5013, "step": 1486 }, { "epoch": 0.21300673255980518, "grad_norm": 1.1044498682022095, "learning_rate": 0.00018298064975298773, "loss": 1.5872, "step": 1487 }, { "epoch": 0.213149978513107, "grad_norm": 1.3802251815795898, "learning_rate": 0.0001829547484923128, "loss": 1.4996, "step": 1488 }, { "epoch": 0.21329322446640883, "grad_norm": 1.81985342502594, "learning_rate": 0.00018292882937353326, "loss": 1.5557, "step": 1489 }, { "epoch": 0.21343647041971064, "grad_norm": 1.135331392288208, "learning_rate": 0.000182902892402229, "loss": 1.4416, "step": 1490 }, { "epoch": 0.21357971637301246, "grad_norm": 1.1704977750778198, "learning_rate": 0.0001828769375839835, "loss": 1.6421, "step": 1491 }, { "epoch": 0.21372296232631427, "grad_norm": 1.4549529552459717, "learning_rate": 0.00018285096492438424, "loss": 1.5246, "step": 1492 }, { "epoch": 0.2138662082796161, "grad_norm": 1.286210060119629, "learning_rate": 0.00018282497442902244, "loss": 1.4616, "step": 1493 }, { "epoch": 0.21400945423291792, "grad_norm": 1.360775351524353, "learning_rate": 0.0001827989661034933, "loss": 1.7217, "step": 1494 }, { "epoch": 0.21415270018621974, "grad_norm": 1.1904916763305664, "learning_rate": 0.00018277293995339565, "loss": 1.5762, "step": 1495 }, { "epoch": 0.21429594613952155, "grad_norm": 1.5662777423858643, "learning_rate": 0.00018274689598433237, "loss": 1.6667, "step": 1496 }, { "epoch": 0.2144391920928234, "grad_norm": 1.0565769672393799, "learning_rate": 0.0001827208342019101, "loss": 1.5429, "step": 1497 }, { "epoch": 0.2145824380461252, "grad_norm": 1.2067725658416748, "learning_rate": 0.00018269475461173918, "loss": 1.6204, "step": 1498 }, { "epoch": 0.21472568399942701, "grad_norm": 1.6487369537353516, "learning_rate": 0.000182668657219434, "loss": 1.497, "step": 1499 }, { "epoch": 0.21486892995272883, "grad_norm": 1.070554494857788, "learning_rate": 0.00018264254203061264, "loss": 1.615, "step": 1500 }, { "epoch": 0.21501217590603067, "grad_norm": 1.204064130783081, "learning_rate": 0.00018261640905089708, "loss": 1.4424, "step": 1501 }, { "epoch": 0.21515542185933248, "grad_norm": 1.4721273183822632, "learning_rate": 0.00018259025828591308, "loss": 1.6287, "step": 1502 }, { "epoch": 0.2152986678126343, "grad_norm": 1.1576553583145142, "learning_rate": 0.00018256408974129027, "loss": 1.6383, "step": 1503 }, { "epoch": 0.2154419137659361, "grad_norm": 1.3377002477645874, "learning_rate": 0.00018253790342266207, "loss": 1.5456, "step": 1504 }, { "epoch": 0.21558515971923792, "grad_norm": 1.1552419662475586, "learning_rate": 0.00018251169933566577, "loss": 1.5032, "step": 1505 }, { "epoch": 0.21572840567253976, "grad_norm": 1.3365596532821655, "learning_rate": 0.00018248547748594244, "loss": 1.1736, "step": 1506 }, { "epoch": 0.21587165162584157, "grad_norm": 1.254346489906311, "learning_rate": 0.00018245923787913704, "loss": 1.4876, "step": 1507 }, { "epoch": 0.21601489757914338, "grad_norm": 1.282131552696228, "learning_rate": 0.00018243298052089823, "loss": 1.6848, "step": 1508 }, { "epoch": 0.2161581435324452, "grad_norm": 1.6513956785202026, "learning_rate": 0.00018240670541687864, "loss": 1.5951, "step": 1509 }, { "epoch": 0.21630138948574704, "grad_norm": 1.2527356147766113, "learning_rate": 0.00018238041257273463, "loss": 1.605, "step": 1510 }, { "epoch": 0.21644463543904885, "grad_norm": 1.3012182712554932, "learning_rate": 0.00018235410199412636, "loss": 1.4072, "step": 1511 }, { "epoch": 0.21658788139235066, "grad_norm": 1.1916866302490234, "learning_rate": 0.0001823277736867179, "loss": 1.9058, "step": 1512 }, { "epoch": 0.21673112734565247, "grad_norm": 1.228234887123108, "learning_rate": 0.00018230142765617705, "loss": 1.5857, "step": 1513 }, { "epoch": 0.21687437329895431, "grad_norm": 1.2658697366714478, "learning_rate": 0.0001822750639081755, "loss": 1.7319, "step": 1514 }, { "epoch": 0.21701761925225613, "grad_norm": 1.0723098516464233, "learning_rate": 0.0001822486824483886, "loss": 1.401, "step": 1515 }, { "epoch": 0.21716086520555794, "grad_norm": 1.2387244701385498, "learning_rate": 0.0001822222832824958, "loss": 1.5509, "step": 1516 }, { "epoch": 0.21730411115885975, "grad_norm": 1.3989781141281128, "learning_rate": 0.00018219586641618005, "loss": 1.5491, "step": 1517 }, { "epoch": 0.2174473571121616, "grad_norm": 1.379372477531433, "learning_rate": 0.0001821694318551283, "loss": 1.6565, "step": 1518 }, { "epoch": 0.2175906030654634, "grad_norm": 1.1753172874450684, "learning_rate": 0.00018214297960503125, "loss": 1.5637, "step": 1519 }, { "epoch": 0.21773384901876522, "grad_norm": 1.3417823314666748, "learning_rate": 0.00018211650967158344, "loss": 1.7797, "step": 1520 }, { "epoch": 0.21787709497206703, "grad_norm": 1.1454881429672241, "learning_rate": 0.00018209002206048315, "loss": 1.4194, "step": 1521 }, { "epoch": 0.21802034092536887, "grad_norm": 1.2077876329421997, "learning_rate": 0.0001820635167774325, "loss": 1.6962, "step": 1522 }, { "epoch": 0.21816358687867068, "grad_norm": 1.1510627269744873, "learning_rate": 0.00018203699382813746, "loss": 1.3798, "step": 1523 }, { "epoch": 0.2183068328319725, "grad_norm": 1.35822594165802, "learning_rate": 0.00018201045321830775, "loss": 1.7364, "step": 1524 }, { "epoch": 0.2184500787852743, "grad_norm": 1.259592056274414, "learning_rate": 0.0001819838949536569, "loss": 1.5586, "step": 1525 }, { "epoch": 0.21859332473857612, "grad_norm": 1.089378833770752, "learning_rate": 0.00018195731903990225, "loss": 1.6026, "step": 1526 }, { "epoch": 0.21873657069187796, "grad_norm": 1.1910594701766968, "learning_rate": 0.00018193072548276494, "loss": 1.541, "step": 1527 }, { "epoch": 0.21887981664517978, "grad_norm": 1.1854430437088013, "learning_rate": 0.00018190411428796991, "loss": 1.6404, "step": 1528 }, { "epoch": 0.2190230625984816, "grad_norm": 1.0846129655838013, "learning_rate": 0.0001818774854612459, "loss": 1.6656, "step": 1529 }, { "epoch": 0.2191663085517834, "grad_norm": 1.1875724792480469, "learning_rate": 0.00018185083900832544, "loss": 1.6105, "step": 1530 }, { "epoch": 0.21930955450508524, "grad_norm": 0.983755886554718, "learning_rate": 0.00018182417493494478, "loss": 1.613, "step": 1531 }, { "epoch": 0.21945280045838705, "grad_norm": 1.209051251411438, "learning_rate": 0.00018179749324684412, "loss": 1.6166, "step": 1532 }, { "epoch": 0.21959604641168887, "grad_norm": 1.1180802583694458, "learning_rate": 0.00018177079394976736, "loss": 1.6125, "step": 1533 }, { "epoch": 0.21973929236499068, "grad_norm": 1.1822010278701782, "learning_rate": 0.00018174407704946212, "loss": 1.4294, "step": 1534 }, { "epoch": 0.21988253831829252, "grad_norm": 1.2946507930755615, "learning_rate": 0.00018171734255167997, "loss": 1.6211, "step": 1535 }, { "epoch": 0.22002578427159433, "grad_norm": 1.413060188293457, "learning_rate": 0.00018169059046217617, "loss": 1.3517, "step": 1536 }, { "epoch": 0.22016903022489615, "grad_norm": 1.380689263343811, "learning_rate": 0.00018166382078670977, "loss": 1.4929, "step": 1537 }, { "epoch": 0.22031227617819796, "grad_norm": 1.2837501764297485, "learning_rate": 0.0001816370335310436, "loss": 1.6481, "step": 1538 }, { "epoch": 0.2204555221314998, "grad_norm": 1.3572331666946411, "learning_rate": 0.00018161022870094432, "loss": 1.6932, "step": 1539 }, { "epoch": 0.2205987680848016, "grad_norm": 1.275476098060608, "learning_rate": 0.00018158340630218235, "loss": 1.5412, "step": 1540 }, { "epoch": 0.22074201403810342, "grad_norm": 1.2900543212890625, "learning_rate": 0.0001815565663405319, "loss": 1.6308, "step": 1541 }, { "epoch": 0.22088525999140524, "grad_norm": 1.2929813861846924, "learning_rate": 0.0001815297088217709, "loss": 1.559, "step": 1542 }, { "epoch": 0.22102850594470705, "grad_norm": 1.3224568367004395, "learning_rate": 0.00018150283375168114, "loss": 1.4038, "step": 1543 }, { "epoch": 0.2211717518980089, "grad_norm": 1.0380637645721436, "learning_rate": 0.00018147594113604817, "loss": 1.6327, "step": 1544 }, { "epoch": 0.2213149978513107, "grad_norm": 1.132380485534668, "learning_rate": 0.00018144903098066126, "loss": 1.3522, "step": 1545 }, { "epoch": 0.22145824380461251, "grad_norm": 1.3980847597122192, "learning_rate": 0.00018142210329131358, "loss": 1.5795, "step": 1546 }, { "epoch": 0.22160148975791433, "grad_norm": 1.3297758102416992, "learning_rate": 0.0001813951580738019, "loss": 1.5195, "step": 1547 }, { "epoch": 0.22174473571121617, "grad_norm": 1.2794740200042725, "learning_rate": 0.00018136819533392693, "loss": 1.5232, "step": 1548 }, { "epoch": 0.22188798166451798, "grad_norm": 1.3548502922058105, "learning_rate": 0.00018134121507749304, "loss": 1.7228, "step": 1549 }, { "epoch": 0.2220312276178198, "grad_norm": 1.2026373147964478, "learning_rate": 0.0001813142173103084, "loss": 1.6882, "step": 1550 }, { "epoch": 0.2221744735711216, "grad_norm": 1.4961111545562744, "learning_rate": 0.00018128720203818504, "loss": 1.8036, "step": 1551 }, { "epoch": 0.22231771952442345, "grad_norm": 1.2055609226226807, "learning_rate": 0.0001812601692669386, "loss": 1.7288, "step": 1552 }, { "epoch": 0.22246096547772526, "grad_norm": 1.0526481866836548, "learning_rate": 0.0001812331190023886, "loss": 1.5687, "step": 1553 }, { "epoch": 0.22260421143102707, "grad_norm": 1.5377341508865356, "learning_rate": 0.00018120605125035829, "loss": 1.6523, "step": 1554 }, { "epoch": 0.22274745738432888, "grad_norm": 1.0499385595321655, "learning_rate": 0.00018117896601667469, "loss": 1.5939, "step": 1555 }, { "epoch": 0.22289070333763072, "grad_norm": 1.143206238746643, "learning_rate": 0.00018115186330716854, "loss": 1.4929, "step": 1556 }, { "epoch": 0.22303394929093254, "grad_norm": 1.2637383937835693, "learning_rate": 0.00018112474312767445, "loss": 1.7014, "step": 1557 }, { "epoch": 0.22317719524423435, "grad_norm": 1.3360332250595093, "learning_rate": 0.00018109760548403066, "loss": 1.5883, "step": 1558 }, { "epoch": 0.22332044119753616, "grad_norm": 1.4672127962112427, "learning_rate": 0.00018107045038207931, "loss": 1.4252, "step": 1559 }, { "epoch": 0.22346368715083798, "grad_norm": 1.2584389448165894, "learning_rate": 0.00018104327782766615, "loss": 1.6417, "step": 1560 }, { "epoch": 0.22360693310413982, "grad_norm": 1.4231867790222168, "learning_rate": 0.00018101608782664078, "loss": 1.4703, "step": 1561 }, { "epoch": 0.22375017905744163, "grad_norm": 1.3920652866363525, "learning_rate": 0.00018098888038485652, "loss": 1.607, "step": 1562 }, { "epoch": 0.22389342501074344, "grad_norm": 1.2783403396606445, "learning_rate": 0.0001809616555081705, "loss": 1.5689, "step": 1563 }, { "epoch": 0.22403667096404525, "grad_norm": 1.2266747951507568, "learning_rate": 0.00018093441320244353, "loss": 1.502, "step": 1564 }, { "epoch": 0.2241799169173471, "grad_norm": 1.5030161142349243, "learning_rate": 0.00018090715347354023, "loss": 1.5151, "step": 1565 }, { "epoch": 0.2243231628706489, "grad_norm": 1.4128742218017578, "learning_rate": 0.0001808798763273289, "loss": 1.7918, "step": 1566 }, { "epoch": 0.22446640882395072, "grad_norm": 1.0876353979110718, "learning_rate": 0.0001808525817696817, "loss": 1.6094, "step": 1567 }, { "epoch": 0.22460965477725253, "grad_norm": 1.4517312049865723, "learning_rate": 0.0001808252698064744, "loss": 1.5349, "step": 1568 }, { "epoch": 0.22475290073055437, "grad_norm": 1.1075685024261475, "learning_rate": 0.00018079794044358668, "loss": 1.7668, "step": 1569 }, { "epoch": 0.22489614668385619, "grad_norm": 1.1838518381118774, "learning_rate": 0.00018077059368690174, "loss": 1.5717, "step": 1570 }, { "epoch": 0.225039392637158, "grad_norm": 1.39805006980896, "learning_rate": 0.00018074322954230677, "loss": 1.73, "step": 1571 }, { "epoch": 0.2251826385904598, "grad_norm": 1.677505373954773, "learning_rate": 0.0001807158480156926, "loss": 1.7034, "step": 1572 }, { "epoch": 0.22532588454376165, "grad_norm": 1.1713403463363647, "learning_rate": 0.00018068844911295372, "loss": 1.523, "step": 1573 }, { "epoch": 0.22546913049706346, "grad_norm": 1.4243546724319458, "learning_rate": 0.00018066103283998852, "loss": 1.5872, "step": 1574 }, { "epoch": 0.22561237645036528, "grad_norm": 1.1468417644500732, "learning_rate": 0.00018063359920269896, "loss": 1.5199, "step": 1575 }, { "epoch": 0.2257556224036671, "grad_norm": 1.1832425594329834, "learning_rate": 0.00018060614820699085, "loss": 1.5292, "step": 1576 }, { "epoch": 0.22589886835696893, "grad_norm": 1.1657671928405762, "learning_rate": 0.00018057867985877377, "loss": 1.7254, "step": 1577 }, { "epoch": 0.22604211431027074, "grad_norm": 1.1295803785324097, "learning_rate": 0.0001805511941639609, "loss": 1.5211, "step": 1578 }, { "epoch": 0.22618536026357255, "grad_norm": 1.0839145183563232, "learning_rate": 0.0001805236911284693, "loss": 1.3689, "step": 1579 }, { "epoch": 0.22632860621687437, "grad_norm": 1.048897624015808, "learning_rate": 0.00018049617075821962, "loss": 1.5025, "step": 1580 }, { "epoch": 0.22647185217017618, "grad_norm": 1.414628267288208, "learning_rate": 0.00018046863305913632, "loss": 1.7317, "step": 1581 }, { "epoch": 0.22661509812347802, "grad_norm": 1.267639398574829, "learning_rate": 0.00018044107803714764, "loss": 1.6149, "step": 1582 }, { "epoch": 0.22675834407677983, "grad_norm": 1.5322638750076294, "learning_rate": 0.00018041350569818546, "loss": 1.449, "step": 1583 }, { "epoch": 0.22690159003008165, "grad_norm": 1.297226071357727, "learning_rate": 0.00018038591604818544, "loss": 1.5791, "step": 1584 }, { "epoch": 0.22704483598338346, "grad_norm": 1.25324285030365, "learning_rate": 0.00018035830909308694, "loss": 1.5456, "step": 1585 }, { "epoch": 0.2271880819366853, "grad_norm": 1.1723058223724365, "learning_rate": 0.00018033068483883305, "loss": 1.4687, "step": 1586 }, { "epoch": 0.2273313278899871, "grad_norm": 1.394832968711853, "learning_rate": 0.00018030304329137055, "loss": 1.4738, "step": 1587 }, { "epoch": 0.22747457384328892, "grad_norm": 1.088680386543274, "learning_rate": 0.00018027538445665006, "loss": 1.5751, "step": 1588 }, { "epoch": 0.22761781979659074, "grad_norm": 1.1401309967041016, "learning_rate": 0.00018024770834062582, "loss": 1.6398, "step": 1589 }, { "epoch": 0.22776106574989258, "grad_norm": 1.405794620513916, "learning_rate": 0.00018022001494925576, "loss": 1.6903, "step": 1590 }, { "epoch": 0.2279043117031944, "grad_norm": 1.3240201473236084, "learning_rate": 0.00018019230428850165, "loss": 1.6153, "step": 1591 }, { "epoch": 0.2280475576564962, "grad_norm": 1.1839933395385742, "learning_rate": 0.00018016457636432884, "loss": 1.5717, "step": 1592 }, { "epoch": 0.22819080360979802, "grad_norm": 1.3327840566635132, "learning_rate": 0.00018013683118270652, "loss": 1.4738, "step": 1593 }, { "epoch": 0.22833404956309986, "grad_norm": 1.032558798789978, "learning_rate": 0.00018010906874960754, "loss": 1.5717, "step": 1594 }, { "epoch": 0.22847729551640167, "grad_norm": 1.2330785989761353, "learning_rate": 0.00018008128907100844, "loss": 1.711, "step": 1595 }, { "epoch": 0.22862054146970348, "grad_norm": 1.4311246871948242, "learning_rate": 0.0001800534921528895, "loss": 1.5531, "step": 1596 }, { "epoch": 0.2287637874230053, "grad_norm": 1.4510406255722046, "learning_rate": 0.00018002567800123474, "loss": 1.3147, "step": 1597 }, { "epoch": 0.2289070333763071, "grad_norm": 1.4501135349273682, "learning_rate": 0.00017999784662203178, "loss": 1.714, "step": 1598 }, { "epoch": 0.22905027932960895, "grad_norm": 1.0656812191009521, "learning_rate": 0.00017996999802127214, "loss": 1.5801, "step": 1599 }, { "epoch": 0.22919352528291076, "grad_norm": 1.459216594696045, "learning_rate": 0.00017994213220495084, "loss": 1.3665, "step": 1600 }, { "epoch": 0.22933677123621257, "grad_norm": 1.2282342910766602, "learning_rate": 0.00017991424917906677, "loss": 1.6697, "step": 1601 }, { "epoch": 0.22948001718951438, "grad_norm": 1.1494097709655762, "learning_rate": 0.0001798863489496224, "loss": 1.5463, "step": 1602 }, { "epoch": 0.22962326314281623, "grad_norm": 1.1695008277893066, "learning_rate": 0.00017985843152262397, "loss": 1.6297, "step": 1603 }, { "epoch": 0.22976650909611804, "grad_norm": 1.307349443435669, "learning_rate": 0.00017983049690408146, "loss": 1.5685, "step": 1604 }, { "epoch": 0.22990975504941985, "grad_norm": 1.2176740169525146, "learning_rate": 0.00017980254510000844, "loss": 1.5522, "step": 1605 }, { "epoch": 0.23005300100272166, "grad_norm": 1.3855626583099365, "learning_rate": 0.00017977457611642226, "loss": 1.4776, "step": 1606 }, { "epoch": 0.2301962469560235, "grad_norm": 1.0617706775665283, "learning_rate": 0.00017974658995934396, "loss": 1.6589, "step": 1607 }, { "epoch": 0.23033949290932532, "grad_norm": 1.2864140272140503, "learning_rate": 0.00017971858663479826, "loss": 1.5578, "step": 1608 }, { "epoch": 0.23048273886262713, "grad_norm": 1.2283658981323242, "learning_rate": 0.0001796905661488136, "loss": 1.3334, "step": 1609 }, { "epoch": 0.23062598481592894, "grad_norm": 1.1349698305130005, "learning_rate": 0.0001796625285074221, "loss": 1.6847, "step": 1610 }, { "epoch": 0.23076923076923078, "grad_norm": 1.145694613456726, "learning_rate": 0.0001796344737166595, "loss": 1.5393, "step": 1611 }, { "epoch": 0.2309124767225326, "grad_norm": 1.4207184314727783, "learning_rate": 0.0001796064017825654, "loss": 1.77, "step": 1612 }, { "epoch": 0.2310557226758344, "grad_norm": 1.2100273370742798, "learning_rate": 0.00017957831271118294, "loss": 1.6343, "step": 1613 }, { "epoch": 0.23119896862913622, "grad_norm": 1.1784988641738892, "learning_rate": 0.000179550206508559, "loss": 1.5121, "step": 1614 }, { "epoch": 0.23134221458243803, "grad_norm": 1.2938990592956543, "learning_rate": 0.00017952208318074416, "loss": 1.5468, "step": 1615 }, { "epoch": 0.23148546053573987, "grad_norm": 1.2966464757919312, "learning_rate": 0.0001794939427337927, "loss": 1.4341, "step": 1616 }, { "epoch": 0.23162870648904169, "grad_norm": 1.4292927980422974, "learning_rate": 0.0001794657851737625, "loss": 1.4455, "step": 1617 }, { "epoch": 0.2317719524423435, "grad_norm": 1.5883865356445312, "learning_rate": 0.00017943761050671526, "loss": 1.5759, "step": 1618 }, { "epoch": 0.2319151983956453, "grad_norm": 1.201147198677063, "learning_rate": 0.00017940941873871626, "loss": 1.652, "step": 1619 }, { "epoch": 0.23205844434894715, "grad_norm": 1.224470615386963, "learning_rate": 0.00017938120987583445, "loss": 1.6102, "step": 1620 }, { "epoch": 0.23220169030224896, "grad_norm": 1.3202130794525146, "learning_rate": 0.00017935298392414257, "loss": 1.6057, "step": 1621 }, { "epoch": 0.23234493625555078, "grad_norm": 1.2173559665679932, "learning_rate": 0.00017932474088971692, "loss": 1.5765, "step": 1622 }, { "epoch": 0.2324881822088526, "grad_norm": 1.1235346794128418, "learning_rate": 0.0001792964807786375, "loss": 1.5786, "step": 1623 }, { "epoch": 0.23263142816215443, "grad_norm": 0.9646559953689575, "learning_rate": 0.0001792682035969881, "loss": 1.5554, "step": 1624 }, { "epoch": 0.23277467411545624, "grad_norm": 1.3024460077285767, "learning_rate": 0.00017923990935085602, "loss": 1.3862, "step": 1625 }, { "epoch": 0.23291792006875806, "grad_norm": 1.2535837888717651, "learning_rate": 0.00017921159804633238, "loss": 1.717, "step": 1626 }, { "epoch": 0.23306116602205987, "grad_norm": 1.3135361671447754, "learning_rate": 0.00017918326968951182, "loss": 1.7622, "step": 1627 }, { "epoch": 0.2332044119753617, "grad_norm": 1.0134094953536987, "learning_rate": 0.00017915492428649277, "loss": 1.6624, "step": 1628 }, { "epoch": 0.23334765792866352, "grad_norm": 1.239324688911438, "learning_rate": 0.00017912656184337734, "loss": 1.6488, "step": 1629 }, { "epoch": 0.23349090388196533, "grad_norm": 1.1771478652954102, "learning_rate": 0.00017909818236627124, "loss": 1.4368, "step": 1630 }, { "epoch": 0.23363414983526715, "grad_norm": 1.2664926052093506, "learning_rate": 0.0001790697858612838, "loss": 1.4977, "step": 1631 }, { "epoch": 0.23377739578856896, "grad_norm": 1.3430542945861816, "learning_rate": 0.00017904137233452815, "loss": 1.5019, "step": 1632 }, { "epoch": 0.2339206417418708, "grad_norm": 1.1585997343063354, "learning_rate": 0.000179012941792121, "loss": 1.5009, "step": 1633 }, { "epoch": 0.2340638876951726, "grad_norm": 1.1066067218780518, "learning_rate": 0.00017898449424018278, "loss": 1.4445, "step": 1634 }, { "epoch": 0.23420713364847442, "grad_norm": 1.215885877609253, "learning_rate": 0.0001789560296848375, "loss": 1.4501, "step": 1635 }, { "epoch": 0.23435037960177624, "grad_norm": 1.283586859703064, "learning_rate": 0.00017892754813221288, "loss": 1.5474, "step": 1636 }, { "epoch": 0.23449362555507808, "grad_norm": 1.4083003997802734, "learning_rate": 0.0001788990495884403, "loss": 1.4071, "step": 1637 }, { "epoch": 0.2346368715083799, "grad_norm": 1.086255669593811, "learning_rate": 0.00017887053405965482, "loss": 1.3597, "step": 1638 }, { "epoch": 0.2347801174616817, "grad_norm": 1.1243165731430054, "learning_rate": 0.00017884200155199507, "loss": 1.6237, "step": 1639 }, { "epoch": 0.23492336341498352, "grad_norm": 0.9760535359382629, "learning_rate": 0.00017881345207160343, "loss": 1.613, "step": 1640 }, { "epoch": 0.23506660936828536, "grad_norm": 1.387522578239441, "learning_rate": 0.00017878488562462588, "loss": 1.4799, "step": 1641 }, { "epoch": 0.23520985532158717, "grad_norm": 1.0014728307724, "learning_rate": 0.00017875630221721208, "loss": 1.5854, "step": 1642 }, { "epoch": 0.23535310127488898, "grad_norm": 1.2150026559829712, "learning_rate": 0.00017872770185551535, "loss": 1.4556, "step": 1643 }, { "epoch": 0.2354963472281908, "grad_norm": 1.2404147386550903, "learning_rate": 0.00017869908454569257, "loss": 1.6322, "step": 1644 }, { "epoch": 0.23563959318149263, "grad_norm": 1.0801663398742676, "learning_rate": 0.00017867045029390445, "loss": 1.5882, "step": 1645 }, { "epoch": 0.23578283913479445, "grad_norm": 1.4824352264404297, "learning_rate": 0.00017864179910631514, "loss": 1.429, "step": 1646 }, { "epoch": 0.23592608508809626, "grad_norm": 1.1877515316009521, "learning_rate": 0.00017861313098909256, "loss": 1.5041, "step": 1647 }, { "epoch": 0.23606933104139807, "grad_norm": 1.504550576210022, "learning_rate": 0.00017858444594840827, "loss": 1.4743, "step": 1648 }, { "epoch": 0.2362125769946999, "grad_norm": 1.218879222869873, "learning_rate": 0.00017855574399043743, "loss": 1.5652, "step": 1649 }, { "epoch": 0.23635582294800173, "grad_norm": 1.43279230594635, "learning_rate": 0.00017852702512135884, "loss": 1.5999, "step": 1650 }, { "epoch": 0.23649906890130354, "grad_norm": 1.6319609880447388, "learning_rate": 0.00017849828934735497, "loss": 1.3448, "step": 1651 }, { "epoch": 0.23664231485460535, "grad_norm": 1.0608998537063599, "learning_rate": 0.00017846953667461198, "loss": 1.2955, "step": 1652 }, { "epoch": 0.23678556080790716, "grad_norm": 1.1964857578277588, "learning_rate": 0.00017844076710931954, "loss": 1.5142, "step": 1653 }, { "epoch": 0.236928806761209, "grad_norm": 1.2066811323165894, "learning_rate": 0.00017841198065767107, "loss": 1.558, "step": 1654 }, { "epoch": 0.23707205271451082, "grad_norm": 1.0557516813278198, "learning_rate": 0.00017838317732586352, "loss": 1.452, "step": 1655 }, { "epoch": 0.23721529866781263, "grad_norm": 1.3869155645370483, "learning_rate": 0.0001783543571200976, "loss": 1.5494, "step": 1656 }, { "epoch": 0.23735854462111444, "grad_norm": 1.2731993198394775, "learning_rate": 0.00017832552004657756, "loss": 1.5266, "step": 1657 }, { "epoch": 0.23750179057441628, "grad_norm": 1.2293092012405396, "learning_rate": 0.0001782966661115113, "loss": 1.6887, "step": 1658 }, { "epoch": 0.2376450365277181, "grad_norm": 1.3720958232879639, "learning_rate": 0.00017826779532111037, "loss": 1.37, "step": 1659 }, { "epoch": 0.2377882824810199, "grad_norm": 1.0763051509857178, "learning_rate": 0.00017823890768158996, "loss": 1.5682, "step": 1660 }, { "epoch": 0.23793152843432172, "grad_norm": 1.2451633214950562, "learning_rate": 0.0001782100031991688, "loss": 1.5352, "step": 1661 }, { "epoch": 0.23807477438762356, "grad_norm": 1.108236312866211, "learning_rate": 0.00017818108188006937, "loss": 1.6411, "step": 1662 }, { "epoch": 0.23821802034092537, "grad_norm": 1.0257371664047241, "learning_rate": 0.0001781521437305177, "loss": 1.3767, "step": 1663 }, { "epoch": 0.2383612662942272, "grad_norm": 0.9849429130554199, "learning_rate": 0.00017812318875674342, "loss": 1.6562, "step": 1664 }, { "epoch": 0.238504512247529, "grad_norm": 1.2490180730819702, "learning_rate": 0.00017809421696497987, "loss": 1.4807, "step": 1665 }, { "epoch": 0.23864775820083084, "grad_norm": 1.4023358821868896, "learning_rate": 0.00017806522836146395, "loss": 1.435, "step": 1666 }, { "epoch": 0.23879100415413265, "grad_norm": 1.1579004526138306, "learning_rate": 0.00017803622295243615, "loss": 1.6899, "step": 1667 }, { "epoch": 0.23893425010743446, "grad_norm": 1.039962887763977, "learning_rate": 0.00017800720074414068, "loss": 1.7318, "step": 1668 }, { "epoch": 0.23907749606073628, "grad_norm": 1.3556146621704102, "learning_rate": 0.00017797816174282524, "loss": 1.3855, "step": 1669 }, { "epoch": 0.2392207420140381, "grad_norm": 1.213050365447998, "learning_rate": 0.00017794910595474126, "loss": 1.4822, "step": 1670 }, { "epoch": 0.23936398796733993, "grad_norm": 1.2425786256790161, "learning_rate": 0.00017792003338614368, "loss": 1.6006, "step": 1671 }, { "epoch": 0.23950723392064174, "grad_norm": 1.3023830652236938, "learning_rate": 0.00017789094404329115, "loss": 1.4554, "step": 1672 }, { "epoch": 0.23965047987394356, "grad_norm": 1.195408582687378, "learning_rate": 0.0001778618379324459, "loss": 1.5476, "step": 1673 }, { "epoch": 0.23979372582724537, "grad_norm": 1.5445802211761475, "learning_rate": 0.00017783271505987367, "loss": 1.5358, "step": 1674 }, { "epoch": 0.2399369717805472, "grad_norm": 1.1423840522766113, "learning_rate": 0.00017780357543184397, "loss": 1.6489, "step": 1675 }, { "epoch": 0.24008021773384902, "grad_norm": 1.5170398950576782, "learning_rate": 0.00017777441905462982, "loss": 1.5494, "step": 1676 }, { "epoch": 0.24022346368715083, "grad_norm": 1.2335238456726074, "learning_rate": 0.00017774524593450785, "loss": 1.4334, "step": 1677 }, { "epoch": 0.24036670964045265, "grad_norm": 1.2570806741714478, "learning_rate": 0.00017771605607775834, "loss": 1.6468, "step": 1678 }, { "epoch": 0.2405099555937545, "grad_norm": 1.2512234449386597, "learning_rate": 0.00017768684949066515, "loss": 1.5807, "step": 1679 }, { "epoch": 0.2406532015470563, "grad_norm": 1.431533694267273, "learning_rate": 0.0001776576261795157, "loss": 1.5697, "step": 1680 }, { "epoch": 0.2407964475003581, "grad_norm": 1.2543574571609497, "learning_rate": 0.00017762838615060104, "loss": 1.4497, "step": 1681 }, { "epoch": 0.24093969345365993, "grad_norm": 1.1573550701141357, "learning_rate": 0.00017759912941021585, "loss": 1.6002, "step": 1682 }, { "epoch": 0.24108293940696177, "grad_norm": 1.457091212272644, "learning_rate": 0.0001775698559646584, "loss": 1.5237, "step": 1683 }, { "epoch": 0.24122618536026358, "grad_norm": 1.2515188455581665, "learning_rate": 0.00017754056582023052, "loss": 1.491, "step": 1684 }, { "epoch": 0.2413694313135654, "grad_norm": 1.0913156270980835, "learning_rate": 0.00017751125898323765, "loss": 1.4972, "step": 1685 }, { "epoch": 0.2415126772668672, "grad_norm": 1.0807026624679565, "learning_rate": 0.00017748193545998883, "loss": 1.7659, "step": 1686 }, { "epoch": 0.24165592322016902, "grad_norm": 1.176550269126892, "learning_rate": 0.00017745259525679666, "loss": 1.5744, "step": 1687 }, { "epoch": 0.24179916917347086, "grad_norm": 1.1048961877822876, "learning_rate": 0.0001774232383799774, "loss": 1.5629, "step": 1688 }, { "epoch": 0.24194241512677267, "grad_norm": 1.1970665454864502, "learning_rate": 0.00017739386483585083, "loss": 1.4883, "step": 1689 }, { "epoch": 0.24208566108007448, "grad_norm": 1.0201799869537354, "learning_rate": 0.00017736447463074037, "loss": 1.5804, "step": 1690 }, { "epoch": 0.2422289070333763, "grad_norm": 1.2771817445755005, "learning_rate": 0.000177335067770973, "loss": 1.61, "step": 1691 }, { "epoch": 0.24237215298667814, "grad_norm": 1.193446159362793, "learning_rate": 0.0001773056442628793, "loss": 1.5612, "step": 1692 }, { "epoch": 0.24251539893997995, "grad_norm": 1.093091607093811, "learning_rate": 0.00017727620411279337, "loss": 1.6041, "step": 1693 }, { "epoch": 0.24265864489328176, "grad_norm": 1.34632408618927, "learning_rate": 0.00017724674732705301, "loss": 1.6639, "step": 1694 }, { "epoch": 0.24280189084658357, "grad_norm": 1.0040169954299927, "learning_rate": 0.0001772172739119995, "loss": 1.5249, "step": 1695 }, { "epoch": 0.24294513679988541, "grad_norm": 1.1144871711730957, "learning_rate": 0.00017718778387397775, "loss": 1.4971, "step": 1696 }, { "epoch": 0.24308838275318723, "grad_norm": 1.087388038635254, "learning_rate": 0.00017715827721933623, "loss": 1.6966, "step": 1697 }, { "epoch": 0.24323162870648904, "grad_norm": 1.1827090978622437, "learning_rate": 0.00017712875395442702, "loss": 1.5629, "step": 1698 }, { "epoch": 0.24337487465979085, "grad_norm": 1.2033309936523438, "learning_rate": 0.0001770992140856057, "loss": 1.5561, "step": 1699 }, { "epoch": 0.2435181206130927, "grad_norm": 1.340083122253418, "learning_rate": 0.0001770696576192315, "loss": 1.5625, "step": 1700 }, { "epoch": 0.2436613665663945, "grad_norm": 1.171134114265442, "learning_rate": 0.00017704008456166718, "loss": 1.5161, "step": 1701 }, { "epoch": 0.24380461251969632, "grad_norm": 1.5419589281082153, "learning_rate": 0.00017701049491927916, "loss": 1.4079, "step": 1702 }, { "epoch": 0.24394785847299813, "grad_norm": 1.1987342834472656, "learning_rate": 0.00017698088869843722, "loss": 1.52, "step": 1703 }, { "epoch": 0.24409110442629997, "grad_norm": 1.0823737382888794, "learning_rate": 0.00017695126590551495, "loss": 1.5527, "step": 1704 }, { "epoch": 0.24423435037960178, "grad_norm": 1.11407470703125, "learning_rate": 0.00017692162654688943, "loss": 1.5949, "step": 1705 }, { "epoch": 0.2443775963329036, "grad_norm": 1.3796123266220093, "learning_rate": 0.00017689197062894116, "loss": 1.571, "step": 1706 }, { "epoch": 0.2445208422862054, "grad_norm": 1.1390141248703003, "learning_rate": 0.00017686229815805441, "loss": 1.5961, "step": 1707 }, { "epoch": 0.24466408823950722, "grad_norm": 1.210781216621399, "learning_rate": 0.00017683260914061694, "loss": 1.6433, "step": 1708 }, { "epoch": 0.24480733419280906, "grad_norm": 1.1193804740905762, "learning_rate": 0.00017680290358302, "loss": 1.5977, "step": 1709 }, { "epoch": 0.24495058014611087, "grad_norm": 1.3393750190734863, "learning_rate": 0.00017677318149165853, "loss": 1.7253, "step": 1710 }, { "epoch": 0.2450938260994127, "grad_norm": 1.3882328271865845, "learning_rate": 0.00017674344287293089, "loss": 1.6147, "step": 1711 }, { "epoch": 0.2452370720527145, "grad_norm": 1.2727457284927368, "learning_rate": 0.00017671368773323912, "loss": 1.6332, "step": 1712 }, { "epoch": 0.24538031800601634, "grad_norm": 1.394894003868103, "learning_rate": 0.00017668391607898878, "loss": 1.5285, "step": 1713 }, { "epoch": 0.24552356395931815, "grad_norm": 1.038539171218872, "learning_rate": 0.00017665412791658888, "loss": 1.6235, "step": 1714 }, { "epoch": 0.24566680991261997, "grad_norm": 1.1938345432281494, "learning_rate": 0.00017662432325245218, "loss": 1.5873, "step": 1715 }, { "epoch": 0.24581005586592178, "grad_norm": 1.3228634595870972, "learning_rate": 0.00017659450209299484, "loss": 1.5004, "step": 1716 }, { "epoch": 0.24595330181922362, "grad_norm": 1.508263349533081, "learning_rate": 0.00017656466444463658, "loss": 1.5517, "step": 1717 }, { "epoch": 0.24609654777252543, "grad_norm": 1.3848106861114502, "learning_rate": 0.0001765348103138008, "loss": 1.4009, "step": 1718 }, { "epoch": 0.24623979372582724, "grad_norm": 1.3678864240646362, "learning_rate": 0.0001765049397069143, "loss": 1.5484, "step": 1719 }, { "epoch": 0.24638303967912906, "grad_norm": 1.1628413200378418, "learning_rate": 0.00017647505263040743, "loss": 1.5201, "step": 1720 }, { "epoch": 0.2465262856324309, "grad_norm": 1.2587339878082275, "learning_rate": 0.00017644514909071423, "loss": 1.4622, "step": 1721 }, { "epoch": 0.2466695315857327, "grad_norm": 1.0266104936599731, "learning_rate": 0.00017641522909427214, "loss": 1.5732, "step": 1722 }, { "epoch": 0.24681277753903452, "grad_norm": 1.1377971172332764, "learning_rate": 0.00017638529264752225, "loss": 1.5676, "step": 1723 }, { "epoch": 0.24695602349233634, "grad_norm": 1.0568901300430298, "learning_rate": 0.00017635533975690905, "loss": 1.7269, "step": 1724 }, { "epoch": 0.24709926944563815, "grad_norm": 1.209952473640442, "learning_rate": 0.0001763253704288807, "loss": 1.7144, "step": 1725 }, { "epoch": 0.24724251539894, "grad_norm": 1.2651304006576538, "learning_rate": 0.00017629538466988888, "loss": 1.5507, "step": 1726 }, { "epoch": 0.2473857613522418, "grad_norm": 1.3310706615447998, "learning_rate": 0.00017626538248638872, "loss": 1.7686, "step": 1727 }, { "epoch": 0.2475290073055436, "grad_norm": 1.2588021755218506, "learning_rate": 0.00017623536388483905, "loss": 1.5916, "step": 1728 }, { "epoch": 0.24767225325884543, "grad_norm": 1.0571792125701904, "learning_rate": 0.000176205328871702, "loss": 1.6852, "step": 1729 }, { "epoch": 0.24781549921214727, "grad_norm": 1.2348414659500122, "learning_rate": 0.00017617527745344348, "loss": 1.7275, "step": 1730 }, { "epoch": 0.24795874516544908, "grad_norm": 1.1996864080429077, "learning_rate": 0.00017614520963653277, "loss": 1.5245, "step": 1731 }, { "epoch": 0.2481019911187509, "grad_norm": 1.2950910329818726, "learning_rate": 0.0001761151254274427, "loss": 1.5075, "step": 1732 }, { "epoch": 0.2482452370720527, "grad_norm": 1.2490193843841553, "learning_rate": 0.0001760850248326497, "loss": 1.6575, "step": 1733 }, { "epoch": 0.24838848302535455, "grad_norm": 1.1346381902694702, "learning_rate": 0.00017605490785863368, "loss": 1.4507, "step": 1734 }, { "epoch": 0.24853172897865636, "grad_norm": 1.2079449892044067, "learning_rate": 0.00017602477451187802, "loss": 1.4945, "step": 1735 }, { "epoch": 0.24867497493195817, "grad_norm": 1.185489296913147, "learning_rate": 0.00017599462479886974, "loss": 1.5029, "step": 1736 }, { "epoch": 0.24881822088525998, "grad_norm": 1.0169425010681152, "learning_rate": 0.00017596445872609934, "loss": 1.6727, "step": 1737 }, { "epoch": 0.24896146683856182, "grad_norm": 1.2236487865447998, "learning_rate": 0.0001759342763000608, "loss": 1.5149, "step": 1738 }, { "epoch": 0.24910471279186364, "grad_norm": 1.16829252243042, "learning_rate": 0.00017590407752725166, "loss": 1.5967, "step": 1739 }, { "epoch": 0.24924795874516545, "grad_norm": 1.145586609840393, "learning_rate": 0.00017587386241417293, "loss": 1.4762, "step": 1740 }, { "epoch": 0.24939120469846726, "grad_norm": 1.0992064476013184, "learning_rate": 0.00017584363096732926, "loss": 1.6881, "step": 1741 }, { "epoch": 0.24953445065176907, "grad_norm": 1.1603144407272339, "learning_rate": 0.00017581338319322862, "loss": 1.5734, "step": 1742 }, { "epoch": 0.24967769660507091, "grad_norm": 1.15859055519104, "learning_rate": 0.0001757831190983827, "loss": 1.4693, "step": 1743 }, { "epoch": 0.24982094255837273, "grad_norm": 1.2367185354232788, "learning_rate": 0.00017575283868930658, "loss": 1.5508, "step": 1744 }, { "epoch": 0.24996418851167454, "grad_norm": 2.133371591567993, "learning_rate": 0.0001757225419725189, "loss": 1.5486, "step": 1745 }, { "epoch": 0.25010743446497635, "grad_norm": 1.1628042459487915, "learning_rate": 0.00017569222895454174, "loss": 1.5028, "step": 1746 }, { "epoch": 0.2502506804182782, "grad_norm": 1.169555425643921, "learning_rate": 0.0001756618996419008, "loss": 1.6879, "step": 1747 }, { "epoch": 0.25039392637158, "grad_norm": 1.1798641681671143, "learning_rate": 0.00017563155404112522, "loss": 1.4251, "step": 1748 }, { "epoch": 0.2505371723248818, "grad_norm": 1.4336174726486206, "learning_rate": 0.00017560119215874763, "loss": 1.6031, "step": 1749 }, { "epoch": 0.25068041827818366, "grad_norm": 1.2663016319274902, "learning_rate": 0.0001755708140013042, "loss": 1.638, "step": 1750 }, { "epoch": 0.25082366423148544, "grad_norm": 1.095772624015808, "learning_rate": 0.0001755404195753346, "loss": 1.532, "step": 1751 }, { "epoch": 0.2509669101847873, "grad_norm": 1.0250905752182007, "learning_rate": 0.000175510008887382, "loss": 1.5567, "step": 1752 }, { "epoch": 0.2511101561380891, "grad_norm": 1.054046392440796, "learning_rate": 0.00017547958194399308, "loss": 1.7423, "step": 1753 }, { "epoch": 0.2512534020913909, "grad_norm": 1.39911687374115, "learning_rate": 0.000175449138751718, "loss": 1.5785, "step": 1754 }, { "epoch": 0.25139664804469275, "grad_norm": 1.3293801546096802, "learning_rate": 0.00017541867931711042, "loss": 1.4454, "step": 1755 }, { "epoch": 0.25153989399799453, "grad_norm": 1.0630862712860107, "learning_rate": 0.0001753882036467275, "loss": 1.3955, "step": 1756 }, { "epoch": 0.2516831399512964, "grad_norm": 1.1266579627990723, "learning_rate": 0.00017535771174712988, "loss": 1.5938, "step": 1757 }, { "epoch": 0.2518263859045982, "grad_norm": 1.2599951028823853, "learning_rate": 0.0001753272036248818, "loss": 1.6036, "step": 1758 }, { "epoch": 0.2519696318579, "grad_norm": 1.29502534866333, "learning_rate": 0.00017529667928655078, "loss": 1.5951, "step": 1759 }, { "epoch": 0.25211287781120184, "grad_norm": 1.2225018739700317, "learning_rate": 0.00017526613873870806, "loss": 1.4859, "step": 1760 }, { "epoch": 0.2522561237645036, "grad_norm": 1.1359866857528687, "learning_rate": 0.00017523558198792816, "loss": 1.5753, "step": 1761 }, { "epoch": 0.25239936971780547, "grad_norm": 1.1231074333190918, "learning_rate": 0.0001752050090407893, "loss": 1.4674, "step": 1762 }, { "epoch": 0.2525426156711073, "grad_norm": 1.0896810293197632, "learning_rate": 0.000175174419903873, "loss": 1.3369, "step": 1763 }, { "epoch": 0.2526858616244091, "grad_norm": 1.0263469219207764, "learning_rate": 0.00017514381458376437, "loss": 1.4904, "step": 1764 }, { "epoch": 0.25282910757771093, "grad_norm": 1.04498291015625, "learning_rate": 0.00017511319308705198, "loss": 1.5756, "step": 1765 }, { "epoch": 0.2529723535310128, "grad_norm": 1.157073736190796, "learning_rate": 0.0001750825554203279, "loss": 1.5642, "step": 1766 }, { "epoch": 0.25311559948431456, "grad_norm": 1.7774399518966675, "learning_rate": 0.00017505190159018764, "loss": 1.5844, "step": 1767 }, { "epoch": 0.2532588454376164, "grad_norm": 1.69309401512146, "learning_rate": 0.0001750212316032302, "loss": 1.5229, "step": 1768 }, { "epoch": 0.2534020913909182, "grad_norm": 0.99266117811203, "learning_rate": 0.00017499054546605812, "loss": 1.5448, "step": 1769 }, { "epoch": 0.25354533734422, "grad_norm": 1.3999426364898682, "learning_rate": 0.0001749598431852773, "loss": 1.6646, "step": 1770 }, { "epoch": 0.25368858329752186, "grad_norm": 1.0492762327194214, "learning_rate": 0.00017492912476749722, "loss": 1.5, "step": 1771 }, { "epoch": 0.25383182925082365, "grad_norm": 1.188376545906067, "learning_rate": 0.00017489839021933077, "loss": 1.5195, "step": 1772 }, { "epoch": 0.2539750752041255, "grad_norm": 1.1024399995803833, "learning_rate": 0.00017486763954739436, "loss": 1.4209, "step": 1773 }, { "epoch": 0.25411832115742733, "grad_norm": 1.016438364982605, "learning_rate": 0.00017483687275830783, "loss": 1.5207, "step": 1774 }, { "epoch": 0.2542615671107291, "grad_norm": 1.0224852561950684, "learning_rate": 0.00017480608985869452, "loss": 1.4632, "step": 1775 }, { "epoch": 0.25440481306403095, "grad_norm": 1.3319289684295654, "learning_rate": 0.00017477529085518128, "loss": 1.5625, "step": 1776 }, { "epoch": 0.25454805901733274, "grad_norm": 1.487630009651184, "learning_rate": 0.00017474447575439826, "loss": 1.6192, "step": 1777 }, { "epoch": 0.2546913049706346, "grad_norm": 1.419559121131897, "learning_rate": 0.00017471364456297925, "loss": 1.4102, "step": 1778 }, { "epoch": 0.2548345509239364, "grad_norm": 0.9936947822570801, "learning_rate": 0.00017468279728756147, "loss": 1.5845, "step": 1779 }, { "epoch": 0.2549777968772382, "grad_norm": 1.27677321434021, "learning_rate": 0.00017465193393478555, "loss": 1.467, "step": 1780 }, { "epoch": 0.25512104283054005, "grad_norm": 1.1609877347946167, "learning_rate": 0.0001746210545112956, "loss": 1.3479, "step": 1781 }, { "epoch": 0.25526428878384183, "grad_norm": 1.2775970697402954, "learning_rate": 0.00017459015902373916, "loss": 1.5774, "step": 1782 }, { "epoch": 0.25540753473714367, "grad_norm": 0.9970040321350098, "learning_rate": 0.00017455924747876734, "loss": 1.527, "step": 1783 }, { "epoch": 0.2555507806904455, "grad_norm": 1.3389463424682617, "learning_rate": 0.00017452831988303458, "loss": 1.5131, "step": 1784 }, { "epoch": 0.2556940266437473, "grad_norm": 1.401968002319336, "learning_rate": 0.0001744973762431988, "loss": 1.5363, "step": 1785 }, { "epoch": 0.25583727259704914, "grad_norm": 1.3293735980987549, "learning_rate": 0.0001744664165659215, "loss": 1.6003, "step": 1786 }, { "epoch": 0.255980518550351, "grad_norm": 1.1416658163070679, "learning_rate": 0.00017443544085786746, "loss": 1.658, "step": 1787 }, { "epoch": 0.25612376450365276, "grad_norm": 1.2585612535476685, "learning_rate": 0.000174404449125705, "loss": 1.5122, "step": 1788 }, { "epoch": 0.2562670104569546, "grad_norm": 1.2260617017745972, "learning_rate": 0.00017437344137610582, "loss": 1.5141, "step": 1789 }, { "epoch": 0.2564102564102564, "grad_norm": 1.256299614906311, "learning_rate": 0.0001743424176157452, "loss": 1.5242, "step": 1790 }, { "epoch": 0.25655350236355823, "grad_norm": 1.1625959873199463, "learning_rate": 0.00017431137785130178, "loss": 1.5617, "step": 1791 }, { "epoch": 0.25669674831686007, "grad_norm": 1.1972097158432007, "learning_rate": 0.00017428032208945758, "loss": 1.6801, "step": 1792 }, { "epoch": 0.25683999427016185, "grad_norm": 1.440742015838623, "learning_rate": 0.00017424925033689826, "loss": 1.5273, "step": 1793 }, { "epoch": 0.2569832402234637, "grad_norm": 1.1814721822738647, "learning_rate": 0.00017421816260031265, "loss": 1.3023, "step": 1794 }, { "epoch": 0.25712648617676553, "grad_norm": 1.1623408794403076, "learning_rate": 0.00017418705888639328, "loss": 1.5432, "step": 1795 }, { "epoch": 0.2572697321300673, "grad_norm": 1.268006443977356, "learning_rate": 0.00017415593920183596, "loss": 1.4923, "step": 1796 }, { "epoch": 0.25741297808336916, "grad_norm": 1.0926141738891602, "learning_rate": 0.00017412480355334005, "loss": 1.5446, "step": 1797 }, { "epoch": 0.25755622403667094, "grad_norm": 1.3597006797790527, "learning_rate": 0.00017409365194760818, "loss": 1.5101, "step": 1798 }, { "epoch": 0.2576994699899728, "grad_norm": 1.4750958681106567, "learning_rate": 0.00017406248439134662, "loss": 1.5201, "step": 1799 }, { "epoch": 0.2578427159432746, "grad_norm": 1.4153499603271484, "learning_rate": 0.0001740313008912649, "loss": 1.5829, "step": 1800 }, { "epoch": 0.2579859618965764, "grad_norm": 1.1922829151153564, "learning_rate": 0.0001740001014540761, "loss": 1.3815, "step": 1801 }, { "epoch": 0.25812920784987825, "grad_norm": 1.1595674753189087, "learning_rate": 0.0001739688860864967, "loss": 1.5962, "step": 1802 }, { "epoch": 0.25827245380318004, "grad_norm": 1.176070213317871, "learning_rate": 0.00017393765479524652, "loss": 1.6371, "step": 1803 }, { "epoch": 0.2584156997564819, "grad_norm": 1.294527292251587, "learning_rate": 0.000173906407587049, "loss": 1.4008, "step": 1804 }, { "epoch": 0.2585589457097837, "grad_norm": 1.2687008380889893, "learning_rate": 0.00017387514446863078, "loss": 1.8178, "step": 1805 }, { "epoch": 0.2587021916630855, "grad_norm": 1.0846542119979858, "learning_rate": 0.0001738438654467221, "loss": 1.617, "step": 1806 }, { "epoch": 0.25884543761638734, "grad_norm": 0.9451043009757996, "learning_rate": 0.00017381257052805652, "loss": 1.6088, "step": 1807 }, { "epoch": 0.2589886835696892, "grad_norm": 1.077477216720581, "learning_rate": 0.0001737812597193711, "loss": 1.3866, "step": 1808 }, { "epoch": 0.25913192952299097, "grad_norm": 1.4381864070892334, "learning_rate": 0.0001737499330274063, "loss": 1.6399, "step": 1809 }, { "epoch": 0.2592751754762928, "grad_norm": 1.3173840045928955, "learning_rate": 0.0001737185904589059, "loss": 1.4959, "step": 1810 }, { "epoch": 0.2594184214295946, "grad_norm": 1.2679394483566284, "learning_rate": 0.00017368723202061724, "loss": 1.6117, "step": 1811 }, { "epoch": 0.25956166738289643, "grad_norm": 1.32146418094635, "learning_rate": 0.000173655857719291, "loss": 1.547, "step": 1812 }, { "epoch": 0.2597049133361983, "grad_norm": 1.3836877346038818, "learning_rate": 0.00017362446756168128, "loss": 1.655, "step": 1813 }, { "epoch": 0.25984815928950006, "grad_norm": 1.3847954273223877, "learning_rate": 0.00017359306155454563, "loss": 1.4543, "step": 1814 }, { "epoch": 0.2599914052428019, "grad_norm": 1.118926763534546, "learning_rate": 0.00017356163970464497, "loss": 1.6009, "step": 1815 }, { "epoch": 0.2601346511961037, "grad_norm": 1.17046320438385, "learning_rate": 0.00017353020201874367, "loss": 1.7248, "step": 1816 }, { "epoch": 0.2602778971494055, "grad_norm": 1.0434117317199707, "learning_rate": 0.00017349874850360942, "loss": 1.6388, "step": 1817 }, { "epoch": 0.26042114310270736, "grad_norm": 1.6090402603149414, "learning_rate": 0.00017346727916601345, "loss": 1.7155, "step": 1818 }, { "epoch": 0.26056438905600915, "grad_norm": 1.1824876070022583, "learning_rate": 0.00017343579401273034, "loss": 1.6479, "step": 1819 }, { "epoch": 0.260707635009311, "grad_norm": 1.5818202495574951, "learning_rate": 0.00017340429305053801, "loss": 1.5324, "step": 1820 }, { "epoch": 0.26085088096261283, "grad_norm": 1.1676079034805298, "learning_rate": 0.00017337277628621785, "loss": 1.5877, "step": 1821 }, { "epoch": 0.2609941269159146, "grad_norm": 1.0375807285308838, "learning_rate": 0.00017334124372655465, "loss": 1.512, "step": 1822 }, { "epoch": 0.26113737286921646, "grad_norm": 1.2700793743133545, "learning_rate": 0.00017330969537833658, "loss": 1.3492, "step": 1823 }, { "epoch": 0.26128061882251824, "grad_norm": 1.253221869468689, "learning_rate": 0.00017327813124835525, "loss": 1.5618, "step": 1824 }, { "epoch": 0.2614238647758201, "grad_norm": 1.3921515941619873, "learning_rate": 0.0001732465513434056, "loss": 1.3573, "step": 1825 }, { "epoch": 0.2615671107291219, "grad_norm": 1.1654937267303467, "learning_rate": 0.00017321495567028606, "loss": 1.5101, "step": 1826 }, { "epoch": 0.2617103566824237, "grad_norm": 1.394979476928711, "learning_rate": 0.0001731833442357983, "loss": 1.504, "step": 1827 }, { "epoch": 0.26185360263572555, "grad_norm": 1.1021249294281006, "learning_rate": 0.00017315171704674755, "loss": 1.7578, "step": 1828 }, { "epoch": 0.2619968485890274, "grad_norm": 1.193483591079712, "learning_rate": 0.00017312007410994235, "loss": 1.5183, "step": 1829 }, { "epoch": 0.26214009454232917, "grad_norm": 1.2689810991287231, "learning_rate": 0.0001730884154321946, "loss": 1.418, "step": 1830 }, { "epoch": 0.262283340495631, "grad_norm": 1.2471082210540771, "learning_rate": 0.0001730567410203197, "loss": 1.5506, "step": 1831 }, { "epoch": 0.2624265864489328, "grad_norm": 1.299802541732788, "learning_rate": 0.00017302505088113634, "loss": 1.575, "step": 1832 }, { "epoch": 0.26256983240223464, "grad_norm": 1.3700257539749146, "learning_rate": 0.0001729933450214666, "loss": 1.7662, "step": 1833 }, { "epoch": 0.2627130783555365, "grad_norm": 1.193460464477539, "learning_rate": 0.00017296162344813598, "loss": 1.6429, "step": 1834 }, { "epoch": 0.26285632430883826, "grad_norm": 1.0370267629623413, "learning_rate": 0.00017292988616797336, "loss": 1.556, "step": 1835 }, { "epoch": 0.2629995702621401, "grad_norm": 1.2014391422271729, "learning_rate": 0.00017289813318781098, "loss": 1.3148, "step": 1836 }, { "epoch": 0.2631428162154419, "grad_norm": 1.160791039466858, "learning_rate": 0.00017286636451448447, "loss": 1.8107, "step": 1837 }, { "epoch": 0.26328606216874373, "grad_norm": 1.1856682300567627, "learning_rate": 0.00017283458015483283, "loss": 1.5535, "step": 1838 }, { "epoch": 0.26342930812204557, "grad_norm": 1.1617056131362915, "learning_rate": 0.00017280278011569847, "loss": 1.4539, "step": 1839 }, { "epoch": 0.26357255407534735, "grad_norm": 1.1685471534729004, "learning_rate": 0.00017277096440392717, "loss": 1.6533, "step": 1840 }, { "epoch": 0.2637158000286492, "grad_norm": 1.128612756729126, "learning_rate": 0.00017273913302636798, "loss": 1.5026, "step": 1841 }, { "epoch": 0.26385904598195103, "grad_norm": 1.3776590824127197, "learning_rate": 0.00017270728598987352, "loss": 1.4068, "step": 1842 }, { "epoch": 0.2640022919352528, "grad_norm": 1.2262424230575562, "learning_rate": 0.00017267542330129957, "loss": 1.3894, "step": 1843 }, { "epoch": 0.26414553788855466, "grad_norm": 1.2362022399902344, "learning_rate": 0.00017264354496750544, "loss": 1.5976, "step": 1844 }, { "epoch": 0.26428878384185645, "grad_norm": 1.6354926824569702, "learning_rate": 0.0001726116509953537, "loss": 1.3878, "step": 1845 }, { "epoch": 0.2644320297951583, "grad_norm": 0.9997718930244446, "learning_rate": 0.0001725797413917104, "loss": 1.4714, "step": 1846 }, { "epoch": 0.2645752757484601, "grad_norm": 1.2306793928146362, "learning_rate": 0.00017254781616344485, "loss": 1.6471, "step": 1847 }, { "epoch": 0.2647185217017619, "grad_norm": 1.2498055696487427, "learning_rate": 0.00017251587531742971, "loss": 1.6575, "step": 1848 }, { "epoch": 0.26486176765506375, "grad_norm": 1.4215936660766602, "learning_rate": 0.00017248391886054114, "loss": 1.5578, "step": 1849 }, { "epoch": 0.26500501360836554, "grad_norm": 1.362534523010254, "learning_rate": 0.00017245194679965854, "loss": 1.4693, "step": 1850 }, { "epoch": 0.2651482595616674, "grad_norm": 1.0063817501068115, "learning_rate": 0.00017241995914166468, "loss": 1.7323, "step": 1851 }, { "epoch": 0.2652915055149692, "grad_norm": 1.6111321449279785, "learning_rate": 0.00017238795589344575, "loss": 1.4514, "step": 1852 }, { "epoch": 0.265434751468271, "grad_norm": 1.1574232578277588, "learning_rate": 0.00017235593706189123, "loss": 1.6687, "step": 1853 }, { "epoch": 0.26557799742157284, "grad_norm": 1.1148895025253296, "learning_rate": 0.000172323902653894, "loss": 1.5066, "step": 1854 }, { "epoch": 0.2657212433748747, "grad_norm": 1.0806540250778198, "learning_rate": 0.00017229185267635027, "loss": 1.6064, "step": 1855 }, { "epoch": 0.26586448932817647, "grad_norm": 1.039340615272522, "learning_rate": 0.00017225978713615958, "loss": 1.6162, "step": 1856 }, { "epoch": 0.2660077352814783, "grad_norm": 1.3512998819351196, "learning_rate": 0.0001722277060402249, "loss": 1.5065, "step": 1857 }, { "epoch": 0.2661509812347801, "grad_norm": 1.0789835453033447, "learning_rate": 0.00017219560939545246, "loss": 1.5708, "step": 1858 }, { "epoch": 0.26629422718808193, "grad_norm": 1.0697288513183594, "learning_rate": 0.0001721634972087519, "loss": 1.5624, "step": 1859 }, { "epoch": 0.2664374731413838, "grad_norm": 1.2168853282928467, "learning_rate": 0.0001721313694870361, "loss": 1.4506, "step": 1860 }, { "epoch": 0.26658071909468556, "grad_norm": 1.103982925415039, "learning_rate": 0.00017209922623722147, "loss": 1.5687, "step": 1861 }, { "epoch": 0.2667239650479874, "grad_norm": 1.3929922580718994, "learning_rate": 0.0001720670674662276, "loss": 1.6432, "step": 1862 }, { "epoch": 0.26686721100128924, "grad_norm": 1.2387193441390991, "learning_rate": 0.00017203489318097753, "loss": 1.5898, "step": 1863 }, { "epoch": 0.267010456954591, "grad_norm": 1.0843274593353271, "learning_rate": 0.00017200270338839748, "loss": 1.6399, "step": 1864 }, { "epoch": 0.26715370290789286, "grad_norm": 1.4103184938430786, "learning_rate": 0.00017197049809541717, "loss": 1.6718, "step": 1865 }, { "epoch": 0.26729694886119465, "grad_norm": 1.2349909543991089, "learning_rate": 0.00017193827730896968, "loss": 1.4149, "step": 1866 }, { "epoch": 0.2674401948144965, "grad_norm": 1.2657188177108765, "learning_rate": 0.00017190604103599127, "loss": 1.4987, "step": 1867 }, { "epoch": 0.26758344076779833, "grad_norm": 1.0359469652175903, "learning_rate": 0.0001718737892834216, "loss": 1.5554, "step": 1868 }, { "epoch": 0.2677266867211001, "grad_norm": 3.0905113220214844, "learning_rate": 0.00017184152205820368, "loss": 1.5449, "step": 1869 }, { "epoch": 0.26786993267440196, "grad_norm": 1.1746270656585693, "learning_rate": 0.00017180923936728387, "loss": 1.5703, "step": 1870 }, { "epoch": 0.26801317862770374, "grad_norm": 1.328507900238037, "learning_rate": 0.00017177694121761188, "loss": 1.5026, "step": 1871 }, { "epoch": 0.2681564245810056, "grad_norm": 1.324629783630371, "learning_rate": 0.00017174462761614058, "loss": 1.376, "step": 1872 }, { "epoch": 0.2682996705343074, "grad_norm": 1.1610565185546875, "learning_rate": 0.00017171229856982639, "loss": 1.3519, "step": 1873 }, { "epoch": 0.2684429164876092, "grad_norm": 1.3978536128997803, "learning_rate": 0.0001716799540856289, "loss": 1.5496, "step": 1874 }, { "epoch": 0.26858616244091105, "grad_norm": 1.245882511138916, "learning_rate": 0.00017164759417051114, "loss": 1.5168, "step": 1875 }, { "epoch": 0.2687294083942129, "grad_norm": 1.0929161310195923, "learning_rate": 0.00017161521883143934, "loss": 1.6212, "step": 1876 }, { "epoch": 0.2688726543475147, "grad_norm": 1.3749136924743652, "learning_rate": 0.00017158282807538312, "loss": 1.604, "step": 1877 }, { "epoch": 0.2690159003008165, "grad_norm": 1.451964020729065, "learning_rate": 0.00017155042190931542, "loss": 1.7127, "step": 1878 }, { "epoch": 0.2691591462541183, "grad_norm": 1.3378633260726929, "learning_rate": 0.00017151800034021248, "loss": 1.5764, "step": 1879 }, { "epoch": 0.26930239220742014, "grad_norm": 1.1931617259979248, "learning_rate": 0.00017148556337505386, "loss": 1.544, "step": 1880 }, { "epoch": 0.269445638160722, "grad_norm": 1.2224552631378174, "learning_rate": 0.00017145311102082243, "loss": 1.6095, "step": 1881 }, { "epoch": 0.26958888411402376, "grad_norm": 1.4737969636917114, "learning_rate": 0.00017142064328450442, "loss": 1.3622, "step": 1882 }, { "epoch": 0.2697321300673256, "grad_norm": 1.128121018409729, "learning_rate": 0.00017138816017308925, "loss": 1.4319, "step": 1883 }, { "epoch": 0.26987537602062744, "grad_norm": 1.3335082530975342, "learning_rate": 0.00017135566169356983, "loss": 1.4306, "step": 1884 }, { "epoch": 0.27001862197392923, "grad_norm": 1.1364240646362305, "learning_rate": 0.00017132314785294218, "loss": 1.6516, "step": 1885 }, { "epoch": 0.27016186792723107, "grad_norm": 1.3981781005859375, "learning_rate": 0.00017129061865820582, "loss": 1.5686, "step": 1886 }, { "epoch": 0.27030511388053285, "grad_norm": 1.2379019260406494, "learning_rate": 0.0001712580741163634, "loss": 1.4441, "step": 1887 }, { "epoch": 0.2704483598338347, "grad_norm": 1.1378390789031982, "learning_rate": 0.00017122551423442097, "loss": 1.4012, "step": 1888 }, { "epoch": 0.27059160578713654, "grad_norm": 1.2723342180252075, "learning_rate": 0.0001711929390193879, "loss": 1.3448, "step": 1889 }, { "epoch": 0.2707348517404383, "grad_norm": 1.3424850702285767, "learning_rate": 0.0001711603484782768, "loss": 1.5587, "step": 1890 }, { "epoch": 0.27087809769374016, "grad_norm": 1.1520439386367798, "learning_rate": 0.00017112774261810365, "loss": 1.4291, "step": 1891 }, { "epoch": 0.27102134364704195, "grad_norm": 1.2069145441055298, "learning_rate": 0.00017109512144588762, "loss": 1.4241, "step": 1892 }, { "epoch": 0.2711645896003438, "grad_norm": 1.4305672645568848, "learning_rate": 0.0001710624849686513, "loss": 1.5851, "step": 1893 }, { "epoch": 0.2713078355536456, "grad_norm": 1.2192423343658447, "learning_rate": 0.00017102983319342046, "loss": 1.7795, "step": 1894 }, { "epoch": 0.2714510815069474, "grad_norm": 1.0592432022094727, "learning_rate": 0.00017099716612722427, "loss": 1.5891, "step": 1895 }, { "epoch": 0.27159432746024925, "grad_norm": 1.1209107637405396, "learning_rate": 0.00017096448377709514, "loss": 1.3765, "step": 1896 }, { "epoch": 0.2717375734135511, "grad_norm": 1.2197773456573486, "learning_rate": 0.00017093178615006872, "loss": 1.4884, "step": 1897 }, { "epoch": 0.2718808193668529, "grad_norm": 1.066419243812561, "learning_rate": 0.00017089907325318403, "loss": 1.3621, "step": 1898 }, { "epoch": 0.2720240653201547, "grad_norm": 1.3226433992385864, "learning_rate": 0.00017086634509348337, "loss": 1.5844, "step": 1899 }, { "epoch": 0.2721673112734565, "grad_norm": 1.277147889137268, "learning_rate": 0.00017083360167801225, "loss": 1.5643, "step": 1900 }, { "epoch": 0.27231055722675834, "grad_norm": 1.161695957183838, "learning_rate": 0.00017080084301381956, "loss": 1.391, "step": 1901 }, { "epoch": 0.2724538031800602, "grad_norm": 1.0963467359542847, "learning_rate": 0.0001707680691079574, "loss": 1.3788, "step": 1902 }, { "epoch": 0.27259704913336197, "grad_norm": 1.3407822847366333, "learning_rate": 0.00017073527996748123, "loss": 1.606, "step": 1903 }, { "epoch": 0.2727402950866638, "grad_norm": 1.5943769216537476, "learning_rate": 0.0001707024755994497, "loss": 1.58, "step": 1904 }, { "epoch": 0.2728835410399656, "grad_norm": 1.1913115978240967, "learning_rate": 0.00017066965601092478, "loss": 1.6907, "step": 1905 }, { "epoch": 0.27302678699326743, "grad_norm": 1.2669463157653809, "learning_rate": 0.00017063682120897175, "loss": 1.6308, "step": 1906 }, { "epoch": 0.2731700329465693, "grad_norm": 1.4237977266311646, "learning_rate": 0.00017060397120065909, "loss": 1.5553, "step": 1907 }, { "epoch": 0.27331327889987106, "grad_norm": 1.0792198181152344, "learning_rate": 0.0001705711059930586, "loss": 1.5744, "step": 1908 }, { "epoch": 0.2734565248531729, "grad_norm": 1.2365907430648804, "learning_rate": 0.00017053822559324536, "loss": 1.4551, "step": 1909 }, { "epoch": 0.27359977080647474, "grad_norm": 1.0911829471588135, "learning_rate": 0.0001705053300082977, "loss": 1.5115, "step": 1910 }, { "epoch": 0.2737430167597765, "grad_norm": 1.2523435354232788, "learning_rate": 0.00017047241924529721, "loss": 1.497, "step": 1911 }, { "epoch": 0.27388626271307837, "grad_norm": 1.2732208967208862, "learning_rate": 0.00017043949331132878, "loss": 1.6094, "step": 1912 }, { "epoch": 0.27402950866638015, "grad_norm": 1.2161403894424438, "learning_rate": 0.00017040655221348057, "loss": 1.4482, "step": 1913 }, { "epoch": 0.274172754619682, "grad_norm": 1.1641910076141357, "learning_rate": 0.00017037359595884394, "loss": 1.7193, "step": 1914 }, { "epoch": 0.27431600057298383, "grad_norm": 1.3806520700454712, "learning_rate": 0.0001703406245545136, "loss": 1.5051, "step": 1915 }, { "epoch": 0.2744592465262856, "grad_norm": 1.1608500480651855, "learning_rate": 0.00017030763800758743, "loss": 1.5453, "step": 1916 }, { "epoch": 0.27460249247958746, "grad_norm": 1.0880365371704102, "learning_rate": 0.00017027463632516665, "loss": 1.2591, "step": 1917 }, { "epoch": 0.2747457384328893, "grad_norm": 1.606038212776184, "learning_rate": 0.00017024161951435568, "loss": 1.5996, "step": 1918 }, { "epoch": 0.2748889843861911, "grad_norm": 0.9285503625869751, "learning_rate": 0.00017020858758226229, "loss": 1.5651, "step": 1919 }, { "epoch": 0.2750322303394929, "grad_norm": 1.110451579093933, "learning_rate": 0.0001701755405359973, "loss": 1.5984, "step": 1920 }, { "epoch": 0.2751754762927947, "grad_norm": 1.4041178226470947, "learning_rate": 0.00017014247838267508, "loss": 1.5439, "step": 1921 }, { "epoch": 0.27531872224609655, "grad_norm": 1.0831619501113892, "learning_rate": 0.000170109401129413, "loss": 1.6313, "step": 1922 }, { "epoch": 0.2754619681993984, "grad_norm": 1.0522408485412598, "learning_rate": 0.0001700763087833318, "loss": 1.6533, "step": 1923 }, { "epoch": 0.2756052141527002, "grad_norm": 1.0715692043304443, "learning_rate": 0.00017004320135155542, "loss": 1.5457, "step": 1924 }, { "epoch": 0.275748460106002, "grad_norm": 1.3700939416885376, "learning_rate": 0.0001700100788412111, "loss": 1.3969, "step": 1925 }, { "epoch": 0.2758917060593038, "grad_norm": 1.4491013288497925, "learning_rate": 0.0001699769412594293, "loss": 1.4905, "step": 1926 }, { "epoch": 0.27603495201260564, "grad_norm": 1.1049504280090332, "learning_rate": 0.0001699437886133437, "loss": 1.4132, "step": 1927 }, { "epoch": 0.2761781979659075, "grad_norm": 1.2595082521438599, "learning_rate": 0.00016991062091009122, "loss": 1.5063, "step": 1928 }, { "epoch": 0.27632144391920926, "grad_norm": 1.4148094654083252, "learning_rate": 0.0001698774381568121, "loss": 1.589, "step": 1929 }, { "epoch": 0.2764646898725111, "grad_norm": 1.2725532054901123, "learning_rate": 0.00016984424036064975, "loss": 1.683, "step": 1930 }, { "epoch": 0.27660793582581295, "grad_norm": 1.1298161745071411, "learning_rate": 0.0001698110275287508, "loss": 1.4674, "step": 1931 }, { "epoch": 0.27675118177911473, "grad_norm": 1.0645012855529785, "learning_rate": 0.00016977779966826522, "loss": 1.6289, "step": 1932 }, { "epoch": 0.27689442773241657, "grad_norm": 1.1700057983398438, "learning_rate": 0.00016974455678634608, "loss": 1.5735, "step": 1933 }, { "epoch": 0.27703767368571836, "grad_norm": 1.0079413652420044, "learning_rate": 0.0001697112988901498, "loss": 1.4934, "step": 1934 }, { "epoch": 0.2771809196390202, "grad_norm": 1.1607884168624878, "learning_rate": 0.00016967802598683593, "loss": 1.6628, "step": 1935 }, { "epoch": 0.27732416559232204, "grad_norm": 1.0606021881103516, "learning_rate": 0.00016964473808356735, "loss": 1.6346, "step": 1936 }, { "epoch": 0.2774674115456238, "grad_norm": 1.1543084383010864, "learning_rate": 0.00016961143518751004, "loss": 1.5079, "step": 1937 }, { "epoch": 0.27761065749892566, "grad_norm": 1.1040798425674438, "learning_rate": 0.0001695781173058334, "loss": 1.4451, "step": 1938 }, { "epoch": 0.2777539034522275, "grad_norm": 1.569643497467041, "learning_rate": 0.0001695447844457099, "loss": 1.4647, "step": 1939 }, { "epoch": 0.2778971494055293, "grad_norm": 1.18843674659729, "learning_rate": 0.00016951143661431524, "loss": 1.5009, "step": 1940 }, { "epoch": 0.2780403953588311, "grad_norm": 1.3136463165283203, "learning_rate": 0.00016947807381882844, "loss": 1.6103, "step": 1941 }, { "epoch": 0.2781836413121329, "grad_norm": 1.235777497291565, "learning_rate": 0.00016944469606643167, "loss": 1.6389, "step": 1942 }, { "epoch": 0.27832688726543475, "grad_norm": 1.279746413230896, "learning_rate": 0.00016941130336431032, "loss": 1.4525, "step": 1943 }, { "epoch": 0.2784701332187366, "grad_norm": 1.1871589422225952, "learning_rate": 0.00016937789571965303, "loss": 1.5545, "step": 1944 }, { "epoch": 0.2786133791720384, "grad_norm": 1.261185646057129, "learning_rate": 0.0001693444731396516, "loss": 1.6802, "step": 1945 }, { "epoch": 0.2787566251253402, "grad_norm": 1.47560453414917, "learning_rate": 0.00016931103563150112, "loss": 1.3487, "step": 1946 }, { "epoch": 0.278899871078642, "grad_norm": 1.3664454221725464, "learning_rate": 0.00016927758320239988, "loss": 1.4633, "step": 1947 }, { "epoch": 0.27904311703194384, "grad_norm": 1.2071691751480103, "learning_rate": 0.00016924411585954928, "loss": 1.6243, "step": 1948 }, { "epoch": 0.2791863629852457, "grad_norm": 0.9982584714889526, "learning_rate": 0.00016921063361015413, "loss": 1.5383, "step": 1949 }, { "epoch": 0.27932960893854747, "grad_norm": 1.2211787700653076, "learning_rate": 0.00016917713646142222, "loss": 1.7046, "step": 1950 }, { "epoch": 0.2794728548918493, "grad_norm": 1.3502827882766724, "learning_rate": 0.0001691436244205647, "loss": 1.7373, "step": 1951 }, { "epoch": 0.27961610084515115, "grad_norm": 1.5361008644104004, "learning_rate": 0.00016911009749479586, "loss": 1.4525, "step": 1952 }, { "epoch": 0.27975934679845293, "grad_norm": 1.3411003351211548, "learning_rate": 0.00016907655569133327, "loss": 1.5494, "step": 1953 }, { "epoch": 0.2799025927517548, "grad_norm": 1.2605154514312744, "learning_rate": 0.0001690429990173976, "loss": 1.5698, "step": 1954 }, { "epoch": 0.28004583870505656, "grad_norm": 1.1115424633026123, "learning_rate": 0.0001690094274802128, "loss": 1.6592, "step": 1955 }, { "epoch": 0.2801890846583584, "grad_norm": 1.273525595664978, "learning_rate": 0.00016897584108700598, "loss": 1.5399, "step": 1956 }, { "epoch": 0.28033233061166024, "grad_norm": 0.9633428454399109, "learning_rate": 0.00016894223984500746, "loss": 1.572, "step": 1957 }, { "epoch": 0.280475576564962, "grad_norm": 1.0412379503250122, "learning_rate": 0.00016890862376145075, "loss": 1.5685, "step": 1958 }, { "epoch": 0.28061882251826387, "grad_norm": 1.1368736028671265, "learning_rate": 0.00016887499284357258, "loss": 1.4473, "step": 1959 }, { "epoch": 0.28076206847156565, "grad_norm": 1.1595324277877808, "learning_rate": 0.00016884134709861285, "loss": 1.501, "step": 1960 }, { "epoch": 0.2809053144248675, "grad_norm": 1.57081937789917, "learning_rate": 0.00016880768653381462, "loss": 1.4562, "step": 1961 }, { "epoch": 0.28104856037816933, "grad_norm": 1.3805454969406128, "learning_rate": 0.0001687740111564242, "loss": 1.4706, "step": 1962 }, { "epoch": 0.2811918063314711, "grad_norm": 1.4245744943618774, "learning_rate": 0.00016874032097369113, "loss": 1.4477, "step": 1963 }, { "epoch": 0.28133505228477296, "grad_norm": 1.2686541080474854, "learning_rate": 0.00016870661599286798, "loss": 1.6281, "step": 1964 }, { "epoch": 0.2814782982380748, "grad_norm": 1.3416484594345093, "learning_rate": 0.0001686728962212106, "loss": 1.6984, "step": 1965 }, { "epoch": 0.2816215441913766, "grad_norm": 1.14289391040802, "learning_rate": 0.0001686391616659781, "loss": 1.4695, "step": 1966 }, { "epoch": 0.2817647901446784, "grad_norm": 1.4048131704330444, "learning_rate": 0.00016860541233443263, "loss": 1.5697, "step": 1967 }, { "epoch": 0.2819080360979802, "grad_norm": 1.1891977787017822, "learning_rate": 0.0001685716482338396, "loss": 1.6293, "step": 1968 }, { "epoch": 0.28205128205128205, "grad_norm": 1.0610711574554443, "learning_rate": 0.0001685378693714676, "loss": 1.6318, "step": 1969 }, { "epoch": 0.2821945280045839, "grad_norm": 1.237954020500183, "learning_rate": 0.00016850407575458838, "loss": 1.4891, "step": 1970 }, { "epoch": 0.2823377739578857, "grad_norm": 1.3440676927566528, "learning_rate": 0.00016847026739047683, "loss": 1.5435, "step": 1971 }, { "epoch": 0.2824810199111875, "grad_norm": 1.228694200515747, "learning_rate": 0.00016843644428641113, "loss": 1.5258, "step": 1972 }, { "epoch": 0.28262426586448935, "grad_norm": 1.042784333229065, "learning_rate": 0.00016840260644967247, "loss": 1.5328, "step": 1973 }, { "epoch": 0.28276751181779114, "grad_norm": 1.305555820465088, "learning_rate": 0.00016836875388754537, "loss": 1.5631, "step": 1974 }, { "epoch": 0.282910757771093, "grad_norm": 1.3586883544921875, "learning_rate": 0.00016833488660731742, "loss": 1.4806, "step": 1975 }, { "epoch": 0.28305400372439476, "grad_norm": 1.212345004081726, "learning_rate": 0.0001683010046162794, "loss": 1.4711, "step": 1976 }, { "epoch": 0.2831972496776966, "grad_norm": 1.4201418161392212, "learning_rate": 0.0001682671079217253, "loss": 1.5687, "step": 1977 }, { "epoch": 0.28334049563099845, "grad_norm": 1.244446039199829, "learning_rate": 0.00016823319653095218, "loss": 1.4643, "step": 1978 }, { "epoch": 0.28348374158430023, "grad_norm": 1.2243931293487549, "learning_rate": 0.00016819927045126035, "loss": 1.4978, "step": 1979 }, { "epoch": 0.28362698753760207, "grad_norm": 1.0532848834991455, "learning_rate": 0.00016816532968995328, "loss": 1.4825, "step": 1980 }, { "epoch": 0.28377023349090386, "grad_norm": 1.3386598825454712, "learning_rate": 0.00016813137425433758, "loss": 1.5976, "step": 1981 }, { "epoch": 0.2839134794442057, "grad_norm": 1.3692759275436401, "learning_rate": 0.00016809740415172297, "loss": 1.5005, "step": 1982 }, { "epoch": 0.28405672539750754, "grad_norm": 1.152801275253296, "learning_rate": 0.0001680634193894224, "loss": 1.3208, "step": 1983 }, { "epoch": 0.2841999713508093, "grad_norm": 1.2282850742340088, "learning_rate": 0.00016802941997475196, "loss": 1.5888, "step": 1984 }, { "epoch": 0.28434321730411116, "grad_norm": 1.0203220844268799, "learning_rate": 0.00016799540591503083, "loss": 1.526, "step": 1985 }, { "epoch": 0.284486463257413, "grad_norm": 1.1952900886535645, "learning_rate": 0.00016796137721758142, "loss": 1.5155, "step": 1986 }, { "epoch": 0.2846297092107148, "grad_norm": 1.1555944681167603, "learning_rate": 0.00016792733388972932, "loss": 1.6648, "step": 1987 }, { "epoch": 0.28477295516401663, "grad_norm": 1.1344774961471558, "learning_rate": 0.00016789327593880318, "loss": 1.5524, "step": 1988 }, { "epoch": 0.2849162011173184, "grad_norm": 1.2828137874603271, "learning_rate": 0.0001678592033721348, "loss": 1.6899, "step": 1989 }, { "epoch": 0.28505944707062025, "grad_norm": 1.2367011308670044, "learning_rate": 0.0001678251161970592, "loss": 1.5394, "step": 1990 }, { "epoch": 0.2852026930239221, "grad_norm": 1.3519110679626465, "learning_rate": 0.00016779101442091447, "loss": 1.6153, "step": 1991 }, { "epoch": 0.2853459389772239, "grad_norm": 1.4142125844955444, "learning_rate": 0.00016775689805104192, "loss": 1.4675, "step": 1992 }, { "epoch": 0.2854891849305257, "grad_norm": 1.0034823417663574, "learning_rate": 0.00016772276709478597, "loss": 1.4704, "step": 1993 }, { "epoch": 0.28563243088382756, "grad_norm": 1.1246708631515503, "learning_rate": 0.00016768862155949413, "loss": 1.632, "step": 1994 }, { "epoch": 0.28577567683712934, "grad_norm": 1.2918721437454224, "learning_rate": 0.00016765446145251706, "loss": 1.7127, "step": 1995 }, { "epoch": 0.2859189227904312, "grad_norm": 1.1178702116012573, "learning_rate": 0.00016762028678120867, "loss": 1.727, "step": 1996 }, { "epoch": 0.28606216874373297, "grad_norm": 1.1690654754638672, "learning_rate": 0.00016758609755292584, "loss": 1.313, "step": 1997 }, { "epoch": 0.2862054146970348, "grad_norm": 1.0547363758087158, "learning_rate": 0.00016755189377502876, "loss": 1.44, "step": 1998 }, { "epoch": 0.28634866065033665, "grad_norm": 1.1478240489959717, "learning_rate": 0.00016751767545488056, "loss": 1.5424, "step": 1999 }, { "epoch": 0.28649190660363844, "grad_norm": 1.341033697128296, "learning_rate": 0.00016748344259984762, "loss": 1.4876, "step": 2000 }, { "epoch": 0.2866351525569403, "grad_norm": 1.1681870222091675, "learning_rate": 0.00016744919521729948, "loss": 1.5186, "step": 2001 }, { "epoch": 0.28677839851024206, "grad_norm": 1.088470697402954, "learning_rate": 0.0001674149333146087, "loss": 1.4719, "step": 2002 }, { "epoch": 0.2869216444635439, "grad_norm": 1.1227941513061523, "learning_rate": 0.00016738065689915106, "loss": 1.4101, "step": 2003 }, { "epoch": 0.28706489041684574, "grad_norm": 1.359222173690796, "learning_rate": 0.0001673463659783054, "loss": 1.4193, "step": 2004 }, { "epoch": 0.2872081363701475, "grad_norm": 1.052613615989685, "learning_rate": 0.00016731206055945366, "loss": 1.5453, "step": 2005 }, { "epoch": 0.28735138232344937, "grad_norm": 1.2241803407669067, "learning_rate": 0.00016727774064998106, "loss": 1.5206, "step": 2006 }, { "epoch": 0.2874946282767512, "grad_norm": 0.9814631938934326, "learning_rate": 0.00016724340625727573, "loss": 1.3914, "step": 2007 }, { "epoch": 0.287637874230053, "grad_norm": 1.1713379621505737, "learning_rate": 0.00016720905738872905, "loss": 1.3603, "step": 2008 }, { "epoch": 0.28778112018335483, "grad_norm": 1.0520036220550537, "learning_rate": 0.00016717469405173549, "loss": 1.7029, "step": 2009 }, { "epoch": 0.2879243661366566, "grad_norm": 1.5106563568115234, "learning_rate": 0.00016714031625369264, "loss": 1.4327, "step": 2010 }, { "epoch": 0.28806761208995846, "grad_norm": 1.0656976699829102, "learning_rate": 0.0001671059240020011, "loss": 1.7828, "step": 2011 }, { "epoch": 0.2882108580432603, "grad_norm": 1.1733362674713135, "learning_rate": 0.00016707151730406482, "loss": 1.5512, "step": 2012 }, { "epoch": 0.2883541039965621, "grad_norm": 1.1205933094024658, "learning_rate": 0.00016703709616729055, "loss": 1.5478, "step": 2013 }, { "epoch": 0.2884973499498639, "grad_norm": 0.9729814529418945, "learning_rate": 0.00016700266059908842, "loss": 1.5052, "step": 2014 }, { "epoch": 0.2886405959031657, "grad_norm": 1.1738072633743286, "learning_rate": 0.00016696821060687155, "loss": 1.5716, "step": 2015 }, { "epoch": 0.28878384185646755, "grad_norm": 1.1370347738265991, "learning_rate": 0.00016693374619805606, "loss": 1.5585, "step": 2016 }, { "epoch": 0.2889270878097694, "grad_norm": 1.301529884338379, "learning_rate": 0.00016689926738006143, "loss": 1.5651, "step": 2017 }, { "epoch": 0.2890703337630712, "grad_norm": 1.3988656997680664, "learning_rate": 0.00016686477416031004, "loss": 1.5401, "step": 2018 }, { "epoch": 0.289213579716373, "grad_norm": 1.1389795541763306, "learning_rate": 0.0001668302665462274, "loss": 1.6099, "step": 2019 }, { "epoch": 0.28935682566967486, "grad_norm": 1.0524451732635498, "learning_rate": 0.00016679574454524213, "loss": 1.5922, "step": 2020 }, { "epoch": 0.28950007162297664, "grad_norm": 0.988725483417511, "learning_rate": 0.00016676120816478605, "loss": 1.6898, "step": 2021 }, { "epoch": 0.2896433175762785, "grad_norm": 1.1801432371139526, "learning_rate": 0.00016672665741229392, "loss": 1.5246, "step": 2022 }, { "epoch": 0.28978656352958027, "grad_norm": 0.9883184432983398, "learning_rate": 0.00016669209229520367, "loss": 1.4182, "step": 2023 }, { "epoch": 0.2899298094828821, "grad_norm": 1.2279008626937866, "learning_rate": 0.00016665751282095634, "loss": 1.6834, "step": 2024 }, { "epoch": 0.29007305543618395, "grad_norm": 1.1624658107757568, "learning_rate": 0.000166622918996996, "loss": 1.5831, "step": 2025 }, { "epoch": 0.29021630138948573, "grad_norm": 0.9519708752632141, "learning_rate": 0.00016658831083076985, "loss": 1.5385, "step": 2026 }, { "epoch": 0.29035954734278757, "grad_norm": 1.4991109371185303, "learning_rate": 0.0001665536883297282, "loss": 1.4016, "step": 2027 }, { "epoch": 0.2905027932960894, "grad_norm": 1.1272568702697754, "learning_rate": 0.0001665190515013244, "loss": 1.6114, "step": 2028 }, { "epoch": 0.2906460392493912, "grad_norm": 1.2224667072296143, "learning_rate": 0.0001664844003530149, "loss": 1.544, "step": 2029 }, { "epoch": 0.29078928520269304, "grad_norm": 1.0759963989257812, "learning_rate": 0.00016644973489225926, "loss": 1.6215, "step": 2030 }, { "epoch": 0.2909325311559948, "grad_norm": 1.3602945804595947, "learning_rate": 0.00016641505512652005, "loss": 1.2762, "step": 2031 }, { "epoch": 0.29107577710929666, "grad_norm": 1.202794075012207, "learning_rate": 0.00016638036106326296, "loss": 1.461, "step": 2032 }, { "epoch": 0.2912190230625985, "grad_norm": 1.307026743888855, "learning_rate": 0.00016634565270995684, "loss": 1.506, "step": 2033 }, { "epoch": 0.2913622690159003, "grad_norm": 1.222687840461731, "learning_rate": 0.00016631093007407347, "loss": 1.5522, "step": 2034 }, { "epoch": 0.29150551496920213, "grad_norm": 0.877815306186676, "learning_rate": 0.0001662761931630878, "loss": 1.5648, "step": 2035 }, { "epoch": 0.2916487609225039, "grad_norm": 1.1700665950775146, "learning_rate": 0.00016624144198447782, "loss": 1.3906, "step": 2036 }, { "epoch": 0.29179200687580575, "grad_norm": 1.3144936561584473, "learning_rate": 0.0001662066765457246, "loss": 1.5504, "step": 2037 }, { "epoch": 0.2919352528291076, "grad_norm": 1.2586084604263306, "learning_rate": 0.00016617189685431228, "loss": 1.488, "step": 2038 }, { "epoch": 0.2920784987824094, "grad_norm": 1.039439082145691, "learning_rate": 0.00016613710291772812, "loss": 1.4571, "step": 2039 }, { "epoch": 0.2922217447357112, "grad_norm": 1.086835265159607, "learning_rate": 0.00016610229474346228, "loss": 1.5298, "step": 2040 }, { "epoch": 0.29236499068901306, "grad_norm": 1.1901931762695312, "learning_rate": 0.00016606747233900815, "loss": 1.5963, "step": 2041 }, { "epoch": 0.29250823664231484, "grad_norm": 1.163295865058899, "learning_rate": 0.00016603263571186218, "loss": 1.4776, "step": 2042 }, { "epoch": 0.2926514825956167, "grad_norm": 1.1810826063156128, "learning_rate": 0.0001659977848695238, "loss": 1.4251, "step": 2043 }, { "epoch": 0.29279472854891847, "grad_norm": 1.4355628490447998, "learning_rate": 0.0001659629198194955, "loss": 1.3328, "step": 2044 }, { "epoch": 0.2929379745022203, "grad_norm": 1.263792872428894, "learning_rate": 0.00016592804056928285, "loss": 1.5333, "step": 2045 }, { "epoch": 0.29308122045552215, "grad_norm": 1.1681278944015503, "learning_rate": 0.0001658931471263946, "loss": 1.4066, "step": 2046 }, { "epoch": 0.29322446640882394, "grad_norm": 1.1122502088546753, "learning_rate": 0.0001658582394983424, "loss": 1.5278, "step": 2047 }, { "epoch": 0.2933677123621258, "grad_norm": 1.3065193891525269, "learning_rate": 0.0001658233176926409, "loss": 1.6378, "step": 2048 }, { "epoch": 0.29351095831542756, "grad_norm": 1.237809658050537, "learning_rate": 0.000165788381716808, "loss": 1.3732, "step": 2049 }, { "epoch": 0.2936542042687294, "grad_norm": 1.2365648746490479, "learning_rate": 0.00016575343157836455, "loss": 1.5975, "step": 2050 }, { "epoch": 0.29379745022203124, "grad_norm": 1.2680165767669678, "learning_rate": 0.00016571846728483442, "loss": 1.4558, "step": 2051 }, { "epoch": 0.293940696175333, "grad_norm": 1.1777329444885254, "learning_rate": 0.00016568348884374453, "loss": 1.5314, "step": 2052 }, { "epoch": 0.29408394212863487, "grad_norm": 1.0654865503311157, "learning_rate": 0.00016564849626262492, "loss": 1.5016, "step": 2053 }, { "epoch": 0.2942271880819367, "grad_norm": 1.2531150579452515, "learning_rate": 0.00016561348954900865, "loss": 1.493, "step": 2054 }, { "epoch": 0.2943704340352385, "grad_norm": 1.0660320520401, "learning_rate": 0.00016557846871043173, "loss": 1.6956, "step": 2055 }, { "epoch": 0.29451367998854033, "grad_norm": 1.2347978353500366, "learning_rate": 0.00016554343375443331, "loss": 1.5185, "step": 2056 }, { "epoch": 0.2946569259418421, "grad_norm": 1.2452187538146973, "learning_rate": 0.00016550838468855553, "loss": 1.5557, "step": 2057 }, { "epoch": 0.29480017189514396, "grad_norm": 1.2250832319259644, "learning_rate": 0.00016547332152034365, "loss": 1.5764, "step": 2058 }, { "epoch": 0.2949434178484458, "grad_norm": 1.1190541982650757, "learning_rate": 0.00016543824425734583, "loss": 1.3557, "step": 2059 }, { "epoch": 0.2950866638017476, "grad_norm": 1.1365809440612793, "learning_rate": 0.00016540315290711338, "loss": 1.5388, "step": 2060 }, { "epoch": 0.2952299097550494, "grad_norm": 1.1335299015045166, "learning_rate": 0.0001653680474772006, "loss": 1.4182, "step": 2061 }, { "epoch": 0.29537315570835126, "grad_norm": 1.0756864547729492, "learning_rate": 0.0001653329279751648, "loss": 1.5416, "step": 2062 }, { "epoch": 0.29551640166165305, "grad_norm": 1.4491865634918213, "learning_rate": 0.00016529779440856634, "loss": 1.586, "step": 2063 }, { "epoch": 0.2956596476149549, "grad_norm": 1.335219144821167, "learning_rate": 0.0001652626467849686, "loss": 1.5382, "step": 2064 }, { "epoch": 0.2958028935682567, "grad_norm": 1.221994161605835, "learning_rate": 0.00016522748511193803, "loss": 1.5505, "step": 2065 }, { "epoch": 0.2959461395215585, "grad_norm": 1.3121684789657593, "learning_rate": 0.00016519230939704402, "loss": 1.5812, "step": 2066 }, { "epoch": 0.29608938547486036, "grad_norm": 1.2029885053634644, "learning_rate": 0.00016515711964785906, "loss": 1.518, "step": 2067 }, { "epoch": 0.29623263142816214, "grad_norm": 1.1270010471343994, "learning_rate": 0.00016512191587195866, "loss": 1.481, "step": 2068 }, { "epoch": 0.296375877381464, "grad_norm": 1.1432565450668335, "learning_rate": 0.00016508669807692127, "loss": 1.5106, "step": 2069 }, { "epoch": 0.29651912333476577, "grad_norm": 1.1251963376998901, "learning_rate": 0.0001650514662703284, "loss": 1.6167, "step": 2070 }, { "epoch": 0.2966623692880676, "grad_norm": 1.1076250076293945, "learning_rate": 0.00016501622045976463, "loss": 1.618, "step": 2071 }, { "epoch": 0.29680561524136945, "grad_norm": 1.3478925228118896, "learning_rate": 0.00016498096065281753, "loss": 1.4049, "step": 2072 }, { "epoch": 0.29694886119467123, "grad_norm": 1.1754755973815918, "learning_rate": 0.00016494568685707762, "loss": 1.5193, "step": 2073 }, { "epoch": 0.2970921071479731, "grad_norm": 1.0825507640838623, "learning_rate": 0.00016491039908013847, "loss": 1.5604, "step": 2074 }, { "epoch": 0.2972353531012749, "grad_norm": 1.1847012042999268, "learning_rate": 0.00016487509732959668, "loss": 1.4495, "step": 2075 }, { "epoch": 0.2973785990545767, "grad_norm": 1.202048897743225, "learning_rate": 0.00016483978161305188, "loss": 1.5373, "step": 2076 }, { "epoch": 0.29752184500787854, "grad_norm": 1.0766477584838867, "learning_rate": 0.0001648044519381066, "loss": 1.4799, "step": 2077 }, { "epoch": 0.2976650909611803, "grad_norm": 1.326422095298767, "learning_rate": 0.00016476910831236648, "loss": 1.5965, "step": 2078 }, { "epoch": 0.29780833691448216, "grad_norm": 0.9984796643257141, "learning_rate": 0.0001647337507434401, "loss": 1.5461, "step": 2079 }, { "epoch": 0.297951582867784, "grad_norm": 1.0483592748641968, "learning_rate": 0.00016469837923893915, "loss": 1.5414, "step": 2080 }, { "epoch": 0.2980948288210858, "grad_norm": 1.0608564615249634, "learning_rate": 0.00016466299380647818, "loss": 1.4004, "step": 2081 }, { "epoch": 0.29823807477438763, "grad_norm": 1.0678863525390625, "learning_rate": 0.00016462759445367477, "loss": 1.5354, "step": 2082 }, { "epoch": 0.29838132072768947, "grad_norm": 1.113074779510498, "learning_rate": 0.00016459218118814958, "loss": 1.2301, "step": 2083 }, { "epoch": 0.29852456668099125, "grad_norm": 1.4846665859222412, "learning_rate": 0.00016455675401752618, "loss": 1.5377, "step": 2084 }, { "epoch": 0.2986678126342931, "grad_norm": 1.0702201128005981, "learning_rate": 0.00016452131294943117, "loss": 1.6, "step": 2085 }, { "epoch": 0.2988110585875949, "grad_norm": 1.4116028547286987, "learning_rate": 0.00016448585799149417, "loss": 1.57, "step": 2086 }, { "epoch": 0.2989543045408967, "grad_norm": 1.1725035905838013, "learning_rate": 0.00016445038915134766, "loss": 1.5396, "step": 2087 }, { "epoch": 0.29909755049419856, "grad_norm": 1.3610206842422485, "learning_rate": 0.00016441490643662732, "loss": 1.5831, "step": 2088 }, { "epoch": 0.29924079644750035, "grad_norm": 1.1565113067626953, "learning_rate": 0.0001643794098549716, "loss": 1.8068, "step": 2089 }, { "epoch": 0.2993840424008022, "grad_norm": 0.989720344543457, "learning_rate": 0.0001643438994140221, "loss": 1.7064, "step": 2090 }, { "epoch": 0.29952728835410397, "grad_norm": 1.097925066947937, "learning_rate": 0.00016430837512142334, "loss": 1.4491, "step": 2091 }, { "epoch": 0.2996705343074058, "grad_norm": 1.265156865119934, "learning_rate": 0.00016427283698482281, "loss": 1.6055, "step": 2092 }, { "epoch": 0.29981378026070765, "grad_norm": 0.979518711566925, "learning_rate": 0.00016423728501187098, "loss": 1.4324, "step": 2093 }, { "epoch": 0.29995702621400944, "grad_norm": 0.9842087030410767, "learning_rate": 0.0001642017192102213, "loss": 1.4779, "step": 2094 }, { "epoch": 0.3001002721673113, "grad_norm": 1.1137053966522217, "learning_rate": 0.0001641661395875302, "loss": 1.5169, "step": 2095 }, { "epoch": 0.3002435181206131, "grad_norm": 1.2165104150772095, "learning_rate": 0.0001641305461514571, "loss": 1.4821, "step": 2096 }, { "epoch": 0.3003867640739149, "grad_norm": 1.1817777156829834, "learning_rate": 0.00016409493890966442, "loss": 1.5754, "step": 2097 }, { "epoch": 0.30053001002721674, "grad_norm": 1.1919370889663696, "learning_rate": 0.00016405931786981755, "loss": 1.3193, "step": 2098 }, { "epoch": 0.30067325598051853, "grad_norm": 1.0608690977096558, "learning_rate": 0.00016402368303958468, "loss": 1.3342, "step": 2099 }, { "epoch": 0.30081650193382037, "grad_norm": 1.5053035020828247, "learning_rate": 0.00016398803442663724, "loss": 1.3495, "step": 2100 }, { "epoch": 0.3009597478871222, "grad_norm": 1.2427634000778198, "learning_rate": 0.00016395237203864943, "loss": 1.3984, "step": 2101 }, { "epoch": 0.301102993840424, "grad_norm": 1.1313562393188477, "learning_rate": 0.0001639166958832985, "loss": 1.3387, "step": 2102 }, { "epoch": 0.30124623979372583, "grad_norm": 0.9577500224113464, "learning_rate": 0.00016388100596826465, "loss": 1.4214, "step": 2103 }, { "epoch": 0.3013894857470276, "grad_norm": 1.256043791770935, "learning_rate": 0.000163845302301231, "loss": 1.6095, "step": 2104 }, { "epoch": 0.30153273170032946, "grad_norm": 1.2144078016281128, "learning_rate": 0.00016380958488988368, "loss": 1.3954, "step": 2105 }, { "epoch": 0.3016759776536313, "grad_norm": 0.9309338331222534, "learning_rate": 0.00016377385374191183, "loss": 1.5184, "step": 2106 }, { "epoch": 0.3018192236069331, "grad_norm": 1.0648123025894165, "learning_rate": 0.0001637381088650074, "loss": 1.5401, "step": 2107 }, { "epoch": 0.3019624695602349, "grad_norm": 1.1124775409698486, "learning_rate": 0.0001637023502668654, "loss": 1.2796, "step": 2108 }, { "epoch": 0.30210571551353677, "grad_norm": 1.1787291765213013, "learning_rate": 0.00016366657795518377, "loss": 1.6127, "step": 2109 }, { "epoch": 0.30224896146683855, "grad_norm": 1.4197443723678589, "learning_rate": 0.00016363079193766345, "loss": 1.5459, "step": 2110 }, { "epoch": 0.3023922074201404, "grad_norm": 1.2080607414245605, "learning_rate": 0.00016359499222200818, "loss": 1.657, "step": 2111 }, { "epoch": 0.3025354533734422, "grad_norm": 1.2439638376235962, "learning_rate": 0.00016355917881592485, "loss": 1.8027, "step": 2112 }, { "epoch": 0.302678699326744, "grad_norm": 1.3518328666687012, "learning_rate": 0.00016352335172712317, "loss": 1.5276, "step": 2113 }, { "epoch": 0.30282194528004586, "grad_norm": 1.0452747344970703, "learning_rate": 0.0001634875109633158, "loss": 1.5041, "step": 2114 }, { "epoch": 0.30296519123334764, "grad_norm": 1.233038306236267, "learning_rate": 0.00016345165653221838, "loss": 1.504, "step": 2115 }, { "epoch": 0.3031084371866495, "grad_norm": 1.1606652736663818, "learning_rate": 0.00016341578844154955, "loss": 1.6446, "step": 2116 }, { "epoch": 0.3032516831399513, "grad_norm": 1.1208771467208862, "learning_rate": 0.00016337990669903073, "loss": 1.6463, "step": 2117 }, { "epoch": 0.3033949290932531, "grad_norm": 1.0898821353912354, "learning_rate": 0.00016334401131238644, "loss": 1.4871, "step": 2118 }, { "epoch": 0.30353817504655495, "grad_norm": 1.1626981496810913, "learning_rate": 0.000163308102289344, "loss": 1.3341, "step": 2119 }, { "epoch": 0.30368142099985673, "grad_norm": 1.1931121349334717, "learning_rate": 0.00016327217963763374, "loss": 1.6295, "step": 2120 }, { "epoch": 0.3038246669531586, "grad_norm": 1.210267186164856, "learning_rate": 0.000163236243364989, "loss": 1.6429, "step": 2121 }, { "epoch": 0.3039679129064604, "grad_norm": 1.1638072729110718, "learning_rate": 0.0001632002934791459, "loss": 1.4613, "step": 2122 }, { "epoch": 0.3041111588597622, "grad_norm": 1.187139630317688, "learning_rate": 0.0001631643299878436, "loss": 1.6713, "step": 2123 }, { "epoch": 0.30425440481306404, "grad_norm": 1.2632700204849243, "learning_rate": 0.00016312835289882408, "loss": 1.4449, "step": 2124 }, { "epoch": 0.3043976507663658, "grad_norm": 1.282806158065796, "learning_rate": 0.0001630923622198324, "loss": 1.4624, "step": 2125 }, { "epoch": 0.30454089671966766, "grad_norm": 1.295003056526184, "learning_rate": 0.00016305635795861643, "loss": 1.3911, "step": 2126 }, { "epoch": 0.3046841426729695, "grad_norm": 1.4277797937393188, "learning_rate": 0.00016302034012292697, "loss": 1.5861, "step": 2127 }, { "epoch": 0.3048273886262713, "grad_norm": 1.27116060256958, "learning_rate": 0.0001629843087205178, "loss": 1.4702, "step": 2128 }, { "epoch": 0.30497063457957313, "grad_norm": 1.2481271028518677, "learning_rate": 0.0001629482637591456, "loss": 1.6228, "step": 2129 }, { "epoch": 0.30511388053287497, "grad_norm": 1.1447417736053467, "learning_rate": 0.00016291220524656993, "loss": 1.5206, "step": 2130 }, { "epoch": 0.30525712648617676, "grad_norm": 1.6537429094314575, "learning_rate": 0.00016287613319055332, "loss": 1.5686, "step": 2131 }, { "epoch": 0.3054003724394786, "grad_norm": 1.060086727142334, "learning_rate": 0.00016284004759886114, "loss": 1.5382, "step": 2132 }, { "epoch": 0.3055436183927804, "grad_norm": 1.0188056230545044, "learning_rate": 0.00016280394847926178, "loss": 1.4739, "step": 2133 }, { "epoch": 0.3056868643460822, "grad_norm": 1.2733073234558105, "learning_rate": 0.0001627678358395265, "loss": 1.3874, "step": 2134 }, { "epoch": 0.30583011029938406, "grad_norm": 1.1738461256027222, "learning_rate": 0.0001627317096874294, "loss": 1.5351, "step": 2135 }, { "epoch": 0.30597335625268585, "grad_norm": 1.1572927236557007, "learning_rate": 0.00016269557003074757, "loss": 1.4754, "step": 2136 }, { "epoch": 0.3061166022059877, "grad_norm": 1.1594451665878296, "learning_rate": 0.00016265941687726099, "loss": 1.5165, "step": 2137 }, { "epoch": 0.3062598481592895, "grad_norm": 1.1947145462036133, "learning_rate": 0.00016262325023475252, "loss": 1.4641, "step": 2138 }, { "epoch": 0.3064030941125913, "grad_norm": 1.3480371236801147, "learning_rate": 0.000162587070111008, "loss": 1.4881, "step": 2139 }, { "epoch": 0.30654634006589315, "grad_norm": 1.0744448900222778, "learning_rate": 0.00016255087651381603, "loss": 1.3569, "step": 2140 }, { "epoch": 0.30668958601919494, "grad_norm": 1.0117100477218628, "learning_rate": 0.00016251466945096826, "loss": 1.4594, "step": 2141 }, { "epoch": 0.3068328319724968, "grad_norm": 1.112363338470459, "learning_rate": 0.00016247844893025918, "loss": 1.4839, "step": 2142 }, { "epoch": 0.3069760779257986, "grad_norm": 1.1908221244812012, "learning_rate": 0.0001624422149594861, "loss": 1.4035, "step": 2143 }, { "epoch": 0.3071193238791004, "grad_norm": 1.2563285827636719, "learning_rate": 0.00016240596754644936, "loss": 1.6785, "step": 2144 }, { "epoch": 0.30726256983240224, "grad_norm": 1.1688882112503052, "learning_rate": 0.00016236970669895214, "loss": 1.6509, "step": 2145 }, { "epoch": 0.30740581578570403, "grad_norm": 1.2515830993652344, "learning_rate": 0.00016233343242480047, "loss": 1.6316, "step": 2146 }, { "epoch": 0.30754906173900587, "grad_norm": 1.3079081773757935, "learning_rate": 0.00016229714473180328, "loss": 1.4211, "step": 2147 }, { "epoch": 0.3076923076923077, "grad_norm": 1.183365821838379, "learning_rate": 0.00016226084362777247, "loss": 1.3167, "step": 2148 }, { "epoch": 0.3078355536456095, "grad_norm": 1.2476295232772827, "learning_rate": 0.00016222452912052272, "loss": 1.5167, "step": 2149 }, { "epoch": 0.30797879959891133, "grad_norm": 1.134782314300537, "learning_rate": 0.0001621882012178717, "loss": 1.4883, "step": 2150 }, { "epoch": 0.3081220455522132, "grad_norm": 1.1389729976654053, "learning_rate": 0.0001621518599276399, "loss": 1.5512, "step": 2151 }, { "epoch": 0.30826529150551496, "grad_norm": 1.3985515832901, "learning_rate": 0.00016211550525765063, "loss": 1.3982, "step": 2152 }, { "epoch": 0.3084085374588168, "grad_norm": 1.3249740600585938, "learning_rate": 0.0001620791372157302, "loss": 1.4575, "step": 2153 }, { "epoch": 0.3085517834121186, "grad_norm": 1.3010331392288208, "learning_rate": 0.00016204275580970773, "loss": 1.5867, "step": 2154 }, { "epoch": 0.3086950293654204, "grad_norm": 1.1831095218658447, "learning_rate": 0.00016200636104741532, "loss": 1.6426, "step": 2155 }, { "epoch": 0.30883827531872227, "grad_norm": 1.4063340425491333, "learning_rate": 0.00016196995293668774, "loss": 1.5769, "step": 2156 }, { "epoch": 0.30898152127202405, "grad_norm": 1.0844824314117432, "learning_rate": 0.0001619335314853628, "loss": 1.3827, "step": 2157 }, { "epoch": 0.3091247672253259, "grad_norm": 1.1624436378479004, "learning_rate": 0.00016189709670128122, "loss": 1.4785, "step": 2158 }, { "epoch": 0.3092680131786277, "grad_norm": 0.9787557125091553, "learning_rate": 0.00016186064859228638, "loss": 1.5382, "step": 2159 }, { "epoch": 0.3094112591319295, "grad_norm": 1.1418516635894775, "learning_rate": 0.0001618241871662247, "loss": 1.4623, "step": 2160 }, { "epoch": 0.30955450508523136, "grad_norm": 1.3446447849273682, "learning_rate": 0.00016178771243094547, "loss": 1.5839, "step": 2161 }, { "epoch": 0.30969775103853314, "grad_norm": 1.1753807067871094, "learning_rate": 0.00016175122439430077, "loss": 1.4595, "step": 2162 }, { "epoch": 0.309840996991835, "grad_norm": 1.0798187255859375, "learning_rate": 0.00016171472306414554, "loss": 1.4958, "step": 2163 }, { "epoch": 0.3099842429451368, "grad_norm": 0.92396080493927, "learning_rate": 0.00016167820844833764, "loss": 1.563, "step": 2164 }, { "epoch": 0.3101274888984386, "grad_norm": 1.0034147500991821, "learning_rate": 0.00016164168055473775, "loss": 1.6703, "step": 2165 }, { "epoch": 0.31027073485174045, "grad_norm": 1.1584593057632446, "learning_rate": 0.00016160513939120943, "loss": 1.6473, "step": 2166 }, { "epoch": 0.31041398080504223, "grad_norm": 1.0906035900115967, "learning_rate": 0.0001615685849656191, "loss": 1.6559, "step": 2167 }, { "epoch": 0.3105572267583441, "grad_norm": 1.190224051475525, "learning_rate": 0.00016153201728583602, "loss": 1.5179, "step": 2168 }, { "epoch": 0.3107004727116459, "grad_norm": 1.2246826887130737, "learning_rate": 0.0001614954363597323, "loss": 1.5772, "step": 2169 }, { "epoch": 0.3108437186649477, "grad_norm": 1.277646780014038, "learning_rate": 0.0001614588421951829, "loss": 1.4338, "step": 2170 }, { "epoch": 0.31098696461824954, "grad_norm": 1.1071202754974365, "learning_rate": 0.00016142223480006563, "loss": 1.5813, "step": 2171 }, { "epoch": 0.3111302105715514, "grad_norm": 1.2446386814117432, "learning_rate": 0.0001613856141822612, "loss": 1.6911, "step": 2172 }, { "epoch": 0.31127345652485316, "grad_norm": 1.3296643495559692, "learning_rate": 0.0001613489803496531, "loss": 1.5323, "step": 2173 }, { "epoch": 0.311416702478155, "grad_norm": 1.1802195310592651, "learning_rate": 0.00016131233331012762, "loss": 1.3824, "step": 2174 }, { "epoch": 0.3115599484314568, "grad_norm": 1.1795939207077026, "learning_rate": 0.0001612756730715741, "loss": 1.4237, "step": 2175 }, { "epoch": 0.31170319438475863, "grad_norm": 1.3027054071426392, "learning_rate": 0.00016123899964188448, "loss": 1.4945, "step": 2176 }, { "epoch": 0.31184644033806047, "grad_norm": 1.557706594467163, "learning_rate": 0.00016120231302895366, "loss": 1.3466, "step": 2177 }, { "epoch": 0.31198968629136226, "grad_norm": 1.1421482563018799, "learning_rate": 0.00016116561324067935, "loss": 1.6284, "step": 2178 }, { "epoch": 0.3121329322446641, "grad_norm": 1.1731735467910767, "learning_rate": 0.00016112890028496216, "loss": 1.6234, "step": 2179 }, { "epoch": 0.3122761781979659, "grad_norm": 1.2815722227096558, "learning_rate": 0.0001610921741697054, "loss": 1.6412, "step": 2180 }, { "epoch": 0.3124194241512677, "grad_norm": 1.639469027519226, "learning_rate": 0.00016105543490281535, "loss": 1.7375, "step": 2181 }, { "epoch": 0.31256267010456956, "grad_norm": 1.347538948059082, "learning_rate": 0.0001610186824922011, "loss": 1.8432, "step": 2182 }, { "epoch": 0.31270591605787135, "grad_norm": 1.192521333694458, "learning_rate": 0.00016098191694577442, "loss": 1.481, "step": 2183 }, { "epoch": 0.3128491620111732, "grad_norm": 1.2069753408432007, "learning_rate": 0.00016094513827145016, "loss": 1.6028, "step": 2184 }, { "epoch": 0.31299240796447503, "grad_norm": 1.18004310131073, "learning_rate": 0.00016090834647714575, "loss": 1.417, "step": 2185 }, { "epoch": 0.3131356539177768, "grad_norm": 1.099818468093872, "learning_rate": 0.00016087154157078156, "loss": 1.7394, "step": 2186 }, { "epoch": 0.31327889987107865, "grad_norm": 1.3205980062484741, "learning_rate": 0.00016083472356028086, "loss": 1.4009, "step": 2187 }, { "epoch": 0.31342214582438044, "grad_norm": 1.202562928199768, "learning_rate": 0.00016079789245356958, "loss": 1.5413, "step": 2188 }, { "epoch": 0.3135653917776823, "grad_norm": 1.3773061037063599, "learning_rate": 0.00016076104825857657, "loss": 1.4651, "step": 2189 }, { "epoch": 0.3137086377309841, "grad_norm": 1.0373605489730835, "learning_rate": 0.00016072419098323346, "loss": 1.47, "step": 2190 }, { "epoch": 0.3138518836842859, "grad_norm": 1.0439152717590332, "learning_rate": 0.00016068732063547473, "loss": 1.488, "step": 2191 }, { "epoch": 0.31399512963758774, "grad_norm": 1.0512992143630981, "learning_rate": 0.00016065043722323768, "loss": 1.4161, "step": 2192 }, { "epoch": 0.3141383755908896, "grad_norm": 1.2137712240219116, "learning_rate": 0.0001606135407544623, "loss": 1.4444, "step": 2193 }, { "epoch": 0.31428162154419137, "grad_norm": 1.102614164352417, "learning_rate": 0.0001605766312370916, "loss": 1.5654, "step": 2194 }, { "epoch": 0.3144248674974932, "grad_norm": 1.1308907270431519, "learning_rate": 0.0001605397086790712, "loss": 1.5509, "step": 2195 }, { "epoch": 0.314568113450795, "grad_norm": 0.9930155277252197, "learning_rate": 0.00016050277308834966, "loss": 1.4258, "step": 2196 }, { "epoch": 0.31471135940409684, "grad_norm": 1.1857571601867676, "learning_rate": 0.00016046582447287828, "loss": 1.3974, "step": 2197 }, { "epoch": 0.3148546053573987, "grad_norm": 1.3258522748947144, "learning_rate": 0.0001604288628406112, "loss": 1.4701, "step": 2198 }, { "epoch": 0.31499785131070046, "grad_norm": 1.2565850019454956, "learning_rate": 0.0001603918881995053, "loss": 1.3446, "step": 2199 }, { "epoch": 0.3151410972640023, "grad_norm": 1.2637200355529785, "learning_rate": 0.00016035490055752035, "loss": 1.4865, "step": 2200 }, { "epoch": 0.3152843432173041, "grad_norm": 1.170576572418213, "learning_rate": 0.0001603178999226189, "loss": 1.4885, "step": 2201 }, { "epoch": 0.3154275891706059, "grad_norm": 1.33774995803833, "learning_rate": 0.0001602808863027662, "loss": 1.659, "step": 2202 }, { "epoch": 0.31557083512390777, "grad_norm": 1.246603012084961, "learning_rate": 0.0001602438597059304, "loss": 1.463, "step": 2203 }, { "epoch": 0.31571408107720955, "grad_norm": 1.33806574344635, "learning_rate": 0.00016020682014008242, "loss": 1.4385, "step": 2204 }, { "epoch": 0.3158573270305114, "grad_norm": 1.0758123397827148, "learning_rate": 0.00016016976761319595, "loss": 1.3232, "step": 2205 }, { "epoch": 0.31600057298381323, "grad_norm": 1.2229743003845215, "learning_rate": 0.0001601327021332475, "loss": 1.3938, "step": 2206 }, { "epoch": 0.316143818937115, "grad_norm": 1.134279489517212, "learning_rate": 0.0001600956237082163, "loss": 1.5548, "step": 2207 }, { "epoch": 0.31628706489041686, "grad_norm": 1.2918243408203125, "learning_rate": 0.00016005853234608446, "loss": 1.5337, "step": 2208 }, { "epoch": 0.31643031084371864, "grad_norm": 1.070294976234436, "learning_rate": 0.00016002142805483685, "loss": 1.4378, "step": 2209 }, { "epoch": 0.3165735567970205, "grad_norm": 0.9638130068778992, "learning_rate": 0.00015998431084246107, "loss": 1.5197, "step": 2210 }, { "epoch": 0.3167168027503223, "grad_norm": 1.0661437511444092, "learning_rate": 0.0001599471807169475, "loss": 1.4474, "step": 2211 }, { "epoch": 0.3168600487036241, "grad_norm": 0.9932203888893127, "learning_rate": 0.00015991003768628942, "loss": 1.5267, "step": 2212 }, { "epoch": 0.31700329465692595, "grad_norm": 1.4060076475143433, "learning_rate": 0.00015987288175848277, "loss": 1.4741, "step": 2213 }, { "epoch": 0.31714654061022773, "grad_norm": 1.059059500694275, "learning_rate": 0.0001598357129415263, "loss": 1.6324, "step": 2214 }, { "epoch": 0.3172897865635296, "grad_norm": 1.2361373901367188, "learning_rate": 0.00015979853124342154, "loss": 1.3951, "step": 2215 }, { "epoch": 0.3174330325168314, "grad_norm": 1.1302155256271362, "learning_rate": 0.00015976133667217277, "loss": 1.628, "step": 2216 }, { "epoch": 0.3175762784701332, "grad_norm": 1.0503168106079102, "learning_rate": 0.00015972412923578712, "loss": 1.4576, "step": 2217 }, { "epoch": 0.31771952442343504, "grad_norm": 1.1819865703582764, "learning_rate": 0.00015968690894227437, "loss": 1.5407, "step": 2218 }, { "epoch": 0.3178627703767369, "grad_norm": 1.3359627723693848, "learning_rate": 0.0001596496757996471, "loss": 1.3131, "step": 2219 }, { "epoch": 0.31800601633003867, "grad_norm": 0.9438854455947876, "learning_rate": 0.00015961242981592076, "loss": 1.5764, "step": 2220 }, { "epoch": 0.3181492622833405, "grad_norm": 1.0362292528152466, "learning_rate": 0.00015957517099911349, "loss": 1.711, "step": 2221 }, { "epoch": 0.3182925082366423, "grad_norm": 1.251639485359192, "learning_rate": 0.00015953789935724613, "loss": 1.3226, "step": 2222 }, { "epoch": 0.31843575418994413, "grad_norm": 1.4984397888183594, "learning_rate": 0.0001595006148983424, "loss": 1.4871, "step": 2223 }, { "epoch": 0.31857900014324597, "grad_norm": 1.1789679527282715, "learning_rate": 0.00015946331763042867, "loss": 1.411, "step": 2224 }, { "epoch": 0.31872224609654776, "grad_norm": 1.0282535552978516, "learning_rate": 0.00015942600756153414, "loss": 1.4731, "step": 2225 }, { "epoch": 0.3188654920498496, "grad_norm": 1.1996649503707886, "learning_rate": 0.00015938868469969077, "loss": 1.641, "step": 2226 }, { "epoch": 0.31900873800315144, "grad_norm": 1.229291319847107, "learning_rate": 0.00015935134905293322, "loss": 1.4413, "step": 2227 }, { "epoch": 0.3191519839564532, "grad_norm": 1.1473612785339355, "learning_rate": 0.0001593140006292989, "loss": 1.3477, "step": 2228 }, { "epoch": 0.31929522990975506, "grad_norm": 1.3567787408828735, "learning_rate": 0.00015927663943682808, "loss": 1.5504, "step": 2229 }, { "epoch": 0.31943847586305685, "grad_norm": 1.0172733068466187, "learning_rate": 0.00015923926548356364, "loss": 1.5448, "step": 2230 }, { "epoch": 0.3195817218163587, "grad_norm": 1.3115447759628296, "learning_rate": 0.00015920187877755128, "loss": 1.4644, "step": 2231 }, { "epoch": 0.31972496776966053, "grad_norm": 1.0022472143173218, "learning_rate": 0.00015916447932683947, "loss": 1.5497, "step": 2232 }, { "epoch": 0.3198682137229623, "grad_norm": 1.1219295263290405, "learning_rate": 0.00015912706713947932, "loss": 1.6879, "step": 2233 }, { "epoch": 0.32001145967626415, "grad_norm": 1.1250125169754028, "learning_rate": 0.0001590896422235248, "loss": 1.376, "step": 2234 }, { "epoch": 0.32015470562956594, "grad_norm": 1.062451958656311, "learning_rate": 0.00015905220458703253, "loss": 1.4129, "step": 2235 }, { "epoch": 0.3202979515828678, "grad_norm": 1.1898211240768433, "learning_rate": 0.00015901475423806195, "loss": 1.4829, "step": 2236 }, { "epoch": 0.3204411975361696, "grad_norm": 1.3344218730926514, "learning_rate": 0.00015897729118467517, "loss": 1.5264, "step": 2237 }, { "epoch": 0.3205844434894714, "grad_norm": 1.243374228477478, "learning_rate": 0.00015893981543493705, "loss": 1.6194, "step": 2238 }, { "epoch": 0.32072768944277324, "grad_norm": 1.0976192951202393, "learning_rate": 0.0001589023269969152, "loss": 1.6497, "step": 2239 }, { "epoch": 0.3208709353960751, "grad_norm": 0.9603078961372375, "learning_rate": 0.00015886482587867997, "loss": 1.4631, "step": 2240 }, { "epoch": 0.32101418134937687, "grad_norm": 1.171647310256958, "learning_rate": 0.0001588273120883044, "loss": 1.673, "step": 2241 }, { "epoch": 0.3211574273026787, "grad_norm": 1.0268837213516235, "learning_rate": 0.00015878978563386425, "loss": 1.5003, "step": 2242 }, { "epoch": 0.3213006732559805, "grad_norm": 1.29710853099823, "learning_rate": 0.00015875224652343815, "loss": 1.4024, "step": 2243 }, { "epoch": 0.32144391920928234, "grad_norm": 1.0878986120224, "learning_rate": 0.00015871469476510722, "loss": 1.4315, "step": 2244 }, { "epoch": 0.3215871651625842, "grad_norm": 1.182474970817566, "learning_rate": 0.00015867713036695546, "loss": 1.5324, "step": 2245 }, { "epoch": 0.32173041111588596, "grad_norm": 1.1906991004943848, "learning_rate": 0.00015863955333706957, "loss": 1.4941, "step": 2246 }, { "epoch": 0.3218736570691878, "grad_norm": 1.0907808542251587, "learning_rate": 0.00015860196368353897, "loss": 1.6392, "step": 2247 }, { "epoch": 0.32201690302248964, "grad_norm": 1.0034211874008179, "learning_rate": 0.00015856436141445577, "loss": 1.3608, "step": 2248 }, { "epoch": 0.3221601489757914, "grad_norm": 1.0663737058639526, "learning_rate": 0.00015852674653791477, "loss": 1.5279, "step": 2249 }, { "epoch": 0.32230339492909327, "grad_norm": 1.3582559823989868, "learning_rate": 0.00015848911906201355, "loss": 1.5769, "step": 2250 }, { "epoch": 0.32244664088239505, "grad_norm": 1.1087359189987183, "learning_rate": 0.0001584514789948524, "loss": 1.3983, "step": 2251 }, { "epoch": 0.3225898868356969, "grad_norm": 1.2976455688476562, "learning_rate": 0.00015841382634453427, "loss": 1.472, "step": 2252 }, { "epoch": 0.32273313278899873, "grad_norm": 1.1020113229751587, "learning_rate": 0.00015837616111916483, "loss": 1.7045, "step": 2253 }, { "epoch": 0.3228763787423005, "grad_norm": 1.2095621824264526, "learning_rate": 0.0001583384833268525, "loss": 1.4808, "step": 2254 }, { "epoch": 0.32301962469560236, "grad_norm": 1.1679112911224365, "learning_rate": 0.00015830079297570837, "loss": 1.391, "step": 2255 }, { "epoch": 0.32316287064890414, "grad_norm": 1.1608699560165405, "learning_rate": 0.0001582630900738462, "loss": 1.4031, "step": 2256 }, { "epoch": 0.323306116602206, "grad_norm": 1.7239489555358887, "learning_rate": 0.00015822537462938254, "loss": 1.496, "step": 2257 }, { "epoch": 0.3234493625555078, "grad_norm": 1.1595648527145386, "learning_rate": 0.0001581876466504366, "loss": 1.3547, "step": 2258 }, { "epoch": 0.3235926085088096, "grad_norm": 1.252260684967041, "learning_rate": 0.00015814990614513024, "loss": 1.3419, "step": 2259 }, { "epoch": 0.32373585446211145, "grad_norm": 1.2473567724227905, "learning_rate": 0.00015811215312158807, "loss": 1.6324, "step": 2260 }, { "epoch": 0.3238791004154133, "grad_norm": 1.429236888885498, "learning_rate": 0.00015807438758793735, "loss": 1.4586, "step": 2261 }, { "epoch": 0.3240223463687151, "grad_norm": 1.2644495964050293, "learning_rate": 0.00015803660955230817, "loss": 1.4626, "step": 2262 }, { "epoch": 0.3241655923220169, "grad_norm": 1.2003517150878906, "learning_rate": 0.0001579988190228331, "loss": 1.5295, "step": 2263 }, { "epoch": 0.3243088382753187, "grad_norm": 1.5481727123260498, "learning_rate": 0.00015796101600764755, "loss": 1.5739, "step": 2264 }, { "epoch": 0.32445208422862054, "grad_norm": 1.2670458555221558, "learning_rate": 0.00015792320051488955, "loss": 1.7023, "step": 2265 }, { "epoch": 0.3245953301819224, "grad_norm": 1.7492073774337769, "learning_rate": 0.0001578853725526999, "loss": 1.6082, "step": 2266 }, { "epoch": 0.32473857613522417, "grad_norm": 1.0925644636154175, "learning_rate": 0.00015784753212922192, "loss": 1.5033, "step": 2267 }, { "epoch": 0.324881822088526, "grad_norm": 1.1214485168457031, "learning_rate": 0.0001578096792526018, "loss": 1.5169, "step": 2268 }, { "epoch": 0.3250250680418278, "grad_norm": 1.150760293006897, "learning_rate": 0.00015777181393098833, "loss": 1.5535, "step": 2269 }, { "epoch": 0.32516831399512963, "grad_norm": 1.5171329975128174, "learning_rate": 0.0001577339361725329, "loss": 1.4626, "step": 2270 }, { "epoch": 0.3253115599484315, "grad_norm": 1.1188410520553589, "learning_rate": 0.0001576960459853898, "loss": 1.4159, "step": 2271 }, { "epoch": 0.32545480590173326, "grad_norm": 1.140702724456787, "learning_rate": 0.00015765814337771565, "loss": 1.7028, "step": 2272 }, { "epoch": 0.3255980518550351, "grad_norm": 1.4552578926086426, "learning_rate": 0.00015762022835767012, "loss": 1.6244, "step": 2273 }, { "epoch": 0.32574129780833694, "grad_norm": 0.9635079503059387, "learning_rate": 0.00015758230093341523, "loss": 1.4716, "step": 2274 }, { "epoch": 0.3258845437616387, "grad_norm": 1.3385297060012817, "learning_rate": 0.00015754436111311594, "loss": 1.3383, "step": 2275 }, { "epoch": 0.32602778971494056, "grad_norm": 1.082505226135254, "learning_rate": 0.0001575064089049397, "loss": 1.5454, "step": 2276 }, { "epoch": 0.32617103566824235, "grad_norm": 1.1876015663146973, "learning_rate": 0.0001574684443170567, "loss": 1.4513, "step": 2277 }, { "epoch": 0.3263142816215442, "grad_norm": 1.1671292781829834, "learning_rate": 0.00015743046735763975, "loss": 1.4244, "step": 2278 }, { "epoch": 0.32645752757484603, "grad_norm": 1.4781322479248047, "learning_rate": 0.00015739247803486434, "loss": 1.4179, "step": 2279 }, { "epoch": 0.3266007735281478, "grad_norm": 1.1260558366775513, "learning_rate": 0.00015735447635690868, "loss": 1.5457, "step": 2280 }, { "epoch": 0.32674401948144965, "grad_norm": 1.258539080619812, "learning_rate": 0.00015731646233195357, "loss": 1.3918, "step": 2281 }, { "epoch": 0.3268872654347515, "grad_norm": 0.9559856057167053, "learning_rate": 0.00015727843596818243, "loss": 1.7238, "step": 2282 }, { "epoch": 0.3270305113880533, "grad_norm": 1.3738961219787598, "learning_rate": 0.00015724039727378148, "loss": 1.6064, "step": 2283 }, { "epoch": 0.3271737573413551, "grad_norm": 1.1471166610717773, "learning_rate": 0.0001572023462569395, "loss": 1.6804, "step": 2284 }, { "epoch": 0.3273170032946569, "grad_norm": 1.031479001045227, "learning_rate": 0.00015716428292584787, "loss": 1.6262, "step": 2285 }, { "epoch": 0.32746024924795875, "grad_norm": 1.016486406326294, "learning_rate": 0.00015712620728870074, "loss": 1.3927, "step": 2286 }, { "epoch": 0.3276034952012606, "grad_norm": 1.1097673177719116, "learning_rate": 0.00015708811935369484, "loss": 1.2791, "step": 2287 }, { "epoch": 0.32774674115456237, "grad_norm": 1.3645130395889282, "learning_rate": 0.00015705001912902957, "loss": 1.4728, "step": 2288 }, { "epoch": 0.3278899871078642, "grad_norm": 1.1995320320129395, "learning_rate": 0.00015701190662290693, "loss": 1.5572, "step": 2289 }, { "epoch": 0.328033233061166, "grad_norm": 1.338292121887207, "learning_rate": 0.0001569737818435316, "loss": 1.5091, "step": 2290 }, { "epoch": 0.32817647901446784, "grad_norm": 1.14542818069458, "learning_rate": 0.00015693564479911097, "loss": 1.4814, "step": 2291 }, { "epoch": 0.3283197249677697, "grad_norm": 1.1103192567825317, "learning_rate": 0.0001568974954978549, "loss": 1.5678, "step": 2292 }, { "epoch": 0.32846297092107146, "grad_norm": 1.0646166801452637, "learning_rate": 0.00015685933394797607, "loss": 1.3466, "step": 2293 }, { "epoch": 0.3286062168743733, "grad_norm": 1.1033306121826172, "learning_rate": 0.00015682116015768965, "loss": 1.5594, "step": 2294 }, { "epoch": 0.32874946282767514, "grad_norm": 1.1840343475341797, "learning_rate": 0.00015678297413521363, "loss": 1.5043, "step": 2295 }, { "epoch": 0.32889270878097693, "grad_norm": 1.3017816543579102, "learning_rate": 0.00015674477588876838, "loss": 1.6793, "step": 2296 }, { "epoch": 0.32903595473427877, "grad_norm": 1.0799204111099243, "learning_rate": 0.00015670656542657714, "loss": 1.6474, "step": 2297 }, { "epoch": 0.32917920068758055, "grad_norm": 1.232673168182373, "learning_rate": 0.0001566683427568656, "loss": 1.4657, "step": 2298 }, { "epoch": 0.3293224466408824, "grad_norm": 1.4009846448898315, "learning_rate": 0.00015663010788786221, "loss": 1.5891, "step": 2299 }, { "epoch": 0.32946569259418423, "grad_norm": 1.2172659635543823, "learning_rate": 0.00015659186082779797, "loss": 1.4843, "step": 2300 }, { "epoch": 0.329608938547486, "grad_norm": 1.1192210912704468, "learning_rate": 0.00015655360158490651, "loss": 1.8017, "step": 2301 }, { "epoch": 0.32975218450078786, "grad_norm": 1.1573307514190674, "learning_rate": 0.00015651533016742414, "loss": 1.5574, "step": 2302 }, { "epoch": 0.32989543045408964, "grad_norm": 0.9968916773796082, "learning_rate": 0.00015647704658358966, "loss": 1.6278, "step": 2303 }, { "epoch": 0.3300386764073915, "grad_norm": 1.275581955909729, "learning_rate": 0.00015643875084164467, "loss": 1.7992, "step": 2304 }, { "epoch": 0.3301819223606933, "grad_norm": 1.1056625843048096, "learning_rate": 0.00015640044294983329, "loss": 1.4009, "step": 2305 }, { "epoch": 0.3303251683139951, "grad_norm": 1.2805744409561157, "learning_rate": 0.00015636212291640224, "loss": 1.7041, "step": 2306 }, { "epoch": 0.33046841426729695, "grad_norm": 1.2893344163894653, "learning_rate": 0.00015632379074960082, "loss": 1.5771, "step": 2307 }, { "epoch": 0.3306116602205988, "grad_norm": 1.6476190090179443, "learning_rate": 0.0001562854464576811, "loss": 1.8503, "step": 2308 }, { "epoch": 0.3307549061739006, "grad_norm": 1.2533774375915527, "learning_rate": 0.00015624709004889758, "loss": 1.4707, "step": 2309 }, { "epoch": 0.3308981521272024, "grad_norm": 0.9150172472000122, "learning_rate": 0.0001562087215315075, "loss": 1.4367, "step": 2310 }, { "epoch": 0.3310413980805042, "grad_norm": 1.239235520362854, "learning_rate": 0.0001561703409137706, "loss": 1.407, "step": 2311 }, { "epoch": 0.33118464403380604, "grad_norm": 1.1505075693130493, "learning_rate": 0.0001561319482039493, "loss": 1.2941, "step": 2312 }, { "epoch": 0.3313278899871079, "grad_norm": 1.3796610832214355, "learning_rate": 0.0001560935434103086, "loss": 1.4374, "step": 2313 }, { "epoch": 0.33147113594040967, "grad_norm": 0.9538559317588806, "learning_rate": 0.00015605512654111615, "loss": 1.3898, "step": 2314 }, { "epoch": 0.3316143818937115, "grad_norm": 1.1141252517700195, "learning_rate": 0.00015601669760464203, "loss": 1.4083, "step": 2315 }, { "epoch": 0.33175762784701335, "grad_norm": 0.929050862789154, "learning_rate": 0.00015597825660915916, "loss": 1.399, "step": 2316 }, { "epoch": 0.33190087380031513, "grad_norm": 1.3936436176300049, "learning_rate": 0.0001559398035629429, "loss": 1.7724, "step": 2317 }, { "epoch": 0.332044119753617, "grad_norm": 1.1468356847763062, "learning_rate": 0.00015590133847427116, "loss": 1.5302, "step": 2318 }, { "epoch": 0.33218736570691876, "grad_norm": 0.9529779553413391, "learning_rate": 0.00015586286135142467, "loss": 1.6896, "step": 2319 }, { "epoch": 0.3323306116602206, "grad_norm": 1.0019365549087524, "learning_rate": 0.00015582437220268647, "loss": 1.4994, "step": 2320 }, { "epoch": 0.33247385761352244, "grad_norm": 1.0686752796173096, "learning_rate": 0.00015578587103634242, "loss": 1.573, "step": 2321 }, { "epoch": 0.3326171035668242, "grad_norm": 1.1038154363632202, "learning_rate": 0.00015574735786068078, "loss": 1.4369, "step": 2322 }, { "epoch": 0.33276034952012606, "grad_norm": 1.109255075454712, "learning_rate": 0.00015570883268399257, "loss": 1.4729, "step": 2323 }, { "epoch": 0.33290359547342785, "grad_norm": 1.6633360385894775, "learning_rate": 0.0001556702955145712, "loss": 1.5155, "step": 2324 }, { "epoch": 0.3330468414267297, "grad_norm": 1.1674869060516357, "learning_rate": 0.00015563174636071286, "loss": 1.3255, "step": 2325 }, { "epoch": 0.33319008738003153, "grad_norm": 1.412041425704956, "learning_rate": 0.00015559318523071622, "loss": 1.3511, "step": 2326 }, { "epoch": 0.3333333333333333, "grad_norm": 1.2201018333435059, "learning_rate": 0.0001555546121328825, "loss": 1.5166, "step": 2327 }, { "epoch": 0.33347657928663516, "grad_norm": 1.243205189704895, "learning_rate": 0.00015551602707551557, "loss": 1.4416, "step": 2328 }, { "epoch": 0.333619825239937, "grad_norm": 1.2978392839431763, "learning_rate": 0.00015547743006692177, "loss": 1.4771, "step": 2329 }, { "epoch": 0.3337630711932388, "grad_norm": 1.057361125946045, "learning_rate": 0.00015543882111541016, "loss": 1.3502, "step": 2330 }, { "epoch": 0.3339063171465406, "grad_norm": 1.2050553560256958, "learning_rate": 0.0001554002002292923, "loss": 1.6941, "step": 2331 }, { "epoch": 0.3340495630998424, "grad_norm": 1.4313703775405884, "learning_rate": 0.00015536156741688222, "loss": 1.4604, "step": 2332 }, { "epoch": 0.33419280905314425, "grad_norm": 1.3878430128097534, "learning_rate": 0.00015532292268649668, "loss": 1.6244, "step": 2333 }, { "epoch": 0.3343360550064461, "grad_norm": 1.005408525466919, "learning_rate": 0.0001552842660464549, "loss": 1.4441, "step": 2334 }, { "epoch": 0.33447930095974787, "grad_norm": 1.4121220111846924, "learning_rate": 0.00015524559750507873, "loss": 1.2828, "step": 2335 }, { "epoch": 0.3346225469130497, "grad_norm": 1.1042163372039795, "learning_rate": 0.00015520691707069254, "loss": 1.59, "step": 2336 }, { "epoch": 0.33476579286635155, "grad_norm": 1.2868388891220093, "learning_rate": 0.00015516822475162325, "loss": 1.5102, "step": 2337 }, { "epoch": 0.33490903881965334, "grad_norm": 1.318485140800476, "learning_rate": 0.0001551295205562004, "loss": 1.6114, "step": 2338 }, { "epoch": 0.3350522847729552, "grad_norm": 1.2359061241149902, "learning_rate": 0.000155090804492756, "loss": 1.5082, "step": 2339 }, { "epoch": 0.33519553072625696, "grad_norm": 1.212730884552002, "learning_rate": 0.00015505207656962467, "loss": 1.4821, "step": 2340 }, { "epoch": 0.3353387766795588, "grad_norm": 1.168373465538025, "learning_rate": 0.00015501333679514357, "loss": 1.4124, "step": 2341 }, { "epoch": 0.33548202263286064, "grad_norm": 1.3763903379440308, "learning_rate": 0.00015497458517765245, "loss": 1.4044, "step": 2342 }, { "epoch": 0.33562526858616243, "grad_norm": 1.2368966341018677, "learning_rate": 0.00015493582172549354, "loss": 1.4022, "step": 2343 }, { "epoch": 0.33576851453946427, "grad_norm": 1.183207392692566, "learning_rate": 0.00015489704644701166, "loss": 1.4821, "step": 2344 }, { "epoch": 0.33591176049276605, "grad_norm": 1.022795557975769, "learning_rate": 0.00015485825935055418, "loss": 1.5293, "step": 2345 }, { "epoch": 0.3360550064460679, "grad_norm": 1.0540978908538818, "learning_rate": 0.00015481946044447099, "loss": 1.4781, "step": 2346 }, { "epoch": 0.33619825239936973, "grad_norm": 1.2271462678909302, "learning_rate": 0.0001547806497371145, "loss": 1.5136, "step": 2347 }, { "epoch": 0.3363414983526715, "grad_norm": 1.192640781402588, "learning_rate": 0.0001547418272368398, "loss": 1.4809, "step": 2348 }, { "epoch": 0.33648474430597336, "grad_norm": 1.247894525527954, "learning_rate": 0.00015470299295200434, "loss": 1.5017, "step": 2349 }, { "epoch": 0.3366279902592752, "grad_norm": 1.5326576232910156, "learning_rate": 0.00015466414689096816, "loss": 1.4908, "step": 2350 }, { "epoch": 0.336771236212577, "grad_norm": 1.0735533237457275, "learning_rate": 0.00015462528906209387, "loss": 1.4924, "step": 2351 }, { "epoch": 0.3369144821658788, "grad_norm": 1.3608354330062866, "learning_rate": 0.0001545864194737466, "loss": 1.3531, "step": 2352 }, { "epoch": 0.3370577281191806, "grad_norm": 1.0713679790496826, "learning_rate": 0.00015454753813429407, "loss": 1.5085, "step": 2353 }, { "epoch": 0.33720097407248245, "grad_norm": 1.1861876249313354, "learning_rate": 0.0001545086450521064, "loss": 1.3409, "step": 2354 }, { "epoch": 0.3373442200257843, "grad_norm": 1.1499731540679932, "learning_rate": 0.00015446974023555632, "loss": 1.4401, "step": 2355 }, { "epoch": 0.3374874659790861, "grad_norm": 1.1002559661865234, "learning_rate": 0.00015443082369301912, "loss": 1.4117, "step": 2356 }, { "epoch": 0.3376307119323879, "grad_norm": 1.5714609622955322, "learning_rate": 0.00015439189543287247, "loss": 1.4257, "step": 2357 }, { "epoch": 0.3377739578856897, "grad_norm": 0.9928547739982605, "learning_rate": 0.00015435295546349678, "loss": 1.57, "step": 2358 }, { "epoch": 0.33791720383899154, "grad_norm": 1.5789834260940552, "learning_rate": 0.0001543140037932748, "loss": 1.5919, "step": 2359 }, { "epoch": 0.3380604497922934, "grad_norm": 1.1126257181167603, "learning_rate": 0.0001542750404305918, "loss": 1.5196, "step": 2360 }, { "epoch": 0.33820369574559517, "grad_norm": 1.1339365243911743, "learning_rate": 0.00015423606538383577, "loss": 1.4806, "step": 2361 }, { "epoch": 0.338346941698897, "grad_norm": 1.0545049905776978, "learning_rate": 0.00015419707866139696, "loss": 1.4189, "step": 2362 }, { "epoch": 0.33849018765219885, "grad_norm": 1.2604862451553345, "learning_rate": 0.00015415808027166832, "loss": 1.3928, "step": 2363 }, { "epoch": 0.33863343360550063, "grad_norm": 1.1889513731002808, "learning_rate": 0.00015411907022304516, "loss": 1.4717, "step": 2364 }, { "epoch": 0.3387766795588025, "grad_norm": 1.174076795578003, "learning_rate": 0.00015408004852392543, "loss": 1.6123, "step": 2365 }, { "epoch": 0.33891992551210426, "grad_norm": 1.198115587234497, "learning_rate": 0.00015404101518270955, "loss": 1.4888, "step": 2366 }, { "epoch": 0.3390631714654061, "grad_norm": 1.0147464275360107, "learning_rate": 0.00015400197020780038, "loss": 1.4535, "step": 2367 }, { "epoch": 0.33920641741870794, "grad_norm": 0.9284340739250183, "learning_rate": 0.0001539629136076034, "loss": 1.412, "step": 2368 }, { "epoch": 0.3393496633720097, "grad_norm": 1.1739517450332642, "learning_rate": 0.00015392384539052642, "loss": 1.4011, "step": 2369 }, { "epoch": 0.33949290932531156, "grad_norm": 1.3563016653060913, "learning_rate": 0.00015388476556498003, "loss": 1.4236, "step": 2370 }, { "epoch": 0.3396361552786134, "grad_norm": 0.977913498878479, "learning_rate": 0.000153845674139377, "loss": 1.6591, "step": 2371 }, { "epoch": 0.3397794012319152, "grad_norm": 1.0012260675430298, "learning_rate": 0.0001538065711221328, "loss": 1.5479, "step": 2372 }, { "epoch": 0.33992264718521703, "grad_norm": 1.0641791820526123, "learning_rate": 0.00015376745652166535, "loss": 1.4071, "step": 2373 }, { "epoch": 0.3400658931385188, "grad_norm": 1.4500188827514648, "learning_rate": 0.0001537283303463951, "loss": 1.5278, "step": 2374 }, { "epoch": 0.34020913909182066, "grad_norm": 1.109776258468628, "learning_rate": 0.00015368919260474483, "loss": 1.6064, "step": 2375 }, { "epoch": 0.3403523850451225, "grad_norm": 1.2239558696746826, "learning_rate": 0.00015365004330514, "loss": 1.5384, "step": 2376 }, { "epoch": 0.3404956309984243, "grad_norm": 1.0161080360412598, "learning_rate": 0.0001536108824560085, "loss": 1.4502, "step": 2377 }, { "epoch": 0.3406388769517261, "grad_norm": 0.9235588312149048, "learning_rate": 0.00015357171006578067, "loss": 1.4636, "step": 2378 }, { "epoch": 0.3407821229050279, "grad_norm": 0.9304924011230469, "learning_rate": 0.00015353252614288935, "loss": 1.5116, "step": 2379 }, { "epoch": 0.34092536885832975, "grad_norm": 0.9604153037071228, "learning_rate": 0.0001534933306957699, "loss": 1.5204, "step": 2380 }, { "epoch": 0.3410686148116316, "grad_norm": 1.0621789693832397, "learning_rate": 0.00015345412373286008, "loss": 1.5618, "step": 2381 }, { "epoch": 0.3412118607649334, "grad_norm": 1.3620684146881104, "learning_rate": 0.00015341490526260017, "loss": 1.5487, "step": 2382 }, { "epoch": 0.3413551067182352, "grad_norm": 1.0895845890045166, "learning_rate": 0.00015337567529343302, "loss": 1.573, "step": 2383 }, { "epoch": 0.34149835267153705, "grad_norm": 1.0534428358078003, "learning_rate": 0.00015333643383380383, "loss": 1.386, "step": 2384 }, { "epoch": 0.34164159862483884, "grad_norm": 1.2282408475875854, "learning_rate": 0.0001532971808921603, "loss": 1.6357, "step": 2385 }, { "epoch": 0.3417848445781407, "grad_norm": 1.139158844947815, "learning_rate": 0.00015325791647695258, "loss": 1.5316, "step": 2386 }, { "epoch": 0.34192809053144246, "grad_norm": 1.1749458312988281, "learning_rate": 0.00015321864059663343, "loss": 1.7085, "step": 2387 }, { "epoch": 0.3420713364847443, "grad_norm": 1.1126371622085571, "learning_rate": 0.00015317935325965789, "loss": 1.4726, "step": 2388 }, { "epoch": 0.34221458243804614, "grad_norm": 1.2727515697479248, "learning_rate": 0.00015314005447448356, "loss": 1.5935, "step": 2389 }, { "epoch": 0.34235782839134793, "grad_norm": 1.2181775569915771, "learning_rate": 0.00015310074424957054, "loss": 1.6155, "step": 2390 }, { "epoch": 0.34250107434464977, "grad_norm": 1.2520304918289185, "learning_rate": 0.00015306142259338129, "loss": 1.3922, "step": 2391 }, { "epoch": 0.3426443202979516, "grad_norm": 1.6494877338409424, "learning_rate": 0.00015302208951438085, "loss": 1.4734, "step": 2392 }, { "epoch": 0.3427875662512534, "grad_norm": 1.0668015480041504, "learning_rate": 0.00015298274502103664, "loss": 1.4376, "step": 2393 }, { "epoch": 0.34293081220455524, "grad_norm": 1.249842882156372, "learning_rate": 0.0001529433891218185, "loss": 1.361, "step": 2394 }, { "epoch": 0.343074058157857, "grad_norm": 1.028368353843689, "learning_rate": 0.0001529040218251989, "loss": 1.4806, "step": 2395 }, { "epoch": 0.34321730411115886, "grad_norm": 1.1280142068862915, "learning_rate": 0.00015286464313965254, "loss": 1.4978, "step": 2396 }, { "epoch": 0.3433605500644607, "grad_norm": 1.2706682682037354, "learning_rate": 0.0001528252530736567, "loss": 1.3706, "step": 2397 }, { "epoch": 0.3435037960177625, "grad_norm": 0.9882610440254211, "learning_rate": 0.00015278585163569107, "loss": 1.5396, "step": 2398 }, { "epoch": 0.3436470419710643, "grad_norm": 1.2555196285247803, "learning_rate": 0.0001527464388342379, "loss": 1.6746, "step": 2399 }, { "epoch": 0.3437902879243661, "grad_norm": 1.198387622833252, "learning_rate": 0.00015270701467778167, "loss": 1.7163, "step": 2400 }, { "epoch": 0.34393353387766795, "grad_norm": 1.296813726425171, "learning_rate": 0.00015266757917480948, "loss": 1.5786, "step": 2401 }, { "epoch": 0.3440767798309698, "grad_norm": 1.3276405334472656, "learning_rate": 0.00015262813233381082, "loss": 1.6853, "step": 2402 }, { "epoch": 0.3442200257842716, "grad_norm": 1.3754833936691284, "learning_rate": 0.00015258867416327758, "loss": 1.446, "step": 2403 }, { "epoch": 0.3443632717375734, "grad_norm": 1.0328303575515747, "learning_rate": 0.0001525492046717042, "loss": 1.4587, "step": 2404 }, { "epoch": 0.34450651769087526, "grad_norm": 1.3652325868606567, "learning_rate": 0.00015250972386758745, "loss": 1.6442, "step": 2405 }, { "epoch": 0.34464976364417704, "grad_norm": 1.258459210395813, "learning_rate": 0.00015247023175942657, "loss": 1.5014, "step": 2406 }, { "epoch": 0.3447930095974789, "grad_norm": 1.3175007104873657, "learning_rate": 0.00015243072835572318, "loss": 1.5166, "step": 2407 }, { "epoch": 0.34493625555078067, "grad_norm": 1.153136134147644, "learning_rate": 0.00015239121366498147, "loss": 1.4398, "step": 2408 }, { "epoch": 0.3450795015040825, "grad_norm": 1.221692442893982, "learning_rate": 0.00015235168769570792, "loss": 1.4915, "step": 2409 }, { "epoch": 0.34522274745738435, "grad_norm": 1.1645784378051758, "learning_rate": 0.0001523121504564115, "loss": 1.4491, "step": 2410 }, { "epoch": 0.34536599341068613, "grad_norm": 0.9600573182106018, "learning_rate": 0.00015227260195560367, "loss": 1.5591, "step": 2411 }, { "epoch": 0.345509239363988, "grad_norm": 1.0963431596755981, "learning_rate": 0.00015223304220179812, "loss": 1.5942, "step": 2412 }, { "epoch": 0.34565248531728976, "grad_norm": 1.0370967388153076, "learning_rate": 0.00015219347120351123, "loss": 1.4596, "step": 2413 }, { "epoch": 0.3457957312705916, "grad_norm": 1.1523886919021606, "learning_rate": 0.00015215388896926152, "loss": 1.5649, "step": 2414 }, { "epoch": 0.34593897722389344, "grad_norm": 1.2195308208465576, "learning_rate": 0.00015211429550757012, "loss": 1.5017, "step": 2415 }, { "epoch": 0.3460822231771952, "grad_norm": 0.9751647114753723, "learning_rate": 0.00015207469082696053, "loss": 1.412, "step": 2416 }, { "epoch": 0.34622546913049707, "grad_norm": 1.0202950239181519, "learning_rate": 0.00015203507493595866, "loss": 1.6492, "step": 2417 }, { "epoch": 0.3463687150837989, "grad_norm": 1.3397481441497803, "learning_rate": 0.00015199544784309286, "loss": 1.3786, "step": 2418 }, { "epoch": 0.3465119610371007, "grad_norm": 1.0124703645706177, "learning_rate": 0.00015195580955689378, "loss": 1.4109, "step": 2419 }, { "epoch": 0.34665520699040253, "grad_norm": 0.8878458142280579, "learning_rate": 0.00015191616008589461, "loss": 1.5293, "step": 2420 }, { "epoch": 0.3467984529437043, "grad_norm": 1.1833335161209106, "learning_rate": 0.00015187649943863095, "loss": 1.6248, "step": 2421 }, { "epoch": 0.34694169889700616, "grad_norm": 1.0171722173690796, "learning_rate": 0.00015183682762364066, "loss": 1.5245, "step": 2422 }, { "epoch": 0.347084944850308, "grad_norm": 1.3770653009414673, "learning_rate": 0.0001517971446494641, "loss": 1.4816, "step": 2423 }, { "epoch": 0.3472281908036098, "grad_norm": 1.164110779762268, "learning_rate": 0.00015175745052464415, "loss": 1.4355, "step": 2424 }, { "epoch": 0.3473714367569116, "grad_norm": 1.16922128200531, "learning_rate": 0.00015171774525772592, "loss": 1.6582, "step": 2425 }, { "epoch": 0.34751468271021346, "grad_norm": 1.3604944944381714, "learning_rate": 0.00015167802885725687, "loss": 1.4076, "step": 2426 }, { "epoch": 0.34765792866351525, "grad_norm": 1.4667576551437378, "learning_rate": 0.0001516383013317871, "loss": 1.6372, "step": 2427 }, { "epoch": 0.3478011746168171, "grad_norm": 1.068772554397583, "learning_rate": 0.00015159856268986887, "loss": 1.5354, "step": 2428 }, { "epoch": 0.3479444205701189, "grad_norm": 1.1650193929672241, "learning_rate": 0.00015155881294005697, "loss": 1.3784, "step": 2429 }, { "epoch": 0.3480876665234207, "grad_norm": 1.2051031589508057, "learning_rate": 0.00015151905209090854, "loss": 1.4646, "step": 2430 }, { "epoch": 0.34823091247672255, "grad_norm": 1.2185115814208984, "learning_rate": 0.0001514792801509831, "loss": 1.442, "step": 2431 }, { "epoch": 0.34837415843002434, "grad_norm": 1.1507916450500488, "learning_rate": 0.00015143949712884252, "loss": 1.452, "step": 2432 }, { "epoch": 0.3485174043833262, "grad_norm": 1.469565510749817, "learning_rate": 0.00015139970303305119, "loss": 1.332, "step": 2433 }, { "epoch": 0.34866065033662796, "grad_norm": 0.9992859959602356, "learning_rate": 0.00015135989787217567, "loss": 1.3623, "step": 2434 }, { "epoch": 0.3488038962899298, "grad_norm": 1.0058226585388184, "learning_rate": 0.00015132008165478516, "loss": 1.5755, "step": 2435 }, { "epoch": 0.34894714224323164, "grad_norm": 1.126875400543213, "learning_rate": 0.00015128025438945102, "loss": 1.5735, "step": 2436 }, { "epoch": 0.34909038819653343, "grad_norm": 1.0908129215240479, "learning_rate": 0.0001512404160847471, "loss": 1.3855, "step": 2437 }, { "epoch": 0.34923363414983527, "grad_norm": 1.0196985006332397, "learning_rate": 0.0001512005667492496, "loss": 1.4619, "step": 2438 }, { "epoch": 0.3493768801031371, "grad_norm": 1.063112497329712, "learning_rate": 0.0001511607063915371, "loss": 1.5497, "step": 2439 }, { "epoch": 0.3495201260564389, "grad_norm": 1.0817198753356934, "learning_rate": 0.00015112083502019056, "loss": 1.5627, "step": 2440 }, { "epoch": 0.34966337200974074, "grad_norm": 1.2257436513900757, "learning_rate": 0.00015108095264379325, "loss": 1.3234, "step": 2441 }, { "epoch": 0.3498066179630425, "grad_norm": 1.5121076107025146, "learning_rate": 0.00015104105927093092, "loss": 1.494, "step": 2442 }, { "epoch": 0.34994986391634436, "grad_norm": 1.0976884365081787, "learning_rate": 0.0001510011549101916, "loss": 1.6262, "step": 2443 }, { "epoch": 0.3500931098696462, "grad_norm": 1.332228660583496, "learning_rate": 0.00015096123957016565, "loss": 1.517, "step": 2444 }, { "epoch": 0.350236355822948, "grad_norm": 1.1686046123504639, "learning_rate": 0.000150921313259446, "loss": 1.432, "step": 2445 }, { "epoch": 0.3503796017762498, "grad_norm": 1.118184208869934, "learning_rate": 0.0001508813759866277, "loss": 1.4186, "step": 2446 }, { "epoch": 0.35052284772955167, "grad_norm": 1.0070140361785889, "learning_rate": 0.00015084142776030824, "loss": 1.419, "step": 2447 }, { "epoch": 0.35066609368285345, "grad_norm": 1.191916584968567, "learning_rate": 0.0001508014685890875, "loss": 1.6369, "step": 2448 }, { "epoch": 0.3508093396361553, "grad_norm": 1.3602490425109863, "learning_rate": 0.00015076149848156775, "loss": 1.4541, "step": 2449 }, { "epoch": 0.3509525855894571, "grad_norm": 1.035728931427002, "learning_rate": 0.00015072151744635352, "loss": 1.4169, "step": 2450 }, { "epoch": 0.3510958315427589, "grad_norm": 1.021276831626892, "learning_rate": 0.00015068152549205173, "loss": 1.4165, "step": 2451 }, { "epoch": 0.35123907749606076, "grad_norm": 1.0635027885437012, "learning_rate": 0.0001506415226272717, "loss": 1.4157, "step": 2452 }, { "epoch": 0.35138232344936254, "grad_norm": 0.9754464626312256, "learning_rate": 0.000150601508860625, "loss": 1.3942, "step": 2453 }, { "epoch": 0.3515255694026644, "grad_norm": 1.062987208366394, "learning_rate": 0.00015056148420072564, "loss": 1.3717, "step": 2454 }, { "epoch": 0.35166881535596617, "grad_norm": 1.1465121507644653, "learning_rate": 0.00015052144865618995, "loss": 1.6084, "step": 2455 }, { "epoch": 0.351812061309268, "grad_norm": 1.3296527862548828, "learning_rate": 0.0001504814022356366, "loss": 1.7109, "step": 2456 }, { "epoch": 0.35195530726256985, "grad_norm": 1.081697702407837, "learning_rate": 0.0001504413449476865, "loss": 1.6029, "step": 2457 }, { "epoch": 0.35209855321587163, "grad_norm": 1.0676558017730713, "learning_rate": 0.00015040127680096313, "loss": 1.4338, "step": 2458 }, { "epoch": 0.3522417991691735, "grad_norm": 1.273602843284607, "learning_rate": 0.00015036119780409207, "loss": 1.4245, "step": 2459 }, { "epoch": 0.3523850451224753, "grad_norm": 1.1675504446029663, "learning_rate": 0.00015032110796570137, "loss": 1.2969, "step": 2460 }, { "epoch": 0.3525282910757771, "grad_norm": 1.0443973541259766, "learning_rate": 0.00015028100729442138, "loss": 1.403, "step": 2461 }, { "epoch": 0.35267153702907894, "grad_norm": 1.1359124183654785, "learning_rate": 0.00015024089579888478, "loss": 1.608, "step": 2462 }, { "epoch": 0.3528147829823807, "grad_norm": 1.2274936437606812, "learning_rate": 0.0001502007734877266, "loss": 1.2706, "step": 2463 }, { "epoch": 0.35295802893568257, "grad_norm": 1.1904631853103638, "learning_rate": 0.00015016064036958414, "loss": 1.5274, "step": 2464 }, { "epoch": 0.3531012748889844, "grad_norm": 1.1383012533187866, "learning_rate": 0.00015012049645309712, "loss": 1.3954, "step": 2465 }, { "epoch": 0.3532445208422862, "grad_norm": 1.0985225439071655, "learning_rate": 0.0001500803417469075, "loss": 1.5561, "step": 2466 }, { "epoch": 0.35338776679558803, "grad_norm": 1.1519122123718262, "learning_rate": 0.00015004017625965958, "loss": 1.4451, "step": 2467 }, { "epoch": 0.3535310127488898, "grad_norm": 1.2340168952941895, "learning_rate": 0.00015000000000000001, "loss": 1.6654, "step": 2468 }, { "epoch": 0.35367425870219166, "grad_norm": 1.180052638053894, "learning_rate": 0.00014995981297657776, "loss": 1.4239, "step": 2469 }, { "epoch": 0.3538175046554935, "grad_norm": 1.0578117370605469, "learning_rate": 0.00014991961519804408, "loss": 1.7077, "step": 2470 }, { "epoch": 0.3539607506087953, "grad_norm": 1.0340454578399658, "learning_rate": 0.00014987940667305258, "loss": 1.2842, "step": 2471 }, { "epoch": 0.3541039965620971, "grad_norm": 1.10843026638031, "learning_rate": 0.00014983918741025916, "loss": 1.4239, "step": 2472 }, { "epoch": 0.35424724251539896, "grad_norm": 1.1356840133666992, "learning_rate": 0.00014979895741832198, "loss": 1.6478, "step": 2473 }, { "epoch": 0.35439048846870075, "grad_norm": 1.3381547927856445, "learning_rate": 0.00014975871670590163, "loss": 1.5453, "step": 2474 }, { "epoch": 0.3545337344220026, "grad_norm": 1.352461576461792, "learning_rate": 0.0001497184652816609, "loss": 1.4486, "step": 2475 }, { "epoch": 0.3546769803753044, "grad_norm": 1.6107097864151, "learning_rate": 0.0001496782031542649, "loss": 1.7178, "step": 2476 }, { "epoch": 0.3548202263286062, "grad_norm": 1.3607009649276733, "learning_rate": 0.0001496379303323812, "loss": 1.4537, "step": 2477 }, { "epoch": 0.35496347228190805, "grad_norm": 1.397679090499878, "learning_rate": 0.00014959764682467933, "loss": 1.5663, "step": 2478 }, { "epoch": 0.35510671823520984, "grad_norm": 1.0773042440414429, "learning_rate": 0.00014955735263983154, "loss": 1.5157, "step": 2479 }, { "epoch": 0.3552499641885117, "grad_norm": 1.7587699890136719, "learning_rate": 0.00014951704778651202, "loss": 1.5582, "step": 2480 }, { "epoch": 0.3553932101418135, "grad_norm": 1.1093618869781494, "learning_rate": 0.00014947673227339755, "loss": 1.4858, "step": 2481 }, { "epoch": 0.3555364560951153, "grad_norm": 1.3127498626708984, "learning_rate": 0.00014943640610916688, "loss": 1.4025, "step": 2482 }, { "epoch": 0.35567970204841715, "grad_norm": 1.0097037553787231, "learning_rate": 0.00014939606930250142, "loss": 1.4505, "step": 2483 }, { "epoch": 0.35582294800171893, "grad_norm": 1.1122691631317139, "learning_rate": 0.00014935572186208456, "loss": 1.5633, "step": 2484 }, { "epoch": 0.35596619395502077, "grad_norm": 1.183367371559143, "learning_rate": 0.00014931536379660213, "loss": 1.4499, "step": 2485 }, { "epoch": 0.3561094399083226, "grad_norm": 1.1898260116577148, "learning_rate": 0.00014927499511474228, "loss": 1.4613, "step": 2486 }, { "epoch": 0.3562526858616244, "grad_norm": 1.1713939905166626, "learning_rate": 0.00014923461582519532, "loss": 1.3852, "step": 2487 }, { "epoch": 0.35639593181492624, "grad_norm": 1.0173311233520508, "learning_rate": 0.00014919422593665397, "loss": 1.5917, "step": 2488 }, { "epoch": 0.356539177768228, "grad_norm": 1.647491216659546, "learning_rate": 0.00014915382545781315, "loss": 1.4458, "step": 2489 }, { "epoch": 0.35668242372152986, "grad_norm": 1.0861961841583252, "learning_rate": 0.00014911341439737002, "loss": 1.5451, "step": 2490 }, { "epoch": 0.3568256696748317, "grad_norm": 1.187209129333496, "learning_rate": 0.00014907299276402418, "loss": 1.417, "step": 2491 }, { "epoch": 0.3569689156281335, "grad_norm": 1.0657665729522705, "learning_rate": 0.00014903256056647736, "loss": 1.4407, "step": 2492 }, { "epoch": 0.35711216158143533, "grad_norm": 1.3011682033538818, "learning_rate": 0.0001489921178134336, "loss": 1.4028, "step": 2493 }, { "epoch": 0.35725540753473717, "grad_norm": 0.9900391101837158, "learning_rate": 0.0001489516645135993, "loss": 1.4448, "step": 2494 }, { "epoch": 0.35739865348803895, "grad_norm": 1.2444653511047363, "learning_rate": 0.00014891120067568294, "loss": 1.509, "step": 2495 }, { "epoch": 0.3575418994413408, "grad_norm": 0.9945913553237915, "learning_rate": 0.00014887072630839546, "loss": 1.5719, "step": 2496 }, { "epoch": 0.3576851453946426, "grad_norm": 1.2441489696502686, "learning_rate": 0.00014883024142044995, "loss": 1.4101, "step": 2497 }, { "epoch": 0.3578283913479444, "grad_norm": 1.2306331396102905, "learning_rate": 0.00014878974602056181, "loss": 1.2832, "step": 2498 }, { "epoch": 0.35797163730124626, "grad_norm": 1.2081427574157715, "learning_rate": 0.00014874924011744876, "loss": 1.5041, "step": 2499 }, { "epoch": 0.35811488325454804, "grad_norm": 1.2065150737762451, "learning_rate": 0.00014870872371983062, "loss": 1.4311, "step": 2500 }, { "epoch": 0.3582581292078499, "grad_norm": 1.1523321866989136, "learning_rate": 0.00014866819683642966, "loss": 1.2966, "step": 2501 }, { "epoch": 0.3584013751611517, "grad_norm": 1.0647544860839844, "learning_rate": 0.00014862765947597025, "loss": 1.5409, "step": 2502 }, { "epoch": 0.3585446211144535, "grad_norm": 1.2403420209884644, "learning_rate": 0.00014858711164717912, "loss": 1.596, "step": 2503 }, { "epoch": 0.35868786706775535, "grad_norm": 1.1628293991088867, "learning_rate": 0.00014854655335878517, "loss": 1.4383, "step": 2504 }, { "epoch": 0.35883111302105714, "grad_norm": 0.9906929135322571, "learning_rate": 0.00014850598461951963, "loss": 1.3916, "step": 2505 }, { "epoch": 0.358974358974359, "grad_norm": 1.183174729347229, "learning_rate": 0.00014846540543811596, "loss": 1.3881, "step": 2506 }, { "epoch": 0.3591176049276608, "grad_norm": 1.2935236692428589, "learning_rate": 0.0001484248158233098, "loss": 1.4778, "step": 2507 }, { "epoch": 0.3592608508809626, "grad_norm": 0.9641519784927368, "learning_rate": 0.00014838421578383914, "loss": 1.2966, "step": 2508 }, { "epoch": 0.35940409683426444, "grad_norm": 0.9788966178894043, "learning_rate": 0.00014834360532844413, "loss": 1.4717, "step": 2509 }, { "epoch": 0.3595473427875662, "grad_norm": 1.3540804386138916, "learning_rate": 0.0001483029844658672, "loss": 1.5376, "step": 2510 }, { "epoch": 0.35969058874086807, "grad_norm": 1.000067949295044, "learning_rate": 0.00014826235320485306, "loss": 1.617, "step": 2511 }, { "epoch": 0.3598338346941699, "grad_norm": 1.2987313270568848, "learning_rate": 0.00014822171155414856, "loss": 1.5106, "step": 2512 }, { "epoch": 0.3599770806474717, "grad_norm": 1.5515838861465454, "learning_rate": 0.00014818105952250292, "loss": 1.3537, "step": 2513 }, { "epoch": 0.36012032660077353, "grad_norm": 1.267909288406372, "learning_rate": 0.0001481403971186674, "loss": 1.4506, "step": 2514 }, { "epoch": 0.3602635725540754, "grad_norm": 1.2744228839874268, "learning_rate": 0.00014809972435139568, "loss": 1.3024, "step": 2515 }, { "epoch": 0.36040681850737716, "grad_norm": 1.0210374593734741, "learning_rate": 0.0001480590412294436, "loss": 1.6707, "step": 2516 }, { "epoch": 0.360550064460679, "grad_norm": 1.1282813549041748, "learning_rate": 0.00014801834776156925, "loss": 1.517, "step": 2517 }, { "epoch": 0.3606933104139808, "grad_norm": 1.2667559385299683, "learning_rate": 0.00014797764395653283, "loss": 1.3967, "step": 2518 }, { "epoch": 0.3608365563672826, "grad_norm": 1.1080670356750488, "learning_rate": 0.000147936929823097, "loss": 1.4713, "step": 2519 }, { "epoch": 0.36097980232058446, "grad_norm": 1.3098253011703491, "learning_rate": 0.00014789620537002639, "loss": 1.5435, "step": 2520 }, { "epoch": 0.36112304827388625, "grad_norm": 1.2551429271697998, "learning_rate": 0.000147855470606088, "loss": 1.506, "step": 2521 }, { "epoch": 0.3612662942271881, "grad_norm": 1.1395729780197144, "learning_rate": 0.00014781472554005107, "loss": 1.5119, "step": 2522 }, { "epoch": 0.3614095401804899, "grad_norm": 1.1035698652267456, "learning_rate": 0.0001477739701806869, "loss": 1.3335, "step": 2523 }, { "epoch": 0.3615527861337917, "grad_norm": 1.1796032190322876, "learning_rate": 0.00014773320453676924, "loss": 1.4708, "step": 2524 }, { "epoch": 0.36169603208709356, "grad_norm": 1.3056159019470215, "learning_rate": 0.00014769242861707382, "loss": 1.5562, "step": 2525 }, { "epoch": 0.36183927804039534, "grad_norm": 0.9874420762062073, "learning_rate": 0.00014765164243037875, "loss": 1.4837, "step": 2526 }, { "epoch": 0.3619825239936972, "grad_norm": 1.363956093788147, "learning_rate": 0.0001476108459854642, "loss": 1.5417, "step": 2527 }, { "epoch": 0.362125769946999, "grad_norm": 1.66774320602417, "learning_rate": 0.00014757003929111276, "loss": 1.5354, "step": 2528 }, { "epoch": 0.3622690159003008, "grad_norm": 1.3750470876693726, "learning_rate": 0.000147529222356109, "loss": 1.4314, "step": 2529 }, { "epoch": 0.36241226185360265, "grad_norm": 1.2411657571792603, "learning_rate": 0.00014748839518923985, "loss": 1.5396, "step": 2530 }, { "epoch": 0.36255550780690443, "grad_norm": 1.1171464920043945, "learning_rate": 0.00014744755779929437, "loss": 1.3958, "step": 2531 }, { "epoch": 0.36269875376020627, "grad_norm": 1.0497206449508667, "learning_rate": 0.00014740671019506383, "loss": 1.5383, "step": 2532 }, { "epoch": 0.3628419997135081, "grad_norm": 0.9318369030952454, "learning_rate": 0.00014736585238534172, "loss": 1.4715, "step": 2533 }, { "epoch": 0.3629852456668099, "grad_norm": 1.3428465127944946, "learning_rate": 0.00014732498437892373, "loss": 1.6655, "step": 2534 }, { "epoch": 0.36312849162011174, "grad_norm": 1.0421596765518188, "learning_rate": 0.0001472841061846077, "loss": 1.5001, "step": 2535 }, { "epoch": 0.3632717375734136, "grad_norm": 1.2710673809051514, "learning_rate": 0.0001472432178111937, "loss": 1.5641, "step": 2536 }, { "epoch": 0.36341498352671536, "grad_norm": 1.126789927482605, "learning_rate": 0.000147202319267484, "loss": 1.5096, "step": 2537 }, { "epoch": 0.3635582294800172, "grad_norm": 1.4331297874450684, "learning_rate": 0.00014716141056228305, "loss": 1.3318, "step": 2538 }, { "epoch": 0.363701475433319, "grad_norm": 1.083577275276184, "learning_rate": 0.00014712049170439748, "loss": 1.4214, "step": 2539 }, { "epoch": 0.36384472138662083, "grad_norm": 1.1746078729629517, "learning_rate": 0.00014707956270263605, "loss": 1.5188, "step": 2540 }, { "epoch": 0.36398796733992267, "grad_norm": 1.186699628829956, "learning_rate": 0.00014703862356580985, "loss": 1.4362, "step": 2541 }, { "epoch": 0.36413121329322445, "grad_norm": 1.1415461301803589, "learning_rate": 0.000146997674302732, "loss": 1.4489, "step": 2542 }, { "epoch": 0.3642744592465263, "grad_norm": 1.063730239868164, "learning_rate": 0.00014695671492221792, "loss": 1.539, "step": 2543 }, { "epoch": 0.3644177051998281, "grad_norm": 1.0581134557724, "learning_rate": 0.0001469157454330851, "loss": 1.3613, "step": 2544 }, { "epoch": 0.3645609511531299, "grad_norm": 1.1400113105773926, "learning_rate": 0.00014687476584415325, "loss": 1.6471, "step": 2545 }, { "epoch": 0.36470419710643176, "grad_norm": 1.0558629035949707, "learning_rate": 0.00014683377616424428, "loss": 1.3941, "step": 2546 }, { "epoch": 0.36484744305973354, "grad_norm": 1.0809000730514526, "learning_rate": 0.0001467927764021823, "loss": 1.2795, "step": 2547 }, { "epoch": 0.3649906890130354, "grad_norm": 1.088512659072876, "learning_rate": 0.00014675176656679345, "loss": 1.5487, "step": 2548 }, { "epoch": 0.3651339349663372, "grad_norm": 1.121283769607544, "learning_rate": 0.0001467107466669062, "loss": 1.4535, "step": 2549 }, { "epoch": 0.365277180919639, "grad_norm": 1.162358283996582, "learning_rate": 0.00014666971671135112, "loss": 1.3388, "step": 2550 }, { "epoch": 0.36542042687294085, "grad_norm": 1.0518362522125244, "learning_rate": 0.00014662867670896094, "loss": 1.6026, "step": 2551 }, { "epoch": 0.36556367282624264, "grad_norm": 1.0328153371810913, "learning_rate": 0.00014658762666857052, "loss": 1.5295, "step": 2552 }, { "epoch": 0.3657069187795445, "grad_norm": 1.1687570810317993, "learning_rate": 0.00014654656659901695, "loss": 1.4954, "step": 2553 }, { "epoch": 0.3658501647328463, "grad_norm": 1.1708500385284424, "learning_rate": 0.00014650549650913945, "loss": 1.5348, "step": 2554 }, { "epoch": 0.3659934106861481, "grad_norm": 1.1635791063308716, "learning_rate": 0.00014646441640777936, "loss": 1.4963, "step": 2555 }, { "epoch": 0.36613665663944994, "grad_norm": 1.6929184198379517, "learning_rate": 0.0001464233263037803, "loss": 1.4126, "step": 2556 }, { "epoch": 0.3662799025927517, "grad_norm": 1.0210154056549072, "learning_rate": 0.00014638222620598777, "loss": 1.4841, "step": 2557 }, { "epoch": 0.36642314854605357, "grad_norm": 1.0416018962860107, "learning_rate": 0.00014634111612324982, "loss": 1.524, "step": 2558 }, { "epoch": 0.3665663944993554, "grad_norm": 0.9904972910881042, "learning_rate": 0.0001462999960644163, "loss": 1.4398, "step": 2559 }, { "epoch": 0.3667096404526572, "grad_norm": 1.1696679592132568, "learning_rate": 0.00014625886603833937, "loss": 1.7013, "step": 2560 }, { "epoch": 0.36685288640595903, "grad_norm": 1.2703205347061157, "learning_rate": 0.0001462177260538733, "loss": 1.4032, "step": 2561 }, { "epoch": 0.3669961323592609, "grad_norm": 1.2331905364990234, "learning_rate": 0.00014617657611987455, "loss": 1.5215, "step": 2562 }, { "epoch": 0.36713937831256266, "grad_norm": 0.9010087251663208, "learning_rate": 0.00014613541624520165, "loss": 1.41, "step": 2563 }, { "epoch": 0.3672826242658645, "grad_norm": 1.2480542659759521, "learning_rate": 0.0001460942464387153, "loss": 1.3595, "step": 2564 }, { "epoch": 0.3674258702191663, "grad_norm": 1.2442294359207153, "learning_rate": 0.0001460530667092783, "loss": 1.5293, "step": 2565 }, { "epoch": 0.3675691161724681, "grad_norm": 1.25730562210083, "learning_rate": 0.00014601187706575572, "loss": 1.4981, "step": 2566 }, { "epoch": 0.36771236212576996, "grad_norm": 1.1515611410140991, "learning_rate": 0.00014597067751701465, "loss": 1.6066, "step": 2567 }, { "epoch": 0.36785560807907175, "grad_norm": 1.016032099723816, "learning_rate": 0.00014592946807192426, "loss": 1.4767, "step": 2568 }, { "epoch": 0.3679988540323736, "grad_norm": 0.9360166192054749, "learning_rate": 0.000145888248739356, "loss": 1.4088, "step": 2569 }, { "epoch": 0.36814209998567543, "grad_norm": 1.203100562095642, "learning_rate": 0.00014584701952818333, "loss": 1.4096, "step": 2570 }, { "epoch": 0.3682853459389772, "grad_norm": 0.9581813216209412, "learning_rate": 0.00014580578044728188, "loss": 1.5297, "step": 2571 }, { "epoch": 0.36842859189227906, "grad_norm": 1.2191156148910522, "learning_rate": 0.00014576453150552947, "loss": 1.2974, "step": 2572 }, { "epoch": 0.36857183784558084, "grad_norm": 1.1174956560134888, "learning_rate": 0.00014572327271180586, "loss": 1.4474, "step": 2573 }, { "epoch": 0.3687150837988827, "grad_norm": 0.9789025783538818, "learning_rate": 0.00014568200407499314, "loss": 1.4183, "step": 2574 }, { "epoch": 0.3688583297521845, "grad_norm": 1.4054831266403198, "learning_rate": 0.00014564072560397542, "loss": 1.5604, "step": 2575 }, { "epoch": 0.3690015757054863, "grad_norm": 1.24605131149292, "learning_rate": 0.00014559943730763889, "loss": 1.4583, "step": 2576 }, { "epoch": 0.36914482165878815, "grad_norm": 1.1597728729248047, "learning_rate": 0.0001455581391948719, "loss": 1.4862, "step": 2577 }, { "epoch": 0.36928806761208993, "grad_norm": 1.1954950094223022, "learning_rate": 0.00014551683127456494, "loss": 1.4137, "step": 2578 }, { "epoch": 0.3694313135653918, "grad_norm": 1.4966473579406738, "learning_rate": 0.0001454755135556106, "loss": 1.4503, "step": 2579 }, { "epoch": 0.3695745595186936, "grad_norm": 1.1068466901779175, "learning_rate": 0.0001454341860469035, "loss": 1.3949, "step": 2580 }, { "epoch": 0.3697178054719954, "grad_norm": 0.9411541819572449, "learning_rate": 0.0001453928487573405, "loss": 1.401, "step": 2581 }, { "epoch": 0.36986105142529724, "grad_norm": 1.118491530418396, "learning_rate": 0.0001453515016958204, "loss": 1.2846, "step": 2582 }, { "epoch": 0.3700042973785991, "grad_norm": 1.0983659029006958, "learning_rate": 0.00014531014487124432, "loss": 1.4528, "step": 2583 }, { "epoch": 0.37014754333190086, "grad_norm": 1.2989587783813477, "learning_rate": 0.00014526877829251528, "loss": 1.4982, "step": 2584 }, { "epoch": 0.3702907892852027, "grad_norm": 1.1976028680801392, "learning_rate": 0.00014522740196853853, "loss": 1.5517, "step": 2585 }, { "epoch": 0.3704340352385045, "grad_norm": 1.0420176982879639, "learning_rate": 0.0001451860159082213, "loss": 1.3698, "step": 2586 }, { "epoch": 0.37057728119180633, "grad_norm": 1.1175856590270996, "learning_rate": 0.00014514462012047306, "loss": 1.5734, "step": 2587 }, { "epoch": 0.37072052714510817, "grad_norm": 1.1818602085113525, "learning_rate": 0.00014510321461420523, "loss": 1.5215, "step": 2588 }, { "epoch": 0.37086377309840995, "grad_norm": 1.0060656070709229, "learning_rate": 0.00014506179939833142, "loss": 1.629, "step": 2589 }, { "epoch": 0.3710070190517118, "grad_norm": 1.1051430702209473, "learning_rate": 0.00014502037448176734, "loss": 1.2696, "step": 2590 }, { "epoch": 0.37115026500501364, "grad_norm": 1.2394700050354004, "learning_rate": 0.0001449789398734307, "loss": 1.4668, "step": 2591 }, { "epoch": 0.3712935109583154, "grad_norm": 1.0987330675125122, "learning_rate": 0.00014493749558224138, "loss": 1.5635, "step": 2592 }, { "epoch": 0.37143675691161726, "grad_norm": 1.19431471824646, "learning_rate": 0.00014489604161712128, "loss": 1.4333, "step": 2593 }, { "epoch": 0.37158000286491905, "grad_norm": 1.1408723592758179, "learning_rate": 0.0001448545779869944, "loss": 1.536, "step": 2594 }, { "epoch": 0.3717232488182209, "grad_norm": 1.090216875076294, "learning_rate": 0.00014481310470078687, "loss": 1.4401, "step": 2595 }, { "epoch": 0.3718664947715227, "grad_norm": 1.2622264623641968, "learning_rate": 0.00014477162176742688, "loss": 1.5633, "step": 2596 }, { "epoch": 0.3720097407248245, "grad_norm": 1.4285918474197388, "learning_rate": 0.00014473012919584462, "loss": 1.761, "step": 2597 }, { "epoch": 0.37215298667812635, "grad_norm": 1.0353248119354248, "learning_rate": 0.00014468862699497243, "loss": 1.5627, "step": 2598 }, { "epoch": 0.37229623263142814, "grad_norm": 1.1372559070587158, "learning_rate": 0.00014464711517374475, "loss": 1.4426, "step": 2599 }, { "epoch": 0.37243947858473, "grad_norm": 1.1176289319992065, "learning_rate": 0.000144605593741098, "loss": 1.4568, "step": 2600 }, { "epoch": 0.3725827245380318, "grad_norm": 1.2003538608551025, "learning_rate": 0.00014456406270597073, "loss": 1.5814, "step": 2601 }, { "epoch": 0.3727259704913336, "grad_norm": 1.1750000715255737, "learning_rate": 0.00014452252207730354, "loss": 1.3707, "step": 2602 }, { "epoch": 0.37286921644463544, "grad_norm": 1.5065418481826782, "learning_rate": 0.00014448097186403914, "loss": 1.4056, "step": 2603 }, { "epoch": 0.3730124623979373, "grad_norm": 1.2554476261138916, "learning_rate": 0.0001444394120751222, "loss": 1.4975, "step": 2604 }, { "epoch": 0.37315570835123907, "grad_norm": 1.2604297399520874, "learning_rate": 0.0001443978427194996, "loss": 1.4896, "step": 2605 }, { "epoch": 0.3732989543045409, "grad_norm": 1.3805670738220215, "learning_rate": 0.0001443562638061201, "loss": 1.3894, "step": 2606 }, { "epoch": 0.3734422002578427, "grad_norm": 1.1768583059310913, "learning_rate": 0.00014431467534393463, "loss": 1.4104, "step": 2607 }, { "epoch": 0.37358544621114453, "grad_norm": 1.0680559873580933, "learning_rate": 0.0001442730773418962, "loss": 1.4938, "step": 2608 }, { "epoch": 0.3737286921644464, "grad_norm": 1.3203309774398804, "learning_rate": 0.0001442314698089598, "loss": 1.4271, "step": 2609 }, { "epoch": 0.37387193811774816, "grad_norm": 1.951980710029602, "learning_rate": 0.00014418985275408254, "loss": 1.3663, "step": 2610 }, { "epoch": 0.37401518407105, "grad_norm": 1.2458083629608154, "learning_rate": 0.00014414822618622345, "loss": 1.5576, "step": 2611 }, { "epoch": 0.3741584300243518, "grad_norm": 1.2693766355514526, "learning_rate": 0.00014410659011434383, "loss": 1.4721, "step": 2612 }, { "epoch": 0.3743016759776536, "grad_norm": 1.5349161624908447, "learning_rate": 0.00014406494454740677, "loss": 1.4824, "step": 2613 }, { "epoch": 0.37444492193095547, "grad_norm": 1.2987388372421265, "learning_rate": 0.0001440232894943776, "loss": 1.3801, "step": 2614 }, { "epoch": 0.37458816788425725, "grad_norm": 1.2265496253967285, "learning_rate": 0.00014398162496422363, "loss": 1.4193, "step": 2615 }, { "epoch": 0.3747314138375591, "grad_norm": 1.193305253982544, "learning_rate": 0.00014393995096591416, "loss": 1.5396, "step": 2616 }, { "epoch": 0.37487465979086093, "grad_norm": 1.2383296489715576, "learning_rate": 0.0001438982675084206, "loss": 1.3536, "step": 2617 }, { "epoch": 0.3750179057441627, "grad_norm": 1.393020510673523, "learning_rate": 0.00014385657460071639, "loss": 1.7502, "step": 2618 }, { "epoch": 0.37516115169746456, "grad_norm": 0.9689300656318665, "learning_rate": 0.0001438148722517769, "loss": 1.3756, "step": 2619 }, { "epoch": 0.37530439765076634, "grad_norm": 1.2859368324279785, "learning_rate": 0.0001437731604705797, "loss": 1.281, "step": 2620 }, { "epoch": 0.3754476436040682, "grad_norm": 1.1940879821777344, "learning_rate": 0.00014373143926610425, "loss": 1.4497, "step": 2621 }, { "epoch": 0.37559088955737, "grad_norm": 1.3047678470611572, "learning_rate": 0.0001436897086473321, "loss": 1.6069, "step": 2622 }, { "epoch": 0.3757341355106718, "grad_norm": 1.130306601524353, "learning_rate": 0.00014364796862324685, "loss": 1.443, "step": 2623 }, { "epoch": 0.37587738146397365, "grad_norm": 1.0784518718719482, "learning_rate": 0.00014360621920283406, "loss": 1.6053, "step": 2624 }, { "epoch": 0.3760206274172755, "grad_norm": 1.1287773847579956, "learning_rate": 0.00014356446039508138, "loss": 1.4548, "step": 2625 }, { "epoch": 0.3761638733705773, "grad_norm": 1.3351603746414185, "learning_rate": 0.00014352269220897844, "loss": 1.488, "step": 2626 }, { "epoch": 0.3763071193238791, "grad_norm": 1.167651891708374, "learning_rate": 0.00014348091465351683, "loss": 1.5562, "step": 2627 }, { "epoch": 0.3764503652771809, "grad_norm": 1.1213951110839844, "learning_rate": 0.00014343912773769036, "loss": 1.5517, "step": 2628 }, { "epoch": 0.37659361123048274, "grad_norm": 1.0198639631271362, "learning_rate": 0.00014339733147049458, "loss": 1.5404, "step": 2629 }, { "epoch": 0.3767368571837846, "grad_norm": 1.0821881294250488, "learning_rate": 0.0001433555258609273, "loss": 1.5992, "step": 2630 }, { "epoch": 0.37688010313708636, "grad_norm": 1.0415178537368774, "learning_rate": 0.0001433137109179881, "loss": 1.3492, "step": 2631 }, { "epoch": 0.3770233490903882, "grad_norm": 1.2015984058380127, "learning_rate": 0.00014327188665067887, "loss": 1.5104, "step": 2632 }, { "epoch": 0.37716659504369, "grad_norm": 1.0819928646087646, "learning_rate": 0.00014323005306800322, "loss": 1.4553, "step": 2633 }, { "epoch": 0.37730984099699183, "grad_norm": 1.2092911005020142, "learning_rate": 0.00014318821017896693, "loss": 1.6452, "step": 2634 }, { "epoch": 0.37745308695029367, "grad_norm": 1.185867190361023, "learning_rate": 0.00014314635799257775, "loss": 1.466, "step": 2635 }, { "epoch": 0.37759633290359546, "grad_norm": 1.0372024774551392, "learning_rate": 0.00014310449651784536, "loss": 1.5949, "step": 2636 }, { "epoch": 0.3777395788568973, "grad_norm": 1.2412039041519165, "learning_rate": 0.00014306262576378157, "loss": 1.4369, "step": 2637 }, { "epoch": 0.37788282481019914, "grad_norm": 0.9821012020111084, "learning_rate": 0.00014302074573940008, "loss": 1.62, "step": 2638 }, { "epoch": 0.3780260707635009, "grad_norm": 0.9968156218528748, "learning_rate": 0.00014297885645371663, "loss": 1.4888, "step": 2639 }, { "epoch": 0.37816931671680276, "grad_norm": 1.2157645225524902, "learning_rate": 0.00014293695791574895, "loss": 1.4829, "step": 2640 }, { "epoch": 0.37831256267010455, "grad_norm": 1.0411347150802612, "learning_rate": 0.00014289505013451677, "loss": 1.4413, "step": 2641 }, { "epoch": 0.3784558086234064, "grad_norm": 1.2033705711364746, "learning_rate": 0.00014285313311904177, "loss": 1.4252, "step": 2642 }, { "epoch": 0.3785990545767082, "grad_norm": 1.2810757160186768, "learning_rate": 0.00014281120687834764, "loss": 1.446, "step": 2643 }, { "epoch": 0.37874230053001, "grad_norm": 1.0567219257354736, "learning_rate": 0.0001427692714214601, "loss": 1.3891, "step": 2644 }, { "epoch": 0.37888554648331185, "grad_norm": 1.2776429653167725, "learning_rate": 0.0001427273267574068, "loss": 1.4431, "step": 2645 }, { "epoch": 0.3790287924366137, "grad_norm": 1.1671308279037476, "learning_rate": 0.0001426853728952174, "loss": 1.4963, "step": 2646 }, { "epoch": 0.3791720383899155, "grad_norm": 1.1742963790893555, "learning_rate": 0.0001426434098439235, "loss": 1.525, "step": 2647 }, { "epoch": 0.3793152843432173, "grad_norm": 1.1647059917449951, "learning_rate": 0.0001426014376125587, "loss": 1.4975, "step": 2648 }, { "epoch": 0.3794585302965191, "grad_norm": 1.1187360286712646, "learning_rate": 0.00014255945621015863, "loss": 1.5707, "step": 2649 }, { "epoch": 0.37960177624982094, "grad_norm": 1.0042479038238525, "learning_rate": 0.00014251746564576082, "loss": 1.2743, "step": 2650 }, { "epoch": 0.3797450222031228, "grad_norm": 1.220226526260376, "learning_rate": 0.0001424754659284048, "loss": 1.3941, "step": 2651 }, { "epoch": 0.37988826815642457, "grad_norm": 1.3280271291732788, "learning_rate": 0.00014243345706713205, "loss": 1.4091, "step": 2652 }, { "epoch": 0.3800315141097264, "grad_norm": 1.0047911405563354, "learning_rate": 0.0001423914390709861, "loss": 1.5594, "step": 2653 }, { "epoch": 0.3801747600630282, "grad_norm": 1.0218576192855835, "learning_rate": 0.0001423494119490123, "loss": 1.4555, "step": 2654 }, { "epoch": 0.38031800601633003, "grad_norm": 1.0564790964126587, "learning_rate": 0.00014230737571025812, "loss": 1.4583, "step": 2655 }, { "epoch": 0.3804612519696319, "grad_norm": 1.1990478038787842, "learning_rate": 0.00014226533036377286, "loss": 1.4324, "step": 2656 }, { "epoch": 0.38060449792293366, "grad_norm": 1.4129847288131714, "learning_rate": 0.00014222327591860792, "loss": 1.425, "step": 2657 }, { "epoch": 0.3807477438762355, "grad_norm": 1.1036990880966187, "learning_rate": 0.00014218121238381652, "loss": 1.4147, "step": 2658 }, { "epoch": 0.38089098982953734, "grad_norm": 1.2164376974105835, "learning_rate": 0.0001421391397684539, "loss": 1.6163, "step": 2659 }, { "epoch": 0.3810342357828391, "grad_norm": 1.1472569704055786, "learning_rate": 0.00014209705808157733, "loss": 1.5706, "step": 2660 }, { "epoch": 0.38117748173614097, "grad_norm": 1.2388029098510742, "learning_rate": 0.00014205496733224582, "loss": 1.6083, "step": 2661 }, { "epoch": 0.38132072768944275, "grad_norm": 1.1360584497451782, "learning_rate": 0.00014201286752952056, "loss": 1.473, "step": 2662 }, { "epoch": 0.3814639736427446, "grad_norm": 1.0844684839248657, "learning_rate": 0.00014197075868246461, "loss": 1.6469, "step": 2663 }, { "epoch": 0.38160721959604643, "grad_norm": 1.1765862703323364, "learning_rate": 0.0001419286408001429, "loss": 1.5417, "step": 2664 }, { "epoch": 0.3817504655493482, "grad_norm": 1.2477549314498901, "learning_rate": 0.00014188651389162243, "loss": 1.5455, "step": 2665 }, { "epoch": 0.38189371150265006, "grad_norm": 1.0923058986663818, "learning_rate": 0.00014184437796597202, "loss": 1.4605, "step": 2666 }, { "epoch": 0.38203695745595184, "grad_norm": 1.0809216499328613, "learning_rate": 0.00014180223303226255, "loss": 1.5313, "step": 2667 }, { "epoch": 0.3821802034092537, "grad_norm": 1.3436511754989624, "learning_rate": 0.0001417600790995667, "loss": 1.4136, "step": 2668 }, { "epoch": 0.3823234493625555, "grad_norm": 1.1448014974594116, "learning_rate": 0.00014171791617695927, "loss": 1.4709, "step": 2669 }, { "epoch": 0.3824666953158573, "grad_norm": 1.2089442014694214, "learning_rate": 0.00014167574427351683, "loss": 1.5446, "step": 2670 }, { "epoch": 0.38260994126915915, "grad_norm": 1.187323808670044, "learning_rate": 0.00014163356339831797, "loss": 1.405, "step": 2671 }, { "epoch": 0.382753187222461, "grad_norm": 1.1194077730178833, "learning_rate": 0.00014159137356044318, "loss": 1.5474, "step": 2672 }, { "epoch": 0.3828964331757628, "grad_norm": 1.3586173057556152, "learning_rate": 0.00014154917476897486, "loss": 1.4409, "step": 2673 }, { "epoch": 0.3830396791290646, "grad_norm": 1.0654659271240234, "learning_rate": 0.00014150696703299744, "loss": 1.5945, "step": 2674 }, { "epoch": 0.3831829250823664, "grad_norm": 1.0216246843338013, "learning_rate": 0.00014146475036159713, "loss": 1.6109, "step": 2675 }, { "epoch": 0.38332617103566824, "grad_norm": 1.2388262748718262, "learning_rate": 0.00014142252476386218, "loss": 1.4851, "step": 2676 }, { "epoch": 0.3834694169889701, "grad_norm": 1.1507729291915894, "learning_rate": 0.00014138029024888263, "loss": 1.426, "step": 2677 }, { "epoch": 0.38361266294227186, "grad_norm": 1.1322814226150513, "learning_rate": 0.00014133804682575068, "loss": 1.3933, "step": 2678 }, { "epoch": 0.3837559088955737, "grad_norm": 1.1495423316955566, "learning_rate": 0.00014129579450356016, "loss": 1.6722, "step": 2679 }, { "epoch": 0.38389915484887555, "grad_norm": 1.1246495246887207, "learning_rate": 0.00014125353329140703, "loss": 1.6087, "step": 2680 }, { "epoch": 0.38404240080217733, "grad_norm": 1.2214932441711426, "learning_rate": 0.000141211263198389, "loss": 1.5044, "step": 2681 }, { "epoch": 0.38418564675547917, "grad_norm": 1.299994707107544, "learning_rate": 0.00014116898423360586, "loss": 1.4857, "step": 2682 }, { "epoch": 0.38432889270878096, "grad_norm": 1.3046157360076904, "learning_rate": 0.00014112669640615918, "loss": 1.4781, "step": 2683 }, { "epoch": 0.3844721386620828, "grad_norm": 1.2488771677017212, "learning_rate": 0.00014108439972515248, "loss": 1.424, "step": 2684 }, { "epoch": 0.38461538461538464, "grad_norm": 0.9880814552307129, "learning_rate": 0.00014104209419969123, "loss": 1.5098, "step": 2685 }, { "epoch": 0.3847586305686864, "grad_norm": 1.3499400615692139, "learning_rate": 0.00014099977983888267, "loss": 1.4428, "step": 2686 }, { "epoch": 0.38490187652198826, "grad_norm": 1.1573457717895508, "learning_rate": 0.00014095745665183618, "loss": 1.4268, "step": 2687 }, { "epoch": 0.38504512247529005, "grad_norm": 0.9957888722419739, "learning_rate": 0.00014091512464766277, "loss": 1.467, "step": 2688 }, { "epoch": 0.3851883684285919, "grad_norm": 1.0542409420013428, "learning_rate": 0.00014087278383547553, "loss": 1.4327, "step": 2689 }, { "epoch": 0.38533161438189373, "grad_norm": 1.0902760028839111, "learning_rate": 0.00014083043422438935, "loss": 1.4391, "step": 2690 }, { "epoch": 0.3854748603351955, "grad_norm": 1.2940832376480103, "learning_rate": 0.00014078807582352108, "loss": 1.4816, "step": 2691 }, { "epoch": 0.38561810628849735, "grad_norm": 1.0089707374572754, "learning_rate": 0.00014074570864198947, "loss": 1.6002, "step": 2692 }, { "epoch": 0.3857613522417992, "grad_norm": 1.2270383834838867, "learning_rate": 0.00014070333268891504, "loss": 1.462, "step": 2693 }, { "epoch": 0.385904598195101, "grad_norm": 1.410273790359497, "learning_rate": 0.00014066094797342036, "loss": 1.5282, "step": 2694 }, { "epoch": 0.3860478441484028, "grad_norm": 1.1045509576797485, "learning_rate": 0.00014061855450462978, "loss": 1.5639, "step": 2695 }, { "epoch": 0.3861910901017046, "grad_norm": 1.2309067249298096, "learning_rate": 0.0001405761522916696, "loss": 1.4809, "step": 2696 }, { "epoch": 0.38633433605500644, "grad_norm": 1.1455552577972412, "learning_rate": 0.00014053374134366788, "loss": 1.6551, "step": 2697 }, { "epoch": 0.3864775820083083, "grad_norm": 1.0640569925308228, "learning_rate": 0.0001404913216697547, "loss": 1.5257, "step": 2698 }, { "epoch": 0.38662082796161007, "grad_norm": 1.031901478767395, "learning_rate": 0.00014044889327906202, "loss": 1.5718, "step": 2699 }, { "epoch": 0.3867640739149119, "grad_norm": 1.2474766969680786, "learning_rate": 0.00014040645618072355, "loss": 1.5011, "step": 2700 }, { "epoch": 0.38690731986821375, "grad_norm": 1.0588314533233643, "learning_rate": 0.00014036401038387497, "loss": 1.4851, "step": 2701 }, { "epoch": 0.38705056582151554, "grad_norm": 1.0032662153244019, "learning_rate": 0.00014032155589765378, "loss": 1.2493, "step": 2702 }, { "epoch": 0.3871938117748174, "grad_norm": 0.9487358331680298, "learning_rate": 0.00014027909273119944, "loss": 1.5521, "step": 2703 }, { "epoch": 0.38733705772811916, "grad_norm": 1.2969071865081787, "learning_rate": 0.00014023662089365319, "loss": 1.5437, "step": 2704 }, { "epoch": 0.387480303681421, "grad_norm": 1.1035399436950684, "learning_rate": 0.00014019414039415817, "loss": 1.2714, "step": 2705 }, { "epoch": 0.38762354963472284, "grad_norm": 1.3056426048278809, "learning_rate": 0.00014015165124185933, "loss": 1.4855, "step": 2706 }, { "epoch": 0.3877667955880246, "grad_norm": 1.2600029706954956, "learning_rate": 0.00014010915344590363, "loss": 1.5154, "step": 2707 }, { "epoch": 0.38791004154132647, "grad_norm": 1.292824149131775, "learning_rate": 0.0001400666470154397, "loss": 1.4218, "step": 2708 }, { "epoch": 0.38805328749462825, "grad_norm": 1.2237238883972168, "learning_rate": 0.00014002413195961819, "loss": 1.4724, "step": 2709 }, { "epoch": 0.3881965334479301, "grad_norm": 1.0942076444625854, "learning_rate": 0.0001399816082875915, "loss": 1.5634, "step": 2710 }, { "epoch": 0.38833977940123193, "grad_norm": 0.8648032546043396, "learning_rate": 0.0001399390760085139, "loss": 1.5449, "step": 2711 }, { "epoch": 0.3884830253545337, "grad_norm": 1.2516201734542847, "learning_rate": 0.00013989653513154165, "loss": 1.4152, "step": 2712 }, { "epoch": 0.38862627130783556, "grad_norm": 1.1037263870239258, "learning_rate": 0.00013985398566583262, "loss": 1.3311, "step": 2713 }, { "epoch": 0.3887695172611374, "grad_norm": 1.2341102361679077, "learning_rate": 0.00013981142762054674, "loss": 1.4219, "step": 2714 }, { "epoch": 0.3889127632144392, "grad_norm": 1.0416769981384277, "learning_rate": 0.00013976886100484562, "loss": 1.4991, "step": 2715 }, { "epoch": 0.389056009167741, "grad_norm": 1.2499676942825317, "learning_rate": 0.00013972628582789294, "loss": 1.4584, "step": 2716 }, { "epoch": 0.3891992551210428, "grad_norm": 1.236547827720642, "learning_rate": 0.00013968370209885392, "loss": 1.5598, "step": 2717 }, { "epoch": 0.38934250107434465, "grad_norm": 1.2368247509002686, "learning_rate": 0.0001396411098268959, "loss": 1.1851, "step": 2718 }, { "epoch": 0.3894857470276465, "grad_norm": 1.2517056465148926, "learning_rate": 0.00013959850902118786, "loss": 1.5141, "step": 2719 }, { "epoch": 0.3896289929809483, "grad_norm": 1.1641813516616821, "learning_rate": 0.00013955589969090075, "loss": 1.5539, "step": 2720 }, { "epoch": 0.3897722389342501, "grad_norm": 1.1909139156341553, "learning_rate": 0.00013951328184520732, "loss": 1.5473, "step": 2721 }, { "epoch": 0.3899154848875519, "grad_norm": 1.1649895906448364, "learning_rate": 0.0001394706554932821, "loss": 1.5972, "step": 2722 }, { "epoch": 0.39005873084085374, "grad_norm": 1.102317214012146, "learning_rate": 0.00013942802064430146, "loss": 1.339, "step": 2723 }, { "epoch": 0.3902019767941556, "grad_norm": 1.0654678344726562, "learning_rate": 0.0001393853773074437, "loss": 1.4308, "step": 2724 }, { "epoch": 0.39034522274745737, "grad_norm": 0.9474630951881409, "learning_rate": 0.00013934272549188888, "loss": 1.4505, "step": 2725 }, { "epoch": 0.3904884687007592, "grad_norm": 1.0712337493896484, "learning_rate": 0.0001393000652068188, "loss": 1.5036, "step": 2726 }, { "epoch": 0.39063171465406105, "grad_norm": 1.5540616512298584, "learning_rate": 0.0001392573964614172, "loss": 1.5744, "step": 2727 }, { "epoch": 0.39077496060736283, "grad_norm": 1.0921162366867065, "learning_rate": 0.00013921471926486961, "loss": 1.5447, "step": 2728 }, { "epoch": 0.39091820656066467, "grad_norm": 1.1279165744781494, "learning_rate": 0.0001391720336263634, "loss": 1.3181, "step": 2729 }, { "epoch": 0.39106145251396646, "grad_norm": 1.3597286939620972, "learning_rate": 0.0001391293395550877, "loss": 1.4979, "step": 2730 }, { "epoch": 0.3912046984672683, "grad_norm": 1.1407291889190674, "learning_rate": 0.00013908663706023347, "loss": 1.4425, "step": 2731 }, { "epoch": 0.39134794442057014, "grad_norm": 1.2837620973587036, "learning_rate": 0.00013904392615099356, "loss": 1.4059, "step": 2732 }, { "epoch": 0.3914911903738719, "grad_norm": 1.135113000869751, "learning_rate": 0.00013900120683656253, "loss": 1.2906, "step": 2733 }, { "epoch": 0.39163443632717376, "grad_norm": 1.1434237957000732, "learning_rate": 0.00013895847912613678, "loss": 1.524, "step": 2734 }, { "epoch": 0.3917776822804756, "grad_norm": 1.270049810409546, "learning_rate": 0.00013891574302891458, "loss": 1.4419, "step": 2735 }, { "epoch": 0.3919209282337774, "grad_norm": 1.1081808805465698, "learning_rate": 0.00013887299855409586, "loss": 1.3389, "step": 2736 }, { "epoch": 0.39206417418707923, "grad_norm": 1.391874074935913, "learning_rate": 0.00013883024571088257, "loss": 1.5145, "step": 2737 }, { "epoch": 0.392207420140381, "grad_norm": 1.163817048072815, "learning_rate": 0.00013878748450847826, "loss": 1.484, "step": 2738 }, { "epoch": 0.39235066609368285, "grad_norm": 1.0876201391220093, "learning_rate": 0.00013874471495608836, "loss": 1.6888, "step": 2739 }, { "epoch": 0.3924939120469847, "grad_norm": 1.198694109916687, "learning_rate": 0.00013870193706292012, "loss": 1.5966, "step": 2740 }, { "epoch": 0.3926371580002865, "grad_norm": 1.1250633001327515, "learning_rate": 0.00013865915083818256, "loss": 1.5093, "step": 2741 }, { "epoch": 0.3927804039535883, "grad_norm": 1.0257309675216675, "learning_rate": 0.0001386163562910865, "loss": 1.5567, "step": 2742 }, { "epoch": 0.3929236499068901, "grad_norm": 1.318430781364441, "learning_rate": 0.00013857355343084452, "loss": 1.3583, "step": 2743 }, { "epoch": 0.39306689586019194, "grad_norm": 1.1563712358474731, "learning_rate": 0.00013853074226667102, "loss": 1.6061, "step": 2744 }, { "epoch": 0.3932101418134938, "grad_norm": 1.361019253730774, "learning_rate": 0.00013848792280778222, "loss": 1.539, "step": 2745 }, { "epoch": 0.39335338776679557, "grad_norm": 1.1432775259017944, "learning_rate": 0.0001384450950633961, "loss": 1.644, "step": 2746 }, { "epoch": 0.3934966337200974, "grad_norm": 1.2742453813552856, "learning_rate": 0.00013840225904273234, "loss": 1.4264, "step": 2747 }, { "epoch": 0.39363987967339925, "grad_norm": 1.1689114570617676, "learning_rate": 0.00013835941475501251, "loss": 1.5092, "step": 2748 }, { "epoch": 0.39378312562670104, "grad_norm": 1.050249695777893, "learning_rate": 0.00013831656220945993, "loss": 1.5075, "step": 2749 }, { "epoch": 0.3939263715800029, "grad_norm": 1.2299082279205322, "learning_rate": 0.0001382737014152997, "loss": 1.5348, "step": 2750 }, { "epoch": 0.39406961753330466, "grad_norm": 1.2148571014404297, "learning_rate": 0.00013823083238175872, "loss": 1.5532, "step": 2751 }, { "epoch": 0.3942128634866065, "grad_norm": 1.1775099039077759, "learning_rate": 0.00013818795511806554, "loss": 1.447, "step": 2752 }, { "epoch": 0.39435610943990834, "grad_norm": 1.3566728830337524, "learning_rate": 0.00013814506963345067, "loss": 1.7176, "step": 2753 }, { "epoch": 0.3944993553932101, "grad_norm": 1.0904375314712524, "learning_rate": 0.00013810217593714623, "loss": 1.4391, "step": 2754 }, { "epoch": 0.39464260134651197, "grad_norm": 1.279283046722412, "learning_rate": 0.00013805927403838622, "loss": 1.3852, "step": 2755 }, { "epoch": 0.3947858472998138, "grad_norm": 1.1559151411056519, "learning_rate": 0.00013801636394640627, "loss": 1.462, "step": 2756 }, { "epoch": 0.3949290932531156, "grad_norm": 1.0611779689788818, "learning_rate": 0.00013797344567044396, "loss": 1.5412, "step": 2757 }, { "epoch": 0.39507233920641743, "grad_norm": 1.0106325149536133, "learning_rate": 0.00013793051921973852, "loss": 1.4866, "step": 2758 }, { "epoch": 0.3952155851597192, "grad_norm": 1.2036898136138916, "learning_rate": 0.00013788758460353087, "loss": 1.5536, "step": 2759 }, { "epoch": 0.39535883111302106, "grad_norm": 0.9703605771064758, "learning_rate": 0.00013784464183106389, "loss": 1.3454, "step": 2760 }, { "epoch": 0.3955020770663229, "grad_norm": 1.1918504238128662, "learning_rate": 0.00013780169091158197, "loss": 1.3637, "step": 2761 }, { "epoch": 0.3956453230196247, "grad_norm": 1.1605980396270752, "learning_rate": 0.0001377587318543315, "loss": 1.5529, "step": 2762 }, { "epoch": 0.3957885689729265, "grad_norm": 1.1891136169433594, "learning_rate": 0.0001377157646685604, "loss": 1.5186, "step": 2763 }, { "epoch": 0.3959318149262283, "grad_norm": 1.5366371870040894, "learning_rate": 0.00013767278936351854, "loss": 1.4772, "step": 2764 }, { "epoch": 0.39607506087953015, "grad_norm": 1.002192497253418, "learning_rate": 0.0001376298059484573, "loss": 1.5432, "step": 2765 }, { "epoch": 0.396218306832832, "grad_norm": 1.192426323890686, "learning_rate": 0.00013758681443263012, "loss": 1.6059, "step": 2766 }, { "epoch": 0.3963615527861338, "grad_norm": 1.2909477949142456, "learning_rate": 0.00013754381482529188, "loss": 1.7818, "step": 2767 }, { "epoch": 0.3965047987394356, "grad_norm": 1.05674147605896, "learning_rate": 0.0001375008071356994, "loss": 1.4633, "step": 2768 }, { "epoch": 0.39664804469273746, "grad_norm": 0.9963230490684509, "learning_rate": 0.0001374577913731111, "loss": 1.4343, "step": 2769 }, { "epoch": 0.39679129064603924, "grad_norm": 1.683857798576355, "learning_rate": 0.0001374147675467873, "loss": 1.5223, "step": 2770 }, { "epoch": 0.3969345365993411, "grad_norm": 1.2410809993743896, "learning_rate": 0.00013737173566598991, "loss": 1.4896, "step": 2771 }, { "epoch": 0.39707778255264287, "grad_norm": 1.0263532400131226, "learning_rate": 0.00013732869573998262, "loss": 1.4911, "step": 2772 }, { "epoch": 0.3972210285059447, "grad_norm": 1.4428693056106567, "learning_rate": 0.00013728564777803088, "loss": 1.5923, "step": 2773 }, { "epoch": 0.39736427445924655, "grad_norm": 1.254270315170288, "learning_rate": 0.00013724259178940184, "loss": 1.5254, "step": 2774 }, { "epoch": 0.39750752041254833, "grad_norm": 1.5237646102905273, "learning_rate": 0.00013719952778336442, "loss": 1.5527, "step": 2775 }, { "epoch": 0.3976507663658502, "grad_norm": 1.165190577507019, "learning_rate": 0.0001371564557691892, "loss": 1.4487, "step": 2776 }, { "epoch": 0.39779401231915196, "grad_norm": 1.0827488899230957, "learning_rate": 0.00013711337575614857, "loss": 1.299, "step": 2777 }, { "epoch": 0.3979372582724538, "grad_norm": 0.9943476319313049, "learning_rate": 0.0001370702877535165, "loss": 1.7679, "step": 2778 }, { "epoch": 0.39808050422575564, "grad_norm": 1.1672650575637817, "learning_rate": 0.00013702719177056884, "loss": 1.4542, "step": 2779 }, { "epoch": 0.3982237501790574, "grad_norm": 1.4412822723388672, "learning_rate": 0.0001369840878165831, "loss": 1.4738, "step": 2780 }, { "epoch": 0.39836699613235926, "grad_norm": 1.2942067384719849, "learning_rate": 0.00013694097590083844, "loss": 1.5741, "step": 2781 }, { "epoch": 0.3985102420856611, "grad_norm": 1.1723740100860596, "learning_rate": 0.00013689785603261583, "loss": 1.5107, "step": 2782 }, { "epoch": 0.3986534880389629, "grad_norm": 1.210911750793457, "learning_rate": 0.00013685472822119786, "loss": 1.5528, "step": 2783 }, { "epoch": 0.39879673399226473, "grad_norm": 1.1275266408920288, "learning_rate": 0.00013681159247586896, "loss": 1.5861, "step": 2784 }, { "epoch": 0.3989399799455665, "grad_norm": 1.0115891695022583, "learning_rate": 0.00013676844880591512, "loss": 1.5642, "step": 2785 }, { "epoch": 0.39908322589886835, "grad_norm": 1.149572730064392, "learning_rate": 0.00013672529722062415, "loss": 1.5964, "step": 2786 }, { "epoch": 0.3992264718521702, "grad_norm": 1.2612196207046509, "learning_rate": 0.0001366821377292855, "loss": 1.6142, "step": 2787 }, { "epoch": 0.399369717805472, "grad_norm": 1.1390632390975952, "learning_rate": 0.0001366389703411903, "loss": 1.6679, "step": 2788 }, { "epoch": 0.3995129637587738, "grad_norm": 1.201200246810913, "learning_rate": 0.0001365957950656315, "loss": 1.5567, "step": 2789 }, { "epoch": 0.39965620971207566, "grad_norm": 0.9184206128120422, "learning_rate": 0.0001365526119119036, "loss": 1.4596, "step": 2790 }, { "epoch": 0.39979945566537745, "grad_norm": 1.334398865699768, "learning_rate": 0.00013650942088930295, "loss": 1.3837, "step": 2791 }, { "epoch": 0.3999427016186793, "grad_norm": 1.2922738790512085, "learning_rate": 0.00013646622200712738, "loss": 1.3105, "step": 2792 }, { "epoch": 0.40008594757198107, "grad_norm": 1.221731424331665, "learning_rate": 0.00013642301527467664, "loss": 1.5434, "step": 2793 }, { "epoch": 0.4002291935252829, "grad_norm": 1.124998688697815, "learning_rate": 0.00013637980070125205, "loss": 1.3795, "step": 2794 }, { "epoch": 0.40037243947858475, "grad_norm": 1.1205888986587524, "learning_rate": 0.0001363365782961566, "loss": 1.6927, "step": 2795 }, { "epoch": 0.40051568543188654, "grad_norm": 0.9571004509925842, "learning_rate": 0.00013629334806869507, "loss": 1.3598, "step": 2796 }, { "epoch": 0.4006589313851884, "grad_norm": 1.3338227272033691, "learning_rate": 0.0001362501100281738, "loss": 1.6704, "step": 2797 }, { "epoch": 0.40080217733849016, "grad_norm": 1.53742253780365, "learning_rate": 0.0001362068641839009, "loss": 1.4185, "step": 2798 }, { "epoch": 0.400945423291792, "grad_norm": 1.1568914651870728, "learning_rate": 0.0001361636105451861, "loss": 1.5731, "step": 2799 }, { "epoch": 0.40108866924509384, "grad_norm": 1.1020174026489258, "learning_rate": 0.0001361203491213409, "loss": 1.4061, "step": 2800 }, { "epoch": 0.40123191519839563, "grad_norm": 1.5683600902557373, "learning_rate": 0.00013607707992167834, "loss": 1.5755, "step": 2801 }, { "epoch": 0.40137516115169747, "grad_norm": 1.140917420387268, "learning_rate": 0.00013603380295551328, "loss": 1.3123, "step": 2802 }, { "epoch": 0.4015184071049993, "grad_norm": 1.1981101036071777, "learning_rate": 0.0001359905182321621, "loss": 1.4814, "step": 2803 }, { "epoch": 0.4016616530583011, "grad_norm": 1.4439482688903809, "learning_rate": 0.00013594722576094296, "loss": 1.6232, "step": 2804 }, { "epoch": 0.40180489901160293, "grad_norm": 1.0367356538772583, "learning_rate": 0.00013590392555117573, "loss": 1.4473, "step": 2805 }, { "epoch": 0.4019481449649047, "grad_norm": 1.1641727685928345, "learning_rate": 0.00013586061761218176, "loss": 1.57, "step": 2806 }, { "epoch": 0.40209139091820656, "grad_norm": 1.055579423904419, "learning_rate": 0.00013581730195328425, "loss": 1.5654, "step": 2807 }, { "epoch": 0.4022346368715084, "grad_norm": 1.215779185295105, "learning_rate": 0.00013577397858380798, "loss": 1.4089, "step": 2808 }, { "epoch": 0.4023778828248102, "grad_norm": 1.1298054456710815, "learning_rate": 0.0001357306475130794, "loss": 1.4163, "step": 2809 }, { "epoch": 0.402521128778112, "grad_norm": 1.2456693649291992, "learning_rate": 0.00013568730875042654, "loss": 1.5579, "step": 2810 }, { "epoch": 0.4026643747314138, "grad_norm": 1.0602993965148926, "learning_rate": 0.0001356439623051793, "loss": 1.538, "step": 2811 }, { "epoch": 0.40280762068471565, "grad_norm": 1.2081440687179565, "learning_rate": 0.000135600608186669, "loss": 1.5203, "step": 2812 }, { "epoch": 0.4029508666380175, "grad_norm": 1.0199353694915771, "learning_rate": 0.00013555724640422874, "loss": 1.5697, "step": 2813 }, { "epoch": 0.4030941125913193, "grad_norm": 1.031089186668396, "learning_rate": 0.00013551387696719326, "loss": 1.7821, "step": 2814 }, { "epoch": 0.4032373585446211, "grad_norm": 1.2243852615356445, "learning_rate": 0.00013547049988489883, "loss": 1.5586, "step": 2815 }, { "epoch": 0.40338060449792296, "grad_norm": 1.2231223583221436, "learning_rate": 0.0001354271151666836, "loss": 1.6282, "step": 2816 }, { "epoch": 0.40352385045122474, "grad_norm": 1.099962830543518, "learning_rate": 0.00013538372282188713, "loss": 1.4472, "step": 2817 }, { "epoch": 0.4036670964045266, "grad_norm": 1.4149789810180664, "learning_rate": 0.0001353403228598508, "loss": 1.5027, "step": 2818 }, { "epoch": 0.40381034235782837, "grad_norm": 1.2322527170181274, "learning_rate": 0.00013529691528991743, "loss": 1.4613, "step": 2819 }, { "epoch": 0.4039535883111302, "grad_norm": 1.1616967916488647, "learning_rate": 0.00013525350012143168, "loss": 1.4583, "step": 2820 }, { "epoch": 0.40409683426443205, "grad_norm": 1.1408765316009521, "learning_rate": 0.00013521007736373974, "loss": 1.5123, "step": 2821 }, { "epoch": 0.40424008021773383, "grad_norm": 1.0059444904327393, "learning_rate": 0.00013516664702618948, "loss": 1.5454, "step": 2822 }, { "epoch": 0.4043833261710357, "grad_norm": 1.0549180507659912, "learning_rate": 0.0001351232091181303, "loss": 1.4207, "step": 2823 }, { "epoch": 0.4045265721243375, "grad_norm": 1.1092208623886108, "learning_rate": 0.0001350797636489134, "loss": 1.5176, "step": 2824 }, { "epoch": 0.4046698180776393, "grad_norm": 0.9712927341461182, "learning_rate": 0.0001350363106278915, "loss": 1.3275, "step": 2825 }, { "epoch": 0.40481306403094114, "grad_norm": 1.1474981307983398, "learning_rate": 0.00013499285006441888, "loss": 1.5541, "step": 2826 }, { "epoch": 0.4049563099842429, "grad_norm": 1.2580596208572388, "learning_rate": 0.00013494938196785162, "loss": 1.5888, "step": 2827 }, { "epoch": 0.40509955593754476, "grad_norm": 1.1927697658538818, "learning_rate": 0.00013490590634754728, "loss": 1.4205, "step": 2828 }, { "epoch": 0.4052428018908466, "grad_norm": 1.2242077589035034, "learning_rate": 0.00013486242321286506, "loss": 1.416, "step": 2829 }, { "epoch": 0.4053860478441484, "grad_norm": 0.9283555150032043, "learning_rate": 0.0001348189325731659, "loss": 1.5341, "step": 2830 }, { "epoch": 0.40552929379745023, "grad_norm": 1.2833658456802368, "learning_rate": 0.0001347754344378121, "loss": 1.5718, "step": 2831 }, { "epoch": 0.405672539750752, "grad_norm": 1.2399424314498901, "learning_rate": 0.0001347319288161679, "loss": 1.2715, "step": 2832 }, { "epoch": 0.40581578570405386, "grad_norm": 1.542478322982788, "learning_rate": 0.00013468841571759888, "loss": 1.367, "step": 2833 }, { "epoch": 0.4059590316573557, "grad_norm": 1.0797381401062012, "learning_rate": 0.00013464489515147238, "loss": 1.6221, "step": 2834 }, { "epoch": 0.4061022776106575, "grad_norm": 1.3266401290893555, "learning_rate": 0.00013460136712715724, "loss": 1.5181, "step": 2835 }, { "epoch": 0.4062455235639593, "grad_norm": 1.2950903177261353, "learning_rate": 0.00013455783165402404, "loss": 1.3957, "step": 2836 }, { "epoch": 0.40638876951726116, "grad_norm": 1.0217623710632324, "learning_rate": 0.00013451428874144484, "loss": 1.4634, "step": 2837 }, { "epoch": 0.40653201547056295, "grad_norm": 1.4090055227279663, "learning_rate": 0.0001344707383987934, "loss": 1.4121, "step": 2838 }, { "epoch": 0.4066752614238648, "grad_norm": 1.1282591819763184, "learning_rate": 0.000134427180635445, "loss": 1.4668, "step": 2839 }, { "epoch": 0.40681850737716657, "grad_norm": 1.1637719869613647, "learning_rate": 0.0001343836154607765, "loss": 1.6252, "step": 2840 }, { "epoch": 0.4069617533304684, "grad_norm": 1.1857134103775024, "learning_rate": 0.0001343400428841665, "loss": 1.5508, "step": 2841 }, { "epoch": 0.40710499928377025, "grad_norm": 1.2307132482528687, "learning_rate": 0.00013429646291499502, "loss": 1.4364, "step": 2842 }, { "epoch": 0.40724824523707204, "grad_norm": 1.22222900390625, "learning_rate": 0.00013425287556264383, "loss": 1.5867, "step": 2843 }, { "epoch": 0.4073914911903739, "grad_norm": 1.130599021911621, "learning_rate": 0.00013420928083649608, "loss": 1.5419, "step": 2844 }, { "epoch": 0.4075347371436757, "grad_norm": 0.8783159852027893, "learning_rate": 0.0001341656787459368, "loss": 1.4019, "step": 2845 }, { "epoch": 0.4076779830969775, "grad_norm": 1.3458658456802368, "learning_rate": 0.0001341220693003523, "loss": 1.4081, "step": 2846 }, { "epoch": 0.40782122905027934, "grad_norm": 1.0724071264266968, "learning_rate": 0.00013407845250913066, "loss": 1.5155, "step": 2847 }, { "epoch": 0.40796447500358113, "grad_norm": 1.3692351579666138, "learning_rate": 0.00013403482838166155, "loss": 1.6292, "step": 2848 }, { "epoch": 0.40810772095688297, "grad_norm": 1.0729633569717407, "learning_rate": 0.00013399119692733612, "loss": 1.5675, "step": 2849 }, { "epoch": 0.4082509669101848, "grad_norm": 1.2195243835449219, "learning_rate": 0.00013394755815554713, "loss": 1.5134, "step": 2850 }, { "epoch": 0.4083942128634866, "grad_norm": 1.230333685874939, "learning_rate": 0.0001339039120756889, "loss": 1.6348, "step": 2851 }, { "epoch": 0.40853745881678843, "grad_norm": 1.08534574508667, "learning_rate": 0.00013386025869715744, "loss": 1.3647, "step": 2852 }, { "epoch": 0.4086807047700902, "grad_norm": 1.1792153120040894, "learning_rate": 0.00013381659802935017, "loss": 1.5104, "step": 2853 }, { "epoch": 0.40882395072339206, "grad_norm": 1.105631947517395, "learning_rate": 0.0001337729300816662, "loss": 1.3031, "step": 2854 }, { "epoch": 0.4089671966766939, "grad_norm": 1.062870979309082, "learning_rate": 0.0001337292548635061, "loss": 1.5053, "step": 2855 }, { "epoch": 0.4091104426299957, "grad_norm": 1.1856683492660522, "learning_rate": 0.00013368557238427212, "loss": 1.5948, "step": 2856 }, { "epoch": 0.4092536885832975, "grad_norm": 1.2329175472259521, "learning_rate": 0.000133641882653368, "loss": 1.4225, "step": 2857 }, { "epoch": 0.40939693453659937, "grad_norm": 1.0203795433044434, "learning_rate": 0.00013359818568019904, "loss": 1.4716, "step": 2858 }, { "epoch": 0.40954018048990115, "grad_norm": 1.197117805480957, "learning_rate": 0.00013355448147417214, "loss": 1.5505, "step": 2859 }, { "epoch": 0.409683426443203, "grad_norm": 1.0218416452407837, "learning_rate": 0.0001335107700446957, "loss": 1.61, "step": 2860 }, { "epoch": 0.4098266723965048, "grad_norm": 1.2811695337295532, "learning_rate": 0.00013346705140117977, "loss": 1.5602, "step": 2861 }, { "epoch": 0.4099699183498066, "grad_norm": 1.0614163875579834, "learning_rate": 0.0001334233255530358, "loss": 1.3666, "step": 2862 }, { "epoch": 0.41011316430310846, "grad_norm": 1.3823022842407227, "learning_rate": 0.00013337959250967697, "loss": 1.5528, "step": 2863 }, { "epoch": 0.41025641025641024, "grad_norm": 1.2922849655151367, "learning_rate": 0.0001333358522805179, "loss": 1.4745, "step": 2864 }, { "epoch": 0.4103996562097121, "grad_norm": 1.0239113569259644, "learning_rate": 0.00013329210487497475, "loss": 1.5447, "step": 2865 }, { "epoch": 0.41054290216301387, "grad_norm": 1.2409710884094238, "learning_rate": 0.0001332483503024653, "loss": 1.4809, "step": 2866 }, { "epoch": 0.4106861481163157, "grad_norm": 1.1123075485229492, "learning_rate": 0.00013320458857240877, "loss": 1.4086, "step": 2867 }, { "epoch": 0.41082939406961755, "grad_norm": 1.0756897926330566, "learning_rate": 0.00013316081969422602, "loss": 1.4661, "step": 2868 }, { "epoch": 0.41097264002291933, "grad_norm": 1.0827782154083252, "learning_rate": 0.00013311704367733937, "loss": 1.4039, "step": 2869 }, { "epoch": 0.4111158859762212, "grad_norm": 1.4105809926986694, "learning_rate": 0.00013307326053117277, "loss": 1.725, "step": 2870 }, { "epoch": 0.411259131929523, "grad_norm": 1.0170164108276367, "learning_rate": 0.00013302947026515158, "loss": 1.6817, "step": 2871 }, { "epoch": 0.4114023778828248, "grad_norm": 1.0120043754577637, "learning_rate": 0.0001329856728887028, "loss": 1.4653, "step": 2872 }, { "epoch": 0.41154562383612664, "grad_norm": 1.180802583694458, "learning_rate": 0.00013294186841125493, "loss": 1.5243, "step": 2873 }, { "epoch": 0.4116888697894284, "grad_norm": 1.164393424987793, "learning_rate": 0.00013289805684223798, "loss": 1.4833, "step": 2874 }, { "epoch": 0.41183211574273026, "grad_norm": 1.2291781902313232, "learning_rate": 0.0001328542381910835, "loss": 1.3078, "step": 2875 }, { "epoch": 0.4119753616960321, "grad_norm": 1.0965158939361572, "learning_rate": 0.00013281041246722452, "loss": 1.4204, "step": 2876 }, { "epoch": 0.4121186076493339, "grad_norm": 1.106292724609375, "learning_rate": 0.00013276657968009568, "loss": 1.5608, "step": 2877 }, { "epoch": 0.41226185360263573, "grad_norm": 1.0221314430236816, "learning_rate": 0.0001327227398391331, "loss": 1.3565, "step": 2878 }, { "epoch": 0.41240509955593757, "grad_norm": 1.0669028759002686, "learning_rate": 0.00013267889295377438, "loss": 1.5007, "step": 2879 }, { "epoch": 0.41254834550923936, "grad_norm": 0.9647644758224487, "learning_rate": 0.00013263503903345868, "loss": 1.4506, "step": 2880 }, { "epoch": 0.4126915914625412, "grad_norm": 1.160662293434143, "learning_rate": 0.0001325911780876267, "loss": 1.4747, "step": 2881 }, { "epoch": 0.412834837415843, "grad_norm": 0.9538294076919556, "learning_rate": 0.00013254731012572052, "loss": 1.4954, "step": 2882 }, { "epoch": 0.4129780833691448, "grad_norm": 1.0596097707748413, "learning_rate": 0.00013250343515718393, "loss": 1.3582, "step": 2883 }, { "epoch": 0.41312132932244666, "grad_norm": 1.3182281255722046, "learning_rate": 0.0001324595531914621, "loss": 1.4293, "step": 2884 }, { "epoch": 0.41326457527574845, "grad_norm": 1.038257360458374, "learning_rate": 0.0001324156642380017, "loss": 1.4392, "step": 2885 }, { "epoch": 0.4134078212290503, "grad_norm": 1.4457060098648071, "learning_rate": 0.00013237176830625095, "loss": 1.5628, "step": 2886 }, { "epoch": 0.4135510671823521, "grad_norm": 1.463173270225525, "learning_rate": 0.00013232786540565955, "loss": 1.3942, "step": 2887 }, { "epoch": 0.4136943131356539, "grad_norm": 1.1232414245605469, "learning_rate": 0.00013228395554567874, "loss": 1.3162, "step": 2888 }, { "epoch": 0.41383755908895575, "grad_norm": 1.1720107793807983, "learning_rate": 0.00013224003873576123, "loss": 1.4886, "step": 2889 }, { "epoch": 0.41398080504225754, "grad_norm": 1.1019911766052246, "learning_rate": 0.00013219611498536114, "loss": 1.4, "step": 2890 }, { "epoch": 0.4141240509955594, "grad_norm": 1.223069667816162, "learning_rate": 0.00013215218430393425, "loss": 1.4969, "step": 2891 }, { "epoch": 0.4142672969488612, "grad_norm": 1.1943271160125732, "learning_rate": 0.0001321082467009377, "loss": 1.37, "step": 2892 }, { "epoch": 0.414410542902163, "grad_norm": 1.333351969718933, "learning_rate": 0.00013206430218583025, "loss": 1.6165, "step": 2893 }, { "epoch": 0.41455378885546484, "grad_norm": 1.1737723350524902, "learning_rate": 0.00013202035076807195, "loss": 1.3803, "step": 2894 }, { "epoch": 0.41469703480876663, "grad_norm": 1.2018879652023315, "learning_rate": 0.00013197639245712454, "loss": 1.4133, "step": 2895 }, { "epoch": 0.41484028076206847, "grad_norm": 1.1486068964004517, "learning_rate": 0.0001319324272624511, "loss": 1.4042, "step": 2896 }, { "epoch": 0.4149835267153703, "grad_norm": 1.0386711359024048, "learning_rate": 0.00013188845519351632, "loss": 1.281, "step": 2897 }, { "epoch": 0.4151267726686721, "grad_norm": 1.0844521522521973, "learning_rate": 0.0001318444762597862, "loss": 1.3715, "step": 2898 }, { "epoch": 0.41527001862197394, "grad_norm": 1.0879794359207153, "learning_rate": 0.0001318004904707284, "loss": 1.6486, "step": 2899 }, { "epoch": 0.4154132645752758, "grad_norm": 1.3134979009628296, "learning_rate": 0.00013175649783581195, "loss": 1.6096, "step": 2900 }, { "epoch": 0.41555651052857756, "grad_norm": 1.026983380317688, "learning_rate": 0.00013171249836450735, "loss": 1.5436, "step": 2901 }, { "epoch": 0.4156997564818794, "grad_norm": 1.0705406665802002, "learning_rate": 0.00013166849206628663, "loss": 1.5313, "step": 2902 }, { "epoch": 0.4158430024351812, "grad_norm": 0.9650298357009888, "learning_rate": 0.00013162447895062322, "loss": 1.576, "step": 2903 }, { "epoch": 0.415986248388483, "grad_norm": 1.2378631830215454, "learning_rate": 0.0001315804590269921, "loss": 1.6286, "step": 2904 }, { "epoch": 0.41612949434178487, "grad_norm": 1.1715314388275146, "learning_rate": 0.00013153643230486963, "loss": 1.6733, "step": 2905 }, { "epoch": 0.41627274029508665, "grad_norm": 1.0728809833526611, "learning_rate": 0.0001314923987937337, "loss": 1.6885, "step": 2906 }, { "epoch": 0.4164159862483885, "grad_norm": 1.325488805770874, "learning_rate": 0.0001314483585030636, "loss": 1.4412, "step": 2907 }, { "epoch": 0.4165592322016903, "grad_norm": 1.3094624280929565, "learning_rate": 0.00013140431144234018, "loss": 1.5664, "step": 2908 }, { "epoch": 0.4167024781549921, "grad_norm": 0.9077602624893188, "learning_rate": 0.00013136025762104563, "loss": 1.5567, "step": 2909 }, { "epoch": 0.41684572410829396, "grad_norm": 1.1055291891098022, "learning_rate": 0.00013131619704866363, "loss": 1.4093, "step": 2910 }, { "epoch": 0.41698897006159574, "grad_norm": 1.1394619941711426, "learning_rate": 0.0001312721297346794, "loss": 1.5167, "step": 2911 }, { "epoch": 0.4171322160148976, "grad_norm": 1.3825119733810425, "learning_rate": 0.00013122805568857948, "loss": 1.5573, "step": 2912 }, { "epoch": 0.4172754619681994, "grad_norm": 1.1434381008148193, "learning_rate": 0.00013118397491985198, "loss": 1.6236, "step": 2913 }, { "epoch": 0.4174187079215012, "grad_norm": 1.1055811643600464, "learning_rate": 0.00013113988743798628, "loss": 1.4474, "step": 2914 }, { "epoch": 0.41756195387480305, "grad_norm": 1.353328824043274, "learning_rate": 0.0001310957932524735, "loss": 1.6088, "step": 2915 }, { "epoch": 0.41770519982810483, "grad_norm": 1.4064003229141235, "learning_rate": 0.00013105169237280587, "loss": 1.4233, "step": 2916 }, { "epoch": 0.4178484457814067, "grad_norm": 1.3868550062179565, "learning_rate": 0.00013100758480847732, "loss": 1.6059, "step": 2917 }, { "epoch": 0.4179916917347085, "grad_norm": 1.0595946311950684, "learning_rate": 0.00013096347056898308, "loss": 1.3924, "step": 2918 }, { "epoch": 0.4181349376880103, "grad_norm": 1.1131072044372559, "learning_rate": 0.00013091934966381983, "loss": 1.4722, "step": 2919 }, { "epoch": 0.41827818364131214, "grad_norm": 1.1191039085388184, "learning_rate": 0.00013087522210248576, "loss": 1.5256, "step": 2920 }, { "epoch": 0.4184214295946139, "grad_norm": 1.276625633239746, "learning_rate": 0.00013083108789448038, "loss": 1.5901, "step": 2921 }, { "epoch": 0.41856467554791577, "grad_norm": 1.2246551513671875, "learning_rate": 0.00013078694704930473, "loss": 1.4468, "step": 2922 }, { "epoch": 0.4187079215012176, "grad_norm": 1.1267962455749512, "learning_rate": 0.00013074279957646126, "loss": 1.4579, "step": 2923 }, { "epoch": 0.4188511674545194, "grad_norm": 1.1189368963241577, "learning_rate": 0.0001306986454854538, "loss": 1.5885, "step": 2924 }, { "epoch": 0.41899441340782123, "grad_norm": 1.1947506666183472, "learning_rate": 0.00013065448478578764, "loss": 1.5625, "step": 2925 }, { "epoch": 0.41913765936112307, "grad_norm": 1.188049554824829, "learning_rate": 0.0001306103174869695, "loss": 1.4059, "step": 2926 }, { "epoch": 0.41928090531442486, "grad_norm": 1.0239427089691162, "learning_rate": 0.0001305661435985075, "loss": 1.3354, "step": 2927 }, { "epoch": 0.4194241512677267, "grad_norm": 1.0095629692077637, "learning_rate": 0.00013052196312991114, "loss": 1.4388, "step": 2928 }, { "epoch": 0.4195673972210285, "grad_norm": 1.10767662525177, "learning_rate": 0.00013047777609069146, "loss": 1.3824, "step": 2929 }, { "epoch": 0.4197106431743303, "grad_norm": 1.1147247552871704, "learning_rate": 0.00013043358249036077, "loss": 1.5709, "step": 2930 }, { "epoch": 0.41985388912763216, "grad_norm": 1.1097533702850342, "learning_rate": 0.00013038938233843287, "loss": 1.4984, "step": 2931 }, { "epoch": 0.41999713508093395, "grad_norm": 1.217271327972412, "learning_rate": 0.000130345175644423, "loss": 1.4566, "step": 2932 }, { "epoch": 0.4201403810342358, "grad_norm": 1.1242679357528687, "learning_rate": 0.00013030096241784772, "loss": 1.3825, "step": 2933 }, { "epoch": 0.42028362698753763, "grad_norm": 1.124475359916687, "learning_rate": 0.0001302567426682251, "loss": 1.3905, "step": 2934 }, { "epoch": 0.4204268729408394, "grad_norm": 1.1563829183578491, "learning_rate": 0.00013021251640507448, "loss": 1.6416, "step": 2935 }, { "epoch": 0.42057011889414125, "grad_norm": 1.035422444343567, "learning_rate": 0.00013016828363791673, "loss": 1.5769, "step": 2936 }, { "epoch": 0.42071336484744304, "grad_norm": 1.3420732021331787, "learning_rate": 0.00013012404437627407, "loss": 1.4956, "step": 2937 }, { "epoch": 0.4208566108007449, "grad_norm": 1.1856571435928345, "learning_rate": 0.00013007979862967014, "loss": 1.5406, "step": 2938 }, { "epoch": 0.4209998567540467, "grad_norm": 1.0544228553771973, "learning_rate": 0.00013003554640762986, "loss": 1.5423, "step": 2939 }, { "epoch": 0.4211431027073485, "grad_norm": 1.1093649864196777, "learning_rate": 0.00012999128771967977, "loss": 1.5924, "step": 2940 }, { "epoch": 0.42128634866065034, "grad_norm": 1.1809723377227783, "learning_rate": 0.00012994702257534756, "loss": 1.701, "step": 2941 }, { "epoch": 0.42142959461395213, "grad_norm": 1.278200626373291, "learning_rate": 0.00012990275098416246, "loss": 1.5239, "step": 2942 }, { "epoch": 0.42157284056725397, "grad_norm": 1.1760241985321045, "learning_rate": 0.00012985847295565508, "loss": 1.6247, "step": 2943 }, { "epoch": 0.4217160865205558, "grad_norm": 1.1853834390640259, "learning_rate": 0.00012981418849935733, "loss": 1.297, "step": 2944 }, { "epoch": 0.4218593324738576, "grad_norm": 1.072561264038086, "learning_rate": 0.0001297698976248026, "loss": 1.5129, "step": 2945 }, { "epoch": 0.42200257842715944, "grad_norm": 0.9901103973388672, "learning_rate": 0.00012972560034152562, "loss": 1.392, "step": 2946 }, { "epoch": 0.4221458243804613, "grad_norm": 1.2704654932022095, "learning_rate": 0.00012968129665906242, "loss": 1.5398, "step": 2947 }, { "epoch": 0.42228907033376306, "grad_norm": 1.2966411113739014, "learning_rate": 0.0001296369865869506, "loss": 1.503, "step": 2948 }, { "epoch": 0.4224323162870649, "grad_norm": 1.1061540842056274, "learning_rate": 0.00012959267013472892, "loss": 1.4872, "step": 2949 }, { "epoch": 0.4225755622403667, "grad_norm": 0.8887167572975159, "learning_rate": 0.00012954834731193775, "loss": 1.3417, "step": 2950 }, { "epoch": 0.4227188081936685, "grad_norm": 1.037367582321167, "learning_rate": 0.00012950401812811854, "loss": 1.3604, "step": 2951 }, { "epoch": 0.42286205414697037, "grad_norm": 1.0680986642837524, "learning_rate": 0.00012945968259281437, "loss": 1.2712, "step": 2952 }, { "epoch": 0.42300530010027215, "grad_norm": 1.2016894817352295, "learning_rate": 0.00012941534071556952, "loss": 1.5952, "step": 2953 }, { "epoch": 0.423148546053574, "grad_norm": 1.086963415145874, "learning_rate": 0.00012937099250592975, "loss": 1.5117, "step": 2954 }, { "epoch": 0.42329179200687583, "grad_norm": 1.1631067991256714, "learning_rate": 0.00012932663797344214, "loss": 1.395, "step": 2955 }, { "epoch": 0.4234350379601776, "grad_norm": 0.9772118330001831, "learning_rate": 0.00012928227712765504, "loss": 1.5567, "step": 2956 }, { "epoch": 0.42357828391347946, "grad_norm": 1.1256173849105835, "learning_rate": 0.00012923790997811834, "loss": 1.7219, "step": 2957 }, { "epoch": 0.42372152986678124, "grad_norm": 1.1013638973236084, "learning_rate": 0.00012919353653438313, "loss": 1.3383, "step": 2958 }, { "epoch": 0.4238647758200831, "grad_norm": 1.1755183935165405, "learning_rate": 0.00012914915680600195, "loss": 1.5647, "step": 2959 }, { "epoch": 0.4240080217733849, "grad_norm": 1.0311675071716309, "learning_rate": 0.00012910477080252858, "loss": 1.2683, "step": 2960 }, { "epoch": 0.4241512677266867, "grad_norm": 1.306514024734497, "learning_rate": 0.00012906037853351835, "loss": 1.4571, "step": 2961 }, { "epoch": 0.42429451367998855, "grad_norm": 0.9356301426887512, "learning_rate": 0.00012901598000852774, "loss": 1.5614, "step": 2962 }, { "epoch": 0.42443775963329033, "grad_norm": 1.1032782793045044, "learning_rate": 0.0001289715752371147, "loss": 1.6873, "step": 2963 }, { "epoch": 0.4245810055865922, "grad_norm": 0.9693393111228943, "learning_rate": 0.00012892716422883838, "loss": 1.5955, "step": 2964 }, { "epoch": 0.424724251539894, "grad_norm": 0.8935614824295044, "learning_rate": 0.0001288827469932595, "loss": 1.4806, "step": 2965 }, { "epoch": 0.4248674974931958, "grad_norm": 1.5495136976242065, "learning_rate": 0.00012883832353993986, "loss": 1.5968, "step": 2966 }, { "epoch": 0.42501074344649764, "grad_norm": 1.0972645282745361, "learning_rate": 0.00012879389387844285, "loss": 1.2795, "step": 2967 }, { "epoch": 0.4251539893997995, "grad_norm": 1.1585068702697754, "learning_rate": 0.00012874945801833306, "loss": 1.7341, "step": 2968 }, { "epoch": 0.42529723535310127, "grad_norm": 1.0645571947097778, "learning_rate": 0.00012870501596917632, "loss": 1.5655, "step": 2969 }, { "epoch": 0.4254404813064031, "grad_norm": 1.148591160774231, "learning_rate": 0.00012866056774054002, "loss": 1.644, "step": 2970 }, { "epoch": 0.4255837272597049, "grad_norm": 1.1192280054092407, "learning_rate": 0.0001286161133419927, "loss": 1.6597, "step": 2971 }, { "epoch": 0.42572697321300673, "grad_norm": 1.0613759756088257, "learning_rate": 0.00012857165278310436, "loss": 1.4726, "step": 2972 }, { "epoch": 0.4258702191663086, "grad_norm": 1.1115548610687256, "learning_rate": 0.00012852718607344617, "loss": 1.5416, "step": 2973 }, { "epoch": 0.42601346511961036, "grad_norm": 0.9438016414642334, "learning_rate": 0.0001284827132225908, "loss": 1.3574, "step": 2974 }, { "epoch": 0.4261567110729122, "grad_norm": 1.0485955476760864, "learning_rate": 0.00012843823424011207, "loss": 1.6145, "step": 2975 }, { "epoch": 0.426299957026214, "grad_norm": 1.3775779008865356, "learning_rate": 0.00012839374913558527, "loss": 1.4208, "step": 2976 }, { "epoch": 0.4264432029795158, "grad_norm": 1.0588425397872925, "learning_rate": 0.0001283492579185869, "loss": 1.5253, "step": 2977 }, { "epoch": 0.42658644893281766, "grad_norm": 1.0168206691741943, "learning_rate": 0.00012830476059869482, "loss": 1.3862, "step": 2978 }, { "epoch": 0.42672969488611945, "grad_norm": 1.0943965911865234, "learning_rate": 0.0001282602571854883, "loss": 1.504, "step": 2979 }, { "epoch": 0.4268729408394213, "grad_norm": 1.0079994201660156, "learning_rate": 0.00012821574768854765, "loss": 1.5877, "step": 2980 }, { "epoch": 0.42701618679272313, "grad_norm": 1.2735977172851562, "learning_rate": 0.0001281712321174548, "loss": 1.5487, "step": 2981 }, { "epoch": 0.4271594327460249, "grad_norm": 1.0860555171966553, "learning_rate": 0.00012812671048179276, "loss": 1.5614, "step": 2982 }, { "epoch": 0.42730267869932675, "grad_norm": 1.1374680995941162, "learning_rate": 0.00012808218279114603, "loss": 1.4441, "step": 2983 }, { "epoch": 0.42744592465262854, "grad_norm": 1.1925822496414185, "learning_rate": 0.00012803764905510025, "loss": 1.5007, "step": 2984 }, { "epoch": 0.4275891706059304, "grad_norm": 1.167211651802063, "learning_rate": 0.00012799310928324244, "loss": 1.3904, "step": 2985 }, { "epoch": 0.4277324165592322, "grad_norm": 0.9663708806037903, "learning_rate": 0.00012794856348516095, "loss": 1.3999, "step": 2986 }, { "epoch": 0.427875662512534, "grad_norm": 1.630013108253479, "learning_rate": 0.00012790401167044535, "loss": 1.3355, "step": 2987 }, { "epoch": 0.42801890846583585, "grad_norm": 1.1290802955627441, "learning_rate": 0.00012785945384868658, "loss": 1.272, "step": 2988 }, { "epoch": 0.4281621544191377, "grad_norm": 1.2320325374603271, "learning_rate": 0.00012781489002947676, "loss": 1.4645, "step": 2989 }, { "epoch": 0.42830540037243947, "grad_norm": 1.0531063079833984, "learning_rate": 0.00012777032022240947, "loss": 1.4208, "step": 2990 }, { "epoch": 0.4284486463257413, "grad_norm": 1.1587480306625366, "learning_rate": 0.00012772574443707942, "loss": 1.4492, "step": 2991 }, { "epoch": 0.4285918922790431, "grad_norm": 1.3658435344696045, "learning_rate": 0.0001276811626830827, "loss": 1.4112, "step": 2992 }, { "epoch": 0.42873513823234494, "grad_norm": 1.3322261571884155, "learning_rate": 0.00012763657497001668, "loss": 1.2234, "step": 2993 }, { "epoch": 0.4288783841856468, "grad_norm": 1.0259974002838135, "learning_rate": 0.00012759198130747994, "loss": 1.329, "step": 2994 }, { "epoch": 0.42902163013894856, "grad_norm": 1.0637564659118652, "learning_rate": 0.00012754738170507243, "loss": 1.4548, "step": 2995 }, { "epoch": 0.4291648760922504, "grad_norm": 1.1005356311798096, "learning_rate": 0.00012750277617239533, "loss": 1.5642, "step": 2996 }, { "epoch": 0.4293081220455522, "grad_norm": 1.3858085870742798, "learning_rate": 0.00012745816471905114, "loss": 1.6184, "step": 2997 }, { "epoch": 0.42945136799885403, "grad_norm": 1.178139328956604, "learning_rate": 0.0001274135473546435, "loss": 1.4701, "step": 2998 }, { "epoch": 0.42959461395215587, "grad_norm": 1.0503560304641724, "learning_rate": 0.00012736892408877758, "loss": 1.3992, "step": 2999 }, { "epoch": 0.42973785990545765, "grad_norm": 1.0629841089248657, "learning_rate": 0.0001273242949310595, "loss": 1.2691, "step": 3000 }, { "epoch": 0.4298811058587595, "grad_norm": 1.409101963043213, "learning_rate": 0.00012727965989109692, "loss": 1.4754, "step": 3001 }, { "epoch": 0.43002435181206133, "grad_norm": 1.363560676574707, "learning_rate": 0.00012723501897849866, "loss": 1.4119, "step": 3002 }, { "epoch": 0.4301675977653631, "grad_norm": 1.0263805389404297, "learning_rate": 0.00012719037220287476, "loss": 1.4857, "step": 3003 }, { "epoch": 0.43031084371866496, "grad_norm": 1.23080575466156, "learning_rate": 0.00012714571957383658, "loss": 1.5875, "step": 3004 }, { "epoch": 0.43045408967196674, "grad_norm": 1.1559325456619263, "learning_rate": 0.0001271010611009967, "loss": 1.5228, "step": 3005 }, { "epoch": 0.4305973356252686, "grad_norm": 1.0628306865692139, "learning_rate": 0.00012705639679396906, "loss": 1.4944, "step": 3006 }, { "epoch": 0.4307405815785704, "grad_norm": 1.1796555519104004, "learning_rate": 0.00012701172666236869, "loss": 1.3764, "step": 3007 }, { "epoch": 0.4308838275318722, "grad_norm": 1.003879189491272, "learning_rate": 0.00012696705071581205, "loss": 1.3714, "step": 3008 }, { "epoch": 0.43102707348517405, "grad_norm": 1.1037019491195679, "learning_rate": 0.0001269223689639167, "loss": 1.5267, "step": 3009 }, { "epoch": 0.43117031943847584, "grad_norm": 1.1033031940460205, "learning_rate": 0.00012687768141630152, "loss": 1.5182, "step": 3010 }, { "epoch": 0.4313135653917777, "grad_norm": 1.1002765893936157, "learning_rate": 0.00012683298808258665, "loss": 1.6048, "step": 3011 }, { "epoch": 0.4314568113450795, "grad_norm": 1.2625983953475952, "learning_rate": 0.00012678828897239348, "loss": 1.3896, "step": 3012 }, { "epoch": 0.4316000572983813, "grad_norm": 1.2411541938781738, "learning_rate": 0.00012674358409534462, "loss": 1.6562, "step": 3013 }, { "epoch": 0.43174330325168314, "grad_norm": 1.3173563480377197, "learning_rate": 0.00012669887346106385, "loss": 1.4406, "step": 3014 }, { "epoch": 0.431886549204985, "grad_norm": 1.088472843170166, "learning_rate": 0.00012665415707917638, "loss": 1.3295, "step": 3015 }, { "epoch": 0.43202979515828677, "grad_norm": 1.149087905883789, "learning_rate": 0.00012660943495930845, "loss": 1.533, "step": 3016 }, { "epoch": 0.4321730411115886, "grad_norm": 1.1028615236282349, "learning_rate": 0.00012656470711108764, "loss": 1.5414, "step": 3017 }, { "epoch": 0.4323162870648904, "grad_norm": 1.0977202653884888, "learning_rate": 0.00012651997354414278, "loss": 1.3771, "step": 3018 }, { "epoch": 0.43245953301819223, "grad_norm": 0.9966946840286255, "learning_rate": 0.00012647523426810386, "loss": 1.5384, "step": 3019 }, { "epoch": 0.4326027789714941, "grad_norm": 0.9869443774223328, "learning_rate": 0.00012643048929260222, "loss": 1.3964, "step": 3020 }, { "epoch": 0.43274602492479586, "grad_norm": 1.2083615064620972, "learning_rate": 0.00012638573862727023, "loss": 1.4728, "step": 3021 }, { "epoch": 0.4328892708780977, "grad_norm": 1.175946593284607, "learning_rate": 0.0001263409822817417, "loss": 1.3331, "step": 3022 }, { "epoch": 0.43303251683139954, "grad_norm": 1.2413744926452637, "learning_rate": 0.00012629622026565147, "loss": 1.5265, "step": 3023 }, { "epoch": 0.4331757627847013, "grad_norm": 1.0116958618164062, "learning_rate": 0.0001262514525886358, "loss": 1.2974, "step": 3024 }, { "epoch": 0.43331900873800316, "grad_norm": 1.157038927078247, "learning_rate": 0.00012620667926033197, "loss": 1.5971, "step": 3025 }, { "epoch": 0.43346225469130495, "grad_norm": 1.1902157068252563, "learning_rate": 0.00012616190029037864, "loss": 1.5395, "step": 3026 }, { "epoch": 0.4336055006446068, "grad_norm": 1.1102172136306763, "learning_rate": 0.00012611711568841558, "loss": 1.394, "step": 3027 }, { "epoch": 0.43374874659790863, "grad_norm": 0.969360888004303, "learning_rate": 0.0001260723254640838, "loss": 1.2316, "step": 3028 }, { "epoch": 0.4338919925512104, "grad_norm": 1.0639179944992065, "learning_rate": 0.00012602752962702555, "loss": 1.7093, "step": 3029 }, { "epoch": 0.43403523850451226, "grad_norm": 1.0692851543426514, "learning_rate": 0.00012598272818688423, "loss": 1.3906, "step": 3030 }, { "epoch": 0.43417848445781404, "grad_norm": 1.2472950220108032, "learning_rate": 0.00012593792115330455, "loss": 1.4133, "step": 3031 }, { "epoch": 0.4343217304111159, "grad_norm": 1.1585959196090698, "learning_rate": 0.00012589310853593224, "loss": 1.5046, "step": 3032 }, { "epoch": 0.4344649763644177, "grad_norm": 1.1850559711456299, "learning_rate": 0.00012584829034441446, "loss": 1.3947, "step": 3033 }, { "epoch": 0.4346082223177195, "grad_norm": 1.3054406642913818, "learning_rate": 0.00012580346658839944, "loss": 1.2905, "step": 3034 }, { "epoch": 0.43475146827102135, "grad_norm": 1.5177661180496216, "learning_rate": 0.00012575863727753655, "loss": 1.4613, "step": 3035 }, { "epoch": 0.4348947142243232, "grad_norm": 1.194543480873108, "learning_rate": 0.0001257138024214765, "loss": 1.5855, "step": 3036 }, { "epoch": 0.43503796017762497, "grad_norm": 1.1772048473358154, "learning_rate": 0.0001256689620298711, "loss": 1.4295, "step": 3037 }, { "epoch": 0.4351812061309268, "grad_norm": 0.9769687056541443, "learning_rate": 0.00012562411611237342, "loss": 1.5222, "step": 3038 }, { "epoch": 0.4353244520842286, "grad_norm": 1.4609148502349854, "learning_rate": 0.00012557926467863758, "loss": 1.4763, "step": 3039 }, { "epoch": 0.43546769803753044, "grad_norm": 1.0291452407836914, "learning_rate": 0.0001255344077383191, "loss": 1.4339, "step": 3040 }, { "epoch": 0.4356109439908323, "grad_norm": 1.3297010660171509, "learning_rate": 0.00012548954530107448, "loss": 1.6165, "step": 3041 }, { "epoch": 0.43575418994413406, "grad_norm": 1.0855640172958374, "learning_rate": 0.00012544467737656153, "loss": 1.3156, "step": 3042 }, { "epoch": 0.4358974358974359, "grad_norm": 0.9920369386672974, "learning_rate": 0.0001253998039744392, "loss": 1.3569, "step": 3043 }, { "epoch": 0.43604068185073774, "grad_norm": 1.1332491636276245, "learning_rate": 0.00012535492510436762, "loss": 1.4818, "step": 3044 }, { "epoch": 0.43618392780403953, "grad_norm": 1.3320003747940063, "learning_rate": 0.0001253100407760081, "loss": 1.5222, "step": 3045 }, { "epoch": 0.43632717375734137, "grad_norm": 1.1496905088424683, "learning_rate": 0.00012526515099902313, "loss": 1.3299, "step": 3046 }, { "epoch": 0.43647041971064315, "grad_norm": 0.9612751603126526, "learning_rate": 0.00012522025578307638, "loss": 1.473, "step": 3047 }, { "epoch": 0.436613665663945, "grad_norm": 1.0251473188400269, "learning_rate": 0.00012517535513783263, "loss": 1.4309, "step": 3048 }, { "epoch": 0.43675691161724683, "grad_norm": 1.087808609008789, "learning_rate": 0.00012513044907295795, "loss": 1.3853, "step": 3049 }, { "epoch": 0.4369001575705486, "grad_norm": 1.1043897867202759, "learning_rate": 0.00012508553759811946, "loss": 1.4591, "step": 3050 }, { "epoch": 0.43704340352385046, "grad_norm": 1.0634304285049438, "learning_rate": 0.00012504062072298544, "loss": 1.3488, "step": 3051 }, { "epoch": 0.43718664947715224, "grad_norm": 0.9935649037361145, "learning_rate": 0.0001249956984572255, "loss": 1.4443, "step": 3052 }, { "epoch": 0.4373298954304541, "grad_norm": 1.173378348350525, "learning_rate": 0.0001249507708105102, "loss": 1.5622, "step": 3053 }, { "epoch": 0.4374731413837559, "grad_norm": 0.9579426050186157, "learning_rate": 0.00012490583779251142, "loss": 1.4893, "step": 3054 }, { "epoch": 0.4376163873370577, "grad_norm": 1.4100185632705688, "learning_rate": 0.00012486089941290206, "loss": 1.3759, "step": 3055 }, { "epoch": 0.43775963329035955, "grad_norm": 1.3769272565841675, "learning_rate": 0.00012481595568135628, "loss": 1.458, "step": 3056 }, { "epoch": 0.4379028792436614, "grad_norm": 1.0760036706924438, "learning_rate": 0.00012477100660754933, "loss": 1.5153, "step": 3057 }, { "epoch": 0.4380461251969632, "grad_norm": 1.2993892431259155, "learning_rate": 0.00012472605220115765, "loss": 1.4564, "step": 3058 }, { "epoch": 0.438189371150265, "grad_norm": 1.2786661386489868, "learning_rate": 0.0001246810924718588, "loss": 1.4667, "step": 3059 }, { "epoch": 0.4383326171035668, "grad_norm": 1.1581568717956543, "learning_rate": 0.00012463612742933148, "loss": 1.2991, "step": 3060 }, { "epoch": 0.43847586305686864, "grad_norm": 1.000996708869934, "learning_rate": 0.0001245911570832556, "loss": 1.3708, "step": 3061 }, { "epoch": 0.4386191090101705, "grad_norm": 1.2092115879058838, "learning_rate": 0.00012454618144331213, "loss": 1.4639, "step": 3062 }, { "epoch": 0.43876235496347227, "grad_norm": 0.9845905303955078, "learning_rate": 0.00012450120051918324, "loss": 1.5034, "step": 3063 }, { "epoch": 0.4389056009167741, "grad_norm": 0.9372521638870239, "learning_rate": 0.00012445621432055214, "loss": 1.482, "step": 3064 }, { "epoch": 0.4390488468700759, "grad_norm": 1.103734016418457, "learning_rate": 0.00012441122285710335, "loss": 1.3356, "step": 3065 }, { "epoch": 0.43919209282337773, "grad_norm": 1.0892448425292969, "learning_rate": 0.00012436622613852234, "loss": 1.5257, "step": 3066 }, { "epoch": 0.4393353387766796, "grad_norm": 1.1291526556015015, "learning_rate": 0.0001243212241744958, "loss": 1.3473, "step": 3067 }, { "epoch": 0.43947858472998136, "grad_norm": 1.0791828632354736, "learning_rate": 0.00012427621697471157, "loss": 1.4985, "step": 3068 }, { "epoch": 0.4396218306832832, "grad_norm": 1.1271023750305176, "learning_rate": 0.00012423120454885857, "loss": 1.5384, "step": 3069 }, { "epoch": 0.43976507663658504, "grad_norm": 1.1117109060287476, "learning_rate": 0.00012418618690662685, "loss": 1.4324, "step": 3070 }, { "epoch": 0.4399083225898868, "grad_norm": 1.011369228363037, "learning_rate": 0.00012414116405770758, "loss": 1.5482, "step": 3071 }, { "epoch": 0.44005156854318866, "grad_norm": 1.0561023950576782, "learning_rate": 0.00012409613601179316, "loss": 1.4493, "step": 3072 }, { "epoch": 0.44019481449649045, "grad_norm": 1.261890172958374, "learning_rate": 0.00012405110277857685, "loss": 1.5087, "step": 3073 }, { "epoch": 0.4403380604497923, "grad_norm": 1.231773018836975, "learning_rate": 0.00012400606436775336, "loss": 1.5027, "step": 3074 }, { "epoch": 0.44048130640309413, "grad_norm": 1.0396860837936401, "learning_rate": 0.00012396102078901823, "loss": 1.2771, "step": 3075 }, { "epoch": 0.4406245523563959, "grad_norm": 1.1152276992797852, "learning_rate": 0.00012391597205206828, "loss": 1.4733, "step": 3076 }, { "epoch": 0.44076779830969776, "grad_norm": 1.1698585748672485, "learning_rate": 0.00012387091816660136, "loss": 1.4656, "step": 3077 }, { "epoch": 0.4409110442629996, "grad_norm": 1.3149389028549194, "learning_rate": 0.0001238258591423165, "loss": 1.5714, "step": 3078 }, { "epoch": 0.4410542902163014, "grad_norm": 1.2365179061889648, "learning_rate": 0.00012378079498891377, "loss": 1.3109, "step": 3079 }, { "epoch": 0.4411975361696032, "grad_norm": 1.1512644290924072, "learning_rate": 0.00012373572571609432, "loss": 1.4572, "step": 3080 }, { "epoch": 0.441340782122905, "grad_norm": 1.165844440460205, "learning_rate": 0.00012369065133356052, "loss": 1.3984, "step": 3081 }, { "epoch": 0.44148402807620685, "grad_norm": 1.1984187364578247, "learning_rate": 0.00012364557185101576, "loss": 1.5708, "step": 3082 }, { "epoch": 0.4416272740295087, "grad_norm": 1.191343069076538, "learning_rate": 0.00012360048727816448, "loss": 1.5837, "step": 3083 }, { "epoch": 0.4417705199828105, "grad_norm": 1.1448837518692017, "learning_rate": 0.00012355539762471234, "loss": 1.4681, "step": 3084 }, { "epoch": 0.4419137659361123, "grad_norm": 1.1050268411636353, "learning_rate": 0.000123510302900366, "loss": 1.6287, "step": 3085 }, { "epoch": 0.4420570118894141, "grad_norm": 1.2584904432296753, "learning_rate": 0.00012346520311483318, "loss": 1.38, "step": 3086 }, { "epoch": 0.44220025784271594, "grad_norm": 1.1780387163162231, "learning_rate": 0.00012342009827782284, "loss": 1.4314, "step": 3087 }, { "epoch": 0.4423435037960178, "grad_norm": 1.2857754230499268, "learning_rate": 0.00012337498839904492, "loss": 1.4041, "step": 3088 }, { "epoch": 0.44248674974931956, "grad_norm": 1.1543657779693604, "learning_rate": 0.0001233298734882104, "loss": 1.3729, "step": 3089 }, { "epoch": 0.4426299957026214, "grad_norm": 1.25899076461792, "learning_rate": 0.00012328475355503145, "loss": 1.4525, "step": 3090 }, { "epoch": 0.44277324165592324, "grad_norm": 1.0398709774017334, "learning_rate": 0.0001232396286092213, "loss": 1.3959, "step": 3091 }, { "epoch": 0.44291648760922503, "grad_norm": 1.3459936380386353, "learning_rate": 0.00012319449866049416, "loss": 1.6002, "step": 3092 }, { "epoch": 0.44305973356252687, "grad_norm": 1.0195494890213013, "learning_rate": 0.00012314936371856543, "loss": 1.5286, "step": 3093 }, { "epoch": 0.44320297951582865, "grad_norm": 1.2109040021896362, "learning_rate": 0.00012310422379315162, "loss": 1.5535, "step": 3094 }, { "epoch": 0.4433462254691305, "grad_norm": 1.2494463920593262, "learning_rate": 0.0001230590788939701, "loss": 1.4512, "step": 3095 }, { "epoch": 0.44348947142243234, "grad_norm": 1.1511486768722534, "learning_rate": 0.00012301392903073954, "loss": 1.5829, "step": 3096 }, { "epoch": 0.4436327173757341, "grad_norm": 1.491208791732788, "learning_rate": 0.0001229687742131796, "loss": 1.5212, "step": 3097 }, { "epoch": 0.44377596332903596, "grad_norm": 0.9837605953216553, "learning_rate": 0.0001229236144510109, "loss": 1.4559, "step": 3098 }, { "epoch": 0.4439192092823378, "grad_norm": 1.1483603715896606, "learning_rate": 0.0001228784497539553, "loss": 1.5596, "step": 3099 }, { "epoch": 0.4440624552356396, "grad_norm": 1.0905228853225708, "learning_rate": 0.00012283328013173563, "loss": 1.459, "step": 3100 }, { "epoch": 0.4442057011889414, "grad_norm": 1.26743483543396, "learning_rate": 0.00012278810559407578, "loss": 1.5153, "step": 3101 }, { "epoch": 0.4443489471422432, "grad_norm": 1.206329584121704, "learning_rate": 0.00012274292615070068, "loss": 1.681, "step": 3102 }, { "epoch": 0.44449219309554505, "grad_norm": 1.1900231838226318, "learning_rate": 0.00012269774181133643, "loss": 1.3467, "step": 3103 }, { "epoch": 0.4446354390488469, "grad_norm": 1.0916862487792969, "learning_rate": 0.00012265255258571005, "loss": 1.4544, "step": 3104 }, { "epoch": 0.4447786850021487, "grad_norm": 1.112952709197998, "learning_rate": 0.00012260735848354962, "loss": 1.6628, "step": 3105 }, { "epoch": 0.4449219309554505, "grad_norm": 1.182957649230957, "learning_rate": 0.00012256215951458437, "loss": 1.3074, "step": 3106 }, { "epoch": 0.4450651769087523, "grad_norm": 1.2020386457443237, "learning_rate": 0.00012251695568854453, "loss": 1.3492, "step": 3107 }, { "epoch": 0.44520842286205414, "grad_norm": 1.213978886604309, "learning_rate": 0.00012247174701516134, "loss": 1.4891, "step": 3108 }, { "epoch": 0.445351668815356, "grad_norm": 1.5456002950668335, "learning_rate": 0.00012242653350416708, "loss": 1.4656, "step": 3109 }, { "epoch": 0.44549491476865777, "grad_norm": 1.161339521408081, "learning_rate": 0.00012238131516529514, "loss": 1.3669, "step": 3110 }, { "epoch": 0.4456381607219596, "grad_norm": 1.3069754838943481, "learning_rate": 0.00012233609200827986, "loss": 1.4999, "step": 3111 }, { "epoch": 0.44578140667526145, "grad_norm": 1.0870051383972168, "learning_rate": 0.00012229086404285674, "loss": 1.5256, "step": 3112 }, { "epoch": 0.44592465262856323, "grad_norm": 1.1172115802764893, "learning_rate": 0.00012224563127876222, "loss": 1.2513, "step": 3113 }, { "epoch": 0.4460678985818651, "grad_norm": 1.2250396013259888, "learning_rate": 0.00012220039372573373, "loss": 1.5243, "step": 3114 }, { "epoch": 0.44621114453516686, "grad_norm": 1.0939322710037231, "learning_rate": 0.0001221551513935099, "loss": 1.5798, "step": 3115 }, { "epoch": 0.4463543904884687, "grad_norm": 1.297761082649231, "learning_rate": 0.0001221099042918302, "loss": 1.5573, "step": 3116 }, { "epoch": 0.44649763644177054, "grad_norm": 1.1402711868286133, "learning_rate": 0.00012206465243043525, "loss": 1.4311, "step": 3117 }, { "epoch": 0.4466408823950723, "grad_norm": 1.2594259977340698, "learning_rate": 0.00012201939581906662, "loss": 1.525, "step": 3118 }, { "epoch": 0.44678412834837417, "grad_norm": 1.0936704874038696, "learning_rate": 0.00012197413446746702, "loss": 1.513, "step": 3119 }, { "epoch": 0.44692737430167595, "grad_norm": 1.4192631244659424, "learning_rate": 0.00012192886838538, "loss": 1.5443, "step": 3120 }, { "epoch": 0.4470706202549778, "grad_norm": 1.0494555234909058, "learning_rate": 0.00012188359758255028, "loss": 1.4248, "step": 3121 }, { "epoch": 0.44721386620827963, "grad_norm": 1.0246657133102417, "learning_rate": 0.00012183832206872355, "loss": 1.6292, "step": 3122 }, { "epoch": 0.4473571121615814, "grad_norm": 1.1405394077301025, "learning_rate": 0.00012179304185364646, "loss": 1.6136, "step": 3123 }, { "epoch": 0.44750035811488326, "grad_norm": 1.2595856189727783, "learning_rate": 0.00012174775694706679, "loss": 1.4637, "step": 3124 }, { "epoch": 0.4476436040681851, "grad_norm": 1.179434061050415, "learning_rate": 0.00012170246735873321, "loss": 1.5586, "step": 3125 }, { "epoch": 0.4477868500214869, "grad_norm": 1.2602344751358032, "learning_rate": 0.00012165717309839548, "loss": 1.6471, "step": 3126 }, { "epoch": 0.4479300959747887, "grad_norm": 1.0316052436828613, "learning_rate": 0.00012161187417580427, "loss": 1.4429, "step": 3127 }, { "epoch": 0.4480733419280905, "grad_norm": 1.4175562858581543, "learning_rate": 0.0001215665706007114, "loss": 1.5642, "step": 3128 }, { "epoch": 0.44821658788139235, "grad_norm": 1.2237098217010498, "learning_rate": 0.00012152126238286953, "loss": 1.5854, "step": 3129 }, { "epoch": 0.4483598338346942, "grad_norm": 1.0493561029434204, "learning_rate": 0.00012147594953203247, "loss": 1.4135, "step": 3130 }, { "epoch": 0.448503079787996, "grad_norm": 1.1857844591140747, "learning_rate": 0.0001214306320579549, "loss": 1.7446, "step": 3131 }, { "epoch": 0.4486463257412978, "grad_norm": 1.286437749862671, "learning_rate": 0.00012138530997039259, "loss": 1.4233, "step": 3132 }, { "epoch": 0.44878957169459965, "grad_norm": 1.306151270866394, "learning_rate": 0.00012133998327910225, "loss": 1.596, "step": 3133 }, { "epoch": 0.44893281764790144, "grad_norm": 1.2872573137283325, "learning_rate": 0.00012129465199384157, "loss": 1.4687, "step": 3134 }, { "epoch": 0.4490760636012033, "grad_norm": 1.3020161390304565, "learning_rate": 0.00012124931612436932, "loss": 1.5112, "step": 3135 }, { "epoch": 0.44921930955450506, "grad_norm": 1.121009349822998, "learning_rate": 0.0001212039756804451, "loss": 1.4184, "step": 3136 }, { "epoch": 0.4493625555078069, "grad_norm": 1.3300981521606445, "learning_rate": 0.00012115863067182967, "loss": 1.4236, "step": 3137 }, { "epoch": 0.44950580146110874, "grad_norm": 1.0866230726242065, "learning_rate": 0.00012111328110828464, "loss": 1.6867, "step": 3138 }, { "epoch": 0.44964904741441053, "grad_norm": 1.1317719221115112, "learning_rate": 0.00012106792699957263, "loss": 1.3135, "step": 3139 }, { "epoch": 0.44979229336771237, "grad_norm": 1.1488943099975586, "learning_rate": 0.00012102256835545734, "loss": 1.4476, "step": 3140 }, { "epoch": 0.44993553932101416, "grad_norm": 1.020513653755188, "learning_rate": 0.00012097720518570326, "loss": 1.3504, "step": 3141 }, { "epoch": 0.450078785274316, "grad_norm": 1.0248788595199585, "learning_rate": 0.00012093183750007606, "loss": 1.4489, "step": 3142 }, { "epoch": 0.45022203122761784, "grad_norm": 1.084900975227356, "learning_rate": 0.00012088646530834218, "loss": 1.4488, "step": 3143 }, { "epoch": 0.4503652771809196, "grad_norm": 1.1388241052627563, "learning_rate": 0.0001208410886202692, "loss": 1.4693, "step": 3144 }, { "epoch": 0.45050852313422146, "grad_norm": 1.2316937446594238, "learning_rate": 0.00012079570744562558, "loss": 1.5066, "step": 3145 }, { "epoch": 0.4506517690875233, "grad_norm": 1.0961016416549683, "learning_rate": 0.00012075032179418076, "loss": 1.596, "step": 3146 }, { "epoch": 0.4507950150408251, "grad_norm": 1.2796114683151245, "learning_rate": 0.00012070493167570516, "loss": 1.5465, "step": 3147 }, { "epoch": 0.4509382609941269, "grad_norm": 1.001691460609436, "learning_rate": 0.00012065953709997009, "loss": 1.2934, "step": 3148 }, { "epoch": 0.4510815069474287, "grad_norm": 1.000536561012268, "learning_rate": 0.00012061413807674797, "loss": 1.4037, "step": 3149 }, { "epoch": 0.45122475290073055, "grad_norm": 1.1320574283599854, "learning_rate": 0.00012056873461581204, "loss": 1.3156, "step": 3150 }, { "epoch": 0.4513679988540324, "grad_norm": 1.2488324642181396, "learning_rate": 0.00012052332672693656, "loss": 1.654, "step": 3151 }, { "epoch": 0.4515112448073342, "grad_norm": 1.1280473470687866, "learning_rate": 0.00012047791441989665, "loss": 1.3956, "step": 3152 }, { "epoch": 0.451654490760636, "grad_norm": 1.290539026260376, "learning_rate": 0.00012043249770446856, "loss": 1.5538, "step": 3153 }, { "epoch": 0.45179773671393786, "grad_norm": 1.135595440864563, "learning_rate": 0.00012038707659042934, "loss": 1.6069, "step": 3154 }, { "epoch": 0.45194098266723964, "grad_norm": 1.1256420612335205, "learning_rate": 0.00012034165108755702, "loss": 1.5341, "step": 3155 }, { "epoch": 0.4520842286205415, "grad_norm": 0.9917011857032776, "learning_rate": 0.0001202962212056306, "loss": 1.3792, "step": 3156 }, { "epoch": 0.45222747457384327, "grad_norm": 1.1166476011276245, "learning_rate": 0.00012025078695442999, "loss": 1.6314, "step": 3157 }, { "epoch": 0.4523707205271451, "grad_norm": 1.3799906969070435, "learning_rate": 0.00012020534834373612, "loss": 1.6004, "step": 3158 }, { "epoch": 0.45251396648044695, "grad_norm": 1.0500123500823975, "learning_rate": 0.00012015990538333073, "loss": 1.5516, "step": 3159 }, { "epoch": 0.45265721243374873, "grad_norm": 1.000496745109558, "learning_rate": 0.00012011445808299659, "loss": 1.3987, "step": 3160 }, { "epoch": 0.4528004583870506, "grad_norm": 1.2447670698165894, "learning_rate": 0.0001200690064525174, "loss": 1.2693, "step": 3161 }, { "epoch": 0.45294370434035236, "grad_norm": 1.1051541566848755, "learning_rate": 0.00012002355050167776, "loss": 1.4146, "step": 3162 }, { "epoch": 0.4530869502936542, "grad_norm": 1.4651917219161987, "learning_rate": 0.00011997809024026316, "loss": 1.5577, "step": 3163 }, { "epoch": 0.45323019624695604, "grad_norm": 1.4386762380599976, "learning_rate": 0.00011993262567806012, "loss": 1.5066, "step": 3164 }, { "epoch": 0.4533734422002578, "grad_norm": 1.0089925527572632, "learning_rate": 0.00011988715682485602, "loss": 1.5253, "step": 3165 }, { "epoch": 0.45351668815355967, "grad_norm": 1.1390166282653809, "learning_rate": 0.00011984168369043922, "loss": 1.6318, "step": 3166 }, { "epoch": 0.4536599341068615, "grad_norm": 1.1663395166397095, "learning_rate": 0.00011979620628459893, "loss": 1.5577, "step": 3167 }, { "epoch": 0.4538031800601633, "grad_norm": 1.1122777462005615, "learning_rate": 0.00011975072461712527, "loss": 1.289, "step": 3168 }, { "epoch": 0.45394642601346513, "grad_norm": 1.0953245162963867, "learning_rate": 0.00011970523869780938, "loss": 1.357, "step": 3169 }, { "epoch": 0.4540896719667669, "grad_norm": 1.1185871362686157, "learning_rate": 0.00011965974853644321, "loss": 1.5582, "step": 3170 }, { "epoch": 0.45423291792006876, "grad_norm": 0.8848204016685486, "learning_rate": 0.0001196142541428197, "loss": 1.4751, "step": 3171 }, { "epoch": 0.4543761638733706, "grad_norm": 1.0851625204086304, "learning_rate": 0.00011956875552673268, "loss": 1.3979, "step": 3172 }, { "epoch": 0.4545194098266724, "grad_norm": 0.8973032832145691, "learning_rate": 0.00011952325269797677, "loss": 1.5557, "step": 3173 }, { "epoch": 0.4546626557799742, "grad_norm": 1.1015524864196777, "learning_rate": 0.00011947774566634775, "loss": 1.4956, "step": 3174 }, { "epoch": 0.454805901733276, "grad_norm": 1.3304615020751953, "learning_rate": 0.00011943223444164205, "loss": 1.3905, "step": 3175 }, { "epoch": 0.45494914768657785, "grad_norm": 0.8992533683776855, "learning_rate": 0.00011938671903365717, "loss": 1.5126, "step": 3176 }, { "epoch": 0.4550923936398797, "grad_norm": 1.1368868350982666, "learning_rate": 0.0001193411994521914, "loss": 1.4214, "step": 3177 }, { "epoch": 0.4552356395931815, "grad_norm": 1.0306721925735474, "learning_rate": 0.00011929567570704403, "loss": 1.4475, "step": 3178 }, { "epoch": 0.4553788855464833, "grad_norm": 0.9626904726028442, "learning_rate": 0.00011925014780801516, "loss": 1.4349, "step": 3179 }, { "epoch": 0.45552213149978515, "grad_norm": 1.0968292951583862, "learning_rate": 0.00011920461576490584, "loss": 1.5206, "step": 3180 }, { "epoch": 0.45566537745308694, "grad_norm": 1.0577499866485596, "learning_rate": 0.000119159079587518, "loss": 1.6073, "step": 3181 }, { "epoch": 0.4558086234063888, "grad_norm": 1.1996663808822632, "learning_rate": 0.0001191135392856544, "loss": 1.5111, "step": 3182 }, { "epoch": 0.45595186935969056, "grad_norm": 1.1528682708740234, "learning_rate": 0.00011906799486911884, "loss": 1.3852, "step": 3183 }, { "epoch": 0.4560951153129924, "grad_norm": 1.2124192714691162, "learning_rate": 0.0001190224463477158, "loss": 1.3976, "step": 3184 }, { "epoch": 0.45623836126629425, "grad_norm": 1.031524658203125, "learning_rate": 0.00011897689373125081, "loss": 1.4574, "step": 3185 }, { "epoch": 0.45638160721959603, "grad_norm": 1.2920843362808228, "learning_rate": 0.00011893133702953018, "loss": 1.337, "step": 3186 }, { "epoch": 0.45652485317289787, "grad_norm": 1.2023402452468872, "learning_rate": 0.00011888577625236122, "loss": 1.5628, "step": 3187 }, { "epoch": 0.4566680991261997, "grad_norm": 1.042397379875183, "learning_rate": 0.00011884021140955193, "loss": 1.5363, "step": 3188 }, { "epoch": 0.4568113450795015, "grad_norm": 1.049320936203003, "learning_rate": 0.00011879464251091135, "loss": 1.6724, "step": 3189 }, { "epoch": 0.45695459103280334, "grad_norm": 1.096657633781433, "learning_rate": 0.00011874906956624934, "loss": 1.455, "step": 3190 }, { "epoch": 0.4570978369861051, "grad_norm": 1.6191881895065308, "learning_rate": 0.00011870349258537663, "loss": 1.4983, "step": 3191 }, { "epoch": 0.45724108293940696, "grad_norm": 1.1210522651672363, "learning_rate": 0.00011865791157810482, "loss": 1.4312, "step": 3192 }, { "epoch": 0.4573843288927088, "grad_norm": 1.0992144346237183, "learning_rate": 0.00011861232655424633, "loss": 1.5135, "step": 3193 }, { "epoch": 0.4575275748460106, "grad_norm": 0.9991959929466248, "learning_rate": 0.00011856673752361453, "loss": 1.4379, "step": 3194 }, { "epoch": 0.45767082079931243, "grad_norm": 1.1315364837646484, "learning_rate": 0.00011852114449602358, "loss": 1.2098, "step": 3195 }, { "epoch": 0.4578140667526142, "grad_norm": 0.9892229437828064, "learning_rate": 0.0001184755474812886, "loss": 1.5271, "step": 3196 }, { "epoch": 0.45795731270591605, "grad_norm": 1.134285807609558, "learning_rate": 0.00011842994648922536, "loss": 1.5191, "step": 3197 }, { "epoch": 0.4581005586592179, "grad_norm": 1.2596986293792725, "learning_rate": 0.00011838434152965079, "loss": 1.7602, "step": 3198 }, { "epoch": 0.4582438046125197, "grad_norm": 1.05851149559021, "learning_rate": 0.0001183387326123824, "loss": 1.3102, "step": 3199 }, { "epoch": 0.4583870505658215, "grad_norm": 1.212060809135437, "learning_rate": 0.00011829311974723867, "loss": 1.5732, "step": 3200 }, { "epoch": 0.45853029651912336, "grad_norm": 1.5055166482925415, "learning_rate": 0.00011824750294403899, "loss": 1.4154, "step": 3201 }, { "epoch": 0.45867354247242514, "grad_norm": 1.1464976072311401, "learning_rate": 0.00011820188221260341, "loss": 1.1807, "step": 3202 }, { "epoch": 0.458816788425727, "grad_norm": 1.1664738655090332, "learning_rate": 0.0001181562575627531, "loss": 1.3686, "step": 3203 }, { "epoch": 0.45896003437902877, "grad_norm": 1.090941071510315, "learning_rate": 0.00011811062900430978, "loss": 1.4948, "step": 3204 }, { "epoch": 0.4591032803323306, "grad_norm": 1.0906645059585571, "learning_rate": 0.00011806499654709621, "loss": 1.4818, "step": 3205 }, { "epoch": 0.45924652628563245, "grad_norm": 1.075850248336792, "learning_rate": 0.00011801936020093594, "loss": 1.5219, "step": 3206 }, { "epoch": 0.45938977223893424, "grad_norm": 1.2285093069076538, "learning_rate": 0.00011797371997565332, "loss": 1.3623, "step": 3207 }, { "epoch": 0.4595330181922361, "grad_norm": 1.1431947946548462, "learning_rate": 0.00011792807588107357, "loss": 1.4711, "step": 3208 }, { "epoch": 0.4596762641455379, "grad_norm": 1.026307463645935, "learning_rate": 0.00011788242792702275, "loss": 1.4672, "step": 3209 }, { "epoch": 0.4598195100988397, "grad_norm": 1.1010686159133911, "learning_rate": 0.00011783677612332769, "loss": 1.5379, "step": 3210 }, { "epoch": 0.45996275605214154, "grad_norm": 0.9650765061378479, "learning_rate": 0.00011779112047981613, "loss": 1.3829, "step": 3211 }, { "epoch": 0.4601060020054433, "grad_norm": 0.9946237802505493, "learning_rate": 0.00011774546100631662, "loss": 1.35, "step": 3212 }, { "epoch": 0.46024924795874517, "grad_norm": 1.1635483503341675, "learning_rate": 0.00011769979771265846, "loss": 1.4547, "step": 3213 }, { "epoch": 0.460392493912047, "grad_norm": 1.290520191192627, "learning_rate": 0.00011765413060867185, "loss": 1.4352, "step": 3214 }, { "epoch": 0.4605357398653488, "grad_norm": 1.0893737077713013, "learning_rate": 0.00011760845970418782, "loss": 1.5261, "step": 3215 }, { "epoch": 0.46067898581865063, "grad_norm": 1.1100637912750244, "learning_rate": 0.00011756278500903812, "loss": 1.5418, "step": 3216 }, { "epoch": 0.4608222317719524, "grad_norm": 0.9315115809440613, "learning_rate": 0.00011751710653305546, "loss": 1.5853, "step": 3217 }, { "epoch": 0.46096547772525426, "grad_norm": 1.2445460557937622, "learning_rate": 0.00011747142428607318, "loss": 1.5143, "step": 3218 }, { "epoch": 0.4611087236785561, "grad_norm": 1.309849500656128, "learning_rate": 0.00011742573827792567, "loss": 1.2848, "step": 3219 }, { "epoch": 0.4612519696318579, "grad_norm": 1.222452998161316, "learning_rate": 0.0001173800485184479, "loss": 1.4051, "step": 3220 }, { "epoch": 0.4613952155851597, "grad_norm": 1.0889021158218384, "learning_rate": 0.00011733435501747578, "loss": 1.4286, "step": 3221 }, { "epoch": 0.46153846153846156, "grad_norm": 1.071033000946045, "learning_rate": 0.00011728865778484597, "loss": 1.5786, "step": 3222 }, { "epoch": 0.46168170749176335, "grad_norm": 1.0363655090332031, "learning_rate": 0.00011724295683039599, "loss": 1.4877, "step": 3223 }, { "epoch": 0.4618249534450652, "grad_norm": 1.147303819656372, "learning_rate": 0.00011719725216396409, "loss": 1.37, "step": 3224 }, { "epoch": 0.461968199398367, "grad_norm": 1.2108420133590698, "learning_rate": 0.00011715154379538935, "loss": 1.544, "step": 3225 }, { "epoch": 0.4621114453516688, "grad_norm": 1.2431963682174683, "learning_rate": 0.0001171058317345117, "loss": 1.6235, "step": 3226 }, { "epoch": 0.46225469130497066, "grad_norm": 1.1180698871612549, "learning_rate": 0.00011706011599117173, "loss": 1.4591, "step": 3227 }, { "epoch": 0.46239793725827244, "grad_norm": 1.249788522720337, "learning_rate": 0.000117014396575211, "loss": 1.4284, "step": 3228 }, { "epoch": 0.4625411832115743, "grad_norm": 1.12009596824646, "learning_rate": 0.00011696867349647171, "loss": 1.4558, "step": 3229 }, { "epoch": 0.46268442916487607, "grad_norm": 1.0417641401290894, "learning_rate": 0.00011692294676479696, "loss": 1.3909, "step": 3230 }, { "epoch": 0.4628276751181779, "grad_norm": 1.2301995754241943, "learning_rate": 0.00011687721639003051, "loss": 1.3593, "step": 3231 }, { "epoch": 0.46297092107147975, "grad_norm": 1.1385197639465332, "learning_rate": 0.00011683148238201704, "loss": 1.3172, "step": 3232 }, { "epoch": 0.46311416702478153, "grad_norm": 1.1522725820541382, "learning_rate": 0.00011678574475060191, "loss": 1.3315, "step": 3233 }, { "epoch": 0.46325741297808337, "grad_norm": 1.056440830230713, "learning_rate": 0.00011674000350563133, "loss": 1.4312, "step": 3234 }, { "epoch": 0.4634006589313852, "grad_norm": 1.0666872262954712, "learning_rate": 0.00011669425865695223, "loss": 1.4525, "step": 3235 }, { "epoch": 0.463543904884687, "grad_norm": 1.1347888708114624, "learning_rate": 0.00011664851021441237, "loss": 1.4984, "step": 3236 }, { "epoch": 0.46368715083798884, "grad_norm": 1.2883591651916504, "learning_rate": 0.00011660275818786027, "loss": 1.4263, "step": 3237 }, { "epoch": 0.4638303967912906, "grad_norm": 1.2693606615066528, "learning_rate": 0.00011655700258714517, "loss": 1.3976, "step": 3238 }, { "epoch": 0.46397364274459246, "grad_norm": 1.0672329664230347, "learning_rate": 0.00011651124342211712, "loss": 1.4357, "step": 3239 }, { "epoch": 0.4641168886978943, "grad_norm": 1.3022429943084717, "learning_rate": 0.00011646548070262695, "loss": 1.4494, "step": 3240 }, { "epoch": 0.4642601346511961, "grad_norm": 1.0523098707199097, "learning_rate": 0.00011641971443852627, "loss": 1.3581, "step": 3241 }, { "epoch": 0.46440338060449793, "grad_norm": 1.079986810684204, "learning_rate": 0.00011637394463966737, "loss": 1.4575, "step": 3242 }, { "epoch": 0.46454662655779977, "grad_norm": 0.9372568726539612, "learning_rate": 0.00011632817131590339, "loss": 1.3717, "step": 3243 }, { "epoch": 0.46468987251110155, "grad_norm": 1.197654128074646, "learning_rate": 0.0001162823944770882, "loss": 1.3723, "step": 3244 }, { "epoch": 0.4648331184644034, "grad_norm": 1.268088698387146, "learning_rate": 0.00011623661413307639, "loss": 1.2595, "step": 3245 }, { "epoch": 0.4649763644177052, "grad_norm": 1.2092959880828857, "learning_rate": 0.00011619083029372338, "loss": 1.6454, "step": 3246 }, { "epoch": 0.465119610371007, "grad_norm": 1.1908023357391357, "learning_rate": 0.0001161450429688852, "loss": 1.5659, "step": 3247 }, { "epoch": 0.46526285632430886, "grad_norm": 1.0129014253616333, "learning_rate": 0.00011609925216841886, "loss": 1.2195, "step": 3248 }, { "epoch": 0.46540610227761064, "grad_norm": 1.0606540441513062, "learning_rate": 0.00011605345790218189, "loss": 1.6816, "step": 3249 }, { "epoch": 0.4655493482309125, "grad_norm": 0.9883731007575989, "learning_rate": 0.0001160076601800327, "loss": 1.5093, "step": 3250 }, { "epoch": 0.46569259418421427, "grad_norm": 1.1119035482406616, "learning_rate": 0.00011596185901183043, "loss": 1.6487, "step": 3251 }, { "epoch": 0.4658358401375161, "grad_norm": 1.2177107334136963, "learning_rate": 0.00011591605440743488, "loss": 1.4966, "step": 3252 }, { "epoch": 0.46597908609081795, "grad_norm": 1.0626349449157715, "learning_rate": 0.00011587024637670669, "loss": 1.5204, "step": 3253 }, { "epoch": 0.46612233204411974, "grad_norm": 1.2371116876602173, "learning_rate": 0.00011582443492950716, "loss": 1.4893, "step": 3254 }, { "epoch": 0.4662655779974216, "grad_norm": 1.1134074926376343, "learning_rate": 0.00011577862007569842, "loss": 1.5642, "step": 3255 }, { "epoch": 0.4664088239507234, "grad_norm": 1.4752849340438843, "learning_rate": 0.00011573280182514321, "loss": 1.4662, "step": 3256 }, { "epoch": 0.4665520699040252, "grad_norm": 1.1920244693756104, "learning_rate": 0.00011568698018770512, "loss": 1.4591, "step": 3257 }, { "epoch": 0.46669531585732704, "grad_norm": 1.1396429538726807, "learning_rate": 0.00011564115517324836, "loss": 1.3689, "step": 3258 }, { "epoch": 0.4668385618106288, "grad_norm": 1.0617574453353882, "learning_rate": 0.00011559532679163796, "loss": 1.307, "step": 3259 }, { "epoch": 0.46698180776393067, "grad_norm": 1.1682683229446411, "learning_rate": 0.00011554949505273962, "loss": 1.2312, "step": 3260 }, { "epoch": 0.4671250537172325, "grad_norm": 1.1680269241333008, "learning_rate": 0.00011550365996641979, "loss": 1.4445, "step": 3261 }, { "epoch": 0.4672682996705343, "grad_norm": 0.9556571841239929, "learning_rate": 0.00011545782154254565, "loss": 1.4546, "step": 3262 }, { "epoch": 0.46741154562383613, "grad_norm": 1.035881757736206, "learning_rate": 0.00011541197979098501, "loss": 1.2763, "step": 3263 }, { "epoch": 0.4675547915771379, "grad_norm": 1.1152753829956055, "learning_rate": 0.00011536613472160653, "loss": 1.4632, "step": 3264 }, { "epoch": 0.46769803753043976, "grad_norm": 1.0087718963623047, "learning_rate": 0.00011532028634427949, "loss": 1.233, "step": 3265 }, { "epoch": 0.4678412834837416, "grad_norm": 1.3248870372772217, "learning_rate": 0.00011527443466887393, "loss": 1.4627, "step": 3266 }, { "epoch": 0.4679845294370434, "grad_norm": 1.0195308923721313, "learning_rate": 0.00011522857970526058, "loss": 1.3447, "step": 3267 }, { "epoch": 0.4681277753903452, "grad_norm": 1.1418203115463257, "learning_rate": 0.00011518272146331082, "loss": 1.5206, "step": 3268 }, { "epoch": 0.46827102134364706, "grad_norm": 1.0765228271484375, "learning_rate": 0.00011513685995289689, "loss": 1.4247, "step": 3269 }, { "epoch": 0.46841426729694885, "grad_norm": 1.0995022058486938, "learning_rate": 0.00011509099518389156, "loss": 1.5482, "step": 3270 }, { "epoch": 0.4685575132502507, "grad_norm": 1.0573937892913818, "learning_rate": 0.00011504512716616846, "loss": 1.5475, "step": 3271 }, { "epoch": 0.4687007592035525, "grad_norm": 1.0786677598953247, "learning_rate": 0.00011499925590960172, "loss": 1.3765, "step": 3272 }, { "epoch": 0.4688440051568543, "grad_norm": 1.0796244144439697, "learning_rate": 0.00011495338142406643, "loss": 1.3029, "step": 3273 }, { "epoch": 0.46898725111015616, "grad_norm": 1.2162913084030151, "learning_rate": 0.00011490750371943813, "loss": 1.5052, "step": 3274 }, { "epoch": 0.46913049706345794, "grad_norm": 1.2971915006637573, "learning_rate": 0.00011486162280559316, "loss": 1.3708, "step": 3275 }, { "epoch": 0.4692737430167598, "grad_norm": 1.2811321020126343, "learning_rate": 0.0001148157386924086, "loss": 1.4009, "step": 3276 }, { "epoch": 0.4694169889700616, "grad_norm": 1.0241044759750366, "learning_rate": 0.00011476985138976209, "loss": 1.313, "step": 3277 }, { "epoch": 0.4695602349233634, "grad_norm": 1.291046142578125, "learning_rate": 0.0001147239609075321, "loss": 1.42, "step": 3278 }, { "epoch": 0.46970348087666525, "grad_norm": 1.1343097686767578, "learning_rate": 0.00011467806725559769, "loss": 1.5026, "step": 3279 }, { "epoch": 0.46984672682996703, "grad_norm": 1.4172641038894653, "learning_rate": 0.00011463217044383865, "loss": 1.3834, "step": 3280 }, { "epoch": 0.4699899727832689, "grad_norm": 1.0918614864349365, "learning_rate": 0.00011458627048213535, "loss": 1.3579, "step": 3281 }, { "epoch": 0.4701332187365707, "grad_norm": 1.157840609550476, "learning_rate": 0.00011454036738036899, "loss": 1.4096, "step": 3282 }, { "epoch": 0.4702764646898725, "grad_norm": 0.9822141528129578, "learning_rate": 0.00011449446114842137, "loss": 1.6138, "step": 3283 }, { "epoch": 0.47041971064317434, "grad_norm": 1.2052268981933594, "learning_rate": 0.00011444855179617493, "loss": 1.6113, "step": 3284 }, { "epoch": 0.4705629565964761, "grad_norm": 1.1383486986160278, "learning_rate": 0.00011440263933351283, "loss": 1.4895, "step": 3285 }, { "epoch": 0.47070620254977796, "grad_norm": 1.1813592910766602, "learning_rate": 0.00011435672377031889, "loss": 1.3521, "step": 3286 }, { "epoch": 0.4708494485030798, "grad_norm": 1.4752061367034912, "learning_rate": 0.00011431080511647763, "loss": 1.5106, "step": 3287 }, { "epoch": 0.4709926944563816, "grad_norm": 1.105905294418335, "learning_rate": 0.00011426488338187414, "loss": 1.4534, "step": 3288 }, { "epoch": 0.47113594040968343, "grad_norm": 1.0869475603103638, "learning_rate": 0.00011421895857639424, "loss": 1.4655, "step": 3289 }, { "epoch": 0.47127918636298527, "grad_norm": 1.1574469804763794, "learning_rate": 0.00011417303070992445, "loss": 1.4881, "step": 3290 }, { "epoch": 0.47142243231628705, "grad_norm": 1.1386638879776, "learning_rate": 0.00011412709979235187, "loss": 1.5175, "step": 3291 }, { "epoch": 0.4715656782695889, "grad_norm": 1.1500166654586792, "learning_rate": 0.0001140811658335643, "loss": 1.5137, "step": 3292 }, { "epoch": 0.4717089242228907, "grad_norm": 1.018385410308838, "learning_rate": 0.00011403522884345017, "loss": 1.6268, "step": 3293 }, { "epoch": 0.4718521701761925, "grad_norm": 1.0819857120513916, "learning_rate": 0.00011398928883189859, "loss": 1.496, "step": 3294 }, { "epoch": 0.47199541612949436, "grad_norm": 1.1723010540008545, "learning_rate": 0.00011394334580879931, "loss": 1.5022, "step": 3295 }, { "epoch": 0.47213866208279615, "grad_norm": 1.1481480598449707, "learning_rate": 0.00011389739978404273, "loss": 1.3645, "step": 3296 }, { "epoch": 0.472281908036098, "grad_norm": 1.3597381114959717, "learning_rate": 0.00011385145076751986, "loss": 1.6515, "step": 3297 }, { "epoch": 0.4724251539893998, "grad_norm": 1.186806321144104, "learning_rate": 0.00011380549876912244, "loss": 1.3683, "step": 3298 }, { "epoch": 0.4725683999427016, "grad_norm": 0.9269993901252747, "learning_rate": 0.00011375954379874274, "loss": 1.3438, "step": 3299 }, { "epoch": 0.47271164589600345, "grad_norm": 0.9802612066268921, "learning_rate": 0.00011371358586627376, "loss": 1.3914, "step": 3300 }, { "epoch": 0.47285489184930524, "grad_norm": 1.0864516496658325, "learning_rate": 0.00011366762498160914, "loss": 1.5858, "step": 3301 }, { "epoch": 0.4729981378026071, "grad_norm": 1.1361186504364014, "learning_rate": 0.00011362166115464304, "loss": 1.5147, "step": 3302 }, { "epoch": 0.4731413837559089, "grad_norm": 1.2907289266586304, "learning_rate": 0.00011357569439527038, "loss": 1.2991, "step": 3303 }, { "epoch": 0.4732846297092107, "grad_norm": 1.1535943746566772, "learning_rate": 0.00011352972471338668, "loss": 1.5526, "step": 3304 }, { "epoch": 0.47342787566251254, "grad_norm": 1.024580955505371, "learning_rate": 0.00011348375211888807, "loss": 1.3561, "step": 3305 }, { "epoch": 0.4735711216158143, "grad_norm": 1.0737757682800293, "learning_rate": 0.00011343777662167126, "loss": 1.6192, "step": 3306 }, { "epoch": 0.47371436756911617, "grad_norm": 1.3476982116699219, "learning_rate": 0.0001133917982316337, "loss": 1.4071, "step": 3307 }, { "epoch": 0.473857613522418, "grad_norm": 1.112101674079895, "learning_rate": 0.00011334581695867339, "loss": 1.4657, "step": 3308 }, { "epoch": 0.4740008594757198, "grad_norm": 0.944963276386261, "learning_rate": 0.00011329983281268892, "loss": 1.5708, "step": 3309 }, { "epoch": 0.47414410542902163, "grad_norm": 1.1166667938232422, "learning_rate": 0.00011325384580357957, "loss": 1.6797, "step": 3310 }, { "epoch": 0.4742873513823235, "grad_norm": 1.2200385332107544, "learning_rate": 0.00011320785594124518, "loss": 1.4006, "step": 3311 }, { "epoch": 0.47443059733562526, "grad_norm": 1.0667027235031128, "learning_rate": 0.00011316186323558631, "loss": 1.6627, "step": 3312 }, { "epoch": 0.4745738432889271, "grad_norm": 0.9932939410209656, "learning_rate": 0.00011311586769650395, "loss": 1.4152, "step": 3313 }, { "epoch": 0.4747170892422289, "grad_norm": 1.087884545326233, "learning_rate": 0.00011306986933389984, "loss": 1.622, "step": 3314 }, { "epoch": 0.4748603351955307, "grad_norm": 1.0795326232910156, "learning_rate": 0.00011302386815767629, "loss": 1.4096, "step": 3315 }, { "epoch": 0.47500358114883257, "grad_norm": 1.206787347793579, "learning_rate": 0.00011297786417773626, "loss": 1.3949, "step": 3316 }, { "epoch": 0.47514682710213435, "grad_norm": 1.0302808284759521, "learning_rate": 0.00011293185740398317, "loss": 1.458, "step": 3317 }, { "epoch": 0.4752900730554362, "grad_norm": 1.0773059129714966, "learning_rate": 0.00011288584784632124, "loss": 1.4646, "step": 3318 }, { "epoch": 0.475433319008738, "grad_norm": 1.0801604986190796, "learning_rate": 0.00011283983551465511, "loss": 1.3775, "step": 3319 }, { "epoch": 0.4755765649620398, "grad_norm": 1.4271061420440674, "learning_rate": 0.00011279382041889013, "loss": 1.4933, "step": 3320 }, { "epoch": 0.47571981091534166, "grad_norm": 1.1232134103775024, "learning_rate": 0.00011274780256893225, "loss": 1.384, "step": 3321 }, { "epoch": 0.47586305686864344, "grad_norm": 1.0800385475158691, "learning_rate": 0.00011270178197468789, "loss": 1.399, "step": 3322 }, { "epoch": 0.4760063028219453, "grad_norm": 1.155958890914917, "learning_rate": 0.00011265575864606421, "loss": 1.2865, "step": 3323 }, { "epoch": 0.4761495487752471, "grad_norm": 1.3391797542572021, "learning_rate": 0.00011260973259296888, "loss": 1.626, "step": 3324 }, { "epoch": 0.4762927947285489, "grad_norm": 1.170339822769165, "learning_rate": 0.00011256370382531017, "loss": 1.4887, "step": 3325 }, { "epoch": 0.47643604068185075, "grad_norm": 0.9008062481880188, "learning_rate": 0.00011251767235299688, "loss": 1.3303, "step": 3326 }, { "epoch": 0.47657928663515253, "grad_norm": 1.3610492944717407, "learning_rate": 0.00011247163818593856, "loss": 1.6229, "step": 3327 }, { "epoch": 0.4767225325884544, "grad_norm": 1.4153848886489868, "learning_rate": 0.00011242560133404513, "loss": 1.5134, "step": 3328 }, { "epoch": 0.4768657785417562, "grad_norm": 0.9834225177764893, "learning_rate": 0.00011237956180722722, "loss": 1.3704, "step": 3329 }, { "epoch": 0.477009024495058, "grad_norm": 1.1418579816818237, "learning_rate": 0.00011233351961539605, "loss": 1.5619, "step": 3330 }, { "epoch": 0.47715227044835984, "grad_norm": 1.1441233158111572, "learning_rate": 0.00011228747476846322, "loss": 1.53, "step": 3331 }, { "epoch": 0.4772955164016617, "grad_norm": 1.1124095916748047, "learning_rate": 0.00011224142727634122, "loss": 1.4748, "step": 3332 }, { "epoch": 0.47743876235496346, "grad_norm": 0.9663305878639221, "learning_rate": 0.00011219537714894282, "loss": 1.6258, "step": 3333 }, { "epoch": 0.4775820083082653, "grad_norm": 1.1099047660827637, "learning_rate": 0.00011214932439618151, "loss": 1.4062, "step": 3334 }, { "epoch": 0.4777252542615671, "grad_norm": 1.1967366933822632, "learning_rate": 0.00011210326902797131, "loss": 1.3045, "step": 3335 }, { "epoch": 0.47786850021486893, "grad_norm": 1.1873387098312378, "learning_rate": 0.00011205721105422679, "loss": 1.4316, "step": 3336 }, { "epoch": 0.47801174616817077, "grad_norm": 1.2075488567352295, "learning_rate": 0.00011201115048486313, "loss": 1.436, "step": 3337 }, { "epoch": 0.47815499212147256, "grad_norm": 0.9980928301811218, "learning_rate": 0.00011196508732979599, "loss": 1.4364, "step": 3338 }, { "epoch": 0.4782982380747744, "grad_norm": 1.2457408905029297, "learning_rate": 0.00011191902159894161, "loss": 1.5365, "step": 3339 }, { "epoch": 0.4784414840280762, "grad_norm": 1.1193900108337402, "learning_rate": 0.00011187295330221686, "loss": 1.547, "step": 3340 }, { "epoch": 0.478584729981378, "grad_norm": 1.2252111434936523, "learning_rate": 0.00011182688244953907, "loss": 1.3891, "step": 3341 }, { "epoch": 0.47872797593467986, "grad_norm": 0.9265722632408142, "learning_rate": 0.00011178080905082615, "loss": 1.3052, "step": 3342 }, { "epoch": 0.47887122188798165, "grad_norm": 1.2359217405319214, "learning_rate": 0.00011173473311599656, "loss": 1.543, "step": 3343 }, { "epoch": 0.4790144678412835, "grad_norm": 1.208631157875061, "learning_rate": 0.00011168865465496932, "loss": 1.3644, "step": 3344 }, { "epoch": 0.4791577137945853, "grad_norm": 1.1615692377090454, "learning_rate": 0.000111642573677664, "loss": 1.4478, "step": 3345 }, { "epoch": 0.4793009597478871, "grad_norm": 1.2925164699554443, "learning_rate": 0.00011159649019400069, "loss": 1.5116, "step": 3346 }, { "epoch": 0.47944420570118895, "grad_norm": 1.3147971630096436, "learning_rate": 0.00011155040421389996, "loss": 1.4869, "step": 3347 }, { "epoch": 0.47958745165449074, "grad_norm": 1.1452064514160156, "learning_rate": 0.00011150431574728308, "loss": 1.3199, "step": 3348 }, { "epoch": 0.4797306976077926, "grad_norm": 1.0164663791656494, "learning_rate": 0.00011145822480407168, "loss": 1.5674, "step": 3349 }, { "epoch": 0.4798739435610944, "grad_norm": 1.0848424434661865, "learning_rate": 0.00011141213139418805, "loss": 1.4609, "step": 3350 }, { "epoch": 0.4800171895143962, "grad_norm": 1.088468313217163, "learning_rate": 0.00011136603552755489, "loss": 1.4898, "step": 3351 }, { "epoch": 0.48016043546769804, "grad_norm": 0.9518824219703674, "learning_rate": 0.00011131993721409559, "loss": 1.3477, "step": 3352 }, { "epoch": 0.4803036814209999, "grad_norm": 0.9744654893875122, "learning_rate": 0.00011127383646373393, "loss": 1.3456, "step": 3353 }, { "epoch": 0.48044692737430167, "grad_norm": 1.3703196048736572, "learning_rate": 0.00011122773328639424, "loss": 1.4912, "step": 3354 }, { "epoch": 0.4805901733276035, "grad_norm": 1.048532485961914, "learning_rate": 0.00011118162769200146, "loss": 1.3422, "step": 3355 }, { "epoch": 0.4807334192809053, "grad_norm": 1.2435318231582642, "learning_rate": 0.00011113551969048089, "loss": 1.5543, "step": 3356 }, { "epoch": 0.48087666523420713, "grad_norm": 1.1021407842636108, "learning_rate": 0.00011108940929175853, "loss": 1.4821, "step": 3357 }, { "epoch": 0.481019911187509, "grad_norm": 1.1331379413604736, "learning_rate": 0.00011104329650576073, "loss": 1.3955, "step": 3358 }, { "epoch": 0.48116315714081076, "grad_norm": 0.973921537399292, "learning_rate": 0.00011099718134241451, "loss": 1.5139, "step": 3359 }, { "epoch": 0.4813064030941126, "grad_norm": 1.0713199377059937, "learning_rate": 0.00011095106381164727, "loss": 1.5502, "step": 3360 }, { "epoch": 0.4814496490474144, "grad_norm": 1.7693744897842407, "learning_rate": 0.00011090494392338697, "loss": 1.485, "step": 3361 }, { "epoch": 0.4815928950007162, "grad_norm": 1.1509101390838623, "learning_rate": 0.00011085882168756212, "loss": 1.5082, "step": 3362 }, { "epoch": 0.48173614095401807, "grad_norm": 1.1039948463439941, "learning_rate": 0.00011081269711410167, "loss": 1.4066, "step": 3363 }, { "epoch": 0.48187938690731985, "grad_norm": 1.040331244468689, "learning_rate": 0.0001107665702129351, "loss": 1.4914, "step": 3364 }, { "epoch": 0.4820226328606217, "grad_norm": 0.994448184967041, "learning_rate": 0.00011072044099399242, "loss": 1.5184, "step": 3365 }, { "epoch": 0.48216587881392353, "grad_norm": 1.1898798942565918, "learning_rate": 0.00011067430946720408, "loss": 1.4252, "step": 3366 }, { "epoch": 0.4823091247672253, "grad_norm": 1.2397408485412598, "learning_rate": 0.00011062817564250103, "loss": 1.5617, "step": 3367 }, { "epoch": 0.48245237072052716, "grad_norm": 1.310971975326538, "learning_rate": 0.00011058203952981476, "loss": 1.521, "step": 3368 }, { "epoch": 0.48259561667382894, "grad_norm": 1.112984538078308, "learning_rate": 0.00011053590113907728, "loss": 1.5257, "step": 3369 }, { "epoch": 0.4827388626271308, "grad_norm": 1.036872148513794, "learning_rate": 0.000110489760480221, "loss": 1.4617, "step": 3370 }, { "epoch": 0.4828821085804326, "grad_norm": 1.2088261842727661, "learning_rate": 0.00011044361756317887, "loss": 1.415, "step": 3371 }, { "epoch": 0.4830253545337344, "grad_norm": 1.2457714080810547, "learning_rate": 0.0001103974723978843, "loss": 1.4838, "step": 3372 }, { "epoch": 0.48316860048703625, "grad_norm": 1.0319205522537231, "learning_rate": 0.00011035132499427123, "loss": 1.2253, "step": 3373 }, { "epoch": 0.48331184644033803, "grad_norm": 1.1339499950408936, "learning_rate": 0.00011030517536227405, "loss": 1.3781, "step": 3374 }, { "epoch": 0.4834550923936399, "grad_norm": 1.1151762008666992, "learning_rate": 0.00011025902351182765, "loss": 1.4924, "step": 3375 }, { "epoch": 0.4835983383469417, "grad_norm": 1.17976975440979, "learning_rate": 0.00011021286945286731, "loss": 1.6241, "step": 3376 }, { "epoch": 0.4837415843002435, "grad_norm": 1.213753581047058, "learning_rate": 0.00011016671319532894, "loss": 1.4362, "step": 3377 }, { "epoch": 0.48388483025354534, "grad_norm": 1.0604629516601562, "learning_rate": 0.0001101205547491488, "loss": 1.4856, "step": 3378 }, { "epoch": 0.4840280762068472, "grad_norm": 1.2181966304779053, "learning_rate": 0.00011007439412426365, "loss": 1.409, "step": 3379 }, { "epoch": 0.48417132216014896, "grad_norm": 1.2243969440460205, "learning_rate": 0.00011002823133061079, "loss": 1.4336, "step": 3380 }, { "epoch": 0.4843145681134508, "grad_norm": 1.1662894487380981, "learning_rate": 0.00010998206637812783, "loss": 1.3889, "step": 3381 }, { "epoch": 0.4844578140667526, "grad_norm": 1.183326005935669, "learning_rate": 0.00010993589927675305, "loss": 1.4406, "step": 3382 }, { "epoch": 0.48460106002005443, "grad_norm": 1.5117802619934082, "learning_rate": 0.00010988973003642499, "loss": 1.4511, "step": 3383 }, { "epoch": 0.48474430597335627, "grad_norm": 1.094020128250122, "learning_rate": 0.00010984355866708282, "loss": 1.5751, "step": 3384 }, { "epoch": 0.48488755192665806, "grad_norm": 1.1255096197128296, "learning_rate": 0.000109797385178666, "loss": 1.5607, "step": 3385 }, { "epoch": 0.4850307978799599, "grad_norm": 1.0145245790481567, "learning_rate": 0.00010975120958111467, "loss": 1.66, "step": 3386 }, { "epoch": 0.48517404383326174, "grad_norm": 1.2211908102035522, "learning_rate": 0.00010970503188436918, "loss": 1.5144, "step": 3387 }, { "epoch": 0.4853172897865635, "grad_norm": 1.147626280784607, "learning_rate": 0.0001096588520983705, "loss": 1.3995, "step": 3388 }, { "epoch": 0.48546053573986536, "grad_norm": 1.08010733127594, "learning_rate": 0.00010961267023305996, "loss": 1.4002, "step": 3389 }, { "epoch": 0.48560378169316715, "grad_norm": 1.216078519821167, "learning_rate": 0.00010956648629837943, "loss": 1.4301, "step": 3390 }, { "epoch": 0.485747027646469, "grad_norm": 1.5745607614517212, "learning_rate": 0.00010952030030427114, "loss": 1.4908, "step": 3391 }, { "epoch": 0.48589027359977083, "grad_norm": 1.4555200338363647, "learning_rate": 0.00010947411226067777, "loss": 1.3767, "step": 3392 }, { "epoch": 0.4860335195530726, "grad_norm": 1.1925334930419922, "learning_rate": 0.00010942792217754245, "loss": 1.5949, "step": 3393 }, { "epoch": 0.48617676550637445, "grad_norm": 1.046634554862976, "learning_rate": 0.00010938173006480881, "loss": 1.4549, "step": 3394 }, { "epoch": 0.48632001145967624, "grad_norm": 1.0390502214431763, "learning_rate": 0.00010933553593242085, "loss": 1.4628, "step": 3395 }, { "epoch": 0.4864632574129781, "grad_norm": 1.1968730688095093, "learning_rate": 0.00010928933979032305, "loss": 1.4044, "step": 3396 }, { "epoch": 0.4866065033662799, "grad_norm": 1.0941928625106812, "learning_rate": 0.00010924314164846021, "loss": 1.5119, "step": 3397 }, { "epoch": 0.4867497493195817, "grad_norm": 1.3465262651443481, "learning_rate": 0.00010919694151677778, "loss": 1.3476, "step": 3398 }, { "epoch": 0.48689299527288354, "grad_norm": 1.2691694498062134, "learning_rate": 0.00010915073940522136, "loss": 1.5623, "step": 3399 }, { "epoch": 0.4870362412261854, "grad_norm": 1.358580231666565, "learning_rate": 0.00010910453532373726, "loss": 1.5636, "step": 3400 }, { "epoch": 0.48717948717948717, "grad_norm": 1.0150936841964722, "learning_rate": 0.00010905832928227193, "loss": 1.492, "step": 3401 }, { "epoch": 0.487322733132789, "grad_norm": 1.1287716627120972, "learning_rate": 0.00010901212129077252, "loss": 1.3862, "step": 3402 }, { "epoch": 0.4874659790860908, "grad_norm": 1.1050970554351807, "learning_rate": 0.00010896591135918638, "loss": 1.3022, "step": 3403 }, { "epoch": 0.48760922503939264, "grad_norm": 1.2142102718353271, "learning_rate": 0.00010891969949746141, "loss": 1.5356, "step": 3404 }, { "epoch": 0.4877524709926945, "grad_norm": 1.1582467555999756, "learning_rate": 0.0001088734857155459, "loss": 1.5056, "step": 3405 }, { "epoch": 0.48789571694599626, "grad_norm": 1.208370566368103, "learning_rate": 0.00010882727002338842, "loss": 1.4675, "step": 3406 }, { "epoch": 0.4880389628992981, "grad_norm": 1.1286687850952148, "learning_rate": 0.00010878105243093821, "loss": 1.4124, "step": 3407 }, { "epoch": 0.48818220885259994, "grad_norm": 1.1695168018341064, "learning_rate": 0.00010873483294814471, "loss": 1.3558, "step": 3408 }, { "epoch": 0.4883254548059017, "grad_norm": 1.0616552829742432, "learning_rate": 0.00010868861158495782, "loss": 1.4408, "step": 3409 }, { "epoch": 0.48846870075920357, "grad_norm": 1.1130940914154053, "learning_rate": 0.00010864238835132783, "loss": 1.4247, "step": 3410 }, { "epoch": 0.48861194671250535, "grad_norm": 1.2358649969100952, "learning_rate": 0.00010859616325720554, "loss": 1.6288, "step": 3411 }, { "epoch": 0.4887551926658072, "grad_norm": 1.186363697052002, "learning_rate": 0.000108549936312542, "loss": 1.3682, "step": 3412 }, { "epoch": 0.48889843861910903, "grad_norm": 1.0015051364898682, "learning_rate": 0.00010850370752728874, "loss": 1.4704, "step": 3413 }, { "epoch": 0.4890416845724108, "grad_norm": 1.1904553174972534, "learning_rate": 0.0001084574769113977, "loss": 1.4473, "step": 3414 }, { "epoch": 0.48918493052571266, "grad_norm": 1.1983376741409302, "learning_rate": 0.00010841124447482115, "loss": 1.3232, "step": 3415 }, { "epoch": 0.48932817647901444, "grad_norm": 1.0688844919204712, "learning_rate": 0.00010836501022751184, "loss": 1.4294, "step": 3416 }, { "epoch": 0.4894714224323163, "grad_norm": 1.1069602966308594, "learning_rate": 0.00010831877417942283, "loss": 1.2488, "step": 3417 }, { "epoch": 0.4896146683856181, "grad_norm": 1.1865772008895874, "learning_rate": 0.00010827253634050758, "loss": 1.5328, "step": 3418 }, { "epoch": 0.4897579143389199, "grad_norm": 1.0765496492385864, "learning_rate": 0.00010822629672071995, "loss": 1.5252, "step": 3419 }, { "epoch": 0.48990116029222175, "grad_norm": 1.1689852476119995, "learning_rate": 0.00010818005533001425, "loss": 1.47, "step": 3420 }, { "epoch": 0.4900444062455236, "grad_norm": 1.1215379238128662, "learning_rate": 0.00010813381217834503, "loss": 1.3493, "step": 3421 }, { "epoch": 0.4901876521988254, "grad_norm": 1.6537460088729858, "learning_rate": 0.00010808756727566736, "loss": 1.4929, "step": 3422 }, { "epoch": 0.4903308981521272, "grad_norm": 1.136476993560791, "learning_rate": 0.00010804132063193655, "loss": 1.4882, "step": 3423 }, { "epoch": 0.490474144105429, "grad_norm": 1.0345782041549683, "learning_rate": 0.00010799507225710843, "loss": 1.3742, "step": 3424 }, { "epoch": 0.49061739005873084, "grad_norm": 1.2097022533416748, "learning_rate": 0.00010794882216113911, "loss": 1.5083, "step": 3425 }, { "epoch": 0.4907606360120327, "grad_norm": 1.0984169244766235, "learning_rate": 0.00010790257035398503, "loss": 1.4635, "step": 3426 }, { "epoch": 0.49090388196533447, "grad_norm": 1.0245141983032227, "learning_rate": 0.00010785631684560316, "loss": 1.4226, "step": 3427 }, { "epoch": 0.4910471279186363, "grad_norm": 1.4097518920898438, "learning_rate": 0.00010781006164595067, "loss": 1.3966, "step": 3428 }, { "epoch": 0.4911903738719381, "grad_norm": 1.069345235824585, "learning_rate": 0.00010776380476498518, "loss": 1.468, "step": 3429 }, { "epoch": 0.49133361982523993, "grad_norm": 1.1448043584823608, "learning_rate": 0.00010771754621266466, "loss": 1.4406, "step": 3430 }, { "epoch": 0.49147686577854177, "grad_norm": 1.1663097143173218, "learning_rate": 0.00010767128599894738, "loss": 1.464, "step": 3431 }, { "epoch": 0.49162011173184356, "grad_norm": 1.1089999675750732, "learning_rate": 0.00010762502413379209, "loss": 1.4586, "step": 3432 }, { "epoch": 0.4917633576851454, "grad_norm": 1.3007017374038696, "learning_rate": 0.0001075787606271578, "loss": 1.5278, "step": 3433 }, { "epoch": 0.49190660363844724, "grad_norm": 1.2848637104034424, "learning_rate": 0.0001075324954890039, "loss": 1.4243, "step": 3434 }, { "epoch": 0.492049849591749, "grad_norm": 1.0054789781570435, "learning_rate": 0.00010748622872929009, "loss": 1.5028, "step": 3435 }, { "epoch": 0.49219309554505086, "grad_norm": 1.0452539920806885, "learning_rate": 0.0001074399603579765, "loss": 1.5444, "step": 3436 }, { "epoch": 0.49233634149835265, "grad_norm": 1.15946364402771, "learning_rate": 0.00010739369038502356, "loss": 1.3588, "step": 3437 }, { "epoch": 0.4924795874516545, "grad_norm": 1.2754580974578857, "learning_rate": 0.00010734741882039204, "loss": 1.6796, "step": 3438 }, { "epoch": 0.49262283340495633, "grad_norm": 1.124145269393921, "learning_rate": 0.00010730114567404305, "loss": 1.519, "step": 3439 }, { "epoch": 0.4927660793582581, "grad_norm": 1.1078345775604248, "learning_rate": 0.00010725487095593811, "loss": 1.4547, "step": 3440 }, { "epoch": 0.49290932531155995, "grad_norm": 1.041854977607727, "learning_rate": 0.00010720859467603898, "loss": 1.4286, "step": 3441 }, { "epoch": 0.4930525712648618, "grad_norm": 1.1259956359863281, "learning_rate": 0.00010716231684430779, "loss": 1.2218, "step": 3442 }, { "epoch": 0.4931958172181636, "grad_norm": 1.2673567533493042, "learning_rate": 0.00010711603747070702, "loss": 1.396, "step": 3443 }, { "epoch": 0.4933390631714654, "grad_norm": 1.1557953357696533, "learning_rate": 0.00010706975656519946, "loss": 1.4831, "step": 3444 }, { "epoch": 0.4934823091247672, "grad_norm": 0.9598127603530884, "learning_rate": 0.00010702347413774832, "loss": 1.5148, "step": 3445 }, { "epoch": 0.49362555507806904, "grad_norm": 1.0985145568847656, "learning_rate": 0.00010697719019831695, "loss": 1.4051, "step": 3446 }, { "epoch": 0.4937688010313709, "grad_norm": 0.8837293386459351, "learning_rate": 0.0001069309047568692, "loss": 1.5701, "step": 3447 }, { "epoch": 0.49391204698467267, "grad_norm": 1.0502780675888062, "learning_rate": 0.00010688461782336915, "loss": 1.2204, "step": 3448 }, { "epoch": 0.4940552929379745, "grad_norm": 1.0196443796157837, "learning_rate": 0.00010683832940778127, "loss": 1.2234, "step": 3449 }, { "epoch": 0.4941985388912763, "grad_norm": 1.013756513595581, "learning_rate": 0.00010679203952007031, "loss": 1.4731, "step": 3450 }, { "epoch": 0.49434178484457814, "grad_norm": 1.258577585220337, "learning_rate": 0.00010674574817020128, "loss": 1.3739, "step": 3451 }, { "epoch": 0.49448503079788, "grad_norm": 1.1581677198410034, "learning_rate": 0.00010669945536813963, "loss": 1.4165, "step": 3452 }, { "epoch": 0.49462827675118176, "grad_norm": 1.1439077854156494, "learning_rate": 0.00010665316112385102, "loss": 1.3545, "step": 3453 }, { "epoch": 0.4947715227044836, "grad_norm": 1.2497040033340454, "learning_rate": 0.00010660686544730145, "loss": 1.4952, "step": 3454 }, { "epoch": 0.49491476865778544, "grad_norm": 1.1711071729660034, "learning_rate": 0.00010656056834845727, "loss": 1.2318, "step": 3455 }, { "epoch": 0.4950580146110872, "grad_norm": 1.291214108467102, "learning_rate": 0.00010651426983728503, "loss": 1.4765, "step": 3456 }, { "epoch": 0.49520126056438907, "grad_norm": 1.2223942279815674, "learning_rate": 0.00010646796992375172, "loss": 1.4039, "step": 3457 }, { "epoch": 0.49534450651769085, "grad_norm": 1.1790255308151245, "learning_rate": 0.00010642166861782455, "loss": 1.5554, "step": 3458 }, { "epoch": 0.4954877524709927, "grad_norm": 1.0292067527770996, "learning_rate": 0.00010637536592947103, "loss": 1.4417, "step": 3459 }, { "epoch": 0.49563099842429453, "grad_norm": 1.1095960140228271, "learning_rate": 0.00010632906186865899, "loss": 1.3685, "step": 3460 }, { "epoch": 0.4957742443775963, "grad_norm": 1.1283036470413208, "learning_rate": 0.00010628275644535657, "loss": 1.5092, "step": 3461 }, { "epoch": 0.49591749033089816, "grad_norm": 1.0702052116394043, "learning_rate": 0.00010623644966953212, "loss": 1.458, "step": 3462 }, { "epoch": 0.4960607362842, "grad_norm": 1.1234889030456543, "learning_rate": 0.00010619014155115441, "loss": 1.2532, "step": 3463 }, { "epoch": 0.4962039822375018, "grad_norm": 1.0504356622695923, "learning_rate": 0.00010614383210019241, "loss": 1.3048, "step": 3464 }, { "epoch": 0.4963472281908036, "grad_norm": 1.1960824728012085, "learning_rate": 0.00010609752132661539, "loss": 1.4627, "step": 3465 }, { "epoch": 0.4964904741441054, "grad_norm": 1.237145185470581, "learning_rate": 0.00010605120924039293, "loss": 1.4215, "step": 3466 }, { "epoch": 0.49663372009740725, "grad_norm": 1.198744535446167, "learning_rate": 0.00010600489585149484, "loss": 1.3301, "step": 3467 }, { "epoch": 0.4967769660507091, "grad_norm": 1.2066015005111694, "learning_rate": 0.00010595858116989128, "loss": 1.265, "step": 3468 }, { "epoch": 0.4969202120040109, "grad_norm": 1.202065348625183, "learning_rate": 0.00010591226520555264, "loss": 1.4988, "step": 3469 }, { "epoch": 0.4970634579573127, "grad_norm": 1.2704044580459595, "learning_rate": 0.00010586594796844965, "loss": 1.6693, "step": 3470 }, { "epoch": 0.4972067039106145, "grad_norm": 1.300209641456604, "learning_rate": 0.00010581962946855317, "loss": 1.7585, "step": 3471 }, { "epoch": 0.49734994986391634, "grad_norm": 1.1643625497817993, "learning_rate": 0.0001057733097158345, "loss": 1.4466, "step": 3472 }, { "epoch": 0.4974931958172182, "grad_norm": 1.142340064048767, "learning_rate": 0.0001057269887202651, "loss": 1.3975, "step": 3473 }, { "epoch": 0.49763644177051997, "grad_norm": 1.1778205633163452, "learning_rate": 0.00010568066649181676, "loss": 1.3515, "step": 3474 }, { "epoch": 0.4977796877238218, "grad_norm": 1.2639034986495972, "learning_rate": 0.00010563434304046151, "loss": 1.5115, "step": 3475 }, { "epoch": 0.49792293367712365, "grad_norm": 1.2969005107879639, "learning_rate": 0.0001055880183761716, "loss": 1.2736, "step": 3476 }, { "epoch": 0.49806617963042543, "grad_norm": 1.3313193321228027, "learning_rate": 0.00010554169250891967, "loss": 1.3961, "step": 3477 }, { "epoch": 0.4982094255837273, "grad_norm": 1.166468858718872, "learning_rate": 0.00010549536544867845, "loss": 1.4841, "step": 3478 }, { "epoch": 0.49835267153702906, "grad_norm": 1.1413065195083618, "learning_rate": 0.00010544903720542105, "loss": 1.501, "step": 3479 }, { "epoch": 0.4984959174903309, "grad_norm": 1.1154061555862427, "learning_rate": 0.00010540270778912073, "loss": 1.4168, "step": 3480 }, { "epoch": 0.49863916344363274, "grad_norm": 1.2981587648391724, "learning_rate": 0.00010535637720975117, "loss": 1.4842, "step": 3481 }, { "epoch": 0.4987824093969345, "grad_norm": 0.9239138960838318, "learning_rate": 0.00010531004547728613, "loss": 1.3953, "step": 3482 }, { "epoch": 0.49892565535023636, "grad_norm": 1.1687954664230347, "learning_rate": 0.00010526371260169972, "loss": 1.5191, "step": 3483 }, { "epoch": 0.49906890130353815, "grad_norm": 1.1618293523788452, "learning_rate": 0.00010521737859296623, "loss": 1.2857, "step": 3484 }, { "epoch": 0.49921214725684, "grad_norm": 1.1281825304031372, "learning_rate": 0.00010517104346106022, "loss": 1.2911, "step": 3485 }, { "epoch": 0.49935539321014183, "grad_norm": 1.0716654062271118, "learning_rate": 0.00010512470721595655, "loss": 1.5974, "step": 3486 }, { "epoch": 0.4994986391634436, "grad_norm": 1.1116315126419067, "learning_rate": 0.00010507836986763022, "loss": 1.3426, "step": 3487 }, { "epoch": 0.49964188511674545, "grad_norm": 1.3257536888122559, "learning_rate": 0.0001050320314260565, "loss": 1.4361, "step": 3488 }, { "epoch": 0.4997851310700473, "grad_norm": 1.4002201557159424, "learning_rate": 0.00010498569190121097, "loss": 1.3007, "step": 3489 }, { "epoch": 0.4999283770233491, "grad_norm": 1.2307900190353394, "learning_rate": 0.00010493935130306934, "loss": 1.4293, "step": 3490 }, { "epoch": 0.5000716229766509, "grad_norm": 1.0054088830947876, "learning_rate": 0.00010489300964160762, "loss": 1.3056, "step": 3491 }, { "epoch": 0.5002148689299527, "grad_norm": 1.149009346961975, "learning_rate": 0.00010484666692680201, "loss": 1.3605, "step": 3492 }, { "epoch": 0.5003581148832545, "grad_norm": 1.1011995077133179, "learning_rate": 0.00010480032316862891, "loss": 1.3379, "step": 3493 }, { "epoch": 0.5005013608365564, "grad_norm": 0.9761946797370911, "learning_rate": 0.00010475397837706504, "loss": 1.4056, "step": 3494 }, { "epoch": 0.5006446067898582, "grad_norm": 1.1446142196655273, "learning_rate": 0.00010470763256208729, "loss": 1.6161, "step": 3495 }, { "epoch": 0.50078785274316, "grad_norm": 1.2673285007476807, "learning_rate": 0.00010466128573367273, "loss": 1.5247, "step": 3496 }, { "epoch": 0.5009310986964618, "grad_norm": 0.9973851442337036, "learning_rate": 0.00010461493790179868, "loss": 1.4122, "step": 3497 }, { "epoch": 0.5010743446497636, "grad_norm": 0.9240918755531311, "learning_rate": 0.00010456858907644271, "loss": 1.5036, "step": 3498 }, { "epoch": 0.5012175906030655, "grad_norm": 1.0919827222824097, "learning_rate": 0.00010452223926758258, "loss": 1.6375, "step": 3499 }, { "epoch": 0.5013608365563673, "grad_norm": 1.0109374523162842, "learning_rate": 0.00010447588848519625, "loss": 1.3839, "step": 3500 }, { "epoch": 0.501504082509669, "grad_norm": 1.0929967164993286, "learning_rate": 0.00010442953673926185, "loss": 1.7065, "step": 3501 }, { "epoch": 0.5016473284629709, "grad_norm": 1.0462990999221802, "learning_rate": 0.00010438318403975786, "loss": 1.2633, "step": 3502 }, { "epoch": 0.5017905744162727, "grad_norm": 1.1100492477416992, "learning_rate": 0.00010433683039666278, "loss": 1.4736, "step": 3503 }, { "epoch": 0.5019338203695746, "grad_norm": 1.2015297412872314, "learning_rate": 0.00010429047581995546, "loss": 1.4694, "step": 3504 }, { "epoch": 0.5020770663228764, "grad_norm": 1.0902291536331177, "learning_rate": 0.00010424412031961484, "loss": 1.4631, "step": 3505 }, { "epoch": 0.5022203122761782, "grad_norm": 1.4253610372543335, "learning_rate": 0.00010419776390562015, "loss": 1.2938, "step": 3506 }, { "epoch": 0.50236355822948, "grad_norm": 1.206774115562439, "learning_rate": 0.00010415140658795077, "loss": 1.4686, "step": 3507 }, { "epoch": 0.5025068041827818, "grad_norm": 1.1173324584960938, "learning_rate": 0.00010410504837658627, "loss": 1.4696, "step": 3508 }, { "epoch": 0.5026500501360837, "grad_norm": 1.203869342803955, "learning_rate": 0.00010405868928150648, "loss": 1.5432, "step": 3509 }, { "epoch": 0.5027932960893855, "grad_norm": 0.9921224117279053, "learning_rate": 0.00010401232931269127, "loss": 1.375, "step": 3510 }, { "epoch": 0.5029365420426873, "grad_norm": 1.1027369499206543, "learning_rate": 0.0001039659684801209, "loss": 1.2518, "step": 3511 }, { "epoch": 0.5030797879959891, "grad_norm": 1.1626970767974854, "learning_rate": 0.00010391960679377563, "loss": 1.4704, "step": 3512 }, { "epoch": 0.5032230339492909, "grad_norm": 1.0251049995422363, "learning_rate": 0.00010387324426363605, "loss": 1.4801, "step": 3513 }, { "epoch": 0.5033662799025928, "grad_norm": 1.130698561668396, "learning_rate": 0.00010382688089968275, "loss": 1.4591, "step": 3514 }, { "epoch": 0.5035095258558946, "grad_norm": 1.2539726495742798, "learning_rate": 0.00010378051671189677, "loss": 1.5644, "step": 3515 }, { "epoch": 0.5036527718091964, "grad_norm": 1.0322028398513794, "learning_rate": 0.00010373415171025904, "loss": 1.6491, "step": 3516 }, { "epoch": 0.5037960177624982, "grad_norm": 1.2747957706451416, "learning_rate": 0.00010368778590475088, "loss": 1.472, "step": 3517 }, { "epoch": 0.5039392637158, "grad_norm": 1.2452256679534912, "learning_rate": 0.00010364141930535367, "loss": 1.3797, "step": 3518 }, { "epoch": 0.5040825096691018, "grad_norm": 1.1841557025909424, "learning_rate": 0.00010359505192204899, "loss": 1.4533, "step": 3519 }, { "epoch": 0.5042257556224037, "grad_norm": 1.231706976890564, "learning_rate": 0.00010354868376481862, "loss": 1.3962, "step": 3520 }, { "epoch": 0.5043690015757055, "grad_norm": 1.0423682928085327, "learning_rate": 0.00010350231484364443, "loss": 1.4413, "step": 3521 }, { "epoch": 0.5045122475290073, "grad_norm": 0.9694684743881226, "learning_rate": 0.00010345594516850851, "loss": 1.4447, "step": 3522 }, { "epoch": 0.5046554934823091, "grad_norm": 1.156149983406067, "learning_rate": 0.00010340957474939312, "loss": 1.3022, "step": 3523 }, { "epoch": 0.5047987394356109, "grad_norm": 1.2355780601501465, "learning_rate": 0.00010336320359628067, "loss": 1.5747, "step": 3524 }, { "epoch": 0.5049419853889128, "grad_norm": 1.0577422380447388, "learning_rate": 0.00010331683171915374, "loss": 1.2036, "step": 3525 }, { "epoch": 0.5050852313422146, "grad_norm": 1.0409634113311768, "learning_rate": 0.00010327045912799496, "loss": 1.3208, "step": 3526 }, { "epoch": 0.5052284772955165, "grad_norm": 1.1987453699111938, "learning_rate": 0.00010322408583278732, "loss": 1.4662, "step": 3527 }, { "epoch": 0.5053717232488182, "grad_norm": 1.1479896306991577, "learning_rate": 0.00010317771184351375, "loss": 1.4152, "step": 3528 }, { "epoch": 0.50551496920212, "grad_norm": 1.1418026685714722, "learning_rate": 0.00010313133717015749, "loss": 1.4116, "step": 3529 }, { "epoch": 0.5056582151554219, "grad_norm": 1.1631759405136108, "learning_rate": 0.00010308496182270176, "loss": 1.4703, "step": 3530 }, { "epoch": 0.5058014611087237, "grad_norm": 1.139899730682373, "learning_rate": 0.00010303858581113015, "loss": 1.3514, "step": 3531 }, { "epoch": 0.5059447070620255, "grad_norm": 1.0779730081558228, "learning_rate": 0.00010299220914542618, "loss": 1.3088, "step": 3532 }, { "epoch": 0.5060879530153273, "grad_norm": 1.2521034479141235, "learning_rate": 0.00010294583183557362, "loss": 1.5054, "step": 3533 }, { "epoch": 0.5062311989686291, "grad_norm": 1.0833410024642944, "learning_rate": 0.00010289945389155643, "loss": 1.3673, "step": 3534 }, { "epoch": 0.506374444921931, "grad_norm": 1.2322765588760376, "learning_rate": 0.00010285307532335846, "loss": 1.5418, "step": 3535 }, { "epoch": 0.5065176908752328, "grad_norm": 1.0470800399780273, "learning_rate": 0.00010280669614096405, "loss": 1.3235, "step": 3536 }, { "epoch": 0.5066609368285346, "grad_norm": 1.051418423652649, "learning_rate": 0.00010276031635435741, "loss": 1.694, "step": 3537 }, { "epoch": 0.5068041827818364, "grad_norm": 1.309741497039795, "learning_rate": 0.00010271393597352297, "loss": 1.4042, "step": 3538 }, { "epoch": 0.5069474287351382, "grad_norm": 1.2561089992523193, "learning_rate": 0.00010266755500844523, "loss": 1.4806, "step": 3539 }, { "epoch": 0.50709067468844, "grad_norm": 1.2582608461380005, "learning_rate": 0.00010262117346910896, "loss": 1.4972, "step": 3540 }, { "epoch": 0.5072339206417419, "grad_norm": 1.153204321861267, "learning_rate": 0.00010257479136549889, "loss": 1.5228, "step": 3541 }, { "epoch": 0.5073771665950437, "grad_norm": 1.0570608377456665, "learning_rate": 0.00010252840870759993, "loss": 1.4583, "step": 3542 }, { "epoch": 0.5075204125483455, "grad_norm": 1.0807303190231323, "learning_rate": 0.00010248202550539716, "loss": 1.443, "step": 3543 }, { "epoch": 0.5076636585016473, "grad_norm": 1.0454888343811035, "learning_rate": 0.0001024356417688757, "loss": 1.3752, "step": 3544 }, { "epoch": 0.5078069044549491, "grad_norm": 1.9777027368545532, "learning_rate": 0.00010238925750802089, "loss": 1.3977, "step": 3545 }, { "epoch": 0.507950150408251, "grad_norm": 1.2866982221603394, "learning_rate": 0.00010234287273281802, "loss": 1.3555, "step": 3546 }, { "epoch": 0.5080933963615528, "grad_norm": 1.198765754699707, "learning_rate": 0.00010229648745325265, "loss": 1.641, "step": 3547 }, { "epoch": 0.5082366423148547, "grad_norm": 0.968391478061676, "learning_rate": 0.00010225010167931035, "loss": 1.3959, "step": 3548 }, { "epoch": 0.5083798882681564, "grad_norm": 1.1934484243392944, "learning_rate": 0.00010220371542097682, "loss": 1.431, "step": 3549 }, { "epoch": 0.5085231342214582, "grad_norm": 0.9414583444595337, "learning_rate": 0.00010215732868823795, "loss": 1.4242, "step": 3550 }, { "epoch": 0.5086663801747601, "grad_norm": 1.0558662414550781, "learning_rate": 0.00010211094149107954, "loss": 1.4286, "step": 3551 }, { "epoch": 0.5088096261280619, "grad_norm": 0.9759284257888794, "learning_rate": 0.0001020645538394877, "loss": 1.4069, "step": 3552 }, { "epoch": 0.5089528720813637, "grad_norm": 1.2101167440414429, "learning_rate": 0.00010201816574344849, "loss": 1.3891, "step": 3553 }, { "epoch": 0.5090961180346655, "grad_norm": 1.1877031326293945, "learning_rate": 0.00010197177721294818, "loss": 1.3501, "step": 3554 }, { "epoch": 0.5092393639879673, "grad_norm": 1.2673450708389282, "learning_rate": 0.00010192538825797296, "loss": 1.4352, "step": 3555 }, { "epoch": 0.5093826099412692, "grad_norm": 1.1861858367919922, "learning_rate": 0.00010187899888850933, "loss": 1.3951, "step": 3556 }, { "epoch": 0.509525855894571, "grad_norm": 1.069964051246643, "learning_rate": 0.00010183260911454373, "loss": 1.541, "step": 3557 }, { "epoch": 0.5096691018478728, "grad_norm": 1.421069622039795, "learning_rate": 0.00010178621894606275, "loss": 1.4498, "step": 3558 }, { "epoch": 0.5098123478011746, "grad_norm": 1.2999821901321411, "learning_rate": 0.00010173982839305304, "loss": 1.5733, "step": 3559 }, { "epoch": 0.5099555937544764, "grad_norm": 0.9489443898200989, "learning_rate": 0.0001016934374655013, "loss": 1.5153, "step": 3560 }, { "epoch": 0.5100988397077783, "grad_norm": 1.0192890167236328, "learning_rate": 0.00010164704617339442, "loss": 1.4364, "step": 3561 }, { "epoch": 0.5102420856610801, "grad_norm": 1.1171261072158813, "learning_rate": 0.00010160065452671923, "loss": 1.4547, "step": 3562 }, { "epoch": 0.5103853316143819, "grad_norm": 1.2785452604293823, "learning_rate": 0.00010155426253546274, "loss": 1.5359, "step": 3563 }, { "epoch": 0.5105285775676837, "grad_norm": 1.0734423398971558, "learning_rate": 0.00010150787020961197, "loss": 1.5796, "step": 3564 }, { "epoch": 0.5106718235209855, "grad_norm": 1.0974465608596802, "learning_rate": 0.00010146147755915407, "loss": 1.5607, "step": 3565 }, { "epoch": 0.5108150694742873, "grad_norm": 1.2799713611602783, "learning_rate": 0.00010141508459407623, "loss": 1.3361, "step": 3566 }, { "epoch": 0.5109583154275892, "grad_norm": 0.9287564754486084, "learning_rate": 0.00010136869132436568, "loss": 1.4186, "step": 3567 }, { "epoch": 0.511101561380891, "grad_norm": 1.0645002126693726, "learning_rate": 0.00010132229776000974, "loss": 1.4191, "step": 3568 }, { "epoch": 0.5112448073341929, "grad_norm": 1.026987910270691, "learning_rate": 0.00010127590391099584, "loss": 1.3998, "step": 3569 }, { "epoch": 0.5113880532874946, "grad_norm": 1.283682942390442, "learning_rate": 0.00010122950978731141, "loss": 1.6089, "step": 3570 }, { "epoch": 0.5115312992407964, "grad_norm": 0.9998469352722168, "learning_rate": 0.00010118311539894394, "loss": 1.4065, "step": 3571 }, { "epoch": 0.5116745451940983, "grad_norm": 1.1862092018127441, "learning_rate": 0.00010113672075588099, "loss": 1.3224, "step": 3572 }, { "epoch": 0.5118177911474001, "grad_norm": 1.10554039478302, "learning_rate": 0.0001010903258681102, "loss": 1.4003, "step": 3573 }, { "epoch": 0.511961037100702, "grad_norm": 1.1894596815109253, "learning_rate": 0.00010104393074561924, "loss": 1.4571, "step": 3574 }, { "epoch": 0.5121042830540037, "grad_norm": 1.0433622598648071, "learning_rate": 0.0001009975353983958, "loss": 1.4102, "step": 3575 }, { "epoch": 0.5122475290073055, "grad_norm": 1.1458901166915894, "learning_rate": 0.0001009511398364277, "loss": 1.3882, "step": 3576 }, { "epoch": 0.5123907749606074, "grad_norm": 1.0135594606399536, "learning_rate": 0.00010090474406970271, "loss": 1.5209, "step": 3577 }, { "epoch": 0.5125340209139092, "grad_norm": 1.0498710870742798, "learning_rate": 0.00010085834810820871, "loss": 1.5453, "step": 3578 }, { "epoch": 0.512677266867211, "grad_norm": 1.1239097118377686, "learning_rate": 0.00010081195196193362, "loss": 1.6923, "step": 3579 }, { "epoch": 0.5128205128205128, "grad_norm": 0.9303428530693054, "learning_rate": 0.00010076555564086534, "loss": 1.461, "step": 3580 }, { "epoch": 0.5129637587738146, "grad_norm": 1.1695005893707275, "learning_rate": 0.0001007191591549919, "loss": 1.3316, "step": 3581 }, { "epoch": 0.5131070047271165, "grad_norm": 1.288093090057373, "learning_rate": 0.0001006727625143013, "loss": 1.4717, "step": 3582 }, { "epoch": 0.5132502506804183, "grad_norm": 0.9967794418334961, "learning_rate": 0.00010062636572878155, "loss": 1.5422, "step": 3583 }, { "epoch": 0.5133934966337201, "grad_norm": 1.2726439237594604, "learning_rate": 0.00010057996880842078, "loss": 1.3472, "step": 3584 }, { "epoch": 0.5135367425870219, "grad_norm": 1.6482325792312622, "learning_rate": 0.00010053357176320703, "loss": 1.3549, "step": 3585 }, { "epoch": 0.5136799885403237, "grad_norm": 1.0138734579086304, "learning_rate": 0.00010048717460312855, "loss": 1.6789, "step": 3586 }, { "epoch": 0.5138232344936255, "grad_norm": 1.1671876907348633, "learning_rate": 0.00010044077733817341, "loss": 1.4795, "step": 3587 }, { "epoch": 0.5139664804469274, "grad_norm": 1.459424376487732, "learning_rate": 0.00010039437997832984, "loss": 1.5057, "step": 3588 }, { "epoch": 0.5141097264002292, "grad_norm": 1.0552257299423218, "learning_rate": 0.00010034798253358595, "loss": 1.4697, "step": 3589 }, { "epoch": 0.5142529723535311, "grad_norm": 1.2884804010391235, "learning_rate": 0.00010030158501393008, "loss": 1.2487, "step": 3590 }, { "epoch": 0.5143962183068328, "grad_norm": 1.0488123893737793, "learning_rate": 0.00010025518742935041, "loss": 1.5682, "step": 3591 }, { "epoch": 0.5145394642601346, "grad_norm": 1.146166443824768, "learning_rate": 0.00010020878978983522, "loss": 1.518, "step": 3592 }, { "epoch": 0.5146827102134365, "grad_norm": 0.9955335259437561, "learning_rate": 0.00010016239210537273, "loss": 1.5437, "step": 3593 }, { "epoch": 0.5148259561667383, "grad_norm": 1.0879486799240112, "learning_rate": 0.00010011599438595123, "loss": 1.422, "step": 3594 }, { "epoch": 0.5149692021200402, "grad_norm": 1.1293834447860718, "learning_rate": 0.00010006959664155904, "loss": 1.7446, "step": 3595 }, { "epoch": 0.5151124480733419, "grad_norm": 1.3175745010375977, "learning_rate": 0.0001000231988821844, "loss": 1.5915, "step": 3596 }, { "epoch": 0.5152556940266437, "grad_norm": 1.1754212379455566, "learning_rate": 9.997680111781562e-05, "loss": 1.4116, "step": 3597 }, { "epoch": 0.5153989399799456, "grad_norm": 1.1510165929794312, "learning_rate": 9.9930403358441e-05, "loss": 1.2543, "step": 3598 }, { "epoch": 0.5155421859332474, "grad_norm": 1.2780470848083496, "learning_rate": 9.98840056140488e-05, "loss": 1.5373, "step": 3599 }, { "epoch": 0.5156854318865493, "grad_norm": 0.9108158946037292, "learning_rate": 9.983760789462728e-05, "loss": 1.2627, "step": 3600 }, { "epoch": 0.515828677839851, "grad_norm": 1.0297930240631104, "learning_rate": 9.979121021016482e-05, "loss": 1.5713, "step": 3601 }, { "epoch": 0.5159719237931528, "grad_norm": 1.2642993927001953, "learning_rate": 9.97448125706496e-05, "loss": 1.5584, "step": 3602 }, { "epoch": 0.5161151697464547, "grad_norm": 0.9611561298370361, "learning_rate": 9.969841498606993e-05, "loss": 1.3623, "step": 3603 }, { "epoch": 0.5162584156997565, "grad_norm": 1.1655004024505615, "learning_rate": 9.965201746641407e-05, "loss": 1.4318, "step": 3604 }, { "epoch": 0.5164016616530583, "grad_norm": 0.9704083204269409, "learning_rate": 9.96056200216702e-05, "loss": 1.4692, "step": 3605 }, { "epoch": 0.5165449076063601, "grad_norm": 1.269442081451416, "learning_rate": 9.955922266182664e-05, "loss": 1.3589, "step": 3606 }, { "epoch": 0.5166881535596619, "grad_norm": 1.2144579887390137, "learning_rate": 9.951282539687146e-05, "loss": 1.3605, "step": 3607 }, { "epoch": 0.5168313995129638, "grad_norm": 1.2092665433883667, "learning_rate": 9.946642823679295e-05, "loss": 1.3395, "step": 3608 }, { "epoch": 0.5169746454662656, "grad_norm": 1.2226508855819702, "learning_rate": 9.942003119157926e-05, "loss": 1.4841, "step": 3609 }, { "epoch": 0.5171178914195674, "grad_norm": 1.0640671253204346, "learning_rate": 9.937363427121847e-05, "loss": 1.3957, "step": 3610 }, { "epoch": 0.5172611373728692, "grad_norm": 1.2777565717697144, "learning_rate": 9.932723748569876e-05, "loss": 1.4814, "step": 3611 }, { "epoch": 0.517404383326171, "grad_norm": 1.1613305807113647, "learning_rate": 9.928084084500812e-05, "loss": 1.3451, "step": 3612 }, { "epoch": 0.5175476292794728, "grad_norm": 1.0288172960281372, "learning_rate": 9.923444435913466e-05, "loss": 1.3387, "step": 3613 }, { "epoch": 0.5176908752327747, "grad_norm": 1.1038094758987427, "learning_rate": 9.918804803806642e-05, "loss": 1.26, "step": 3614 }, { "epoch": 0.5178341211860765, "grad_norm": 1.2033536434173584, "learning_rate": 9.914165189179131e-05, "loss": 1.5684, "step": 3615 }, { "epoch": 0.5179773671393784, "grad_norm": 1.255765676498413, "learning_rate": 9.90952559302973e-05, "loss": 1.5327, "step": 3616 }, { "epoch": 0.5181206130926801, "grad_norm": 1.0548994541168213, "learning_rate": 9.904886016357233e-05, "loss": 1.5176, "step": 3617 }, { "epoch": 0.5182638590459819, "grad_norm": 1.081447720527649, "learning_rate": 9.900246460160422e-05, "loss": 1.4533, "step": 3618 }, { "epoch": 0.5184071049992838, "grad_norm": 1.3018251657485962, "learning_rate": 9.89560692543808e-05, "loss": 1.4397, "step": 3619 }, { "epoch": 0.5185503509525856, "grad_norm": 0.9713574051856995, "learning_rate": 9.890967413188983e-05, "loss": 1.3478, "step": 3620 }, { "epoch": 0.5186935969058875, "grad_norm": 1.125131368637085, "learning_rate": 9.886327924411902e-05, "loss": 1.6241, "step": 3621 }, { "epoch": 0.5188368428591892, "grad_norm": 1.2078266143798828, "learning_rate": 9.88168846010561e-05, "loss": 1.3243, "step": 3622 }, { "epoch": 0.518980088812491, "grad_norm": 1.280287504196167, "learning_rate": 9.87704902126886e-05, "loss": 1.3718, "step": 3623 }, { "epoch": 0.5191233347657929, "grad_norm": 1.2313079833984375, "learning_rate": 9.872409608900416e-05, "loss": 1.3328, "step": 3624 }, { "epoch": 0.5192665807190947, "grad_norm": 0.9809384346008301, "learning_rate": 9.867770223999028e-05, "loss": 1.4586, "step": 3625 }, { "epoch": 0.5194098266723965, "grad_norm": 1.0655990839004517, "learning_rate": 9.863130867563435e-05, "loss": 1.4681, "step": 3626 }, { "epoch": 0.5195530726256983, "grad_norm": 0.9086384773254395, "learning_rate": 9.858491540592382e-05, "loss": 1.3887, "step": 3627 }, { "epoch": 0.5196963185790001, "grad_norm": 1.2372816801071167, "learning_rate": 9.853852244084594e-05, "loss": 1.4728, "step": 3628 }, { "epoch": 0.519839564532302, "grad_norm": 1.1435480117797852, "learning_rate": 9.849212979038804e-05, "loss": 1.3829, "step": 3629 }, { "epoch": 0.5199828104856038, "grad_norm": 1.1206979751586914, "learning_rate": 9.84457374645373e-05, "loss": 1.341, "step": 3630 }, { "epoch": 0.5201260564389056, "grad_norm": 1.0238715410232544, "learning_rate": 9.83993454732808e-05, "loss": 1.4099, "step": 3631 }, { "epoch": 0.5202693023922074, "grad_norm": 1.0766881704330444, "learning_rate": 9.835295382660559e-05, "loss": 1.3573, "step": 3632 }, { "epoch": 0.5204125483455092, "grad_norm": 1.368090033531189, "learning_rate": 9.830656253449872e-05, "loss": 1.3895, "step": 3633 }, { "epoch": 0.520555794298811, "grad_norm": 0.9791733026504517, "learning_rate": 9.826017160694697e-05, "loss": 1.4506, "step": 3634 }, { "epoch": 0.5206990402521129, "grad_norm": 1.1157081127166748, "learning_rate": 9.821378105393727e-05, "loss": 1.5768, "step": 3635 }, { "epoch": 0.5208422862054147, "grad_norm": 1.189009666442871, "learning_rate": 9.816739088545628e-05, "loss": 1.6534, "step": 3636 }, { "epoch": 0.5209855321587166, "grad_norm": 1.0857149362564087, "learning_rate": 9.812100111149068e-05, "loss": 1.3446, "step": 3637 }, { "epoch": 0.5211287781120183, "grad_norm": 1.0060172080993652, "learning_rate": 9.807461174202707e-05, "loss": 1.3901, "step": 3638 }, { "epoch": 0.5212720240653201, "grad_norm": 1.0296005010604858, "learning_rate": 9.802822278705186e-05, "loss": 1.5357, "step": 3639 }, { "epoch": 0.521415270018622, "grad_norm": 1.1376733779907227, "learning_rate": 9.798183425655156e-05, "loss": 1.4571, "step": 3640 }, { "epoch": 0.5215585159719238, "grad_norm": 1.0092979669570923, "learning_rate": 9.793544616051232e-05, "loss": 1.5495, "step": 3641 }, { "epoch": 0.5217017619252257, "grad_norm": 1.150482177734375, "learning_rate": 9.788905850892047e-05, "loss": 1.468, "step": 3642 }, { "epoch": 0.5218450078785274, "grad_norm": 1.3985867500305176, "learning_rate": 9.78426713117621e-05, "loss": 1.5072, "step": 3643 }, { "epoch": 0.5219882538318292, "grad_norm": 1.2363663911819458, "learning_rate": 9.779628457902319e-05, "loss": 1.4661, "step": 3644 }, { "epoch": 0.5221314997851311, "grad_norm": 1.2758383750915527, "learning_rate": 9.774989832068966e-05, "loss": 1.4393, "step": 3645 }, { "epoch": 0.5222747457384329, "grad_norm": 1.0888595581054688, "learning_rate": 9.770351254674738e-05, "loss": 1.4324, "step": 3646 }, { "epoch": 0.5224179916917348, "grad_norm": 1.0480656623840332, "learning_rate": 9.765712726718199e-05, "loss": 1.3434, "step": 3647 }, { "epoch": 0.5225612376450365, "grad_norm": 1.117262840270996, "learning_rate": 9.761074249197915e-05, "loss": 1.4524, "step": 3648 }, { "epoch": 0.5227044835983383, "grad_norm": 0.9950396418571472, "learning_rate": 9.75643582311243e-05, "loss": 1.5741, "step": 3649 }, { "epoch": 0.5228477295516402, "grad_norm": 1.1143760681152344, "learning_rate": 9.751797449460285e-05, "loss": 1.3461, "step": 3650 }, { "epoch": 0.522990975504942, "grad_norm": 1.036259651184082, "learning_rate": 9.74715912924001e-05, "loss": 1.3113, "step": 3651 }, { "epoch": 0.5231342214582438, "grad_norm": 1.130225419998169, "learning_rate": 9.742520863450115e-05, "loss": 1.4508, "step": 3652 }, { "epoch": 0.5232774674115456, "grad_norm": 1.0935786962509155, "learning_rate": 9.737882653089107e-05, "loss": 1.5764, "step": 3653 }, { "epoch": 0.5234207133648474, "grad_norm": 1.209086298942566, "learning_rate": 9.733244499155479e-05, "loss": 1.4767, "step": 3654 }, { "epoch": 0.5235639593181493, "grad_norm": 1.3295915126800537, "learning_rate": 9.728606402647705e-05, "loss": 1.5101, "step": 3655 }, { "epoch": 0.5237072052714511, "grad_norm": 1.402650237083435, "learning_rate": 9.723968364564264e-05, "loss": 1.5505, "step": 3656 }, { "epoch": 0.5238504512247529, "grad_norm": 1.1903241872787476, "learning_rate": 9.719330385903596e-05, "loss": 1.3318, "step": 3657 }, { "epoch": 0.5239936971780548, "grad_norm": 1.1131162643432617, "learning_rate": 9.714692467664152e-05, "loss": 1.6759, "step": 3658 }, { "epoch": 0.5241369431313565, "grad_norm": 1.4578924179077148, "learning_rate": 9.710054610844364e-05, "loss": 1.5585, "step": 3659 }, { "epoch": 0.5242801890846583, "grad_norm": 0.9325810074806213, "learning_rate": 9.705416816442639e-05, "loss": 1.4063, "step": 3660 }, { "epoch": 0.5244234350379602, "grad_norm": 1.3123407363891602, "learning_rate": 9.700779085457386e-05, "loss": 1.4458, "step": 3661 }, { "epoch": 0.524566680991262, "grad_norm": 1.0928868055343628, "learning_rate": 9.696141418886987e-05, "loss": 1.5986, "step": 3662 }, { "epoch": 0.5247099269445639, "grad_norm": 1.1981276273727417, "learning_rate": 9.691503817729824e-05, "loss": 1.2662, "step": 3663 }, { "epoch": 0.5248531728978656, "grad_norm": 0.9656707048416138, "learning_rate": 9.686866282984256e-05, "loss": 1.3525, "step": 3664 }, { "epoch": 0.5249964188511674, "grad_norm": 1.2443674802780151, "learning_rate": 9.682228815648628e-05, "loss": 1.3734, "step": 3665 }, { "epoch": 0.5251396648044693, "grad_norm": 1.1076823472976685, "learning_rate": 9.67759141672127e-05, "loss": 1.437, "step": 3666 }, { "epoch": 0.5252829107577711, "grad_norm": 1.0221062898635864, "learning_rate": 9.672954087200506e-05, "loss": 1.4132, "step": 3667 }, { "epoch": 0.525426156711073, "grad_norm": 1.0709164142608643, "learning_rate": 9.668316828084628e-05, "loss": 1.3634, "step": 3668 }, { "epoch": 0.5255694026643747, "grad_norm": 1.0664676427841187, "learning_rate": 9.663679640371934e-05, "loss": 1.62, "step": 3669 }, { "epoch": 0.5257126486176765, "grad_norm": 1.017284631729126, "learning_rate": 9.65904252506069e-05, "loss": 1.6718, "step": 3670 }, { "epoch": 0.5258558945709784, "grad_norm": 0.9969238042831421, "learning_rate": 9.654405483149151e-05, "loss": 1.4467, "step": 3671 }, { "epoch": 0.5259991405242802, "grad_norm": 1.1512519121170044, "learning_rate": 9.649768515635562e-05, "loss": 1.433, "step": 3672 }, { "epoch": 0.526142386477582, "grad_norm": 1.1627496480941772, "learning_rate": 9.64513162351814e-05, "loss": 1.7093, "step": 3673 }, { "epoch": 0.5262856324308838, "grad_norm": 1.2153645753860474, "learning_rate": 9.6404948077951e-05, "loss": 1.4602, "step": 3674 }, { "epoch": 0.5264288783841856, "grad_norm": 1.1094894409179688, "learning_rate": 9.635858069464635e-05, "loss": 1.2682, "step": 3675 }, { "epoch": 0.5265721243374875, "grad_norm": 0.9624099731445312, "learning_rate": 9.631221409524913e-05, "loss": 1.4129, "step": 3676 }, { "epoch": 0.5267153702907893, "grad_norm": 1.2378541231155396, "learning_rate": 9.6265848289741e-05, "loss": 1.4691, "step": 3677 }, { "epoch": 0.5268586162440911, "grad_norm": 1.2533034086227417, "learning_rate": 9.621948328810326e-05, "loss": 1.5363, "step": 3678 }, { "epoch": 0.527001862197393, "grad_norm": 1.0834462642669678, "learning_rate": 9.617311910031724e-05, "loss": 1.4034, "step": 3679 }, { "epoch": 0.5271451081506947, "grad_norm": 1.1853669881820679, "learning_rate": 9.6126755736364e-05, "loss": 1.5746, "step": 3680 }, { "epoch": 0.5272883541039965, "grad_norm": 1.2055891752243042, "learning_rate": 9.608039320622439e-05, "loss": 1.5398, "step": 3681 }, { "epoch": 0.5274316000572984, "grad_norm": 1.105594515800476, "learning_rate": 9.60340315198791e-05, "loss": 1.5224, "step": 3682 }, { "epoch": 0.5275748460106002, "grad_norm": 1.0148367881774902, "learning_rate": 9.598767068730874e-05, "loss": 1.4485, "step": 3683 }, { "epoch": 0.5277180919639021, "grad_norm": 0.9427760243415833, "learning_rate": 9.594131071849353e-05, "loss": 1.5218, "step": 3684 }, { "epoch": 0.5278613379172038, "grad_norm": 1.2620155811309814, "learning_rate": 9.589495162341374e-05, "loss": 1.3702, "step": 3685 }, { "epoch": 0.5280045838705056, "grad_norm": 1.0290160179138184, "learning_rate": 9.584859341204924e-05, "loss": 1.4102, "step": 3686 }, { "epoch": 0.5281478298238075, "grad_norm": 1.194306492805481, "learning_rate": 9.580223609437986e-05, "loss": 1.3595, "step": 3687 }, { "epoch": 0.5282910757771093, "grad_norm": 1.0129711627960205, "learning_rate": 9.57558796803852e-05, "loss": 1.5332, "step": 3688 }, { "epoch": 0.5284343217304112, "grad_norm": 0.977285623550415, "learning_rate": 9.570952418004455e-05, "loss": 1.4232, "step": 3689 }, { "epoch": 0.5285775676837129, "grad_norm": 1.2597768306732178, "learning_rate": 9.566316960333726e-05, "loss": 1.5625, "step": 3690 }, { "epoch": 0.5287208136370147, "grad_norm": 1.0141105651855469, "learning_rate": 9.561681596024215e-05, "loss": 1.4557, "step": 3691 }, { "epoch": 0.5288640595903166, "grad_norm": 1.209643006324768, "learning_rate": 9.557046326073814e-05, "loss": 1.4417, "step": 3692 }, { "epoch": 0.5290073055436184, "grad_norm": 1.2381027936935425, "learning_rate": 9.552411151480378e-05, "loss": 1.5137, "step": 3693 }, { "epoch": 0.5291505514969203, "grad_norm": 1.0516688823699951, "learning_rate": 9.547776073241744e-05, "loss": 1.2539, "step": 3694 }, { "epoch": 0.529293797450222, "grad_norm": 1.021891713142395, "learning_rate": 9.543141092355727e-05, "loss": 1.3311, "step": 3695 }, { "epoch": 0.5294370434035238, "grad_norm": 1.0345849990844727, "learning_rate": 9.538506209820133e-05, "loss": 1.4938, "step": 3696 }, { "epoch": 0.5295802893568257, "grad_norm": 1.1452932357788086, "learning_rate": 9.53387142663273e-05, "loss": 1.4643, "step": 3697 }, { "epoch": 0.5297235353101275, "grad_norm": 1.3652962446212769, "learning_rate": 9.529236743791276e-05, "loss": 1.6754, "step": 3698 }, { "epoch": 0.5298667812634293, "grad_norm": 1.0047944784164429, "learning_rate": 9.524602162293497e-05, "loss": 1.4456, "step": 3699 }, { "epoch": 0.5300100272167311, "grad_norm": 1.2246770858764648, "learning_rate": 9.519967683137108e-05, "loss": 1.6058, "step": 3700 }, { "epoch": 0.5301532731700329, "grad_norm": 1.1262931823730469, "learning_rate": 9.515333307319805e-05, "loss": 1.2925, "step": 3701 }, { "epoch": 0.5302965191233348, "grad_norm": 1.6753981113433838, "learning_rate": 9.51069903583924e-05, "loss": 1.4622, "step": 3702 }, { "epoch": 0.5304397650766366, "grad_norm": 1.1636813879013062, "learning_rate": 9.506064869693066e-05, "loss": 1.4347, "step": 3703 }, { "epoch": 0.5305830110299384, "grad_norm": 1.1416330337524414, "learning_rate": 9.501430809878906e-05, "loss": 1.5211, "step": 3704 }, { "epoch": 0.5307262569832403, "grad_norm": 0.963975191116333, "learning_rate": 9.496796857394352e-05, "loss": 1.4622, "step": 3705 }, { "epoch": 0.530869502936542, "grad_norm": 1.2977166175842285, "learning_rate": 9.492163013236983e-05, "loss": 1.317, "step": 3706 }, { "epoch": 0.5310127488898438, "grad_norm": 1.162918210029602, "learning_rate": 9.487529278404347e-05, "loss": 1.4083, "step": 3707 }, { "epoch": 0.5311559948431457, "grad_norm": 0.9931765198707581, "learning_rate": 9.482895653893978e-05, "loss": 1.5069, "step": 3708 }, { "epoch": 0.5312992407964475, "grad_norm": 1.4347114562988281, "learning_rate": 9.478262140703381e-05, "loss": 1.3439, "step": 3709 }, { "epoch": 0.5314424867497494, "grad_norm": 1.0678484439849854, "learning_rate": 9.473628739830032e-05, "loss": 1.4573, "step": 3710 }, { "epoch": 0.5315857327030511, "grad_norm": 1.028002381324768, "learning_rate": 9.468995452271387e-05, "loss": 1.4549, "step": 3711 }, { "epoch": 0.5317289786563529, "grad_norm": 1.111526608467102, "learning_rate": 9.464362279024884e-05, "loss": 1.5291, "step": 3712 }, { "epoch": 0.5318722246096548, "grad_norm": 1.134765386581421, "learning_rate": 9.459729221087926e-05, "loss": 1.4809, "step": 3713 }, { "epoch": 0.5320154705629566, "grad_norm": 1.2564013004302979, "learning_rate": 9.4550962794579e-05, "loss": 1.3956, "step": 3714 }, { "epoch": 0.5321587165162585, "grad_norm": 1.0721120834350586, "learning_rate": 9.450463455132158e-05, "loss": 1.4064, "step": 3715 }, { "epoch": 0.5323019624695602, "grad_norm": 1.259359359741211, "learning_rate": 9.445830749108034e-05, "loss": 1.3558, "step": 3716 }, { "epoch": 0.532445208422862, "grad_norm": 1.2158257961273193, "learning_rate": 9.44119816238284e-05, "loss": 1.2197, "step": 3717 }, { "epoch": 0.5325884543761639, "grad_norm": 1.1172995567321777, "learning_rate": 9.43656569595385e-05, "loss": 1.5537, "step": 3718 }, { "epoch": 0.5327317003294657, "grad_norm": 1.1248152256011963, "learning_rate": 9.431933350818326e-05, "loss": 1.5481, "step": 3719 }, { "epoch": 0.5328749462827675, "grad_norm": 1.150938868522644, "learning_rate": 9.427301127973491e-05, "loss": 1.3822, "step": 3720 }, { "epoch": 0.5330181922360693, "grad_norm": 1.2874780893325806, "learning_rate": 9.422669028416552e-05, "loss": 1.5448, "step": 3721 }, { "epoch": 0.5331614381893711, "grad_norm": 1.1352540254592896, "learning_rate": 9.418037053144687e-05, "loss": 1.37, "step": 3722 }, { "epoch": 0.533304684142673, "grad_norm": 1.1158472299575806, "learning_rate": 9.413405203155038e-05, "loss": 1.3835, "step": 3723 }, { "epoch": 0.5334479300959748, "grad_norm": 1.2794890403747559, "learning_rate": 9.408773479444736e-05, "loss": 1.514, "step": 3724 }, { "epoch": 0.5335911760492766, "grad_norm": 1.1471434831619263, "learning_rate": 9.404141883010874e-05, "loss": 1.4502, "step": 3725 }, { "epoch": 0.5337344220025785, "grad_norm": 1.1912345886230469, "learning_rate": 9.399510414850518e-05, "loss": 1.4875, "step": 3726 }, { "epoch": 0.5338776679558802, "grad_norm": 1.2215592861175537, "learning_rate": 9.394879075960712e-05, "loss": 1.3573, "step": 3727 }, { "epoch": 0.534020913909182, "grad_norm": 1.2018468379974365, "learning_rate": 9.390247867338464e-05, "loss": 1.3554, "step": 3728 }, { "epoch": 0.5341641598624839, "grad_norm": 1.1597234010696411, "learning_rate": 9.385616789980759e-05, "loss": 1.278, "step": 3729 }, { "epoch": 0.5343074058157857, "grad_norm": 0.953069269657135, "learning_rate": 9.380985844884561e-05, "loss": 1.4994, "step": 3730 }, { "epoch": 0.5344506517690876, "grad_norm": 1.2250703573226929, "learning_rate": 9.376355033046789e-05, "loss": 1.5556, "step": 3731 }, { "epoch": 0.5345938977223893, "grad_norm": 0.9650528430938721, "learning_rate": 9.371724355464346e-05, "loss": 1.4416, "step": 3732 }, { "epoch": 0.5347371436756911, "grad_norm": 1.0986504554748535, "learning_rate": 9.367093813134103e-05, "loss": 1.57, "step": 3733 }, { "epoch": 0.534880389628993, "grad_norm": 1.2434649467468262, "learning_rate": 9.362463407052898e-05, "loss": 1.4284, "step": 3734 }, { "epoch": 0.5350236355822948, "grad_norm": 1.0045307874679565, "learning_rate": 9.357833138217549e-05, "loss": 1.3237, "step": 3735 }, { "epoch": 0.5351668815355967, "grad_norm": 1.2073403596878052, "learning_rate": 9.35320300762483e-05, "loss": 1.6345, "step": 3736 }, { "epoch": 0.5353101274888984, "grad_norm": 1.539879322052002, "learning_rate": 9.348573016271498e-05, "loss": 1.6637, "step": 3737 }, { "epoch": 0.5354533734422002, "grad_norm": 1.1526130437850952, "learning_rate": 9.343943165154278e-05, "loss": 1.5267, "step": 3738 }, { "epoch": 0.5355966193955021, "grad_norm": 1.0055005550384521, "learning_rate": 9.339313455269856e-05, "loss": 1.5097, "step": 3739 }, { "epoch": 0.5357398653488039, "grad_norm": 1.1364173889160156, "learning_rate": 9.334683887614902e-05, "loss": 1.5762, "step": 3740 }, { "epoch": 0.5358831113021058, "grad_norm": 1.16120445728302, "learning_rate": 9.330054463186038e-05, "loss": 1.4666, "step": 3741 }, { "epoch": 0.5360263572554075, "grad_norm": 1.304872989654541, "learning_rate": 9.325425182979873e-05, "loss": 1.3208, "step": 3742 }, { "epoch": 0.5361696032087093, "grad_norm": 1.0244348049163818, "learning_rate": 9.320796047992973e-05, "loss": 1.5499, "step": 3743 }, { "epoch": 0.5363128491620112, "grad_norm": 1.1036639213562012, "learning_rate": 9.316167059221874e-05, "loss": 1.3784, "step": 3744 }, { "epoch": 0.536456095115313, "grad_norm": 1.2434087991714478, "learning_rate": 9.311538217663084e-05, "loss": 1.3876, "step": 3745 }, { "epoch": 0.5365993410686148, "grad_norm": 1.1324681043624878, "learning_rate": 9.306909524313083e-05, "loss": 1.3801, "step": 3746 }, { "epoch": 0.5367425870219167, "grad_norm": 1.0222913026809692, "learning_rate": 9.302280980168307e-05, "loss": 1.5761, "step": 3747 }, { "epoch": 0.5368858329752184, "grad_norm": 1.1205248832702637, "learning_rate": 9.297652586225175e-05, "loss": 1.3857, "step": 3748 }, { "epoch": 0.5370290789285203, "grad_norm": 1.0428438186645508, "learning_rate": 9.293024343480055e-05, "loss": 1.4097, "step": 3749 }, { "epoch": 0.5371723248818221, "grad_norm": 1.1651946306228638, "learning_rate": 9.288396252929299e-05, "loss": 1.4858, "step": 3750 }, { "epoch": 0.5373155708351239, "grad_norm": 1.0102474689483643, "learning_rate": 9.283768315569226e-05, "loss": 1.3827, "step": 3751 }, { "epoch": 0.5374588167884258, "grad_norm": 1.3291069269180298, "learning_rate": 9.279140532396104e-05, "loss": 1.4992, "step": 3752 }, { "epoch": 0.5376020627417275, "grad_norm": 1.1129823923110962, "learning_rate": 9.27451290440619e-05, "loss": 1.6202, "step": 3753 }, { "epoch": 0.5377453086950293, "grad_norm": 1.0610359907150269, "learning_rate": 9.269885432595696e-05, "loss": 1.5354, "step": 3754 }, { "epoch": 0.5378885546483312, "grad_norm": 1.1070361137390137, "learning_rate": 9.265258117960797e-05, "loss": 1.3406, "step": 3755 }, { "epoch": 0.538031800601633, "grad_norm": 0.9756036996841431, "learning_rate": 9.260630961497648e-05, "loss": 1.6447, "step": 3756 }, { "epoch": 0.5381750465549349, "grad_norm": 1.2553449869155884, "learning_rate": 9.256003964202352e-05, "loss": 1.4602, "step": 3757 }, { "epoch": 0.5383182925082366, "grad_norm": 0.9686275720596313, "learning_rate": 9.251377127070992e-05, "loss": 1.3458, "step": 3758 }, { "epoch": 0.5384615384615384, "grad_norm": 1.188857913017273, "learning_rate": 9.246750451099615e-05, "loss": 1.6229, "step": 3759 }, { "epoch": 0.5386047844148403, "grad_norm": 1.2297013998031616, "learning_rate": 9.242123937284223e-05, "loss": 1.4431, "step": 3760 }, { "epoch": 0.5387480303681421, "grad_norm": 1.396612286567688, "learning_rate": 9.23749758662079e-05, "loss": 1.3322, "step": 3761 }, { "epoch": 0.538891276321444, "grad_norm": 0.9658654928207397, "learning_rate": 9.232871400105265e-05, "loss": 1.3076, "step": 3762 }, { "epoch": 0.5390345222747457, "grad_norm": 1.2772880792617798, "learning_rate": 9.228245378733537e-05, "loss": 1.5344, "step": 3763 }, { "epoch": 0.5391777682280475, "grad_norm": 1.0893158912658691, "learning_rate": 9.223619523501484e-05, "loss": 1.3945, "step": 3764 }, { "epoch": 0.5393210141813494, "grad_norm": 1.1131144762039185, "learning_rate": 9.218993835404935e-05, "loss": 1.482, "step": 3765 }, { "epoch": 0.5394642601346512, "grad_norm": 1.1111451387405396, "learning_rate": 9.214368315439685e-05, "loss": 1.5078, "step": 3766 }, { "epoch": 0.539607506087953, "grad_norm": 1.157244086265564, "learning_rate": 9.209742964601498e-05, "loss": 1.4373, "step": 3767 }, { "epoch": 0.5397507520412549, "grad_norm": 1.1658291816711426, "learning_rate": 9.205117783886091e-05, "loss": 1.43, "step": 3768 }, { "epoch": 0.5398939979945566, "grad_norm": 0.997931957244873, "learning_rate": 9.20049277428916e-05, "loss": 1.3688, "step": 3769 }, { "epoch": 0.5400372439478585, "grad_norm": 1.2365038394927979, "learning_rate": 9.195867936806346e-05, "loss": 1.4106, "step": 3770 }, { "epoch": 0.5401804899011603, "grad_norm": 1.3280868530273438, "learning_rate": 9.191243272433268e-05, "loss": 1.5641, "step": 3771 }, { "epoch": 0.5403237358544621, "grad_norm": 1.1488747596740723, "learning_rate": 9.1866187821655e-05, "loss": 1.1997, "step": 3772 }, { "epoch": 0.540466981807764, "grad_norm": 1.0865997076034546, "learning_rate": 9.181994466998577e-05, "loss": 1.4058, "step": 3773 }, { "epoch": 0.5406102277610657, "grad_norm": 1.0705485343933105, "learning_rate": 9.177370327928004e-05, "loss": 1.3785, "step": 3774 }, { "epoch": 0.5407534737143675, "grad_norm": 1.2165418863296509, "learning_rate": 9.172746365949246e-05, "loss": 1.4866, "step": 3775 }, { "epoch": 0.5408967196676694, "grad_norm": 1.0586025714874268, "learning_rate": 9.16812258205772e-05, "loss": 1.3165, "step": 3776 }, { "epoch": 0.5410399656209712, "grad_norm": 1.0338263511657715, "learning_rate": 9.16349897724882e-05, "loss": 1.3806, "step": 3777 }, { "epoch": 0.5411832115742731, "grad_norm": 1.0262380838394165, "learning_rate": 9.158875552517887e-05, "loss": 1.2545, "step": 3778 }, { "epoch": 0.5413264575275748, "grad_norm": 1.1584936380386353, "learning_rate": 9.154252308860231e-05, "loss": 1.503, "step": 3779 }, { "epoch": 0.5414697034808766, "grad_norm": 1.250121831893921, "learning_rate": 9.149629247271128e-05, "loss": 1.2028, "step": 3780 }, { "epoch": 0.5416129494341785, "grad_norm": 1.314084529876709, "learning_rate": 9.145006368745803e-05, "loss": 1.5149, "step": 3781 }, { "epoch": 0.5417561953874803, "grad_norm": 1.11372709274292, "learning_rate": 9.14038367427945e-05, "loss": 1.5536, "step": 3782 }, { "epoch": 0.5418994413407822, "grad_norm": 1.1796424388885498, "learning_rate": 9.135761164867221e-05, "loss": 1.3874, "step": 3783 }, { "epoch": 0.5420426872940839, "grad_norm": 1.1041358709335327, "learning_rate": 9.131138841504222e-05, "loss": 1.3477, "step": 3784 }, { "epoch": 0.5421859332473857, "grad_norm": 1.1590811014175415, "learning_rate": 9.126516705185535e-05, "loss": 1.5473, "step": 3785 }, { "epoch": 0.5423291792006876, "grad_norm": 1.077998161315918, "learning_rate": 9.121894756906181e-05, "loss": 1.5326, "step": 3786 }, { "epoch": 0.5424724251539894, "grad_norm": 1.167808175086975, "learning_rate": 9.117272997661157e-05, "loss": 1.5809, "step": 3787 }, { "epoch": 0.5426156711072913, "grad_norm": 1.184683918952942, "learning_rate": 9.112651428445416e-05, "loss": 1.3783, "step": 3788 }, { "epoch": 0.5427589170605931, "grad_norm": 1.2382107973098755, "learning_rate": 9.108030050253861e-05, "loss": 1.3602, "step": 3789 }, { "epoch": 0.5429021630138948, "grad_norm": 1.1313680410385132, "learning_rate": 9.103408864081365e-05, "loss": 1.537, "step": 3790 }, { "epoch": 0.5430454089671967, "grad_norm": 1.2598986625671387, "learning_rate": 9.098787870922751e-05, "loss": 1.3982, "step": 3791 }, { "epoch": 0.5431886549204985, "grad_norm": 1.0685791969299316, "learning_rate": 9.094167071772808e-05, "loss": 1.405, "step": 3792 }, { "epoch": 0.5433319008738003, "grad_norm": 1.0668638944625854, "learning_rate": 9.08954646762628e-05, "loss": 1.377, "step": 3793 }, { "epoch": 0.5434751468271022, "grad_norm": 1.3687176704406738, "learning_rate": 9.084926059477865e-05, "loss": 1.3562, "step": 3794 }, { "epoch": 0.5436183927804039, "grad_norm": 0.9627201557159424, "learning_rate": 9.080305848322223e-05, "loss": 1.3928, "step": 3795 }, { "epoch": 0.5437616387337058, "grad_norm": 1.0312392711639404, "learning_rate": 9.075685835153981e-05, "loss": 1.4286, "step": 3796 }, { "epoch": 0.5439048846870076, "grad_norm": 1.0432853698730469, "learning_rate": 9.071066020967698e-05, "loss": 1.4555, "step": 3797 }, { "epoch": 0.5440481306403094, "grad_norm": 1.4255825281143188, "learning_rate": 9.066446406757916e-05, "loss": 1.3394, "step": 3798 }, { "epoch": 0.5441913765936113, "grad_norm": 1.2928752899169922, "learning_rate": 9.06182699351912e-05, "loss": 1.5141, "step": 3799 }, { "epoch": 0.544334622546913, "grad_norm": 1.304749846458435, "learning_rate": 9.057207782245757e-05, "loss": 1.3309, "step": 3800 }, { "epoch": 0.5444778685002148, "grad_norm": 1.1724631786346436, "learning_rate": 9.052588773932228e-05, "loss": 1.5049, "step": 3801 }, { "epoch": 0.5446211144535167, "grad_norm": 1.2254942655563354, "learning_rate": 9.047969969572889e-05, "loss": 1.3129, "step": 3802 }, { "epoch": 0.5447643604068185, "grad_norm": 1.054332971572876, "learning_rate": 9.043351370162058e-05, "loss": 1.4154, "step": 3803 }, { "epoch": 0.5449076063601204, "grad_norm": 1.076788306236267, "learning_rate": 9.038732976694005e-05, "loss": 1.5393, "step": 3804 }, { "epoch": 0.5450508523134221, "grad_norm": 1.0744738578796387, "learning_rate": 9.034114790162952e-05, "loss": 1.4359, "step": 3805 }, { "epoch": 0.5451940982667239, "grad_norm": 1.3311492204666138, "learning_rate": 9.029496811563086e-05, "loss": 1.7857, "step": 3806 }, { "epoch": 0.5453373442200258, "grad_norm": 1.0885087251663208, "learning_rate": 9.024879041888535e-05, "loss": 1.3856, "step": 3807 }, { "epoch": 0.5454805901733276, "grad_norm": 1.2214841842651367, "learning_rate": 9.020261482133398e-05, "loss": 1.3506, "step": 3808 }, { "epoch": 0.5456238361266295, "grad_norm": 0.9928548336029053, "learning_rate": 9.015644133291723e-05, "loss": 1.4204, "step": 3809 }, { "epoch": 0.5457670820799312, "grad_norm": 1.1702306270599365, "learning_rate": 9.011026996357503e-05, "loss": 1.4993, "step": 3810 }, { "epoch": 0.545910328033233, "grad_norm": 0.9813712239265442, "learning_rate": 9.006410072324696e-05, "loss": 1.5622, "step": 3811 }, { "epoch": 0.5460535739865349, "grad_norm": 1.085888385772705, "learning_rate": 9.00179336218722e-05, "loss": 1.7155, "step": 3812 }, { "epoch": 0.5461968199398367, "grad_norm": 1.2337164878845215, "learning_rate": 8.997176866938923e-05, "loss": 1.4404, "step": 3813 }, { "epoch": 0.5463400658931385, "grad_norm": 1.0420974493026733, "learning_rate": 8.992560587573636e-05, "loss": 1.5439, "step": 3814 }, { "epoch": 0.5464833118464404, "grad_norm": 1.4346414804458618, "learning_rate": 8.987944525085123e-05, "loss": 1.4521, "step": 3815 }, { "epoch": 0.5466265577997421, "grad_norm": 1.5219919681549072, "learning_rate": 8.983328680467108e-05, "loss": 1.7328, "step": 3816 }, { "epoch": 0.546769803753044, "grad_norm": 1.19607412815094, "learning_rate": 8.978713054713271e-05, "loss": 1.4813, "step": 3817 }, { "epoch": 0.5469130497063458, "grad_norm": 0.9698666334152222, "learning_rate": 8.974097648817238e-05, "loss": 1.4765, "step": 3818 }, { "epoch": 0.5470562956596476, "grad_norm": 1.1443860530853271, "learning_rate": 8.9694824637726e-05, "loss": 1.4875, "step": 3819 }, { "epoch": 0.5471995416129495, "grad_norm": 1.0689992904663086, "learning_rate": 8.964867500572878e-05, "loss": 1.2831, "step": 3820 }, { "epoch": 0.5473427875662512, "grad_norm": 1.2121115922927856, "learning_rate": 8.960252760211571e-05, "loss": 1.3405, "step": 3821 }, { "epoch": 0.547486033519553, "grad_norm": 1.1287785768508911, "learning_rate": 8.955638243682118e-05, "loss": 1.4129, "step": 3822 }, { "epoch": 0.5476292794728549, "grad_norm": 1.1705784797668457, "learning_rate": 8.951023951977904e-05, "loss": 1.2129, "step": 3823 }, { "epoch": 0.5477725254261567, "grad_norm": 0.9737319946289062, "learning_rate": 8.946409886092273e-05, "loss": 1.6115, "step": 3824 }, { "epoch": 0.5479157713794586, "grad_norm": 1.1572811603546143, "learning_rate": 8.941796047018525e-05, "loss": 1.3554, "step": 3825 }, { "epoch": 0.5480590173327603, "grad_norm": 1.2384272813796997, "learning_rate": 8.9371824357499e-05, "loss": 1.205, "step": 3826 }, { "epoch": 0.5482022632860621, "grad_norm": 1.3571808338165283, "learning_rate": 8.932569053279599e-05, "loss": 1.575, "step": 3827 }, { "epoch": 0.548345509239364, "grad_norm": 1.106500506401062, "learning_rate": 8.927955900600762e-05, "loss": 1.7205, "step": 3828 }, { "epoch": 0.5484887551926658, "grad_norm": 1.2937997579574585, "learning_rate": 8.92334297870649e-05, "loss": 1.3851, "step": 3829 }, { "epoch": 0.5486320011459677, "grad_norm": 1.1528923511505127, "learning_rate": 8.918730288589835e-05, "loss": 1.4176, "step": 3830 }, { "epoch": 0.5487752470992694, "grad_norm": 1.329996943473816, "learning_rate": 8.914117831243789e-05, "loss": 1.3904, "step": 3831 }, { "epoch": 0.5489184930525712, "grad_norm": 1.035642147064209, "learning_rate": 8.909505607661304e-05, "loss": 1.6628, "step": 3832 }, { "epoch": 0.5490617390058731, "grad_norm": 1.0105066299438477, "learning_rate": 8.904893618835277e-05, "loss": 1.4247, "step": 3833 }, { "epoch": 0.5492049849591749, "grad_norm": 1.2962956428527832, "learning_rate": 8.900281865758551e-05, "loss": 1.544, "step": 3834 }, { "epoch": 0.5493482309124768, "grad_norm": 1.3534297943115234, "learning_rate": 8.89567034942393e-05, "loss": 1.3387, "step": 3835 }, { "epoch": 0.5494914768657786, "grad_norm": 1.123201847076416, "learning_rate": 8.89105907082415e-05, "loss": 1.5337, "step": 3836 }, { "epoch": 0.5496347228190803, "grad_norm": 1.2261749505996704, "learning_rate": 8.886448030951912e-05, "loss": 1.4843, "step": 3837 }, { "epoch": 0.5497779687723822, "grad_norm": 1.1547399759292603, "learning_rate": 8.881837230799859e-05, "loss": 1.4158, "step": 3838 }, { "epoch": 0.549921214725684, "grad_norm": 1.1991835832595825, "learning_rate": 8.877226671360577e-05, "loss": 1.4689, "step": 3839 }, { "epoch": 0.5500644606789858, "grad_norm": 1.1199498176574707, "learning_rate": 8.872616353626608e-05, "loss": 1.4522, "step": 3840 }, { "epoch": 0.5502077066322877, "grad_norm": 1.0771890878677368, "learning_rate": 8.868006278590442e-05, "loss": 1.4265, "step": 3841 }, { "epoch": 0.5503509525855894, "grad_norm": 1.0158658027648926, "learning_rate": 8.863396447244511e-05, "loss": 1.4496, "step": 3842 }, { "epoch": 0.5504941985388913, "grad_norm": 1.1728618144989014, "learning_rate": 8.8587868605812e-05, "loss": 1.5709, "step": 3843 }, { "epoch": 0.5506374444921931, "grad_norm": 1.04984450340271, "learning_rate": 8.854177519592834e-05, "loss": 1.4431, "step": 3844 }, { "epoch": 0.5507806904454949, "grad_norm": 1.0375322103500366, "learning_rate": 8.849568425271693e-05, "loss": 1.4599, "step": 3845 }, { "epoch": 0.5509239363987968, "grad_norm": 1.3541265726089478, "learning_rate": 8.844959578610005e-05, "loss": 1.441, "step": 3846 }, { "epoch": 0.5510671823520985, "grad_norm": 1.1232185363769531, "learning_rate": 8.840350980599934e-05, "loss": 1.6492, "step": 3847 }, { "epoch": 0.5512104283054003, "grad_norm": 1.0477490425109863, "learning_rate": 8.835742632233603e-05, "loss": 1.4301, "step": 3848 }, { "epoch": 0.5513536742587022, "grad_norm": 1.1232941150665283, "learning_rate": 8.831134534503069e-05, "loss": 1.5839, "step": 3849 }, { "epoch": 0.551496920212004, "grad_norm": 1.060860514640808, "learning_rate": 8.826526688400346e-05, "loss": 1.5115, "step": 3850 }, { "epoch": 0.5516401661653059, "grad_norm": 1.21254563331604, "learning_rate": 8.82191909491739e-05, "loss": 1.3491, "step": 3851 }, { "epoch": 0.5517834121186076, "grad_norm": 1.008589267730713, "learning_rate": 8.817311755046095e-05, "loss": 1.2769, "step": 3852 }, { "epoch": 0.5519266580719094, "grad_norm": 1.0381547212600708, "learning_rate": 8.812704669778315e-05, "loss": 1.5142, "step": 3853 }, { "epoch": 0.5520699040252113, "grad_norm": 1.0759553909301758, "learning_rate": 8.808097840105841e-05, "loss": 1.4496, "step": 3854 }, { "epoch": 0.5522131499785131, "grad_norm": 1.2774295806884766, "learning_rate": 8.803491267020404e-05, "loss": 1.4154, "step": 3855 }, { "epoch": 0.552356395931815, "grad_norm": 1.0634576082229614, "learning_rate": 8.79888495151369e-05, "loss": 1.4314, "step": 3856 }, { "epoch": 0.5524996418851168, "grad_norm": 1.1186951398849487, "learning_rate": 8.794278894577322e-05, "loss": 1.398, "step": 3857 }, { "epoch": 0.5526428878384185, "grad_norm": 1.0316025018692017, "learning_rate": 8.789673097202869e-05, "loss": 1.4106, "step": 3858 }, { "epoch": 0.5527861337917204, "grad_norm": 1.2792577743530273, "learning_rate": 8.785067560381852e-05, "loss": 1.5475, "step": 3859 }, { "epoch": 0.5529293797450222, "grad_norm": 1.1569132804870605, "learning_rate": 8.78046228510572e-05, "loss": 1.4462, "step": 3860 }, { "epoch": 0.553072625698324, "grad_norm": 1.2156575918197632, "learning_rate": 8.77585727236588e-05, "loss": 1.4311, "step": 3861 }, { "epoch": 0.5532158716516259, "grad_norm": 1.2542413473129272, "learning_rate": 8.771252523153679e-05, "loss": 1.5362, "step": 3862 }, { "epoch": 0.5533591176049276, "grad_norm": 1.078599214553833, "learning_rate": 8.766648038460399e-05, "loss": 1.4702, "step": 3863 }, { "epoch": 0.5535023635582295, "grad_norm": 1.3124828338623047, "learning_rate": 8.762043819277279e-05, "loss": 1.4087, "step": 3864 }, { "epoch": 0.5536456095115313, "grad_norm": 1.128368854522705, "learning_rate": 8.757439866595489e-05, "loss": 1.4168, "step": 3865 }, { "epoch": 0.5537888554648331, "grad_norm": 1.060189127922058, "learning_rate": 8.752836181406147e-05, "loss": 1.4647, "step": 3866 }, { "epoch": 0.553932101418135, "grad_norm": 0.9809279441833496, "learning_rate": 8.748232764700314e-05, "loss": 1.4052, "step": 3867 }, { "epoch": 0.5540753473714367, "grad_norm": 1.586708426475525, "learning_rate": 8.743629617468986e-05, "loss": 1.4075, "step": 3868 }, { "epoch": 0.5542185933247386, "grad_norm": 1.4831597805023193, "learning_rate": 8.739026740703119e-05, "loss": 1.5685, "step": 3869 }, { "epoch": 0.5543618392780404, "grad_norm": 1.1498953104019165, "learning_rate": 8.734424135393582e-05, "loss": 1.4413, "step": 3870 }, { "epoch": 0.5545050852313422, "grad_norm": 1.7392728328704834, "learning_rate": 8.729821802531212e-05, "loss": 1.5503, "step": 3871 }, { "epoch": 0.5546483311846441, "grad_norm": 1.0217995643615723, "learning_rate": 8.72521974310678e-05, "loss": 1.3752, "step": 3872 }, { "epoch": 0.5547915771379458, "grad_norm": 1.0087727308273315, "learning_rate": 8.720617958110989e-05, "loss": 1.3519, "step": 3873 }, { "epoch": 0.5549348230912476, "grad_norm": 1.1481389999389648, "learning_rate": 8.71601644853449e-05, "loss": 1.4484, "step": 3874 }, { "epoch": 0.5550780690445495, "grad_norm": 0.9921812415122986, "learning_rate": 8.71141521536788e-05, "loss": 1.4096, "step": 3875 }, { "epoch": 0.5552213149978513, "grad_norm": 1.034953236579895, "learning_rate": 8.706814259601684e-05, "loss": 1.4915, "step": 3876 }, { "epoch": 0.5553645609511532, "grad_norm": 0.9842970967292786, "learning_rate": 8.702213582226379e-05, "loss": 1.5006, "step": 3877 }, { "epoch": 0.555507806904455, "grad_norm": 1.2548067569732666, "learning_rate": 8.697613184232372e-05, "loss": 1.41, "step": 3878 }, { "epoch": 0.5556510528577567, "grad_norm": 0.9699662327766418, "learning_rate": 8.693013066610016e-05, "loss": 1.3674, "step": 3879 }, { "epoch": 0.5557942988110586, "grad_norm": 1.069921851158142, "learning_rate": 8.68841323034961e-05, "loss": 1.361, "step": 3880 }, { "epoch": 0.5559375447643604, "grad_norm": 1.1991045475006104, "learning_rate": 8.68381367644137e-05, "loss": 1.487, "step": 3881 }, { "epoch": 0.5560807907176623, "grad_norm": 1.0368291139602661, "learning_rate": 8.67921440587548e-05, "loss": 1.4969, "step": 3882 }, { "epoch": 0.5562240366709641, "grad_norm": 1.4181945323944092, "learning_rate": 8.674615419642044e-05, "loss": 1.4132, "step": 3883 }, { "epoch": 0.5563672826242658, "grad_norm": 1.089892864227295, "learning_rate": 8.670016718731111e-05, "loss": 1.5795, "step": 3884 }, { "epoch": 0.5565105285775677, "grad_norm": 1.2528786659240723, "learning_rate": 8.665418304132666e-05, "loss": 1.3463, "step": 3885 }, { "epoch": 0.5566537745308695, "grad_norm": 1.0907570123672485, "learning_rate": 8.66082017683663e-05, "loss": 1.5563, "step": 3886 }, { "epoch": 0.5567970204841713, "grad_norm": 1.096030592918396, "learning_rate": 8.656222337832875e-05, "loss": 1.4372, "step": 3887 }, { "epoch": 0.5569402664374732, "grad_norm": 1.1569348573684692, "learning_rate": 8.651624788111197e-05, "loss": 1.3602, "step": 3888 }, { "epoch": 0.5570835123907749, "grad_norm": 1.0363959074020386, "learning_rate": 8.647027528661333e-05, "loss": 1.3755, "step": 3889 }, { "epoch": 0.5572267583440768, "grad_norm": 1.127984881401062, "learning_rate": 8.64243056047296e-05, "loss": 1.506, "step": 3890 }, { "epoch": 0.5573700042973786, "grad_norm": 1.0365749597549438, "learning_rate": 8.6378338845357e-05, "loss": 1.4768, "step": 3891 }, { "epoch": 0.5575132502506804, "grad_norm": 1.134334683418274, "learning_rate": 8.633237501839089e-05, "loss": 1.4589, "step": 3892 }, { "epoch": 0.5576564962039823, "grad_norm": 1.1454731225967407, "learning_rate": 8.628641413372625e-05, "loss": 1.3813, "step": 3893 }, { "epoch": 0.557799742157284, "grad_norm": 1.0015209913253784, "learning_rate": 8.624045620125727e-05, "loss": 1.49, "step": 3894 }, { "epoch": 0.5579429881105858, "grad_norm": 1.1860630512237549, "learning_rate": 8.619450123087757e-05, "loss": 1.2962, "step": 3895 }, { "epoch": 0.5580862340638877, "grad_norm": 1.2602014541625977, "learning_rate": 8.614854923248016e-05, "loss": 1.5706, "step": 3896 }, { "epoch": 0.5582294800171895, "grad_norm": 1.1934503316879272, "learning_rate": 8.610260021595728e-05, "loss": 1.5822, "step": 3897 }, { "epoch": 0.5583727259704914, "grad_norm": 1.1712443828582764, "learning_rate": 8.605665419120071e-05, "loss": 1.2835, "step": 3898 }, { "epoch": 0.5585159719237931, "grad_norm": 1.2621870040893555, "learning_rate": 8.601071116810143e-05, "loss": 1.4252, "step": 3899 }, { "epoch": 0.5586592178770949, "grad_norm": 1.0441410541534424, "learning_rate": 8.596477115654985e-05, "loss": 1.3694, "step": 3900 }, { "epoch": 0.5588024638303968, "grad_norm": 1.0356183052062988, "learning_rate": 8.591883416643574e-05, "loss": 1.4268, "step": 3901 }, { "epoch": 0.5589457097836986, "grad_norm": 1.2976032495498657, "learning_rate": 8.587290020764814e-05, "loss": 1.6673, "step": 3902 }, { "epoch": 0.5590889557370005, "grad_norm": 1.366070032119751, "learning_rate": 8.582696929007556e-05, "loss": 1.2025, "step": 3903 }, { "epoch": 0.5592322016903023, "grad_norm": 1.2491025924682617, "learning_rate": 8.578104142360578e-05, "loss": 1.5581, "step": 3904 }, { "epoch": 0.559375447643604, "grad_norm": 1.128080129623413, "learning_rate": 8.573511661812589e-05, "loss": 1.4527, "step": 3905 }, { "epoch": 0.5595186935969059, "grad_norm": 1.378066062927246, "learning_rate": 8.568919488352242e-05, "loss": 1.5583, "step": 3906 }, { "epoch": 0.5596619395502077, "grad_norm": 1.0771844387054443, "learning_rate": 8.564327622968113e-05, "loss": 1.5658, "step": 3907 }, { "epoch": 0.5598051855035096, "grad_norm": 0.9619322419166565, "learning_rate": 8.559736066648717e-05, "loss": 1.4096, "step": 3908 }, { "epoch": 0.5599484314568114, "grad_norm": 1.0864485502243042, "learning_rate": 8.55514482038251e-05, "loss": 1.3858, "step": 3909 }, { "epoch": 0.5600916774101131, "grad_norm": 1.114277958869934, "learning_rate": 8.550553885157866e-05, "loss": 1.5714, "step": 3910 }, { "epoch": 0.560234923363415, "grad_norm": 1.0554165840148926, "learning_rate": 8.545963261963102e-05, "loss": 1.4778, "step": 3911 }, { "epoch": 0.5603781693167168, "grad_norm": 0.9876953363418579, "learning_rate": 8.541372951786469e-05, "loss": 1.3799, "step": 3912 }, { "epoch": 0.5605214152700186, "grad_norm": 1.2109671831130981, "learning_rate": 8.536782955616138e-05, "loss": 1.6097, "step": 3913 }, { "epoch": 0.5606646612233205, "grad_norm": 1.1476106643676758, "learning_rate": 8.532193274440235e-05, "loss": 1.2998, "step": 3914 }, { "epoch": 0.5608079071766222, "grad_norm": 1.262734293937683, "learning_rate": 8.527603909246791e-05, "loss": 1.43, "step": 3915 }, { "epoch": 0.560951153129924, "grad_norm": 1.1031489372253418, "learning_rate": 8.523014861023791e-05, "loss": 1.3749, "step": 3916 }, { "epoch": 0.5610943990832259, "grad_norm": 1.0776389837265015, "learning_rate": 8.518426130759145e-05, "loss": 1.3603, "step": 3917 }, { "epoch": 0.5612376450365277, "grad_norm": 0.9908851981163025, "learning_rate": 8.513837719440688e-05, "loss": 1.4642, "step": 3918 }, { "epoch": 0.5613808909898296, "grad_norm": 1.3063948154449463, "learning_rate": 8.509249628056192e-05, "loss": 1.3972, "step": 3919 }, { "epoch": 0.5615241369431313, "grad_norm": 1.1847165822982788, "learning_rate": 8.504661857593358e-05, "loss": 1.4727, "step": 3920 }, { "epoch": 0.5616673828964331, "grad_norm": 0.9869985580444336, "learning_rate": 8.500074409039827e-05, "loss": 1.4307, "step": 3921 }, { "epoch": 0.561810628849735, "grad_norm": 1.18565833568573, "learning_rate": 8.495487283383158e-05, "loss": 1.3788, "step": 3922 }, { "epoch": 0.5619538748030368, "grad_norm": 1.1054866313934326, "learning_rate": 8.490900481610845e-05, "loss": 1.3779, "step": 3923 }, { "epoch": 0.5620971207563387, "grad_norm": 1.0978024005889893, "learning_rate": 8.486314004710311e-05, "loss": 1.3887, "step": 3924 }, { "epoch": 0.5622403667096405, "grad_norm": 1.2477519512176514, "learning_rate": 8.48172785366892e-05, "loss": 1.4354, "step": 3925 }, { "epoch": 0.5623836126629422, "grad_norm": 1.0421793460845947, "learning_rate": 8.477142029473945e-05, "loss": 1.572, "step": 3926 }, { "epoch": 0.5625268586162441, "grad_norm": 1.0786559581756592, "learning_rate": 8.47255653311261e-05, "loss": 1.4609, "step": 3927 }, { "epoch": 0.5626701045695459, "grad_norm": 1.2271462678909302, "learning_rate": 8.467971365572053e-05, "loss": 1.3661, "step": 3928 }, { "epoch": 0.5628133505228478, "grad_norm": 1.0970946550369263, "learning_rate": 8.463386527839349e-05, "loss": 1.405, "step": 3929 }, { "epoch": 0.5629565964761496, "grad_norm": 1.431236743927002, "learning_rate": 8.458802020901503e-05, "loss": 1.5289, "step": 3930 }, { "epoch": 0.5630998424294513, "grad_norm": 1.2120460271835327, "learning_rate": 8.454217845745439e-05, "loss": 1.3232, "step": 3931 }, { "epoch": 0.5632430883827532, "grad_norm": 1.1474130153656006, "learning_rate": 8.449634003358022e-05, "loss": 1.5438, "step": 3932 }, { "epoch": 0.563386334336055, "grad_norm": 1.3706706762313843, "learning_rate": 8.44505049472604e-05, "loss": 1.5151, "step": 3933 }, { "epoch": 0.5635295802893568, "grad_norm": 1.0257145166397095, "learning_rate": 8.440467320836208e-05, "loss": 1.3579, "step": 3934 }, { "epoch": 0.5636728262426587, "grad_norm": 1.1697652339935303, "learning_rate": 8.435884482675168e-05, "loss": 1.3828, "step": 3935 }, { "epoch": 0.5638160721959604, "grad_norm": 1.5371254682540894, "learning_rate": 8.431301981229492e-05, "loss": 1.4234, "step": 3936 }, { "epoch": 0.5639593181492623, "grad_norm": 1.0536261796951294, "learning_rate": 8.42671981748568e-05, "loss": 1.4001, "step": 3937 }, { "epoch": 0.5641025641025641, "grad_norm": 1.1714766025543213, "learning_rate": 8.422137992430162e-05, "loss": 1.4227, "step": 3938 }, { "epoch": 0.5642458100558659, "grad_norm": 1.1704045534133911, "learning_rate": 8.417556507049285e-05, "loss": 1.2836, "step": 3939 }, { "epoch": 0.5643890560091678, "grad_norm": 1.0839508771896362, "learning_rate": 8.412975362329332e-05, "loss": 1.2472, "step": 3940 }, { "epoch": 0.5645323019624695, "grad_norm": 0.9749448895454407, "learning_rate": 8.408394559256516e-05, "loss": 1.5037, "step": 3941 }, { "epoch": 0.5646755479157713, "grad_norm": 1.0083789825439453, "learning_rate": 8.40381409881696e-05, "loss": 1.4631, "step": 3942 }, { "epoch": 0.5648187938690732, "grad_norm": 1.2076188325881958, "learning_rate": 8.399233981996731e-05, "loss": 1.4924, "step": 3943 }, { "epoch": 0.564962039822375, "grad_norm": 1.1061431169509888, "learning_rate": 8.394654209781813e-05, "loss": 1.4437, "step": 3944 }, { "epoch": 0.5651052857756769, "grad_norm": 0.9922691583633423, "learning_rate": 8.390074783158116e-05, "loss": 1.2537, "step": 3945 }, { "epoch": 0.5652485317289787, "grad_norm": 1.1751137971878052, "learning_rate": 8.385495703111483e-05, "loss": 1.4131, "step": 3946 }, { "epoch": 0.5653917776822804, "grad_norm": 1.0556309223175049, "learning_rate": 8.380916970627666e-05, "loss": 1.3317, "step": 3947 }, { "epoch": 0.5655350236355823, "grad_norm": 1.277162790298462, "learning_rate": 8.376338586692366e-05, "loss": 1.4075, "step": 3948 }, { "epoch": 0.5656782695888841, "grad_norm": 1.2637059688568115, "learning_rate": 8.371760552291183e-05, "loss": 1.4415, "step": 3949 }, { "epoch": 0.565821515542186, "grad_norm": 1.2300158739089966, "learning_rate": 8.367182868409662e-05, "loss": 1.3873, "step": 3950 }, { "epoch": 0.5659647614954878, "grad_norm": 1.2689992189407349, "learning_rate": 8.362605536033265e-05, "loss": 1.3347, "step": 3951 }, { "epoch": 0.5661080074487895, "grad_norm": 1.1090478897094727, "learning_rate": 8.358028556147377e-05, "loss": 1.4422, "step": 3952 }, { "epoch": 0.5662512534020914, "grad_norm": 1.6464838981628418, "learning_rate": 8.353451929737304e-05, "loss": 1.4485, "step": 3953 }, { "epoch": 0.5663944993553932, "grad_norm": 1.225919485092163, "learning_rate": 8.348875657788291e-05, "loss": 1.3279, "step": 3954 }, { "epoch": 0.566537745308695, "grad_norm": 1.0428861379623413, "learning_rate": 8.344299741285486e-05, "loss": 1.2717, "step": 3955 }, { "epoch": 0.5666809912619969, "grad_norm": 1.1783533096313477, "learning_rate": 8.339724181213977e-05, "loss": 1.4457, "step": 3956 }, { "epoch": 0.5668242372152986, "grad_norm": 1.1148102283477783, "learning_rate": 8.335148978558764e-05, "loss": 1.2687, "step": 3957 }, { "epoch": 0.5669674831686005, "grad_norm": 1.292420506477356, "learning_rate": 8.330574134304776e-05, "loss": 1.2572, "step": 3958 }, { "epoch": 0.5671107291219023, "grad_norm": 1.290417194366455, "learning_rate": 8.32599964943687e-05, "loss": 1.5039, "step": 3959 }, { "epoch": 0.5672539750752041, "grad_norm": 1.1382256746292114, "learning_rate": 8.32142552493981e-05, "loss": 1.3523, "step": 3960 }, { "epoch": 0.567397221028506, "grad_norm": 1.1668968200683594, "learning_rate": 8.316851761798298e-05, "loss": 1.3122, "step": 3961 }, { "epoch": 0.5675404669818077, "grad_norm": 1.4155834913253784, "learning_rate": 8.312278360996952e-05, "loss": 1.4369, "step": 3962 }, { "epoch": 0.5676837129351096, "grad_norm": 1.1860640048980713, "learning_rate": 8.307705323520305e-05, "loss": 1.3012, "step": 3963 }, { "epoch": 0.5678269588884114, "grad_norm": 1.1565544605255127, "learning_rate": 8.303132650352831e-05, "loss": 1.5066, "step": 3964 }, { "epoch": 0.5679702048417132, "grad_norm": 1.3378422260284424, "learning_rate": 8.298560342478901e-05, "loss": 1.3931, "step": 3965 }, { "epoch": 0.5681134507950151, "grad_norm": 1.1496847867965698, "learning_rate": 8.293988400882826e-05, "loss": 1.6177, "step": 3966 }, { "epoch": 0.5682566967483169, "grad_norm": 1.2598527669906616, "learning_rate": 8.289416826548834e-05, "loss": 1.6493, "step": 3967 }, { "epoch": 0.5683999427016186, "grad_norm": 1.3219451904296875, "learning_rate": 8.284845620461067e-05, "loss": 1.3353, "step": 3968 }, { "epoch": 0.5685431886549205, "grad_norm": 1.2905850410461426, "learning_rate": 8.28027478360359e-05, "loss": 1.2892, "step": 3969 }, { "epoch": 0.5686864346082223, "grad_norm": 1.1840497255325317, "learning_rate": 8.275704316960403e-05, "loss": 1.3757, "step": 3970 }, { "epoch": 0.5688296805615242, "grad_norm": 1.205560326576233, "learning_rate": 8.271134221515403e-05, "loss": 1.6795, "step": 3971 }, { "epoch": 0.568972926514826, "grad_norm": 1.0615930557250977, "learning_rate": 8.266564498252425e-05, "loss": 1.462, "step": 3972 }, { "epoch": 0.5691161724681277, "grad_norm": 1.0974124670028687, "learning_rate": 8.261995148155213e-05, "loss": 1.3021, "step": 3973 }, { "epoch": 0.5692594184214296, "grad_norm": 1.2044427394866943, "learning_rate": 8.257426172207434e-05, "loss": 1.5225, "step": 3974 }, { "epoch": 0.5694026643747314, "grad_norm": 1.117107629776001, "learning_rate": 8.252857571392683e-05, "loss": 1.5632, "step": 3975 }, { "epoch": 0.5695459103280333, "grad_norm": 1.1638672351837158, "learning_rate": 8.248289346694457e-05, "loss": 1.3739, "step": 3976 }, { "epoch": 0.5696891562813351, "grad_norm": 1.0431588888168335, "learning_rate": 8.24372149909619e-05, "loss": 1.406, "step": 3977 }, { "epoch": 0.5698324022346368, "grad_norm": 1.149625301361084, "learning_rate": 8.239154029581222e-05, "loss": 1.3399, "step": 3978 }, { "epoch": 0.5699756481879387, "grad_norm": 1.0371580123901367, "learning_rate": 8.234586939132817e-05, "loss": 1.546, "step": 3979 }, { "epoch": 0.5701188941412405, "grad_norm": 1.167177677154541, "learning_rate": 8.230020228734159e-05, "loss": 1.4579, "step": 3980 }, { "epoch": 0.5702621400945423, "grad_norm": 1.203093409538269, "learning_rate": 8.22545389936834e-05, "loss": 1.4442, "step": 3981 }, { "epoch": 0.5704053860478442, "grad_norm": 1.1320503950119019, "learning_rate": 8.220887952018387e-05, "loss": 1.3198, "step": 3982 }, { "epoch": 0.5705486320011459, "grad_norm": 1.146273136138916, "learning_rate": 8.216322387667232e-05, "loss": 1.5901, "step": 3983 }, { "epoch": 0.5706918779544478, "grad_norm": 1.1225979328155518, "learning_rate": 8.211757207297727e-05, "loss": 1.4345, "step": 3984 }, { "epoch": 0.5708351239077496, "grad_norm": 1.0608364343643188, "learning_rate": 8.207192411892646e-05, "loss": 1.5799, "step": 3985 }, { "epoch": 0.5709783698610514, "grad_norm": 1.1014986038208008, "learning_rate": 8.202628002434672e-05, "loss": 1.5379, "step": 3986 }, { "epoch": 0.5711216158143533, "grad_norm": 1.366817831993103, "learning_rate": 8.198063979906407e-05, "loss": 1.4199, "step": 3987 }, { "epoch": 0.5712648617676551, "grad_norm": 1.0609391927719116, "learning_rate": 8.19350034529038e-05, "loss": 1.3257, "step": 3988 }, { "epoch": 0.5714081077209568, "grad_norm": 1.097229242324829, "learning_rate": 8.188937099569024e-05, "loss": 1.3338, "step": 3989 }, { "epoch": 0.5715513536742587, "grad_norm": 1.0792393684387207, "learning_rate": 8.184374243724693e-05, "loss": 1.4144, "step": 3990 }, { "epoch": 0.5716945996275605, "grad_norm": 1.0728702545166016, "learning_rate": 8.17981177873966e-05, "loss": 1.3447, "step": 3991 }, { "epoch": 0.5718378455808624, "grad_norm": 1.1972821950912476, "learning_rate": 8.175249705596105e-05, "loss": 1.37, "step": 3992 }, { "epoch": 0.5719810915341642, "grad_norm": 1.059789776802063, "learning_rate": 8.170688025276134e-05, "loss": 1.4666, "step": 3993 }, { "epoch": 0.5721243374874659, "grad_norm": 1.1565126180648804, "learning_rate": 8.166126738761764e-05, "loss": 1.354, "step": 3994 }, { "epoch": 0.5722675834407678, "grad_norm": 1.0718340873718262, "learning_rate": 8.161565847034925e-05, "loss": 1.3636, "step": 3995 }, { "epoch": 0.5724108293940696, "grad_norm": 1.3045673370361328, "learning_rate": 8.157005351077465e-05, "loss": 1.5122, "step": 3996 }, { "epoch": 0.5725540753473715, "grad_norm": 1.5571624040603638, "learning_rate": 8.152445251871144e-05, "loss": 1.5694, "step": 3997 }, { "epoch": 0.5726973213006733, "grad_norm": 1.1673660278320312, "learning_rate": 8.147885550397645e-05, "loss": 1.4945, "step": 3998 }, { "epoch": 0.572840567253975, "grad_norm": 1.0279871225357056, "learning_rate": 8.143326247638549e-05, "loss": 1.5993, "step": 3999 }, { "epoch": 0.5729838132072769, "grad_norm": 1.0814629793167114, "learning_rate": 8.138767344575368e-05, "loss": 1.4563, "step": 4000 }, { "epoch": 0.5731270591605787, "grad_norm": 1.3680696487426758, "learning_rate": 8.134208842189522e-05, "loss": 1.5034, "step": 4001 }, { "epoch": 0.5732703051138806, "grad_norm": 1.0448832511901855, "learning_rate": 8.12965074146234e-05, "loss": 1.3423, "step": 4002 }, { "epoch": 0.5734135510671824, "grad_norm": 1.1998704671859741, "learning_rate": 8.125093043375064e-05, "loss": 1.5248, "step": 4003 }, { "epoch": 0.5735567970204841, "grad_norm": 1.0345035791397095, "learning_rate": 8.120535748908866e-05, "loss": 1.2994, "step": 4004 }, { "epoch": 0.573700042973786, "grad_norm": 1.2050114870071411, "learning_rate": 8.11597885904481e-05, "loss": 1.6566, "step": 4005 }, { "epoch": 0.5738432889270878, "grad_norm": 1.1663048267364502, "learning_rate": 8.111422374763883e-05, "loss": 1.503, "step": 4006 }, { "epoch": 0.5739865348803896, "grad_norm": 0.981922447681427, "learning_rate": 8.106866297046983e-05, "loss": 1.4254, "step": 4007 }, { "epoch": 0.5741297808336915, "grad_norm": 1.0976204872131348, "learning_rate": 8.10231062687492e-05, "loss": 1.4059, "step": 4008 }, { "epoch": 0.5742730267869932, "grad_norm": 1.2109730243682861, "learning_rate": 8.097755365228425e-05, "loss": 1.5166, "step": 4009 }, { "epoch": 0.574416272740295, "grad_norm": 1.1045068502426147, "learning_rate": 8.093200513088118e-05, "loss": 1.5394, "step": 4010 }, { "epoch": 0.5745595186935969, "grad_norm": 1.165842890739441, "learning_rate": 8.088646071434559e-05, "loss": 1.4096, "step": 4011 }, { "epoch": 0.5747027646468987, "grad_norm": 0.9336256980895996, "learning_rate": 8.084092041248203e-05, "loss": 1.7167, "step": 4012 }, { "epoch": 0.5748460106002006, "grad_norm": 1.029255747795105, "learning_rate": 8.079538423509417e-05, "loss": 1.4256, "step": 4013 }, { "epoch": 0.5749892565535024, "grad_norm": 1.2203580141067505, "learning_rate": 8.074985219198488e-05, "loss": 1.4428, "step": 4014 }, { "epoch": 0.5751325025068041, "grad_norm": 1.2242995500564575, "learning_rate": 8.070432429295599e-05, "loss": 1.5042, "step": 4015 }, { "epoch": 0.575275748460106, "grad_norm": 1.2936469316482544, "learning_rate": 8.065880054780861e-05, "loss": 1.4866, "step": 4016 }, { "epoch": 0.5754189944134078, "grad_norm": 1.0355972051620483, "learning_rate": 8.061328096634288e-05, "loss": 1.381, "step": 4017 }, { "epoch": 0.5755622403667097, "grad_norm": 1.128451943397522, "learning_rate": 8.056776555835798e-05, "loss": 1.4394, "step": 4018 }, { "epoch": 0.5757054863200115, "grad_norm": 1.1621708869934082, "learning_rate": 8.052225433365226e-05, "loss": 1.5442, "step": 4019 }, { "epoch": 0.5758487322733132, "grad_norm": 1.1235061883926392, "learning_rate": 8.047674730202325e-05, "loss": 1.4713, "step": 4020 }, { "epoch": 0.5759919782266151, "grad_norm": 1.1414810419082642, "learning_rate": 8.043124447326736e-05, "loss": 1.3321, "step": 4021 }, { "epoch": 0.5761352241799169, "grad_norm": 1.2065949440002441, "learning_rate": 8.038574585718032e-05, "loss": 1.5907, "step": 4022 }, { "epoch": 0.5762784701332188, "grad_norm": 1.0530744791030884, "learning_rate": 8.03402514635568e-05, "loss": 1.2437, "step": 4023 }, { "epoch": 0.5764217160865206, "grad_norm": 1.1663130521774292, "learning_rate": 8.029476130219064e-05, "loss": 1.3423, "step": 4024 }, { "epoch": 0.5765649620398223, "grad_norm": 1.1673799753189087, "learning_rate": 8.024927538287476e-05, "loss": 1.4283, "step": 4025 }, { "epoch": 0.5767082079931242, "grad_norm": 1.2280105352401733, "learning_rate": 8.02037937154011e-05, "loss": 1.459, "step": 4026 }, { "epoch": 0.576851453946426, "grad_norm": 1.1846588850021362, "learning_rate": 8.015831630956079e-05, "loss": 1.3226, "step": 4027 }, { "epoch": 0.5769946998997278, "grad_norm": 1.2068185806274414, "learning_rate": 8.011284317514398e-05, "loss": 1.4864, "step": 4028 }, { "epoch": 0.5771379458530297, "grad_norm": 1.1816045045852661, "learning_rate": 8.006737432193989e-05, "loss": 1.329, "step": 4029 }, { "epoch": 0.5772811918063314, "grad_norm": 1.1531915664672852, "learning_rate": 8.002190975973689e-05, "loss": 1.4316, "step": 4030 }, { "epoch": 0.5774244377596333, "grad_norm": 1.0691043138504028, "learning_rate": 7.997644949832228e-05, "loss": 1.3663, "step": 4031 }, { "epoch": 0.5775676837129351, "grad_norm": 1.2969982624053955, "learning_rate": 7.993099354748261e-05, "loss": 1.557, "step": 4032 }, { "epoch": 0.5777109296662369, "grad_norm": 1.0185694694519043, "learning_rate": 7.988554191700343e-05, "loss": 1.4557, "step": 4033 }, { "epoch": 0.5778541756195388, "grad_norm": 1.2483097314834595, "learning_rate": 7.98400946166693e-05, "loss": 1.3563, "step": 4034 }, { "epoch": 0.5779974215728406, "grad_norm": 1.0204237699508667, "learning_rate": 7.979465165626392e-05, "loss": 1.3468, "step": 4035 }, { "epoch": 0.5781406675261423, "grad_norm": 1.2091480493545532, "learning_rate": 7.974921304557002e-05, "loss": 1.3786, "step": 4036 }, { "epoch": 0.5782839134794442, "grad_norm": 1.118525505065918, "learning_rate": 7.970377879436941e-05, "loss": 1.4364, "step": 4037 }, { "epoch": 0.578427159432746, "grad_norm": 1.1731637716293335, "learning_rate": 7.965834891244301e-05, "loss": 1.453, "step": 4038 }, { "epoch": 0.5785704053860479, "grad_norm": 0.9149682521820068, "learning_rate": 7.961292340957069e-05, "loss": 1.4582, "step": 4039 }, { "epoch": 0.5787136513393497, "grad_norm": 1.1028584241867065, "learning_rate": 7.956750229553145e-05, "loss": 1.5014, "step": 4040 }, { "epoch": 0.5788568972926514, "grad_norm": 0.9707110524177551, "learning_rate": 7.952208558010336e-05, "loss": 1.4464, "step": 4041 }, { "epoch": 0.5790001432459533, "grad_norm": 0.951582670211792, "learning_rate": 7.947667327306348e-05, "loss": 1.3684, "step": 4042 }, { "epoch": 0.5791433891992551, "grad_norm": 1.3854985237121582, "learning_rate": 7.943126538418802e-05, "loss": 1.5119, "step": 4043 }, { "epoch": 0.579286635152557, "grad_norm": 1.1185824871063232, "learning_rate": 7.938586192325205e-05, "loss": 1.4191, "step": 4044 }, { "epoch": 0.5794298811058588, "grad_norm": 1.1147524118423462, "learning_rate": 7.934046290002991e-05, "loss": 1.6209, "step": 4045 }, { "epoch": 0.5795731270591605, "grad_norm": 1.3271050453186035, "learning_rate": 7.929506832429489e-05, "loss": 1.5979, "step": 4046 }, { "epoch": 0.5797163730124624, "grad_norm": 1.0175105333328247, "learning_rate": 7.924967820581928e-05, "loss": 1.3698, "step": 4047 }, { "epoch": 0.5798596189657642, "grad_norm": 1.2606736421585083, "learning_rate": 7.920429255437447e-05, "loss": 1.3346, "step": 4048 }, { "epoch": 0.580002864919066, "grad_norm": 1.093549132347107, "learning_rate": 7.915891137973082e-05, "loss": 1.419, "step": 4049 }, { "epoch": 0.5801461108723679, "grad_norm": 0.9534938335418701, "learning_rate": 7.911353469165782e-05, "loss": 1.4395, "step": 4050 }, { "epoch": 0.5802893568256696, "grad_norm": 1.136242151260376, "learning_rate": 7.9068162499924e-05, "loss": 1.3371, "step": 4051 }, { "epoch": 0.5804326027789715, "grad_norm": 1.090457797050476, "learning_rate": 7.902279481429675e-05, "loss": 1.4873, "step": 4052 }, { "epoch": 0.5805758487322733, "grad_norm": 1.1384838819503784, "learning_rate": 7.897743164454267e-05, "loss": 1.4172, "step": 4053 }, { "epoch": 0.5807190946855751, "grad_norm": 1.2101212739944458, "learning_rate": 7.89320730004274e-05, "loss": 1.687, "step": 4054 }, { "epoch": 0.580862340638877, "grad_norm": 1.0580434799194336, "learning_rate": 7.888671889171538e-05, "loss": 1.5594, "step": 4055 }, { "epoch": 0.5810055865921788, "grad_norm": 1.3892474174499512, "learning_rate": 7.884136932817037e-05, "loss": 1.4371, "step": 4056 }, { "epoch": 0.5811488325454806, "grad_norm": 1.2107031345367432, "learning_rate": 7.879602431955492e-05, "loss": 1.2393, "step": 4057 }, { "epoch": 0.5812920784987824, "grad_norm": 1.131335973739624, "learning_rate": 7.875068387563069e-05, "loss": 1.2592, "step": 4058 }, { "epoch": 0.5814353244520842, "grad_norm": 1.3255914449691772, "learning_rate": 7.870534800615845e-05, "loss": 1.5265, "step": 4059 }, { "epoch": 0.5815785704053861, "grad_norm": 0.9842118620872498, "learning_rate": 7.866001672089776e-05, "loss": 1.2726, "step": 4060 }, { "epoch": 0.5817218163586879, "grad_norm": 1.1963412761688232, "learning_rate": 7.861469002960742e-05, "loss": 1.5314, "step": 4061 }, { "epoch": 0.5818650623119896, "grad_norm": 1.1365402936935425, "learning_rate": 7.856936794204513e-05, "loss": 1.5027, "step": 4062 }, { "epoch": 0.5820083082652915, "grad_norm": 1.1422992944717407, "learning_rate": 7.852405046796756e-05, "loss": 1.3331, "step": 4063 }, { "epoch": 0.5821515542185933, "grad_norm": 1.1397908926010132, "learning_rate": 7.84787376171305e-05, "loss": 1.4188, "step": 4064 }, { "epoch": 0.5822948001718952, "grad_norm": 1.0699501037597656, "learning_rate": 7.843342939928864e-05, "loss": 1.4645, "step": 4065 }, { "epoch": 0.582438046125197, "grad_norm": 1.137501835823059, "learning_rate": 7.838812582419574e-05, "loss": 1.3552, "step": 4066 }, { "epoch": 0.5825812920784987, "grad_norm": 1.0379517078399658, "learning_rate": 7.834282690160457e-05, "loss": 1.3841, "step": 4067 }, { "epoch": 0.5827245380318006, "grad_norm": 1.2014309167861938, "learning_rate": 7.829753264126681e-05, "loss": 1.4708, "step": 4068 }, { "epoch": 0.5828677839851024, "grad_norm": 1.2360470294952393, "learning_rate": 7.82522430529332e-05, "loss": 1.4489, "step": 4069 }, { "epoch": 0.5830110299384043, "grad_norm": 1.1220011711120605, "learning_rate": 7.820695814635356e-05, "loss": 1.4201, "step": 4070 }, { "epoch": 0.5831542758917061, "grad_norm": 1.029133677482605, "learning_rate": 7.816167793127646e-05, "loss": 1.3302, "step": 4071 }, { "epoch": 0.5832975218450078, "grad_norm": 1.1910643577575684, "learning_rate": 7.811640241744975e-05, "loss": 1.3298, "step": 4072 }, { "epoch": 0.5834407677983097, "grad_norm": 0.9886586666107178, "learning_rate": 7.807113161462003e-05, "loss": 1.3882, "step": 4073 }, { "epoch": 0.5835840137516115, "grad_norm": 1.0012569427490234, "learning_rate": 7.802586553253301e-05, "loss": 1.3714, "step": 4074 }, { "epoch": 0.5837272597049133, "grad_norm": 1.4726563692092896, "learning_rate": 7.79806041809334e-05, "loss": 1.4588, "step": 4075 }, { "epoch": 0.5838705056582152, "grad_norm": 1.1509028673171997, "learning_rate": 7.793534756956477e-05, "loss": 1.6458, "step": 4076 }, { "epoch": 0.584013751611517, "grad_norm": 1.2103217840194702, "learning_rate": 7.789009570816985e-05, "loss": 1.4099, "step": 4077 }, { "epoch": 0.5841569975648188, "grad_norm": 1.232513666152954, "learning_rate": 7.784484860649013e-05, "loss": 1.5455, "step": 4078 }, { "epoch": 0.5843002435181206, "grad_norm": 1.1109777688980103, "learning_rate": 7.779960627426627e-05, "loss": 1.5549, "step": 4079 }, { "epoch": 0.5844434894714224, "grad_norm": 1.0656781196594238, "learning_rate": 7.775436872123783e-05, "loss": 1.3306, "step": 4080 }, { "epoch": 0.5845867354247243, "grad_norm": 1.4316449165344238, "learning_rate": 7.770913595714327e-05, "loss": 1.4211, "step": 4081 }, { "epoch": 0.5847299813780261, "grad_norm": 1.2875453233718872, "learning_rate": 7.766390799172012e-05, "loss": 1.2777, "step": 4082 }, { "epoch": 0.5848732273313278, "grad_norm": 1.1123470067977905, "learning_rate": 7.76186848347049e-05, "loss": 1.4696, "step": 4083 }, { "epoch": 0.5850164732846297, "grad_norm": 1.2400542497634888, "learning_rate": 7.757346649583294e-05, "loss": 1.3072, "step": 4084 }, { "epoch": 0.5851597192379315, "grad_norm": 0.959944486618042, "learning_rate": 7.75282529848387e-05, "loss": 1.5056, "step": 4085 }, { "epoch": 0.5853029651912334, "grad_norm": 1.1940209865570068, "learning_rate": 7.74830443114555e-05, "loss": 1.3586, "step": 4086 }, { "epoch": 0.5854462111445352, "grad_norm": 1.3005777597427368, "learning_rate": 7.743784048541561e-05, "loss": 1.4495, "step": 4087 }, { "epoch": 0.5855894570978369, "grad_norm": 1.1398003101348877, "learning_rate": 7.739264151645039e-05, "loss": 1.5188, "step": 4088 }, { "epoch": 0.5857327030511388, "grad_norm": 1.0585899353027344, "learning_rate": 7.734744741428997e-05, "loss": 1.4402, "step": 4089 }, { "epoch": 0.5858759490044406, "grad_norm": 1.2522401809692383, "learning_rate": 7.730225818866358e-05, "loss": 1.4799, "step": 4090 }, { "epoch": 0.5860191949577425, "grad_norm": 0.9709560871124268, "learning_rate": 7.725707384929932e-05, "loss": 1.3676, "step": 4091 }, { "epoch": 0.5861624409110443, "grad_norm": 0.9776063561439514, "learning_rate": 7.721189440592423e-05, "loss": 1.3749, "step": 4092 }, { "epoch": 0.586305686864346, "grad_norm": 1.110272765159607, "learning_rate": 7.716671986826443e-05, "loss": 1.4793, "step": 4093 }, { "epoch": 0.5864489328176479, "grad_norm": 1.091881275177002, "learning_rate": 7.712155024604473e-05, "loss": 1.466, "step": 4094 }, { "epoch": 0.5865921787709497, "grad_norm": 1.1279401779174805, "learning_rate": 7.707638554898913e-05, "loss": 1.4933, "step": 4095 }, { "epoch": 0.5867354247242516, "grad_norm": 1.2103724479675293, "learning_rate": 7.703122578682046e-05, "loss": 1.4395, "step": 4096 }, { "epoch": 0.5868786706775534, "grad_norm": 1.0300121307373047, "learning_rate": 7.698607096926048e-05, "loss": 1.4156, "step": 4097 }, { "epoch": 0.5870219166308551, "grad_norm": 1.0761678218841553, "learning_rate": 7.69409211060299e-05, "loss": 1.5089, "step": 4098 }, { "epoch": 0.587165162584157, "grad_norm": 1.0046838521957397, "learning_rate": 7.689577620684842e-05, "loss": 1.3975, "step": 4099 }, { "epoch": 0.5873084085374588, "grad_norm": 1.1688908338546753, "learning_rate": 7.685063628143455e-05, "loss": 1.4861, "step": 4100 }, { "epoch": 0.5874516544907606, "grad_norm": 1.0761762857437134, "learning_rate": 7.680550133950586e-05, "loss": 1.577, "step": 4101 }, { "epoch": 0.5875949004440625, "grad_norm": 1.0463873147964478, "learning_rate": 7.676037139077874e-05, "loss": 1.3635, "step": 4102 }, { "epoch": 0.5877381463973643, "grad_norm": 1.147707462310791, "learning_rate": 7.671524644496853e-05, "loss": 1.434, "step": 4103 }, { "epoch": 0.587881392350666, "grad_norm": 1.2260719537734985, "learning_rate": 7.667012651178963e-05, "loss": 1.5492, "step": 4104 }, { "epoch": 0.5880246383039679, "grad_norm": 1.0165655612945557, "learning_rate": 7.662501160095509e-05, "loss": 1.4655, "step": 4105 }, { "epoch": 0.5881678842572697, "grad_norm": 1.3608142137527466, "learning_rate": 7.657990172217718e-05, "loss": 1.3643, "step": 4106 }, { "epoch": 0.5883111302105716, "grad_norm": 1.1346837282180786, "learning_rate": 7.653479688516683e-05, "loss": 1.4194, "step": 4107 }, { "epoch": 0.5884543761638734, "grad_norm": 1.179919719696045, "learning_rate": 7.648969709963405e-05, "loss": 1.4452, "step": 4108 }, { "epoch": 0.5885976221171751, "grad_norm": 1.126090407371521, "learning_rate": 7.644460237528771e-05, "loss": 1.5153, "step": 4109 }, { "epoch": 0.588740868070477, "grad_norm": 1.1512258052825928, "learning_rate": 7.639951272183551e-05, "loss": 1.4644, "step": 4110 }, { "epoch": 0.5888841140237788, "grad_norm": 1.1665078401565552, "learning_rate": 7.635442814898426e-05, "loss": 1.26, "step": 4111 }, { "epoch": 0.5890273599770807, "grad_norm": 1.3920634984970093, "learning_rate": 7.630934866643949e-05, "loss": 1.4097, "step": 4112 }, { "epoch": 0.5891706059303825, "grad_norm": 1.3023431301116943, "learning_rate": 7.626427428390567e-05, "loss": 1.5213, "step": 4113 }, { "epoch": 0.5893138518836842, "grad_norm": 1.0747606754302979, "learning_rate": 7.621920501108627e-05, "loss": 1.2963, "step": 4114 }, { "epoch": 0.5894570978369861, "grad_norm": 1.1278852224349976, "learning_rate": 7.617414085768351e-05, "loss": 1.291, "step": 4115 }, { "epoch": 0.5896003437902879, "grad_norm": 1.445031762123108, "learning_rate": 7.612908183339862e-05, "loss": 1.4838, "step": 4116 }, { "epoch": 0.5897435897435898, "grad_norm": 1.0447640419006348, "learning_rate": 7.608402794793174e-05, "loss": 1.6066, "step": 4117 }, { "epoch": 0.5898868356968916, "grad_norm": 1.1560271978378296, "learning_rate": 7.603897921098177e-05, "loss": 1.505, "step": 4118 }, { "epoch": 0.5900300816501933, "grad_norm": 0.966730535030365, "learning_rate": 7.599393563224666e-05, "loss": 1.3939, "step": 4119 }, { "epoch": 0.5901733276034952, "grad_norm": 1.1407861709594727, "learning_rate": 7.594889722142316e-05, "loss": 1.4412, "step": 4120 }, { "epoch": 0.590316573556797, "grad_norm": 1.2845348119735718, "learning_rate": 7.590386398820687e-05, "loss": 1.4193, "step": 4121 }, { "epoch": 0.5904598195100988, "grad_norm": 1.162536382675171, "learning_rate": 7.585883594229243e-05, "loss": 1.5735, "step": 4122 }, { "epoch": 0.5906030654634007, "grad_norm": 1.0764204263687134, "learning_rate": 7.581381309337318e-05, "loss": 1.4274, "step": 4123 }, { "epoch": 0.5907463114167025, "grad_norm": 1.1800615787506104, "learning_rate": 7.576879545114145e-05, "loss": 1.5045, "step": 4124 }, { "epoch": 0.5908895573700043, "grad_norm": 1.0906771421432495, "learning_rate": 7.572378302528847e-05, "loss": 1.3201, "step": 4125 }, { "epoch": 0.5910328033233061, "grad_norm": 1.0225157737731934, "learning_rate": 7.56787758255042e-05, "loss": 1.4202, "step": 4126 }, { "epoch": 0.5911760492766079, "grad_norm": 1.1171581745147705, "learning_rate": 7.56337738614777e-05, "loss": 1.3844, "step": 4127 }, { "epoch": 0.5913192952299098, "grad_norm": 1.0337929725646973, "learning_rate": 7.558877714289667e-05, "loss": 1.4272, "step": 4128 }, { "epoch": 0.5914625411832116, "grad_norm": 1.2413755655288696, "learning_rate": 7.554378567944786e-05, "loss": 1.6877, "step": 4129 }, { "epoch": 0.5916057871365134, "grad_norm": 1.1427229642868042, "learning_rate": 7.549879948081681e-05, "loss": 1.5087, "step": 4130 }, { "epoch": 0.5917490330898152, "grad_norm": 0.9711329340934753, "learning_rate": 7.54538185566879e-05, "loss": 1.3517, "step": 4131 }, { "epoch": 0.591892279043117, "grad_norm": 1.3876253366470337, "learning_rate": 7.54088429167444e-05, "loss": 1.3819, "step": 4132 }, { "epoch": 0.5920355249964189, "grad_norm": 1.1013669967651367, "learning_rate": 7.536387257066854e-05, "loss": 1.4199, "step": 4133 }, { "epoch": 0.5921787709497207, "grad_norm": 1.2899092435836792, "learning_rate": 7.531890752814123e-05, "loss": 1.6998, "step": 4134 }, { "epoch": 0.5923220169030224, "grad_norm": 1.1318042278289795, "learning_rate": 7.52739477988424e-05, "loss": 1.4482, "step": 4135 }, { "epoch": 0.5924652628563243, "grad_norm": 1.1379857063293457, "learning_rate": 7.52289933924507e-05, "loss": 1.3222, "step": 4136 }, { "epoch": 0.5926085088096261, "grad_norm": 1.0665597915649414, "learning_rate": 7.518404431864373e-05, "loss": 1.5582, "step": 4137 }, { "epoch": 0.592751754762928, "grad_norm": 1.1518982648849487, "learning_rate": 7.513910058709798e-05, "loss": 1.3661, "step": 4138 }, { "epoch": 0.5928950007162298, "grad_norm": 1.0021525621414185, "learning_rate": 7.50941622074886e-05, "loss": 1.4363, "step": 4139 }, { "epoch": 0.5930382466695315, "grad_norm": 1.2551062107086182, "learning_rate": 7.504922918948978e-05, "loss": 1.5817, "step": 4140 }, { "epoch": 0.5931814926228334, "grad_norm": 1.1200854778289795, "learning_rate": 7.500430154277452e-05, "loss": 1.33, "step": 4141 }, { "epoch": 0.5933247385761352, "grad_norm": 1.1013273000717163, "learning_rate": 7.495937927701455e-05, "loss": 1.3511, "step": 4142 }, { "epoch": 0.593467984529437, "grad_norm": 1.0911341905593872, "learning_rate": 7.49144624018806e-05, "loss": 1.4725, "step": 4143 }, { "epoch": 0.5936112304827389, "grad_norm": 1.1610873937606812, "learning_rate": 7.486955092704207e-05, "loss": 1.2591, "step": 4144 }, { "epoch": 0.5937544764360407, "grad_norm": 1.1986345052719116, "learning_rate": 7.482464486216737e-05, "loss": 1.4006, "step": 4145 }, { "epoch": 0.5938977223893425, "grad_norm": 1.1037352085113525, "learning_rate": 7.477974421692367e-05, "loss": 1.4246, "step": 4146 }, { "epoch": 0.5940409683426443, "grad_norm": 1.0950713157653809, "learning_rate": 7.47348490009769e-05, "loss": 1.5653, "step": 4147 }, { "epoch": 0.5941842142959461, "grad_norm": 1.2724562883377075, "learning_rate": 7.468995922399189e-05, "loss": 1.5866, "step": 4148 }, { "epoch": 0.594327460249248, "grad_norm": 1.1343566179275513, "learning_rate": 7.464507489563242e-05, "loss": 1.4804, "step": 4149 }, { "epoch": 0.5944707062025498, "grad_norm": 1.1699497699737549, "learning_rate": 7.460019602556081e-05, "loss": 1.4199, "step": 4150 }, { "epoch": 0.5946139521558516, "grad_norm": 1.1542929410934448, "learning_rate": 7.45553226234385e-05, "loss": 1.3033, "step": 4151 }, { "epoch": 0.5947571981091534, "grad_norm": 1.3180019855499268, "learning_rate": 7.451045469892554e-05, "loss": 1.5483, "step": 4152 }, { "epoch": 0.5949004440624552, "grad_norm": 1.127726435661316, "learning_rate": 7.44655922616809e-05, "loss": 1.4871, "step": 4153 }, { "epoch": 0.5950436900157571, "grad_norm": 0.8942027688026428, "learning_rate": 7.442073532136244e-05, "loss": 1.4239, "step": 4154 }, { "epoch": 0.5951869359690589, "grad_norm": 1.253646731376648, "learning_rate": 7.43758838876266e-05, "loss": 1.4855, "step": 4155 }, { "epoch": 0.5953301819223606, "grad_norm": 1.2329021692276, "learning_rate": 7.433103797012892e-05, "loss": 1.4808, "step": 4156 }, { "epoch": 0.5954734278756625, "grad_norm": 1.4448988437652588, "learning_rate": 7.428619757852352e-05, "loss": 1.5001, "step": 4157 }, { "epoch": 0.5956166738289643, "grad_norm": 1.182334542274475, "learning_rate": 7.424136272246347e-05, "loss": 1.4054, "step": 4158 }, { "epoch": 0.5957599197822662, "grad_norm": 1.3208832740783691, "learning_rate": 7.419653341160062e-05, "loss": 1.4316, "step": 4159 }, { "epoch": 0.595903165735568, "grad_norm": 1.098093867301941, "learning_rate": 7.415170965558553e-05, "loss": 1.332, "step": 4160 }, { "epoch": 0.5960464116888697, "grad_norm": 1.3343297243118286, "learning_rate": 7.410689146406775e-05, "loss": 1.3522, "step": 4161 }, { "epoch": 0.5961896576421716, "grad_norm": 1.1276735067367554, "learning_rate": 7.40620788466955e-05, "loss": 1.5347, "step": 4162 }, { "epoch": 0.5963329035954734, "grad_norm": 1.0192652940750122, "learning_rate": 7.401727181311578e-05, "loss": 1.3619, "step": 4163 }, { "epoch": 0.5964761495487753, "grad_norm": 0.969628095626831, "learning_rate": 7.397247037297449e-05, "loss": 1.3498, "step": 4164 }, { "epoch": 0.5966193955020771, "grad_norm": 1.0842705965042114, "learning_rate": 7.392767453591623e-05, "loss": 1.3539, "step": 4165 }, { "epoch": 0.5967626414553789, "grad_norm": 1.1247751712799072, "learning_rate": 7.388288431158443e-05, "loss": 1.4629, "step": 4166 }, { "epoch": 0.5969058874086807, "grad_norm": 1.4672436714172363, "learning_rate": 7.383809970962137e-05, "loss": 1.2268, "step": 4167 }, { "epoch": 0.5970491333619825, "grad_norm": 1.126079797744751, "learning_rate": 7.379332073966804e-05, "loss": 1.5136, "step": 4168 }, { "epoch": 0.5971923793152843, "grad_norm": 1.2257808446884155, "learning_rate": 7.374854741136422e-05, "loss": 1.4249, "step": 4169 }, { "epoch": 0.5973356252685862, "grad_norm": 1.148816704750061, "learning_rate": 7.370377973434855e-05, "loss": 1.4888, "step": 4170 }, { "epoch": 0.597478871221888, "grad_norm": 1.0229889154434204, "learning_rate": 7.365901771825833e-05, "loss": 1.4019, "step": 4171 }, { "epoch": 0.5976221171751898, "grad_norm": 1.3453727960586548, "learning_rate": 7.361426137272982e-05, "loss": 1.3441, "step": 4172 }, { "epoch": 0.5977653631284916, "grad_norm": 1.0701814889907837, "learning_rate": 7.356951070739781e-05, "loss": 1.5124, "step": 4173 }, { "epoch": 0.5979086090817934, "grad_norm": 1.1490201950073242, "learning_rate": 7.352476573189614e-05, "loss": 1.4376, "step": 4174 }, { "epoch": 0.5980518550350953, "grad_norm": 1.3485469818115234, "learning_rate": 7.348002645585725e-05, "loss": 1.2619, "step": 4175 }, { "epoch": 0.5981951009883971, "grad_norm": 1.2652554512023926, "learning_rate": 7.343529288891239e-05, "loss": 1.3954, "step": 4176 }, { "epoch": 0.5983383469416989, "grad_norm": 1.2090704441070557, "learning_rate": 7.33905650406916e-05, "loss": 1.5526, "step": 4177 }, { "epoch": 0.5984815928950007, "grad_norm": 1.1644854545593262, "learning_rate": 7.334584292082365e-05, "loss": 1.5128, "step": 4178 }, { "epoch": 0.5986248388483025, "grad_norm": 1.1333390474319458, "learning_rate": 7.330112653893614e-05, "loss": 1.3994, "step": 4179 }, { "epoch": 0.5987680848016044, "grad_norm": 1.245866060256958, "learning_rate": 7.325641590465542e-05, "loss": 1.4937, "step": 4180 }, { "epoch": 0.5989113307549062, "grad_norm": 1.1208797693252563, "learning_rate": 7.321171102760653e-05, "loss": 1.4001, "step": 4181 }, { "epoch": 0.5990545767082079, "grad_norm": 1.09479558467865, "learning_rate": 7.316701191741333e-05, "loss": 1.4121, "step": 4182 }, { "epoch": 0.5991978226615098, "grad_norm": 1.08538019657135, "learning_rate": 7.31223185836985e-05, "loss": 1.6017, "step": 4183 }, { "epoch": 0.5993410686148116, "grad_norm": 1.1451808214187622, "learning_rate": 7.307763103608332e-05, "loss": 1.339, "step": 4184 }, { "epoch": 0.5994843145681135, "grad_norm": 1.022583246231079, "learning_rate": 7.303294928418799e-05, "loss": 1.4978, "step": 4185 }, { "epoch": 0.5996275605214153, "grad_norm": 1.1408665180206299, "learning_rate": 7.298827333763132e-05, "loss": 1.5996, "step": 4186 }, { "epoch": 0.5997708064747171, "grad_norm": 1.1718382835388184, "learning_rate": 7.294360320603095e-05, "loss": 1.3508, "step": 4187 }, { "epoch": 0.5999140524280189, "grad_norm": 1.1444815397262573, "learning_rate": 7.289893889900332e-05, "loss": 1.4831, "step": 4188 }, { "epoch": 0.6000572983813207, "grad_norm": 1.2503138780593872, "learning_rate": 7.285428042616344e-05, "loss": 1.4345, "step": 4189 }, { "epoch": 0.6002005443346226, "grad_norm": 1.2045994997024536, "learning_rate": 7.280962779712525e-05, "loss": 1.3203, "step": 4190 }, { "epoch": 0.6003437902879244, "grad_norm": 0.9749893546104431, "learning_rate": 7.276498102150138e-05, "loss": 1.5792, "step": 4191 }, { "epoch": 0.6004870362412262, "grad_norm": 1.3314417600631714, "learning_rate": 7.272034010890309e-05, "loss": 1.4139, "step": 4192 }, { "epoch": 0.600630282194528, "grad_norm": 0.9749581813812256, "learning_rate": 7.267570506894052e-05, "loss": 1.4471, "step": 4193 }, { "epoch": 0.6007735281478298, "grad_norm": 1.1609665155410767, "learning_rate": 7.263107591122246e-05, "loss": 1.5528, "step": 4194 }, { "epoch": 0.6009167741011316, "grad_norm": 1.028211236000061, "learning_rate": 7.258645264535649e-05, "loss": 1.2524, "step": 4195 }, { "epoch": 0.6010600200544335, "grad_norm": 1.1497012376785278, "learning_rate": 7.254183528094891e-05, "loss": 1.4144, "step": 4196 }, { "epoch": 0.6012032660077353, "grad_norm": 1.1085944175720215, "learning_rate": 7.249722382760469e-05, "loss": 1.3618, "step": 4197 }, { "epoch": 0.6013465119610371, "grad_norm": 1.0840877294540405, "learning_rate": 7.245261829492755e-05, "loss": 1.5699, "step": 4198 }, { "epoch": 0.6014897579143389, "grad_norm": 1.0820575952529907, "learning_rate": 7.24080186925201e-05, "loss": 1.5274, "step": 4199 }, { "epoch": 0.6016330038676407, "grad_norm": 0.9790958166122437, "learning_rate": 7.236342502998335e-05, "loss": 1.7417, "step": 4200 }, { "epoch": 0.6017762498209426, "grad_norm": 1.2154924869537354, "learning_rate": 7.231883731691732e-05, "loss": 1.4436, "step": 4201 }, { "epoch": 0.6019194957742444, "grad_norm": 1.1999479532241821, "learning_rate": 7.22742555629206e-05, "loss": 1.4308, "step": 4202 }, { "epoch": 0.6020627417275461, "grad_norm": 1.018477439880371, "learning_rate": 7.222967977759056e-05, "loss": 1.355, "step": 4203 }, { "epoch": 0.602205987680848, "grad_norm": 1.1790064573287964, "learning_rate": 7.218510997052326e-05, "loss": 1.1994, "step": 4204 }, { "epoch": 0.6023492336341498, "grad_norm": 1.010616660118103, "learning_rate": 7.214054615131345e-05, "loss": 1.4614, "step": 4205 }, { "epoch": 0.6024924795874517, "grad_norm": 1.0062549114227295, "learning_rate": 7.209598832955469e-05, "loss": 1.3939, "step": 4206 }, { "epoch": 0.6026357255407535, "grad_norm": 1.1047641038894653, "learning_rate": 7.205143651483906e-05, "loss": 1.432, "step": 4207 }, { "epoch": 0.6027789714940552, "grad_norm": 1.235935091972351, "learning_rate": 7.200689071675755e-05, "loss": 1.4056, "step": 4208 }, { "epoch": 0.6029222174473571, "grad_norm": 1.2573401927947998, "learning_rate": 7.196235094489978e-05, "loss": 1.5375, "step": 4209 }, { "epoch": 0.6030654634006589, "grad_norm": 1.146323561668396, "learning_rate": 7.1917817208854e-05, "loss": 1.4844, "step": 4210 }, { "epoch": 0.6032087093539608, "grad_norm": 1.0489208698272705, "learning_rate": 7.187328951820723e-05, "loss": 1.6133, "step": 4211 }, { "epoch": 0.6033519553072626, "grad_norm": 0.9818617105484009, "learning_rate": 7.182876788254525e-05, "loss": 1.3494, "step": 4212 }, { "epoch": 0.6034952012605644, "grad_norm": 1.1256752014160156, "learning_rate": 7.178425231145236e-05, "loss": 1.6435, "step": 4213 }, { "epoch": 0.6036384472138662, "grad_norm": 1.070942759513855, "learning_rate": 7.173974281451176e-05, "loss": 1.3518, "step": 4214 }, { "epoch": 0.603781693167168, "grad_norm": 0.8929803967475891, "learning_rate": 7.169523940130519e-05, "loss": 1.6109, "step": 4215 }, { "epoch": 0.6039249391204699, "grad_norm": 1.1128506660461426, "learning_rate": 7.16507420814131e-05, "loss": 1.4826, "step": 4216 }, { "epoch": 0.6040681850737717, "grad_norm": 1.0680562257766724, "learning_rate": 7.160625086441476e-05, "loss": 1.2377, "step": 4217 }, { "epoch": 0.6042114310270735, "grad_norm": 1.1258862018585205, "learning_rate": 7.156176575988794e-05, "loss": 1.2049, "step": 4218 }, { "epoch": 0.6043546769803753, "grad_norm": 1.066379427909851, "learning_rate": 7.151728677740923e-05, "loss": 1.6333, "step": 4219 }, { "epoch": 0.6044979229336771, "grad_norm": 1.1222875118255615, "learning_rate": 7.147281392655385e-05, "loss": 1.5763, "step": 4220 }, { "epoch": 0.6046411688869789, "grad_norm": 1.2587907314300537, "learning_rate": 7.142834721689565e-05, "loss": 1.2897, "step": 4221 }, { "epoch": 0.6047844148402808, "grad_norm": 1.168574333190918, "learning_rate": 7.138388665800733e-05, "loss": 1.2936, "step": 4222 }, { "epoch": 0.6049276607935826, "grad_norm": 1.0632528066635132, "learning_rate": 7.133943225946e-05, "loss": 1.4177, "step": 4223 }, { "epoch": 0.6050709067468844, "grad_norm": 1.0872769355773926, "learning_rate": 7.129498403082369e-05, "loss": 1.3622, "step": 4224 }, { "epoch": 0.6052141527001862, "grad_norm": 1.0940521955490112, "learning_rate": 7.125054198166701e-05, "loss": 1.5998, "step": 4225 }, { "epoch": 0.605357398653488, "grad_norm": 1.0477849245071411, "learning_rate": 7.120610612155716e-05, "loss": 1.8118, "step": 4226 }, { "epoch": 0.6055006446067899, "grad_norm": 1.2331570386886597, "learning_rate": 7.11616764600601e-05, "loss": 1.3899, "step": 4227 }, { "epoch": 0.6056438905600917, "grad_norm": 1.0639318227767944, "learning_rate": 7.111725300674052e-05, "loss": 1.2789, "step": 4228 }, { "epoch": 0.6057871365133934, "grad_norm": 1.0440737009048462, "learning_rate": 7.107283577116161e-05, "loss": 1.4212, "step": 4229 }, { "epoch": 0.6059303824666953, "grad_norm": 1.1756181716918945, "learning_rate": 7.102842476288534e-05, "loss": 1.3551, "step": 4230 }, { "epoch": 0.6060736284199971, "grad_norm": 0.9752718806266785, "learning_rate": 7.098401999147226e-05, "loss": 1.4841, "step": 4231 }, { "epoch": 0.606216874373299, "grad_norm": 1.0898563861846924, "learning_rate": 7.093962146648164e-05, "loss": 1.5383, "step": 4232 }, { "epoch": 0.6063601203266008, "grad_norm": 1.511345624923706, "learning_rate": 7.089522919747142e-05, "loss": 1.5601, "step": 4233 }, { "epoch": 0.6065033662799026, "grad_norm": 1.1627968549728394, "learning_rate": 7.085084319399808e-05, "loss": 1.3997, "step": 4234 }, { "epoch": 0.6066466122332044, "grad_norm": 1.2176806926727295, "learning_rate": 7.08064634656169e-05, "loss": 1.4282, "step": 4235 }, { "epoch": 0.6067898581865062, "grad_norm": 1.1652377843856812, "learning_rate": 7.076209002188168e-05, "loss": 1.5238, "step": 4236 }, { "epoch": 0.606933104139808, "grad_norm": 1.0856783390045166, "learning_rate": 7.071772287234497e-05, "loss": 1.5297, "step": 4237 }, { "epoch": 0.6070763500931099, "grad_norm": 1.163684606552124, "learning_rate": 7.067336202655792e-05, "loss": 1.3029, "step": 4238 }, { "epoch": 0.6072195960464117, "grad_norm": 1.0622572898864746, "learning_rate": 7.062900749407026e-05, "loss": 1.5115, "step": 4239 }, { "epoch": 0.6073628419997135, "grad_norm": 1.0597134828567505, "learning_rate": 7.058465928443048e-05, "loss": 1.4787, "step": 4240 }, { "epoch": 0.6075060879530153, "grad_norm": 0.9228923320770264, "learning_rate": 7.054031740718567e-05, "loss": 1.3745, "step": 4241 }, { "epoch": 0.6076493339063171, "grad_norm": 1.055535912513733, "learning_rate": 7.049598187188148e-05, "loss": 1.4522, "step": 4242 }, { "epoch": 0.607792579859619, "grad_norm": 1.09255051612854, "learning_rate": 7.045165268806231e-05, "loss": 1.4313, "step": 4243 }, { "epoch": 0.6079358258129208, "grad_norm": 1.345362663269043, "learning_rate": 7.040732986527108e-05, "loss": 1.4796, "step": 4244 }, { "epoch": 0.6080790717662226, "grad_norm": 0.9786936640739441, "learning_rate": 7.03630134130494e-05, "loss": 1.3408, "step": 4245 }, { "epoch": 0.6082223177195244, "grad_norm": 1.0303382873535156, "learning_rate": 7.03187033409376e-05, "loss": 1.4599, "step": 4246 }, { "epoch": 0.6083655636728262, "grad_norm": 1.3315646648406982, "learning_rate": 7.027439965847442e-05, "loss": 1.4668, "step": 4247 }, { "epoch": 0.6085088096261281, "grad_norm": 0.9277085065841675, "learning_rate": 7.023010237519739e-05, "loss": 1.6801, "step": 4248 }, { "epoch": 0.6086520555794299, "grad_norm": 1.1871479749679565, "learning_rate": 7.018581150064269e-05, "loss": 1.3703, "step": 4249 }, { "epoch": 0.6087953015327316, "grad_norm": 1.1531585454940796, "learning_rate": 7.014152704434494e-05, "loss": 1.3295, "step": 4250 }, { "epoch": 0.6089385474860335, "grad_norm": 1.0974829196929932, "learning_rate": 7.009724901583755e-05, "loss": 1.2888, "step": 4251 }, { "epoch": 0.6090817934393353, "grad_norm": 1.3255139589309692, "learning_rate": 7.005297742465247e-05, "loss": 1.3727, "step": 4252 }, { "epoch": 0.6092250393926372, "grad_norm": 0.9593663215637207, "learning_rate": 7.000871228032027e-05, "loss": 1.4637, "step": 4253 }, { "epoch": 0.609368285345939, "grad_norm": 1.1611112356185913, "learning_rate": 6.996445359237016e-05, "loss": 1.5378, "step": 4254 }, { "epoch": 0.6095115312992408, "grad_norm": 1.1825108528137207, "learning_rate": 6.992020137032988e-05, "loss": 1.3882, "step": 4255 }, { "epoch": 0.6096547772525426, "grad_norm": 1.138529658317566, "learning_rate": 6.987595562372596e-05, "loss": 1.5079, "step": 4256 }, { "epoch": 0.6097980232058444, "grad_norm": 1.156195044517517, "learning_rate": 6.983171636208328e-05, "loss": 1.4831, "step": 4257 }, { "epoch": 0.6099412691591463, "grad_norm": 1.1191006898880005, "learning_rate": 6.978748359492553e-05, "loss": 1.4122, "step": 4258 }, { "epoch": 0.6100845151124481, "grad_norm": 1.1648168563842773, "learning_rate": 6.974325733177495e-05, "loss": 1.3209, "step": 4259 }, { "epoch": 0.6102277610657499, "grad_norm": 1.5466960668563843, "learning_rate": 6.96990375821523e-05, "loss": 1.3727, "step": 4260 }, { "epoch": 0.6103710070190517, "grad_norm": 1.2400637865066528, "learning_rate": 6.9654824355577e-05, "loss": 1.5019, "step": 4261 }, { "epoch": 0.6105142529723535, "grad_norm": 0.9756989479064941, "learning_rate": 6.961061766156715e-05, "loss": 1.4358, "step": 4262 }, { "epoch": 0.6106574989256554, "grad_norm": 1.0483461618423462, "learning_rate": 6.956641750963927e-05, "loss": 1.4438, "step": 4263 }, { "epoch": 0.6108007448789572, "grad_norm": 1.017721176147461, "learning_rate": 6.952222390930858e-05, "loss": 1.4794, "step": 4264 }, { "epoch": 0.610943990832259, "grad_norm": 1.046010136604309, "learning_rate": 6.947803687008888e-05, "loss": 1.4865, "step": 4265 }, { "epoch": 0.6110872367855608, "grad_norm": 1.0964988470077515, "learning_rate": 6.943385640149251e-05, "loss": 1.4287, "step": 4266 }, { "epoch": 0.6112304827388626, "grad_norm": 1.1878373622894287, "learning_rate": 6.938968251303053e-05, "loss": 1.3212, "step": 4267 }, { "epoch": 0.6113737286921644, "grad_norm": 0.9585728049278259, "learning_rate": 6.934551521421235e-05, "loss": 1.3376, "step": 4268 }, { "epoch": 0.6115169746454663, "grad_norm": 1.156432867050171, "learning_rate": 6.93013545145462e-05, "loss": 1.646, "step": 4269 }, { "epoch": 0.6116602205987681, "grad_norm": 1.1259299516677856, "learning_rate": 6.925720042353876e-05, "loss": 1.3983, "step": 4270 }, { "epoch": 0.6118034665520699, "grad_norm": 1.2397981882095337, "learning_rate": 6.921305295069528e-05, "loss": 1.1742, "step": 4271 }, { "epoch": 0.6119467125053717, "grad_norm": 1.0969380140304565, "learning_rate": 6.916891210551965e-05, "loss": 1.2905, "step": 4272 }, { "epoch": 0.6120899584586735, "grad_norm": 1.0822852849960327, "learning_rate": 6.912477789751426e-05, "loss": 1.607, "step": 4273 }, { "epoch": 0.6122332044119754, "grad_norm": 1.282238245010376, "learning_rate": 6.908065033618018e-05, "loss": 1.5079, "step": 4274 }, { "epoch": 0.6123764503652772, "grad_norm": 0.9994431734085083, "learning_rate": 6.903652943101697e-05, "loss": 1.4408, "step": 4275 }, { "epoch": 0.612519696318579, "grad_norm": 1.0387200117111206, "learning_rate": 6.89924151915227e-05, "loss": 1.4052, "step": 4276 }, { "epoch": 0.6126629422718808, "grad_norm": 1.0311651229858398, "learning_rate": 6.894830762719411e-05, "loss": 1.4887, "step": 4277 }, { "epoch": 0.6128061882251826, "grad_norm": 1.169585943222046, "learning_rate": 6.890420674752653e-05, "loss": 1.4591, "step": 4278 }, { "epoch": 0.6129494341784845, "grad_norm": 1.1838040351867676, "learning_rate": 6.886011256201371e-05, "loss": 1.4668, "step": 4279 }, { "epoch": 0.6130926801317863, "grad_norm": 1.05080246925354, "learning_rate": 6.881602508014808e-05, "loss": 1.4836, "step": 4280 }, { "epoch": 0.6132359260850881, "grad_norm": 1.0124226808547974, "learning_rate": 6.877194431142055e-05, "loss": 1.4576, "step": 4281 }, { "epoch": 0.6133791720383899, "grad_norm": 1.2042455673217773, "learning_rate": 6.872787026532062e-05, "loss": 1.1873, "step": 4282 }, { "epoch": 0.6135224179916917, "grad_norm": 1.1711006164550781, "learning_rate": 6.868380295133641e-05, "loss": 1.4421, "step": 4283 }, { "epoch": 0.6136656639449936, "grad_norm": 1.4034318923950195, "learning_rate": 6.86397423789544e-05, "loss": 1.445, "step": 4284 }, { "epoch": 0.6138089098982954, "grad_norm": 1.0700427293777466, "learning_rate": 6.859568855765985e-05, "loss": 1.3202, "step": 4285 }, { "epoch": 0.6139521558515972, "grad_norm": 1.136765956878662, "learning_rate": 6.855164149693641e-05, "loss": 1.3828, "step": 4286 }, { "epoch": 0.614095401804899, "grad_norm": 1.1164528131484985, "learning_rate": 6.850760120626633e-05, "loss": 1.3804, "step": 4287 }, { "epoch": 0.6142386477582008, "grad_norm": 1.0326247215270996, "learning_rate": 6.84635676951304e-05, "loss": 1.4765, "step": 4288 }, { "epoch": 0.6143818937115026, "grad_norm": 1.1922988891601562, "learning_rate": 6.841954097300791e-05, "loss": 1.4618, "step": 4289 }, { "epoch": 0.6145251396648045, "grad_norm": 1.31527578830719, "learning_rate": 6.837552104937679e-05, "loss": 1.2412, "step": 4290 }, { "epoch": 0.6146683856181063, "grad_norm": 0.9861143231391907, "learning_rate": 6.833150793371341e-05, "loss": 1.3715, "step": 4291 }, { "epoch": 0.6148116315714081, "grad_norm": 0.8896774053573608, "learning_rate": 6.828750163549267e-05, "loss": 1.3217, "step": 4292 }, { "epoch": 0.6149548775247099, "grad_norm": 1.0360058546066284, "learning_rate": 6.824350216418808e-05, "loss": 1.3669, "step": 4293 }, { "epoch": 0.6150981234780117, "grad_norm": 1.0123547315597534, "learning_rate": 6.819950952927161e-05, "loss": 1.5443, "step": 4294 }, { "epoch": 0.6152413694313136, "grad_norm": 1.0284956693649292, "learning_rate": 6.815552374021378e-05, "loss": 1.5482, "step": 4295 }, { "epoch": 0.6153846153846154, "grad_norm": 0.9953281879425049, "learning_rate": 6.811154480648371e-05, "loss": 1.5406, "step": 4296 }, { "epoch": 0.6155278613379173, "grad_norm": 1.1610767841339111, "learning_rate": 6.80675727375489e-05, "loss": 1.3533, "step": 4297 }, { "epoch": 0.615671107291219, "grad_norm": 1.383579134941101, "learning_rate": 6.802360754287547e-05, "loss": 1.2288, "step": 4298 }, { "epoch": 0.6158143532445208, "grad_norm": 1.1796855926513672, "learning_rate": 6.797964923192807e-05, "loss": 1.4432, "step": 4299 }, { "epoch": 0.6159575991978227, "grad_norm": 1.0965690612792969, "learning_rate": 6.793569781416978e-05, "loss": 1.6171, "step": 4300 }, { "epoch": 0.6161008451511245, "grad_norm": 1.4718953371047974, "learning_rate": 6.789175329906232e-05, "loss": 1.3847, "step": 4301 }, { "epoch": 0.6162440911044264, "grad_norm": 1.0558202266693115, "learning_rate": 6.784781569606576e-05, "loss": 1.2803, "step": 4302 }, { "epoch": 0.6163873370577281, "grad_norm": 1.2051622867584229, "learning_rate": 6.780388501463887e-05, "loss": 1.3133, "step": 4303 }, { "epoch": 0.6165305830110299, "grad_norm": 1.2898198366165161, "learning_rate": 6.775996126423882e-05, "loss": 1.4376, "step": 4304 }, { "epoch": 0.6166738289643318, "grad_norm": 1.035155177116394, "learning_rate": 6.771604445432127e-05, "loss": 1.4687, "step": 4305 }, { "epoch": 0.6168170749176336, "grad_norm": 1.1722652912139893, "learning_rate": 6.767213459434047e-05, "loss": 1.3517, "step": 4306 }, { "epoch": 0.6169603208709354, "grad_norm": 1.1174097061157227, "learning_rate": 6.762823169374906e-05, "loss": 1.5539, "step": 4307 }, { "epoch": 0.6171035668242372, "grad_norm": 1.1836035251617432, "learning_rate": 6.758433576199832e-05, "loss": 1.4287, "step": 4308 }, { "epoch": 0.617246812777539, "grad_norm": 1.026642084121704, "learning_rate": 6.754044680853794e-05, "loss": 1.3904, "step": 4309 }, { "epoch": 0.6173900587308409, "grad_norm": 1.0696532726287842, "learning_rate": 6.749656484281608e-05, "loss": 1.4178, "step": 4310 }, { "epoch": 0.6175333046841427, "grad_norm": 1.0841660499572754, "learning_rate": 6.745268987427946e-05, "loss": 1.5925, "step": 4311 }, { "epoch": 0.6176765506374445, "grad_norm": 1.1488522291183472, "learning_rate": 6.740882191237334e-05, "loss": 1.3217, "step": 4312 }, { "epoch": 0.6178197965907463, "grad_norm": 1.198958158493042, "learning_rate": 6.736496096654134e-05, "loss": 1.4284, "step": 4313 }, { "epoch": 0.6179630425440481, "grad_norm": 1.0057673454284668, "learning_rate": 6.732110704622564e-05, "loss": 1.5161, "step": 4314 }, { "epoch": 0.6181062884973499, "grad_norm": 1.1507371664047241, "learning_rate": 6.727726016086693e-05, "loss": 1.5376, "step": 4315 }, { "epoch": 0.6182495344506518, "grad_norm": 0.9906706213951111, "learning_rate": 6.723342031990431e-05, "loss": 1.2112, "step": 4316 }, { "epoch": 0.6183927804039536, "grad_norm": 1.3517733812332153, "learning_rate": 6.71895875327755e-05, "loss": 1.4475, "step": 4317 }, { "epoch": 0.6185360263572554, "grad_norm": 0.9619729518890381, "learning_rate": 6.714576180891654e-05, "loss": 1.4712, "step": 4318 }, { "epoch": 0.6186792723105572, "grad_norm": 0.9794148206710815, "learning_rate": 6.710194315776203e-05, "loss": 1.4797, "step": 4319 }, { "epoch": 0.618822518263859, "grad_norm": 1.027116060256958, "learning_rate": 6.705813158874509e-05, "loss": 1.1894, "step": 4320 }, { "epoch": 0.6189657642171609, "grad_norm": 1.2014038562774658, "learning_rate": 6.70143271112972e-05, "loss": 1.2853, "step": 4321 }, { "epoch": 0.6191090101704627, "grad_norm": 1.2216103076934814, "learning_rate": 6.697052973484845e-05, "loss": 1.5218, "step": 4322 }, { "epoch": 0.6192522561237646, "grad_norm": 1.0790290832519531, "learning_rate": 6.692673946882727e-05, "loss": 1.3789, "step": 4323 }, { "epoch": 0.6193955020770663, "grad_norm": 1.2942510843276978, "learning_rate": 6.688295632266064e-05, "loss": 1.6199, "step": 4324 }, { "epoch": 0.6195387480303681, "grad_norm": 1.085420846939087, "learning_rate": 6.683918030577402e-05, "loss": 1.4176, "step": 4325 }, { "epoch": 0.61968199398367, "grad_norm": 1.05784010887146, "learning_rate": 6.679541142759126e-05, "loss": 1.347, "step": 4326 }, { "epoch": 0.6198252399369718, "grad_norm": 1.0274940729141235, "learning_rate": 6.675164969753472e-05, "loss": 1.4039, "step": 4327 }, { "epoch": 0.6199684858902736, "grad_norm": 1.1883533000946045, "learning_rate": 6.670789512502527e-05, "loss": 1.4771, "step": 4328 }, { "epoch": 0.6201117318435754, "grad_norm": 0.9951431155204773, "learning_rate": 6.666414771948211e-05, "loss": 1.5404, "step": 4329 }, { "epoch": 0.6202549777968772, "grad_norm": 1.1721469163894653, "learning_rate": 6.662040749032303e-05, "loss": 1.5811, "step": 4330 }, { "epoch": 0.6203982237501791, "grad_norm": 1.4056416749954224, "learning_rate": 6.65766744469642e-05, "loss": 1.0948, "step": 4331 }, { "epoch": 0.6205414697034809, "grad_norm": 1.2731767892837524, "learning_rate": 6.653294859882027e-05, "loss": 1.4503, "step": 4332 }, { "epoch": 0.6206847156567827, "grad_norm": 1.1195292472839355, "learning_rate": 6.648922995530433e-05, "loss": 1.6758, "step": 4333 }, { "epoch": 0.6208279616100845, "grad_norm": 1.1020455360412598, "learning_rate": 6.644551852582787e-05, "loss": 1.1022, "step": 4334 }, { "epoch": 0.6209712075633863, "grad_norm": 1.1526288986206055, "learning_rate": 6.6401814319801e-05, "loss": 1.4826, "step": 4335 }, { "epoch": 0.6211144535166881, "grad_norm": 1.0930039882659912, "learning_rate": 6.635811734663202e-05, "loss": 1.2796, "step": 4336 }, { "epoch": 0.62125769946999, "grad_norm": 1.3573743104934692, "learning_rate": 6.631442761572788e-05, "loss": 1.3886, "step": 4337 }, { "epoch": 0.6214009454232918, "grad_norm": 1.312254548072815, "learning_rate": 6.627074513649392e-05, "loss": 1.2988, "step": 4338 }, { "epoch": 0.6215441913765936, "grad_norm": 1.1354514360427856, "learning_rate": 6.622706991833383e-05, "loss": 1.3857, "step": 4339 }, { "epoch": 0.6216874373298954, "grad_norm": 1.3920341730117798, "learning_rate": 6.618340197064983e-05, "loss": 1.5398, "step": 4340 }, { "epoch": 0.6218306832831972, "grad_norm": 1.0604654550552368, "learning_rate": 6.613974130284258e-05, "loss": 1.5231, "step": 4341 }, { "epoch": 0.6219739292364991, "grad_norm": 1.1310856342315674, "learning_rate": 6.60960879243111e-05, "loss": 1.3582, "step": 4342 }, { "epoch": 0.6221171751898009, "grad_norm": 1.3000630140304565, "learning_rate": 6.605244184445292e-05, "loss": 1.4494, "step": 4343 }, { "epoch": 0.6222604211431028, "grad_norm": 1.0721409320831299, "learning_rate": 6.600880307266393e-05, "loss": 1.3778, "step": 4344 }, { "epoch": 0.6224036670964045, "grad_norm": 1.0897825956344604, "learning_rate": 6.596517161833845e-05, "loss": 1.3731, "step": 4345 }, { "epoch": 0.6225469130497063, "grad_norm": 1.2897216081619263, "learning_rate": 6.592154749086934e-05, "loss": 1.5978, "step": 4346 }, { "epoch": 0.6226901590030082, "grad_norm": 1.2409048080444336, "learning_rate": 6.587793069964771e-05, "loss": 1.3957, "step": 4347 }, { "epoch": 0.62283340495631, "grad_norm": 1.0066745281219482, "learning_rate": 6.583432125406323e-05, "loss": 1.585, "step": 4348 }, { "epoch": 0.6229766509096119, "grad_norm": 1.0633467435836792, "learning_rate": 6.579071916350393e-05, "loss": 1.4127, "step": 4349 }, { "epoch": 0.6231198968629136, "grad_norm": 1.1040815114974976, "learning_rate": 6.57471244373562e-05, "loss": 1.4464, "step": 4350 }, { "epoch": 0.6232631428162154, "grad_norm": 1.3406316041946411, "learning_rate": 6.5703537085005e-05, "loss": 1.4988, "step": 4351 }, { "epoch": 0.6234063887695173, "grad_norm": 1.0772210359573364, "learning_rate": 6.565995711583353e-05, "loss": 1.4178, "step": 4352 }, { "epoch": 0.6235496347228191, "grad_norm": 1.0118277072906494, "learning_rate": 6.561638453922349e-05, "loss": 1.3661, "step": 4353 }, { "epoch": 0.6236928806761209, "grad_norm": 1.1502290964126587, "learning_rate": 6.557281936455506e-05, "loss": 1.4313, "step": 4354 }, { "epoch": 0.6238361266294227, "grad_norm": 0.9608575701713562, "learning_rate": 6.552926160120663e-05, "loss": 1.5175, "step": 4355 }, { "epoch": 0.6239793725827245, "grad_norm": 1.2027487754821777, "learning_rate": 6.548571125855519e-05, "loss": 1.4245, "step": 4356 }, { "epoch": 0.6241226185360264, "grad_norm": 1.2962158918380737, "learning_rate": 6.544216834597597e-05, "loss": 1.5141, "step": 4357 }, { "epoch": 0.6242658644893282, "grad_norm": 0.9798980355262756, "learning_rate": 6.539863287284275e-05, "loss": 1.4812, "step": 4358 }, { "epoch": 0.62440911044263, "grad_norm": 0.9980088472366333, "learning_rate": 6.535510484852767e-05, "loss": 1.6063, "step": 4359 }, { "epoch": 0.6245523563959318, "grad_norm": 1.0252913236618042, "learning_rate": 6.531158428240113e-05, "loss": 1.3394, "step": 4360 }, { "epoch": 0.6246956023492336, "grad_norm": 1.2365180253982544, "learning_rate": 6.52680711838321e-05, "loss": 1.3966, "step": 4361 }, { "epoch": 0.6248388483025354, "grad_norm": 1.0831849575042725, "learning_rate": 6.522456556218791e-05, "loss": 1.4495, "step": 4362 }, { "epoch": 0.6249820942558373, "grad_norm": 1.11063814163208, "learning_rate": 6.518106742683415e-05, "loss": 1.4705, "step": 4363 }, { "epoch": 0.6251253402091391, "grad_norm": 1.2590703964233398, "learning_rate": 6.513757678713495e-05, "loss": 1.3649, "step": 4364 }, { "epoch": 0.625268586162441, "grad_norm": 1.1312358379364014, "learning_rate": 6.509409365245276e-05, "loss": 1.1834, "step": 4365 }, { "epoch": 0.6254118321157427, "grad_norm": 1.3639585971832275, "learning_rate": 6.50506180321484e-05, "loss": 1.4835, "step": 4366 }, { "epoch": 0.6255550780690445, "grad_norm": 1.112052083015442, "learning_rate": 6.500714993558115e-05, "loss": 1.4962, "step": 4367 }, { "epoch": 0.6256983240223464, "grad_norm": 0.977105975151062, "learning_rate": 6.496368937210853e-05, "loss": 1.351, "step": 4368 }, { "epoch": 0.6258415699756482, "grad_norm": 1.2740505933761597, "learning_rate": 6.49202363510866e-05, "loss": 1.3894, "step": 4369 }, { "epoch": 0.6259848159289501, "grad_norm": 1.0919435024261475, "learning_rate": 6.487679088186973e-05, "loss": 1.4608, "step": 4370 }, { "epoch": 0.6261280618822518, "grad_norm": 0.9704021215438843, "learning_rate": 6.483335297381057e-05, "loss": 1.4003, "step": 4371 }, { "epoch": 0.6262713078355536, "grad_norm": 1.137825608253479, "learning_rate": 6.478992263626031e-05, "loss": 1.4177, "step": 4372 }, { "epoch": 0.6264145537888555, "grad_norm": 1.0024917125701904, "learning_rate": 6.474649987856834e-05, "loss": 1.5011, "step": 4373 }, { "epoch": 0.6265577997421573, "grad_norm": 1.253300666809082, "learning_rate": 6.47030847100826e-05, "loss": 1.3927, "step": 4374 }, { "epoch": 0.6267010456954591, "grad_norm": 0.9548689723014832, "learning_rate": 6.465967714014927e-05, "loss": 1.3104, "step": 4375 }, { "epoch": 0.6268442916487609, "grad_norm": 1.2680814266204834, "learning_rate": 6.461627717811288e-05, "loss": 1.4504, "step": 4376 }, { "epoch": 0.6269875376020627, "grad_norm": 1.127995491027832, "learning_rate": 6.457288483331639e-05, "loss": 1.6023, "step": 4377 }, { "epoch": 0.6271307835553646, "grad_norm": 0.9732784628868103, "learning_rate": 6.452950011510118e-05, "loss": 1.4686, "step": 4378 }, { "epoch": 0.6272740295086664, "grad_norm": 1.12776517868042, "learning_rate": 6.448612303280677e-05, "loss": 1.4189, "step": 4379 }, { "epoch": 0.6274172754619682, "grad_norm": 1.0020201206207275, "learning_rate": 6.444275359577128e-05, "loss": 1.5062, "step": 4380 }, { "epoch": 0.62756052141527, "grad_norm": 1.3143055438995361, "learning_rate": 6.439939181333101e-05, "loss": 1.1331, "step": 4381 }, { "epoch": 0.6277037673685718, "grad_norm": 0.9757946729660034, "learning_rate": 6.435603769482071e-05, "loss": 1.6639, "step": 4382 }, { "epoch": 0.6278470133218736, "grad_norm": 1.1390036344528198, "learning_rate": 6.431269124957347e-05, "loss": 1.2147, "step": 4383 }, { "epoch": 0.6279902592751755, "grad_norm": 1.1585760116577148, "learning_rate": 6.426935248692064e-05, "loss": 1.5506, "step": 4384 }, { "epoch": 0.6281335052284773, "grad_norm": 1.0946505069732666, "learning_rate": 6.422602141619207e-05, "loss": 1.5181, "step": 4385 }, { "epoch": 0.6282767511817792, "grad_norm": 1.1699894666671753, "learning_rate": 6.418269804671576e-05, "loss": 1.3557, "step": 4386 }, { "epoch": 0.6284199971350809, "grad_norm": 1.1317129135131836, "learning_rate": 6.413938238781824e-05, "loss": 1.4305, "step": 4387 }, { "epoch": 0.6285632430883827, "grad_norm": 1.079715371131897, "learning_rate": 6.409607444882431e-05, "loss": 1.2822, "step": 4388 }, { "epoch": 0.6287064890416846, "grad_norm": 1.172074794769287, "learning_rate": 6.405277423905705e-05, "loss": 1.569, "step": 4389 }, { "epoch": 0.6288497349949864, "grad_norm": 1.1845581531524658, "learning_rate": 6.40094817678379e-05, "loss": 1.472, "step": 4390 }, { "epoch": 0.6289929809482883, "grad_norm": 1.047263264656067, "learning_rate": 6.396619704448677e-05, "loss": 1.4188, "step": 4391 }, { "epoch": 0.62913622690159, "grad_norm": 1.0432939529418945, "learning_rate": 6.392292007832168e-05, "loss": 1.367, "step": 4392 }, { "epoch": 0.6292794728548918, "grad_norm": 1.2254811525344849, "learning_rate": 6.387965087865915e-05, "loss": 1.4847, "step": 4393 }, { "epoch": 0.6294227188081937, "grad_norm": 1.226952075958252, "learning_rate": 6.383638945481391e-05, "loss": 1.6096, "step": 4394 }, { "epoch": 0.6295659647614955, "grad_norm": 1.1258150339126587, "learning_rate": 6.379313581609912e-05, "loss": 1.4181, "step": 4395 }, { "epoch": 0.6297092107147974, "grad_norm": 1.23808753490448, "learning_rate": 6.374988997182623e-05, "loss": 1.4212, "step": 4396 }, { "epoch": 0.6298524566680991, "grad_norm": 1.29201078414917, "learning_rate": 6.370665193130495e-05, "loss": 1.4617, "step": 4397 }, { "epoch": 0.6299957026214009, "grad_norm": 1.327603816986084, "learning_rate": 6.36634217038434e-05, "loss": 1.3541, "step": 4398 }, { "epoch": 0.6301389485747028, "grad_norm": 1.2675610780715942, "learning_rate": 6.362019929874799e-05, "loss": 1.3754, "step": 4399 }, { "epoch": 0.6302821945280046, "grad_norm": 1.0129035711288452, "learning_rate": 6.357698472532338e-05, "loss": 1.6173, "step": 4400 }, { "epoch": 0.6304254404813064, "grad_norm": 0.9448611736297607, "learning_rate": 6.353377799287266e-05, "loss": 1.5676, "step": 4401 }, { "epoch": 0.6305686864346082, "grad_norm": 1.3164101839065552, "learning_rate": 6.349057911069709e-05, "loss": 1.5345, "step": 4402 }, { "epoch": 0.63071193238791, "grad_norm": 1.1153144836425781, "learning_rate": 6.344738808809639e-05, "loss": 1.2079, "step": 4403 }, { "epoch": 0.6308551783412119, "grad_norm": 1.1490134000778198, "learning_rate": 6.340420493436851e-05, "loss": 1.3594, "step": 4404 }, { "epoch": 0.6309984242945137, "grad_norm": 1.0669857263565063, "learning_rate": 6.33610296588097e-05, "loss": 1.2969, "step": 4405 }, { "epoch": 0.6311416702478155, "grad_norm": 1.2407759428024292, "learning_rate": 6.33178622707145e-05, "loss": 1.4674, "step": 4406 }, { "epoch": 0.6312849162011173, "grad_norm": 1.1548502445220947, "learning_rate": 6.327470277937586e-05, "loss": 1.5919, "step": 4407 }, { "epoch": 0.6314281621544191, "grad_norm": 1.0576072931289673, "learning_rate": 6.323155119408489e-05, "loss": 1.4694, "step": 4408 }, { "epoch": 0.6315714081077209, "grad_norm": 1.0145364999771118, "learning_rate": 6.318840752413106e-05, "loss": 1.3618, "step": 4409 }, { "epoch": 0.6317146540610228, "grad_norm": 1.1035380363464355, "learning_rate": 6.314527177880215e-05, "loss": 1.5512, "step": 4410 }, { "epoch": 0.6318579000143246, "grad_norm": 1.2770285606384277, "learning_rate": 6.310214396738419e-05, "loss": 1.5214, "step": 4411 }, { "epoch": 0.6320011459676265, "grad_norm": 1.064840316772461, "learning_rate": 6.30590240991616e-05, "loss": 1.4628, "step": 4412 }, { "epoch": 0.6321443919209282, "grad_norm": 0.9682191610336304, "learning_rate": 6.301591218341693e-05, "loss": 1.2709, "step": 4413 }, { "epoch": 0.63228763787423, "grad_norm": 1.0400118827819824, "learning_rate": 6.297280822943118e-05, "loss": 1.5443, "step": 4414 }, { "epoch": 0.6324308838275319, "grad_norm": 0.985525369644165, "learning_rate": 6.292971224648352e-05, "loss": 1.4423, "step": 4415 }, { "epoch": 0.6325741297808337, "grad_norm": 1.2941104173660278, "learning_rate": 6.288662424385148e-05, "loss": 1.4826, "step": 4416 }, { "epoch": 0.6327173757341356, "grad_norm": 1.1155363321304321, "learning_rate": 6.284354423081083e-05, "loss": 1.3875, "step": 4417 }, { "epoch": 0.6328606216874373, "grad_norm": 1.32936429977417, "learning_rate": 6.280047221663558e-05, "loss": 1.4407, "step": 4418 }, { "epoch": 0.6330038676407391, "grad_norm": 1.0095332860946655, "learning_rate": 6.275740821059817e-05, "loss": 1.446, "step": 4419 }, { "epoch": 0.633147113594041, "grad_norm": 1.2688881158828735, "learning_rate": 6.271435222196916e-05, "loss": 1.5386, "step": 4420 }, { "epoch": 0.6332903595473428, "grad_norm": 1.1637020111083984, "learning_rate": 6.267130426001742e-05, "loss": 1.4565, "step": 4421 }, { "epoch": 0.6334336055006446, "grad_norm": 1.1409856081008911, "learning_rate": 6.262826433401015e-05, "loss": 1.36, "step": 4422 }, { "epoch": 0.6335768514539464, "grad_norm": 1.0667866468429565, "learning_rate": 6.258523245321274e-05, "loss": 1.5215, "step": 4423 }, { "epoch": 0.6337200974072482, "grad_norm": 0.9465048909187317, "learning_rate": 6.254220862688889e-05, "loss": 1.4174, "step": 4424 }, { "epoch": 0.6338633433605501, "grad_norm": 1.1549687385559082, "learning_rate": 6.249919286430063e-05, "loss": 1.4441, "step": 4425 }, { "epoch": 0.6340065893138519, "grad_norm": 1.1122524738311768, "learning_rate": 6.245618517470813e-05, "loss": 1.2383, "step": 4426 }, { "epoch": 0.6341498352671537, "grad_norm": 1.4171867370605469, "learning_rate": 6.24131855673699e-05, "loss": 1.4357, "step": 4427 }, { "epoch": 0.6342930812204555, "grad_norm": 1.0893977880477905, "learning_rate": 6.23701940515427e-05, "loss": 1.4589, "step": 4428 }, { "epoch": 0.6344363271737573, "grad_norm": 1.21721351146698, "learning_rate": 6.232721063648148e-05, "loss": 1.5055, "step": 4429 }, { "epoch": 0.6345795731270591, "grad_norm": 1.2622164487838745, "learning_rate": 6.228423533143963e-05, "loss": 1.5442, "step": 4430 }, { "epoch": 0.634722819080361, "grad_norm": 1.0875318050384521, "learning_rate": 6.224126814566853e-05, "loss": 1.3827, "step": 4431 }, { "epoch": 0.6348660650336628, "grad_norm": 1.180962085723877, "learning_rate": 6.219830908841802e-05, "loss": 1.4425, "step": 4432 }, { "epoch": 0.6350093109869647, "grad_norm": 1.0907139778137207, "learning_rate": 6.215535816893615e-05, "loss": 1.5147, "step": 4433 }, { "epoch": 0.6351525569402664, "grad_norm": 1.1626256704330444, "learning_rate": 6.211241539646913e-05, "loss": 1.4485, "step": 4434 }, { "epoch": 0.6352958028935682, "grad_norm": 1.2271250486373901, "learning_rate": 6.206948078026154e-05, "loss": 1.4252, "step": 4435 }, { "epoch": 0.6354390488468701, "grad_norm": 1.227423906326294, "learning_rate": 6.202655432955604e-05, "loss": 1.3003, "step": 4436 }, { "epoch": 0.6355822948001719, "grad_norm": 1.1963754892349243, "learning_rate": 6.198363605359373e-05, "loss": 1.456, "step": 4437 }, { "epoch": 0.6357255407534738, "grad_norm": 1.0410053730010986, "learning_rate": 6.194072596161383e-05, "loss": 1.413, "step": 4438 }, { "epoch": 0.6358687867067755, "grad_norm": 1.1525322198867798, "learning_rate": 6.18978240628538e-05, "loss": 1.3009, "step": 4439 }, { "epoch": 0.6360120326600773, "grad_norm": 1.2511171102523804, "learning_rate": 6.185493036654934e-05, "loss": 1.5775, "step": 4440 }, { "epoch": 0.6361552786133792, "grad_norm": 0.9261684417724609, "learning_rate": 6.181204488193446e-05, "loss": 1.4493, "step": 4441 }, { "epoch": 0.636298524566681, "grad_norm": 1.0560733079910278, "learning_rate": 6.176916761824129e-05, "loss": 1.4802, "step": 4442 }, { "epoch": 0.6364417705199829, "grad_norm": 1.2946867942810059, "learning_rate": 6.172629858470031e-05, "loss": 1.3834, "step": 4443 }, { "epoch": 0.6365850164732846, "grad_norm": 1.330717921257019, "learning_rate": 6.168343779054009e-05, "loss": 1.4895, "step": 4444 }, { "epoch": 0.6367282624265864, "grad_norm": 1.1620519161224365, "learning_rate": 6.16405852449875e-05, "loss": 1.3921, "step": 4445 }, { "epoch": 0.6368715083798883, "grad_norm": 1.3948991298675537, "learning_rate": 6.159774095726771e-05, "loss": 1.4549, "step": 4446 }, { "epoch": 0.6370147543331901, "grad_norm": 1.2637699842453003, "learning_rate": 6.155490493660394e-05, "loss": 1.5239, "step": 4447 }, { "epoch": 0.6371580002864919, "grad_norm": 1.1849713325500488, "learning_rate": 6.151207719221778e-05, "loss": 1.3702, "step": 4448 }, { "epoch": 0.6373012462397937, "grad_norm": 1.0947566032409668, "learning_rate": 6.146925773332899e-05, "loss": 1.5183, "step": 4449 }, { "epoch": 0.6374444921930955, "grad_norm": 1.108784556388855, "learning_rate": 6.14264465691555e-05, "loss": 1.5117, "step": 4450 }, { "epoch": 0.6375877381463974, "grad_norm": 1.0778671503067017, "learning_rate": 6.138364370891354e-05, "loss": 1.4321, "step": 4451 }, { "epoch": 0.6377309840996992, "grad_norm": 1.0631675720214844, "learning_rate": 6.134084916181746e-05, "loss": 1.3896, "step": 4452 }, { "epoch": 0.637874230053001, "grad_norm": 0.9859858155250549, "learning_rate": 6.129806293707989e-05, "loss": 1.5103, "step": 4453 }, { "epoch": 0.6380174760063029, "grad_norm": 1.1190159320831299, "learning_rate": 6.125528504391167e-05, "loss": 1.3011, "step": 4454 }, { "epoch": 0.6381607219596046, "grad_norm": 1.0975273847579956, "learning_rate": 6.121251549152178e-05, "loss": 1.4369, "step": 4455 }, { "epoch": 0.6383039679129064, "grad_norm": 1.0383461713790894, "learning_rate": 6.116975428911744e-05, "loss": 1.553, "step": 4456 }, { "epoch": 0.6384472138662083, "grad_norm": 1.1674754619598389, "learning_rate": 6.112700144590416e-05, "loss": 1.5261, "step": 4457 }, { "epoch": 0.6385904598195101, "grad_norm": 1.3234057426452637, "learning_rate": 6.108425697108546e-05, "loss": 1.4883, "step": 4458 }, { "epoch": 0.638733705772812, "grad_norm": 1.4287654161453247, "learning_rate": 6.104152087386325e-05, "loss": 1.5403, "step": 4459 }, { "epoch": 0.6388769517261137, "grad_norm": 0.9834151268005371, "learning_rate": 6.0998793163437505e-05, "loss": 1.3995, "step": 4460 }, { "epoch": 0.6390201976794155, "grad_norm": 1.130052924156189, "learning_rate": 6.0956073849006456e-05, "loss": 1.446, "step": 4461 }, { "epoch": 0.6391634436327174, "grad_norm": 1.3726046085357666, "learning_rate": 6.091336293976655e-05, "loss": 1.4425, "step": 4462 }, { "epoch": 0.6393066895860192, "grad_norm": 1.1894687414169312, "learning_rate": 6.087066044491232e-05, "loss": 1.4289, "step": 4463 }, { "epoch": 0.6394499355393211, "grad_norm": 1.2986059188842773, "learning_rate": 6.0827966373636656e-05, "loss": 1.4439, "step": 4464 }, { "epoch": 0.6395931814926228, "grad_norm": 1.111060380935669, "learning_rate": 6.078528073513041e-05, "loss": 1.4963, "step": 4465 }, { "epoch": 0.6397364274459246, "grad_norm": 1.344793438911438, "learning_rate": 6.0742603538582835e-05, "loss": 1.4535, "step": 4466 }, { "epoch": 0.6398796733992265, "grad_norm": 1.189284086227417, "learning_rate": 6.069993479318126e-05, "loss": 1.5176, "step": 4467 }, { "epoch": 0.6400229193525283, "grad_norm": 1.089331030845642, "learning_rate": 6.065727450811115e-05, "loss": 1.451, "step": 4468 }, { "epoch": 0.6401661653058301, "grad_norm": 1.1603879928588867, "learning_rate": 6.061462269255629e-05, "loss": 1.5282, "step": 4469 }, { "epoch": 0.6403094112591319, "grad_norm": 1.047631025314331, "learning_rate": 6.057197935569854e-05, "loss": 1.4021, "step": 4470 }, { "epoch": 0.6404526572124337, "grad_norm": 1.141322374343872, "learning_rate": 6.0529344506717935e-05, "loss": 1.4327, "step": 4471 }, { "epoch": 0.6405959031657356, "grad_norm": 1.7458254098892212, "learning_rate": 6.0486718154792724e-05, "loss": 1.4753, "step": 4472 }, { "epoch": 0.6407391491190374, "grad_norm": 1.090705394744873, "learning_rate": 6.044410030909926e-05, "loss": 1.4763, "step": 4473 }, { "epoch": 0.6408823950723392, "grad_norm": 1.0630391836166382, "learning_rate": 6.040149097881214e-05, "loss": 1.5238, "step": 4474 }, { "epoch": 0.6410256410256411, "grad_norm": 1.1306729316711426, "learning_rate": 6.035889017310414e-05, "loss": 1.5606, "step": 4475 }, { "epoch": 0.6411688869789428, "grad_norm": 1.1455961465835571, "learning_rate": 6.0316297901146103e-05, "loss": 1.5379, "step": 4476 }, { "epoch": 0.6413121329322446, "grad_norm": 1.091823935508728, "learning_rate": 6.02737141721071e-05, "loss": 1.606, "step": 4477 }, { "epoch": 0.6414553788855465, "grad_norm": 1.2753171920776367, "learning_rate": 6.023113899515438e-05, "loss": 1.3085, "step": 4478 }, { "epoch": 0.6415986248388483, "grad_norm": 0.9803798198699951, "learning_rate": 6.0188572379453276e-05, "loss": 1.4587, "step": 4479 }, { "epoch": 0.6417418707921502, "grad_norm": 1.3936856985092163, "learning_rate": 6.014601433416741e-05, "loss": 1.4711, "step": 4480 }, { "epoch": 0.6418851167454519, "grad_norm": 1.1153671741485596, "learning_rate": 6.010346486845837e-05, "loss": 1.2893, "step": 4481 }, { "epoch": 0.6420283626987537, "grad_norm": 1.1821467876434326, "learning_rate": 6.0060923991486084e-05, "loss": 1.3419, "step": 4482 }, { "epoch": 0.6421716086520556, "grad_norm": 1.0970436334609985, "learning_rate": 6.001839171240853e-05, "loss": 1.4249, "step": 4483 }, { "epoch": 0.6423148546053574, "grad_norm": 1.1034257411956787, "learning_rate": 5.9975868040381844e-05, "loss": 1.491, "step": 4484 }, { "epoch": 0.6424581005586593, "grad_norm": 1.068557858467102, "learning_rate": 5.9933352984560334e-05, "loss": 1.3578, "step": 4485 }, { "epoch": 0.642601346511961, "grad_norm": 1.1183393001556396, "learning_rate": 5.98908465540964e-05, "loss": 1.4756, "step": 4486 }, { "epoch": 0.6427445924652628, "grad_norm": 1.0114208459854126, "learning_rate": 5.9848348758140674e-05, "loss": 1.5201, "step": 4487 }, { "epoch": 0.6428878384185647, "grad_norm": 1.3996479511260986, "learning_rate": 5.980585960584187e-05, "loss": 1.6688, "step": 4488 }, { "epoch": 0.6430310843718665, "grad_norm": 1.2602158784866333, "learning_rate": 5.976337910634684e-05, "loss": 1.3494, "step": 4489 }, { "epoch": 0.6431743303251684, "grad_norm": 1.3959579467773438, "learning_rate": 5.972090726880055e-05, "loss": 1.3666, "step": 4490 }, { "epoch": 0.6433175762784701, "grad_norm": 1.3741917610168457, "learning_rate": 5.967844410234624e-05, "loss": 1.6026, "step": 4491 }, { "epoch": 0.6434608222317719, "grad_norm": 1.0980783700942993, "learning_rate": 5.963598961612506e-05, "loss": 1.4189, "step": 4492 }, { "epoch": 0.6436040681850738, "grad_norm": 1.0962692499160767, "learning_rate": 5.9593543819276486e-05, "loss": 1.4135, "step": 4493 }, { "epoch": 0.6437473141383756, "grad_norm": 1.064880132675171, "learning_rate": 5.955110672093801e-05, "loss": 1.5281, "step": 4494 }, { "epoch": 0.6438905600916774, "grad_norm": 0.9786617159843445, "learning_rate": 5.950867833024529e-05, "loss": 1.4413, "step": 4495 }, { "epoch": 0.6440338060449793, "grad_norm": 1.1842151880264282, "learning_rate": 5.946625865633216e-05, "loss": 1.6617, "step": 4496 }, { "epoch": 0.644177051998281, "grad_norm": 1.248489499092102, "learning_rate": 5.942384770833045e-05, "loss": 1.333, "step": 4497 }, { "epoch": 0.6443202979515829, "grad_norm": 1.0941545963287354, "learning_rate": 5.938144549537023e-05, "loss": 1.6338, "step": 4498 }, { "epoch": 0.6444635439048847, "grad_norm": 1.1537346839904785, "learning_rate": 5.9339052026579654e-05, "loss": 1.4115, "step": 4499 }, { "epoch": 0.6446067898581865, "grad_norm": 0.9913920760154724, "learning_rate": 5.929666731108497e-05, "loss": 1.465, "step": 4500 }, { "epoch": 0.6447500358114884, "grad_norm": 1.0328162908554077, "learning_rate": 5.9254291358010586e-05, "loss": 1.4305, "step": 4501 }, { "epoch": 0.6448932817647901, "grad_norm": 0.9749317765235901, "learning_rate": 5.9211924176478915e-05, "loss": 1.2948, "step": 4502 }, { "epoch": 0.645036527718092, "grad_norm": 1.022205114364624, "learning_rate": 5.9169565775610656e-05, "loss": 1.5671, "step": 4503 }, { "epoch": 0.6451797736713938, "grad_norm": 0.9826886057853699, "learning_rate": 5.9127216164524504e-05, "loss": 1.3627, "step": 4504 }, { "epoch": 0.6453230196246956, "grad_norm": 1.2081366777420044, "learning_rate": 5.908487535233725e-05, "loss": 1.3164, "step": 4505 }, { "epoch": 0.6454662655779975, "grad_norm": 1.2156579494476318, "learning_rate": 5.904254334816381e-05, "loss": 1.5052, "step": 4506 }, { "epoch": 0.6456095115312992, "grad_norm": 1.0528117418289185, "learning_rate": 5.900022016111733e-05, "loss": 1.4505, "step": 4507 }, { "epoch": 0.645752757484601, "grad_norm": 1.1564947366714478, "learning_rate": 5.895790580030879e-05, "loss": 1.4364, "step": 4508 }, { "epoch": 0.6458960034379029, "grad_norm": 1.0814497470855713, "learning_rate": 5.891560027484753e-05, "loss": 1.5585, "step": 4509 }, { "epoch": 0.6460392493912047, "grad_norm": 1.2179501056671143, "learning_rate": 5.8873303593840846e-05, "loss": 1.5081, "step": 4510 }, { "epoch": 0.6461824953445066, "grad_norm": 1.0988117456436157, "learning_rate": 5.883101576639415e-05, "loss": 1.5349, "step": 4511 }, { "epoch": 0.6463257412978083, "grad_norm": 1.0699806213378906, "learning_rate": 5.878873680161101e-05, "loss": 1.5146, "step": 4512 }, { "epoch": 0.6464689872511101, "grad_norm": 1.185715913772583, "learning_rate": 5.8746466708592986e-05, "loss": 1.4824, "step": 4513 }, { "epoch": 0.646612233204412, "grad_norm": 1.1765772104263306, "learning_rate": 5.870420549643987e-05, "loss": 1.5552, "step": 4514 }, { "epoch": 0.6467554791577138, "grad_norm": 1.0131865739822388, "learning_rate": 5.866195317424934e-05, "loss": 1.3782, "step": 4515 }, { "epoch": 0.6468987251110156, "grad_norm": 1.091248869895935, "learning_rate": 5.8619709751117344e-05, "loss": 1.3904, "step": 4516 }, { "epoch": 0.6470419710643174, "grad_norm": 1.1151150465011597, "learning_rate": 5.8577475236137855e-05, "loss": 1.4657, "step": 4517 }, { "epoch": 0.6471852170176192, "grad_norm": 1.2071292400360107, "learning_rate": 5.853524963840289e-05, "loss": 1.3939, "step": 4518 }, { "epoch": 0.6473284629709211, "grad_norm": 1.3980448246002197, "learning_rate": 5.849303296700257e-05, "loss": 1.4357, "step": 4519 }, { "epoch": 0.6474717089242229, "grad_norm": 1.0827422142028809, "learning_rate": 5.845082523102514e-05, "loss": 1.4321, "step": 4520 }, { "epoch": 0.6476149548775247, "grad_norm": 1.1302344799041748, "learning_rate": 5.8408626439556845e-05, "loss": 1.3504, "step": 4521 }, { "epoch": 0.6477582008308266, "grad_norm": 1.2347482442855835, "learning_rate": 5.8366436601682084e-05, "loss": 1.3669, "step": 4522 }, { "epoch": 0.6479014467841283, "grad_norm": 1.0000845193862915, "learning_rate": 5.832425572648317e-05, "loss": 1.3133, "step": 4523 }, { "epoch": 0.6480446927374302, "grad_norm": 1.1394531726837158, "learning_rate": 5.828208382304072e-05, "loss": 1.3696, "step": 4524 }, { "epoch": 0.648187938690732, "grad_norm": 1.0541324615478516, "learning_rate": 5.823992090043333e-05, "loss": 1.3951, "step": 4525 }, { "epoch": 0.6483311846440338, "grad_norm": 1.0459247827529907, "learning_rate": 5.819776696773751e-05, "loss": 1.5707, "step": 4526 }, { "epoch": 0.6484744305973357, "grad_norm": 1.1614558696746826, "learning_rate": 5.815562203402798e-05, "loss": 1.2869, "step": 4527 }, { "epoch": 0.6486176765506374, "grad_norm": 0.98748779296875, "learning_rate": 5.8113486108377615e-05, "loss": 1.4323, "step": 4528 }, { "epoch": 0.6487609225039392, "grad_norm": 1.3415852785110474, "learning_rate": 5.8071359199857114e-05, "loss": 1.7015, "step": 4529 }, { "epoch": 0.6489041684572411, "grad_norm": 1.1512844562530518, "learning_rate": 5.802924131753542e-05, "loss": 1.4762, "step": 4530 }, { "epoch": 0.6490474144105429, "grad_norm": 1.2748430967330933, "learning_rate": 5.798713247047944e-05, "loss": 1.5392, "step": 4531 }, { "epoch": 0.6491906603638448, "grad_norm": 1.154200553894043, "learning_rate": 5.79450326677542e-05, "loss": 1.3945, "step": 4532 }, { "epoch": 0.6493339063171465, "grad_norm": 0.9837687015533447, "learning_rate": 5.790294191842276e-05, "loss": 1.3517, "step": 4533 }, { "epoch": 0.6494771522704483, "grad_norm": 0.8380252718925476, "learning_rate": 5.786086023154609e-05, "loss": 1.2219, "step": 4534 }, { "epoch": 0.6496203982237502, "grad_norm": 1.0011017322540283, "learning_rate": 5.781878761618349e-05, "loss": 1.3772, "step": 4535 }, { "epoch": 0.649763644177052, "grad_norm": 1.1292611360549927, "learning_rate": 5.777672408139212e-05, "loss": 1.5269, "step": 4536 }, { "epoch": 0.6499068901303539, "grad_norm": 0.9947747588157654, "learning_rate": 5.773466963622716e-05, "loss": 1.4733, "step": 4537 }, { "epoch": 0.6500501360836556, "grad_norm": 1.1494261026382446, "learning_rate": 5.7692624289741914e-05, "loss": 1.437, "step": 4538 }, { "epoch": 0.6501933820369574, "grad_norm": 1.0893393754959106, "learning_rate": 5.765058805098773e-05, "loss": 1.2395, "step": 4539 }, { "epoch": 0.6503366279902593, "grad_norm": 1.1427130699157715, "learning_rate": 5.7608560929013946e-05, "loss": 1.4638, "step": 4540 }, { "epoch": 0.6504798739435611, "grad_norm": 1.0221378803253174, "learning_rate": 5.756654293286796e-05, "loss": 1.4724, "step": 4541 }, { "epoch": 0.650623119896863, "grad_norm": 1.1547068357467651, "learning_rate": 5.752453407159522e-05, "loss": 1.4711, "step": 4542 }, { "epoch": 0.6507663658501648, "grad_norm": 1.1155989170074463, "learning_rate": 5.7482534354239225e-05, "loss": 1.4708, "step": 4543 }, { "epoch": 0.6509096118034665, "grad_norm": 1.2078620195388794, "learning_rate": 5.74405437898414e-05, "loss": 1.3329, "step": 4544 }, { "epoch": 0.6510528577567684, "grad_norm": 1.3042616844177246, "learning_rate": 5.739856238744129e-05, "loss": 1.4016, "step": 4545 }, { "epoch": 0.6511961037100702, "grad_norm": 1.061548113822937, "learning_rate": 5.735659015607655e-05, "loss": 1.4555, "step": 4546 }, { "epoch": 0.651339349663372, "grad_norm": 1.0976771116256714, "learning_rate": 5.731462710478264e-05, "loss": 1.455, "step": 4547 }, { "epoch": 0.6514825956166739, "grad_norm": 1.1134206056594849, "learning_rate": 5.7272673242593174e-05, "loss": 1.4063, "step": 4548 }, { "epoch": 0.6516258415699756, "grad_norm": 1.0864022970199585, "learning_rate": 5.723072857853992e-05, "loss": 1.2942, "step": 4549 }, { "epoch": 0.6517690875232774, "grad_norm": 1.3141577243804932, "learning_rate": 5.7188793121652374e-05, "loss": 1.3888, "step": 4550 }, { "epoch": 0.6519123334765793, "grad_norm": 1.113267421722412, "learning_rate": 5.714686688095825e-05, "loss": 1.2815, "step": 4551 }, { "epoch": 0.6520555794298811, "grad_norm": 1.0440294742584229, "learning_rate": 5.7104949865483246e-05, "loss": 1.4649, "step": 4552 }, { "epoch": 0.652198825383183, "grad_norm": 1.0255975723266602, "learning_rate": 5.706304208425105e-05, "loss": 1.3997, "step": 4553 }, { "epoch": 0.6523420713364847, "grad_norm": 1.071507215499878, "learning_rate": 5.702114354628341e-05, "loss": 1.2096, "step": 4554 }, { "epoch": 0.6524853172897865, "grad_norm": 1.0574589967727661, "learning_rate": 5.697925426059991e-05, "loss": 1.3802, "step": 4555 }, { "epoch": 0.6526285632430884, "grad_norm": 1.227358341217041, "learning_rate": 5.6937374236218424e-05, "loss": 1.2703, "step": 4556 }, { "epoch": 0.6527718091963902, "grad_norm": 1.1622474193572998, "learning_rate": 5.6895503482154666e-05, "loss": 1.4141, "step": 4557 }, { "epoch": 0.6529150551496921, "grad_norm": 1.1430712938308716, "learning_rate": 5.6853642007422294e-05, "loss": 1.4892, "step": 4558 }, { "epoch": 0.6530583011029938, "grad_norm": 1.050152063369751, "learning_rate": 5.681178982103309e-05, "loss": 1.3814, "step": 4559 }, { "epoch": 0.6532015470562956, "grad_norm": 1.3386648893356323, "learning_rate": 5.6769946931996795e-05, "loss": 1.2433, "step": 4560 }, { "epoch": 0.6533447930095975, "grad_norm": 1.2757692337036133, "learning_rate": 5.672811334932116e-05, "loss": 1.4705, "step": 4561 }, { "epoch": 0.6534880389628993, "grad_norm": 1.096329927444458, "learning_rate": 5.668628908201189e-05, "loss": 1.4245, "step": 4562 }, { "epoch": 0.6536312849162011, "grad_norm": 1.081300139427185, "learning_rate": 5.6644474139072746e-05, "loss": 1.4628, "step": 4563 }, { "epoch": 0.653774530869503, "grad_norm": 1.0749168395996094, "learning_rate": 5.660266852950547e-05, "loss": 1.4174, "step": 4564 }, { "epoch": 0.6539177768228047, "grad_norm": 1.1181756258010864, "learning_rate": 5.6560872262309704e-05, "loss": 1.5401, "step": 4565 }, { "epoch": 0.6540610227761066, "grad_norm": 1.1073769330978394, "learning_rate": 5.651908534648315e-05, "loss": 1.268, "step": 4566 }, { "epoch": 0.6542042687294084, "grad_norm": 1.018141746520996, "learning_rate": 5.647730779102161e-05, "loss": 1.3816, "step": 4567 }, { "epoch": 0.6543475146827102, "grad_norm": 1.0469310283660889, "learning_rate": 5.6435539604918654e-05, "loss": 1.2902, "step": 4568 }, { "epoch": 0.6544907606360121, "grad_norm": 1.038442611694336, "learning_rate": 5.639378079716595e-05, "loss": 1.4566, "step": 4569 }, { "epoch": 0.6546340065893138, "grad_norm": 1.025829792022705, "learning_rate": 5.635203137675318e-05, "loss": 1.4288, "step": 4570 }, { "epoch": 0.6547772525426157, "grad_norm": 1.2322522401809692, "learning_rate": 5.631029135266791e-05, "loss": 1.4131, "step": 4571 }, { "epoch": 0.6549204984959175, "grad_norm": 1.0420337915420532, "learning_rate": 5.6268560733895816e-05, "loss": 1.4606, "step": 4572 }, { "epoch": 0.6550637444492193, "grad_norm": 1.2627524137496948, "learning_rate": 5.6226839529420314e-05, "loss": 1.3696, "step": 4573 }, { "epoch": 0.6552069904025212, "grad_norm": 1.1218619346618652, "learning_rate": 5.618512774822311e-05, "loss": 1.4626, "step": 4574 }, { "epoch": 0.6553502363558229, "grad_norm": 1.211631178855896, "learning_rate": 5.6143425399283664e-05, "loss": 1.4401, "step": 4575 }, { "epoch": 0.6554934823091247, "grad_norm": 1.0802208185195923, "learning_rate": 5.610173249157942e-05, "loss": 1.3866, "step": 4576 }, { "epoch": 0.6556367282624266, "grad_norm": 1.1085671186447144, "learning_rate": 5.6060049034085815e-05, "loss": 1.406, "step": 4577 }, { "epoch": 0.6557799742157284, "grad_norm": 1.0617015361785889, "learning_rate": 5.6018375035776406e-05, "loss": 1.6772, "step": 4578 }, { "epoch": 0.6559232201690303, "grad_norm": 1.2875553369522095, "learning_rate": 5.597671050562241e-05, "loss": 1.2714, "step": 4579 }, { "epoch": 0.656066466122332, "grad_norm": 1.207047939300537, "learning_rate": 5.5935055452593254e-05, "loss": 1.4701, "step": 4580 }, { "epoch": 0.6562097120756338, "grad_norm": 1.0921099185943604, "learning_rate": 5.5893409885656214e-05, "loss": 1.6638, "step": 4581 }, { "epoch": 0.6563529580289357, "grad_norm": 1.055417776107788, "learning_rate": 5.5851773813776556e-05, "loss": 1.3781, "step": 4582 }, { "epoch": 0.6564962039822375, "grad_norm": 1.2416764497756958, "learning_rate": 5.5810147245917535e-05, "loss": 1.3681, "step": 4583 }, { "epoch": 0.6566394499355394, "grad_norm": 1.1044549942016602, "learning_rate": 5.5768530191040206e-05, "loss": 1.4276, "step": 4584 }, { "epoch": 0.6567826958888412, "grad_norm": 1.0946972370147705, "learning_rate": 5.57269226581038e-05, "loss": 1.4551, "step": 4585 }, { "epoch": 0.6569259418421429, "grad_norm": 1.0739290714263916, "learning_rate": 5.56853246560654e-05, "loss": 1.4399, "step": 4586 }, { "epoch": 0.6570691877954448, "grad_norm": 1.1780658960342407, "learning_rate": 5.564373619387995e-05, "loss": 1.3673, "step": 4587 }, { "epoch": 0.6572124337487466, "grad_norm": 1.0285625457763672, "learning_rate": 5.5602157280500446e-05, "loss": 1.5443, "step": 4588 }, { "epoch": 0.6573556797020484, "grad_norm": 1.4831175804138184, "learning_rate": 5.556058792487779e-05, "loss": 1.3192, "step": 4589 }, { "epoch": 0.6574989256553503, "grad_norm": 1.0914247035980225, "learning_rate": 5.551902813596087e-05, "loss": 1.4107, "step": 4590 }, { "epoch": 0.657642171608652, "grad_norm": 1.1795514822006226, "learning_rate": 5.5477477922696465e-05, "loss": 1.4561, "step": 4591 }, { "epoch": 0.6577854175619539, "grad_norm": 0.9336190819740295, "learning_rate": 5.543593729402927e-05, "loss": 1.5151, "step": 4592 }, { "epoch": 0.6579286635152557, "grad_norm": 1.0827986001968384, "learning_rate": 5.5394406258902054e-05, "loss": 1.4191, "step": 4593 }, { "epoch": 0.6580719094685575, "grad_norm": 1.0531554222106934, "learning_rate": 5.5352884826255317e-05, "loss": 1.5306, "step": 4594 }, { "epoch": 0.6582151554218594, "grad_norm": 1.066885232925415, "learning_rate": 5.531137300502758e-05, "loss": 1.4357, "step": 4595 }, { "epoch": 0.6583584013751611, "grad_norm": 1.073481798171997, "learning_rate": 5.526987080415543e-05, "loss": 1.5838, "step": 4596 }, { "epoch": 0.658501647328463, "grad_norm": 1.2641152143478394, "learning_rate": 5.522837823257317e-05, "loss": 1.5301, "step": 4597 }, { "epoch": 0.6586448932817648, "grad_norm": 1.1367545127868652, "learning_rate": 5.518689529921315e-05, "loss": 1.3564, "step": 4598 }, { "epoch": 0.6587881392350666, "grad_norm": 0.98895663022995, "learning_rate": 5.514542201300563e-05, "loss": 1.482, "step": 4599 }, { "epoch": 0.6589313851883685, "grad_norm": 1.1632660627365112, "learning_rate": 5.5103958382878765e-05, "loss": 1.4931, "step": 4600 }, { "epoch": 0.6590746311416702, "grad_norm": 1.1964939832687378, "learning_rate": 5.5062504417758684e-05, "loss": 1.4642, "step": 4601 }, { "epoch": 0.659217877094972, "grad_norm": 1.086006760597229, "learning_rate": 5.502106012656931e-05, "loss": 1.5564, "step": 4602 }, { "epoch": 0.6593611230482739, "grad_norm": 1.1498935222625732, "learning_rate": 5.497962551823266e-05, "loss": 1.3983, "step": 4603 }, { "epoch": 0.6595043690015757, "grad_norm": 1.2935854196548462, "learning_rate": 5.493820060166861e-05, "loss": 1.308, "step": 4604 }, { "epoch": 0.6596476149548776, "grad_norm": 1.1543046236038208, "learning_rate": 5.4896785385794815e-05, "loss": 1.5321, "step": 4605 }, { "epoch": 0.6597908609081793, "grad_norm": 1.1246274709701538, "learning_rate": 5.485537987952696e-05, "loss": 1.5154, "step": 4606 }, { "epoch": 0.6599341068614811, "grad_norm": 1.162213921546936, "learning_rate": 5.4813984091778734e-05, "loss": 1.3668, "step": 4607 }, { "epoch": 0.660077352814783, "grad_norm": 1.0154352188110352, "learning_rate": 5.4772598031461507e-05, "loss": 1.3892, "step": 4608 }, { "epoch": 0.6602205987680848, "grad_norm": 1.1157153844833374, "learning_rate": 5.473122170748472e-05, "loss": 1.4373, "step": 4609 }, { "epoch": 0.6603638447213867, "grad_norm": 1.139030933380127, "learning_rate": 5.4689855128755686e-05, "loss": 1.2714, "step": 4610 }, { "epoch": 0.6605070906746885, "grad_norm": 1.0971623659133911, "learning_rate": 5.4648498304179585e-05, "loss": 1.4077, "step": 4611 }, { "epoch": 0.6606503366279902, "grad_norm": 1.1632561683654785, "learning_rate": 5.4607151242659524e-05, "loss": 1.5878, "step": 4612 }, { "epoch": 0.6607935825812921, "grad_norm": 1.2411365509033203, "learning_rate": 5.45658139530965e-05, "loss": 1.5197, "step": 4613 }, { "epoch": 0.6609368285345939, "grad_norm": 1.1518573760986328, "learning_rate": 5.452448644438946e-05, "loss": 1.3713, "step": 4614 }, { "epoch": 0.6610800744878957, "grad_norm": 1.1217316389083862, "learning_rate": 5.4483168725435086e-05, "loss": 1.3202, "step": 4615 }, { "epoch": 0.6612233204411976, "grad_norm": 1.0922865867614746, "learning_rate": 5.444186080512809e-05, "loss": 1.3008, "step": 4616 }, { "epoch": 0.6613665663944993, "grad_norm": 1.0986576080322266, "learning_rate": 5.4400562692361145e-05, "loss": 1.5421, "step": 4617 }, { "epoch": 0.6615098123478012, "grad_norm": 1.1198687553405762, "learning_rate": 5.435927439602462e-05, "loss": 1.413, "step": 4618 }, { "epoch": 0.661653058301103, "grad_norm": 1.2281591892242432, "learning_rate": 5.431799592500686e-05, "loss": 1.4991, "step": 4619 }, { "epoch": 0.6617963042544048, "grad_norm": 1.086848258972168, "learning_rate": 5.427672728819414e-05, "loss": 1.4309, "step": 4620 }, { "epoch": 0.6619395502077067, "grad_norm": 1.0861985683441162, "learning_rate": 5.423546849447055e-05, "loss": 1.4501, "step": 4621 }, { "epoch": 0.6620827961610084, "grad_norm": 1.2614893913269043, "learning_rate": 5.419421955271815e-05, "loss": 1.6691, "step": 4622 }, { "epoch": 0.6622260421143102, "grad_norm": 1.2433855533599854, "learning_rate": 5.415298047181667e-05, "loss": 1.445, "step": 4623 }, { "epoch": 0.6623692880676121, "grad_norm": 1.0563346147537231, "learning_rate": 5.4111751260644e-05, "loss": 1.2762, "step": 4624 }, { "epoch": 0.6625125340209139, "grad_norm": 1.1371698379516602, "learning_rate": 5.407053192807576e-05, "loss": 1.387, "step": 4625 }, { "epoch": 0.6626557799742158, "grad_norm": 1.09165620803833, "learning_rate": 5.40293224829854e-05, "loss": 1.4077, "step": 4626 }, { "epoch": 0.6627990259275175, "grad_norm": 1.0293549299240112, "learning_rate": 5.398812293424426e-05, "loss": 1.3404, "step": 4627 }, { "epoch": 0.6629422718808193, "grad_norm": 1.2052700519561768, "learning_rate": 5.394693329072171e-05, "loss": 1.4121, "step": 4628 }, { "epoch": 0.6630855178341212, "grad_norm": 1.2064213752746582, "learning_rate": 5.390575356128474e-05, "loss": 1.3681, "step": 4629 }, { "epoch": 0.663228763787423, "grad_norm": 1.0688049793243408, "learning_rate": 5.386458375479839e-05, "loss": 1.6877, "step": 4630 }, { "epoch": 0.6633720097407249, "grad_norm": 0.9817887544631958, "learning_rate": 5.382342388012547e-05, "loss": 1.3412, "step": 4631 }, { "epoch": 0.6635152556940267, "grad_norm": 1.1601709127426147, "learning_rate": 5.37822739461267e-05, "loss": 1.3779, "step": 4632 }, { "epoch": 0.6636585016473284, "grad_norm": 1.2823655605316162, "learning_rate": 5.3741133961660686e-05, "loss": 1.7397, "step": 4633 }, { "epoch": 0.6638017476006303, "grad_norm": 1.1961654424667358, "learning_rate": 5.370000393558371e-05, "loss": 1.3073, "step": 4634 }, { "epoch": 0.6639449935539321, "grad_norm": 1.2618054151535034, "learning_rate": 5.365888387675018e-05, "loss": 1.4494, "step": 4635 }, { "epoch": 0.664088239507234, "grad_norm": 1.054547905921936, "learning_rate": 5.361777379401223e-05, "loss": 1.4927, "step": 4636 }, { "epoch": 0.6642314854605358, "grad_norm": 1.020743489265442, "learning_rate": 5.357667369621977e-05, "loss": 1.4866, "step": 4637 }, { "epoch": 0.6643747314138375, "grad_norm": 1.2406256198883057, "learning_rate": 5.353558359222065e-05, "loss": 1.4321, "step": 4638 }, { "epoch": 0.6645179773671394, "grad_norm": 1.031090259552002, "learning_rate": 5.349450349086057e-05, "loss": 1.4247, "step": 4639 }, { "epoch": 0.6646612233204412, "grad_norm": 0.9442228078842163, "learning_rate": 5.3453433400983055e-05, "loss": 1.4894, "step": 4640 }, { "epoch": 0.664804469273743, "grad_norm": 1.2339543104171753, "learning_rate": 5.3412373331429474e-05, "loss": 1.5547, "step": 4641 }, { "epoch": 0.6649477152270449, "grad_norm": 1.2132046222686768, "learning_rate": 5.337132329103907e-05, "loss": 1.4698, "step": 4642 }, { "epoch": 0.6650909611803466, "grad_norm": 1.1132028102874756, "learning_rate": 5.3330283288648906e-05, "loss": 1.6366, "step": 4643 }, { "epoch": 0.6652342071336484, "grad_norm": 1.1934804916381836, "learning_rate": 5.3289253333093826e-05, "loss": 1.4182, "step": 4644 }, { "epoch": 0.6653774530869503, "grad_norm": 1.2797000408172607, "learning_rate": 5.324823343320654e-05, "loss": 1.4234, "step": 4645 }, { "epoch": 0.6655206990402521, "grad_norm": 1.0069880485534668, "learning_rate": 5.320722359781776e-05, "loss": 1.4869, "step": 4646 }, { "epoch": 0.665663944993554, "grad_norm": 1.058996319770813, "learning_rate": 5.3166223835755736e-05, "loss": 1.426, "step": 4647 }, { "epoch": 0.6658071909468557, "grad_norm": 1.0293855667114258, "learning_rate": 5.312523415584678e-05, "loss": 1.3555, "step": 4648 }, { "epoch": 0.6659504369001575, "grad_norm": 1.073056697845459, "learning_rate": 5.308425456691495e-05, "loss": 1.4734, "step": 4649 }, { "epoch": 0.6660936828534594, "grad_norm": 1.1267619132995605, "learning_rate": 5.3043285077782114e-05, "loss": 1.3704, "step": 4650 }, { "epoch": 0.6662369288067612, "grad_norm": 1.240592360496521, "learning_rate": 5.300232569726804e-05, "loss": 1.2893, "step": 4651 }, { "epoch": 0.6663801747600631, "grad_norm": 1.0078508853912354, "learning_rate": 5.2961376434190144e-05, "loss": 1.5036, "step": 4652 }, { "epoch": 0.6665234207133649, "grad_norm": 1.0908281803131104, "learning_rate": 5.292043729736394e-05, "loss": 1.4477, "step": 4653 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2605586051940918, "learning_rate": 5.2879508295602575e-05, "loss": 1.5425, "step": 4654 }, { "epoch": 0.6668099126199685, "grad_norm": 1.2396403551101685, "learning_rate": 5.283858943771698e-05, "loss": 1.5392, "step": 4655 }, { "epoch": 0.6669531585732703, "grad_norm": 1.031543254852295, "learning_rate": 5.2797680732515986e-05, "loss": 1.4982, "step": 4656 }, { "epoch": 0.6670964045265722, "grad_norm": 1.204143762588501, "learning_rate": 5.275678218880632e-05, "loss": 1.3493, "step": 4657 }, { "epoch": 0.667239650479874, "grad_norm": 1.0083363056182861, "learning_rate": 5.2715893815392325e-05, "loss": 1.3654, "step": 4658 }, { "epoch": 0.6673828964331757, "grad_norm": 1.1703487634658813, "learning_rate": 5.26750156210763e-05, "loss": 1.4638, "step": 4659 }, { "epoch": 0.6675261423864776, "grad_norm": 1.0888350009918213, "learning_rate": 5.2634147614658294e-05, "loss": 1.1204, "step": 4660 }, { "epoch": 0.6676693883397794, "grad_norm": 1.0271443128585815, "learning_rate": 5.259328980493618e-05, "loss": 1.5585, "step": 4661 }, { "epoch": 0.6678126342930812, "grad_norm": 1.3372961282730103, "learning_rate": 5.255244220070567e-05, "loss": 1.3249, "step": 4662 }, { "epoch": 0.6679558802463831, "grad_norm": 1.208622932434082, "learning_rate": 5.251160481076016e-05, "loss": 1.4479, "step": 4663 }, { "epoch": 0.6680991261996848, "grad_norm": 1.084515929222107, "learning_rate": 5.247077764389099e-05, "loss": 1.3492, "step": 4664 }, { "epoch": 0.6682423721529867, "grad_norm": 1.0711519718170166, "learning_rate": 5.242996070888728e-05, "loss": 1.316, "step": 4665 }, { "epoch": 0.6683856181062885, "grad_norm": 1.0069084167480469, "learning_rate": 5.2389154014535814e-05, "loss": 1.3754, "step": 4666 }, { "epoch": 0.6685288640595903, "grad_norm": 1.1453715562820435, "learning_rate": 5.23483575696213e-05, "loss": 1.3642, "step": 4667 }, { "epoch": 0.6686721100128922, "grad_norm": 1.0388261079788208, "learning_rate": 5.2307571382926215e-05, "loss": 1.2789, "step": 4668 }, { "epoch": 0.6688153559661939, "grad_norm": 1.0358388423919678, "learning_rate": 5.226679546323079e-05, "loss": 1.3986, "step": 4669 }, { "epoch": 0.6689586019194957, "grad_norm": 1.015290379524231, "learning_rate": 5.222602981931309e-05, "loss": 1.4101, "step": 4670 }, { "epoch": 0.6691018478727976, "grad_norm": 1.0272936820983887, "learning_rate": 5.2185274459948965e-05, "loss": 1.4133, "step": 4671 }, { "epoch": 0.6692450938260994, "grad_norm": 1.0329028367996216, "learning_rate": 5.2144529393912036e-05, "loss": 1.5581, "step": 4672 }, { "epoch": 0.6693883397794013, "grad_norm": 1.475765585899353, "learning_rate": 5.210379462997364e-05, "loss": 1.5461, "step": 4673 }, { "epoch": 0.6695315857327031, "grad_norm": 1.011217474937439, "learning_rate": 5.206307017690302e-05, "loss": 1.3441, "step": 4674 }, { "epoch": 0.6696748316860048, "grad_norm": 1.089285135269165, "learning_rate": 5.202235604346719e-05, "loss": 1.3825, "step": 4675 }, { "epoch": 0.6698180776393067, "grad_norm": 1.10187828540802, "learning_rate": 5.19816522384308e-05, "loss": 1.3298, "step": 4676 }, { "epoch": 0.6699613235926085, "grad_norm": 1.1578638553619385, "learning_rate": 5.1940958770556383e-05, "loss": 1.4931, "step": 4677 }, { "epoch": 0.6701045695459104, "grad_norm": 0.9503713846206665, "learning_rate": 5.190027564860433e-05, "loss": 1.3583, "step": 4678 }, { "epoch": 0.6702478154992122, "grad_norm": 1.0788397789001465, "learning_rate": 5.185960288133261e-05, "loss": 1.5613, "step": 4679 }, { "epoch": 0.6703910614525139, "grad_norm": 1.2687796354293823, "learning_rate": 5.181894047749711e-05, "loss": 1.2806, "step": 4680 }, { "epoch": 0.6705343074058158, "grad_norm": 1.260794758796692, "learning_rate": 5.177828844585142e-05, "loss": 1.3425, "step": 4681 }, { "epoch": 0.6706775533591176, "grad_norm": 1.4638421535491943, "learning_rate": 5.173764679514693e-05, "loss": 1.5314, "step": 4682 }, { "epoch": 0.6708207993124194, "grad_norm": 1.1221532821655273, "learning_rate": 5.169701553413282e-05, "loss": 1.2818, "step": 4683 }, { "epoch": 0.6709640452657213, "grad_norm": 1.2523716688156128, "learning_rate": 5.1656394671555875e-05, "loss": 1.527, "step": 4684 }, { "epoch": 0.671107291219023, "grad_norm": 1.0717252492904663, "learning_rate": 5.161578421616087e-05, "loss": 1.2228, "step": 4685 }, { "epoch": 0.6712505371723249, "grad_norm": 1.0427154302597046, "learning_rate": 5.157518417669023e-05, "loss": 1.2567, "step": 4686 }, { "epoch": 0.6713937831256267, "grad_norm": 1.1482542753219604, "learning_rate": 5.1534594561884086e-05, "loss": 1.256, "step": 4687 }, { "epoch": 0.6715370290789285, "grad_norm": 1.1927971839904785, "learning_rate": 5.1494015380480396e-05, "loss": 1.3282, "step": 4688 }, { "epoch": 0.6716802750322304, "grad_norm": 1.4063557386398315, "learning_rate": 5.1453446641214854e-05, "loss": 1.5275, "step": 4689 }, { "epoch": 0.6718235209855321, "grad_norm": 1.1688284873962402, "learning_rate": 5.141288835282092e-05, "loss": 1.4224, "step": 4690 }, { "epoch": 0.671966766938834, "grad_norm": 1.2405600547790527, "learning_rate": 5.137234052402977e-05, "loss": 1.2923, "step": 4691 }, { "epoch": 0.6721100128921358, "grad_norm": 1.1342674493789673, "learning_rate": 5.133180316357036e-05, "loss": 1.273, "step": 4692 }, { "epoch": 0.6722532588454376, "grad_norm": 1.1593042612075806, "learning_rate": 5.129127628016941e-05, "loss": 1.4585, "step": 4693 }, { "epoch": 0.6723965047987395, "grad_norm": 1.1867427825927734, "learning_rate": 5.1250759882551284e-05, "loss": 1.354, "step": 4694 }, { "epoch": 0.6725397507520413, "grad_norm": 1.3010904788970947, "learning_rate": 5.1210253979438174e-05, "loss": 1.2385, "step": 4695 }, { "epoch": 0.672682996705343, "grad_norm": 1.0134435892105103, "learning_rate": 5.1169758579550084e-05, "loss": 1.4905, "step": 4696 }, { "epoch": 0.6728262426586449, "grad_norm": 0.9926326274871826, "learning_rate": 5.112927369160458e-05, "loss": 1.4794, "step": 4697 }, { "epoch": 0.6729694886119467, "grad_norm": 1.220411777496338, "learning_rate": 5.108879932431709e-05, "loss": 1.3788, "step": 4698 }, { "epoch": 0.6731127345652486, "grad_norm": 1.1191716194152832, "learning_rate": 5.104833548640074e-05, "loss": 1.2847, "step": 4699 }, { "epoch": 0.6732559805185504, "grad_norm": 0.9757780432701111, "learning_rate": 5.10078821865664e-05, "loss": 1.3831, "step": 4700 }, { "epoch": 0.6733992264718521, "grad_norm": 1.2373939752578735, "learning_rate": 5.096743943352269e-05, "loss": 1.4552, "step": 4701 }, { "epoch": 0.673542472425154, "grad_norm": 1.2530090808868408, "learning_rate": 5.092700723597583e-05, "loss": 1.4199, "step": 4702 }, { "epoch": 0.6736857183784558, "grad_norm": 1.0883584022521973, "learning_rate": 5.088658560262998e-05, "loss": 1.4265, "step": 4703 }, { "epoch": 0.6738289643317577, "grad_norm": 1.4465304613113403, "learning_rate": 5.084617454218692e-05, "loss": 1.6174, "step": 4704 }, { "epoch": 0.6739722102850595, "grad_norm": 1.1673014163970947, "learning_rate": 5.080577406334607e-05, "loss": 1.6545, "step": 4705 }, { "epoch": 0.6741154562383612, "grad_norm": 1.3463194370269775, "learning_rate": 5.076538417480465e-05, "loss": 1.4105, "step": 4706 }, { "epoch": 0.6742587021916631, "grad_norm": 1.1564925909042358, "learning_rate": 5.072500488525775e-05, "loss": 1.4162, "step": 4707 }, { "epoch": 0.6744019481449649, "grad_norm": 0.9265849590301514, "learning_rate": 5.068463620339787e-05, "loss": 1.3756, "step": 4708 }, { "epoch": 0.6745451940982667, "grad_norm": 1.1846328973770142, "learning_rate": 5.064427813791547e-05, "loss": 1.7668, "step": 4709 }, { "epoch": 0.6746884400515686, "grad_norm": 1.0400694608688354, "learning_rate": 5.0603930697498605e-05, "loss": 1.3133, "step": 4710 }, { "epoch": 0.6748316860048703, "grad_norm": 1.3465063571929932, "learning_rate": 5.0563593890833115e-05, "loss": 1.4712, "step": 4711 }, { "epoch": 0.6749749319581722, "grad_norm": 1.3169689178466797, "learning_rate": 5.052326772660252e-05, "loss": 1.4627, "step": 4712 }, { "epoch": 0.675118177911474, "grad_norm": 1.046216607093811, "learning_rate": 5.048295221348795e-05, "loss": 1.4191, "step": 4713 }, { "epoch": 0.6752614238647758, "grad_norm": 1.2996163368225098, "learning_rate": 5.044264736016846e-05, "loss": 1.4226, "step": 4714 }, { "epoch": 0.6754046698180777, "grad_norm": 1.1588609218597412, "learning_rate": 5.040235317532067e-05, "loss": 1.299, "step": 4715 }, { "epoch": 0.6755479157713794, "grad_norm": 1.1012383699417114, "learning_rate": 5.036206966761887e-05, "loss": 1.3635, "step": 4716 }, { "epoch": 0.6756911617246812, "grad_norm": 1.2760194540023804, "learning_rate": 5.03217968457351e-05, "loss": 1.4756, "step": 4717 }, { "epoch": 0.6758344076779831, "grad_norm": 1.1731791496276855, "learning_rate": 5.028153471833914e-05, "loss": 1.4501, "step": 4718 }, { "epoch": 0.6759776536312849, "grad_norm": 1.090256690979004, "learning_rate": 5.024128329409841e-05, "loss": 1.5094, "step": 4719 }, { "epoch": 0.6761208995845868, "grad_norm": 1.1733123064041138, "learning_rate": 5.0201042581678034e-05, "loss": 1.2859, "step": 4720 }, { "epoch": 0.6762641455378886, "grad_norm": 1.1697572469711304, "learning_rate": 5.016081258974088e-05, "loss": 1.3596, "step": 4721 }, { "epoch": 0.6764073914911903, "grad_norm": 1.2339195013046265, "learning_rate": 5.012059332694747e-05, "loss": 1.4655, "step": 4722 }, { "epoch": 0.6765506374444922, "grad_norm": 1.0706316232681274, "learning_rate": 5.008038480195596e-05, "loss": 1.4339, "step": 4723 }, { "epoch": 0.676693883397794, "grad_norm": 1.2210077047348022, "learning_rate": 5.0040187023422237e-05, "loss": 1.2749, "step": 4724 }, { "epoch": 0.6768371293510959, "grad_norm": 1.2067290544509888, "learning_rate": 5.000000000000002e-05, "loss": 1.4499, "step": 4725 }, { "epoch": 0.6769803753043977, "grad_norm": 1.183017611503601, "learning_rate": 4.995982374034046e-05, "loss": 1.4288, "step": 4726 }, { "epoch": 0.6771236212576994, "grad_norm": 1.2002090215682983, "learning_rate": 4.991965825309254e-05, "loss": 1.4644, "step": 4727 }, { "epoch": 0.6772668672110013, "grad_norm": 1.1189401149749756, "learning_rate": 4.9879503546902906e-05, "loss": 1.4164, "step": 4728 }, { "epoch": 0.6774101131643031, "grad_norm": 1.1736527681350708, "learning_rate": 4.983935963041586e-05, "loss": 1.3107, "step": 4729 }, { "epoch": 0.677553359117605, "grad_norm": 1.042734146118164, "learning_rate": 4.979922651227346e-05, "loss": 1.4285, "step": 4730 }, { "epoch": 0.6776966050709068, "grad_norm": 1.1893593072891235, "learning_rate": 4.9759104201115214e-05, "loss": 1.4026, "step": 4731 }, { "epoch": 0.6778398510242085, "grad_norm": 0.9894520044326782, "learning_rate": 4.9718992705578615e-05, "loss": 1.2148, "step": 4732 }, { "epoch": 0.6779830969775104, "grad_norm": 1.173743724822998, "learning_rate": 4.967889203429866e-05, "loss": 1.3649, "step": 4733 }, { "epoch": 0.6781263429308122, "grad_norm": 0.9450235366821289, "learning_rate": 4.9638802195907976e-05, "loss": 1.3488, "step": 4734 }, { "epoch": 0.678269588884114, "grad_norm": 1.1506727933883667, "learning_rate": 4.9598723199036865e-05, "loss": 1.3627, "step": 4735 }, { "epoch": 0.6784128348374159, "grad_norm": 1.2022823095321655, "learning_rate": 4.955865505231351e-05, "loss": 1.5642, "step": 4736 }, { "epoch": 0.6785560807907176, "grad_norm": 1.2086657285690308, "learning_rate": 4.951859776436345e-05, "loss": 1.4472, "step": 4737 }, { "epoch": 0.6786993267440194, "grad_norm": 1.000524878501892, "learning_rate": 4.947855134381007e-05, "loss": 1.4375, "step": 4738 }, { "epoch": 0.6788425726973213, "grad_norm": 1.175370454788208, "learning_rate": 4.943851579927436e-05, "loss": 1.316, "step": 4739 }, { "epoch": 0.6789858186506231, "grad_norm": 1.283309817314148, "learning_rate": 4.9398491139375016e-05, "loss": 1.5281, "step": 4740 }, { "epoch": 0.679129064603925, "grad_norm": 1.0577925443649292, "learning_rate": 4.9358477372728326e-05, "loss": 1.4472, "step": 4741 }, { "epoch": 0.6792723105572268, "grad_norm": 1.1821345090866089, "learning_rate": 4.931847450794828e-05, "loss": 1.3876, "step": 4742 }, { "epoch": 0.6794155565105285, "grad_norm": 1.058488368988037, "learning_rate": 4.9278482553646535e-05, "loss": 1.5706, "step": 4743 }, { "epoch": 0.6795588024638304, "grad_norm": 1.065329909324646, "learning_rate": 4.923850151843229e-05, "loss": 1.4385, "step": 4744 }, { "epoch": 0.6797020484171322, "grad_norm": 1.2394007444381714, "learning_rate": 4.9198531410912486e-05, "loss": 1.6193, "step": 4745 }, { "epoch": 0.6798452943704341, "grad_norm": 1.0929758548736572, "learning_rate": 4.91585722396918e-05, "loss": 1.352, "step": 4746 }, { "epoch": 0.6799885403237359, "grad_norm": 1.3069825172424316, "learning_rate": 4.911862401337234e-05, "loss": 1.5623, "step": 4747 }, { "epoch": 0.6801317862770376, "grad_norm": 1.2225638628005981, "learning_rate": 4.907868674055401e-05, "loss": 1.3576, "step": 4748 }, { "epoch": 0.6802750322303395, "grad_norm": 1.2815358638763428, "learning_rate": 4.903876042983433e-05, "loss": 1.2937, "step": 4749 }, { "epoch": 0.6804182781836413, "grad_norm": 1.2711007595062256, "learning_rate": 4.899884508980843e-05, "loss": 1.375, "step": 4750 }, { "epoch": 0.6805615241369432, "grad_norm": 1.2203829288482666, "learning_rate": 4.8958940729069134e-05, "loss": 1.5317, "step": 4751 }, { "epoch": 0.680704770090245, "grad_norm": 1.2992411851882935, "learning_rate": 4.891904735620675e-05, "loss": 1.3877, "step": 4752 }, { "epoch": 0.6808480160435467, "grad_norm": 1.0130432844161987, "learning_rate": 4.8879164979809454e-05, "loss": 1.4383, "step": 4753 }, { "epoch": 0.6809912619968486, "grad_norm": 1.051542043685913, "learning_rate": 4.883929360846293e-05, "loss": 1.4036, "step": 4754 }, { "epoch": 0.6811345079501504, "grad_norm": 0.9251922369003296, "learning_rate": 4.8799433250750425e-05, "loss": 1.5412, "step": 4755 }, { "epoch": 0.6812777539034522, "grad_norm": 1.1594884395599365, "learning_rate": 4.875958391525289e-05, "loss": 1.403, "step": 4756 }, { "epoch": 0.6814209998567541, "grad_norm": 1.365166425704956, "learning_rate": 4.871974561054901e-05, "loss": 1.5097, "step": 4757 }, { "epoch": 0.6815642458100558, "grad_norm": 1.2200548648834229, "learning_rate": 4.867991834521487e-05, "loss": 1.4187, "step": 4758 }, { "epoch": 0.6817074917633577, "grad_norm": 1.2967957258224487, "learning_rate": 4.864010212782434e-05, "loss": 1.3663, "step": 4759 }, { "epoch": 0.6818507377166595, "grad_norm": 1.0779865980148315, "learning_rate": 4.860029696694886e-05, "loss": 1.4694, "step": 4760 }, { "epoch": 0.6819939836699613, "grad_norm": 0.9485265612602234, "learning_rate": 4.8560502871157485e-05, "loss": 1.2669, "step": 4761 }, { "epoch": 0.6821372296232632, "grad_norm": 1.1545518636703491, "learning_rate": 4.852071984901696e-05, "loss": 1.3147, "step": 4762 }, { "epoch": 0.682280475576565, "grad_norm": 1.130214810371399, "learning_rate": 4.8480947909091454e-05, "loss": 1.4383, "step": 4763 }, { "epoch": 0.6824237215298667, "grad_norm": 1.0748084783554077, "learning_rate": 4.8441187059943015e-05, "loss": 1.5168, "step": 4764 }, { "epoch": 0.6825669674831686, "grad_norm": 1.2678245306015015, "learning_rate": 4.840143731013115e-05, "loss": 1.4784, "step": 4765 }, { "epoch": 0.6827102134364704, "grad_norm": 1.0654397010803223, "learning_rate": 4.836169866821293e-05, "loss": 1.348, "step": 4766 }, { "epoch": 0.6828534593897723, "grad_norm": 0.9968636631965637, "learning_rate": 4.832197114274314e-05, "loss": 1.5085, "step": 4767 }, { "epoch": 0.6829967053430741, "grad_norm": 1.2067439556121826, "learning_rate": 4.8282254742274126e-05, "loss": 1.404, "step": 4768 }, { "epoch": 0.6831399512963758, "grad_norm": 1.1469359397888184, "learning_rate": 4.8242549475355844e-05, "loss": 1.5049, "step": 4769 }, { "epoch": 0.6832831972496777, "grad_norm": 1.07965886592865, "learning_rate": 4.820285535053588e-05, "loss": 1.3586, "step": 4770 }, { "epoch": 0.6834264432029795, "grad_norm": 1.198075294494629, "learning_rate": 4.816317237635937e-05, "loss": 1.376, "step": 4771 }, { "epoch": 0.6835696891562814, "grad_norm": 1.1456480026245117, "learning_rate": 4.8123500561369115e-05, "loss": 1.3614, "step": 4772 }, { "epoch": 0.6837129351095832, "grad_norm": 1.25392484664917, "learning_rate": 4.808383991410542e-05, "loss": 1.5231, "step": 4773 }, { "epoch": 0.6838561810628849, "grad_norm": 1.5404049158096313, "learning_rate": 4.804419044310622e-05, "loss": 1.3265, "step": 4774 }, { "epoch": 0.6839994270161868, "grad_norm": 1.1292073726654053, "learning_rate": 4.800455215690719e-05, "loss": 1.4286, "step": 4775 }, { "epoch": 0.6841426729694886, "grad_norm": 1.0415793657302856, "learning_rate": 4.7964925064041356e-05, "loss": 1.3825, "step": 4776 }, { "epoch": 0.6842859189227904, "grad_norm": 1.2570927143096924, "learning_rate": 4.792530917303948e-05, "loss": 1.4154, "step": 4777 }, { "epoch": 0.6844291648760923, "grad_norm": 1.139022707939148, "learning_rate": 4.7885704492429894e-05, "loss": 1.5012, "step": 4778 }, { "epoch": 0.684572410829394, "grad_norm": 1.0863858461380005, "learning_rate": 4.7846111030738506e-05, "loss": 1.5434, "step": 4779 }, { "epoch": 0.6847156567826959, "grad_norm": 1.1278661489486694, "learning_rate": 4.780652879648884e-05, "loss": 1.35, "step": 4780 }, { "epoch": 0.6848589027359977, "grad_norm": 1.1405705213546753, "learning_rate": 4.776695779820186e-05, "loss": 1.4563, "step": 4781 }, { "epoch": 0.6850021486892995, "grad_norm": 1.1120787858963013, "learning_rate": 4.772739804439633e-05, "loss": 1.4489, "step": 4782 }, { "epoch": 0.6851453946426014, "grad_norm": 1.2349562644958496, "learning_rate": 4.7687849543588505e-05, "loss": 1.3859, "step": 4783 }, { "epoch": 0.6852886405959032, "grad_norm": 1.1682991981506348, "learning_rate": 4.764831230429211e-05, "loss": 1.3659, "step": 4784 }, { "epoch": 0.685431886549205, "grad_norm": 1.016930341720581, "learning_rate": 4.760878633501853e-05, "loss": 1.5746, "step": 4785 }, { "epoch": 0.6855751325025068, "grad_norm": 1.2348135709762573, "learning_rate": 4.756927164427685e-05, "loss": 1.4854, "step": 4786 }, { "epoch": 0.6857183784558086, "grad_norm": 0.9982423186302185, "learning_rate": 4.752976824057348e-05, "loss": 1.4244, "step": 4787 }, { "epoch": 0.6858616244091105, "grad_norm": 0.9760091304779053, "learning_rate": 4.7490276132412584e-05, "loss": 1.1865, "step": 4788 }, { "epoch": 0.6860048703624123, "grad_norm": 1.0857889652252197, "learning_rate": 4.745079532829582e-05, "loss": 1.4926, "step": 4789 }, { "epoch": 0.686148116315714, "grad_norm": 1.1714333295822144, "learning_rate": 4.741132583672242e-05, "loss": 1.0294, "step": 4790 }, { "epoch": 0.6862913622690159, "grad_norm": 1.0968866348266602, "learning_rate": 4.737186766618924e-05, "loss": 1.4282, "step": 4791 }, { "epoch": 0.6864346082223177, "grad_norm": 1.390229344367981, "learning_rate": 4.733242082519054e-05, "loss": 1.5485, "step": 4792 }, { "epoch": 0.6865778541756196, "grad_norm": 1.2034766674041748, "learning_rate": 4.7292985322218355e-05, "loss": 1.4075, "step": 4793 }, { "epoch": 0.6867211001289214, "grad_norm": 1.2084518671035767, "learning_rate": 4.7253561165762163e-05, "loss": 1.4313, "step": 4794 }, { "epoch": 0.6868643460822231, "grad_norm": 0.9785750508308411, "learning_rate": 4.721414836430894e-05, "loss": 1.5215, "step": 4795 }, { "epoch": 0.687007592035525, "grad_norm": 1.1656543016433716, "learning_rate": 4.717474692634334e-05, "loss": 1.3589, "step": 4796 }, { "epoch": 0.6871508379888268, "grad_norm": 1.2332234382629395, "learning_rate": 4.713535686034749e-05, "loss": 1.5626, "step": 4797 }, { "epoch": 0.6872940839421287, "grad_norm": 1.1107747554779053, "learning_rate": 4.709597817480112e-05, "loss": 1.3971, "step": 4798 }, { "epoch": 0.6874373298954305, "grad_norm": 1.3085471391677856, "learning_rate": 4.7056610878181486e-05, "loss": 1.4617, "step": 4799 }, { "epoch": 0.6875805758487322, "grad_norm": 1.2864677906036377, "learning_rate": 4.7017254978963366e-05, "loss": 1.3318, "step": 4800 }, { "epoch": 0.6877238218020341, "grad_norm": 1.077457308769226, "learning_rate": 4.697791048561918e-05, "loss": 1.4553, "step": 4801 }, { "epoch": 0.6878670677553359, "grad_norm": 1.08990478515625, "learning_rate": 4.693857740661869e-05, "loss": 1.1497, "step": 4802 }, { "epoch": 0.6880103137086377, "grad_norm": 1.2435383796691895, "learning_rate": 4.689925575042946e-05, "loss": 1.5489, "step": 4803 }, { "epoch": 0.6881535596619396, "grad_norm": 1.2475719451904297, "learning_rate": 4.685994552551647e-05, "loss": 1.4437, "step": 4804 }, { "epoch": 0.6882968056152414, "grad_norm": 1.1075929403305054, "learning_rate": 4.682064674034216e-05, "loss": 1.344, "step": 4805 }, { "epoch": 0.6884400515685432, "grad_norm": 1.0733587741851807, "learning_rate": 4.6781359403366576e-05, "loss": 1.2852, "step": 4806 }, { "epoch": 0.688583297521845, "grad_norm": 1.1062713861465454, "learning_rate": 4.6742083523047434e-05, "loss": 1.6158, "step": 4807 }, { "epoch": 0.6887265434751468, "grad_norm": 1.1850636005401611, "learning_rate": 4.670281910783974e-05, "loss": 1.3247, "step": 4808 }, { "epoch": 0.6888697894284487, "grad_norm": 0.996476411819458, "learning_rate": 4.666356616619619e-05, "loss": 1.3747, "step": 4809 }, { "epoch": 0.6890130353817505, "grad_norm": 1.4157705307006836, "learning_rate": 4.662432470656698e-05, "loss": 1.4367, "step": 4810 }, { "epoch": 0.6891562813350522, "grad_norm": 1.0638737678527832, "learning_rate": 4.658509473739981e-05, "loss": 1.3942, "step": 4811 }, { "epoch": 0.6892995272883541, "grad_norm": 0.9203993678092957, "learning_rate": 4.6545876267139974e-05, "loss": 1.4334, "step": 4812 }, { "epoch": 0.6894427732416559, "grad_norm": 1.0290254354476929, "learning_rate": 4.650666930423012e-05, "loss": 1.3251, "step": 4813 }, { "epoch": 0.6895860191949578, "grad_norm": 1.2874394655227661, "learning_rate": 4.646747385711064e-05, "loss": 1.4082, "step": 4814 }, { "epoch": 0.6897292651482596, "grad_norm": 1.1400178670883179, "learning_rate": 4.642828993421936e-05, "loss": 1.4999, "step": 4815 }, { "epoch": 0.6898725111015613, "grad_norm": 1.1018484830856323, "learning_rate": 4.638911754399152e-05, "loss": 1.3538, "step": 4816 }, { "epoch": 0.6900157570548632, "grad_norm": 1.0352429151535034, "learning_rate": 4.634995669486001e-05, "loss": 1.1642, "step": 4817 }, { "epoch": 0.690159003008165, "grad_norm": 1.030706524848938, "learning_rate": 4.631080739525518e-05, "loss": 1.3426, "step": 4818 }, { "epoch": 0.6903022489614669, "grad_norm": 1.1398271322250366, "learning_rate": 4.6271669653604945e-05, "loss": 1.3754, "step": 4819 }, { "epoch": 0.6904454949147687, "grad_norm": 1.1331428289413452, "learning_rate": 4.623254347833464e-05, "loss": 1.3415, "step": 4820 }, { "epoch": 0.6905887408680704, "grad_norm": 1.0317420959472656, "learning_rate": 4.61934288778672e-05, "loss": 1.4098, "step": 4821 }, { "epoch": 0.6907319868213723, "grad_norm": 1.0121934413909912, "learning_rate": 4.6154325860623037e-05, "loss": 1.3769, "step": 4822 }, { "epoch": 0.6908752327746741, "grad_norm": 1.267476201057434, "learning_rate": 4.6115234435020016e-05, "loss": 1.5656, "step": 4823 }, { "epoch": 0.691018478727976, "grad_norm": 1.2096225023269653, "learning_rate": 4.607615460947354e-05, "loss": 1.4495, "step": 4824 }, { "epoch": 0.6911617246812778, "grad_norm": 1.1023597717285156, "learning_rate": 4.6037086392396654e-05, "loss": 1.5072, "step": 4825 }, { "epoch": 0.6913049706345795, "grad_norm": 1.0012470483779907, "learning_rate": 4.599802979219965e-05, "loss": 1.4742, "step": 4826 }, { "epoch": 0.6914482165878814, "grad_norm": 1.066522479057312, "learning_rate": 4.5958984817290484e-05, "loss": 1.3822, "step": 4827 }, { "epoch": 0.6915914625411832, "grad_norm": 1.4842320680618286, "learning_rate": 4.5919951476074583e-05, "loss": 1.428, "step": 4828 }, { "epoch": 0.691734708494485, "grad_norm": 1.083647608757019, "learning_rate": 4.588092977695485e-05, "loss": 1.4243, "step": 4829 }, { "epoch": 0.6918779544477869, "grad_norm": 1.1569230556488037, "learning_rate": 4.584191972833175e-05, "loss": 1.4927, "step": 4830 }, { "epoch": 0.6920212004010887, "grad_norm": 1.0714370012283325, "learning_rate": 4.580292133860303e-05, "loss": 1.5418, "step": 4831 }, { "epoch": 0.6921644463543905, "grad_norm": 1.3244935274124146, "learning_rate": 4.576393461616424e-05, "loss": 1.4996, "step": 4832 }, { "epoch": 0.6923076923076923, "grad_norm": 1.041006326675415, "learning_rate": 4.572495956940821e-05, "loss": 1.371, "step": 4833 }, { "epoch": 0.6924509382609941, "grad_norm": 1.0931391716003418, "learning_rate": 4.5685996206725253e-05, "loss": 1.4855, "step": 4834 }, { "epoch": 0.692594184214296, "grad_norm": 1.3896008729934692, "learning_rate": 4.564704453650323e-05, "loss": 1.5538, "step": 4835 }, { "epoch": 0.6927374301675978, "grad_norm": 1.0022923946380615, "learning_rate": 4.560810456712754e-05, "loss": 1.43, "step": 4836 }, { "epoch": 0.6928806761208995, "grad_norm": 1.258270263671875, "learning_rate": 4.556917630698092e-05, "loss": 1.2392, "step": 4837 }, { "epoch": 0.6930239220742014, "grad_norm": 1.156607747077942, "learning_rate": 4.553025976444369e-05, "loss": 1.4572, "step": 4838 }, { "epoch": 0.6931671680275032, "grad_norm": 1.2251403331756592, "learning_rate": 4.5491354947893616e-05, "loss": 1.3823, "step": 4839 }, { "epoch": 0.6933104139808051, "grad_norm": 0.9343545436859131, "learning_rate": 4.545246186570594e-05, "loss": 1.5248, "step": 4840 }, { "epoch": 0.6934536599341069, "grad_norm": 0.9937069416046143, "learning_rate": 4.541358052625342e-05, "loss": 1.3882, "step": 4841 }, { "epoch": 0.6935969058874086, "grad_norm": 1.0542631149291992, "learning_rate": 4.537471093790614e-05, "loss": 1.394, "step": 4842 }, { "epoch": 0.6937401518407105, "grad_norm": 1.275731086730957, "learning_rate": 4.5335853109031855e-05, "loss": 1.4517, "step": 4843 }, { "epoch": 0.6938833977940123, "grad_norm": 0.9734557271003723, "learning_rate": 4.529700704799571e-05, "loss": 1.3354, "step": 4844 }, { "epoch": 0.6940266437473142, "grad_norm": 1.2689855098724365, "learning_rate": 4.525817276316022e-05, "loss": 1.7076, "step": 4845 }, { "epoch": 0.694169889700616, "grad_norm": 1.2597824335098267, "learning_rate": 4.5219350262885484e-05, "loss": 1.579, "step": 4846 }, { "epoch": 0.6943131356539177, "grad_norm": 1.380022644996643, "learning_rate": 4.518053955552903e-05, "loss": 1.2647, "step": 4847 }, { "epoch": 0.6944563816072196, "grad_norm": 1.0374553203582764, "learning_rate": 4.514174064944584e-05, "loss": 1.2531, "step": 4848 }, { "epoch": 0.6945996275605214, "grad_norm": 1.2924906015396118, "learning_rate": 4.510295355298835e-05, "loss": 1.4053, "step": 4849 }, { "epoch": 0.6947428735138232, "grad_norm": 1.1516618728637695, "learning_rate": 4.506417827450647e-05, "loss": 1.1458, "step": 4850 }, { "epoch": 0.6948861194671251, "grad_norm": 1.0579884052276611, "learning_rate": 4.50254148223476e-05, "loss": 1.3647, "step": 4851 }, { "epoch": 0.6950293654204269, "grad_norm": 1.0875803232192993, "learning_rate": 4.498666320485647e-05, "loss": 1.6354, "step": 4852 }, { "epoch": 0.6951726113737287, "grad_norm": 1.0230839252471924, "learning_rate": 4.494792343037535e-05, "loss": 1.4246, "step": 4853 }, { "epoch": 0.6953158573270305, "grad_norm": 1.232643485069275, "learning_rate": 4.490919550724405e-05, "loss": 1.3813, "step": 4854 }, { "epoch": 0.6954591032803323, "grad_norm": 1.0915628671646118, "learning_rate": 4.487047944379964e-05, "loss": 1.4397, "step": 4855 }, { "epoch": 0.6956023492336342, "grad_norm": 1.0719890594482422, "learning_rate": 4.4831775248376764e-05, "loss": 1.4162, "step": 4856 }, { "epoch": 0.695745595186936, "grad_norm": 1.1589277982711792, "learning_rate": 4.4793082929307474e-05, "loss": 1.3122, "step": 4857 }, { "epoch": 0.6958888411402377, "grad_norm": 1.2044216394424438, "learning_rate": 4.4754402494921275e-05, "loss": 1.3349, "step": 4858 }, { "epoch": 0.6960320870935396, "grad_norm": 1.0755696296691895, "learning_rate": 4.471573395354514e-05, "loss": 1.471, "step": 4859 }, { "epoch": 0.6961753330468414, "grad_norm": 1.2005761861801147, "learning_rate": 4.467707731350332e-05, "loss": 1.4826, "step": 4860 }, { "epoch": 0.6963185790001433, "grad_norm": 1.0596221685409546, "learning_rate": 4.4638432583117786e-05, "loss": 1.3858, "step": 4861 }, { "epoch": 0.6964618249534451, "grad_norm": 1.1816372871398926, "learning_rate": 4.4599799770707754e-05, "loss": 1.3938, "step": 4862 }, { "epoch": 0.6966050709067468, "grad_norm": 1.2724205255508423, "learning_rate": 4.4561178884589815e-05, "loss": 1.4806, "step": 4863 }, { "epoch": 0.6967483168600487, "grad_norm": 1.0089033842086792, "learning_rate": 4.452256993307821e-05, "loss": 1.5523, "step": 4864 }, { "epoch": 0.6968915628133505, "grad_norm": 1.0715930461883545, "learning_rate": 4.4483972924484474e-05, "loss": 1.4398, "step": 4865 }, { "epoch": 0.6970348087666524, "grad_norm": 1.283207893371582, "learning_rate": 4.4445387867117526e-05, "loss": 1.538, "step": 4866 }, { "epoch": 0.6971780547199542, "grad_norm": 0.952569305896759, "learning_rate": 4.44068147692838e-05, "loss": 1.3304, "step": 4867 }, { "epoch": 0.6973213006732559, "grad_norm": 1.0312291383743286, "learning_rate": 4.436825363928714e-05, "loss": 1.4213, "step": 4868 }, { "epoch": 0.6974645466265578, "grad_norm": 0.9561575055122375, "learning_rate": 4.43297044854288e-05, "loss": 1.4941, "step": 4869 }, { "epoch": 0.6976077925798596, "grad_norm": 1.0977745056152344, "learning_rate": 4.4291167316007464e-05, "loss": 1.3523, "step": 4870 }, { "epoch": 0.6977510385331614, "grad_norm": 1.073885202407837, "learning_rate": 4.4252642139319226e-05, "loss": 1.2422, "step": 4871 }, { "epoch": 0.6978942844864633, "grad_norm": 1.1924763917922974, "learning_rate": 4.421412896365763e-05, "loss": 1.6215, "step": 4872 }, { "epoch": 0.6980375304397651, "grad_norm": 1.1526187658309937, "learning_rate": 4.417562779731355e-05, "loss": 1.3518, "step": 4873 }, { "epoch": 0.6981807763930669, "grad_norm": 1.1373279094696045, "learning_rate": 4.413713864857533e-05, "loss": 1.5773, "step": 4874 }, { "epoch": 0.6983240223463687, "grad_norm": 1.1128966808319092, "learning_rate": 4.409866152572883e-05, "loss": 1.4571, "step": 4875 }, { "epoch": 0.6984672682996705, "grad_norm": 1.1714192628860474, "learning_rate": 4.406019643705715e-05, "loss": 1.485, "step": 4876 }, { "epoch": 0.6986105142529724, "grad_norm": 1.0907427072525024, "learning_rate": 4.4021743390840866e-05, "loss": 1.5718, "step": 4877 }, { "epoch": 0.6987537602062742, "grad_norm": 1.1447240114212036, "learning_rate": 4.398330239535797e-05, "loss": 1.5541, "step": 4878 }, { "epoch": 0.698897006159576, "grad_norm": 1.1853219270706177, "learning_rate": 4.3944873458883887e-05, "loss": 1.3359, "step": 4879 }, { "epoch": 0.6990402521128778, "grad_norm": 1.0839258432388306, "learning_rate": 4.390645658969143e-05, "loss": 1.3042, "step": 4880 }, { "epoch": 0.6991834980661796, "grad_norm": 1.4131336212158203, "learning_rate": 4.3868051796050705e-05, "loss": 1.4921, "step": 4881 }, { "epoch": 0.6993267440194815, "grad_norm": 1.169594168663025, "learning_rate": 4.3829659086229403e-05, "loss": 1.5097, "step": 4882 }, { "epoch": 0.6994699899727833, "grad_norm": 1.1265010833740234, "learning_rate": 4.379127846849255e-05, "loss": 1.2754, "step": 4883 }, { "epoch": 0.699613235926085, "grad_norm": 1.1865190267562866, "learning_rate": 4.375290995110244e-05, "loss": 1.3134, "step": 4884 }, { "epoch": 0.6997564818793869, "grad_norm": 1.2389074563980103, "learning_rate": 4.371455354231889e-05, "loss": 1.3776, "step": 4885 }, { "epoch": 0.6998997278326887, "grad_norm": 1.1422584056854248, "learning_rate": 4.3676209250399194e-05, "loss": 1.5634, "step": 4886 }, { "epoch": 0.7000429737859906, "grad_norm": 1.2955741882324219, "learning_rate": 4.36378770835978e-05, "loss": 1.5391, "step": 4887 }, { "epoch": 0.7001862197392924, "grad_norm": 1.270967721939087, "learning_rate": 4.359955705016673e-05, "loss": 1.5095, "step": 4888 }, { "epoch": 0.7003294656925941, "grad_norm": 1.3086878061294556, "learning_rate": 4.356124915835533e-05, "loss": 1.2246, "step": 4889 }, { "epoch": 0.700472711645896, "grad_norm": 1.0485615730285645, "learning_rate": 4.352295341641035e-05, "loss": 1.418, "step": 4890 }, { "epoch": 0.7006159575991978, "grad_norm": 1.3110233545303345, "learning_rate": 4.348466983257594e-05, "loss": 1.4166, "step": 4891 }, { "epoch": 0.7007592035524997, "grad_norm": 1.2107936143875122, "learning_rate": 4.34463984150935e-05, "loss": 1.4714, "step": 4892 }, { "epoch": 0.7009024495058015, "grad_norm": 1.2495006322860718, "learning_rate": 4.3408139172202044e-05, "loss": 1.529, "step": 4893 }, { "epoch": 0.7010456954591033, "grad_norm": 0.9875035881996155, "learning_rate": 4.336989211213782e-05, "loss": 1.3983, "step": 4894 }, { "epoch": 0.7011889414124051, "grad_norm": 1.1666017770767212, "learning_rate": 4.333165724313442e-05, "loss": 1.4197, "step": 4895 }, { "epoch": 0.7013321873657069, "grad_norm": 1.0390037298202515, "learning_rate": 4.329343457342289e-05, "loss": 1.3572, "step": 4896 }, { "epoch": 0.7014754333190087, "grad_norm": 1.3812659978866577, "learning_rate": 4.325522411123162e-05, "loss": 1.211, "step": 4897 }, { "epoch": 0.7016186792723106, "grad_norm": 1.2809722423553467, "learning_rate": 4.321702586478639e-05, "loss": 1.4861, "step": 4898 }, { "epoch": 0.7017619252256124, "grad_norm": 1.0004661083221436, "learning_rate": 4.3178839842310326e-05, "loss": 1.3297, "step": 4899 }, { "epoch": 0.7019051711789142, "grad_norm": 1.0453087091445923, "learning_rate": 4.3140666052023936e-05, "loss": 1.3098, "step": 4900 }, { "epoch": 0.702048417132216, "grad_norm": 0.9832549095153809, "learning_rate": 4.3102504502145147e-05, "loss": 1.3052, "step": 4901 }, { "epoch": 0.7021916630855178, "grad_norm": 0.9752545952796936, "learning_rate": 4.3064355200889094e-05, "loss": 1.497, "step": 4902 }, { "epoch": 0.7023349090388197, "grad_norm": 1.254325032234192, "learning_rate": 4.302621815646839e-05, "loss": 1.4998, "step": 4903 }, { "epoch": 0.7024781549921215, "grad_norm": 1.0897290706634521, "learning_rate": 4.298809337709312e-05, "loss": 1.4262, "step": 4904 }, { "epoch": 0.7026214009454232, "grad_norm": 0.9794754981994629, "learning_rate": 4.294998087097048e-05, "loss": 1.1575, "step": 4905 }, { "epoch": 0.7027646468987251, "grad_norm": 1.1029446125030518, "learning_rate": 4.2911880646305184e-05, "loss": 1.369, "step": 4906 }, { "epoch": 0.7029078928520269, "grad_norm": 1.2487399578094482, "learning_rate": 4.2873792711299266e-05, "loss": 1.506, "step": 4907 }, { "epoch": 0.7030511388053288, "grad_norm": 1.0142292976379395, "learning_rate": 4.283571707415214e-05, "loss": 1.5871, "step": 4908 }, { "epoch": 0.7031943847586306, "grad_norm": 1.3917914628982544, "learning_rate": 4.279765374306055e-05, "loss": 1.4421, "step": 4909 }, { "epoch": 0.7033376307119323, "grad_norm": 1.0371735095977783, "learning_rate": 4.275960272621852e-05, "loss": 1.4366, "step": 4910 }, { "epoch": 0.7034808766652342, "grad_norm": 0.9551295638084412, "learning_rate": 4.272156403181756e-05, "loss": 1.3222, "step": 4911 }, { "epoch": 0.703624122618536, "grad_norm": 1.116145133972168, "learning_rate": 4.268353766804649e-05, "loss": 1.4393, "step": 4912 }, { "epoch": 0.7037673685718379, "grad_norm": 1.046194076538086, "learning_rate": 4.2645523643091354e-05, "loss": 1.2484, "step": 4913 }, { "epoch": 0.7039106145251397, "grad_norm": 1.2296277284622192, "learning_rate": 4.2607521965135645e-05, "loss": 1.5334, "step": 4914 }, { "epoch": 0.7040538604784414, "grad_norm": 1.1367355585098267, "learning_rate": 4.256953264236029e-05, "loss": 1.5345, "step": 4915 }, { "epoch": 0.7041971064317433, "grad_norm": 1.0450211763381958, "learning_rate": 4.253155568294333e-05, "loss": 1.3849, "step": 4916 }, { "epoch": 0.7043403523850451, "grad_norm": 1.2514803409576416, "learning_rate": 4.24935910950603e-05, "loss": 1.5526, "step": 4917 }, { "epoch": 0.704483598338347, "grad_norm": 1.421496868133545, "learning_rate": 4.245563888688405e-05, "loss": 1.546, "step": 4918 }, { "epoch": 0.7046268442916488, "grad_norm": 1.192635178565979, "learning_rate": 4.241769906658476e-05, "loss": 1.5755, "step": 4919 }, { "epoch": 0.7047700902449506, "grad_norm": 1.072604775428772, "learning_rate": 4.2379771642329944e-05, "loss": 1.5113, "step": 4920 }, { "epoch": 0.7049133361982524, "grad_norm": 1.194846272468567, "learning_rate": 4.234185662228435e-05, "loss": 1.3465, "step": 4921 }, { "epoch": 0.7050565821515542, "grad_norm": 1.1469340324401855, "learning_rate": 4.2303954014610235e-05, "loss": 1.4763, "step": 4922 }, { "epoch": 0.705199828104856, "grad_norm": 1.1629632711410522, "learning_rate": 4.226606382746711e-05, "loss": 1.3756, "step": 4923 }, { "epoch": 0.7053430740581579, "grad_norm": 1.093117117881775, "learning_rate": 4.2228186069011696e-05, "loss": 1.4884, "step": 4924 }, { "epoch": 0.7054863200114597, "grad_norm": 1.1291934251785278, "learning_rate": 4.2190320747398206e-05, "loss": 1.2404, "step": 4925 }, { "epoch": 0.7056295659647615, "grad_norm": 0.97789067029953, "learning_rate": 4.215246787077809e-05, "loss": 1.4671, "step": 4926 }, { "epoch": 0.7057728119180633, "grad_norm": 1.0043669939041138, "learning_rate": 4.2114627447300134e-05, "loss": 1.3758, "step": 4927 }, { "epoch": 0.7059160578713651, "grad_norm": 1.0834614038467407, "learning_rate": 4.2076799485110454e-05, "loss": 1.4235, "step": 4928 }, { "epoch": 0.706059303824667, "grad_norm": 1.0054686069488525, "learning_rate": 4.203898399235246e-05, "loss": 1.4387, "step": 4929 }, { "epoch": 0.7062025497779688, "grad_norm": 1.1352018117904663, "learning_rate": 4.200118097716694e-05, "loss": 1.5178, "step": 4930 }, { "epoch": 0.7063457957312705, "grad_norm": 1.2461295127868652, "learning_rate": 4.196339044769184e-05, "loss": 1.3716, "step": 4931 }, { "epoch": 0.7064890416845724, "grad_norm": 1.1491222381591797, "learning_rate": 4.192561241206262e-05, "loss": 1.4561, "step": 4932 }, { "epoch": 0.7066322876378742, "grad_norm": 1.2600260972976685, "learning_rate": 4.1887846878411975e-05, "loss": 1.5439, "step": 4933 }, { "epoch": 0.7067755335911761, "grad_norm": 1.0265192985534668, "learning_rate": 4.18500938548698e-05, "loss": 1.3935, "step": 4934 }, { "epoch": 0.7069187795444779, "grad_norm": 1.076937198638916, "learning_rate": 4.181235334956341e-05, "loss": 1.7463, "step": 4935 }, { "epoch": 0.7070620254977796, "grad_norm": 1.2180107831954956, "learning_rate": 4.1774625370617484e-05, "loss": 1.2209, "step": 4936 }, { "epoch": 0.7072052714510815, "grad_norm": 1.0773173570632935, "learning_rate": 4.173690992615382e-05, "loss": 1.5628, "step": 4937 }, { "epoch": 0.7073485174043833, "grad_norm": 1.116751790046692, "learning_rate": 4.1699207024291666e-05, "loss": 1.5277, "step": 4938 }, { "epoch": 0.7074917633576852, "grad_norm": 1.0961850881576538, "learning_rate": 4.1661516673147517e-05, "loss": 1.4639, "step": 4939 }, { "epoch": 0.707635009310987, "grad_norm": 1.1937569379806519, "learning_rate": 4.162383888083519e-05, "loss": 1.603, "step": 4940 }, { "epoch": 0.7077782552642888, "grad_norm": 1.0768812894821167, "learning_rate": 4.1586173655465785e-05, "loss": 1.6704, "step": 4941 }, { "epoch": 0.7079215012175906, "grad_norm": 1.1815319061279297, "learning_rate": 4.154852100514761e-05, "loss": 1.4643, "step": 4942 }, { "epoch": 0.7080647471708924, "grad_norm": 1.1142280101776123, "learning_rate": 4.151088093798644e-05, "loss": 1.3067, "step": 4943 }, { "epoch": 0.7082079931241942, "grad_norm": 1.0164415836334229, "learning_rate": 4.147325346208527e-05, "loss": 1.4199, "step": 4944 }, { "epoch": 0.7083512390774961, "grad_norm": 1.0798677206039429, "learning_rate": 4.143563858554428e-05, "loss": 1.3528, "step": 4945 }, { "epoch": 0.7084944850307979, "grad_norm": 1.295035481452942, "learning_rate": 4.1398036316461054e-05, "loss": 1.2348, "step": 4946 }, { "epoch": 0.7086377309840997, "grad_norm": 1.0006767511367798, "learning_rate": 4.136044666293044e-05, "loss": 1.2611, "step": 4947 }, { "epoch": 0.7087809769374015, "grad_norm": 1.216193675994873, "learning_rate": 4.132286963304456e-05, "loss": 1.4825, "step": 4948 }, { "epoch": 0.7089242228907033, "grad_norm": 1.374414086341858, "learning_rate": 4.1285305234892803e-05, "loss": 1.2038, "step": 4949 }, { "epoch": 0.7090674688440052, "grad_norm": 1.1293323040008545, "learning_rate": 4.124775347656187e-05, "loss": 1.3963, "step": 4950 }, { "epoch": 0.709210714797307, "grad_norm": 1.2127183675765991, "learning_rate": 4.121021436613576e-05, "loss": 1.2061, "step": 4951 }, { "epoch": 0.7093539607506087, "grad_norm": 1.2056610584259033, "learning_rate": 4.117268791169564e-05, "loss": 1.2384, "step": 4952 }, { "epoch": 0.7094972067039106, "grad_norm": 0.9564344882965088, "learning_rate": 4.113517412132003e-05, "loss": 1.4189, "step": 4953 }, { "epoch": 0.7096404526572124, "grad_norm": 1.0678762197494507, "learning_rate": 4.109767300308481e-05, "loss": 1.4843, "step": 4954 }, { "epoch": 0.7097836986105143, "grad_norm": 1.0629723072052002, "learning_rate": 4.1060184565062963e-05, "loss": 1.4938, "step": 4955 }, { "epoch": 0.7099269445638161, "grad_norm": 1.1597607135772705, "learning_rate": 4.102270881532485e-05, "loss": 1.3843, "step": 4956 }, { "epoch": 0.7100701905171178, "grad_norm": 1.0593912601470947, "learning_rate": 4.0985245761938064e-05, "loss": 1.3874, "step": 4957 }, { "epoch": 0.7102134364704197, "grad_norm": 1.0953495502471924, "learning_rate": 4.094779541296747e-05, "loss": 1.4738, "step": 4958 }, { "epoch": 0.7103566824237215, "grad_norm": 1.0574756860733032, "learning_rate": 4.091035777647525e-05, "loss": 1.3829, "step": 4959 }, { "epoch": 0.7104999283770234, "grad_norm": 1.0883156061172485, "learning_rate": 4.087293286052069e-05, "loss": 1.4248, "step": 4960 }, { "epoch": 0.7106431743303252, "grad_norm": 1.133220911026001, "learning_rate": 4.083552067316054e-05, "loss": 1.2206, "step": 4961 }, { "epoch": 0.710786420283627, "grad_norm": 1.292496919631958, "learning_rate": 4.079812122244874e-05, "loss": 1.2444, "step": 4962 }, { "epoch": 0.7109296662369288, "grad_norm": 1.0247219800949097, "learning_rate": 4.076073451643639e-05, "loss": 1.6158, "step": 4963 }, { "epoch": 0.7110729121902306, "grad_norm": 1.0615828037261963, "learning_rate": 4.0723360563171906e-05, "loss": 1.2178, "step": 4964 }, { "epoch": 0.7112161581435325, "grad_norm": 1.1727991104125977, "learning_rate": 4.068599937070111e-05, "loss": 1.4022, "step": 4965 }, { "epoch": 0.7113594040968343, "grad_norm": 1.0751651525497437, "learning_rate": 4.064865094706681e-05, "loss": 1.4948, "step": 4966 }, { "epoch": 0.7115026500501361, "grad_norm": 0.9361088871955872, "learning_rate": 4.0611315300309246e-05, "loss": 1.4569, "step": 4967 }, { "epoch": 0.7116458960034379, "grad_norm": 1.1843054294586182, "learning_rate": 4.0573992438465866e-05, "loss": 1.2892, "step": 4968 }, { "epoch": 0.7117891419567397, "grad_norm": 1.0246272087097168, "learning_rate": 4.053668236957134e-05, "loss": 1.333, "step": 4969 }, { "epoch": 0.7119323879100415, "grad_norm": 1.0828496217727661, "learning_rate": 4.049938510165765e-05, "loss": 1.3238, "step": 4970 }, { "epoch": 0.7120756338633434, "grad_norm": 1.1489287614822388, "learning_rate": 4.0462100642753856e-05, "loss": 1.4133, "step": 4971 }, { "epoch": 0.7122188798166452, "grad_norm": 0.8985934853553772, "learning_rate": 4.04248290008865e-05, "loss": 1.2514, "step": 4972 }, { "epoch": 0.712362125769947, "grad_norm": 1.2668442726135254, "learning_rate": 4.038757018407924e-05, "loss": 1.4026, "step": 4973 }, { "epoch": 0.7125053717232488, "grad_norm": 1.1732838153839111, "learning_rate": 4.035032420035291e-05, "loss": 1.4256, "step": 4974 }, { "epoch": 0.7126486176765506, "grad_norm": 1.1764147281646729, "learning_rate": 4.031309105772567e-05, "loss": 1.5322, "step": 4975 }, { "epoch": 0.7127918636298525, "grad_norm": 1.1898000240325928, "learning_rate": 4.027587076421291e-05, "loss": 1.3741, "step": 4976 }, { "epoch": 0.7129351095831543, "grad_norm": 1.004818320274353, "learning_rate": 4.023866332782723e-05, "loss": 1.4324, "step": 4977 }, { "epoch": 0.713078355536456, "grad_norm": 1.0467044115066528, "learning_rate": 4.0201468756578465e-05, "loss": 1.3984, "step": 4978 }, { "epoch": 0.7132216014897579, "grad_norm": 1.1990551948547363, "learning_rate": 4.01642870584737e-05, "loss": 1.4753, "step": 4979 }, { "epoch": 0.7133648474430597, "grad_norm": 1.4979761838912964, "learning_rate": 4.012711824151727e-05, "loss": 1.5478, "step": 4980 }, { "epoch": 0.7135080933963616, "grad_norm": 1.0831280946731567, "learning_rate": 4.008996231371062e-05, "loss": 1.5083, "step": 4981 }, { "epoch": 0.7136513393496634, "grad_norm": 1.0576773881912231, "learning_rate": 4.005281928305249e-05, "loss": 1.583, "step": 4982 }, { "epoch": 0.7137945853029652, "grad_norm": 1.0935578346252441, "learning_rate": 4.0015689157538994e-05, "loss": 1.5068, "step": 4983 }, { "epoch": 0.713937831256267, "grad_norm": 1.045615315437317, "learning_rate": 3.997857194516319e-05, "loss": 1.4952, "step": 4984 }, { "epoch": 0.7140810772095688, "grad_norm": 1.1952487230300903, "learning_rate": 3.994146765391557e-05, "loss": 1.4229, "step": 4985 }, { "epoch": 0.7142243231628707, "grad_norm": 1.2653799057006836, "learning_rate": 3.990437629178372e-05, "loss": 1.53, "step": 4986 }, { "epoch": 0.7143675691161725, "grad_norm": 1.0661966800689697, "learning_rate": 3.986729786675253e-05, "loss": 1.3021, "step": 4987 }, { "epoch": 0.7145108150694743, "grad_norm": 1.0226942300796509, "learning_rate": 3.983023238680406e-05, "loss": 1.2725, "step": 4988 }, { "epoch": 0.7146540610227761, "grad_norm": 1.2797305583953857, "learning_rate": 3.9793179859917585e-05, "loss": 1.4366, "step": 4989 }, { "epoch": 0.7147973069760779, "grad_norm": 1.089646577835083, "learning_rate": 3.97561402940696e-05, "loss": 1.3739, "step": 4990 }, { "epoch": 0.7149405529293797, "grad_norm": 1.0271741151809692, "learning_rate": 3.9719113697233835e-05, "loss": 1.3737, "step": 4991 }, { "epoch": 0.7150837988826816, "grad_norm": 1.269885778427124, "learning_rate": 3.96821000773811e-05, "loss": 1.4631, "step": 4992 }, { "epoch": 0.7152270448359834, "grad_norm": 1.228425145149231, "learning_rate": 3.964509944247962e-05, "loss": 1.6662, "step": 4993 }, { "epoch": 0.7153702907892852, "grad_norm": 1.0611827373504639, "learning_rate": 3.9608111800494715e-05, "loss": 1.5446, "step": 4994 }, { "epoch": 0.715513536742587, "grad_norm": 1.162035346031189, "learning_rate": 3.957113715938884e-05, "loss": 1.4841, "step": 4995 }, { "epoch": 0.7156567826958888, "grad_norm": 1.1959831714630127, "learning_rate": 3.953417552712174e-05, "loss": 1.4676, "step": 4996 }, { "epoch": 0.7158000286491907, "grad_norm": 1.1311061382293701, "learning_rate": 3.949722691165036e-05, "loss": 1.4306, "step": 4997 }, { "epoch": 0.7159432746024925, "grad_norm": 1.359912395477295, "learning_rate": 3.9460291320928825e-05, "loss": 1.4727, "step": 4998 }, { "epoch": 0.7160865205557942, "grad_norm": 1.0230799913406372, "learning_rate": 3.942336876290843e-05, "loss": 1.4541, "step": 4999 }, { "epoch": 0.7162297665090961, "grad_norm": 1.058834195137024, "learning_rate": 3.93864592455377e-05, "loss": 1.3178, "step": 5000 }, { "epoch": 0.7163730124623979, "grad_norm": 1.215747356414795, "learning_rate": 3.9349562776762385e-05, "loss": 1.2663, "step": 5001 }, { "epoch": 0.7165162584156998, "grad_norm": 1.3212130069732666, "learning_rate": 3.9312679364525294e-05, "loss": 1.5006, "step": 5002 }, { "epoch": 0.7166595043690016, "grad_norm": 1.1770306825637817, "learning_rate": 3.927580901676653e-05, "loss": 1.3749, "step": 5003 }, { "epoch": 0.7168027503223035, "grad_norm": 0.9677147269248962, "learning_rate": 3.923895174142347e-05, "loss": 1.3488, "step": 5004 }, { "epoch": 0.7169459962756052, "grad_norm": 1.1054186820983887, "learning_rate": 3.9202107546430456e-05, "loss": 1.4437, "step": 5005 }, { "epoch": 0.717089242228907, "grad_norm": 1.160576343536377, "learning_rate": 3.916527643971917e-05, "loss": 1.3579, "step": 5006 }, { "epoch": 0.7172324881822089, "grad_norm": 1.1056917905807495, "learning_rate": 3.912845842921844e-05, "loss": 1.3906, "step": 5007 }, { "epoch": 0.7173757341355107, "grad_norm": 0.9884079694747925, "learning_rate": 3.909165352285429e-05, "loss": 1.3534, "step": 5008 }, { "epoch": 0.7175189800888125, "grad_norm": 0.9287542700767517, "learning_rate": 3.905486172854991e-05, "loss": 1.4732, "step": 5009 }, { "epoch": 0.7176622260421143, "grad_norm": 1.3127312660217285, "learning_rate": 3.9018083054225564e-05, "loss": 1.3264, "step": 5010 }, { "epoch": 0.7178054719954161, "grad_norm": 1.0460777282714844, "learning_rate": 3.898131750779892e-05, "loss": 1.2867, "step": 5011 }, { "epoch": 0.717948717948718, "grad_norm": 1.0951322317123413, "learning_rate": 3.894456509718466e-05, "loss": 1.3572, "step": 5012 }, { "epoch": 0.7180919639020198, "grad_norm": 1.3226550817489624, "learning_rate": 3.890782583029462e-05, "loss": 1.525, "step": 5013 }, { "epoch": 0.7182352098553216, "grad_norm": 1.4739888906478882, "learning_rate": 3.887109971503785e-05, "loss": 1.3045, "step": 5014 }, { "epoch": 0.7183784558086234, "grad_norm": 1.0984445810317993, "learning_rate": 3.883438675932067e-05, "loss": 1.5656, "step": 5015 }, { "epoch": 0.7185217017619252, "grad_norm": 1.083709478378296, "learning_rate": 3.8797686971046375e-05, "loss": 1.3165, "step": 5016 }, { "epoch": 0.718664947715227, "grad_norm": 1.2398499250411987, "learning_rate": 3.876100035811555e-05, "loss": 1.7049, "step": 5017 }, { "epoch": 0.7188081936685289, "grad_norm": 1.1589994430541992, "learning_rate": 3.872432692842591e-05, "loss": 1.4608, "step": 5018 }, { "epoch": 0.7189514396218307, "grad_norm": 1.1455938816070557, "learning_rate": 3.868766668987236e-05, "loss": 1.4466, "step": 5019 }, { "epoch": 0.7190946855751325, "grad_norm": 1.163764238357544, "learning_rate": 3.8651019650346955e-05, "loss": 1.4691, "step": 5020 }, { "epoch": 0.7192379315284343, "grad_norm": 1.1781357526779175, "learning_rate": 3.8614385817738794e-05, "loss": 1.4197, "step": 5021 }, { "epoch": 0.7193811774817361, "grad_norm": 1.069103717803955, "learning_rate": 3.8577765199934354e-05, "loss": 1.4332, "step": 5022 }, { "epoch": 0.719524423435038, "grad_norm": 1.0578707456588745, "learning_rate": 3.854115780481713e-05, "loss": 1.478, "step": 5023 }, { "epoch": 0.7196676693883398, "grad_norm": 1.0581450462341309, "learning_rate": 3.8504563640267735e-05, "loss": 1.5387, "step": 5024 }, { "epoch": 0.7198109153416415, "grad_norm": 1.1096394062042236, "learning_rate": 3.8467982714163994e-05, "loss": 1.311, "step": 5025 }, { "epoch": 0.7199541612949434, "grad_norm": 1.1719540357589722, "learning_rate": 3.843141503438091e-05, "loss": 1.316, "step": 5026 }, { "epoch": 0.7200974072482452, "grad_norm": 1.3736746311187744, "learning_rate": 3.839486060879057e-05, "loss": 1.3147, "step": 5027 }, { "epoch": 0.7202406532015471, "grad_norm": 1.1375168561935425, "learning_rate": 3.8358319445262256e-05, "loss": 1.3797, "step": 5028 }, { "epoch": 0.7203838991548489, "grad_norm": 1.1240938901901245, "learning_rate": 3.832179155166238e-05, "loss": 1.3362, "step": 5029 }, { "epoch": 0.7205271451081507, "grad_norm": 1.2123749256134033, "learning_rate": 3.828527693585451e-05, "loss": 1.4234, "step": 5030 }, { "epoch": 0.7206703910614525, "grad_norm": 1.1414717435836792, "learning_rate": 3.8248775605699285e-05, "loss": 1.3121, "step": 5031 }, { "epoch": 0.7208136370147543, "grad_norm": 1.129688024520874, "learning_rate": 3.821228756905452e-05, "loss": 1.3708, "step": 5032 }, { "epoch": 0.7209568829680562, "grad_norm": 1.1107715368270874, "learning_rate": 3.817581283377532e-05, "loss": 1.1907, "step": 5033 }, { "epoch": 0.721100128921358, "grad_norm": 1.2803198099136353, "learning_rate": 3.813935140771365e-05, "loss": 1.2585, "step": 5034 }, { "epoch": 0.7212433748746598, "grad_norm": 1.2456905841827393, "learning_rate": 3.810290329871882e-05, "loss": 1.3733, "step": 5035 }, { "epoch": 0.7213866208279616, "grad_norm": 1.1083892583847046, "learning_rate": 3.8066468514637186e-05, "loss": 1.5655, "step": 5036 }, { "epoch": 0.7215298667812634, "grad_norm": 1.0969256162643433, "learning_rate": 3.803004706331228e-05, "loss": 1.3649, "step": 5037 }, { "epoch": 0.7216731127345652, "grad_norm": 1.4823991060256958, "learning_rate": 3.7993638952584744e-05, "loss": 1.4809, "step": 5038 }, { "epoch": 0.7218163586878671, "grad_norm": 1.1436653137207031, "learning_rate": 3.7957244190292264e-05, "loss": 1.3989, "step": 5039 }, { "epoch": 0.7219596046411689, "grad_norm": 1.0482831001281738, "learning_rate": 3.792086278426982e-05, "loss": 1.4177, "step": 5040 }, { "epoch": 0.7221028505944707, "grad_norm": 1.3176190853118896, "learning_rate": 3.788449474234943e-05, "loss": 1.4209, "step": 5041 }, { "epoch": 0.7222460965477725, "grad_norm": 1.076823353767395, "learning_rate": 3.784814007236016e-05, "loss": 1.3703, "step": 5042 }, { "epoch": 0.7223893425010743, "grad_norm": 1.0498149394989014, "learning_rate": 3.781179878212829e-05, "loss": 1.4057, "step": 5043 }, { "epoch": 0.7225325884543762, "grad_norm": 1.0373271703720093, "learning_rate": 3.777547087947729e-05, "loss": 1.2758, "step": 5044 }, { "epoch": 0.722675834407678, "grad_norm": 1.1445913314819336, "learning_rate": 3.773915637222756e-05, "loss": 1.3026, "step": 5045 }, { "epoch": 0.7228190803609797, "grad_norm": 1.1593588590621948, "learning_rate": 3.770285526819674e-05, "loss": 1.3704, "step": 5046 }, { "epoch": 0.7229623263142816, "grad_norm": 1.0304133892059326, "learning_rate": 3.766656757519956e-05, "loss": 1.3547, "step": 5047 }, { "epoch": 0.7231055722675834, "grad_norm": 1.309165120124817, "learning_rate": 3.763029330104788e-05, "loss": 1.2513, "step": 5048 }, { "epoch": 0.7232488182208853, "grad_norm": 0.9601624608039856, "learning_rate": 3.759403245355068e-05, "loss": 1.2474, "step": 5049 }, { "epoch": 0.7233920641741871, "grad_norm": 1.1106131076812744, "learning_rate": 3.7557785040513905e-05, "loss": 1.4681, "step": 5050 }, { "epoch": 0.723535310127489, "grad_norm": 1.2391899824142456, "learning_rate": 3.752155106974085e-05, "loss": 1.3575, "step": 5051 }, { "epoch": 0.7236785560807907, "grad_norm": 1.260866641998291, "learning_rate": 3.7485330549031775e-05, "loss": 1.47, "step": 5052 }, { "epoch": 0.7238218020340925, "grad_norm": 1.1886149644851685, "learning_rate": 3.744912348618399e-05, "loss": 1.631, "step": 5053 }, { "epoch": 0.7239650479873944, "grad_norm": 1.0159744024276733, "learning_rate": 3.741292988899204e-05, "loss": 1.4806, "step": 5054 }, { "epoch": 0.7241082939406962, "grad_norm": 1.078202486038208, "learning_rate": 3.737674976524749e-05, "loss": 1.4302, "step": 5055 }, { "epoch": 0.724251539893998, "grad_norm": 1.2899090051651, "learning_rate": 3.734058312273904e-05, "loss": 1.6124, "step": 5056 }, { "epoch": 0.7243947858472998, "grad_norm": 1.227173924446106, "learning_rate": 3.730442996925245e-05, "loss": 1.2599, "step": 5057 }, { "epoch": 0.7245380318006016, "grad_norm": 1.1580674648284912, "learning_rate": 3.726829031257062e-05, "loss": 1.2511, "step": 5058 }, { "epoch": 0.7246812777539035, "grad_norm": 0.8700530529022217, "learning_rate": 3.723216416047355e-05, "loss": 1.4088, "step": 5059 }, { "epoch": 0.7248245237072053, "grad_norm": 1.0443872213363647, "learning_rate": 3.7196051520738204e-05, "loss": 1.3214, "step": 5060 }, { "epoch": 0.7249677696605071, "grad_norm": 0.9927628040313721, "learning_rate": 3.7159952401138844e-05, "loss": 1.5233, "step": 5061 }, { "epoch": 0.7251110156138089, "grad_norm": 1.187017560005188, "learning_rate": 3.712386680944672e-05, "loss": 1.5368, "step": 5062 }, { "epoch": 0.7252542615671107, "grad_norm": 1.0443201065063477, "learning_rate": 3.708779475343009e-05, "loss": 1.4665, "step": 5063 }, { "epoch": 0.7253975075204125, "grad_norm": 1.1706702709197998, "learning_rate": 3.705173624085438e-05, "loss": 1.3417, "step": 5064 }, { "epoch": 0.7255407534737144, "grad_norm": 1.2596733570098877, "learning_rate": 3.70156912794822e-05, "loss": 1.359, "step": 5065 }, { "epoch": 0.7256839994270162, "grad_norm": 1.1945362091064453, "learning_rate": 3.6979659877073034e-05, "loss": 1.5288, "step": 5066 }, { "epoch": 0.725827245380318, "grad_norm": 1.1323626041412354, "learning_rate": 3.694364204138359e-05, "loss": 1.4039, "step": 5067 }, { "epoch": 0.7259704913336198, "grad_norm": 1.1299585103988647, "learning_rate": 3.6907637780167604e-05, "loss": 1.3272, "step": 5068 }, { "epoch": 0.7261137372869216, "grad_norm": 1.0844881534576416, "learning_rate": 3.687164710117592e-05, "loss": 1.4856, "step": 5069 }, { "epoch": 0.7262569832402235, "grad_norm": 1.1019644737243652, "learning_rate": 3.6835670012156456e-05, "loss": 1.5316, "step": 5070 }, { "epoch": 0.7264002291935253, "grad_norm": 1.3012523651123047, "learning_rate": 3.6799706520854094e-05, "loss": 1.5888, "step": 5071 }, { "epoch": 0.7265434751468272, "grad_norm": 0.9897657036781311, "learning_rate": 3.6763756635010993e-05, "loss": 1.4358, "step": 5072 }, { "epoch": 0.7266867211001289, "grad_norm": 1.2435823678970337, "learning_rate": 3.672782036236627e-05, "loss": 1.4918, "step": 5073 }, { "epoch": 0.7268299670534307, "grad_norm": 1.2064108848571777, "learning_rate": 3.669189771065604e-05, "loss": 1.4607, "step": 5074 }, { "epoch": 0.7269732130067326, "grad_norm": 1.1323789358139038, "learning_rate": 3.6655988687613605e-05, "loss": 1.3481, "step": 5075 }, { "epoch": 0.7271164589600344, "grad_norm": 1.1548043489456177, "learning_rate": 3.6620093300969284e-05, "loss": 1.5723, "step": 5076 }, { "epoch": 0.7272597049133362, "grad_norm": 1.2267212867736816, "learning_rate": 3.658421155845047e-05, "loss": 1.3687, "step": 5077 }, { "epoch": 0.727402950866638, "grad_norm": 1.2573171854019165, "learning_rate": 3.654834346778161e-05, "loss": 1.2876, "step": 5078 }, { "epoch": 0.7275461968199398, "grad_norm": 1.2740806341171265, "learning_rate": 3.651248903668421e-05, "loss": 1.2539, "step": 5079 }, { "epoch": 0.7276894427732417, "grad_norm": 1.0733959674835205, "learning_rate": 3.647664827287688e-05, "loss": 1.5291, "step": 5080 }, { "epoch": 0.7278326887265435, "grad_norm": 1.0868635177612305, "learning_rate": 3.644082118407519e-05, "loss": 1.2913, "step": 5081 }, { "epoch": 0.7279759346798453, "grad_norm": 1.001835584640503, "learning_rate": 3.640500777799182e-05, "loss": 1.4521, "step": 5082 }, { "epoch": 0.7281191806331471, "grad_norm": 1.1507211923599243, "learning_rate": 3.636920806233661e-05, "loss": 1.272, "step": 5083 }, { "epoch": 0.7282624265864489, "grad_norm": 1.074479341506958, "learning_rate": 3.6333422044816256e-05, "loss": 1.3763, "step": 5084 }, { "epoch": 0.7284056725397507, "grad_norm": 1.0682810544967651, "learning_rate": 3.629764973313463e-05, "loss": 1.4071, "step": 5085 }, { "epoch": 0.7285489184930526, "grad_norm": 1.0697369575500488, "learning_rate": 3.626189113499262e-05, "loss": 1.4071, "step": 5086 }, { "epoch": 0.7286921644463544, "grad_norm": 1.2354894876480103, "learning_rate": 3.622614625808819e-05, "loss": 1.1925, "step": 5087 }, { "epoch": 0.7288354103996562, "grad_norm": 0.9559935927391052, "learning_rate": 3.6190415110116336e-05, "loss": 1.5443, "step": 5088 }, { "epoch": 0.728978656352958, "grad_norm": 1.3013520240783691, "learning_rate": 3.6154697698769005e-05, "loss": 1.3304, "step": 5089 }, { "epoch": 0.7291219023062598, "grad_norm": 1.1134874820709229, "learning_rate": 3.6118994031735365e-05, "loss": 1.4819, "step": 5090 }, { "epoch": 0.7292651482595617, "grad_norm": 1.1534790992736816, "learning_rate": 3.6083304116701535e-05, "loss": 1.2807, "step": 5091 }, { "epoch": 0.7294083942128635, "grad_norm": 1.0838967561721802, "learning_rate": 3.604762796135059e-05, "loss": 1.2891, "step": 5092 }, { "epoch": 0.7295516401661654, "grad_norm": 1.125765323638916, "learning_rate": 3.601196557336275e-05, "loss": 1.283, "step": 5093 }, { "epoch": 0.7296948861194671, "grad_norm": 1.1465321779251099, "learning_rate": 3.5976316960415334e-05, "loss": 1.3881, "step": 5094 }, { "epoch": 0.7298381320727689, "grad_norm": 1.2596232891082764, "learning_rate": 3.594068213018249e-05, "loss": 1.4965, "step": 5095 }, { "epoch": 0.7299813780260708, "grad_norm": 1.0479146242141724, "learning_rate": 3.590506109033558e-05, "loss": 1.4195, "step": 5096 }, { "epoch": 0.7301246239793726, "grad_norm": 1.2078180313110352, "learning_rate": 3.58694538485429e-05, "loss": 1.6226, "step": 5097 }, { "epoch": 0.7302678699326745, "grad_norm": 1.2631756067276, "learning_rate": 3.583386041246982e-05, "loss": 1.2311, "step": 5098 }, { "epoch": 0.7304111158859762, "grad_norm": 1.186761736869812, "learning_rate": 3.579828078977877e-05, "loss": 1.5364, "step": 5099 }, { "epoch": 0.730554361839278, "grad_norm": 1.188675880432129, "learning_rate": 3.576271498812904e-05, "loss": 1.2621, "step": 5100 }, { "epoch": 0.7306976077925799, "grad_norm": 1.1299264430999756, "learning_rate": 3.572716301517719e-05, "loss": 1.5605, "step": 5101 }, { "epoch": 0.7308408537458817, "grad_norm": 1.0550850629806519, "learning_rate": 3.5691624878576666e-05, "loss": 1.4875, "step": 5102 }, { "epoch": 0.7309840996991835, "grad_norm": 1.2195994853973389, "learning_rate": 3.565610058597789e-05, "loss": 1.3682, "step": 5103 }, { "epoch": 0.7311273456524853, "grad_norm": 0.9243895411491394, "learning_rate": 3.56205901450284e-05, "loss": 1.4823, "step": 5104 }, { "epoch": 0.7312705916057871, "grad_norm": 1.259942889213562, "learning_rate": 3.5585093563372706e-05, "loss": 1.6089, "step": 5105 }, { "epoch": 0.731413837559089, "grad_norm": 1.2642111778259277, "learning_rate": 3.5549610848652335e-05, "loss": 1.4983, "step": 5106 }, { "epoch": 0.7315570835123908, "grad_norm": 1.2183876037597656, "learning_rate": 3.551414200850586e-05, "loss": 1.3483, "step": 5107 }, { "epoch": 0.7317003294656926, "grad_norm": 1.2128781080245972, "learning_rate": 3.547868705056882e-05, "loss": 1.2486, "step": 5108 }, { "epoch": 0.7318435754189944, "grad_norm": 1.0286191701889038, "learning_rate": 3.544324598247386e-05, "loss": 1.5399, "step": 5109 }, { "epoch": 0.7319868213722962, "grad_norm": 1.0709208250045776, "learning_rate": 3.540781881185046e-05, "loss": 1.3217, "step": 5110 }, { "epoch": 0.732130067325598, "grad_norm": 1.0790868997573853, "learning_rate": 3.537240554632523e-05, "loss": 1.2436, "step": 5111 }, { "epoch": 0.7322733132788999, "grad_norm": 1.6189539432525635, "learning_rate": 3.533700619352187e-05, "loss": 1.4116, "step": 5112 }, { "epoch": 0.7324165592322017, "grad_norm": 1.0976566076278687, "learning_rate": 3.530162076106088e-05, "loss": 1.2579, "step": 5113 }, { "epoch": 0.7325598051855035, "grad_norm": 1.1255980730056763, "learning_rate": 3.52662492565599e-05, "loss": 1.3065, "step": 5114 }, { "epoch": 0.7327030511388053, "grad_norm": 0.9940956234931946, "learning_rate": 3.523089168763355e-05, "loss": 1.4463, "step": 5115 }, { "epoch": 0.7328462970921071, "grad_norm": 1.3461437225341797, "learning_rate": 3.519554806189343e-05, "loss": 1.4058, "step": 5116 }, { "epoch": 0.732989543045409, "grad_norm": 1.1426135301589966, "learning_rate": 3.516021838694815e-05, "loss": 1.289, "step": 5117 }, { "epoch": 0.7331327889987108, "grad_norm": 1.451385736465454, "learning_rate": 3.512490267040333e-05, "loss": 1.2901, "step": 5118 }, { "epoch": 0.7332760349520127, "grad_norm": 1.0993014574050903, "learning_rate": 3.5089600919861535e-05, "loss": 1.4081, "step": 5119 }, { "epoch": 0.7334192809053144, "grad_norm": 1.1274062395095825, "learning_rate": 3.505431314292243e-05, "loss": 1.4502, "step": 5120 }, { "epoch": 0.7335625268586162, "grad_norm": 1.118840217590332, "learning_rate": 3.501903934718247e-05, "loss": 1.539, "step": 5121 }, { "epoch": 0.7337057728119181, "grad_norm": 1.2095909118652344, "learning_rate": 3.498377954023534e-05, "loss": 1.2015, "step": 5122 }, { "epoch": 0.7338490187652199, "grad_norm": 1.2172831296920776, "learning_rate": 3.4948533729671616e-05, "loss": 1.3441, "step": 5123 }, { "epoch": 0.7339922647185217, "grad_norm": 1.0299334526062012, "learning_rate": 3.4913301923078765e-05, "loss": 1.4579, "step": 5124 }, { "epoch": 0.7341355106718235, "grad_norm": 1.2233995199203491, "learning_rate": 3.4878084128041366e-05, "loss": 1.3798, "step": 5125 }, { "epoch": 0.7342787566251253, "grad_norm": 1.1269375085830688, "learning_rate": 3.484288035214095e-05, "loss": 1.3822, "step": 5126 }, { "epoch": 0.7344220025784272, "grad_norm": 1.1770092248916626, "learning_rate": 3.480769060295599e-05, "loss": 1.3385, "step": 5127 }, { "epoch": 0.734565248531729, "grad_norm": 1.1989469528198242, "learning_rate": 3.477251488806199e-05, "loss": 1.4458, "step": 5128 }, { "epoch": 0.7347084944850308, "grad_norm": 1.1624791622161865, "learning_rate": 3.47373532150314e-05, "loss": 1.435, "step": 5129 }, { "epoch": 0.7348517404383326, "grad_norm": 0.9366243481636047, "learning_rate": 3.47022055914337e-05, "loss": 1.4498, "step": 5130 }, { "epoch": 0.7349949863916344, "grad_norm": 1.2255891561508179, "learning_rate": 3.466707202483523e-05, "loss": 1.4099, "step": 5131 }, { "epoch": 0.7351382323449362, "grad_norm": 1.207460641860962, "learning_rate": 3.463195252279939e-05, "loss": 1.3743, "step": 5132 }, { "epoch": 0.7352814782982381, "grad_norm": 1.0543222427368164, "learning_rate": 3.459684709288663e-05, "loss": 1.4278, "step": 5133 }, { "epoch": 0.7354247242515399, "grad_norm": 1.0448447465896606, "learning_rate": 3.456175574265418e-05, "loss": 1.3991, "step": 5134 }, { "epoch": 0.7355679702048417, "grad_norm": 1.0047661066055298, "learning_rate": 3.452667847965636e-05, "loss": 1.3303, "step": 5135 }, { "epoch": 0.7357112161581435, "grad_norm": 1.1322604417800903, "learning_rate": 3.449161531144447e-05, "loss": 1.3746, "step": 5136 }, { "epoch": 0.7358544621114453, "grad_norm": 1.0146491527557373, "learning_rate": 3.4456566245566715e-05, "loss": 1.4097, "step": 5137 }, { "epoch": 0.7359977080647472, "grad_norm": 1.0880353450775146, "learning_rate": 3.4421531289568324e-05, "loss": 1.4876, "step": 5138 }, { "epoch": 0.736140954018049, "grad_norm": 1.1464815139770508, "learning_rate": 3.438651045099137e-05, "loss": 1.5125, "step": 5139 }, { "epoch": 0.7362841999713509, "grad_norm": 1.254262089729309, "learning_rate": 3.4351503737375065e-05, "loss": 1.208, "step": 5140 }, { "epoch": 0.7364274459246526, "grad_norm": 1.2733190059661865, "learning_rate": 3.4316511156255494e-05, "loss": 1.2675, "step": 5141 }, { "epoch": 0.7365706918779544, "grad_norm": 1.020726203918457, "learning_rate": 3.428153271516562e-05, "loss": 1.3277, "step": 5142 }, { "epoch": 0.7367139378312563, "grad_norm": 0.9686973690986633, "learning_rate": 3.424656842163545e-05, "loss": 1.3402, "step": 5143 }, { "epoch": 0.7368571837845581, "grad_norm": 1.0724056959152222, "learning_rate": 3.4211618283192014e-05, "loss": 1.3346, "step": 5144 }, { "epoch": 0.73700042973786, "grad_norm": 1.113033652305603, "learning_rate": 3.417668230735912e-05, "loss": 1.4603, "step": 5145 }, { "epoch": 0.7371436756911617, "grad_norm": 1.2005728483200073, "learning_rate": 3.414176050165765e-05, "loss": 1.4719, "step": 5146 }, { "epoch": 0.7372869216444635, "grad_norm": 1.1824235916137695, "learning_rate": 3.41068528736054e-05, "loss": 1.2843, "step": 5147 }, { "epoch": 0.7374301675977654, "grad_norm": 1.0441292524337769, "learning_rate": 3.4071959430717124e-05, "loss": 1.0946, "step": 5148 }, { "epoch": 0.7375734135510672, "grad_norm": 1.3666788339614868, "learning_rate": 3.403708018050456e-05, "loss": 1.2495, "step": 5149 }, { "epoch": 0.737716659504369, "grad_norm": 1.004373550415039, "learning_rate": 3.4002215130476236e-05, "loss": 1.2044, "step": 5150 }, { "epoch": 0.7378599054576708, "grad_norm": 1.172498106956482, "learning_rate": 3.3967364288137824e-05, "loss": 1.4754, "step": 5151 }, { "epoch": 0.7380031514109726, "grad_norm": 1.0266865491867065, "learning_rate": 3.393252766099187e-05, "loss": 1.2558, "step": 5152 }, { "epoch": 0.7381463973642745, "grad_norm": 1.0367158651351929, "learning_rate": 3.389770525653777e-05, "loss": 1.5902, "step": 5153 }, { "epoch": 0.7382896433175763, "grad_norm": 1.2140647172927856, "learning_rate": 3.386289708227194e-05, "loss": 1.3265, "step": 5154 }, { "epoch": 0.7384328892708781, "grad_norm": 1.15199613571167, "learning_rate": 3.3828103145687726e-05, "loss": 1.3891, "step": 5155 }, { "epoch": 0.7385761352241799, "grad_norm": 1.1596858501434326, "learning_rate": 3.379332345427541e-05, "loss": 1.4505, "step": 5156 }, { "epoch": 0.7387193811774817, "grad_norm": 1.0261905193328857, "learning_rate": 3.375855801552219e-05, "loss": 1.3436, "step": 5157 }, { "epoch": 0.7388626271307835, "grad_norm": 1.0811244249343872, "learning_rate": 3.372380683691221e-05, "loss": 1.4554, "step": 5158 }, { "epoch": 0.7390058730840854, "grad_norm": 1.294348120689392, "learning_rate": 3.368906992592656e-05, "loss": 1.4084, "step": 5159 }, { "epoch": 0.7391491190373872, "grad_norm": 1.066070795059204, "learning_rate": 3.36543472900432e-05, "loss": 1.5789, "step": 5160 }, { "epoch": 0.7392923649906891, "grad_norm": 1.1045114994049072, "learning_rate": 3.361963893673703e-05, "loss": 1.3533, "step": 5161 }, { "epoch": 0.7394356109439908, "grad_norm": 0.9879827499389648, "learning_rate": 3.358494487348e-05, "loss": 1.4095, "step": 5162 }, { "epoch": 0.7395788568972926, "grad_norm": 1.1674392223358154, "learning_rate": 3.355026510774079e-05, "loss": 1.3249, "step": 5163 }, { "epoch": 0.7397221028505945, "grad_norm": 1.0892313718795776, "learning_rate": 3.3515599646985116e-05, "loss": 1.2696, "step": 5164 }, { "epoch": 0.7398653488038963, "grad_norm": 1.0873539447784424, "learning_rate": 3.3480948498675616e-05, "loss": 1.4241, "step": 5165 }, { "epoch": 0.7400085947571982, "grad_norm": 1.176135778427124, "learning_rate": 3.344631167027181e-05, "loss": 1.4697, "step": 5166 }, { "epoch": 0.7401518407104999, "grad_norm": 1.0498485565185547, "learning_rate": 3.341168916923019e-05, "loss": 1.4549, "step": 5167 }, { "epoch": 0.7402950866638017, "grad_norm": 1.0449223518371582, "learning_rate": 3.3377081003004017e-05, "loss": 1.3323, "step": 5168 }, { "epoch": 0.7404383326171036, "grad_norm": 1.5824793577194214, "learning_rate": 3.334248717904368e-05, "loss": 1.4333, "step": 5169 }, { "epoch": 0.7405815785704054, "grad_norm": 1.2888814210891724, "learning_rate": 3.330790770479636e-05, "loss": 1.4696, "step": 5170 }, { "epoch": 0.7407248245237072, "grad_norm": 1.1184964179992676, "learning_rate": 3.327334258770611e-05, "loss": 1.3917, "step": 5171 }, { "epoch": 0.740868070477009, "grad_norm": 1.1289799213409424, "learning_rate": 3.3238791835213944e-05, "loss": 1.6047, "step": 5172 }, { "epoch": 0.7410113164303108, "grad_norm": 0.9914992451667786, "learning_rate": 3.320425545475787e-05, "loss": 1.4468, "step": 5173 }, { "epoch": 0.7411545623836127, "grad_norm": 0.9856660962104797, "learning_rate": 3.316973345377263e-05, "loss": 1.535, "step": 5174 }, { "epoch": 0.7412978083369145, "grad_norm": 1.2684179544448853, "learning_rate": 3.313522583969e-05, "loss": 1.3731, "step": 5175 }, { "epoch": 0.7414410542902163, "grad_norm": 1.0974550247192383, "learning_rate": 3.3100732619938576e-05, "loss": 1.2982, "step": 5176 }, { "epoch": 0.7415843002435181, "grad_norm": 0.9968685507774353, "learning_rate": 3.306625380194394e-05, "loss": 1.6061, "step": 5177 }, { "epoch": 0.7417275461968199, "grad_norm": 1.1147903203964233, "learning_rate": 3.303178939312849e-05, "loss": 1.2204, "step": 5178 }, { "epoch": 0.7418707921501217, "grad_norm": 1.1064265966415405, "learning_rate": 3.299733940091159e-05, "loss": 1.374, "step": 5179 }, { "epoch": 0.7420140381034236, "grad_norm": 1.075097680091858, "learning_rate": 3.296290383270948e-05, "loss": 1.2833, "step": 5180 }, { "epoch": 0.7421572840567254, "grad_norm": 0.8952064514160156, "learning_rate": 3.292848269593524e-05, "loss": 1.4186, "step": 5181 }, { "epoch": 0.7423005300100273, "grad_norm": 1.056009292602539, "learning_rate": 3.2894075997998876e-05, "loss": 1.6138, "step": 5182 }, { "epoch": 0.742443775963329, "grad_norm": 1.0715703964233398, "learning_rate": 3.28596837463074e-05, "loss": 1.4098, "step": 5183 }, { "epoch": 0.7425870219166308, "grad_norm": 1.029760479927063, "learning_rate": 3.282530594826452e-05, "loss": 1.513, "step": 5184 }, { "epoch": 0.7427302678699327, "grad_norm": 1.1541615724563599, "learning_rate": 3.2790942611270955e-05, "loss": 1.407, "step": 5185 }, { "epoch": 0.7428735138232345, "grad_norm": 1.0866365432739258, "learning_rate": 3.2756593742724274e-05, "loss": 1.4969, "step": 5186 }, { "epoch": 0.7430167597765364, "grad_norm": 1.1411486864089966, "learning_rate": 3.272225935001895e-05, "loss": 1.4811, "step": 5187 }, { "epoch": 0.7431600057298381, "grad_norm": 1.197404146194458, "learning_rate": 3.268793944054636e-05, "loss": 1.2723, "step": 5188 }, { "epoch": 0.7433032516831399, "grad_norm": 1.0996851921081543, "learning_rate": 3.265363402169461e-05, "loss": 1.4667, "step": 5189 }, { "epoch": 0.7434464976364418, "grad_norm": 1.1757173538208008, "learning_rate": 3.261934310084894e-05, "loss": 1.5325, "step": 5190 }, { "epoch": 0.7435897435897436, "grad_norm": 1.1916917562484741, "learning_rate": 3.258506668539132e-05, "loss": 1.4952, "step": 5191 }, { "epoch": 0.7437329895430455, "grad_norm": 1.1961439847946167, "learning_rate": 3.255080478270054e-05, "loss": 1.695, "step": 5192 }, { "epoch": 0.7438762354963472, "grad_norm": 1.219416856765747, "learning_rate": 3.251655740015236e-05, "loss": 1.4318, "step": 5193 }, { "epoch": 0.744019481449649, "grad_norm": 1.0704971551895142, "learning_rate": 3.248232454511947e-05, "loss": 1.4094, "step": 5194 }, { "epoch": 0.7441627274029509, "grad_norm": 1.1415612697601318, "learning_rate": 3.2448106224971275e-05, "loss": 1.4812, "step": 5195 }, { "epoch": 0.7443059733562527, "grad_norm": 1.047178864479065, "learning_rate": 3.2413902447074164e-05, "loss": 1.1752, "step": 5196 }, { "epoch": 0.7444492193095545, "grad_norm": 1.1858075857162476, "learning_rate": 3.2379713218791355e-05, "loss": 1.3443, "step": 5197 }, { "epoch": 0.7445924652628563, "grad_norm": 1.195770502090454, "learning_rate": 3.2345538547482945e-05, "loss": 1.3732, "step": 5198 }, { "epoch": 0.7447357112161581, "grad_norm": 1.1657339334487915, "learning_rate": 3.231137844050593e-05, "loss": 1.5231, "step": 5199 }, { "epoch": 0.74487895716946, "grad_norm": 1.1602109670639038, "learning_rate": 3.227723290521405e-05, "loss": 1.3294, "step": 5200 }, { "epoch": 0.7450222031227618, "grad_norm": 1.223780870437622, "learning_rate": 3.224310194895807e-05, "loss": 1.4628, "step": 5201 }, { "epoch": 0.7451654490760636, "grad_norm": 1.2210118770599365, "learning_rate": 3.220898557908555e-05, "loss": 1.3901, "step": 5202 }, { "epoch": 0.7453086950293655, "grad_norm": 1.172743558883667, "learning_rate": 3.217488380294083e-05, "loss": 1.3974, "step": 5203 }, { "epoch": 0.7454519409826672, "grad_norm": 1.0990151166915894, "learning_rate": 3.214079662786523e-05, "loss": 1.5187, "step": 5204 }, { "epoch": 0.745595186935969, "grad_norm": 1.1975841522216797, "learning_rate": 3.210672406119686e-05, "loss": 1.3191, "step": 5205 }, { "epoch": 0.7457384328892709, "grad_norm": 1.2681065797805786, "learning_rate": 3.207266611027069e-05, "loss": 1.3653, "step": 5206 }, { "epoch": 0.7458816788425727, "grad_norm": 1.0499192476272583, "learning_rate": 3.203862278241857e-05, "loss": 1.4562, "step": 5207 }, { "epoch": 0.7460249247958746, "grad_norm": 1.1028032302856445, "learning_rate": 3.200459408496919e-05, "loss": 1.1865, "step": 5208 }, { "epoch": 0.7461681707491763, "grad_norm": 1.0313899517059326, "learning_rate": 3.197058002524811e-05, "loss": 1.4094, "step": 5209 }, { "epoch": 0.7463114167024781, "grad_norm": 1.352752685546875, "learning_rate": 3.1936580610577636e-05, "loss": 1.3413, "step": 5210 }, { "epoch": 0.74645466265578, "grad_norm": 1.2039912939071655, "learning_rate": 3.190259584827704e-05, "loss": 1.4096, "step": 5211 }, { "epoch": 0.7465979086090818, "grad_norm": 1.2009607553482056, "learning_rate": 3.186862574566245e-05, "loss": 1.4538, "step": 5212 }, { "epoch": 0.7467411545623837, "grad_norm": 1.0683406591415405, "learning_rate": 3.1834670310046734e-05, "loss": 1.3246, "step": 5213 }, { "epoch": 0.7468844005156854, "grad_norm": 1.1308684349060059, "learning_rate": 3.180072954873966e-05, "loss": 1.3343, "step": 5214 }, { "epoch": 0.7470276464689872, "grad_norm": 1.0525604486465454, "learning_rate": 3.1766803469047846e-05, "loss": 1.4233, "step": 5215 }, { "epoch": 0.7471708924222891, "grad_norm": 1.2752550840377808, "learning_rate": 3.1732892078274735e-05, "loss": 1.4504, "step": 5216 }, { "epoch": 0.7473141383755909, "grad_norm": 1.0193434953689575, "learning_rate": 3.1698995383720645e-05, "loss": 1.436, "step": 5217 }, { "epoch": 0.7474573843288927, "grad_norm": 2.2145111560821533, "learning_rate": 3.166511339268259e-05, "loss": 1.4872, "step": 5218 }, { "epoch": 0.7476006302821945, "grad_norm": 1.2425800561904907, "learning_rate": 3.163124611245464e-05, "loss": 1.3313, "step": 5219 }, { "epoch": 0.7477438762354963, "grad_norm": 1.1517326831817627, "learning_rate": 3.1597393550327556e-05, "loss": 1.4176, "step": 5220 }, { "epoch": 0.7478871221887982, "grad_norm": 1.1516485214233398, "learning_rate": 3.1563555713588924e-05, "loss": 1.5277, "step": 5221 }, { "epoch": 0.7480303681421, "grad_norm": 1.1432104110717773, "learning_rate": 3.152973260952315e-05, "loss": 1.4912, "step": 5222 }, { "epoch": 0.7481736140954018, "grad_norm": 1.0943008661270142, "learning_rate": 3.149592424541166e-05, "loss": 1.484, "step": 5223 }, { "epoch": 0.7483168600487036, "grad_norm": 0.9732567071914673, "learning_rate": 3.146213062853243e-05, "loss": 1.3784, "step": 5224 }, { "epoch": 0.7484601060020054, "grad_norm": 0.9418982863426208, "learning_rate": 3.1428351766160415e-05, "loss": 1.4974, "step": 5225 }, { "epoch": 0.7486033519553073, "grad_norm": 1.1346194744110107, "learning_rate": 3.139458766556739e-05, "loss": 1.3035, "step": 5226 }, { "epoch": 0.7487465979086091, "grad_norm": 1.2182563543319702, "learning_rate": 3.136083833402192e-05, "loss": 1.4748, "step": 5227 }, { "epoch": 0.7488898438619109, "grad_norm": 0.9515708088874817, "learning_rate": 3.132710377878942e-05, "loss": 1.5906, "step": 5228 }, { "epoch": 0.7490330898152128, "grad_norm": 1.014391541481018, "learning_rate": 3.1293384007132035e-05, "loss": 1.3078, "step": 5229 }, { "epoch": 0.7491763357685145, "grad_norm": 1.2892000675201416, "learning_rate": 3.1259679026308875e-05, "loss": 1.3661, "step": 5230 }, { "epoch": 0.7493195817218163, "grad_norm": 1.1478554010391235, "learning_rate": 3.12259888435758e-05, "loss": 1.388, "step": 5231 }, { "epoch": 0.7494628276751182, "grad_norm": 1.1152008771896362, "learning_rate": 3.11923134661854e-05, "loss": 1.3354, "step": 5232 }, { "epoch": 0.74960607362842, "grad_norm": 1.015796184539795, "learning_rate": 3.1158652901387186e-05, "loss": 1.3843, "step": 5233 }, { "epoch": 0.7497493195817219, "grad_norm": 1.1166149377822876, "learning_rate": 3.112500715642743e-05, "loss": 1.4432, "step": 5234 }, { "epoch": 0.7498925655350236, "grad_norm": 1.4819976091384888, "learning_rate": 3.1091376238549265e-05, "loss": 1.3664, "step": 5235 }, { "epoch": 0.7500358114883254, "grad_norm": 1.4364807605743408, "learning_rate": 3.105776015499255e-05, "loss": 1.3818, "step": 5236 }, { "epoch": 0.7501790574416273, "grad_norm": 1.0751434564590454, "learning_rate": 3.102415891299403e-05, "loss": 1.5086, "step": 5237 }, { "epoch": 0.7503223033949291, "grad_norm": 1.1340422630310059, "learning_rate": 3.099057251978724e-05, "loss": 1.2314, "step": 5238 }, { "epoch": 0.750465549348231, "grad_norm": 1.148711085319519, "learning_rate": 3.0957000982602436e-05, "loss": 1.3882, "step": 5239 }, { "epoch": 0.7506087953015327, "grad_norm": 1.3449269533157349, "learning_rate": 3.092344430866674e-05, "loss": 1.4088, "step": 5240 }, { "epoch": 0.7507520412548345, "grad_norm": 1.0129867792129517, "learning_rate": 3.088990250520417e-05, "loss": 1.4072, "step": 5241 }, { "epoch": 0.7508952872081364, "grad_norm": 0.9381749033927917, "learning_rate": 3.085637557943535e-05, "loss": 1.3497, "step": 5242 }, { "epoch": 0.7510385331614382, "grad_norm": 1.1388713121414185, "learning_rate": 3.082286353857782e-05, "loss": 1.404, "step": 5243 }, { "epoch": 0.75118177911474, "grad_norm": 1.2525579929351807, "learning_rate": 3.0789366389845905e-05, "loss": 1.3789, "step": 5244 }, { "epoch": 0.7513250250680418, "grad_norm": 1.1343111991882324, "learning_rate": 3.0755884140450705e-05, "loss": 1.469, "step": 5245 }, { "epoch": 0.7514682710213436, "grad_norm": 1.196776032447815, "learning_rate": 3.072241679760014e-05, "loss": 1.3112, "step": 5246 }, { "epoch": 0.7516115169746455, "grad_norm": 1.3610581159591675, "learning_rate": 3.068896436849888e-05, "loss": 1.4176, "step": 5247 }, { "epoch": 0.7517547629279473, "grad_norm": 1.1410738229751587, "learning_rate": 3.0655526860348396e-05, "loss": 1.3556, "step": 5248 }, { "epoch": 0.7518980088812491, "grad_norm": 1.3354347944259644, "learning_rate": 3.062210428034701e-05, "loss": 1.3808, "step": 5249 }, { "epoch": 0.752041254834551, "grad_norm": 1.3814704418182373, "learning_rate": 3.058869663568967e-05, "loss": 1.5877, "step": 5250 }, { "epoch": 0.7521845007878527, "grad_norm": 1.1505372524261475, "learning_rate": 3.055530393356831e-05, "loss": 1.3844, "step": 5251 }, { "epoch": 0.7523277467411545, "grad_norm": 1.1042063236236572, "learning_rate": 3.0521926181171566e-05, "loss": 1.223, "step": 5252 }, { "epoch": 0.7524709926944564, "grad_norm": 1.031711220741272, "learning_rate": 3.0488563385684764e-05, "loss": 1.2315, "step": 5253 }, { "epoch": 0.7526142386477582, "grad_norm": 1.1051437854766846, "learning_rate": 3.0455215554290128e-05, "loss": 1.5809, "step": 5254 }, { "epoch": 0.7527574846010601, "grad_norm": 1.1071306467056274, "learning_rate": 3.0421882694166602e-05, "loss": 1.3291, "step": 5255 }, { "epoch": 0.7529007305543618, "grad_norm": 1.0118831396102905, "learning_rate": 3.038856481248996e-05, "loss": 1.3289, "step": 5256 }, { "epoch": 0.7530439765076636, "grad_norm": 1.1136201620101929, "learning_rate": 3.0355261916432688e-05, "loss": 1.3588, "step": 5257 }, { "epoch": 0.7531872224609655, "grad_norm": 1.2333881855010986, "learning_rate": 3.032197401316409e-05, "loss": 1.4159, "step": 5258 }, { "epoch": 0.7533304684142673, "grad_norm": 1.2058079242706299, "learning_rate": 3.028870110985025e-05, "loss": 1.2923, "step": 5259 }, { "epoch": 0.7534737143675692, "grad_norm": 1.0736134052276611, "learning_rate": 3.0255443213653943e-05, "loss": 1.3585, "step": 5260 }, { "epoch": 0.7536169603208709, "grad_norm": 1.1557823419570923, "learning_rate": 3.0222200331734772e-05, "loss": 1.4585, "step": 5261 }, { "epoch": 0.7537602062741727, "grad_norm": 1.0705029964447021, "learning_rate": 3.0188972471249198e-05, "loss": 1.1966, "step": 5262 }, { "epoch": 0.7539034522274746, "grad_norm": 1.0374021530151367, "learning_rate": 3.015575963935027e-05, "loss": 1.44, "step": 5263 }, { "epoch": 0.7540466981807764, "grad_norm": 1.1262493133544922, "learning_rate": 3.0122561843187914e-05, "loss": 1.3474, "step": 5264 }, { "epoch": 0.7541899441340782, "grad_norm": 1.168507695198059, "learning_rate": 3.0089379089908786e-05, "loss": 1.5376, "step": 5265 }, { "epoch": 0.75433319008738, "grad_norm": 0.9681943655014038, "learning_rate": 3.005621138665633e-05, "loss": 1.3712, "step": 5266 }, { "epoch": 0.7544764360406818, "grad_norm": 1.2163854837417603, "learning_rate": 3.0023058740570754e-05, "loss": 1.5155, "step": 5267 }, { "epoch": 0.7546196819939837, "grad_norm": 1.104276418685913, "learning_rate": 2.9989921158788902e-05, "loss": 1.3428, "step": 5268 }, { "epoch": 0.7547629279472855, "grad_norm": 1.0557022094726562, "learning_rate": 2.9956798648444584e-05, "loss": 1.4927, "step": 5269 }, { "epoch": 0.7549061739005873, "grad_norm": 1.1239663362503052, "learning_rate": 2.9923691216668238e-05, "loss": 1.3739, "step": 5270 }, { "epoch": 0.7550494198538892, "grad_norm": 1.4194425344467163, "learning_rate": 2.989059887058703e-05, "loss": 1.3987, "step": 5271 }, { "epoch": 0.7551926658071909, "grad_norm": 1.0798317193984985, "learning_rate": 2.9857521617324914e-05, "loss": 1.3464, "step": 5272 }, { "epoch": 0.7553359117604928, "grad_norm": 0.913284182548523, "learning_rate": 2.9824459464002697e-05, "loss": 1.4302, "step": 5273 }, { "epoch": 0.7554791577137946, "grad_norm": 1.131149411201477, "learning_rate": 2.979141241773775e-05, "loss": 1.325, "step": 5274 }, { "epoch": 0.7556224036670964, "grad_norm": 1.1224730014801025, "learning_rate": 2.9758380485644323e-05, "loss": 1.3631, "step": 5275 }, { "epoch": 0.7557656496203983, "grad_norm": 1.1537500619888306, "learning_rate": 2.9725363674833362e-05, "loss": 1.3594, "step": 5276 }, { "epoch": 0.7559088955737, "grad_norm": 0.9476206302642822, "learning_rate": 2.9692361992412577e-05, "loss": 1.5843, "step": 5277 }, { "epoch": 0.7560521415270018, "grad_norm": 1.2734843492507935, "learning_rate": 2.965937544548645e-05, "loss": 1.616, "step": 5278 }, { "epoch": 0.7561953874803037, "grad_norm": 1.0564649105072021, "learning_rate": 2.9626404041156053e-05, "loss": 1.3728, "step": 5279 }, { "epoch": 0.7563386334336055, "grad_norm": 1.0379307270050049, "learning_rate": 2.9593447786519425e-05, "loss": 1.4802, "step": 5280 }, { "epoch": 0.7564818793869074, "grad_norm": 1.4160501956939697, "learning_rate": 2.956050668867123e-05, "loss": 1.4769, "step": 5281 }, { "epoch": 0.7566251253402091, "grad_norm": 1.135905146598816, "learning_rate": 2.952758075470281e-05, "loss": 1.4214, "step": 5282 }, { "epoch": 0.7567683712935109, "grad_norm": 1.1821452379226685, "learning_rate": 2.949466999170233e-05, "loss": 1.4386, "step": 5283 }, { "epoch": 0.7569116172468128, "grad_norm": 1.388188362121582, "learning_rate": 2.946177440675466e-05, "loss": 1.3534, "step": 5284 }, { "epoch": 0.7570548632001146, "grad_norm": 1.2739142179489136, "learning_rate": 2.942889400694141e-05, "loss": 1.4483, "step": 5285 }, { "epoch": 0.7571981091534165, "grad_norm": 1.1688488721847534, "learning_rate": 2.9396028799340924e-05, "loss": 1.4273, "step": 5286 }, { "epoch": 0.7573413551067182, "grad_norm": 1.1491557359695435, "learning_rate": 2.9363178791028257e-05, "loss": 1.5083, "step": 5287 }, { "epoch": 0.75748460106002, "grad_norm": 0.9826465249061584, "learning_rate": 2.9330343989075236e-05, "loss": 1.2902, "step": 5288 }, { "epoch": 0.7576278470133219, "grad_norm": 1.139729619026184, "learning_rate": 2.9297524400550325e-05, "loss": 1.2905, "step": 5289 }, { "epoch": 0.7577710929666237, "grad_norm": 1.1182794570922852, "learning_rate": 2.9264720032518756e-05, "loss": 1.3023, "step": 5290 }, { "epoch": 0.7579143389199255, "grad_norm": 1.0319541692733765, "learning_rate": 2.9231930892042614e-05, "loss": 1.3816, "step": 5291 }, { "epoch": 0.7580575848732274, "grad_norm": 1.2374285459518433, "learning_rate": 2.9199156986180463e-05, "loss": 1.5106, "step": 5292 }, { "epoch": 0.7582008308265291, "grad_norm": 0.9802339673042297, "learning_rate": 2.9166398321987774e-05, "loss": 1.5368, "step": 5293 }, { "epoch": 0.758344076779831, "grad_norm": 1.107215166091919, "learning_rate": 2.9133654906516672e-05, "loss": 1.5294, "step": 5294 }, { "epoch": 0.7584873227331328, "grad_norm": 1.0214197635650635, "learning_rate": 2.9100926746815992e-05, "loss": 1.4728, "step": 5295 }, { "epoch": 0.7586305686864346, "grad_norm": 1.0935407876968384, "learning_rate": 2.9068213849931338e-05, "loss": 1.5007, "step": 5296 }, { "epoch": 0.7587738146397365, "grad_norm": 1.3506642580032349, "learning_rate": 2.903551622290489e-05, "loss": 1.3123, "step": 5297 }, { "epoch": 0.7589170605930382, "grad_norm": 1.219518780708313, "learning_rate": 2.9002833872775735e-05, "loss": 1.2219, "step": 5298 }, { "epoch": 0.75906030654634, "grad_norm": 1.4454237222671509, "learning_rate": 2.8970166806579568e-05, "loss": 1.3357, "step": 5299 }, { "epoch": 0.7592035524996419, "grad_norm": 1.222368836402893, "learning_rate": 2.893751503134874e-05, "loss": 1.179, "step": 5300 }, { "epoch": 0.7593467984529437, "grad_norm": 1.0929145812988281, "learning_rate": 2.8904878554112367e-05, "loss": 1.496, "step": 5301 }, { "epoch": 0.7594900444062456, "grad_norm": 1.1646777391433716, "learning_rate": 2.8872257381896385e-05, "loss": 1.37, "step": 5302 }, { "epoch": 0.7596332903595473, "grad_norm": 1.1711838245391846, "learning_rate": 2.883965152172321e-05, "loss": 1.4795, "step": 5303 }, { "epoch": 0.7597765363128491, "grad_norm": 0.9670712351799011, "learning_rate": 2.880706098061211e-05, "loss": 1.3942, "step": 5304 }, { "epoch": 0.759919782266151, "grad_norm": 1.146607518196106, "learning_rate": 2.877448576557904e-05, "loss": 1.2952, "step": 5305 }, { "epoch": 0.7600630282194528, "grad_norm": 0.9714531898498535, "learning_rate": 2.874192588363662e-05, "loss": 1.3447, "step": 5306 }, { "epoch": 0.7602062741727547, "grad_norm": 1.1408425569534302, "learning_rate": 2.8709381341794204e-05, "loss": 1.4178, "step": 5307 }, { "epoch": 0.7603495201260564, "grad_norm": 1.2025913000106812, "learning_rate": 2.867685214705781e-05, "loss": 1.3849, "step": 5308 }, { "epoch": 0.7604927660793582, "grad_norm": 1.3199913501739502, "learning_rate": 2.8644338306430208e-05, "loss": 1.2801, "step": 5309 }, { "epoch": 0.7606360120326601, "grad_norm": 1.433640956878662, "learning_rate": 2.8611839826910757e-05, "loss": 1.4491, "step": 5310 }, { "epoch": 0.7607792579859619, "grad_norm": 1.2183752059936523, "learning_rate": 2.8579356715495577e-05, "loss": 1.5618, "step": 5311 }, { "epoch": 0.7609225039392638, "grad_norm": 1.1698110103607178, "learning_rate": 2.8546888979177578e-05, "loss": 1.4317, "step": 5312 }, { "epoch": 0.7610657498925655, "grad_norm": 1.5054632425308228, "learning_rate": 2.851443662494615e-05, "loss": 1.5355, "step": 5313 }, { "epoch": 0.7612089958458673, "grad_norm": 1.3222661018371582, "learning_rate": 2.848199965978753e-05, "loss": 1.4042, "step": 5314 }, { "epoch": 0.7613522417991692, "grad_norm": 1.151539921760559, "learning_rate": 2.8449578090684593e-05, "loss": 1.3628, "step": 5315 }, { "epoch": 0.761495487752471, "grad_norm": 0.9641599655151367, "learning_rate": 2.841717192461688e-05, "loss": 1.5038, "step": 5316 }, { "epoch": 0.7616387337057728, "grad_norm": 1.2110258340835571, "learning_rate": 2.8384781168560693e-05, "loss": 1.2809, "step": 5317 }, { "epoch": 0.7617819796590747, "grad_norm": 1.1271201372146606, "learning_rate": 2.835240582948886e-05, "loss": 1.4996, "step": 5318 }, { "epoch": 0.7619252256123764, "grad_norm": 1.3189318180084229, "learning_rate": 2.8320045914371074e-05, "loss": 1.2239, "step": 5319 }, { "epoch": 0.7620684715656783, "grad_norm": 1.5562286376953125, "learning_rate": 2.828770143017363e-05, "loss": 1.4378, "step": 5320 }, { "epoch": 0.7622117175189801, "grad_norm": 1.1331384181976318, "learning_rate": 2.8255372383859435e-05, "loss": 1.5428, "step": 5321 }, { "epoch": 0.7623549634722819, "grad_norm": 0.9790300726890564, "learning_rate": 2.8223058782388134e-05, "loss": 1.4503, "step": 5322 }, { "epoch": 0.7624982094255838, "grad_norm": 1.2932196855545044, "learning_rate": 2.8190760632716127e-05, "loss": 1.4202, "step": 5323 }, { "epoch": 0.7626414553788855, "grad_norm": 1.269231915473938, "learning_rate": 2.8158477941796336e-05, "loss": 1.3142, "step": 5324 }, { "epoch": 0.7627847013321873, "grad_norm": 1.205325961112976, "learning_rate": 2.8126210716578427e-05, "loss": 1.3755, "step": 5325 }, { "epoch": 0.7629279472854892, "grad_norm": 1.125288724899292, "learning_rate": 2.809395896400876e-05, "loss": 1.2237, "step": 5326 }, { "epoch": 0.763071193238791, "grad_norm": 1.1800947189331055, "learning_rate": 2.8061722691030335e-05, "loss": 1.4234, "step": 5327 }, { "epoch": 0.7632144391920929, "grad_norm": 1.1227686405181885, "learning_rate": 2.8029501904582835e-05, "loss": 1.4261, "step": 5328 }, { "epoch": 0.7633576851453946, "grad_norm": 1.1338157653808594, "learning_rate": 2.799729661160253e-05, "loss": 1.492, "step": 5329 }, { "epoch": 0.7635009310986964, "grad_norm": 1.0628855228424072, "learning_rate": 2.7965106819022504e-05, "loss": 1.5028, "step": 5330 }, { "epoch": 0.7636441770519983, "grad_norm": 1.0829917192459106, "learning_rate": 2.7932932533772417e-05, "loss": 1.3138, "step": 5331 }, { "epoch": 0.7637874230053001, "grad_norm": 1.5637726783752441, "learning_rate": 2.790077376277854e-05, "loss": 1.5129, "step": 5332 }, { "epoch": 0.763930668958602, "grad_norm": 1.0665093660354614, "learning_rate": 2.786863051296391e-05, "loss": 1.4956, "step": 5333 }, { "epoch": 0.7640739149119037, "grad_norm": 1.0621932744979858, "learning_rate": 2.7836502791248142e-05, "loss": 1.4074, "step": 5334 }, { "epoch": 0.7642171608652055, "grad_norm": 1.0974544286727905, "learning_rate": 2.7804390604547557e-05, "loss": 1.5255, "step": 5335 }, { "epoch": 0.7643604068185074, "grad_norm": 1.1569782495498657, "learning_rate": 2.777229395977511e-05, "loss": 1.3889, "step": 5336 }, { "epoch": 0.7645036527718092, "grad_norm": 1.2209279537200928, "learning_rate": 2.774021286384042e-05, "loss": 1.4404, "step": 5337 }, { "epoch": 0.764646898725111, "grad_norm": 1.0105319023132324, "learning_rate": 2.770814732364978e-05, "loss": 1.3208, "step": 5338 }, { "epoch": 0.7647901446784129, "grad_norm": 1.213148593902588, "learning_rate": 2.7676097346106034e-05, "loss": 1.333, "step": 5339 }, { "epoch": 0.7649333906317146, "grad_norm": 0.9935119152069092, "learning_rate": 2.7644062938108774e-05, "loss": 1.5289, "step": 5340 }, { "epoch": 0.7650766365850165, "grad_norm": 1.215820550918579, "learning_rate": 2.761204410655428e-05, "loss": 1.4, "step": 5341 }, { "epoch": 0.7652198825383183, "grad_norm": 1.1359901428222656, "learning_rate": 2.7580040858335345e-05, "loss": 1.3123, "step": 5342 }, { "epoch": 0.7653631284916201, "grad_norm": 1.2058206796646118, "learning_rate": 2.7548053200341496e-05, "loss": 1.4832, "step": 5343 }, { "epoch": 0.765506374444922, "grad_norm": 1.121343731880188, "learning_rate": 2.7516081139458883e-05, "loss": 1.4175, "step": 5344 }, { "epoch": 0.7656496203982237, "grad_norm": 1.0725351572036743, "learning_rate": 2.7484124682570302e-05, "loss": 1.292, "step": 5345 }, { "epoch": 0.7657928663515255, "grad_norm": 1.0862213373184204, "learning_rate": 2.7452183836555212e-05, "loss": 1.3847, "step": 5346 }, { "epoch": 0.7659361123048274, "grad_norm": 1.0742701292037964, "learning_rate": 2.7420258608289607e-05, "loss": 1.2805, "step": 5347 }, { "epoch": 0.7660793582581292, "grad_norm": 1.1603034734725952, "learning_rate": 2.7388349004646285e-05, "loss": 1.4104, "step": 5348 }, { "epoch": 0.7662226042114311, "grad_norm": 1.2115422487258911, "learning_rate": 2.7356455032494598e-05, "loss": 1.4186, "step": 5349 }, { "epoch": 0.7663658501647328, "grad_norm": 1.1273058652877808, "learning_rate": 2.7324576698700453e-05, "loss": 1.4385, "step": 5350 }, { "epoch": 0.7665090961180346, "grad_norm": 1.1561442613601685, "learning_rate": 2.7292714010126484e-05, "loss": 1.2927, "step": 5351 }, { "epoch": 0.7666523420713365, "grad_norm": 1.1237396001815796, "learning_rate": 2.7260866973632025e-05, "loss": 1.4667, "step": 5352 }, { "epoch": 0.7667955880246383, "grad_norm": 1.262074589729309, "learning_rate": 2.722903559607286e-05, "loss": 1.3351, "step": 5353 }, { "epoch": 0.7669388339779402, "grad_norm": 1.2682061195373535, "learning_rate": 2.719721988430153e-05, "loss": 1.5798, "step": 5354 }, { "epoch": 0.7670820799312419, "grad_norm": 1.0273834466934204, "learning_rate": 2.7165419845167172e-05, "loss": 1.4675, "step": 5355 }, { "epoch": 0.7672253258845437, "grad_norm": 1.5034195184707642, "learning_rate": 2.713363548551554e-05, "loss": 1.3283, "step": 5356 }, { "epoch": 0.7673685718378456, "grad_norm": 1.0740734338760376, "learning_rate": 2.7101866812189057e-05, "loss": 1.418, "step": 5357 }, { "epoch": 0.7675118177911474, "grad_norm": 1.1853370666503906, "learning_rate": 2.7070113832026643e-05, "loss": 1.5137, "step": 5358 }, { "epoch": 0.7676550637444493, "grad_norm": 1.1415952444076538, "learning_rate": 2.7038376551864008e-05, "loss": 1.3673, "step": 5359 }, { "epoch": 0.7677983096977511, "grad_norm": 1.1010288000106812, "learning_rate": 2.7006654978533417e-05, "loss": 1.3161, "step": 5360 }, { "epoch": 0.7679415556510528, "grad_norm": 1.1175627708435059, "learning_rate": 2.697494911886368e-05, "loss": 1.368, "step": 5361 }, { "epoch": 0.7680848016043547, "grad_norm": 1.1210414171218872, "learning_rate": 2.6943258979680308e-05, "loss": 1.5063, "step": 5362 }, { "epoch": 0.7682280475576565, "grad_norm": 1.042063593864441, "learning_rate": 2.6911584567805393e-05, "loss": 1.3535, "step": 5363 }, { "epoch": 0.7683712935109583, "grad_norm": 1.2704012393951416, "learning_rate": 2.687992589005768e-05, "loss": 1.4667, "step": 5364 }, { "epoch": 0.7685145394642602, "grad_norm": 1.1659082174301147, "learning_rate": 2.6848282953252467e-05, "loss": 1.2773, "step": 5365 }, { "epoch": 0.7686577854175619, "grad_norm": 0.9882277846336365, "learning_rate": 2.6816655764201714e-05, "loss": 1.3807, "step": 5366 }, { "epoch": 0.7688010313708638, "grad_norm": 1.1100071668624878, "learning_rate": 2.6785044329714004e-05, "loss": 1.2747, "step": 5367 }, { "epoch": 0.7689442773241656, "grad_norm": 1.0267250537872314, "learning_rate": 2.6753448656594393e-05, "loss": 1.3433, "step": 5368 }, { "epoch": 0.7690875232774674, "grad_norm": 1.0061768293380737, "learning_rate": 2.672186875164475e-05, "loss": 1.575, "step": 5369 }, { "epoch": 0.7692307692307693, "grad_norm": 1.0760829448699951, "learning_rate": 2.6690304621663442e-05, "loss": 1.4173, "step": 5370 }, { "epoch": 0.769374015184071, "grad_norm": 1.434942603111267, "learning_rate": 2.6658756273445386e-05, "loss": 1.4873, "step": 5371 }, { "epoch": 0.7695172611373728, "grad_norm": 1.1667957305908203, "learning_rate": 2.6627223713782157e-05, "loss": 1.4942, "step": 5372 }, { "epoch": 0.7696605070906747, "grad_norm": 1.3787634372711182, "learning_rate": 2.659570694946203e-05, "loss": 1.3868, "step": 5373 }, { "epoch": 0.7698037530439765, "grad_norm": 1.3617966175079346, "learning_rate": 2.6564205987269696e-05, "loss": 1.3939, "step": 5374 }, { "epoch": 0.7699469989972784, "grad_norm": 1.1538478136062622, "learning_rate": 2.653272083398656e-05, "loss": 1.1931, "step": 5375 }, { "epoch": 0.7700902449505801, "grad_norm": 1.117051124572754, "learning_rate": 2.650125149639059e-05, "loss": 1.5006, "step": 5376 }, { "epoch": 0.7702334909038819, "grad_norm": 1.1586629152297974, "learning_rate": 2.646979798125636e-05, "loss": 1.5604, "step": 5377 }, { "epoch": 0.7703767368571838, "grad_norm": 1.1163069009780884, "learning_rate": 2.643836029535507e-05, "loss": 1.3406, "step": 5378 }, { "epoch": 0.7705199828104856, "grad_norm": 1.017738938331604, "learning_rate": 2.6406938445454376e-05, "loss": 1.4145, "step": 5379 }, { "epoch": 0.7706632287637875, "grad_norm": 1.021098256111145, "learning_rate": 2.6375532438318716e-05, "loss": 1.4276, "step": 5380 }, { "epoch": 0.7708064747170893, "grad_norm": 1.1048510074615479, "learning_rate": 2.634414228070904e-05, "loss": 1.4764, "step": 5381 }, { "epoch": 0.770949720670391, "grad_norm": 1.0907765626907349, "learning_rate": 2.631276797938279e-05, "loss": 1.4274, "step": 5382 }, { "epoch": 0.7710929666236929, "grad_norm": 1.073249340057373, "learning_rate": 2.6281409541094127e-05, "loss": 1.3103, "step": 5383 }, { "epoch": 0.7712362125769947, "grad_norm": 1.1724997758865356, "learning_rate": 2.6250066972593735e-05, "loss": 1.4505, "step": 5384 }, { "epoch": 0.7713794585302965, "grad_norm": 1.0559632778167725, "learning_rate": 2.6218740280628896e-05, "loss": 1.4616, "step": 5385 }, { "epoch": 0.7715227044835984, "grad_norm": 1.138774037361145, "learning_rate": 2.618742947194347e-05, "loss": 1.6708, "step": 5386 }, { "epoch": 0.7716659504369001, "grad_norm": 1.1856560707092285, "learning_rate": 2.615613455327791e-05, "loss": 1.3443, "step": 5387 }, { "epoch": 0.771809196390202, "grad_norm": 1.24358069896698, "learning_rate": 2.612485553136925e-05, "loss": 1.4347, "step": 5388 }, { "epoch": 0.7719524423435038, "grad_norm": 1.1890244483947754, "learning_rate": 2.609359241295104e-05, "loss": 1.2328, "step": 5389 }, { "epoch": 0.7720956882968056, "grad_norm": 1.0892583131790161, "learning_rate": 2.6062345204753457e-05, "loss": 1.5489, "step": 5390 }, { "epoch": 0.7722389342501075, "grad_norm": 1.1279690265655518, "learning_rate": 2.6031113913503337e-05, "loss": 1.3821, "step": 5391 }, { "epoch": 0.7723821802034092, "grad_norm": 1.0113961696624756, "learning_rate": 2.5999898545923908e-05, "loss": 1.4202, "step": 5392 }, { "epoch": 0.772525426156711, "grad_norm": 0.9813072085380554, "learning_rate": 2.596869910873512e-05, "loss": 1.3065, "step": 5393 }, { "epoch": 0.7726686721100129, "grad_norm": 1.272920846939087, "learning_rate": 2.5937515608653408e-05, "loss": 1.4707, "step": 5394 }, { "epoch": 0.7728119180633147, "grad_norm": 1.004937767982483, "learning_rate": 2.5906348052391828e-05, "loss": 1.4025, "step": 5395 }, { "epoch": 0.7729551640166166, "grad_norm": 1.1815632581710815, "learning_rate": 2.587519644666001e-05, "loss": 1.3736, "step": 5396 }, { "epoch": 0.7730984099699183, "grad_norm": 1.2436124086380005, "learning_rate": 2.5844060798164038e-05, "loss": 1.4798, "step": 5397 }, { "epoch": 0.7732416559232201, "grad_norm": 1.1138877868652344, "learning_rate": 2.5812941113606726e-05, "loss": 1.4731, "step": 5398 }, { "epoch": 0.773384901876522, "grad_norm": 1.0761055946350098, "learning_rate": 2.578183739968738e-05, "loss": 1.3464, "step": 5399 }, { "epoch": 0.7735281478298238, "grad_norm": 1.0789021253585815, "learning_rate": 2.575074966310179e-05, "loss": 1.5199, "step": 5400 }, { "epoch": 0.7736713937831257, "grad_norm": 1.1697269678115845, "learning_rate": 2.5719677910542394e-05, "loss": 1.2341, "step": 5401 }, { "epoch": 0.7738146397364275, "grad_norm": 1.1344894170761108, "learning_rate": 2.568862214869825e-05, "loss": 1.4434, "step": 5402 }, { "epoch": 0.7739578856897292, "grad_norm": 1.090374231338501, "learning_rate": 2.5657582384254796e-05, "loss": 1.4797, "step": 5403 }, { "epoch": 0.7741011316430311, "grad_norm": 1.1563138961791992, "learning_rate": 2.562655862389418e-05, "loss": 1.5872, "step": 5404 }, { "epoch": 0.7742443775963329, "grad_norm": 1.1082887649536133, "learning_rate": 2.5595550874295027e-05, "loss": 1.4169, "step": 5405 }, { "epoch": 0.7743876235496348, "grad_norm": 1.0537232160568237, "learning_rate": 2.556455914213255e-05, "loss": 1.0906, "step": 5406 }, { "epoch": 0.7745308695029366, "grad_norm": 1.0760984420776367, "learning_rate": 2.5533583434078523e-05, "loss": 1.2199, "step": 5407 }, { "epoch": 0.7746741154562383, "grad_norm": 1.4577350616455078, "learning_rate": 2.550262375680117e-05, "loss": 1.3269, "step": 5408 }, { "epoch": 0.7748173614095402, "grad_norm": 1.0272389650344849, "learning_rate": 2.5471680116965425e-05, "loss": 1.2902, "step": 5409 }, { "epoch": 0.774960607362842, "grad_norm": 1.159835696220398, "learning_rate": 2.5440752521232692e-05, "loss": 1.4554, "step": 5410 }, { "epoch": 0.7751038533161438, "grad_norm": 1.2322609424591064, "learning_rate": 2.5409840976260855e-05, "loss": 1.6017, "step": 5411 }, { "epoch": 0.7752470992694457, "grad_norm": 1.060215711593628, "learning_rate": 2.5378945488704443e-05, "loss": 1.4242, "step": 5412 }, { "epoch": 0.7753903452227474, "grad_norm": 1.0659550428390503, "learning_rate": 2.5348066065214482e-05, "loss": 1.3048, "step": 5413 }, { "epoch": 0.7755335911760493, "grad_norm": 1.2544704675674438, "learning_rate": 2.5317202712438535e-05, "loss": 1.3722, "step": 5414 }, { "epoch": 0.7756768371293511, "grad_norm": 1.2200325727462769, "learning_rate": 2.5286355437020746e-05, "loss": 1.4668, "step": 5415 }, { "epoch": 0.7758200830826529, "grad_norm": 1.1702799797058105, "learning_rate": 2.5255524245601748e-05, "loss": 1.4443, "step": 5416 }, { "epoch": 0.7759633290359548, "grad_norm": 1.3354469537734985, "learning_rate": 2.5224709144818782e-05, "loss": 1.5694, "step": 5417 }, { "epoch": 0.7761065749892565, "grad_norm": 1.0452967882156372, "learning_rate": 2.51939101413055e-05, "loss": 1.3758, "step": 5418 }, { "epoch": 0.7762498209425583, "grad_norm": 0.9769402742385864, "learning_rate": 2.5163127241692165e-05, "loss": 1.3558, "step": 5419 }, { "epoch": 0.7763930668958602, "grad_norm": 1.2091920375823975, "learning_rate": 2.5132360452605673e-05, "loss": 1.2855, "step": 5420 }, { "epoch": 0.776536312849162, "grad_norm": 1.1534196138381958, "learning_rate": 2.5101609780669266e-05, "loss": 1.4432, "step": 5421 }, { "epoch": 0.7766795588024639, "grad_norm": 1.0705344676971436, "learning_rate": 2.507087523250282e-05, "loss": 1.3521, "step": 5422 }, { "epoch": 0.7768228047557656, "grad_norm": 1.0681270360946655, "learning_rate": 2.5040156814722727e-05, "loss": 1.3922, "step": 5423 }, { "epoch": 0.7769660507090674, "grad_norm": 1.2822209596633911, "learning_rate": 2.50094545339419e-05, "loss": 1.2994, "step": 5424 }, { "epoch": 0.7771092966623693, "grad_norm": 1.1417210102081299, "learning_rate": 2.4978768396769824e-05, "loss": 1.4651, "step": 5425 }, { "epoch": 0.7772525426156711, "grad_norm": 0.9796889424324036, "learning_rate": 2.494809840981236e-05, "loss": 1.3796, "step": 5426 }, { "epoch": 0.777395788568973, "grad_norm": 1.0135589838027954, "learning_rate": 2.491744457967209e-05, "loss": 1.4104, "step": 5427 }, { "epoch": 0.7775390345222748, "grad_norm": 1.2210229635238647, "learning_rate": 2.4886806912948035e-05, "loss": 1.4653, "step": 5428 }, { "epoch": 0.7776822804755765, "grad_norm": 1.1993392705917358, "learning_rate": 2.4856185416235656e-05, "loss": 1.4144, "step": 5429 }, { "epoch": 0.7778255264288784, "grad_norm": 1.2444665431976318, "learning_rate": 2.4825580096126998e-05, "loss": 1.5123, "step": 5430 }, { "epoch": 0.7779687723821802, "grad_norm": 1.0430423021316528, "learning_rate": 2.479499095921074e-05, "loss": 1.3916, "step": 5431 }, { "epoch": 0.778112018335482, "grad_norm": 1.2109394073486328, "learning_rate": 2.4764418012071855e-05, "loss": 1.3192, "step": 5432 }, { "epoch": 0.7782552642887839, "grad_norm": 1.0087724924087524, "learning_rate": 2.473386126129198e-05, "loss": 1.3951, "step": 5433 }, { "epoch": 0.7783985102420856, "grad_norm": 1.2584174871444702, "learning_rate": 2.470332071344923e-05, "loss": 1.5608, "step": 5434 }, { "epoch": 0.7785417561953875, "grad_norm": 1.0493251085281372, "learning_rate": 2.4672796375118225e-05, "loss": 1.2775, "step": 5435 }, { "epoch": 0.7786850021486893, "grad_norm": 1.1680946350097656, "learning_rate": 2.4642288252870106e-05, "loss": 1.4167, "step": 5436 }, { "epoch": 0.7788282481019911, "grad_norm": 1.1928473711013794, "learning_rate": 2.461179635327251e-05, "loss": 1.3766, "step": 5437 }, { "epoch": 0.778971494055293, "grad_norm": 1.3128305673599243, "learning_rate": 2.458132068288962e-05, "loss": 1.2888, "step": 5438 }, { "epoch": 0.7791147400085947, "grad_norm": 1.0615981817245483, "learning_rate": 2.4550861248282032e-05, "loss": 1.4929, "step": 5439 }, { "epoch": 0.7792579859618965, "grad_norm": 1.122450590133667, "learning_rate": 2.4520418056006912e-05, "loss": 1.3693, "step": 5440 }, { "epoch": 0.7794012319151984, "grad_norm": 1.0633469820022583, "learning_rate": 2.4489991112618017e-05, "loss": 1.3652, "step": 5441 }, { "epoch": 0.7795444778685002, "grad_norm": 1.078426480293274, "learning_rate": 2.4459580424665417e-05, "loss": 1.4458, "step": 5442 }, { "epoch": 0.7796877238218021, "grad_norm": 1.1719071865081787, "learning_rate": 2.4429185998695825e-05, "loss": 1.3698, "step": 5443 }, { "epoch": 0.7798309697751038, "grad_norm": 0.9753327965736389, "learning_rate": 2.43988078412524e-05, "loss": 1.4183, "step": 5444 }, { "epoch": 0.7799742157284056, "grad_norm": 1.174289345741272, "learning_rate": 2.4368445958874807e-05, "loss": 1.5046, "step": 5445 }, { "epoch": 0.7801174616817075, "grad_norm": 1.263795018196106, "learning_rate": 2.4338100358099235e-05, "loss": 1.3085, "step": 5446 }, { "epoch": 0.7802607076350093, "grad_norm": 1.1214165687561035, "learning_rate": 2.430777104545826e-05, "loss": 1.4078, "step": 5447 }, { "epoch": 0.7804039535883112, "grad_norm": 1.4973238706588745, "learning_rate": 2.4277458027481104e-05, "loss": 1.3537, "step": 5448 }, { "epoch": 0.780547199541613, "grad_norm": 1.1330205202102661, "learning_rate": 2.4247161310693434e-05, "loss": 1.4106, "step": 5449 }, { "epoch": 0.7806904454949147, "grad_norm": 0.9895519614219666, "learning_rate": 2.4216880901617313e-05, "loss": 1.2789, "step": 5450 }, { "epoch": 0.7808336914482166, "grad_norm": 1.1539469957351685, "learning_rate": 2.4186616806771357e-05, "loss": 1.4034, "step": 5451 }, { "epoch": 0.7809769374015184, "grad_norm": 1.3670371770858765, "learning_rate": 2.415636903267078e-05, "loss": 1.4371, "step": 5452 }, { "epoch": 0.7811201833548203, "grad_norm": 1.038837194442749, "learning_rate": 2.412613758582707e-05, "loss": 1.3046, "step": 5453 }, { "epoch": 0.7812634293081221, "grad_norm": 0.950283944606781, "learning_rate": 2.4095922472748367e-05, "loss": 1.6969, "step": 5454 }, { "epoch": 0.7814066752614238, "grad_norm": 1.1155037879943848, "learning_rate": 2.4065723699939203e-05, "loss": 1.6149, "step": 5455 }, { "epoch": 0.7815499212147257, "grad_norm": 1.0110894441604614, "learning_rate": 2.4035541273900663e-05, "loss": 1.3217, "step": 5456 }, { "epoch": 0.7816931671680275, "grad_norm": 1.0786069631576538, "learning_rate": 2.4005375201130274e-05, "loss": 1.3999, "step": 5457 }, { "epoch": 0.7818364131213293, "grad_norm": 1.023313283920288, "learning_rate": 2.3975225488121976e-05, "loss": 1.3233, "step": 5458 }, { "epoch": 0.7819796590746312, "grad_norm": 1.130247712135315, "learning_rate": 2.3945092141366343e-05, "loss": 1.4929, "step": 5459 }, { "epoch": 0.7821229050279329, "grad_norm": 1.0118794441223145, "learning_rate": 2.3914975167350328e-05, "loss": 1.4408, "step": 5460 }, { "epoch": 0.7822661509812348, "grad_norm": 1.20806086063385, "learning_rate": 2.3884874572557316e-05, "loss": 1.3679, "step": 5461 }, { "epoch": 0.7824093969345366, "grad_norm": 1.1729750633239746, "learning_rate": 2.3854790363467262e-05, "loss": 1.1888, "step": 5462 }, { "epoch": 0.7825526428878384, "grad_norm": 1.148452877998352, "learning_rate": 2.3824722546556533e-05, "loss": 1.1926, "step": 5463 }, { "epoch": 0.7826958888411403, "grad_norm": 1.1590125560760498, "learning_rate": 2.3794671128297995e-05, "loss": 1.3049, "step": 5464 }, { "epoch": 0.782839134794442, "grad_norm": 1.3871899843215942, "learning_rate": 2.3764636115160978e-05, "loss": 1.3246, "step": 5465 }, { "epoch": 0.7829823807477438, "grad_norm": 1.0718415975570679, "learning_rate": 2.3734617513611266e-05, "loss": 1.2754, "step": 5466 }, { "epoch": 0.7831256267010457, "grad_norm": 1.270702600479126, "learning_rate": 2.3704615330111156e-05, "loss": 1.576, "step": 5467 }, { "epoch": 0.7832688726543475, "grad_norm": 1.151659369468689, "learning_rate": 2.3674629571119332e-05, "loss": 1.3378, "step": 5468 }, { "epoch": 0.7834121186076494, "grad_norm": 1.2349132299423218, "learning_rate": 2.3644660243090966e-05, "loss": 1.2462, "step": 5469 }, { "epoch": 0.7835553645609512, "grad_norm": 1.009192943572998, "learning_rate": 2.3614707352477804e-05, "loss": 1.3414, "step": 5470 }, { "epoch": 0.7836986105142529, "grad_norm": 1.0657734870910645, "learning_rate": 2.358477090572787e-05, "loss": 1.3535, "step": 5471 }, { "epoch": 0.7838418564675548, "grad_norm": 1.024234414100647, "learning_rate": 2.3554850909285786e-05, "loss": 1.1659, "step": 5472 }, { "epoch": 0.7839851024208566, "grad_norm": 1.292818546295166, "learning_rate": 2.3524947369592575e-05, "loss": 1.4461, "step": 5473 }, { "epoch": 0.7841283483741585, "grad_norm": 1.049169659614563, "learning_rate": 2.3495060293085735e-05, "loss": 1.3259, "step": 5474 }, { "epoch": 0.7842715943274603, "grad_norm": 1.074534296989441, "learning_rate": 2.346518968619924e-05, "loss": 1.3282, "step": 5475 }, { "epoch": 0.784414840280762, "grad_norm": 1.089288592338562, "learning_rate": 2.34353355553634e-05, "loss": 1.5309, "step": 5476 }, { "epoch": 0.7845580862340639, "grad_norm": 1.935948133468628, "learning_rate": 2.340549790700517e-05, "loss": 1.2538, "step": 5477 }, { "epoch": 0.7847013321873657, "grad_norm": 1.1959465742111206, "learning_rate": 2.337567674754785e-05, "loss": 1.45, "step": 5478 }, { "epoch": 0.7848445781406675, "grad_norm": 1.268339991569519, "learning_rate": 2.3345872083411135e-05, "loss": 1.3653, "step": 5479 }, { "epoch": 0.7849878240939694, "grad_norm": 1.0768113136291504, "learning_rate": 2.3316083921011235e-05, "loss": 1.2653, "step": 5480 }, { "epoch": 0.7851310700472711, "grad_norm": 1.1144527196884155, "learning_rate": 2.328631226676089e-05, "loss": 1.2729, "step": 5481 }, { "epoch": 0.785274316000573, "grad_norm": 1.3518757820129395, "learning_rate": 2.3256557127069124e-05, "loss": 1.5266, "step": 5482 }, { "epoch": 0.7854175619538748, "grad_norm": 1.1734957695007324, "learning_rate": 2.3226818508341496e-05, "loss": 1.45, "step": 5483 }, { "epoch": 0.7855608079071766, "grad_norm": 1.0362545251846313, "learning_rate": 2.3197096416980013e-05, "loss": 1.2805, "step": 5484 }, { "epoch": 0.7857040538604785, "grad_norm": 1.1859338283538818, "learning_rate": 2.3167390859383088e-05, "loss": 1.4235, "step": 5485 }, { "epoch": 0.7858472998137802, "grad_norm": 0.943030059337616, "learning_rate": 2.3137701841945627e-05, "loss": 1.3226, "step": 5486 }, { "epoch": 0.785990545767082, "grad_norm": 1.1166712045669556, "learning_rate": 2.3108029371058848e-05, "loss": 1.4324, "step": 5487 }, { "epoch": 0.7861337917203839, "grad_norm": 1.2404212951660156, "learning_rate": 2.30783734531106e-05, "loss": 1.3031, "step": 5488 }, { "epoch": 0.7862770376736857, "grad_norm": 1.4043556451797485, "learning_rate": 2.304873409448506e-05, "loss": 1.4199, "step": 5489 }, { "epoch": 0.7864202836269876, "grad_norm": 1.0839459896087646, "learning_rate": 2.3019111301562802e-05, "loss": 1.3166, "step": 5490 }, { "epoch": 0.7865635295802894, "grad_norm": 0.984799325466156, "learning_rate": 2.2989505080720886e-05, "loss": 1.2436, "step": 5491 }, { "epoch": 0.7867067755335911, "grad_norm": 1.297173261642456, "learning_rate": 2.2959915438332823e-05, "loss": 1.2736, "step": 5492 }, { "epoch": 0.786850021486893, "grad_norm": 1.1529662609100342, "learning_rate": 2.293034238076851e-05, "loss": 1.449, "step": 5493 }, { "epoch": 0.7869932674401948, "grad_norm": 1.00979745388031, "learning_rate": 2.2900785914394308e-05, "loss": 1.437, "step": 5494 }, { "epoch": 0.7871365133934967, "grad_norm": 1.2558783292770386, "learning_rate": 2.2871246045572993e-05, "loss": 1.4626, "step": 5495 }, { "epoch": 0.7872797593467985, "grad_norm": 1.2355085611343384, "learning_rate": 2.2841722780663788e-05, "loss": 1.4002, "step": 5496 }, { "epoch": 0.7874230053001002, "grad_norm": 1.0001651048660278, "learning_rate": 2.2812216126022245e-05, "loss": 1.4953, "step": 5497 }, { "epoch": 0.7875662512534021, "grad_norm": 0.9824298620223999, "learning_rate": 2.2782726088000495e-05, "loss": 1.3526, "step": 5498 }, { "epoch": 0.7877094972067039, "grad_norm": 1.1729116439819336, "learning_rate": 2.2753252672947022e-05, "loss": 1.5695, "step": 5499 }, { "epoch": 0.7878527431600058, "grad_norm": 1.2363277673721313, "learning_rate": 2.2723795887206657e-05, "loss": 1.5002, "step": 5500 }, { "epoch": 0.7879959891133076, "grad_norm": 0.9645327925682068, "learning_rate": 2.269435573712071e-05, "loss": 1.4041, "step": 5501 }, { "epoch": 0.7881392350666093, "grad_norm": 1.184617519378662, "learning_rate": 2.2664932229027024e-05, "loss": 1.5081, "step": 5502 }, { "epoch": 0.7882824810199112, "grad_norm": 1.2025713920593262, "learning_rate": 2.2635525369259648e-05, "loss": 1.4568, "step": 5503 }, { "epoch": 0.788425726973213, "grad_norm": 1.0535212755203247, "learning_rate": 2.260613516414919e-05, "loss": 1.2905, "step": 5504 }, { "epoch": 0.7885689729265148, "grad_norm": 1.1532846689224243, "learning_rate": 2.2576761620022626e-05, "loss": 1.3319, "step": 5505 }, { "epoch": 0.7887122188798167, "grad_norm": 1.0475072860717773, "learning_rate": 2.254740474320336e-05, "loss": 1.2129, "step": 5506 }, { "epoch": 0.7888554648331184, "grad_norm": 1.084766149520874, "learning_rate": 2.251806454001123e-05, "loss": 1.5613, "step": 5507 }, { "epoch": 0.7889987107864203, "grad_norm": 1.2487167119979858, "learning_rate": 2.248874101676236e-05, "loss": 1.3728, "step": 5508 }, { "epoch": 0.7891419567397221, "grad_norm": 1.4352549314498901, "learning_rate": 2.245943417976948e-05, "loss": 1.3487, "step": 5509 }, { "epoch": 0.7892852026930239, "grad_norm": 1.1768232583999634, "learning_rate": 2.2430144035341617e-05, "loss": 1.3665, "step": 5510 }, { "epoch": 0.7894284486463258, "grad_norm": 1.257313847541809, "learning_rate": 2.240087058978415e-05, "loss": 1.5307, "step": 5511 }, { "epoch": 0.7895716945996276, "grad_norm": 1.1198176145553589, "learning_rate": 2.2371613849398975e-05, "loss": 1.2624, "step": 5512 }, { "epoch": 0.7897149405529293, "grad_norm": 0.9662644267082214, "learning_rate": 2.234237382048433e-05, "loss": 1.3375, "step": 5513 }, { "epoch": 0.7898581865062312, "grad_norm": 1.083932876586914, "learning_rate": 2.231315050933488e-05, "loss": 1.5142, "step": 5514 }, { "epoch": 0.790001432459533, "grad_norm": 1.2198902368545532, "learning_rate": 2.228394392224167e-05, "loss": 1.3904, "step": 5515 }, { "epoch": 0.7901446784128349, "grad_norm": 1.0237241983413696, "learning_rate": 2.2254754065492157e-05, "loss": 1.4946, "step": 5516 }, { "epoch": 0.7902879243661367, "grad_norm": 1.0335474014282227, "learning_rate": 2.2225580945370228e-05, "loss": 1.4157, "step": 5517 }, { "epoch": 0.7904311703194384, "grad_norm": 1.2380541563034058, "learning_rate": 2.2196424568156073e-05, "loss": 1.3634, "step": 5518 }, { "epoch": 0.7905744162727403, "grad_norm": 1.2220160961151123, "learning_rate": 2.2167284940126344e-05, "loss": 1.5599, "step": 5519 }, { "epoch": 0.7907176622260421, "grad_norm": 0.9633556008338928, "learning_rate": 2.2138162067554158e-05, "loss": 1.3268, "step": 5520 }, { "epoch": 0.790860908179344, "grad_norm": 1.1982711553573608, "learning_rate": 2.210905595670887e-05, "loss": 1.4092, "step": 5521 }, { "epoch": 0.7910041541326458, "grad_norm": 0.9696144461631775, "learning_rate": 2.207996661385634e-05, "loss": 1.3695, "step": 5522 }, { "epoch": 0.7911474000859475, "grad_norm": 1.1793391704559326, "learning_rate": 2.2050894045258762e-05, "loss": 1.414, "step": 5523 }, { "epoch": 0.7912906460392494, "grad_norm": 1.1220320463180542, "learning_rate": 2.2021838257174765e-05, "loss": 1.4761, "step": 5524 }, { "epoch": 0.7914338919925512, "grad_norm": 1.2628411054611206, "learning_rate": 2.1992799255859364e-05, "loss": 1.4401, "step": 5525 }, { "epoch": 0.791577137945853, "grad_norm": 1.0343470573425293, "learning_rate": 2.196377704756385e-05, "loss": 1.4623, "step": 5526 }, { "epoch": 0.7917203838991549, "grad_norm": 1.1845364570617676, "learning_rate": 2.1934771638536054e-05, "loss": 1.2764, "step": 5527 }, { "epoch": 0.7918636298524566, "grad_norm": 1.0869394540786743, "learning_rate": 2.1905783035020157e-05, "loss": 1.548, "step": 5528 }, { "epoch": 0.7920068758057585, "grad_norm": 1.023400068283081, "learning_rate": 2.18768112432566e-05, "loss": 1.4783, "step": 5529 }, { "epoch": 0.7921501217590603, "grad_norm": 1.1633423566818237, "learning_rate": 2.1847856269482303e-05, "loss": 1.3347, "step": 5530 }, { "epoch": 0.7922933677123621, "grad_norm": 1.1528067588806152, "learning_rate": 2.1818918119930644e-05, "loss": 1.4161, "step": 5531 }, { "epoch": 0.792436613665664, "grad_norm": 1.2658747434616089, "learning_rate": 2.1789996800831215e-05, "loss": 1.455, "step": 5532 }, { "epoch": 0.7925798596189657, "grad_norm": 0.9751430749893188, "learning_rate": 2.1761092318410072e-05, "loss": 1.4101, "step": 5533 }, { "epoch": 0.7927231055722676, "grad_norm": 1.0840858221054077, "learning_rate": 2.1732204678889632e-05, "loss": 1.3339, "step": 5534 }, { "epoch": 0.7928663515255694, "grad_norm": 1.1222376823425293, "learning_rate": 2.1703333888488708e-05, "loss": 1.4052, "step": 5535 }, { "epoch": 0.7930095974788712, "grad_norm": 1.1830703020095825, "learning_rate": 2.1674479953422477e-05, "loss": 1.2824, "step": 5536 }, { "epoch": 0.7931528434321731, "grad_norm": 1.0536298751831055, "learning_rate": 2.1645642879902406e-05, "loss": 1.3529, "step": 5537 }, { "epoch": 0.7932960893854749, "grad_norm": 1.0150973796844482, "learning_rate": 2.1616822674136473e-05, "loss": 1.2961, "step": 5538 }, { "epoch": 0.7934393353387766, "grad_norm": 1.3773647546768188, "learning_rate": 2.1588019342328968e-05, "loss": 1.5177, "step": 5539 }, { "epoch": 0.7935825812920785, "grad_norm": 1.0160630941390991, "learning_rate": 2.155923289068048e-05, "loss": 1.5568, "step": 5540 }, { "epoch": 0.7937258272453803, "grad_norm": 1.009186863899231, "learning_rate": 2.153046332538804e-05, "loss": 1.2756, "step": 5541 }, { "epoch": 0.7938690731986822, "grad_norm": 1.1777245998382568, "learning_rate": 2.1501710652645034e-05, "loss": 1.0728, "step": 5542 }, { "epoch": 0.794012319151984, "grad_norm": 0.9974313974380493, "learning_rate": 2.1472974878641183e-05, "loss": 1.3715, "step": 5543 }, { "epoch": 0.7941555651052857, "grad_norm": 1.2147984504699707, "learning_rate": 2.1444256009562602e-05, "loss": 1.5052, "step": 5544 }, { "epoch": 0.7942988110585876, "grad_norm": 1.1711236238479614, "learning_rate": 2.1415554051591746e-05, "loss": 1.3083, "step": 5545 }, { "epoch": 0.7944420570118894, "grad_norm": 1.1466517448425293, "learning_rate": 2.1386869010907472e-05, "loss": 1.3658, "step": 5546 }, { "epoch": 0.7945853029651913, "grad_norm": 1.2629218101501465, "learning_rate": 2.1358200893684898e-05, "loss": 1.3879, "step": 5547 }, { "epoch": 0.7947285489184931, "grad_norm": 0.9764704704284668, "learning_rate": 2.1329549706095562e-05, "loss": 1.5301, "step": 5548 }, { "epoch": 0.7948717948717948, "grad_norm": 1.107223629951477, "learning_rate": 2.1300915454307435e-05, "loss": 1.3698, "step": 5549 }, { "epoch": 0.7950150408250967, "grad_norm": 1.0651649236679077, "learning_rate": 2.1272298144484682e-05, "loss": 1.5032, "step": 5550 }, { "epoch": 0.7951582867783985, "grad_norm": 1.0884113311767578, "learning_rate": 2.124369778278794e-05, "loss": 1.5723, "step": 5551 }, { "epoch": 0.7953015327317003, "grad_norm": 1.1962863206863403, "learning_rate": 2.1215114375374144e-05, "loss": 1.437, "step": 5552 }, { "epoch": 0.7954447786850022, "grad_norm": 1.154276967048645, "learning_rate": 2.1186547928396594e-05, "loss": 1.3435, "step": 5553 }, { "epoch": 0.7955880246383039, "grad_norm": 1.0191293954849243, "learning_rate": 2.115799844800498e-05, "loss": 1.4038, "step": 5554 }, { "epoch": 0.7957312705916058, "grad_norm": 1.0362082719802856, "learning_rate": 2.1129465940345206e-05, "loss": 1.3995, "step": 5555 }, { "epoch": 0.7958745165449076, "grad_norm": 0.9984225630760193, "learning_rate": 2.1100950411559706e-05, "loss": 1.376, "step": 5556 }, { "epoch": 0.7960177624982094, "grad_norm": 0.9830582141876221, "learning_rate": 2.1072451867787146e-05, "loss": 1.3807, "step": 5557 }, { "epoch": 0.7961610084515113, "grad_norm": 1.4728686809539795, "learning_rate": 2.104397031516253e-05, "loss": 1.4196, "step": 5558 }, { "epoch": 0.7963042544048131, "grad_norm": 1.1464824676513672, "learning_rate": 2.1015505759817223e-05, "loss": 1.4044, "step": 5559 }, { "epoch": 0.7964475003581148, "grad_norm": 1.0864269733428955, "learning_rate": 2.098705820787901e-05, "loss": 1.3833, "step": 5560 }, { "epoch": 0.7965907463114167, "grad_norm": 1.2495574951171875, "learning_rate": 2.0958627665471865e-05, "loss": 1.5435, "step": 5561 }, { "epoch": 0.7967339922647185, "grad_norm": 1.1132633686065674, "learning_rate": 2.093021413871622e-05, "loss": 1.4766, "step": 5562 }, { "epoch": 0.7968772382180204, "grad_norm": 1.2485014200210571, "learning_rate": 2.0901817633728804e-05, "loss": 1.2584, "step": 5563 }, { "epoch": 0.7970204841713222, "grad_norm": 1.3476371765136719, "learning_rate": 2.087343815662267e-05, "loss": 1.5156, "step": 5564 }, { "epoch": 0.7971637301246239, "grad_norm": 1.319467544555664, "learning_rate": 2.0845075713507222e-05, "loss": 1.3051, "step": 5565 }, { "epoch": 0.7973069760779258, "grad_norm": 1.1854621171951294, "learning_rate": 2.0816730310488186e-05, "loss": 1.529, "step": 5566 }, { "epoch": 0.7974502220312276, "grad_norm": 1.1623022556304932, "learning_rate": 2.0788401953667668e-05, "loss": 1.1458, "step": 5567 }, { "epoch": 0.7975934679845295, "grad_norm": 0.9959256649017334, "learning_rate": 2.0760090649144005e-05, "loss": 1.4789, "step": 5568 }, { "epoch": 0.7977367139378313, "grad_norm": 1.1312322616577148, "learning_rate": 2.0731796403011906e-05, "loss": 1.4353, "step": 5569 }, { "epoch": 0.797879959891133, "grad_norm": 1.158463716506958, "learning_rate": 2.070351922136251e-05, "loss": 1.5469, "step": 5570 }, { "epoch": 0.7980232058444349, "grad_norm": 1.077291488647461, "learning_rate": 2.0675259110283117e-05, "loss": 1.5523, "step": 5571 }, { "epoch": 0.7981664517977367, "grad_norm": 1.0715768337249756, "learning_rate": 2.0647016075857463e-05, "loss": 1.4129, "step": 5572 }, { "epoch": 0.7983096977510385, "grad_norm": 0.9449760913848877, "learning_rate": 2.0618790124165556e-05, "loss": 1.3355, "step": 5573 }, { "epoch": 0.7984529437043404, "grad_norm": 0.9706355929374695, "learning_rate": 2.059058126128376e-05, "loss": 1.5075, "step": 5574 }, { "epoch": 0.7985961896576421, "grad_norm": 1.1908096075057983, "learning_rate": 2.0562389493284763e-05, "loss": 1.3276, "step": 5575 }, { "epoch": 0.798739435610944, "grad_norm": 1.3943650722503662, "learning_rate": 2.0534214826237484e-05, "loss": 1.3936, "step": 5576 }, { "epoch": 0.7988826815642458, "grad_norm": 1.127379298210144, "learning_rate": 2.0506057266207313e-05, "loss": 1.3068, "step": 5577 }, { "epoch": 0.7990259275175476, "grad_norm": 1.2279309034347534, "learning_rate": 2.047791681925586e-05, "loss": 1.2535, "step": 5578 }, { "epoch": 0.7991691734708495, "grad_norm": 1.0406328439712524, "learning_rate": 2.0449793491441028e-05, "loss": 1.3955, "step": 5579 }, { "epoch": 0.7993124194241513, "grad_norm": 1.0676759481430054, "learning_rate": 2.0421687288817058e-05, "loss": 1.3891, "step": 5580 }, { "epoch": 0.799455665377453, "grad_norm": 1.1169242858886719, "learning_rate": 2.0393598217434616e-05, "loss": 1.3966, "step": 5581 }, { "epoch": 0.7995989113307549, "grad_norm": 1.1342101097106934, "learning_rate": 2.0365526283340508e-05, "loss": 1.3439, "step": 5582 }, { "epoch": 0.7997421572840567, "grad_norm": 1.111365556716919, "learning_rate": 2.033747149257793e-05, "loss": 1.5338, "step": 5583 }, { "epoch": 0.7998854032373586, "grad_norm": 1.206855058670044, "learning_rate": 2.030943385118641e-05, "loss": 1.3817, "step": 5584 }, { "epoch": 0.8000286491906604, "grad_norm": 0.9659613966941833, "learning_rate": 2.028141336520174e-05, "loss": 1.3513, "step": 5585 }, { "epoch": 0.8001718951439621, "grad_norm": 1.142003059387207, "learning_rate": 2.0253410040656073e-05, "loss": 1.3998, "step": 5586 }, { "epoch": 0.800315141097264, "grad_norm": 1.1976842880249023, "learning_rate": 2.0225423883577754e-05, "loss": 1.3813, "step": 5587 }, { "epoch": 0.8004583870505658, "grad_norm": 1.3971842527389526, "learning_rate": 2.0197454899991573e-05, "loss": 1.3268, "step": 5588 }, { "epoch": 0.8006016330038677, "grad_norm": 1.2353951930999756, "learning_rate": 2.0169503095918586e-05, "loss": 1.2486, "step": 5589 }, { "epoch": 0.8007448789571695, "grad_norm": 1.0223701000213623, "learning_rate": 2.014156847737605e-05, "loss": 1.4485, "step": 5590 }, { "epoch": 0.8008881249104712, "grad_norm": 1.1558271646499634, "learning_rate": 2.0113651050377623e-05, "loss": 1.5249, "step": 5591 }, { "epoch": 0.8010313708637731, "grad_norm": 1.0448578596115112, "learning_rate": 2.0085750820933257e-05, "loss": 1.3629, "step": 5592 }, { "epoch": 0.8011746168170749, "grad_norm": 1.015825629234314, "learning_rate": 2.005786779504917e-05, "loss": 1.4305, "step": 5593 }, { "epoch": 0.8013178627703768, "grad_norm": 1.1189409494400024, "learning_rate": 2.0030001978727874e-05, "loss": 1.3261, "step": 5594 }, { "epoch": 0.8014611087236786, "grad_norm": 1.1034654378890991, "learning_rate": 2.0002153377968213e-05, "loss": 1.3288, "step": 5595 }, { "epoch": 0.8016043546769803, "grad_norm": 1.2844926118850708, "learning_rate": 1.997432199876531e-05, "loss": 1.4709, "step": 5596 }, { "epoch": 0.8017476006302822, "grad_norm": 1.0648685693740845, "learning_rate": 1.994650784711053e-05, "loss": 1.2189, "step": 5597 }, { "epoch": 0.801890846583584, "grad_norm": 1.1838964223861694, "learning_rate": 1.9918710928991567e-05, "loss": 1.4195, "step": 5598 }, { "epoch": 0.8020340925368858, "grad_norm": 1.000622034072876, "learning_rate": 1.9890931250392498e-05, "loss": 1.4605, "step": 5599 }, { "epoch": 0.8021773384901877, "grad_norm": 1.2030577659606934, "learning_rate": 1.9863168817293497e-05, "loss": 1.35, "step": 5600 }, { "epoch": 0.8023205844434895, "grad_norm": 1.0764532089233398, "learning_rate": 1.983542363567118e-05, "loss": 1.5422, "step": 5601 }, { "epoch": 0.8024638303967913, "grad_norm": 1.1412255764007568, "learning_rate": 1.9807695711498385e-05, "loss": 1.5684, "step": 5602 }, { "epoch": 0.8026070763500931, "grad_norm": 1.1570570468902588, "learning_rate": 1.9779985050744256e-05, "loss": 1.5541, "step": 5603 }, { "epoch": 0.8027503223033949, "grad_norm": 1.0986528396606445, "learning_rate": 1.9752291659374234e-05, "loss": 1.2834, "step": 5604 }, { "epoch": 0.8028935682566968, "grad_norm": 1.1315523386001587, "learning_rate": 1.9724615543349943e-05, "loss": 1.4909, "step": 5605 }, { "epoch": 0.8030368142099986, "grad_norm": 1.2034919261932373, "learning_rate": 1.9696956708629445e-05, "loss": 1.2457, "step": 5606 }, { "epoch": 0.8031800601633003, "grad_norm": 1.1352487802505493, "learning_rate": 1.9669315161167e-05, "loss": 1.4475, "step": 5607 }, { "epoch": 0.8033233061166022, "grad_norm": 1.1558226346969604, "learning_rate": 1.964169090691309e-05, "loss": 1.3008, "step": 5608 }, { "epoch": 0.803466552069904, "grad_norm": 1.215633749961853, "learning_rate": 1.9614083951814554e-05, "loss": 1.502, "step": 5609 }, { "epoch": 0.8036097980232059, "grad_norm": 1.0943409204483032, "learning_rate": 1.958649430181455e-05, "loss": 1.4433, "step": 5610 }, { "epoch": 0.8037530439765077, "grad_norm": 1.0599017143249512, "learning_rate": 1.955892196285237e-05, "loss": 1.3415, "step": 5611 }, { "epoch": 0.8038962899298094, "grad_norm": 1.1296519041061401, "learning_rate": 1.9531366940863694e-05, "loss": 1.3928, "step": 5612 }, { "epoch": 0.8040395358831113, "grad_norm": 1.169357180595398, "learning_rate": 1.9503829241780412e-05, "loss": 1.3189, "step": 5613 }, { "epoch": 0.8041827818364131, "grad_norm": 1.0510852336883545, "learning_rate": 1.9476308871530723e-05, "loss": 1.6558, "step": 5614 }, { "epoch": 0.804326027789715, "grad_norm": 1.3508793115615845, "learning_rate": 1.944880583603912e-05, "loss": 1.2496, "step": 5615 }, { "epoch": 0.8044692737430168, "grad_norm": 1.254439115524292, "learning_rate": 1.9421320141226228e-05, "loss": 1.6002, "step": 5616 }, { "epoch": 0.8046125196963185, "grad_norm": 1.115844964981079, "learning_rate": 1.939385179300912e-05, "loss": 1.3303, "step": 5617 }, { "epoch": 0.8047557656496204, "grad_norm": 1.3152308464050293, "learning_rate": 1.9366400797301066e-05, "loss": 1.519, "step": 5618 }, { "epoch": 0.8048990116029222, "grad_norm": 1.3132092952728271, "learning_rate": 1.9338967160011512e-05, "loss": 1.2843, "step": 5619 }, { "epoch": 0.805042257556224, "grad_norm": 1.099906325340271, "learning_rate": 1.9311550887046282e-05, "loss": 1.4159, "step": 5620 }, { "epoch": 0.8051855035095259, "grad_norm": 0.9570691585540771, "learning_rate": 1.928415198430742e-05, "loss": 1.4924, "step": 5621 }, { "epoch": 0.8053287494628276, "grad_norm": 1.0679765939712524, "learning_rate": 1.925677045769322e-05, "loss": 1.543, "step": 5622 }, { "epoch": 0.8054719954161295, "grad_norm": 1.1903491020202637, "learning_rate": 1.9229406313098264e-05, "loss": 1.4804, "step": 5623 }, { "epoch": 0.8056152413694313, "grad_norm": 1.015075922012329, "learning_rate": 1.9202059556413366e-05, "loss": 1.3903, "step": 5624 }, { "epoch": 0.8057584873227331, "grad_norm": 1.143160104751587, "learning_rate": 1.9174730193525626e-05, "loss": 1.3872, "step": 5625 }, { "epoch": 0.805901733276035, "grad_norm": 1.1725493669509888, "learning_rate": 1.9147418230318316e-05, "loss": 1.4745, "step": 5626 }, { "epoch": 0.8060449792293368, "grad_norm": 1.363784909248352, "learning_rate": 1.9120123672671086e-05, "loss": 1.4913, "step": 5627 }, { "epoch": 0.8061882251826386, "grad_norm": 0.9968937039375305, "learning_rate": 1.9092846526459797e-05, "loss": 1.4202, "step": 5628 }, { "epoch": 0.8063314711359404, "grad_norm": 1.3498636484146118, "learning_rate": 1.906558679755649e-05, "loss": 1.5499, "step": 5629 }, { "epoch": 0.8064747170892422, "grad_norm": 0.9220340251922607, "learning_rate": 1.9038344491829495e-05, "loss": 1.5482, "step": 5630 }, { "epoch": 0.8066179630425441, "grad_norm": 1.117398738861084, "learning_rate": 1.9011119615143492e-05, "loss": 1.539, "step": 5631 }, { "epoch": 0.8067612089958459, "grad_norm": 1.1097244024276733, "learning_rate": 1.898391217335924e-05, "loss": 1.3598, "step": 5632 }, { "epoch": 0.8069044549491476, "grad_norm": 1.047333002090454, "learning_rate": 1.8956722172333875e-05, "loss": 1.464, "step": 5633 }, { "epoch": 0.8070477009024495, "grad_norm": 1.169381022453308, "learning_rate": 1.8929549617920716e-05, "loss": 1.3858, "step": 5634 }, { "epoch": 0.8071909468557513, "grad_norm": 0.9570075273513794, "learning_rate": 1.8902394515969335e-05, "loss": 1.323, "step": 5635 }, { "epoch": 0.8073341928090532, "grad_norm": 1.2535535097122192, "learning_rate": 1.8875256872325587e-05, "loss": 1.4074, "step": 5636 }, { "epoch": 0.807477438762355, "grad_norm": 1.4666746854782104, "learning_rate": 1.884813669283145e-05, "loss": 1.5168, "step": 5637 }, { "epoch": 0.8076206847156567, "grad_norm": 1.0678260326385498, "learning_rate": 1.882103398332533e-05, "loss": 1.4446, "step": 5638 }, { "epoch": 0.8077639306689586, "grad_norm": 1.491484522819519, "learning_rate": 1.8793948749641744e-05, "loss": 1.3809, "step": 5639 }, { "epoch": 0.8079071766222604, "grad_norm": 1.1266841888427734, "learning_rate": 1.8766880997611424e-05, "loss": 1.4085, "step": 5640 }, { "epoch": 0.8080504225755623, "grad_norm": 1.0261117219924927, "learning_rate": 1.8739830733061413e-05, "loss": 1.3018, "step": 5641 }, { "epoch": 0.8081936685288641, "grad_norm": 1.2107295989990234, "learning_rate": 1.8712797961814975e-05, "loss": 1.4481, "step": 5642 }, { "epoch": 0.8083369144821658, "grad_norm": 0.9902212023735046, "learning_rate": 1.8685782689691587e-05, "loss": 1.4038, "step": 5643 }, { "epoch": 0.8084801604354677, "grad_norm": 1.1875301599502563, "learning_rate": 1.865878492250698e-05, "loss": 1.3434, "step": 5644 }, { "epoch": 0.8086234063887695, "grad_norm": 1.1977791786193848, "learning_rate": 1.8631804666073094e-05, "loss": 1.299, "step": 5645 }, { "epoch": 0.8087666523420713, "grad_norm": 1.0960116386413574, "learning_rate": 1.8604841926198135e-05, "loss": 1.5729, "step": 5646 }, { "epoch": 0.8089098982953732, "grad_norm": 1.175320029258728, "learning_rate": 1.857789670868647e-05, "loss": 1.3454, "step": 5647 }, { "epoch": 0.809053144248675, "grad_norm": 1.0131524801254272, "learning_rate": 1.8550969019338725e-05, "loss": 1.1892, "step": 5648 }, { "epoch": 0.8091963902019768, "grad_norm": 1.1026222705841064, "learning_rate": 1.8524058863951854e-05, "loss": 1.4147, "step": 5649 }, { "epoch": 0.8093396361552786, "grad_norm": 1.4005250930786133, "learning_rate": 1.8497166248318876e-05, "loss": 1.3776, "step": 5650 }, { "epoch": 0.8094828821085804, "grad_norm": 1.1908936500549316, "learning_rate": 1.8470291178229116e-05, "loss": 1.4295, "step": 5651 }, { "epoch": 0.8096261280618823, "grad_norm": 1.1457916498184204, "learning_rate": 1.8443433659468123e-05, "loss": 1.2196, "step": 5652 }, { "epoch": 0.8097693740151841, "grad_norm": 1.2274850606918335, "learning_rate": 1.841659369781764e-05, "loss": 1.389, "step": 5653 }, { "epoch": 0.8099126199684858, "grad_norm": 1.3114341497421265, "learning_rate": 1.838977129905569e-05, "loss": 1.3976, "step": 5654 }, { "epoch": 0.8100558659217877, "grad_norm": 1.0833814144134521, "learning_rate": 1.83629664689564e-05, "loss": 1.3795, "step": 5655 }, { "epoch": 0.8101991118750895, "grad_norm": 1.0126831531524658, "learning_rate": 1.833617921329024e-05, "loss": 1.2186, "step": 5656 }, { "epoch": 0.8103423578283914, "grad_norm": 1.0673143863677979, "learning_rate": 1.830940953782385e-05, "loss": 1.3423, "step": 5657 }, { "epoch": 0.8104856037816932, "grad_norm": 1.1814590692520142, "learning_rate": 1.828265744832004e-05, "loss": 1.375, "step": 5658 }, { "epoch": 0.8106288497349949, "grad_norm": 1.2803890705108643, "learning_rate": 1.8255922950537872e-05, "loss": 1.2565, "step": 5659 }, { "epoch": 0.8107720956882968, "grad_norm": 1.4734647274017334, "learning_rate": 1.8229206050232684e-05, "loss": 1.4035, "step": 5660 }, { "epoch": 0.8109153416415986, "grad_norm": 1.186280608177185, "learning_rate": 1.820250675315589e-05, "loss": 1.2662, "step": 5661 }, { "epoch": 0.8110585875949005, "grad_norm": 1.0924103260040283, "learning_rate": 1.817582506505523e-05, "loss": 1.4147, "step": 5662 }, { "epoch": 0.8112018335482023, "grad_norm": 1.1690579652786255, "learning_rate": 1.8149160991674597e-05, "loss": 1.3985, "step": 5663 }, { "epoch": 0.811345079501504, "grad_norm": 1.1321953535079956, "learning_rate": 1.81225145387541e-05, "loss": 1.3679, "step": 5664 }, { "epoch": 0.8114883254548059, "grad_norm": 1.1777729988098145, "learning_rate": 1.80958857120301e-05, "loss": 1.4501, "step": 5665 }, { "epoch": 0.8116315714081077, "grad_norm": 1.2488298416137695, "learning_rate": 1.8069274517235047e-05, "loss": 1.4423, "step": 5666 }, { "epoch": 0.8117748173614096, "grad_norm": 1.5855075120925903, "learning_rate": 1.8042680960097735e-05, "loss": 1.3948, "step": 5667 }, { "epoch": 0.8119180633147114, "grad_norm": 1.0423650741577148, "learning_rate": 1.8016105046343123e-05, "loss": 1.1922, "step": 5668 }, { "epoch": 0.8120613092680132, "grad_norm": 1.0537296533584595, "learning_rate": 1.798954678169228e-05, "loss": 1.3903, "step": 5669 }, { "epoch": 0.812204555221315, "grad_norm": 1.1813715696334839, "learning_rate": 1.7963006171862562e-05, "loss": 1.4229, "step": 5670 }, { "epoch": 0.8123478011746168, "grad_norm": 1.0154478549957275, "learning_rate": 1.7936483222567523e-05, "loss": 1.2641, "step": 5671 }, { "epoch": 0.8124910471279186, "grad_norm": 1.05471932888031, "learning_rate": 1.7909977939516887e-05, "loss": 1.3676, "step": 5672 }, { "epoch": 0.8126342930812205, "grad_norm": 1.3914660215377808, "learning_rate": 1.7883490328416587e-05, "loss": 1.4694, "step": 5673 }, { "epoch": 0.8127775390345223, "grad_norm": 1.0848671197891235, "learning_rate": 1.785702039496875e-05, "loss": 1.5928, "step": 5674 }, { "epoch": 0.812920784987824, "grad_norm": 1.1508536338806152, "learning_rate": 1.783056814487172e-05, "loss": 1.2459, "step": 5675 }, { "epoch": 0.8130640309411259, "grad_norm": 1.0931442975997925, "learning_rate": 1.780413358381997e-05, "loss": 1.4345, "step": 5676 }, { "epoch": 0.8132072768944277, "grad_norm": 1.1575117111206055, "learning_rate": 1.7777716717504213e-05, "loss": 1.2867, "step": 5677 }, { "epoch": 0.8133505228477296, "grad_norm": 1.0061838626861572, "learning_rate": 1.775131755161139e-05, "loss": 1.5188, "step": 5678 }, { "epoch": 0.8134937688010314, "grad_norm": 0.964324414730072, "learning_rate": 1.772493609182455e-05, "loss": 1.4969, "step": 5679 }, { "epoch": 0.8136370147543331, "grad_norm": 1.262006163597107, "learning_rate": 1.7698572343822973e-05, "loss": 1.3187, "step": 5680 }, { "epoch": 0.813780260707635, "grad_norm": 0.9639429450035095, "learning_rate": 1.7672226313282126e-05, "loss": 1.3476, "step": 5681 }, { "epoch": 0.8139235066609368, "grad_norm": 1.1174206733703613, "learning_rate": 1.7645898005873663e-05, "loss": 1.3598, "step": 5682 }, { "epoch": 0.8140667526142387, "grad_norm": 0.9854691624641418, "learning_rate": 1.7619587427265405e-05, "loss": 1.3317, "step": 5683 }, { "epoch": 0.8142099985675405, "grad_norm": 0.961112380027771, "learning_rate": 1.7593294583121377e-05, "loss": 1.4123, "step": 5684 }, { "epoch": 0.8143532445208422, "grad_norm": 1.0793004035949707, "learning_rate": 1.7567019479101776e-05, "loss": 1.2452, "step": 5685 }, { "epoch": 0.8144964904741441, "grad_norm": 1.0701059103012085, "learning_rate": 1.754076212086301e-05, "loss": 1.3234, "step": 5686 }, { "epoch": 0.8146397364274459, "grad_norm": 1.175217866897583, "learning_rate": 1.7514522514057553e-05, "loss": 1.3589, "step": 5687 }, { "epoch": 0.8147829823807478, "grad_norm": 1.036516785621643, "learning_rate": 1.7488300664334236e-05, "loss": 1.2896, "step": 5688 }, { "epoch": 0.8149262283340496, "grad_norm": 1.2480024099349976, "learning_rate": 1.746209657733795e-05, "loss": 1.4057, "step": 5689 }, { "epoch": 0.8150694742873514, "grad_norm": 1.1434650421142578, "learning_rate": 1.7435910258709752e-05, "loss": 1.5092, "step": 5690 }, { "epoch": 0.8152127202406532, "grad_norm": 1.0773417949676514, "learning_rate": 1.7409741714086948e-05, "loss": 1.3227, "step": 5691 }, { "epoch": 0.815355966193955, "grad_norm": 1.1579279899597168, "learning_rate": 1.7383590949102945e-05, "loss": 1.2698, "step": 5692 }, { "epoch": 0.8154992121472568, "grad_norm": 1.2602218389511108, "learning_rate": 1.7357457969387368e-05, "loss": 1.2852, "step": 5693 }, { "epoch": 0.8156424581005587, "grad_norm": 0.9489460587501526, "learning_rate": 1.7331342780566017e-05, "loss": 1.4304, "step": 5694 }, { "epoch": 0.8157857040538605, "grad_norm": 1.070450782775879, "learning_rate": 1.7305245388260826e-05, "loss": 1.5254, "step": 5695 }, { "epoch": 0.8159289500071623, "grad_norm": 1.1234924793243408, "learning_rate": 1.7279165798089957e-05, "loss": 1.5023, "step": 5696 }, { "epoch": 0.8160721959604641, "grad_norm": 1.0871365070343018, "learning_rate": 1.7253104015667643e-05, "loss": 1.3001, "step": 5697 }, { "epoch": 0.8162154419137659, "grad_norm": 1.015113115310669, "learning_rate": 1.7227060046604336e-05, "loss": 1.3441, "step": 5698 }, { "epoch": 0.8163586878670678, "grad_norm": 1.2424969673156738, "learning_rate": 1.7201033896506746e-05, "loss": 1.4027, "step": 5699 }, { "epoch": 0.8165019338203696, "grad_norm": 1.0245485305786133, "learning_rate": 1.7175025570977577e-05, "loss": 1.3343, "step": 5700 }, { "epoch": 0.8166451797736713, "grad_norm": 1.3579782247543335, "learning_rate": 1.7149035075615794e-05, "loss": 1.2797, "step": 5701 }, { "epoch": 0.8167884257269732, "grad_norm": 0.9591355323791504, "learning_rate": 1.7123062416016524e-05, "loss": 1.3306, "step": 5702 }, { "epoch": 0.816931671680275, "grad_norm": 1.1109671592712402, "learning_rate": 1.7097107597771024e-05, "loss": 1.3072, "step": 5703 }, { "epoch": 0.8170749176335769, "grad_norm": 1.3435479402542114, "learning_rate": 1.707117062646676e-05, "loss": 1.3228, "step": 5704 }, { "epoch": 0.8172181635868787, "grad_norm": 1.2221364974975586, "learning_rate": 1.7045251507687232e-05, "loss": 1.3397, "step": 5705 }, { "epoch": 0.8173614095401804, "grad_norm": 1.251620888710022, "learning_rate": 1.7019350247012278e-05, "loss": 1.3735, "step": 5706 }, { "epoch": 0.8175046554934823, "grad_norm": 1.2624701261520386, "learning_rate": 1.699346685001778e-05, "loss": 1.4296, "step": 5707 }, { "epoch": 0.8176479014467841, "grad_norm": 1.2002944946289062, "learning_rate": 1.696760132227576e-05, "loss": 1.2513, "step": 5708 }, { "epoch": 0.817791147400086, "grad_norm": 1.2588214874267578, "learning_rate": 1.694175366935442e-05, "loss": 1.4865, "step": 5709 }, { "epoch": 0.8179343933533878, "grad_norm": 1.074466347694397, "learning_rate": 1.6915923896818188e-05, "loss": 1.4205, "step": 5710 }, { "epoch": 0.8180776393066896, "grad_norm": 1.1661311388015747, "learning_rate": 1.6890112010227498e-05, "loss": 1.3182, "step": 5711 }, { "epoch": 0.8182208852599914, "grad_norm": 1.4667960405349731, "learning_rate": 1.6864318015139047e-05, "loss": 1.3599, "step": 5712 }, { "epoch": 0.8183641312132932, "grad_norm": 1.2076159715652466, "learning_rate": 1.6838541917105632e-05, "loss": 1.2746, "step": 5713 }, { "epoch": 0.818507377166595, "grad_norm": 1.16629958152771, "learning_rate": 1.6812783721676228e-05, "loss": 1.5159, "step": 5714 }, { "epoch": 0.8186506231198969, "grad_norm": 1.110872745513916, "learning_rate": 1.6787043434395942e-05, "loss": 1.3625, "step": 5715 }, { "epoch": 0.8187938690731987, "grad_norm": 0.9943273067474365, "learning_rate": 1.6761321060805957e-05, "loss": 1.1011, "step": 5716 }, { "epoch": 0.8189371150265005, "grad_norm": 1.336700439453125, "learning_rate": 1.6735616606443728e-05, "loss": 1.493, "step": 5717 }, { "epoch": 0.8190803609798023, "grad_norm": 1.0039108991622925, "learning_rate": 1.6709930076842805e-05, "loss": 1.3233, "step": 5718 }, { "epoch": 0.8192236069331041, "grad_norm": 1.1885766983032227, "learning_rate": 1.6684261477532793e-05, "loss": 1.4271, "step": 5719 }, { "epoch": 0.819366852886406, "grad_norm": 1.2821022272109985, "learning_rate": 1.6658610814039544e-05, "loss": 1.5624, "step": 5720 }, { "epoch": 0.8195100988397078, "grad_norm": 1.2000768184661865, "learning_rate": 1.663297809188501e-05, "loss": 1.33, "step": 5721 }, { "epoch": 0.8196533447930096, "grad_norm": 1.2028697729110718, "learning_rate": 1.6607363316587277e-05, "loss": 1.3278, "step": 5722 }, { "epoch": 0.8197965907463114, "grad_norm": 1.2008311748504639, "learning_rate": 1.6581766493660578e-05, "loss": 1.3605, "step": 5723 }, { "epoch": 0.8199398366996132, "grad_norm": 1.0286083221435547, "learning_rate": 1.6556187628615273e-05, "loss": 1.4566, "step": 5724 }, { "epoch": 0.8200830826529151, "grad_norm": 1.147139549255371, "learning_rate": 1.6530626726957877e-05, "loss": 1.6141, "step": 5725 }, { "epoch": 0.8202263286062169, "grad_norm": 1.0844931602478027, "learning_rate": 1.650508379419098e-05, "loss": 1.4512, "step": 5726 }, { "epoch": 0.8203695745595186, "grad_norm": 1.2058179378509521, "learning_rate": 1.6479558835813334e-05, "loss": 1.5604, "step": 5727 }, { "epoch": 0.8205128205128205, "grad_norm": 1.0573688745498657, "learning_rate": 1.6454051857319906e-05, "loss": 1.4043, "step": 5728 }, { "epoch": 0.8206560664661223, "grad_norm": 1.0302889347076416, "learning_rate": 1.6428562864201658e-05, "loss": 1.4157, "step": 5729 }, { "epoch": 0.8207993124194242, "grad_norm": 1.2544777393341064, "learning_rate": 1.6403091861945753e-05, "loss": 1.4363, "step": 5730 }, { "epoch": 0.820942558372726, "grad_norm": 1.0001379251480103, "learning_rate": 1.637763885603546e-05, "loss": 1.5631, "step": 5731 }, { "epoch": 0.8210858043260277, "grad_norm": 1.1475646495819092, "learning_rate": 1.6352203851950198e-05, "loss": 1.4526, "step": 5732 }, { "epoch": 0.8212290502793296, "grad_norm": 1.0426462888717651, "learning_rate": 1.63267868551655e-05, "loss": 1.3183, "step": 5733 }, { "epoch": 0.8213722962326314, "grad_norm": 1.0481704473495483, "learning_rate": 1.6301387871152963e-05, "loss": 1.4865, "step": 5734 }, { "epoch": 0.8215155421859333, "grad_norm": 1.1153337955474854, "learning_rate": 1.6276006905380413e-05, "loss": 1.3879, "step": 5735 }, { "epoch": 0.8216587881392351, "grad_norm": 1.0009732246398926, "learning_rate": 1.625064396331176e-05, "loss": 1.4233, "step": 5736 }, { "epoch": 0.8218020340925369, "grad_norm": 1.2835216522216797, "learning_rate": 1.622529905040696e-05, "loss": 1.5281, "step": 5737 }, { "epoch": 0.8219452800458387, "grad_norm": 0.9856894016265869, "learning_rate": 1.6199972172122147e-05, "loss": 1.3966, "step": 5738 }, { "epoch": 0.8220885259991405, "grad_norm": 1.4175550937652588, "learning_rate": 1.6174663333909646e-05, "loss": 1.4797, "step": 5739 }, { "epoch": 0.8222317719524423, "grad_norm": 1.4727535247802734, "learning_rate": 1.6149372541217755e-05, "loss": 1.5319, "step": 5740 }, { "epoch": 0.8223750179057442, "grad_norm": 1.294858694076538, "learning_rate": 1.6124099799490968e-05, "loss": 1.3865, "step": 5741 }, { "epoch": 0.822518263859046, "grad_norm": 1.0073872804641724, "learning_rate": 1.6098845114169893e-05, "loss": 1.4928, "step": 5742 }, { "epoch": 0.8226615098123478, "grad_norm": 1.1803154945373535, "learning_rate": 1.6073608490691228e-05, "loss": 1.3451, "step": 5743 }, { "epoch": 0.8228047557656496, "grad_norm": 1.1558741331100464, "learning_rate": 1.604838993448783e-05, "loss": 1.233, "step": 5744 }, { "epoch": 0.8229480017189514, "grad_norm": 1.086236834526062, "learning_rate": 1.602318945098855e-05, "loss": 1.2579, "step": 5745 }, { "epoch": 0.8230912476722533, "grad_norm": 1.804756760597229, "learning_rate": 1.5998007045618502e-05, "loss": 1.2915, "step": 5746 }, { "epoch": 0.8232344936255551, "grad_norm": 1.1390624046325684, "learning_rate": 1.597284272379883e-05, "loss": 1.3439, "step": 5747 }, { "epoch": 0.8233777395788568, "grad_norm": 1.2383747100830078, "learning_rate": 1.594769649094675e-05, "loss": 1.6368, "step": 5748 }, { "epoch": 0.8235209855321587, "grad_norm": 0.9799171090126038, "learning_rate": 1.5922568352475642e-05, "loss": 1.26, "step": 5749 }, { "epoch": 0.8236642314854605, "grad_norm": 1.2746466398239136, "learning_rate": 1.5897458313794966e-05, "loss": 1.3836, "step": 5750 }, { "epoch": 0.8238074774387624, "grad_norm": 1.118242621421814, "learning_rate": 1.587236638031031e-05, "loss": 1.3337, "step": 5751 }, { "epoch": 0.8239507233920642, "grad_norm": 1.0641323328018188, "learning_rate": 1.5847292557423344e-05, "loss": 1.2507, "step": 5752 }, { "epoch": 0.8240939693453659, "grad_norm": 0.9406953454017639, "learning_rate": 1.5822236850531824e-05, "loss": 1.4743, "step": 5753 }, { "epoch": 0.8242372152986678, "grad_norm": 1.1316951513290405, "learning_rate": 1.579719926502966e-05, "loss": 1.3658, "step": 5754 }, { "epoch": 0.8243804612519696, "grad_norm": 1.062627911567688, "learning_rate": 1.5772179806306743e-05, "loss": 1.2862, "step": 5755 }, { "epoch": 0.8245237072052715, "grad_norm": 1.075452208518982, "learning_rate": 1.5747178479749236e-05, "loss": 1.4774, "step": 5756 }, { "epoch": 0.8246669531585733, "grad_norm": 1.1451843976974487, "learning_rate": 1.5722195290739285e-05, "loss": 1.3544, "step": 5757 }, { "epoch": 0.8248101991118751, "grad_norm": 1.0830128192901611, "learning_rate": 1.5697230244655114e-05, "loss": 1.3918, "step": 5758 }, { "epoch": 0.8249534450651769, "grad_norm": 1.31010103225708, "learning_rate": 1.5672283346871074e-05, "loss": 1.4374, "step": 5759 }, { "epoch": 0.8250966910184787, "grad_norm": 1.129384994506836, "learning_rate": 1.564735460275769e-05, "loss": 1.1694, "step": 5760 }, { "epoch": 0.8252399369717806, "grad_norm": 1.120633602142334, "learning_rate": 1.562244401768144e-05, "loss": 1.3686, "step": 5761 }, { "epoch": 0.8253831829250824, "grad_norm": 1.2156684398651123, "learning_rate": 1.5597551597004966e-05, "loss": 1.3215, "step": 5762 }, { "epoch": 0.8255264288783842, "grad_norm": 1.2506338357925415, "learning_rate": 1.5572677346087004e-05, "loss": 1.5524, "step": 5763 }, { "epoch": 0.825669674831686, "grad_norm": 1.2577672004699707, "learning_rate": 1.5547821270282346e-05, "loss": 1.3113, "step": 5764 }, { "epoch": 0.8258129207849878, "grad_norm": 1.1754236221313477, "learning_rate": 1.5522983374941937e-05, "loss": 1.17, "step": 5765 }, { "epoch": 0.8259561667382896, "grad_norm": 1.0951234102249146, "learning_rate": 1.549816366541268e-05, "loss": 1.4121, "step": 5766 }, { "epoch": 0.8260994126915915, "grad_norm": 1.1116222143173218, "learning_rate": 1.5473362147037706e-05, "loss": 1.2329, "step": 5767 }, { "epoch": 0.8262426586448933, "grad_norm": 1.074329137802124, "learning_rate": 1.544857882515617e-05, "loss": 1.4376, "step": 5768 }, { "epoch": 0.826385904598195, "grad_norm": 1.0942108631134033, "learning_rate": 1.5423813705103275e-05, "loss": 1.5041, "step": 5769 }, { "epoch": 0.8265291505514969, "grad_norm": 1.1912577152252197, "learning_rate": 1.539906679221035e-05, "loss": 1.5605, "step": 5770 }, { "epoch": 0.8266723965047987, "grad_norm": 1.395216941833496, "learning_rate": 1.537433809180481e-05, "loss": 1.3853, "step": 5771 }, { "epoch": 0.8268156424581006, "grad_norm": 1.2698711156845093, "learning_rate": 1.5349627609210104e-05, "loss": 1.4283, "step": 5772 }, { "epoch": 0.8269588884114024, "grad_norm": 1.2571125030517578, "learning_rate": 1.5324935349745805e-05, "loss": 1.4672, "step": 5773 }, { "epoch": 0.8271021343647041, "grad_norm": 1.0859688520431519, "learning_rate": 1.5300261318727537e-05, "loss": 1.3236, "step": 5774 }, { "epoch": 0.827245380318006, "grad_norm": 0.9561535120010376, "learning_rate": 1.5275605521467052e-05, "loss": 1.3482, "step": 5775 }, { "epoch": 0.8273886262713078, "grad_norm": 1.114216923713684, "learning_rate": 1.5250967963272056e-05, "loss": 1.5138, "step": 5776 }, { "epoch": 0.8275318722246097, "grad_norm": 1.1488627195358276, "learning_rate": 1.5226348649446432e-05, "loss": 1.5342, "step": 5777 }, { "epoch": 0.8276751181779115, "grad_norm": 1.202953577041626, "learning_rate": 1.520174758529016e-05, "loss": 1.3956, "step": 5778 }, { "epoch": 0.8278183641312133, "grad_norm": 1.2208155393600464, "learning_rate": 1.5177164776099184e-05, "loss": 1.4621, "step": 5779 }, { "epoch": 0.8279616100845151, "grad_norm": 1.1186600923538208, "learning_rate": 1.5152600227165591e-05, "loss": 1.4418, "step": 5780 }, { "epoch": 0.8281048560378169, "grad_norm": 1.0873935222625732, "learning_rate": 1.5128053943777532e-05, "loss": 1.5016, "step": 5781 }, { "epoch": 0.8282481019911188, "grad_norm": 1.0604404211044312, "learning_rate": 1.5103525931219186e-05, "loss": 1.4232, "step": 5782 }, { "epoch": 0.8283913479444206, "grad_norm": 1.3000433444976807, "learning_rate": 1.5079016194770889e-05, "loss": 1.5169, "step": 5783 }, { "epoch": 0.8285345938977224, "grad_norm": 1.1387698650360107, "learning_rate": 1.5054524739708876e-05, "loss": 1.3611, "step": 5784 }, { "epoch": 0.8286778398510242, "grad_norm": 1.2203127145767212, "learning_rate": 1.5030051571305637e-05, "loss": 1.6584, "step": 5785 }, { "epoch": 0.828821085804326, "grad_norm": 1.2273352146148682, "learning_rate": 1.5005596694829637e-05, "loss": 1.1703, "step": 5786 }, { "epoch": 0.8289643317576278, "grad_norm": 1.0907166004180908, "learning_rate": 1.4981160115545367e-05, "loss": 1.4006, "step": 5787 }, { "epoch": 0.8291075777109297, "grad_norm": 1.397177815437317, "learning_rate": 1.4956741838713406e-05, "loss": 1.4135, "step": 5788 }, { "epoch": 0.8292508236642315, "grad_norm": 1.0054924488067627, "learning_rate": 1.4932341869590483e-05, "loss": 1.6023, "step": 5789 }, { "epoch": 0.8293940696175333, "grad_norm": 1.109610676765442, "learning_rate": 1.4907960213429239e-05, "loss": 1.348, "step": 5790 }, { "epoch": 0.8295373155708351, "grad_norm": 1.3151237964630127, "learning_rate": 1.4883596875478457e-05, "loss": 1.3057, "step": 5791 }, { "epoch": 0.8296805615241369, "grad_norm": 0.9615287184715271, "learning_rate": 1.485925186098296e-05, "loss": 1.3376, "step": 5792 }, { "epoch": 0.8298238074774388, "grad_norm": 1.1395747661590576, "learning_rate": 1.4834925175183635e-05, "loss": 1.3676, "step": 5793 }, { "epoch": 0.8299670534307406, "grad_norm": 1.0670223236083984, "learning_rate": 1.4810616823317425e-05, "loss": 1.2392, "step": 5794 }, { "epoch": 0.8301102993840423, "grad_norm": 1.2513740062713623, "learning_rate": 1.4786326810617268e-05, "loss": 1.459, "step": 5795 }, { "epoch": 0.8302535453373442, "grad_norm": 1.339961290359497, "learning_rate": 1.476205514231226e-05, "loss": 1.4577, "step": 5796 }, { "epoch": 0.830396791290646, "grad_norm": 1.0669224262237549, "learning_rate": 1.4737801823627485e-05, "loss": 1.5288, "step": 5797 }, { "epoch": 0.8305400372439479, "grad_norm": 1.1694546937942505, "learning_rate": 1.4713566859784045e-05, "loss": 1.3813, "step": 5798 }, { "epoch": 0.8306832831972497, "grad_norm": 1.2854022979736328, "learning_rate": 1.4689350255999146e-05, "loss": 1.4014, "step": 5799 }, { "epoch": 0.8308265291505516, "grad_norm": 1.1078037023544312, "learning_rate": 1.4665152017486028e-05, "loss": 1.3105, "step": 5800 }, { "epoch": 0.8309697751038533, "grad_norm": 1.1335349082946777, "learning_rate": 1.4640972149453969e-05, "loss": 1.2528, "step": 5801 }, { "epoch": 0.8311130210571551, "grad_norm": 1.3562027215957642, "learning_rate": 1.4616810657108304e-05, "loss": 1.4332, "step": 5802 }, { "epoch": 0.831256267010457, "grad_norm": 1.1427617073059082, "learning_rate": 1.4592667545650396e-05, "loss": 1.4867, "step": 5803 }, { "epoch": 0.8313995129637588, "grad_norm": 1.2422910928726196, "learning_rate": 1.4568542820277686e-05, "loss": 1.4606, "step": 5804 }, { "epoch": 0.8315427589170606, "grad_norm": 1.0507043600082397, "learning_rate": 1.4544436486183577e-05, "loss": 1.3251, "step": 5805 }, { "epoch": 0.8316860048703624, "grad_norm": 1.5208712816238403, "learning_rate": 1.4520348548557583e-05, "loss": 1.3776, "step": 5806 }, { "epoch": 0.8318292508236642, "grad_norm": 0.9851091504096985, "learning_rate": 1.449627901258529e-05, "loss": 1.5367, "step": 5807 }, { "epoch": 0.831972496776966, "grad_norm": 1.0047719478607178, "learning_rate": 1.4472227883448219e-05, "loss": 1.4148, "step": 5808 }, { "epoch": 0.8321157427302679, "grad_norm": 0.9701448082923889, "learning_rate": 1.4448195166324008e-05, "loss": 1.4628, "step": 5809 }, { "epoch": 0.8322589886835697, "grad_norm": 1.0646880865097046, "learning_rate": 1.4424180866386283e-05, "loss": 1.3587, "step": 5810 }, { "epoch": 0.8324022346368715, "grad_norm": 1.0203818082809448, "learning_rate": 1.4400184988804754e-05, "loss": 1.4776, "step": 5811 }, { "epoch": 0.8325454805901733, "grad_norm": 1.1237303018569946, "learning_rate": 1.4376207538745134e-05, "loss": 1.2089, "step": 5812 }, { "epoch": 0.8326887265434751, "grad_norm": 1.130392074584961, "learning_rate": 1.4352248521369161e-05, "loss": 1.4239, "step": 5813 }, { "epoch": 0.832831972496777, "grad_norm": 1.0611867904663086, "learning_rate": 1.432830794183464e-05, "loss": 1.3754, "step": 5814 }, { "epoch": 0.8329752184500788, "grad_norm": 1.1104347705841064, "learning_rate": 1.4304385805295384e-05, "loss": 1.6235, "step": 5815 }, { "epoch": 0.8331184644033806, "grad_norm": 1.2149724960327148, "learning_rate": 1.4280482116901195e-05, "loss": 1.3703, "step": 5816 }, { "epoch": 0.8332617103566824, "grad_norm": 1.2738490104675293, "learning_rate": 1.425659688179799e-05, "loss": 1.398, "step": 5817 }, { "epoch": 0.8334049563099842, "grad_norm": 1.101672887802124, "learning_rate": 1.4232730105127689e-05, "loss": 1.2755, "step": 5818 }, { "epoch": 0.8335482022632861, "grad_norm": 1.0011487007141113, "learning_rate": 1.420888179202815e-05, "loss": 1.4193, "step": 5819 }, { "epoch": 0.8336914482165879, "grad_norm": 1.217929720878601, "learning_rate": 1.4185051947633377e-05, "loss": 1.3555, "step": 5820 }, { "epoch": 0.8338346941698896, "grad_norm": 1.4624873399734497, "learning_rate": 1.416124057707331e-05, "loss": 1.3977, "step": 5821 }, { "epoch": 0.8339779401231915, "grad_norm": 1.3152862787246704, "learning_rate": 1.413744768547398e-05, "loss": 1.4383, "step": 5822 }, { "epoch": 0.8341211860764933, "grad_norm": 1.2528916597366333, "learning_rate": 1.4113673277957395e-05, "loss": 1.3645, "step": 5823 }, { "epoch": 0.8342644320297952, "grad_norm": 1.1452208757400513, "learning_rate": 1.408991735964159e-05, "loss": 1.3248, "step": 5824 }, { "epoch": 0.834407677983097, "grad_norm": 1.040930986404419, "learning_rate": 1.4066179935640666e-05, "loss": 1.3063, "step": 5825 }, { "epoch": 0.8345509239363988, "grad_norm": 1.4295469522476196, "learning_rate": 1.4042461011064634e-05, "loss": 1.5531, "step": 5826 }, { "epoch": 0.8346941698897006, "grad_norm": 1.2730448246002197, "learning_rate": 1.401876059101962e-05, "loss": 1.3894, "step": 5827 }, { "epoch": 0.8348374158430024, "grad_norm": 1.305404782295227, "learning_rate": 1.3995078680607776e-05, "loss": 1.4497, "step": 5828 }, { "epoch": 0.8349806617963043, "grad_norm": 1.1287041902542114, "learning_rate": 1.3971415284927192e-05, "loss": 1.353, "step": 5829 }, { "epoch": 0.8351239077496061, "grad_norm": 1.03939688205719, "learning_rate": 1.3947770409072014e-05, "loss": 1.4898, "step": 5830 }, { "epoch": 0.8352671537029079, "grad_norm": 1.0516117811203003, "learning_rate": 1.3924144058132405e-05, "loss": 1.3691, "step": 5831 }, { "epoch": 0.8354103996562097, "grad_norm": 0.9747567176818848, "learning_rate": 1.3900536237194528e-05, "loss": 1.4841, "step": 5832 }, { "epoch": 0.8355536456095115, "grad_norm": 1.2881711721420288, "learning_rate": 1.3876946951340598e-05, "loss": 1.3481, "step": 5833 }, { "epoch": 0.8356968915628133, "grad_norm": 1.158127784729004, "learning_rate": 1.3853376205648727e-05, "loss": 1.4621, "step": 5834 }, { "epoch": 0.8358401375161152, "grad_norm": 1.1932823657989502, "learning_rate": 1.3829824005193181e-05, "loss": 1.3799, "step": 5835 }, { "epoch": 0.835983383469417, "grad_norm": 0.9979770183563232, "learning_rate": 1.3806290355044171e-05, "loss": 1.3411, "step": 5836 }, { "epoch": 0.8361266294227188, "grad_norm": 1.0263804197311401, "learning_rate": 1.3782775260267856e-05, "loss": 1.4818, "step": 5837 }, { "epoch": 0.8362698753760206, "grad_norm": 1.0675756931304932, "learning_rate": 1.3759278725926472e-05, "loss": 1.3811, "step": 5838 }, { "epoch": 0.8364131213293224, "grad_norm": 0.9908127188682556, "learning_rate": 1.3735800757078287e-05, "loss": 1.2359, "step": 5839 }, { "epoch": 0.8365563672826243, "grad_norm": 0.9836218357086182, "learning_rate": 1.3712341358777469e-05, "loss": 1.552, "step": 5840 }, { "epoch": 0.8366996132359261, "grad_norm": 1.071040153503418, "learning_rate": 1.3688900536074278e-05, "loss": 1.3265, "step": 5841 }, { "epoch": 0.8368428591892279, "grad_norm": 1.0249180793762207, "learning_rate": 1.3665478294014922e-05, "loss": 1.3399, "step": 5842 }, { "epoch": 0.8369861051425297, "grad_norm": 1.213796615600586, "learning_rate": 1.3642074637641644e-05, "loss": 1.5158, "step": 5843 }, { "epoch": 0.8371293510958315, "grad_norm": 1.0670266151428223, "learning_rate": 1.3618689571992705e-05, "loss": 1.4355, "step": 5844 }, { "epoch": 0.8372725970491334, "grad_norm": 1.2367445230484009, "learning_rate": 1.359532310210223e-05, "loss": 1.3603, "step": 5845 }, { "epoch": 0.8374158430024352, "grad_norm": 1.3190815448760986, "learning_rate": 1.3571975233000544e-05, "loss": 1.3992, "step": 5846 }, { "epoch": 0.837559088955737, "grad_norm": 1.1637790203094482, "learning_rate": 1.3548645969713848e-05, "loss": 1.2894, "step": 5847 }, { "epoch": 0.8377023349090388, "grad_norm": 1.0262011289596558, "learning_rate": 1.352533531726432e-05, "loss": 1.1367, "step": 5848 }, { "epoch": 0.8378455808623406, "grad_norm": 1.2082775831222534, "learning_rate": 1.3502043280670174e-05, "loss": 1.4281, "step": 5849 }, { "epoch": 0.8379888268156425, "grad_norm": 1.1750949621200562, "learning_rate": 1.3478769864945618e-05, "loss": 1.4625, "step": 5850 }, { "epoch": 0.8381320727689443, "grad_norm": 0.9993886947631836, "learning_rate": 1.3455515075100844e-05, "loss": 1.2688, "step": 5851 }, { "epoch": 0.8382753187222461, "grad_norm": 1.115307092666626, "learning_rate": 1.3432278916142027e-05, "loss": 1.2483, "step": 5852 }, { "epoch": 0.8384185646755479, "grad_norm": 1.1271145343780518, "learning_rate": 1.3409061393071343e-05, "loss": 1.4081, "step": 5853 }, { "epoch": 0.8385618106288497, "grad_norm": 1.104867935180664, "learning_rate": 1.3385862510886971e-05, "loss": 1.2506, "step": 5854 }, { "epoch": 0.8387050565821516, "grad_norm": 1.1933553218841553, "learning_rate": 1.3362682274583017e-05, "loss": 1.4013, "step": 5855 }, { "epoch": 0.8388483025354534, "grad_norm": 1.0813376903533936, "learning_rate": 1.3339520689149599e-05, "loss": 1.3552, "step": 5856 }, { "epoch": 0.8389915484887552, "grad_norm": 1.1912479400634766, "learning_rate": 1.3316377759572906e-05, "loss": 1.4658, "step": 5857 }, { "epoch": 0.839134794442057, "grad_norm": 1.1623120307922363, "learning_rate": 1.3293253490834978e-05, "loss": 1.3824, "step": 5858 }, { "epoch": 0.8392780403953588, "grad_norm": 1.092334508895874, "learning_rate": 1.3270147887913909e-05, "loss": 1.4666, "step": 5859 }, { "epoch": 0.8394212863486606, "grad_norm": 1.21332848072052, "learning_rate": 1.3247060955783775e-05, "loss": 1.4901, "step": 5860 }, { "epoch": 0.8395645323019625, "grad_norm": 1.174304723739624, "learning_rate": 1.322399269941461e-05, "loss": 1.3953, "step": 5861 }, { "epoch": 0.8397077782552643, "grad_norm": 1.1492661237716675, "learning_rate": 1.320094312377247e-05, "loss": 1.2834, "step": 5862 }, { "epoch": 0.839851024208566, "grad_norm": 1.7371139526367188, "learning_rate": 1.3177912233819289e-05, "loss": 1.4874, "step": 5863 }, { "epoch": 0.8399942701618679, "grad_norm": 1.0461859703063965, "learning_rate": 1.3154900034513106e-05, "loss": 1.631, "step": 5864 }, { "epoch": 0.8401375161151697, "grad_norm": 1.1084332466125488, "learning_rate": 1.3131906530807891e-05, "loss": 1.2827, "step": 5865 }, { "epoch": 0.8402807620684716, "grad_norm": 1.31239652633667, "learning_rate": 1.3108931727653529e-05, "loss": 1.2376, "step": 5866 }, { "epoch": 0.8404240080217734, "grad_norm": 1.0189491510391235, "learning_rate": 1.3085975629995916e-05, "loss": 1.5056, "step": 5867 }, { "epoch": 0.8405672539750753, "grad_norm": 1.1481722593307495, "learning_rate": 1.3063038242777014e-05, "loss": 1.1616, "step": 5868 }, { "epoch": 0.840710499928377, "grad_norm": 1.0580345392227173, "learning_rate": 1.3040119570934595e-05, "loss": 1.4877, "step": 5869 }, { "epoch": 0.8408537458816788, "grad_norm": 1.1012507677078247, "learning_rate": 1.3017219619402509e-05, "loss": 1.4117, "step": 5870 }, { "epoch": 0.8409969918349807, "grad_norm": 0.9799444079399109, "learning_rate": 1.299433839311055e-05, "loss": 1.4223, "step": 5871 }, { "epoch": 0.8411402377882825, "grad_norm": 1.0052860975265503, "learning_rate": 1.2971475896984475e-05, "loss": 1.4027, "step": 5872 }, { "epoch": 0.8412834837415843, "grad_norm": 1.049664855003357, "learning_rate": 1.2948632135946048e-05, "loss": 1.412, "step": 5873 }, { "epoch": 0.8414267296948861, "grad_norm": 1.1650714874267578, "learning_rate": 1.292580711491289e-05, "loss": 1.4393, "step": 5874 }, { "epoch": 0.8415699756481879, "grad_norm": 1.0098856687545776, "learning_rate": 1.2903000838798752e-05, "loss": 1.3064, "step": 5875 }, { "epoch": 0.8417132216014898, "grad_norm": 1.223467230796814, "learning_rate": 1.288021331251319e-05, "loss": 1.1921, "step": 5876 }, { "epoch": 0.8418564675547916, "grad_norm": 1.0124261379241943, "learning_rate": 1.2857444540961805e-05, "loss": 1.2792, "step": 5877 }, { "epoch": 0.8419997135080934, "grad_norm": 1.0983185768127441, "learning_rate": 1.2834694529046209e-05, "loss": 1.2931, "step": 5878 }, { "epoch": 0.8421429594613952, "grad_norm": 1.0370594263076782, "learning_rate": 1.2811963281663853e-05, "loss": 1.5213, "step": 5879 }, { "epoch": 0.842286205414697, "grad_norm": 1.2171412706375122, "learning_rate": 1.278925080370823e-05, "loss": 1.2094, "step": 5880 }, { "epoch": 0.8424294513679988, "grad_norm": 1.090790033340454, "learning_rate": 1.2766557100068787e-05, "loss": 1.463, "step": 5881 }, { "epoch": 0.8425726973213007, "grad_norm": 1.4699547290802002, "learning_rate": 1.2743882175630905e-05, "loss": 1.3987, "step": 5882 }, { "epoch": 0.8427159432746025, "grad_norm": 1.0934771299362183, "learning_rate": 1.2721226035275957e-05, "loss": 1.3034, "step": 5883 }, { "epoch": 0.8428591892279043, "grad_norm": 1.1447834968566895, "learning_rate": 1.2698588683881186e-05, "loss": 1.5012, "step": 5884 }, { "epoch": 0.8430024351812061, "grad_norm": 1.1725603342056274, "learning_rate": 1.2675970126319914e-05, "loss": 1.4844, "step": 5885 }, { "epoch": 0.8431456811345079, "grad_norm": 1.3400081396102905, "learning_rate": 1.2653370367461359e-05, "loss": 1.2747, "step": 5886 }, { "epoch": 0.8432889270878098, "grad_norm": 1.1248846054077148, "learning_rate": 1.2630789412170663e-05, "loss": 1.1765, "step": 5887 }, { "epoch": 0.8434321730411116, "grad_norm": 1.4043021202087402, "learning_rate": 1.2608227265308925e-05, "loss": 1.2933, "step": 5888 }, { "epoch": 0.8435754189944135, "grad_norm": 1.1666061878204346, "learning_rate": 1.2585683931733283e-05, "loss": 1.5009, "step": 5889 }, { "epoch": 0.8437186649477152, "grad_norm": 1.1734256744384766, "learning_rate": 1.256315941629671e-05, "loss": 1.3704, "step": 5890 }, { "epoch": 0.843861910901017, "grad_norm": 1.1151437759399414, "learning_rate": 1.2540653723848183e-05, "loss": 1.5216, "step": 5891 }, { "epoch": 0.8440051568543189, "grad_norm": 1.1374375820159912, "learning_rate": 1.2518166859232627e-05, "loss": 1.2139, "step": 5892 }, { "epoch": 0.8441484028076207, "grad_norm": 1.047575831413269, "learning_rate": 1.2495698827290902e-05, "loss": 1.3208, "step": 5893 }, { "epoch": 0.8442916487609226, "grad_norm": 1.1567860841751099, "learning_rate": 1.2473249632859862e-05, "loss": 1.555, "step": 5894 }, { "epoch": 0.8444348947142243, "grad_norm": 1.0781127214431763, "learning_rate": 1.2450819280772164e-05, "loss": 1.4116, "step": 5895 }, { "epoch": 0.8445781406675261, "grad_norm": 0.9418050646781921, "learning_rate": 1.2428407775856598e-05, "loss": 1.4084, "step": 5896 }, { "epoch": 0.844721386620828, "grad_norm": 1.2272260189056396, "learning_rate": 1.2406015122937809e-05, "loss": 1.34, "step": 5897 }, { "epoch": 0.8448646325741298, "grad_norm": 1.4137194156646729, "learning_rate": 1.2383641326836314e-05, "loss": 1.3228, "step": 5898 }, { "epoch": 0.8450078785274316, "grad_norm": 1.4086955785751343, "learning_rate": 1.2361286392368676e-05, "loss": 1.304, "step": 5899 }, { "epoch": 0.8451511244807334, "grad_norm": 0.9694637060165405, "learning_rate": 1.2338950324347354e-05, "loss": 1.3131, "step": 5900 }, { "epoch": 0.8452943704340352, "grad_norm": 1.0456632375717163, "learning_rate": 1.2316633127580757e-05, "loss": 1.1971, "step": 5901 }, { "epoch": 0.845437616387337, "grad_norm": 1.118066430091858, "learning_rate": 1.2294334806873231e-05, "loss": 1.4297, "step": 5902 }, { "epoch": 0.8455808623406389, "grad_norm": 1.0997216701507568, "learning_rate": 1.2272055367025027e-05, "loss": 1.528, "step": 5903 }, { "epoch": 0.8457241082939407, "grad_norm": 1.1842446327209473, "learning_rate": 1.2249794812832404e-05, "loss": 1.4314, "step": 5904 }, { "epoch": 0.8458673542472425, "grad_norm": 1.0090771913528442, "learning_rate": 1.2227553149087446e-05, "loss": 1.5595, "step": 5905 }, { "epoch": 0.8460106002005443, "grad_norm": 1.0627002716064453, "learning_rate": 1.2205330380578251e-05, "loss": 1.2015, "step": 5906 }, { "epoch": 0.8461538461538461, "grad_norm": 1.1673386096954346, "learning_rate": 1.2183126512088882e-05, "loss": 1.4194, "step": 5907 }, { "epoch": 0.846297092107148, "grad_norm": 1.0688797235488892, "learning_rate": 1.216094154839923e-05, "loss": 1.3401, "step": 5908 }, { "epoch": 0.8464403380604498, "grad_norm": 1.3212735652923584, "learning_rate": 1.2138775494285182e-05, "loss": 1.4979, "step": 5909 }, { "epoch": 0.8465835840137517, "grad_norm": 1.032102346420288, "learning_rate": 1.2116628354518544e-05, "loss": 1.4245, "step": 5910 }, { "epoch": 0.8467268299670534, "grad_norm": 1.2318929433822632, "learning_rate": 1.2094500133867038e-05, "loss": 1.1584, "step": 5911 }, { "epoch": 0.8468700759203552, "grad_norm": 1.0476665496826172, "learning_rate": 1.207239083709435e-05, "loss": 1.2911, "step": 5912 }, { "epoch": 0.8470133218736571, "grad_norm": 1.0462672710418701, "learning_rate": 1.2050300468960007e-05, "loss": 1.4615, "step": 5913 }, { "epoch": 0.8471565678269589, "grad_norm": 1.345977783203125, "learning_rate": 1.2028229034219584e-05, "loss": 1.3692, "step": 5914 }, { "epoch": 0.8472998137802608, "grad_norm": 1.1413146257400513, "learning_rate": 1.2006176537624491e-05, "loss": 1.2851, "step": 5915 }, { "epoch": 0.8474430597335625, "grad_norm": 1.1037787199020386, "learning_rate": 1.1984142983922075e-05, "loss": 1.344, "step": 5916 }, { "epoch": 0.8475863056868643, "grad_norm": 1.1656603813171387, "learning_rate": 1.1962128377855597e-05, "loss": 1.5124, "step": 5917 }, { "epoch": 0.8477295516401662, "grad_norm": 1.2180595397949219, "learning_rate": 1.194013272416431e-05, "loss": 1.3643, "step": 5918 }, { "epoch": 0.847872797593468, "grad_norm": 1.0176746845245361, "learning_rate": 1.1918156027583294e-05, "loss": 1.3196, "step": 5919 }, { "epoch": 0.8480160435467698, "grad_norm": 1.0788555145263672, "learning_rate": 1.1896198292843597e-05, "loss": 1.5907, "step": 5920 }, { "epoch": 0.8481592895000716, "grad_norm": 1.145646333694458, "learning_rate": 1.187425952467217e-05, "loss": 1.5551, "step": 5921 }, { "epoch": 0.8483025354533734, "grad_norm": 1.170519232749939, "learning_rate": 1.1852339727791906e-05, "loss": 1.5744, "step": 5922 }, { "epoch": 0.8484457814066753, "grad_norm": 1.2268024682998657, "learning_rate": 1.1830438906921592e-05, "loss": 1.3744, "step": 5923 }, { "epoch": 0.8485890273599771, "grad_norm": 1.0629911422729492, "learning_rate": 1.1808557066775883e-05, "loss": 1.2968, "step": 5924 }, { "epoch": 0.8487322733132789, "grad_norm": 1.0029101371765137, "learning_rate": 1.1786694212065463e-05, "loss": 1.3194, "step": 5925 }, { "epoch": 0.8488755192665807, "grad_norm": 1.829845905303955, "learning_rate": 1.1764850347496859e-05, "loss": 1.4839, "step": 5926 }, { "epoch": 0.8490187652198825, "grad_norm": 1.2716953754425049, "learning_rate": 1.1743025477772474e-05, "loss": 1.3762, "step": 5927 }, { "epoch": 0.8491620111731844, "grad_norm": 1.1459343433380127, "learning_rate": 1.1721219607590683e-05, "loss": 1.357, "step": 5928 }, { "epoch": 0.8493052571264862, "grad_norm": 1.026037335395813, "learning_rate": 1.1699432741645754e-05, "loss": 1.3712, "step": 5929 }, { "epoch": 0.849448503079788, "grad_norm": 1.1822633743286133, "learning_rate": 1.1677664884627847e-05, "loss": 1.4854, "step": 5930 }, { "epoch": 0.8495917490330898, "grad_norm": 1.2121442556381226, "learning_rate": 1.1655916041223059e-05, "loss": 1.1931, "step": 5931 }, { "epoch": 0.8497349949863916, "grad_norm": 1.492619514465332, "learning_rate": 1.1634186216113363e-05, "loss": 1.2943, "step": 5932 }, { "epoch": 0.8498782409396934, "grad_norm": 1.2002801895141602, "learning_rate": 1.1612475413976686e-05, "loss": 1.4388, "step": 5933 }, { "epoch": 0.8500214868929953, "grad_norm": 1.2955142259597778, "learning_rate": 1.159078363948678e-05, "loss": 1.5117, "step": 5934 }, { "epoch": 0.8501647328462971, "grad_norm": 1.133547067642212, "learning_rate": 1.1569110897313351e-05, "loss": 1.4156, "step": 5935 }, { "epoch": 0.850307978799599, "grad_norm": 1.150070071220398, "learning_rate": 1.1547457192122058e-05, "loss": 1.2721, "step": 5936 }, { "epoch": 0.8504512247529007, "grad_norm": 1.137765884399414, "learning_rate": 1.1525822528574349e-05, "loss": 1.4678, "step": 5937 }, { "epoch": 0.8505944707062025, "grad_norm": 1.1612160205841064, "learning_rate": 1.1504206911327653e-05, "loss": 1.5187, "step": 5938 }, { "epoch": 0.8507377166595044, "grad_norm": 1.3049068450927734, "learning_rate": 1.1482610345035283e-05, "loss": 1.3881, "step": 5939 }, { "epoch": 0.8508809626128062, "grad_norm": 1.0773882865905762, "learning_rate": 1.146103283434643e-05, "loss": 1.6275, "step": 5940 }, { "epoch": 0.851024208566108, "grad_norm": 1.176719069480896, "learning_rate": 1.1439474383906223e-05, "loss": 1.3142, "step": 5941 }, { "epoch": 0.8511674545194098, "grad_norm": 1.2327150106430054, "learning_rate": 1.141793499835564e-05, "loss": 1.5813, "step": 5942 }, { "epoch": 0.8513107004727116, "grad_norm": 1.1388254165649414, "learning_rate": 1.1396414682331592e-05, "loss": 1.2359, "step": 5943 }, { "epoch": 0.8514539464260135, "grad_norm": 1.1602637767791748, "learning_rate": 1.1374913440466872e-05, "loss": 1.3281, "step": 5944 }, { "epoch": 0.8515971923793153, "grad_norm": 1.0825024843215942, "learning_rate": 1.1353431277390126e-05, "loss": 1.4394, "step": 5945 }, { "epoch": 0.8517404383326171, "grad_norm": 1.2587839365005493, "learning_rate": 1.1331968197725984e-05, "loss": 1.49, "step": 5946 }, { "epoch": 0.8518836842859189, "grad_norm": 1.001339316368103, "learning_rate": 1.1310524206094919e-05, "loss": 1.4264, "step": 5947 }, { "epoch": 0.8520269302392207, "grad_norm": 1.0211875438690186, "learning_rate": 1.128909930711325e-05, "loss": 1.635, "step": 5948 }, { "epoch": 0.8521701761925226, "grad_norm": 1.0451706647872925, "learning_rate": 1.126769350539324e-05, "loss": 1.3714, "step": 5949 }, { "epoch": 0.8523134221458244, "grad_norm": 1.1826324462890625, "learning_rate": 1.1246306805543038e-05, "loss": 1.4431, "step": 5950 }, { "epoch": 0.8524566680991262, "grad_norm": 1.2270385026931763, "learning_rate": 1.1224939212166662e-05, "loss": 1.5022, "step": 5951 }, { "epoch": 0.852599914052428, "grad_norm": 1.1215671300888062, "learning_rate": 1.1203590729864033e-05, "loss": 1.423, "step": 5952 }, { "epoch": 0.8527431600057298, "grad_norm": 1.0732940435409546, "learning_rate": 1.1182261363230962e-05, "loss": 1.2909, "step": 5953 }, { "epoch": 0.8528864059590316, "grad_norm": 1.3129091262817383, "learning_rate": 1.116095111685913e-05, "loss": 1.3696, "step": 5954 }, { "epoch": 0.8530296519123335, "grad_norm": 1.207662582397461, "learning_rate": 1.1139659995336082e-05, "loss": 1.1986, "step": 5955 }, { "epoch": 0.8531728978656353, "grad_norm": 1.2096807956695557, "learning_rate": 1.1118388003245262e-05, "loss": 1.3376, "step": 5956 }, { "epoch": 0.8533161438189372, "grad_norm": 1.0616837739944458, "learning_rate": 1.1097135145166072e-05, "loss": 1.4806, "step": 5957 }, { "epoch": 0.8534593897722389, "grad_norm": 1.2491848468780518, "learning_rate": 1.1075901425673651e-05, "loss": 1.4954, "step": 5958 }, { "epoch": 0.8536026357255407, "grad_norm": 1.0866069793701172, "learning_rate": 1.1054686849339124e-05, "loss": 1.4437, "step": 5959 }, { "epoch": 0.8537458816788426, "grad_norm": 0.9453035593032837, "learning_rate": 1.103349142072947e-05, "loss": 1.438, "step": 5960 }, { "epoch": 0.8538891276321444, "grad_norm": 1.4669052362442017, "learning_rate": 1.1012315144407538e-05, "loss": 1.2209, "step": 5961 }, { "epoch": 0.8540323735854463, "grad_norm": 1.069587230682373, "learning_rate": 1.0991158024932068e-05, "loss": 1.4056, "step": 5962 }, { "epoch": 0.854175619538748, "grad_norm": 1.2838234901428223, "learning_rate": 1.0970020066857601e-05, "loss": 1.2834, "step": 5963 }, { "epoch": 0.8543188654920498, "grad_norm": 1.108183741569519, "learning_rate": 1.0948901274734691e-05, "loss": 1.3648, "step": 5964 }, { "epoch": 0.8544621114453517, "grad_norm": 0.9594968557357788, "learning_rate": 1.0927801653109682e-05, "loss": 1.076, "step": 5965 }, { "epoch": 0.8546053573986535, "grad_norm": 1.014965295791626, "learning_rate": 1.0906721206524762e-05, "loss": 1.4553, "step": 5966 }, { "epoch": 0.8547486033519553, "grad_norm": 0.9620893001556396, "learning_rate": 1.0885659939518033e-05, "loss": 1.4689, "step": 5967 }, { "epoch": 0.8548918493052571, "grad_norm": 1.0095683336257935, "learning_rate": 1.0864617856623516e-05, "loss": 1.5036, "step": 5968 }, { "epoch": 0.8550350952585589, "grad_norm": 1.3129596710205078, "learning_rate": 1.0843594962371007e-05, "loss": 1.3039, "step": 5969 }, { "epoch": 0.8551783412118608, "grad_norm": 1.3076528310775757, "learning_rate": 1.0822591261286207e-05, "loss": 1.5457, "step": 5970 }, { "epoch": 0.8553215871651626, "grad_norm": 1.1766414642333984, "learning_rate": 1.0801606757890725e-05, "loss": 1.5201, "step": 5971 }, { "epoch": 0.8554648331184644, "grad_norm": 1.141672134399414, "learning_rate": 1.078064145670199e-05, "loss": 1.1997, "step": 5972 }, { "epoch": 0.8556080790717662, "grad_norm": 1.2534316778182983, "learning_rate": 1.0759695362233324e-05, "loss": 1.4074, "step": 5973 }, { "epoch": 0.855751325025068, "grad_norm": 1.0227153301239014, "learning_rate": 1.0738768478993854e-05, "loss": 1.4809, "step": 5974 }, { "epoch": 0.8558945709783699, "grad_norm": 1.2206075191497803, "learning_rate": 1.0717860811488689e-05, "loss": 1.4397, "step": 5975 }, { "epoch": 0.8560378169316717, "grad_norm": 1.250895380973816, "learning_rate": 1.0696972364218705e-05, "loss": 1.4707, "step": 5976 }, { "epoch": 0.8561810628849735, "grad_norm": 1.3817213773727417, "learning_rate": 1.0676103141680649e-05, "loss": 1.5657, "step": 5977 }, { "epoch": 0.8563243088382754, "grad_norm": 1.0946513414382935, "learning_rate": 1.0655253148367173e-05, "loss": 1.3594, "step": 5978 }, { "epoch": 0.8564675547915771, "grad_norm": 1.1710284948349, "learning_rate": 1.0634422388766752e-05, "loss": 1.3982, "step": 5979 }, { "epoch": 0.8566108007448789, "grad_norm": 1.1177366971969604, "learning_rate": 1.0613610867363722e-05, "loss": 1.1394, "step": 5980 }, { "epoch": 0.8567540466981808, "grad_norm": 1.147024154663086, "learning_rate": 1.059281858863832e-05, "loss": 1.2615, "step": 5981 }, { "epoch": 0.8568972926514826, "grad_norm": 1.2107737064361572, "learning_rate": 1.0572045557066579e-05, "loss": 1.2852, "step": 5982 }, { "epoch": 0.8570405386047845, "grad_norm": 1.1890567541122437, "learning_rate": 1.0551291777120464e-05, "loss": 1.2821, "step": 5983 }, { "epoch": 0.8571837845580862, "grad_norm": 0.940528392791748, "learning_rate": 1.053055725326768e-05, "loss": 1.5298, "step": 5984 }, { "epoch": 0.857327030511388, "grad_norm": 1.2618837356567383, "learning_rate": 1.0509841989971891e-05, "loss": 1.5047, "step": 5985 }, { "epoch": 0.8574702764646899, "grad_norm": 1.2272168397903442, "learning_rate": 1.0489145991692618e-05, "loss": 1.3467, "step": 5986 }, { "epoch": 0.8576135224179917, "grad_norm": 0.9903528690338135, "learning_rate": 1.0468469262885139e-05, "loss": 1.5234, "step": 5987 }, { "epoch": 0.8577567683712936, "grad_norm": 0.9970590472221375, "learning_rate": 1.0447811808000673e-05, "loss": 1.2348, "step": 5988 }, { "epoch": 0.8579000143245953, "grad_norm": 1.172945499420166, "learning_rate": 1.0427173631486254e-05, "loss": 1.4641, "step": 5989 }, { "epoch": 0.8580432602778971, "grad_norm": 1.0118929147720337, "learning_rate": 1.0406554737784757e-05, "loss": 1.5057, "step": 5990 }, { "epoch": 0.858186506231199, "grad_norm": 1.046926498413086, "learning_rate": 1.0385955131334958e-05, "loss": 1.2817, "step": 5991 }, { "epoch": 0.8583297521845008, "grad_norm": 1.1159666776657104, "learning_rate": 1.0365374816571382e-05, "loss": 1.3801, "step": 5992 }, { "epoch": 0.8584729981378026, "grad_norm": 1.0702989101409912, "learning_rate": 1.0344813797924501e-05, "loss": 1.1552, "step": 5993 }, { "epoch": 0.8586162440911044, "grad_norm": 1.0954008102416992, "learning_rate": 1.0324272079820618e-05, "loss": 1.334, "step": 5994 }, { "epoch": 0.8587594900444062, "grad_norm": 1.1706568002700806, "learning_rate": 1.0303749666681806e-05, "loss": 1.2833, "step": 5995 }, { "epoch": 0.8589027359977081, "grad_norm": 1.0864201784133911, "learning_rate": 1.0283246562926041e-05, "loss": 1.4798, "step": 5996 }, { "epoch": 0.8590459819510099, "grad_norm": 1.2694381475448608, "learning_rate": 1.0262762772967172e-05, "loss": 1.3743, "step": 5997 }, { "epoch": 0.8591892279043117, "grad_norm": 1.3109025955200195, "learning_rate": 1.0242298301214814e-05, "loss": 1.4325, "step": 5998 }, { "epoch": 0.8593324738576136, "grad_norm": 1.3905590772628784, "learning_rate": 1.0221853152074468e-05, "loss": 1.313, "step": 5999 }, { "epoch": 0.8594757198109153, "grad_norm": 1.391204833984375, "learning_rate": 1.0201427329947477e-05, "loss": 1.3297, "step": 6000 }, { "epoch": 0.8596189657642171, "grad_norm": 1.0731093883514404, "learning_rate": 1.0181020839231014e-05, "loss": 1.3513, "step": 6001 }, { "epoch": 0.859762211717519, "grad_norm": 1.1268808841705322, "learning_rate": 1.0160633684318088e-05, "loss": 1.3535, "step": 6002 }, { "epoch": 0.8599054576708208, "grad_norm": 1.1346286535263062, "learning_rate": 1.0140265869597554e-05, "loss": 1.5329, "step": 6003 }, { "epoch": 0.8600487036241227, "grad_norm": 1.3839482069015503, "learning_rate": 1.0119917399454105e-05, "loss": 1.5164, "step": 6004 }, { "epoch": 0.8601919495774244, "grad_norm": 1.0304023027420044, "learning_rate": 1.0099588278268246e-05, "loss": 1.3203, "step": 6005 }, { "epoch": 0.8603351955307262, "grad_norm": 1.4183589220046997, "learning_rate": 1.0079278510416313e-05, "loss": 1.4108, "step": 6006 }, { "epoch": 0.8604784414840281, "grad_norm": 0.9873524308204651, "learning_rate": 1.0058988100270561e-05, "loss": 1.2691, "step": 6007 }, { "epoch": 0.8606216874373299, "grad_norm": 1.2403537034988403, "learning_rate": 1.0038717052198965e-05, "loss": 1.3485, "step": 6008 }, { "epoch": 0.8607649333906318, "grad_norm": 1.1356829404830933, "learning_rate": 1.0018465370565388e-05, "loss": 1.472, "step": 6009 }, { "epoch": 0.8609081793439335, "grad_norm": 1.0884668827056885, "learning_rate": 9.998233059729523e-06, "loss": 1.2631, "step": 6010 }, { "epoch": 0.8610514252972353, "grad_norm": 1.1299662590026855, "learning_rate": 9.978020124046882e-06, "loss": 1.674, "step": 6011 }, { "epoch": 0.8611946712505372, "grad_norm": 1.1113313436508179, "learning_rate": 9.957826567868822e-06, "loss": 1.4931, "step": 6012 }, { "epoch": 0.861337917203839, "grad_norm": 0.9953776597976685, "learning_rate": 9.937652395542474e-06, "loss": 1.3354, "step": 6013 }, { "epoch": 0.8614811631571409, "grad_norm": 1.106648564338684, "learning_rate": 9.917497611410875e-06, "loss": 1.2895, "step": 6014 }, { "epoch": 0.8616244091104426, "grad_norm": 0.8883472084999084, "learning_rate": 9.897362219812879e-06, "loss": 1.3818, "step": 6015 }, { "epoch": 0.8617676550637444, "grad_norm": 1.147454857826233, "learning_rate": 9.877246225083082e-06, "loss": 1.5192, "step": 6016 }, { "epoch": 0.8619109010170463, "grad_norm": 1.0401158332824707, "learning_rate": 9.857149631551955e-06, "loss": 1.3948, "step": 6017 }, { "epoch": 0.8620541469703481, "grad_norm": 1.001343011856079, "learning_rate": 9.837072443545858e-06, "loss": 1.2575, "step": 6018 }, { "epoch": 0.8621973929236499, "grad_norm": 0.9508216381072998, "learning_rate": 9.81701466538687e-06, "loss": 1.545, "step": 6019 }, { "epoch": 0.8623406388769517, "grad_norm": 1.2408112287521362, "learning_rate": 9.796976301392934e-06, "loss": 1.4792, "step": 6020 }, { "epoch": 0.8624838848302535, "grad_norm": 1.1150646209716797, "learning_rate": 9.776957355877825e-06, "loss": 1.4808, "step": 6021 }, { "epoch": 0.8626271307835554, "grad_norm": 1.0932170152664185, "learning_rate": 9.756957833151115e-06, "loss": 1.2743, "step": 6022 }, { "epoch": 0.8627703767368572, "grad_norm": 1.3037172555923462, "learning_rate": 9.736977737518249e-06, "loss": 1.4166, "step": 6023 }, { "epoch": 0.862913622690159, "grad_norm": 1.2514820098876953, "learning_rate": 9.717017073280366e-06, "loss": 1.2961, "step": 6024 }, { "epoch": 0.8630568686434609, "grad_norm": 1.182352900505066, "learning_rate": 9.697075844734571e-06, "loss": 1.1961, "step": 6025 }, { "epoch": 0.8632001145967626, "grad_norm": 1.0583449602127075, "learning_rate": 9.67715405617371e-06, "loss": 1.3532, "step": 6026 }, { "epoch": 0.8633433605500644, "grad_norm": 0.9429858326911926, "learning_rate": 9.657251711886427e-06, "loss": 1.0791, "step": 6027 }, { "epoch": 0.8634866065033663, "grad_norm": 1.446860432624817, "learning_rate": 9.63736881615721e-06, "loss": 1.3141, "step": 6028 }, { "epoch": 0.8636298524566681, "grad_norm": 1.2455781698226929, "learning_rate": 9.617505373266355e-06, "loss": 1.5134, "step": 6029 }, { "epoch": 0.86377309840997, "grad_norm": 1.1778812408447266, "learning_rate": 9.597661387489987e-06, "loss": 1.6375, "step": 6030 }, { "epoch": 0.8639163443632717, "grad_norm": 1.1342664957046509, "learning_rate": 9.577836863099999e-06, "loss": 1.5913, "step": 6031 }, { "epoch": 0.8640595903165735, "grad_norm": 1.194433331489563, "learning_rate": 9.558031804364142e-06, "loss": 1.5281, "step": 6032 }, { "epoch": 0.8642028362698754, "grad_norm": 1.1607977151870728, "learning_rate": 9.538246215545953e-06, "loss": 1.3322, "step": 6033 }, { "epoch": 0.8643460822231772, "grad_norm": 1.4274064302444458, "learning_rate": 9.518480100904769e-06, "loss": 1.4178, "step": 6034 }, { "epoch": 0.864489328176479, "grad_norm": 1.1272894144058228, "learning_rate": 9.498733464695719e-06, "loss": 1.1893, "step": 6035 }, { "epoch": 0.8646325741297808, "grad_norm": 1.0932059288024902, "learning_rate": 9.479006311169835e-06, "loss": 1.3861, "step": 6036 }, { "epoch": 0.8647758200830826, "grad_norm": 1.0343012809753418, "learning_rate": 9.459298644573833e-06, "loss": 1.3617, "step": 6037 }, { "epoch": 0.8649190660363845, "grad_norm": 1.1004201173782349, "learning_rate": 9.439610469150295e-06, "loss": 1.4174, "step": 6038 }, { "epoch": 0.8650623119896863, "grad_norm": 1.3755923509597778, "learning_rate": 9.419941789137598e-06, "loss": 1.3267, "step": 6039 }, { "epoch": 0.8652055579429881, "grad_norm": 1.0864121913909912, "learning_rate": 9.400292608769933e-06, "loss": 1.4399, "step": 6040 }, { "epoch": 0.8653488038962899, "grad_norm": 1.5144113302230835, "learning_rate": 9.380662932277285e-06, "loss": 1.3598, "step": 6041 }, { "epoch": 0.8654920498495917, "grad_norm": 1.0689212083816528, "learning_rate": 9.361052763885392e-06, "loss": 1.428, "step": 6042 }, { "epoch": 0.8656352958028936, "grad_norm": 1.1189706325531006, "learning_rate": 9.341462107815891e-06, "loss": 1.3398, "step": 6043 }, { "epoch": 0.8657785417561954, "grad_norm": 1.2770318984985352, "learning_rate": 9.321890968286173e-06, "loss": 1.3598, "step": 6044 }, { "epoch": 0.8659217877094972, "grad_norm": 1.0208467245101929, "learning_rate": 9.302339349509371e-06, "loss": 1.3862, "step": 6045 }, { "epoch": 0.8660650336627991, "grad_norm": 1.0923762321472168, "learning_rate": 9.28280725569447e-06, "loss": 1.3155, "step": 6046 }, { "epoch": 0.8662082796161008, "grad_norm": 1.1141787767410278, "learning_rate": 9.263294691046297e-06, "loss": 1.3555, "step": 6047 }, { "epoch": 0.8663515255694026, "grad_norm": 1.1227562427520752, "learning_rate": 9.243801659765383e-06, "loss": 1.4133, "step": 6048 }, { "epoch": 0.8664947715227045, "grad_norm": 1.1975488662719727, "learning_rate": 9.224328166048101e-06, "loss": 1.3672, "step": 6049 }, { "epoch": 0.8666380174760063, "grad_norm": 0.9009758830070496, "learning_rate": 9.204874214086611e-06, "loss": 1.3071, "step": 6050 }, { "epoch": 0.8667812634293082, "grad_norm": 1.150100588798523, "learning_rate": 9.185439808068885e-06, "loss": 1.4024, "step": 6051 }, { "epoch": 0.8669245093826099, "grad_norm": 1.147955298423767, "learning_rate": 9.166024952178665e-06, "loss": 1.4583, "step": 6052 }, { "epoch": 0.8670677553359117, "grad_norm": 1.0562922954559326, "learning_rate": 9.14662965059544e-06, "loss": 1.3527, "step": 6053 }, { "epoch": 0.8672110012892136, "grad_norm": 1.4133577346801758, "learning_rate": 9.127253907494604e-06, "loss": 1.321, "step": 6054 }, { "epoch": 0.8673542472425154, "grad_norm": 1.066960096359253, "learning_rate": 9.107897727047276e-06, "loss": 1.4816, "step": 6055 }, { "epoch": 0.8674974931958173, "grad_norm": 1.1348562240600586, "learning_rate": 9.088561113420302e-06, "loss": 1.4665, "step": 6056 }, { "epoch": 0.867640739149119, "grad_norm": 1.139683485031128, "learning_rate": 9.069244070776428e-06, "loss": 1.448, "step": 6057 }, { "epoch": 0.8677839851024208, "grad_norm": 1.250435709953308, "learning_rate": 9.049946603274106e-06, "loss": 1.3564, "step": 6058 }, { "epoch": 0.8679272310557227, "grad_norm": 1.0921661853790283, "learning_rate": 9.030668715067625e-06, "loss": 1.4496, "step": 6059 }, { "epoch": 0.8680704770090245, "grad_norm": 1.3178164958953857, "learning_rate": 9.011410410307019e-06, "loss": 1.5559, "step": 6060 }, { "epoch": 0.8682137229623264, "grad_norm": 1.1543413400650024, "learning_rate": 8.99217169313814e-06, "loss": 1.5272, "step": 6061 }, { "epoch": 0.8683569689156281, "grad_norm": 0.9124088287353516, "learning_rate": 8.972952567702619e-06, "loss": 1.4914, "step": 6062 }, { "epoch": 0.8685002148689299, "grad_norm": 1.0892658233642578, "learning_rate": 8.953753038137825e-06, "loss": 1.4456, "step": 6063 }, { "epoch": 0.8686434608222318, "grad_norm": 1.132536768913269, "learning_rate": 8.934573108576937e-06, "loss": 1.4949, "step": 6064 }, { "epoch": 0.8687867067755336, "grad_norm": 1.110290765762329, "learning_rate": 8.915412783148969e-06, "loss": 1.4092, "step": 6065 }, { "epoch": 0.8689299527288354, "grad_norm": 1.1697853803634644, "learning_rate": 8.896272065978628e-06, "loss": 1.3386, "step": 6066 }, { "epoch": 0.8690731986821373, "grad_norm": 1.2351714372634888, "learning_rate": 8.87715096118642e-06, "loss": 1.43, "step": 6067 }, { "epoch": 0.869216444635439, "grad_norm": 0.9747641682624817, "learning_rate": 8.858049472888707e-06, "loss": 1.2846, "step": 6068 }, { "epoch": 0.8693596905887409, "grad_norm": 1.0505380630493164, "learning_rate": 8.838967605197512e-06, "loss": 1.3835, "step": 6069 }, { "epoch": 0.8695029365420427, "grad_norm": 1.1157861948013306, "learning_rate": 8.819905362220704e-06, "loss": 1.4389, "step": 6070 }, { "epoch": 0.8696461824953445, "grad_norm": 1.2683335542678833, "learning_rate": 8.800862748061911e-06, "loss": 1.4738, "step": 6071 }, { "epoch": 0.8697894284486464, "grad_norm": 1.2032376527786255, "learning_rate": 8.781839766820543e-06, "loss": 1.3877, "step": 6072 }, { "epoch": 0.8699326744019481, "grad_norm": 1.1583251953125, "learning_rate": 8.762836422591802e-06, "loss": 1.2302, "step": 6073 }, { "epoch": 0.8700759203552499, "grad_norm": 1.1057093143463135, "learning_rate": 8.743852719466561e-06, "loss": 1.3888, "step": 6074 }, { "epoch": 0.8702191663085518, "grad_norm": 1.1242965459823608, "learning_rate": 8.724888661531616e-06, "loss": 1.461, "step": 6075 }, { "epoch": 0.8703624122618536, "grad_norm": 1.1107611656188965, "learning_rate": 8.705944252869446e-06, "loss": 1.2694, "step": 6076 }, { "epoch": 0.8705056582151555, "grad_norm": 1.198706030845642, "learning_rate": 8.68701949755829e-06, "loss": 1.3636, "step": 6077 }, { "epoch": 0.8706489041684572, "grad_norm": 1.0684045553207397, "learning_rate": 8.668114399672179e-06, "loss": 1.3986, "step": 6078 }, { "epoch": 0.870792150121759, "grad_norm": 1.1553736925125122, "learning_rate": 8.649228963280931e-06, "loss": 1.3306, "step": 6079 }, { "epoch": 0.8709353960750609, "grad_norm": 1.1903131008148193, "learning_rate": 8.630363192450109e-06, "loss": 1.6665, "step": 6080 }, { "epoch": 0.8710786420283627, "grad_norm": 1.0007792711257935, "learning_rate": 8.611517091241051e-06, "loss": 1.2915, "step": 6081 }, { "epoch": 0.8712218879816646, "grad_norm": 1.152894377708435, "learning_rate": 8.592690663710834e-06, "loss": 1.321, "step": 6082 }, { "epoch": 0.8713651339349663, "grad_norm": 1.3454468250274658, "learning_rate": 8.573883913912361e-06, "loss": 1.5207, "step": 6083 }, { "epoch": 0.8715083798882681, "grad_norm": 1.0143853425979614, "learning_rate": 8.555096845894228e-06, "loss": 1.3944, "step": 6084 }, { "epoch": 0.87165162584157, "grad_norm": 1.2324594259262085, "learning_rate": 8.53632946370081e-06, "loss": 1.1229, "step": 6085 }, { "epoch": 0.8717948717948718, "grad_norm": 0.9814085364341736, "learning_rate": 8.517581771372318e-06, "loss": 1.5363, "step": 6086 }, { "epoch": 0.8719381177481736, "grad_norm": 1.215084433555603, "learning_rate": 8.498853772944614e-06, "loss": 1.3396, "step": 6087 }, { "epoch": 0.8720813637014755, "grad_norm": 1.068009376525879, "learning_rate": 8.480145472449386e-06, "loss": 1.5574, "step": 6088 }, { "epoch": 0.8722246096547772, "grad_norm": 1.0876370668411255, "learning_rate": 8.461456873914087e-06, "loss": 1.3283, "step": 6089 }, { "epoch": 0.8723678556080791, "grad_norm": 1.1491124629974365, "learning_rate": 8.442787981361877e-06, "loss": 1.378, "step": 6090 }, { "epoch": 0.8725111015613809, "grad_norm": 1.2923574447631836, "learning_rate": 8.424138798811753e-06, "loss": 1.2391, "step": 6091 }, { "epoch": 0.8726543475146827, "grad_norm": 1.048877239227295, "learning_rate": 8.40550933027836e-06, "loss": 1.3252, "step": 6092 }, { "epoch": 0.8727975934679846, "grad_norm": 1.3648351430892944, "learning_rate": 8.386899579772223e-06, "loss": 1.4141, "step": 6093 }, { "epoch": 0.8729408394212863, "grad_norm": 1.4493716955184937, "learning_rate": 8.368309551299536e-06, "loss": 1.3481, "step": 6094 }, { "epoch": 0.8730840853745881, "grad_norm": 1.2175650596618652, "learning_rate": 8.349739248862265e-06, "loss": 1.4596, "step": 6095 }, { "epoch": 0.87322733132789, "grad_norm": 1.1183571815490723, "learning_rate": 8.331188676458113e-06, "loss": 1.483, "step": 6096 }, { "epoch": 0.8733705772811918, "grad_norm": 1.0167449712753296, "learning_rate": 8.312657838080629e-06, "loss": 1.3089, "step": 6097 }, { "epoch": 0.8735138232344937, "grad_norm": 1.1845693588256836, "learning_rate": 8.294146737718988e-06, "loss": 1.566, "step": 6098 }, { "epoch": 0.8736570691877954, "grad_norm": 1.1240553855895996, "learning_rate": 8.275655379358183e-06, "loss": 1.5152, "step": 6099 }, { "epoch": 0.8738003151410972, "grad_norm": 1.0002434253692627, "learning_rate": 8.257183766978948e-06, "loss": 1.2572, "step": 6100 }, { "epoch": 0.8739435610943991, "grad_norm": 1.2044419050216675, "learning_rate": 8.238731904557773e-06, "loss": 1.2952, "step": 6101 }, { "epoch": 0.8740868070477009, "grad_norm": 1.0145857334136963, "learning_rate": 8.220299796066899e-06, "loss": 1.2579, "step": 6102 }, { "epoch": 0.8742300530010028, "grad_norm": 1.0416197776794434, "learning_rate": 8.201887445474244e-06, "loss": 1.4348, "step": 6103 }, { "epoch": 0.8743732989543045, "grad_norm": 1.27960205078125, "learning_rate": 8.18349485674359e-06, "loss": 1.5171, "step": 6104 }, { "epoch": 0.8745165449076063, "grad_norm": 1.1925181150436401, "learning_rate": 8.165122033834393e-06, "loss": 1.2383, "step": 6105 }, { "epoch": 0.8746597908609082, "grad_norm": 1.2742854356765747, "learning_rate": 8.146768980701858e-06, "loss": 1.3624, "step": 6106 }, { "epoch": 0.87480303681421, "grad_norm": 1.167916178703308, "learning_rate": 8.128435701296943e-06, "loss": 1.2973, "step": 6107 }, { "epoch": 0.8749462827675119, "grad_norm": 1.379451870918274, "learning_rate": 8.110122199566362e-06, "loss": 1.4084, "step": 6108 }, { "epoch": 0.8750895287208137, "grad_norm": 1.1810214519500732, "learning_rate": 8.091828479452535e-06, "loss": 1.3628, "step": 6109 }, { "epoch": 0.8752327746741154, "grad_norm": 1.215956449508667, "learning_rate": 8.073554544893668e-06, "loss": 1.3911, "step": 6110 }, { "epoch": 0.8753760206274173, "grad_norm": 1.3227753639221191, "learning_rate": 8.055300399823684e-06, "loss": 1.273, "step": 6111 }, { "epoch": 0.8755192665807191, "grad_norm": 0.9869691133499146, "learning_rate": 8.03706604817226e-06, "loss": 1.573, "step": 6112 }, { "epoch": 0.8756625125340209, "grad_norm": 1.1820207834243774, "learning_rate": 8.018851493864777e-06, "loss": 1.3727, "step": 6113 }, { "epoch": 0.8758057584873228, "grad_norm": 1.1927971839904785, "learning_rate": 8.000656740822365e-06, "loss": 1.286, "step": 6114 }, { "epoch": 0.8759490044406245, "grad_norm": 1.2951496839523315, "learning_rate": 7.98248179296196e-06, "loss": 1.4139, "step": 6115 }, { "epoch": 0.8760922503939264, "grad_norm": 1.1640747785568237, "learning_rate": 7.964326654196119e-06, "loss": 1.472, "step": 6116 }, { "epoch": 0.8762354963472282, "grad_norm": 1.3665701150894165, "learning_rate": 7.946191328433228e-06, "loss": 1.4205, "step": 6117 }, { "epoch": 0.87637874230053, "grad_norm": 1.1885738372802734, "learning_rate": 7.928075819577375e-06, "loss": 1.4741, "step": 6118 }, { "epoch": 0.8765219882538319, "grad_norm": 1.2125725746154785, "learning_rate": 7.909980131528361e-06, "loss": 1.3111, "step": 6119 }, { "epoch": 0.8766652342071336, "grad_norm": 1.1959190368652344, "learning_rate": 7.891904268181772e-06, "loss": 1.413, "step": 6120 }, { "epoch": 0.8768084801604354, "grad_norm": 1.0228461027145386, "learning_rate": 7.873848233428826e-06, "loss": 1.3631, "step": 6121 }, { "epoch": 0.8769517261137373, "grad_norm": 1.12671959400177, "learning_rate": 7.855812031156618e-06, "loss": 1.456, "step": 6122 }, { "epoch": 0.8770949720670391, "grad_norm": 1.1778712272644043, "learning_rate": 7.837795665247882e-06, "loss": 1.5193, "step": 6123 }, { "epoch": 0.877238218020341, "grad_norm": 1.06819486618042, "learning_rate": 7.819799139581051e-06, "loss": 1.1973, "step": 6124 }, { "epoch": 0.8773814639736427, "grad_norm": 1.1329721212387085, "learning_rate": 7.80182245803035e-06, "loss": 1.3627, "step": 6125 }, { "epoch": 0.8775247099269445, "grad_norm": 1.0377130508422852, "learning_rate": 7.783865624465758e-06, "loss": 1.5195, "step": 6126 }, { "epoch": 0.8776679558802464, "grad_norm": 1.0383020639419556, "learning_rate": 7.765928642752884e-06, "loss": 1.4073, "step": 6127 }, { "epoch": 0.8778112018335482, "grad_norm": 0.970842182636261, "learning_rate": 7.74801151675314e-06, "loss": 1.5243, "step": 6128 }, { "epoch": 0.8779544477868501, "grad_norm": 1.077852487564087, "learning_rate": 7.730114250323627e-06, "loss": 1.3819, "step": 6129 }, { "epoch": 0.8780976937401518, "grad_norm": 1.2311354875564575, "learning_rate": 7.712236847317188e-06, "loss": 1.3826, "step": 6130 }, { "epoch": 0.8782409396934536, "grad_norm": 1.1323307752609253, "learning_rate": 7.6943793115824e-06, "loss": 1.5211, "step": 6131 }, { "epoch": 0.8783841856467555, "grad_norm": 0.9956012964248657, "learning_rate": 7.67654164696352e-06, "loss": 1.3943, "step": 6132 }, { "epoch": 0.8785274316000573, "grad_norm": 1.1283388137817383, "learning_rate": 7.658723857300599e-06, "loss": 1.4347, "step": 6133 }, { "epoch": 0.8786706775533591, "grad_norm": 1.2179752588272095, "learning_rate": 7.640925946429323e-06, "loss": 1.2403, "step": 6134 }, { "epoch": 0.878813923506661, "grad_norm": 1.1626495122909546, "learning_rate": 7.623147918181139e-06, "loss": 1.5918, "step": 6135 }, { "epoch": 0.8789571694599627, "grad_norm": 1.060157060623169, "learning_rate": 7.605389776383276e-06, "loss": 1.4642, "step": 6136 }, { "epoch": 0.8791004154132646, "grad_norm": 1.1356686353683472, "learning_rate": 7.587651524858564e-06, "loss": 1.4386, "step": 6137 }, { "epoch": 0.8792436613665664, "grad_norm": 1.006605625152588, "learning_rate": 7.569933167425625e-06, "loss": 1.2253, "step": 6138 }, { "epoch": 0.8793869073198682, "grad_norm": 1.091781735420227, "learning_rate": 7.552234707898787e-06, "loss": 1.4499, "step": 6139 }, { "epoch": 0.8795301532731701, "grad_norm": 1.2405829429626465, "learning_rate": 7.53455615008809e-06, "loss": 1.3995, "step": 6140 }, { "epoch": 0.8796733992264718, "grad_norm": 1.048988699913025, "learning_rate": 7.516897497799302e-06, "loss": 1.3649, "step": 6141 }, { "epoch": 0.8798166451797736, "grad_norm": 1.1714452505111694, "learning_rate": 7.4992587548338485e-06, "loss": 1.4664, "step": 6142 }, { "epoch": 0.8799598911330755, "grad_norm": 1.2107855081558228, "learning_rate": 7.48163992498897e-06, "loss": 1.4069, "step": 6143 }, { "epoch": 0.8801031370863773, "grad_norm": 1.0568186044692993, "learning_rate": 7.464041012057554e-06, "loss": 1.1892, "step": 6144 }, { "epoch": 0.8802463830396792, "grad_norm": 1.1592568159103394, "learning_rate": 7.446462019828182e-06, "loss": 1.2599, "step": 6145 }, { "epoch": 0.8803896289929809, "grad_norm": 1.1263563632965088, "learning_rate": 7.428902952085171e-06, "loss": 1.2484, "step": 6146 }, { "epoch": 0.8805328749462827, "grad_norm": 1.230603814125061, "learning_rate": 7.411363812608618e-06, "loss": 1.1576, "step": 6147 }, { "epoch": 0.8806761208995846, "grad_norm": 1.098705530166626, "learning_rate": 7.393844605174205e-06, "loss": 1.4294, "step": 6148 }, { "epoch": 0.8808193668528864, "grad_norm": 1.0629899501800537, "learning_rate": 7.376345333553403e-06, "loss": 1.4301, "step": 6149 }, { "epoch": 0.8809626128061883, "grad_norm": 1.1268627643585205, "learning_rate": 7.358866001513364e-06, "loss": 1.4504, "step": 6150 }, { "epoch": 0.88110585875949, "grad_norm": 1.1291261911392212, "learning_rate": 7.3414066128169705e-06, "loss": 1.306, "step": 6151 }, { "epoch": 0.8812491047127918, "grad_norm": 1.3080655336380005, "learning_rate": 7.323967171222801e-06, "loss": 1.494, "step": 6152 }, { "epoch": 0.8813923506660937, "grad_norm": 1.0088893175125122, "learning_rate": 7.306547680485088e-06, "loss": 1.3244, "step": 6153 }, { "epoch": 0.8815355966193955, "grad_norm": 0.9800326824188232, "learning_rate": 7.289148144353875e-06, "loss": 1.279, "step": 6154 }, { "epoch": 0.8816788425726974, "grad_norm": 1.3050402402877808, "learning_rate": 7.271768566574843e-06, "loss": 1.3899, "step": 6155 }, { "epoch": 0.8818220885259992, "grad_norm": 1.2049405574798584, "learning_rate": 7.2544089508893575e-06, "loss": 1.4731, "step": 6156 }, { "epoch": 0.8819653344793009, "grad_norm": 1.164433479309082, "learning_rate": 7.237069301034516e-06, "loss": 1.1818, "step": 6157 }, { "epoch": 0.8821085804326028, "grad_norm": 1.2531239986419678, "learning_rate": 7.219749620743144e-06, "loss": 1.3703, "step": 6158 }, { "epoch": 0.8822518263859046, "grad_norm": 1.045610785484314, "learning_rate": 7.202449913743714e-06, "loss": 1.3215, "step": 6159 }, { "epoch": 0.8823950723392064, "grad_norm": 0.9519723057746887, "learning_rate": 7.185170183760437e-06, "loss": 1.2779, "step": 6160 }, { "epoch": 0.8825383182925083, "grad_norm": 1.0717487335205078, "learning_rate": 7.167910434513214e-06, "loss": 1.4592, "step": 6161 }, { "epoch": 0.88268156424581, "grad_norm": 1.2163496017456055, "learning_rate": 7.15067066971764e-06, "loss": 1.4507, "step": 6162 }, { "epoch": 0.8828248101991119, "grad_norm": 1.1159166097640991, "learning_rate": 7.133450893085003e-06, "loss": 1.4047, "step": 6163 }, { "epoch": 0.8829680561524137, "grad_norm": 1.1471072435379028, "learning_rate": 7.116251108322281e-06, "loss": 1.5463, "step": 6164 }, { "epoch": 0.8831113021057155, "grad_norm": 1.1127512454986572, "learning_rate": 7.099071319132211e-06, "loss": 1.2894, "step": 6165 }, { "epoch": 0.8832545480590174, "grad_norm": 1.1845059394836426, "learning_rate": 7.081911529213126e-06, "loss": 1.4745, "step": 6166 }, { "epoch": 0.8833977940123191, "grad_norm": 1.1931324005126953, "learning_rate": 7.064771742259124e-06, "loss": 1.3776, "step": 6167 }, { "epoch": 0.883541039965621, "grad_norm": 1.2990992069244385, "learning_rate": 7.047651961959978e-06, "loss": 1.3339, "step": 6168 }, { "epoch": 0.8836842859189228, "grad_norm": 1.4355381727218628, "learning_rate": 7.03055219200115e-06, "loss": 1.4341, "step": 6169 }, { "epoch": 0.8838275318722246, "grad_norm": 1.1616709232330322, "learning_rate": 7.013472436063817e-06, "loss": 1.4384, "step": 6170 }, { "epoch": 0.8839707778255265, "grad_norm": 1.2471182346343994, "learning_rate": 6.996412697824772e-06, "loss": 1.3269, "step": 6171 }, { "epoch": 0.8841140237788282, "grad_norm": 1.0993856191635132, "learning_rate": 6.979372980956611e-06, "loss": 1.1826, "step": 6172 }, { "epoch": 0.88425726973213, "grad_norm": 1.3381884098052979, "learning_rate": 6.962353289127555e-06, "loss": 1.3908, "step": 6173 }, { "epoch": 0.8844005156854319, "grad_norm": 1.0381137132644653, "learning_rate": 6.945353626001494e-06, "loss": 1.4413, "step": 6174 }, { "epoch": 0.8845437616387337, "grad_norm": 1.0102453231811523, "learning_rate": 6.928373995238047e-06, "loss": 1.4919, "step": 6175 }, { "epoch": 0.8846870075920356, "grad_norm": 1.1457276344299316, "learning_rate": 6.911414400492544e-06, "loss": 1.5965, "step": 6176 }, { "epoch": 0.8848302535453374, "grad_norm": 1.152938961982727, "learning_rate": 6.89447484541591e-06, "loss": 1.5865, "step": 6177 }, { "epoch": 0.8849734994986391, "grad_norm": 1.1252951622009277, "learning_rate": 6.87755533365485e-06, "loss": 1.3366, "step": 6178 }, { "epoch": 0.885116745451941, "grad_norm": 1.2015928030014038, "learning_rate": 6.860655868851696e-06, "loss": 1.288, "step": 6179 }, { "epoch": 0.8852599914052428, "grad_norm": 1.0570125579833984, "learning_rate": 6.84377645464449e-06, "loss": 1.3644, "step": 6180 }, { "epoch": 0.8854032373585446, "grad_norm": 1.1813807487487793, "learning_rate": 6.826917094666973e-06, "loss": 1.3059, "step": 6181 }, { "epoch": 0.8855464833118465, "grad_norm": 1.1073837280273438, "learning_rate": 6.810077792548508e-06, "loss": 1.4599, "step": 6182 }, { "epoch": 0.8856897292651482, "grad_norm": 1.2193603515625, "learning_rate": 6.793258551914206e-06, "loss": 1.3019, "step": 6183 }, { "epoch": 0.8858329752184501, "grad_norm": 1.099416971206665, "learning_rate": 6.776459376384847e-06, "loss": 1.5789, "step": 6184 }, { "epoch": 0.8859762211717519, "grad_norm": 1.204345941543579, "learning_rate": 6.759680269576852e-06, "loss": 1.3298, "step": 6185 }, { "epoch": 0.8861194671250537, "grad_norm": 1.2014309167861938, "learning_rate": 6.74292123510234e-06, "loss": 1.3985, "step": 6186 }, { "epoch": 0.8862627130783556, "grad_norm": 1.2391036748886108, "learning_rate": 6.726182276569148e-06, "loss": 1.388, "step": 6187 }, { "epoch": 0.8864059590316573, "grad_norm": 1.2592679262161255, "learning_rate": 6.709463397580728e-06, "loss": 1.361, "step": 6188 }, { "epoch": 0.8865492049849591, "grad_norm": 1.0428235530853271, "learning_rate": 6.692764601736268e-06, "loss": 1.4172, "step": 6189 }, { "epoch": 0.886692450938261, "grad_norm": 1.1898137331008911, "learning_rate": 6.67608589263059e-06, "loss": 1.497, "step": 6190 }, { "epoch": 0.8868356968915628, "grad_norm": 1.0648175477981567, "learning_rate": 6.659427273854224e-06, "loss": 1.3356, "step": 6191 }, { "epoch": 0.8869789428448647, "grad_norm": 1.0077075958251953, "learning_rate": 6.642788748993323e-06, "loss": 1.5235, "step": 6192 }, { "epoch": 0.8871221887981664, "grad_norm": 1.1708589792251587, "learning_rate": 6.626170321629776e-06, "loss": 1.3852, "step": 6193 }, { "epoch": 0.8872654347514682, "grad_norm": 1.1584268808364868, "learning_rate": 6.609571995341135e-06, "loss": 1.4857, "step": 6194 }, { "epoch": 0.8874086807047701, "grad_norm": 1.1065008640289307, "learning_rate": 6.592993773700573e-06, "loss": 1.3019, "step": 6195 }, { "epoch": 0.8875519266580719, "grad_norm": 1.1764979362487793, "learning_rate": 6.576435660276969e-06, "loss": 1.2969, "step": 6196 }, { "epoch": 0.8876951726113738, "grad_norm": 0.938894510269165, "learning_rate": 6.559897658634928e-06, "loss": 1.4467, "step": 6197 }, { "epoch": 0.8878384185646756, "grad_norm": 1.2336777448654175, "learning_rate": 6.5433797723346235e-06, "loss": 1.3659, "step": 6198 }, { "epoch": 0.8879816645179773, "grad_norm": 1.179431438446045, "learning_rate": 6.526882004931967e-06, "loss": 1.4337, "step": 6199 }, { "epoch": 0.8881249104712792, "grad_norm": 1.0667303800582886, "learning_rate": 6.510404359978506e-06, "loss": 1.4479, "step": 6200 }, { "epoch": 0.888268156424581, "grad_norm": 0.9629502296447754, "learning_rate": 6.493946841021481e-06, "loss": 1.4861, "step": 6201 }, { "epoch": 0.8884114023778829, "grad_norm": 1.0334997177124023, "learning_rate": 6.477509451603791e-06, "loss": 1.5709, "step": 6202 }, { "epoch": 0.8885546483311847, "grad_norm": 1.1851776838302612, "learning_rate": 6.461092195263973e-06, "loss": 1.514, "step": 6203 }, { "epoch": 0.8886978942844864, "grad_norm": 1.1187702417373657, "learning_rate": 6.444695075536289e-06, "loss": 1.5218, "step": 6204 }, { "epoch": 0.8888411402377883, "grad_norm": 1.0871152877807617, "learning_rate": 6.428318095950647e-06, "loss": 1.4528, "step": 6205 }, { "epoch": 0.8889843861910901, "grad_norm": 1.1080232858657837, "learning_rate": 6.411961260032551e-06, "loss": 1.4234, "step": 6206 }, { "epoch": 0.8891276321443919, "grad_norm": 1.0740792751312256, "learning_rate": 6.39562457130326e-06, "loss": 1.4042, "step": 6207 }, { "epoch": 0.8892708780976938, "grad_norm": 0.9928357005119324, "learning_rate": 6.379308033279641e-06, "loss": 1.4636, "step": 6208 }, { "epoch": 0.8894141240509955, "grad_norm": 1.1339960098266602, "learning_rate": 6.363011649474249e-06, "loss": 1.2933, "step": 6209 }, { "epoch": 0.8895573700042974, "grad_norm": 1.13279390335083, "learning_rate": 6.3467354233953e-06, "loss": 1.3009, "step": 6210 }, { "epoch": 0.8897006159575992, "grad_norm": 1.108750343322754, "learning_rate": 6.3304793585466595e-06, "loss": 1.3581, "step": 6211 }, { "epoch": 0.889843861910901, "grad_norm": 1.044707179069519, "learning_rate": 6.3142434584278596e-06, "loss": 1.2312, "step": 6212 }, { "epoch": 0.8899871078642029, "grad_norm": 1.2037557363510132, "learning_rate": 6.2980277265340726e-06, "loss": 1.4726, "step": 6213 }, { "epoch": 0.8901303538175046, "grad_norm": 1.1017308235168457, "learning_rate": 6.281832166356127e-06, "loss": 1.308, "step": 6214 }, { "epoch": 0.8902735997708064, "grad_norm": 0.9754049181938171, "learning_rate": 6.265656781380591e-06, "loss": 1.3819, "step": 6215 }, { "epoch": 0.8904168457241083, "grad_norm": 1.1346205472946167, "learning_rate": 6.249501575089567e-06, "loss": 1.2307, "step": 6216 }, { "epoch": 0.8905600916774101, "grad_norm": 1.1524813175201416, "learning_rate": 6.233366550960884e-06, "loss": 1.3492, "step": 6217 }, { "epoch": 0.890703337630712, "grad_norm": 1.0112504959106445, "learning_rate": 6.2172517124680305e-06, "loss": 1.3212, "step": 6218 }, { "epoch": 0.8908465835840138, "grad_norm": 1.027686357498169, "learning_rate": 6.20115706308011e-06, "loss": 1.4516, "step": 6219 }, { "epoch": 0.8909898295373155, "grad_norm": 1.053300380706787, "learning_rate": 6.185082606261927e-06, "loss": 1.4111, "step": 6220 }, { "epoch": 0.8911330754906174, "grad_norm": 1.029451608657837, "learning_rate": 6.169028345473881e-06, "loss": 1.2354, "step": 6221 }, { "epoch": 0.8912763214439192, "grad_norm": 1.0877399444580078, "learning_rate": 6.152994284172076e-06, "loss": 1.2952, "step": 6222 }, { "epoch": 0.8914195673972211, "grad_norm": 1.2553833723068237, "learning_rate": 6.136980425808281e-06, "loss": 1.2945, "step": 6223 }, { "epoch": 0.8915628133505229, "grad_norm": 1.2141985893249512, "learning_rate": 6.1209867738298286e-06, "loss": 1.4726, "step": 6224 }, { "epoch": 0.8917060593038246, "grad_norm": 1.0399607419967651, "learning_rate": 6.105013331679754e-06, "loss": 1.2743, "step": 6225 }, { "epoch": 0.8918493052571265, "grad_norm": 1.131771445274353, "learning_rate": 6.089060102796807e-06, "loss": 1.3517, "step": 6226 }, { "epoch": 0.8919925512104283, "grad_norm": 1.0004180669784546, "learning_rate": 6.073127090615271e-06, "loss": 1.3639, "step": 6227 }, { "epoch": 0.8921357971637301, "grad_norm": 1.073381781578064, "learning_rate": 6.057214298565151e-06, "loss": 1.2757, "step": 6228 }, { "epoch": 0.892279043117032, "grad_norm": 1.5473004579544067, "learning_rate": 6.041321730072058e-06, "loss": 1.4834, "step": 6229 }, { "epoch": 0.8924222890703337, "grad_norm": 1.2155991792678833, "learning_rate": 6.025449388557281e-06, "loss": 1.4787, "step": 6230 }, { "epoch": 0.8925655350236356, "grad_norm": 1.261610507965088, "learning_rate": 6.009597277437762e-06, "loss": 1.2563, "step": 6231 }, { "epoch": 0.8927087809769374, "grad_norm": 1.23123037815094, "learning_rate": 5.9937654001260164e-06, "loss": 1.2365, "step": 6232 }, { "epoch": 0.8928520269302392, "grad_norm": 1.1676247119903564, "learning_rate": 5.977953760030297e-06, "loss": 1.3071, "step": 6233 }, { "epoch": 0.8929952728835411, "grad_norm": 1.0263564586639404, "learning_rate": 5.962162360554469e-06, "loss": 1.4977, "step": 6234 }, { "epoch": 0.8931385188368428, "grad_norm": 1.1772758960723877, "learning_rate": 5.9463912050979895e-06, "loss": 1.4995, "step": 6235 }, { "epoch": 0.8932817647901447, "grad_norm": 1.3270292282104492, "learning_rate": 5.930640297056022e-06, "loss": 1.3802, "step": 6236 }, { "epoch": 0.8934250107434465, "grad_norm": 1.3941293954849243, "learning_rate": 5.914909639819344e-06, "loss": 1.4855, "step": 6237 }, { "epoch": 0.8935682566967483, "grad_norm": 1.1911969184875488, "learning_rate": 5.899199236774377e-06, "loss": 1.5061, "step": 6238 }, { "epoch": 0.8937115026500502, "grad_norm": 1.10031259059906, "learning_rate": 5.883509091303174e-06, "loss": 1.5929, "step": 6239 }, { "epoch": 0.8938547486033519, "grad_norm": 1.3534270524978638, "learning_rate": 5.867839206783454e-06, "loss": 1.3095, "step": 6240 }, { "epoch": 0.8939979945566537, "grad_norm": 1.1715387105941772, "learning_rate": 5.852189586588552e-06, "loss": 1.4149, "step": 6241 }, { "epoch": 0.8941412405099556, "grad_norm": 1.1300643682479858, "learning_rate": 5.836560234087418e-06, "loss": 1.2783, "step": 6242 }, { "epoch": 0.8942844864632574, "grad_norm": 0.96632981300354, "learning_rate": 5.820951152644671e-06, "loss": 1.2122, "step": 6243 }, { "epoch": 0.8944277324165593, "grad_norm": 1.2658902406692505, "learning_rate": 5.805362345620602e-06, "loss": 1.2676, "step": 6244 }, { "epoch": 0.8945709783698611, "grad_norm": 1.0595402717590332, "learning_rate": 5.7897938163710365e-06, "loss": 1.2445, "step": 6245 }, { "epoch": 0.8947142243231628, "grad_norm": 1.0426127910614014, "learning_rate": 5.7742455682475384e-06, "loss": 1.6148, "step": 6246 }, { "epoch": 0.8948574702764647, "grad_norm": 1.2170608043670654, "learning_rate": 5.758717604597242e-06, "loss": 1.3968, "step": 6247 }, { "epoch": 0.8950007162297665, "grad_norm": 1.1321265697479248, "learning_rate": 5.743209928762927e-06, "loss": 1.3624, "step": 6248 }, { "epoch": 0.8951439621830684, "grad_norm": 1.5619410276412964, "learning_rate": 5.727722544083036e-06, "loss": 1.2768, "step": 6249 }, { "epoch": 0.8952872081363702, "grad_norm": 1.1112035512924194, "learning_rate": 5.71225545389158e-06, "loss": 1.5167, "step": 6250 }, { "epoch": 0.8954304540896719, "grad_norm": 1.0755828619003296, "learning_rate": 5.696808661518283e-06, "loss": 1.2978, "step": 6251 }, { "epoch": 0.8955737000429738, "grad_norm": 1.126643180847168, "learning_rate": 5.681382170288452e-06, "loss": 1.4209, "step": 6252 }, { "epoch": 0.8957169459962756, "grad_norm": 1.1362624168395996, "learning_rate": 5.665975983522997e-06, "loss": 1.441, "step": 6253 }, { "epoch": 0.8958601919495774, "grad_norm": 1.1947660446166992, "learning_rate": 5.6505901045385e-06, "loss": 1.3414, "step": 6254 }, { "epoch": 0.8960034379028793, "grad_norm": 1.162698745727539, "learning_rate": 5.635224536647188e-06, "loss": 1.2617, "step": 6255 }, { "epoch": 0.896146683856181, "grad_norm": 1.077322244644165, "learning_rate": 5.619879283156859e-06, "loss": 1.3195, "step": 6256 }, { "epoch": 0.8962899298094829, "grad_norm": 1.2524361610412598, "learning_rate": 5.604554347370983e-06, "loss": 1.4668, "step": 6257 }, { "epoch": 0.8964331757627847, "grad_norm": 1.153336763381958, "learning_rate": 5.5892497325886195e-06, "loss": 1.4292, "step": 6258 }, { "epoch": 0.8965764217160865, "grad_norm": 1.1895110607147217, "learning_rate": 5.573965442104489e-06, "loss": 1.3815, "step": 6259 }, { "epoch": 0.8967196676693884, "grad_norm": 1.1177923679351807, "learning_rate": 5.558701479208916e-06, "loss": 1.5314, "step": 6260 }, { "epoch": 0.8968629136226901, "grad_norm": 1.3198795318603516, "learning_rate": 5.5434578471878605e-06, "loss": 1.44, "step": 6261 }, { "epoch": 0.897006159575992, "grad_norm": 0.9956361651420593, "learning_rate": 5.528234549322908e-06, "loss": 1.2026, "step": 6262 }, { "epoch": 0.8971494055292938, "grad_norm": 1.0831609964370728, "learning_rate": 5.513031588891226e-06, "loss": 1.5442, "step": 6263 }, { "epoch": 0.8972926514825956, "grad_norm": 1.251988172531128, "learning_rate": 5.497848969165631e-06, "loss": 1.1963, "step": 6264 }, { "epoch": 0.8974358974358975, "grad_norm": 1.1617047786712646, "learning_rate": 5.482686693414629e-06, "loss": 1.3795, "step": 6265 }, { "epoch": 0.8975791433891993, "grad_norm": 1.0372381210327148, "learning_rate": 5.467544764902221e-06, "loss": 1.3057, "step": 6266 }, { "epoch": 0.897722389342501, "grad_norm": 1.274534821510315, "learning_rate": 5.452423186888111e-06, "loss": 1.3118, "step": 6267 }, { "epoch": 0.8978656352958029, "grad_norm": 1.1666808128356934, "learning_rate": 5.437321962627606e-06, "loss": 1.3124, "step": 6268 }, { "epoch": 0.8980088812491047, "grad_norm": 1.0024056434631348, "learning_rate": 5.422241095371605e-06, "loss": 1.3798, "step": 6269 }, { "epoch": 0.8981521272024066, "grad_norm": 1.1305067539215088, "learning_rate": 5.407180588366689e-06, "loss": 1.4827, "step": 6270 }, { "epoch": 0.8982953731557084, "grad_norm": 1.077620267868042, "learning_rate": 5.392140444854954e-06, "loss": 1.3909, "step": 6271 }, { "epoch": 0.8984386191090101, "grad_norm": 1.2894182205200195, "learning_rate": 5.377120668074209e-06, "loss": 1.5995, "step": 6272 }, { "epoch": 0.898581865062312, "grad_norm": 1.1871757507324219, "learning_rate": 5.362121261257847e-06, "loss": 1.4742, "step": 6273 }, { "epoch": 0.8987251110156138, "grad_norm": 1.1306957006454468, "learning_rate": 5.3471422276348385e-06, "loss": 1.3535, "step": 6274 }, { "epoch": 0.8988683569689156, "grad_norm": 1.464682936668396, "learning_rate": 5.332183570429794e-06, "loss": 1.5667, "step": 6275 }, { "epoch": 0.8990116029222175, "grad_norm": 1.258500337600708, "learning_rate": 5.317245292862994e-06, "loss": 1.4959, "step": 6276 }, { "epoch": 0.8991548488755192, "grad_norm": 1.10562002658844, "learning_rate": 5.302327398150242e-06, "loss": 1.5805, "step": 6277 }, { "epoch": 0.8992980948288211, "grad_norm": 1.3135735988616943, "learning_rate": 5.287429889502993e-06, "loss": 1.344, "step": 6278 }, { "epoch": 0.8994413407821229, "grad_norm": 1.1784064769744873, "learning_rate": 5.272552770128314e-06, "loss": 1.4971, "step": 6279 }, { "epoch": 0.8995845867354247, "grad_norm": 1.2157847881317139, "learning_rate": 5.257696043228888e-06, "loss": 1.3408, "step": 6280 }, { "epoch": 0.8997278326887266, "grad_norm": 1.0904299020767212, "learning_rate": 5.242859712003001e-06, "loss": 1.3473, "step": 6281 }, { "epoch": 0.8998710786420283, "grad_norm": 1.1986788511276245, "learning_rate": 5.22804377964452e-06, "loss": 1.4602, "step": 6282 }, { "epoch": 0.9000143245953302, "grad_norm": 1.0969690084457397, "learning_rate": 5.2132482493429835e-06, "loss": 1.5498, "step": 6283 }, { "epoch": 0.900157570548632, "grad_norm": 1.5916728973388672, "learning_rate": 5.198473124283509e-06, "loss": 1.2926, "step": 6284 }, { "epoch": 0.9003008165019338, "grad_norm": 1.1463223695755005, "learning_rate": 5.183718407646787e-06, "loss": 1.3438, "step": 6285 }, { "epoch": 0.9004440624552357, "grad_norm": 0.9669374227523804, "learning_rate": 5.168984102609142e-06, "loss": 1.4953, "step": 6286 }, { "epoch": 0.9005873084085375, "grad_norm": 0.9574347138404846, "learning_rate": 5.1542702123425264e-06, "loss": 1.4277, "step": 6287 }, { "epoch": 0.9007305543618392, "grad_norm": 1.1715991497039795, "learning_rate": 5.139576740014462e-06, "loss": 1.4603, "step": 6288 }, { "epoch": 0.9008738003151411, "grad_norm": 1.0315641164779663, "learning_rate": 5.124903688788096e-06, "loss": 1.4249, "step": 6289 }, { "epoch": 0.9010170462684429, "grad_norm": 1.1423341035842896, "learning_rate": 5.1102510618221686e-06, "loss": 1.2612, "step": 6290 }, { "epoch": 0.9011602922217448, "grad_norm": 1.0631470680236816, "learning_rate": 5.0956188622710455e-06, "loss": 1.354, "step": 6291 }, { "epoch": 0.9013035381750466, "grad_norm": 1.0131776332855225, "learning_rate": 5.081007093284651e-06, "loss": 1.4634, "step": 6292 }, { "epoch": 0.9014467841283483, "grad_norm": 1.192248821258545, "learning_rate": 5.066415758008536e-06, "loss": 1.3189, "step": 6293 }, { "epoch": 0.9015900300816502, "grad_norm": 1.102292776107788, "learning_rate": 5.051844859583888e-06, "loss": 1.1772, "step": 6294 }, { "epoch": 0.901733276034952, "grad_norm": 1.231027364730835, "learning_rate": 5.03729440114743e-06, "loss": 1.3719, "step": 6295 }, { "epoch": 0.9018765219882539, "grad_norm": 1.1043773889541626, "learning_rate": 5.022764385831524e-06, "loss": 1.499, "step": 6296 }, { "epoch": 0.9020197679415557, "grad_norm": 0.9784703254699707, "learning_rate": 5.008254816764124e-06, "loss": 1.4751, "step": 6297 }, { "epoch": 0.9021630138948574, "grad_norm": 1.026368498802185, "learning_rate": 4.993765697068787e-06, "loss": 1.3144, "step": 6298 }, { "epoch": 0.9023062598481593, "grad_norm": 1.023569941520691, "learning_rate": 4.979297029864672e-06, "loss": 1.3939, "step": 6299 }, { "epoch": 0.9024495058014611, "grad_norm": 1.3257770538330078, "learning_rate": 4.964848818266466e-06, "loss": 1.2656, "step": 6300 }, { "epoch": 0.902592751754763, "grad_norm": 1.141319751739502, "learning_rate": 4.950421065384581e-06, "loss": 1.4167, "step": 6301 }, { "epoch": 0.9027359977080648, "grad_norm": 1.2671982049942017, "learning_rate": 4.936013774324944e-06, "loss": 1.492, "step": 6302 }, { "epoch": 0.9028792436613665, "grad_norm": 1.212394118309021, "learning_rate": 4.9216269481890505e-06, "loss": 1.352, "step": 6303 }, { "epoch": 0.9030224896146684, "grad_norm": 1.2171772718429565, "learning_rate": 4.907260590074048e-06, "loss": 1.3144, "step": 6304 }, { "epoch": 0.9031657355679702, "grad_norm": 1.1772184371948242, "learning_rate": 4.892914703072671e-06, "loss": 1.3498, "step": 6305 }, { "epoch": 0.903308981521272, "grad_norm": 1.1683534383773804, "learning_rate": 4.8785892902732275e-06, "loss": 1.3208, "step": 6306 }, { "epoch": 0.9034522274745739, "grad_norm": 1.1672297716140747, "learning_rate": 4.864284354759607e-06, "loss": 1.2106, "step": 6307 }, { "epoch": 0.9035954734278757, "grad_norm": 1.176900029182434, "learning_rate": 4.849999899611324e-06, "loss": 1.2575, "step": 6308 }, { "epoch": 0.9037387193811774, "grad_norm": 1.0992001295089722, "learning_rate": 4.835735927903473e-06, "loss": 1.4681, "step": 6309 }, { "epoch": 0.9038819653344793, "grad_norm": 1.0417816638946533, "learning_rate": 4.821492442706732e-06, "loss": 1.2558, "step": 6310 }, { "epoch": 0.9040252112877811, "grad_norm": 1.1870205402374268, "learning_rate": 4.807269447087348e-06, "loss": 1.3885, "step": 6311 }, { "epoch": 0.904168457241083, "grad_norm": 1.010421872138977, "learning_rate": 4.793066944107205e-06, "loss": 1.4998, "step": 6312 }, { "epoch": 0.9043117031943848, "grad_norm": 1.0818467140197754, "learning_rate": 4.778884936823758e-06, "loss": 1.2913, "step": 6313 }, { "epoch": 0.9044549491476865, "grad_norm": 1.1862214803695679, "learning_rate": 4.764723428290019e-06, "loss": 1.4507, "step": 6314 }, { "epoch": 0.9045981951009884, "grad_norm": 1.1676377058029175, "learning_rate": 4.750582421554605e-06, "loss": 1.3142, "step": 6315 }, { "epoch": 0.9047414410542902, "grad_norm": 1.083066701889038, "learning_rate": 4.7364619196617495e-06, "loss": 1.3508, "step": 6316 }, { "epoch": 0.9048846870075921, "grad_norm": 0.923689067363739, "learning_rate": 4.722361925651231e-06, "loss": 1.3015, "step": 6317 }, { "epoch": 0.9050279329608939, "grad_norm": 1.3136951923370361, "learning_rate": 4.708282442558443e-06, "loss": 1.335, "step": 6318 }, { "epoch": 0.9051711789141956, "grad_norm": 1.0013196468353271, "learning_rate": 4.694223473414328e-06, "loss": 1.4267, "step": 6319 }, { "epoch": 0.9053144248674975, "grad_norm": 1.3306643962860107, "learning_rate": 4.6801850212454755e-06, "loss": 1.404, "step": 6320 }, { "epoch": 0.9054576708207993, "grad_norm": 1.6908385753631592, "learning_rate": 4.6661670890739475e-06, "loss": 1.4333, "step": 6321 }, { "epoch": 0.9056009167741012, "grad_norm": 1.0434209108352661, "learning_rate": 4.652169679917517e-06, "loss": 1.5736, "step": 6322 }, { "epoch": 0.905744162727403, "grad_norm": 1.179317831993103, "learning_rate": 4.638192796789487e-06, "loss": 1.4745, "step": 6323 }, { "epoch": 0.9058874086807047, "grad_norm": 1.0765799283981323, "learning_rate": 4.6242364426986815e-06, "loss": 1.1938, "step": 6324 }, { "epoch": 0.9060306546340066, "grad_norm": 1.0506117343902588, "learning_rate": 4.610300620649577e-06, "loss": 1.3168, "step": 6325 }, { "epoch": 0.9061739005873084, "grad_norm": 1.6132502555847168, "learning_rate": 4.59638533364225e-06, "loss": 1.3708, "step": 6326 }, { "epoch": 0.9063171465406102, "grad_norm": 1.2981005907058716, "learning_rate": 4.582490584672272e-06, "loss": 1.4075, "step": 6327 }, { "epoch": 0.9064603924939121, "grad_norm": 1.20966374874115, "learning_rate": 4.5686163767308606e-06, "loss": 1.2501, "step": 6328 }, { "epoch": 0.9066036384472138, "grad_norm": 1.1635000705718994, "learning_rate": 4.554762712804772e-06, "loss": 1.3593, "step": 6329 }, { "epoch": 0.9067468844005157, "grad_norm": 1.0249067544937134, "learning_rate": 4.540929595876376e-06, "loss": 1.4651, "step": 6330 }, { "epoch": 0.9068901303538175, "grad_norm": 1.0671076774597168, "learning_rate": 4.527117028923599e-06, "loss": 1.5026, "step": 6331 }, { "epoch": 0.9070333763071193, "grad_norm": 1.0926105976104736, "learning_rate": 4.513325014919923e-06, "loss": 1.4043, "step": 6332 }, { "epoch": 0.9071766222604212, "grad_norm": 0.976590633392334, "learning_rate": 4.499553556834446e-06, "loss": 1.2697, "step": 6333 }, { "epoch": 0.907319868213723, "grad_norm": 1.2017138004302979, "learning_rate": 4.4858026576318435e-06, "loss": 1.5284, "step": 6334 }, { "epoch": 0.9074631141670247, "grad_norm": 0.9945162534713745, "learning_rate": 4.472072320272292e-06, "loss": 1.4214, "step": 6335 }, { "epoch": 0.9076063601203266, "grad_norm": 1.093651533126831, "learning_rate": 4.4583625477116256e-06, "loss": 1.271, "step": 6336 }, { "epoch": 0.9077496060736284, "grad_norm": 1.2722675800323486, "learning_rate": 4.444673342901218e-06, "loss": 1.1873, "step": 6337 }, { "epoch": 0.9078928520269303, "grad_norm": 1.261701226234436, "learning_rate": 4.431004708788011e-06, "loss": 1.2441, "step": 6338 }, { "epoch": 0.9080360979802321, "grad_norm": 1.0567492246627808, "learning_rate": 4.41735664831453e-06, "loss": 1.4345, "step": 6339 }, { "epoch": 0.9081793439335338, "grad_norm": 1.1634806394577026, "learning_rate": 4.403729164418857e-06, "loss": 1.4443, "step": 6340 }, { "epoch": 0.9083225898868357, "grad_norm": 1.0430582761764526, "learning_rate": 4.390122260034657e-06, "loss": 1.2278, "step": 6341 }, { "epoch": 0.9084658358401375, "grad_norm": 1.3105398416519165, "learning_rate": 4.376535938091153e-06, "loss": 1.2374, "step": 6342 }, { "epoch": 0.9086090817934394, "grad_norm": 1.0115612745285034, "learning_rate": 4.362970201513139e-06, "loss": 1.6307, "step": 6343 }, { "epoch": 0.9087523277467412, "grad_norm": 1.197774052619934, "learning_rate": 4.3494250532210124e-06, "loss": 1.3851, "step": 6344 }, { "epoch": 0.9088955737000429, "grad_norm": 1.0714023113250732, "learning_rate": 4.335900496130674e-06, "loss": 1.3876, "step": 6345 }, { "epoch": 0.9090388196533448, "grad_norm": 1.2956602573394775, "learning_rate": 4.3223965331536386e-06, "loss": 1.3302, "step": 6346 }, { "epoch": 0.9091820656066466, "grad_norm": 1.0412628650665283, "learning_rate": 4.308913167196982e-06, "loss": 1.4365, "step": 6347 }, { "epoch": 0.9093253115599484, "grad_norm": 1.2051353454589844, "learning_rate": 4.2954504011633255e-06, "loss": 1.4699, "step": 6348 }, { "epoch": 0.9094685575132503, "grad_norm": 1.3467074632644653, "learning_rate": 4.282008237950896e-06, "loss": 1.3888, "step": 6349 }, { "epoch": 0.909611803466552, "grad_norm": 1.051643967628479, "learning_rate": 4.2685866804534236e-06, "loss": 1.5503, "step": 6350 }, { "epoch": 0.9097550494198539, "grad_norm": 1.0065505504608154, "learning_rate": 4.255185731560252e-06, "loss": 1.5185, "step": 6351 }, { "epoch": 0.9098982953731557, "grad_norm": 1.4000816345214844, "learning_rate": 4.241805394156295e-06, "loss": 1.4599, "step": 6352 }, { "epoch": 0.9100415413264575, "grad_norm": 1.0450208187103271, "learning_rate": 4.228445671121972e-06, "loss": 1.4741, "step": 6353 }, { "epoch": 0.9101847872797594, "grad_norm": 0.9874674677848816, "learning_rate": 4.215106565333316e-06, "loss": 1.3918, "step": 6354 }, { "epoch": 0.9103280332330612, "grad_norm": 1.4506988525390625, "learning_rate": 4.201788079661928e-06, "loss": 1.465, "step": 6355 }, { "epoch": 0.910471279186363, "grad_norm": 1.18506920337677, "learning_rate": 4.188490216974916e-06, "loss": 1.3948, "step": 6356 }, { "epoch": 0.9106145251396648, "grad_norm": 1.263118028640747, "learning_rate": 4.17521298013499e-06, "loss": 1.3917, "step": 6357 }, { "epoch": 0.9107577710929666, "grad_norm": 1.317518711090088, "learning_rate": 4.161956372000419e-06, "loss": 1.3948, "step": 6358 }, { "epoch": 0.9109010170462685, "grad_norm": 0.993003785610199, "learning_rate": 4.14872039542501e-06, "loss": 1.4928, "step": 6359 }, { "epoch": 0.9110442629995703, "grad_norm": 0.9878324866294861, "learning_rate": 4.135505053258171e-06, "loss": 1.3773, "step": 6360 }, { "epoch": 0.911187508952872, "grad_norm": 1.0284819602966309, "learning_rate": 4.122310348344782e-06, "loss": 1.5752, "step": 6361 }, { "epoch": 0.9113307549061739, "grad_norm": 1.3280019760131836, "learning_rate": 4.109136283525394e-06, "loss": 1.242, "step": 6362 }, { "epoch": 0.9114740008594757, "grad_norm": 1.0655003786087036, "learning_rate": 4.0959828616360385e-06, "loss": 1.4095, "step": 6363 }, { "epoch": 0.9116172468127776, "grad_norm": 1.1232937574386597, "learning_rate": 4.082850085508305e-06, "loss": 1.3304, "step": 6364 }, { "epoch": 0.9117604927660794, "grad_norm": 1.192458987236023, "learning_rate": 4.069737957969366e-06, "loss": 1.2875, "step": 6365 }, { "epoch": 0.9119037387193811, "grad_norm": 1.1087749004364014, "learning_rate": 4.056646481841952e-06, "loss": 1.4889, "step": 6366 }, { "epoch": 0.912046984672683, "grad_norm": 1.2468127012252808, "learning_rate": 4.04357565994431e-06, "loss": 1.3713, "step": 6367 }, { "epoch": 0.9121902306259848, "grad_norm": 1.2077809572219849, "learning_rate": 4.030525495090276e-06, "loss": 1.4045, "step": 6368 }, { "epoch": 0.9123334765792867, "grad_norm": 0.9777785539627075, "learning_rate": 4.017495990089227e-06, "loss": 1.4268, "step": 6369 }, { "epoch": 0.9124767225325885, "grad_norm": 1.1648367643356323, "learning_rate": 4.004487147746105e-06, "loss": 1.3019, "step": 6370 }, { "epoch": 0.9126199684858902, "grad_norm": 1.1054315567016602, "learning_rate": 3.991498970861373e-06, "loss": 1.3679, "step": 6371 }, { "epoch": 0.9127632144391921, "grad_norm": 1.114991545677185, "learning_rate": 3.9785314622310495e-06, "loss": 1.2732, "step": 6372 }, { "epoch": 0.9129064603924939, "grad_norm": 1.1575613021850586, "learning_rate": 3.965584624646768e-06, "loss": 1.4201, "step": 6373 }, { "epoch": 0.9130497063457957, "grad_norm": 1.0578254461288452, "learning_rate": 3.9526584608956196e-06, "loss": 1.3547, "step": 6374 }, { "epoch": 0.9131929522990976, "grad_norm": 1.1619629859924316, "learning_rate": 3.939752973760313e-06, "loss": 1.3291, "step": 6375 }, { "epoch": 0.9133361982523994, "grad_norm": 1.2380051612854004, "learning_rate": 3.9268681660190595e-06, "loss": 1.3682, "step": 6376 }, { "epoch": 0.9134794442057012, "grad_norm": 1.0176156759262085, "learning_rate": 3.9140040404456515e-06, "loss": 1.2568, "step": 6377 }, { "epoch": 0.913622690159003, "grad_norm": 1.0437486171722412, "learning_rate": 3.901160599809428e-06, "loss": 1.311, "step": 6378 }, { "epoch": 0.9137659361123048, "grad_norm": 1.1589395999908447, "learning_rate": 3.888337846875223e-06, "loss": 1.242, "step": 6379 }, { "epoch": 0.9139091820656067, "grad_norm": 1.2319549322128296, "learning_rate": 3.875535784403495e-06, "loss": 1.3919, "step": 6380 }, { "epoch": 0.9140524280189085, "grad_norm": 1.2359362840652466, "learning_rate": 3.862754415150216e-06, "loss": 1.4164, "step": 6381 }, { "epoch": 0.9141956739722102, "grad_norm": 1.1600825786590576, "learning_rate": 3.849993741866864e-06, "loss": 1.4121, "step": 6382 }, { "epoch": 0.9143389199255121, "grad_norm": 1.1855958700180054, "learning_rate": 3.837253767300519e-06, "loss": 1.3754, "step": 6383 }, { "epoch": 0.9144821658788139, "grad_norm": 1.1956040859222412, "learning_rate": 3.824534494193799e-06, "loss": 1.2825, "step": 6384 }, { "epoch": 0.9146254118321158, "grad_norm": 1.2218660116195679, "learning_rate": 3.8118359252848236e-06, "loss": 1.4137, "step": 6385 }, { "epoch": 0.9147686577854176, "grad_norm": 0.9802232980728149, "learning_rate": 3.799158063307273e-06, "loss": 1.3338, "step": 6386 }, { "epoch": 0.9149119037387193, "grad_norm": 1.0385743379592896, "learning_rate": 3.786500910990398e-06, "loss": 1.1885, "step": 6387 }, { "epoch": 0.9150551496920212, "grad_norm": 1.2746639251708984, "learning_rate": 3.773864471058963e-06, "loss": 1.2978, "step": 6388 }, { "epoch": 0.915198395645323, "grad_norm": 1.0996171236038208, "learning_rate": 3.76124874623327e-06, "loss": 1.2936, "step": 6389 }, { "epoch": 0.9153416415986249, "grad_norm": 1.281622290611267, "learning_rate": 3.748653739229191e-06, "loss": 1.4174, "step": 6390 }, { "epoch": 0.9154848875519267, "grad_norm": 1.054019808769226, "learning_rate": 3.7360794527581234e-06, "loss": 1.1928, "step": 6391 }, { "epoch": 0.9156281335052284, "grad_norm": 1.5539844036102295, "learning_rate": 3.7235258895269686e-06, "loss": 1.4051, "step": 6392 }, { "epoch": 0.9157713794585303, "grad_norm": 1.2283304929733276, "learning_rate": 3.7109930522382097e-06, "loss": 1.4898, "step": 6393 }, { "epoch": 0.9159146254118321, "grad_norm": 1.1192491054534912, "learning_rate": 3.698480943589888e-06, "loss": 1.4192, "step": 6394 }, { "epoch": 0.916057871365134, "grad_norm": 1.0392793416976929, "learning_rate": 3.685989566275516e-06, "loss": 1.3313, "step": 6395 }, { "epoch": 0.9162011173184358, "grad_norm": 1.2398030757904053, "learning_rate": 3.673518922984187e-06, "loss": 1.2957, "step": 6396 }, { "epoch": 0.9163443632717376, "grad_norm": 1.1188124418258667, "learning_rate": 3.66106901640052e-06, "loss": 1.3791, "step": 6397 }, { "epoch": 0.9164876092250394, "grad_norm": 0.9682042002677917, "learning_rate": 3.6486398492046827e-06, "loss": 1.2746, "step": 6398 }, { "epoch": 0.9166308551783412, "grad_norm": 1.229692816734314, "learning_rate": 3.636231424072367e-06, "loss": 1.4289, "step": 6399 }, { "epoch": 0.916774101131643, "grad_norm": 1.460267424583435, "learning_rate": 3.623843743674771e-06, "loss": 1.412, "step": 6400 }, { "epoch": 0.9169173470849449, "grad_norm": 0.9987478256225586, "learning_rate": 3.611476810678693e-06, "loss": 1.4243, "step": 6401 }, { "epoch": 0.9170605930382467, "grad_norm": 1.1185868978500366, "learning_rate": 3.5991306277464167e-06, "loss": 1.3008, "step": 6402 }, { "epoch": 0.9172038389915484, "grad_norm": 0.9727362394332886, "learning_rate": 3.5868051975357587e-06, "loss": 1.4391, "step": 6403 }, { "epoch": 0.9173470849448503, "grad_norm": 1.288607120513916, "learning_rate": 3.5745005227000637e-06, "loss": 1.3324, "step": 6404 }, { "epoch": 0.9174903308981521, "grad_norm": 1.0097432136535645, "learning_rate": 3.56221660588828e-06, "loss": 1.3164, "step": 6405 }, { "epoch": 0.917633576851454, "grad_norm": 1.1059058904647827, "learning_rate": 3.5499534497447807e-06, "loss": 1.3039, "step": 6406 }, { "epoch": 0.9177768228047558, "grad_norm": 1.184360146522522, "learning_rate": 3.5377110569095316e-06, "loss": 1.4156, "step": 6407 }, { "epoch": 0.9179200687580575, "grad_norm": 1.1753672361373901, "learning_rate": 3.5254894300180252e-06, "loss": 1.312, "step": 6408 }, { "epoch": 0.9180633147113594, "grad_norm": 1.2700732946395874, "learning_rate": 3.5132885717012675e-06, "loss": 1.2646, "step": 6409 }, { "epoch": 0.9182065606646612, "grad_norm": 1.0574846267700195, "learning_rate": 3.5011084845858246e-06, "loss": 1.2646, "step": 6410 }, { "epoch": 0.9183498066179631, "grad_norm": 1.0546069145202637, "learning_rate": 3.4889491712937205e-06, "loss": 1.4021, "step": 6411 }, { "epoch": 0.9184930525712649, "grad_norm": 0.9829645156860352, "learning_rate": 3.4768106344425956e-06, "loss": 1.2777, "step": 6412 }, { "epoch": 0.9186362985245666, "grad_norm": 1.2863688468933105, "learning_rate": 3.46469287664557e-06, "loss": 1.552, "step": 6413 }, { "epoch": 0.9187795444778685, "grad_norm": 0.9914535284042358, "learning_rate": 3.4525959005112794e-06, "loss": 1.4084, "step": 6414 }, { "epoch": 0.9189227904311703, "grad_norm": 1.190781831741333, "learning_rate": 3.440519708643919e-06, "loss": 1.3314, "step": 6415 }, { "epoch": 0.9190660363844722, "grad_norm": 1.2144767045974731, "learning_rate": 3.428464303643186e-06, "loss": 1.5512, "step": 6416 }, { "epoch": 0.919209282337774, "grad_norm": 1.0809637308120728, "learning_rate": 3.4164296881043055e-06, "loss": 1.3637, "step": 6417 }, { "epoch": 0.9193525282910758, "grad_norm": 1.2300615310668945, "learning_rate": 3.4044158646180604e-06, "loss": 1.5734, "step": 6418 }, { "epoch": 0.9194957742443776, "grad_norm": 1.1273410320281982, "learning_rate": 3.3924228357706922e-06, "loss": 1.6056, "step": 6419 }, { "epoch": 0.9196390201976794, "grad_norm": 1.2528510093688965, "learning_rate": 3.3804506041440363e-06, "loss": 1.4098, "step": 6420 }, { "epoch": 0.9197822661509812, "grad_norm": 1.1113338470458984, "learning_rate": 3.368499172315398e-06, "loss": 1.2723, "step": 6421 }, { "epoch": 0.9199255121042831, "grad_norm": 1.169180154800415, "learning_rate": 3.356568542857619e-06, "loss": 1.4788, "step": 6422 }, { "epoch": 0.9200687580575849, "grad_norm": 0.9524937272071838, "learning_rate": 3.3446587183390888e-06, "loss": 1.4445, "step": 6423 }, { "epoch": 0.9202120040108867, "grad_norm": 1.9002968072891235, "learning_rate": 3.3327697013236904e-06, "loss": 1.253, "step": 6424 }, { "epoch": 0.9203552499641885, "grad_norm": 1.044958233833313, "learning_rate": 3.3209014943708204e-06, "loss": 1.4228, "step": 6425 }, { "epoch": 0.9204984959174903, "grad_norm": 1.1631228923797607, "learning_rate": 3.3090541000354358e-06, "loss": 1.3982, "step": 6426 }, { "epoch": 0.9206417418707922, "grad_norm": 0.999303936958313, "learning_rate": 3.2972275208679625e-06, "loss": 1.4396, "step": 6427 }, { "epoch": 0.920784987824094, "grad_norm": 1.188114047050476, "learning_rate": 3.2854217594143975e-06, "loss": 1.423, "step": 6428 }, { "epoch": 0.9209282337773957, "grad_norm": 1.1546486616134644, "learning_rate": 3.273636818216197e-06, "loss": 1.5945, "step": 6429 }, { "epoch": 0.9210714797306976, "grad_norm": 1.0848147869110107, "learning_rate": 3.2618726998103867e-06, "loss": 1.5697, "step": 6430 }, { "epoch": 0.9212147256839994, "grad_norm": 1.219969630241394, "learning_rate": 3.2501294067295073e-06, "loss": 1.2476, "step": 6431 }, { "epoch": 0.9213579716373013, "grad_norm": 1.1363762617111206, "learning_rate": 3.2384069415015594e-06, "loss": 1.3722, "step": 6432 }, { "epoch": 0.9215012175906031, "grad_norm": 1.2415932416915894, "learning_rate": 3.226705306650113e-06, "loss": 1.464, "step": 6433 }, { "epoch": 0.9216444635439048, "grad_norm": 1.0337730646133423, "learning_rate": 3.215024504694264e-06, "loss": 1.3139, "step": 6434 }, { "epoch": 0.9217877094972067, "grad_norm": 0.9612577557563782, "learning_rate": 3.203364538148579e-06, "loss": 1.3433, "step": 6435 }, { "epoch": 0.9219309554505085, "grad_norm": 1.1317096948623657, "learning_rate": 3.1917254095231606e-06, "loss": 1.4091, "step": 6436 }, { "epoch": 0.9220742014038104, "grad_norm": 1.034150242805481, "learning_rate": 3.1801071213236277e-06, "loss": 1.4781, "step": 6437 }, { "epoch": 0.9222174473571122, "grad_norm": 1.1174944639205933, "learning_rate": 3.1685096760511123e-06, "loss": 1.2694, "step": 6438 }, { "epoch": 0.9223606933104139, "grad_norm": 1.0812113285064697, "learning_rate": 3.156933076202262e-06, "loss": 1.5264, "step": 6439 }, { "epoch": 0.9225039392637158, "grad_norm": 1.1844664812088013, "learning_rate": 3.145377324269205e-06, "loss": 1.5074, "step": 6440 }, { "epoch": 0.9226471852170176, "grad_norm": 0.9716758728027344, "learning_rate": 3.1338424227396524e-06, "loss": 1.3961, "step": 6441 }, { "epoch": 0.9227904311703194, "grad_norm": 1.077136754989624, "learning_rate": 3.122328374096761e-06, "loss": 1.3807, "step": 6442 }, { "epoch": 0.9229336771236213, "grad_norm": 1.1465879678726196, "learning_rate": 3.110835180819216e-06, "loss": 1.3367, "step": 6443 }, { "epoch": 0.9230769230769231, "grad_norm": 1.2227541208267212, "learning_rate": 3.099362845381215e-06, "loss": 1.3559, "step": 6444 }, { "epoch": 0.9232201690302249, "grad_norm": 1.1602439880371094, "learning_rate": 3.0879113702524832e-06, "loss": 1.2714, "step": 6445 }, { "epoch": 0.9233634149835267, "grad_norm": 1.202672004699707, "learning_rate": 3.0764807578982256e-06, "loss": 1.3714, "step": 6446 }, { "epoch": 0.9235066609368285, "grad_norm": 0.9637908339500427, "learning_rate": 3.0650710107791748e-06, "loss": 1.4164, "step": 6447 }, { "epoch": 0.9236499068901304, "grad_norm": 1.0018640756607056, "learning_rate": 3.053682131351576e-06, "loss": 1.304, "step": 6448 }, { "epoch": 0.9237931528434322, "grad_norm": 1.3392282724380493, "learning_rate": 3.042314122067169e-06, "loss": 1.3068, "step": 6449 }, { "epoch": 0.923936398796734, "grad_norm": 1.3370113372802734, "learning_rate": 3.0309669853731848e-06, "loss": 1.334, "step": 6450 }, { "epoch": 0.9240796447500358, "grad_norm": 1.0360528230667114, "learning_rate": 3.0196407237124024e-06, "loss": 1.3887, "step": 6451 }, { "epoch": 0.9242228907033376, "grad_norm": 1.1306885480880737, "learning_rate": 3.008335339523105e-06, "loss": 1.3213, "step": 6452 }, { "epoch": 0.9243661366566395, "grad_norm": 1.2070178985595703, "learning_rate": 2.9970508352390125e-06, "loss": 1.3842, "step": 6453 }, { "epoch": 0.9245093826099413, "grad_norm": 1.0690031051635742, "learning_rate": 2.985787213289415e-06, "loss": 1.3457, "step": 6454 }, { "epoch": 0.924652628563243, "grad_norm": 1.0372827053070068, "learning_rate": 2.9745444760991283e-06, "loss": 1.4153, "step": 6455 }, { "epoch": 0.9247958745165449, "grad_norm": 1.1572743654251099, "learning_rate": 2.963322626088405e-06, "loss": 1.4694, "step": 6456 }, { "epoch": 0.9249391204698467, "grad_norm": 1.3563387393951416, "learning_rate": 2.9521216656730242e-06, "loss": 1.4365, "step": 6457 }, { "epoch": 0.9250823664231486, "grad_norm": 1.2542288303375244, "learning_rate": 2.9409415972642905e-06, "loss": 1.5315, "step": 6458 }, { "epoch": 0.9252256123764504, "grad_norm": 1.0653622150421143, "learning_rate": 2.9297824232689895e-06, "loss": 1.3843, "step": 6459 }, { "epoch": 0.9253688583297521, "grad_norm": 1.0985112190246582, "learning_rate": 2.9186441460894333e-06, "loss": 1.2992, "step": 6460 }, { "epoch": 0.925512104283054, "grad_norm": 1.2218881845474243, "learning_rate": 2.9075267681233697e-06, "loss": 1.2552, "step": 6461 }, { "epoch": 0.9256553502363558, "grad_norm": 1.2333095073699951, "learning_rate": 2.896430291764152e-06, "loss": 1.2838, "step": 6462 }, { "epoch": 0.9257985961896577, "grad_norm": 1.109918475151062, "learning_rate": 2.885354719400557e-06, "loss": 1.4541, "step": 6463 }, { "epoch": 0.9259418421429595, "grad_norm": 0.9334563612937927, "learning_rate": 2.8743000534168675e-06, "loss": 1.4794, "step": 6464 }, { "epoch": 0.9260850880962613, "grad_norm": 1.1399518251419067, "learning_rate": 2.863266296192879e-06, "loss": 1.383, "step": 6465 }, { "epoch": 0.9262283340495631, "grad_norm": 1.5282297134399414, "learning_rate": 2.8522534501039035e-06, "loss": 1.5363, "step": 6466 }, { "epoch": 0.9263715800028649, "grad_norm": 1.0881413221359253, "learning_rate": 2.8412615175207324e-06, "loss": 1.4165, "step": 6467 }, { "epoch": 0.9265148259561667, "grad_norm": 1.107849359512329, "learning_rate": 2.8302905008096403e-06, "loss": 1.4661, "step": 6468 }, { "epoch": 0.9266580719094686, "grad_norm": 1.1685051918029785, "learning_rate": 2.8193404023324376e-06, "loss": 1.4171, "step": 6469 }, { "epoch": 0.9268013178627704, "grad_norm": 1.1487082242965698, "learning_rate": 2.8084112244464056e-06, "loss": 1.3268, "step": 6470 }, { "epoch": 0.9269445638160722, "grad_norm": 1.0850621461868286, "learning_rate": 2.7975029695043064e-06, "loss": 1.4379, "step": 6471 }, { "epoch": 0.927087809769374, "grad_norm": 1.1466665267944336, "learning_rate": 2.7866156398544176e-06, "loss": 1.2665, "step": 6472 }, { "epoch": 0.9272310557226758, "grad_norm": 1.0455917119979858, "learning_rate": 2.7757492378405414e-06, "loss": 1.4232, "step": 6473 }, { "epoch": 0.9273743016759777, "grad_norm": 1.0983914136886597, "learning_rate": 2.7649037658019183e-06, "loss": 1.6041, "step": 6474 }, { "epoch": 0.9275175476292795, "grad_norm": 0.9594546556472778, "learning_rate": 2.754079226073325e-06, "loss": 1.4214, "step": 6475 }, { "epoch": 0.9276607935825812, "grad_norm": 1.3279978036880493, "learning_rate": 2.7432756209850084e-06, "loss": 1.3889, "step": 6476 }, { "epoch": 0.9278040395358831, "grad_norm": 1.0115203857421875, "learning_rate": 2.7324929528627195e-06, "loss": 1.267, "step": 6477 }, { "epoch": 0.9279472854891849, "grad_norm": 1.119099497795105, "learning_rate": 2.7217312240277127e-06, "loss": 1.4042, "step": 6478 }, { "epoch": 0.9280905314424868, "grad_norm": 1.1826395988464355, "learning_rate": 2.71099043679669e-06, "loss": 1.1922, "step": 6479 }, { "epoch": 0.9282337773957886, "grad_norm": 1.183553695678711, "learning_rate": 2.7002705934819018e-06, "loss": 1.3822, "step": 6480 }, { "epoch": 0.9283770233490903, "grad_norm": 1.1549584865570068, "learning_rate": 2.68957169639108e-06, "loss": 1.3916, "step": 6481 }, { "epoch": 0.9285202693023922, "grad_norm": 1.069952130317688, "learning_rate": 2.6788937478273934e-06, "loss": 1.4052, "step": 6482 }, { "epoch": 0.928663515255694, "grad_norm": 1.0807828903198242, "learning_rate": 2.6682367500895587e-06, "loss": 1.5228, "step": 6483 }, { "epoch": 0.9288067612089959, "grad_norm": 1.1680474281311035, "learning_rate": 2.6576007054717746e-06, "loss": 1.6019, "step": 6484 }, { "epoch": 0.9289500071622977, "grad_norm": 1.100862979888916, "learning_rate": 2.6469856162637086e-06, "loss": 1.3871, "step": 6485 }, { "epoch": 0.9290932531155995, "grad_norm": 1.1147569417953491, "learning_rate": 2.6363914847505113e-06, "loss": 1.4473, "step": 6486 }, { "epoch": 0.9292364990689013, "grad_norm": 1.1767098903656006, "learning_rate": 2.625818313212869e-06, "loss": 1.5332, "step": 6487 }, { "epoch": 0.9293797450222031, "grad_norm": 1.2728763818740845, "learning_rate": 2.6152661039268943e-06, "loss": 1.3662, "step": 6488 }, { "epoch": 0.929522990975505, "grad_norm": 1.3056116104125977, "learning_rate": 2.6047348591642483e-06, "loss": 1.5044, "step": 6489 }, { "epoch": 0.9296662369288068, "grad_norm": 1.077352523803711, "learning_rate": 2.5942245811920065e-06, "loss": 1.5524, "step": 6490 }, { "epoch": 0.9298094828821086, "grad_norm": 0.9235694408416748, "learning_rate": 2.5837352722728026e-06, "loss": 1.5109, "step": 6491 }, { "epoch": 0.9299527288354104, "grad_norm": 1.1439684629440308, "learning_rate": 2.57326693466472e-06, "loss": 1.3727, "step": 6492 }, { "epoch": 0.9300959747887122, "grad_norm": 1.1689507961273193, "learning_rate": 2.562819570621322e-06, "loss": 1.4112, "step": 6493 }, { "epoch": 0.930239220742014, "grad_norm": 1.4457679986953735, "learning_rate": 2.552393182391677e-06, "loss": 1.4417, "step": 6494 }, { "epoch": 0.9303824666953159, "grad_norm": 1.3622556924819946, "learning_rate": 2.5419877722203333e-06, "loss": 1.4546, "step": 6495 }, { "epoch": 0.9305257126486177, "grad_norm": 1.2100732326507568, "learning_rate": 2.5316033423472997e-06, "loss": 1.3524, "step": 6496 }, { "epoch": 0.9306689586019194, "grad_norm": 0.9950403571128845, "learning_rate": 2.5212398950081096e-06, "loss": 1.3074, "step": 6497 }, { "epoch": 0.9308122045552213, "grad_norm": 0.9643857479095459, "learning_rate": 2.510897432433734e-06, "loss": 1.3133, "step": 6498 }, { "epoch": 0.9309554505085231, "grad_norm": 1.0548871755599976, "learning_rate": 2.5005759568506704e-06, "loss": 1.4155, "step": 6499 }, { "epoch": 0.931098696461825, "grad_norm": 1.035657525062561, "learning_rate": 2.490275470480863e-06, "loss": 1.5047, "step": 6500 }, { "epoch": 0.9312419424151268, "grad_norm": 1.0985133647918701, "learning_rate": 2.479995975541749e-06, "loss": 1.2833, "step": 6501 }, { "epoch": 0.9313851883684285, "grad_norm": 1.088080644607544, "learning_rate": 2.4697374742462698e-06, "loss": 1.3867, "step": 6502 }, { "epoch": 0.9315284343217304, "grad_norm": 1.0646523237228394, "learning_rate": 2.4594999688028032e-06, "loss": 1.2632, "step": 6503 }, { "epoch": 0.9316716802750322, "grad_norm": 1.4848166704177856, "learning_rate": 2.4492834614152414e-06, "loss": 1.4676, "step": 6504 }, { "epoch": 0.9318149262283341, "grad_norm": 1.1112170219421387, "learning_rate": 2.439087954282948e-06, "loss": 1.2151, "step": 6505 }, { "epoch": 0.9319581721816359, "grad_norm": 1.133521318435669, "learning_rate": 2.428913449600756e-06, "loss": 1.3383, "step": 6506 }, { "epoch": 0.9321014181349377, "grad_norm": 1.1280066967010498, "learning_rate": 2.418759949558991e-06, "loss": 1.3667, "step": 6507 }, { "epoch": 0.9322446640882395, "grad_norm": 1.3679102659225464, "learning_rate": 2.4086274563434488e-06, "loss": 1.2945, "step": 6508 }, { "epoch": 0.9323879100415413, "grad_norm": 1.04868483543396, "learning_rate": 2.3985159721353956e-06, "loss": 1.4642, "step": 6509 }, { "epoch": 0.9325311559948432, "grad_norm": 1.1025407314300537, "learning_rate": 2.388425499111613e-06, "loss": 1.3017, "step": 6510 }, { "epoch": 0.932674401948145, "grad_norm": 0.9507743716239929, "learning_rate": 2.378356039444285e-06, "loss": 1.6583, "step": 6511 }, { "epoch": 0.9328176479014468, "grad_norm": 1.3442308902740479, "learning_rate": 2.3683075953011558e-06, "loss": 1.458, "step": 6512 }, { "epoch": 0.9329608938547486, "grad_norm": 1.0591504573822021, "learning_rate": 2.3582801688453948e-06, "loss": 1.292, "step": 6513 }, { "epoch": 0.9331041398080504, "grad_norm": 1.2321276664733887, "learning_rate": 2.348273762235642e-06, "loss": 1.4901, "step": 6514 }, { "epoch": 0.9332473857613522, "grad_norm": 1.4661743640899658, "learning_rate": 2.3382883776260524e-06, "loss": 1.4526, "step": 6515 }, { "epoch": 0.9333906317146541, "grad_norm": 1.1465964317321777, "learning_rate": 2.3283240171662167e-06, "loss": 1.3576, "step": 6516 }, { "epoch": 0.9335338776679559, "grad_norm": 1.1291935443878174, "learning_rate": 2.318380683001231e-06, "loss": 1.4967, "step": 6517 }, { "epoch": 0.9336771236212577, "grad_norm": 1.1595145463943481, "learning_rate": 2.3084583772716275e-06, "loss": 1.4097, "step": 6518 }, { "epoch": 0.9338203695745595, "grad_norm": 1.303309440612793, "learning_rate": 2.2985571021134524e-06, "loss": 1.3088, "step": 6519 }, { "epoch": 0.9339636155278613, "grad_norm": 1.1898589134216309, "learning_rate": 2.288676859658212e-06, "loss": 1.455, "step": 6520 }, { "epoch": 0.9341068614811632, "grad_norm": 1.0397287607192993, "learning_rate": 2.27881765203285e-06, "loss": 1.3879, "step": 6521 }, { "epoch": 0.934250107434465, "grad_norm": 1.086340308189392, "learning_rate": 2.268979481359812e-06, "loss": 1.3649, "step": 6522 }, { "epoch": 0.9343933533877667, "grad_norm": 0.9697161912918091, "learning_rate": 2.259162349757038e-06, "loss": 1.392, "step": 6523 }, { "epoch": 0.9345365993410686, "grad_norm": 1.06399405002594, "learning_rate": 2.249366259337893e-06, "loss": 1.3718, "step": 6524 }, { "epoch": 0.9346798452943704, "grad_norm": 1.1041001081466675, "learning_rate": 2.239591212211245e-06, "loss": 1.2082, "step": 6525 }, { "epoch": 0.9348230912476723, "grad_norm": 1.1941328048706055, "learning_rate": 2.2298372104814115e-06, "loss": 1.4065, "step": 6526 }, { "epoch": 0.9349663372009741, "grad_norm": 1.023792028427124, "learning_rate": 2.22010425624819e-06, "loss": 1.3264, "step": 6527 }, { "epoch": 0.9351095831542758, "grad_norm": 1.0017398595809937, "learning_rate": 2.2103923516068605e-06, "loss": 1.255, "step": 6528 }, { "epoch": 0.9352528291075777, "grad_norm": 1.0893917083740234, "learning_rate": 2.2007014986481167e-06, "loss": 1.3831, "step": 6529 }, { "epoch": 0.9353960750608795, "grad_norm": 1.1865171194076538, "learning_rate": 2.1910316994581904e-06, "loss": 1.2383, "step": 6530 }, { "epoch": 0.9355393210141814, "grad_norm": 1.3630356788635254, "learning_rate": 2.1813829561187604e-06, "loss": 1.266, "step": 6531 }, { "epoch": 0.9356825669674832, "grad_norm": 0.9429690837860107, "learning_rate": 2.1717552707069323e-06, "loss": 1.5542, "step": 6532 }, { "epoch": 0.935825812920785, "grad_norm": 1.0557295083999634, "learning_rate": 2.1621486452953144e-06, "loss": 1.1891, "step": 6533 }, { "epoch": 0.9359690588740868, "grad_norm": 1.4938712120056152, "learning_rate": 2.152563081951997e-06, "loss": 1.2814, "step": 6534 }, { "epoch": 0.9361123048273886, "grad_norm": 1.0934022665023804, "learning_rate": 2.1429985827404853e-06, "loss": 1.3834, "step": 6535 }, { "epoch": 0.9362555507806904, "grad_norm": 1.0707919597625732, "learning_rate": 2.1334551497197987e-06, "loss": 1.1631, "step": 6536 }, { "epoch": 0.9363987967339923, "grad_norm": 1.095044732093811, "learning_rate": 2.1239327849444045e-06, "loss": 1.2767, "step": 6537 }, { "epoch": 0.9365420426872941, "grad_norm": 1.1175087690353394, "learning_rate": 2.1144314904642195e-06, "loss": 1.3519, "step": 6538 }, { "epoch": 0.9366852886405959, "grad_norm": 1.3356504440307617, "learning_rate": 2.104951268324651e-06, "loss": 1.2707, "step": 6539 }, { "epoch": 0.9368285345938977, "grad_norm": 1.0914610624313354, "learning_rate": 2.095492120566522e-06, "loss": 1.3643, "step": 6540 }, { "epoch": 0.9369717805471995, "grad_norm": 1.0788918733596802, "learning_rate": 2.0860540492262047e-06, "loss": 1.48, "step": 6541 }, { "epoch": 0.9371150265005014, "grad_norm": 1.0410175323486328, "learning_rate": 2.0766370563354508e-06, "loss": 1.3398, "step": 6542 }, { "epoch": 0.9372582724538032, "grad_norm": 1.231723666191101, "learning_rate": 2.0672411439215165e-06, "loss": 1.3052, "step": 6543 }, { "epoch": 0.937401518407105, "grad_norm": 1.173901915550232, "learning_rate": 2.0578663140070954e-06, "loss": 1.2091, "step": 6544 }, { "epoch": 0.9375447643604068, "grad_norm": 1.286744236946106, "learning_rate": 2.048512568610361e-06, "loss": 1.1458, "step": 6545 }, { "epoch": 0.9376880103137086, "grad_norm": 1.1618173122406006, "learning_rate": 2.0391799097449593e-06, "loss": 1.3611, "step": 6546 }, { "epoch": 0.9378312562670105, "grad_norm": 1.0362958908081055, "learning_rate": 2.02986833941996e-06, "loss": 1.2946, "step": 6547 }, { "epoch": 0.9379745022203123, "grad_norm": 1.2299001216888428, "learning_rate": 2.020577859639927e-06, "loss": 1.2024, "step": 6548 }, { "epoch": 0.938117748173614, "grad_norm": 0.9724285006523132, "learning_rate": 2.0113084724048715e-06, "loss": 1.4473, "step": 6549 }, { "epoch": 0.9382609941269159, "grad_norm": 1.0772038698196411, "learning_rate": 2.0020601797102523e-06, "loss": 1.4566, "step": 6550 }, { "epoch": 0.9384042400802177, "grad_norm": 1.015917420387268, "learning_rate": 1.9928329835469996e-06, "loss": 1.5132, "step": 6551 }, { "epoch": 0.9385474860335196, "grad_norm": 1.069230318069458, "learning_rate": 1.983626885901513e-06, "loss": 1.3421, "step": 6552 }, { "epoch": 0.9386907319868214, "grad_norm": 1.0985478162765503, "learning_rate": 1.974441888755629e-06, "loss": 1.3422, "step": 6553 }, { "epoch": 0.9388339779401232, "grad_norm": 0.9921656847000122, "learning_rate": 1.965277994086645e-06, "loss": 1.3881, "step": 6554 }, { "epoch": 0.938977223893425, "grad_norm": 1.1367942094802856, "learning_rate": 1.9561352038673263e-06, "loss": 1.4682, "step": 6555 }, { "epoch": 0.9391204698467268, "grad_norm": 1.2605880498886108, "learning_rate": 1.947013520065899e-06, "loss": 1.2548, "step": 6556 }, { "epoch": 0.9392637158000287, "grad_norm": 1.0384129285812378, "learning_rate": 1.9379129446460253e-06, "loss": 1.4666, "step": 6557 }, { "epoch": 0.9394069617533305, "grad_norm": 1.0792168378829956, "learning_rate": 1.9288334795668163e-06, "loss": 1.4182, "step": 6558 }, { "epoch": 0.9395502077066323, "grad_norm": 1.2265288829803467, "learning_rate": 1.9197751267828855e-06, "loss": 1.5977, "step": 6559 }, { "epoch": 0.9396934536599341, "grad_norm": 1.328806757926941, "learning_rate": 1.910737888244274e-06, "loss": 1.3863, "step": 6560 }, { "epoch": 0.9398366996132359, "grad_norm": 1.029736876487732, "learning_rate": 1.901721765896447e-06, "loss": 1.3573, "step": 6561 }, { "epoch": 0.9399799455665377, "grad_norm": 1.1217445135116577, "learning_rate": 1.8927267616803634e-06, "loss": 1.4442, "step": 6562 }, { "epoch": 0.9401231915198396, "grad_norm": 1.1470839977264404, "learning_rate": 1.883752877532452e-06, "loss": 1.2833, "step": 6563 }, { "epoch": 0.9402664374731414, "grad_norm": 0.9522246718406677, "learning_rate": 1.874800115384523e-06, "loss": 1.4569, "step": 6564 }, { "epoch": 0.9404096834264432, "grad_norm": 1.136121153831482, "learning_rate": 1.8658684771639234e-06, "loss": 1.4764, "step": 6565 }, { "epoch": 0.940552929379745, "grad_norm": 1.344774842262268, "learning_rate": 1.8569579647933933e-06, "loss": 1.4528, "step": 6566 }, { "epoch": 0.9406961753330468, "grad_norm": 1.141961932182312, "learning_rate": 1.8480685801911424e-06, "loss": 1.3325, "step": 6567 }, { "epoch": 0.9408394212863487, "grad_norm": 1.093137502670288, "learning_rate": 1.8392003252708622e-06, "loss": 1.3707, "step": 6568 }, { "epoch": 0.9409826672396505, "grad_norm": 1.0022450685501099, "learning_rate": 1.8303532019416258e-06, "loss": 1.3573, "step": 6569 }, { "epoch": 0.9411259131929522, "grad_norm": 1.1556183099746704, "learning_rate": 1.8215272121080317e-06, "loss": 1.2707, "step": 6570 }, { "epoch": 0.9412691591462541, "grad_norm": 1.191438913345337, "learning_rate": 1.8127223576701046e-06, "loss": 1.2936, "step": 6571 }, { "epoch": 0.9414124050995559, "grad_norm": 1.1766167879104614, "learning_rate": 1.8039386405232728e-06, "loss": 1.3493, "step": 6572 }, { "epoch": 0.9415556510528578, "grad_norm": 0.9758976101875305, "learning_rate": 1.79517606255849e-06, "loss": 1.3913, "step": 6573 }, { "epoch": 0.9416988970061596, "grad_norm": 1.256264567375183, "learning_rate": 1.7864346256621033e-06, "loss": 1.251, "step": 6574 }, { "epoch": 0.9418421429594614, "grad_norm": 1.0335503816604614, "learning_rate": 1.7777143317159406e-06, "loss": 1.3368, "step": 6575 }, { "epoch": 0.9419853889127632, "grad_norm": 1.110489010810852, "learning_rate": 1.7690151825972446e-06, "loss": 1.2919, "step": 6576 }, { "epoch": 0.942128634866065, "grad_norm": 1.1543210744857788, "learning_rate": 1.7603371801787505e-06, "loss": 1.4825, "step": 6577 }, { "epoch": 0.9422718808193669, "grad_norm": 0.9829211831092834, "learning_rate": 1.7516803263286086e-06, "loss": 1.4664, "step": 6578 }, { "epoch": 0.9424151267726687, "grad_norm": 1.236004114151001, "learning_rate": 1.743044622910417e-06, "loss": 1.446, "step": 6579 }, { "epoch": 0.9425583727259705, "grad_norm": 1.1797484159469604, "learning_rate": 1.734430071783244e-06, "loss": 1.4049, "step": 6580 }, { "epoch": 0.9427016186792723, "grad_norm": 1.2227280139923096, "learning_rate": 1.7258366748015842e-06, "loss": 1.515, "step": 6581 }, { "epoch": 0.9428448646325741, "grad_norm": 1.195163369178772, "learning_rate": 1.7172644338153686e-06, "loss": 1.3252, "step": 6582 }, { "epoch": 0.942988110585876, "grad_norm": 1.1159001588821411, "learning_rate": 1.7087133506699992e-06, "loss": 1.4732, "step": 6583 }, { "epoch": 0.9431313565391778, "grad_norm": 1.1562408208847046, "learning_rate": 1.7001834272063255e-06, "loss": 1.466, "step": 6584 }, { "epoch": 0.9432746024924796, "grad_norm": 1.0880250930786133, "learning_rate": 1.6916746652606119e-06, "loss": 1.3032, "step": 6585 }, { "epoch": 0.9434178484457814, "grad_norm": 0.9678330421447754, "learning_rate": 1.6831870666645822e-06, "loss": 1.2696, "step": 6586 }, { "epoch": 0.9435610943990832, "grad_norm": 1.1218080520629883, "learning_rate": 1.6747206332454191e-06, "loss": 1.2418, "step": 6587 }, { "epoch": 0.943704340352385, "grad_norm": 1.3242117166519165, "learning_rate": 1.6662753668257314e-06, "loss": 1.3203, "step": 6588 }, { "epoch": 0.9438475863056869, "grad_norm": 1.1830748319625854, "learning_rate": 1.657851269223587e-06, "loss": 1.3093, "step": 6589 }, { "epoch": 0.9439908322589887, "grad_norm": 1.0009790658950806, "learning_rate": 1.6494483422524466e-06, "loss": 1.1872, "step": 6590 }, { "epoch": 0.9441340782122905, "grad_norm": 1.3463211059570312, "learning_rate": 1.641066587721296e-06, "loss": 1.3964, "step": 6591 }, { "epoch": 0.9442773241655923, "grad_norm": 1.1451938152313232, "learning_rate": 1.6327060074345147e-06, "loss": 1.1737, "step": 6592 }, { "epoch": 0.9444205701188941, "grad_norm": 1.120936632156372, "learning_rate": 1.624366603191907e-06, "loss": 1.4091, "step": 6593 }, { "epoch": 0.944563816072196, "grad_norm": 1.3889036178588867, "learning_rate": 1.616048376788748e-06, "loss": 1.3486, "step": 6594 }, { "epoch": 0.9447070620254978, "grad_norm": 1.0480918884277344, "learning_rate": 1.6077513300157499e-06, "loss": 1.3105, "step": 6595 }, { "epoch": 0.9448503079787997, "grad_norm": 0.9653658866882324, "learning_rate": 1.5994754646590615e-06, "loss": 1.4037, "step": 6596 }, { "epoch": 0.9449935539321014, "grad_norm": 1.211676001548767, "learning_rate": 1.591220782500269e-06, "loss": 1.3166, "step": 6597 }, { "epoch": 0.9451367998854032, "grad_norm": 1.320763349533081, "learning_rate": 1.5829872853163952e-06, "loss": 1.3134, "step": 6598 }, { "epoch": 0.9452800458387051, "grad_norm": 0.9973997473716736, "learning_rate": 1.5747749748799334e-06, "loss": 1.3369, "step": 6599 }, { "epoch": 0.9454232917920069, "grad_norm": 1.2771992683410645, "learning_rate": 1.5665838529587695e-06, "loss": 1.2982, "step": 6600 }, { "epoch": 0.9455665377453087, "grad_norm": 1.0608091354370117, "learning_rate": 1.5584139213162374e-06, "loss": 1.5314, "step": 6601 }, { "epoch": 0.9457097836986105, "grad_norm": 1.0637578964233398, "learning_rate": 1.5502651817111524e-06, "loss": 1.3096, "step": 6602 }, { "epoch": 0.9458530296519123, "grad_norm": 1.3958479166030884, "learning_rate": 1.5421376358977224e-06, "loss": 1.3305, "step": 6603 }, { "epoch": 0.9459962756052142, "grad_norm": 1.078608512878418, "learning_rate": 1.5340312856255922e-06, "loss": 1.5626, "step": 6604 }, { "epoch": 0.946139521558516, "grad_norm": 1.0765091180801392, "learning_rate": 1.5259461326398772e-06, "loss": 1.363, "step": 6605 }, { "epoch": 0.9462827675118178, "grad_norm": 0.9348585605621338, "learning_rate": 1.517882178681107e-06, "loss": 1.3253, "step": 6606 }, { "epoch": 0.9464260134651196, "grad_norm": 1.1835438013076782, "learning_rate": 1.50983942548526e-06, "loss": 1.2074, "step": 6607 }, { "epoch": 0.9465692594184214, "grad_norm": 1.3026833534240723, "learning_rate": 1.5018178747837174e-06, "loss": 1.3558, "step": 6608 }, { "epoch": 0.9467125053717232, "grad_norm": 1.1636494398117065, "learning_rate": 1.4938175283033319e-06, "loss": 1.3273, "step": 6609 }, { "epoch": 0.9468557513250251, "grad_norm": 0.9853747487068176, "learning_rate": 1.4858383877664029e-06, "loss": 1.3682, "step": 6610 }, { "epoch": 0.9469989972783269, "grad_norm": 1.1408274173736572, "learning_rate": 1.477880454890601e-06, "loss": 1.4118, "step": 6611 }, { "epoch": 0.9471422432316287, "grad_norm": 1.0344606637954712, "learning_rate": 1.4699437313891007e-06, "loss": 1.5545, "step": 6612 }, { "epoch": 0.9472854891849305, "grad_norm": 1.043731689453125, "learning_rate": 1.4620282189704793e-06, "loss": 1.1927, "step": 6613 }, { "epoch": 0.9474287351382323, "grad_norm": 1.1335136890411377, "learning_rate": 1.4541339193387404e-06, "loss": 1.4663, "step": 6614 }, { "epoch": 0.9475719810915342, "grad_norm": 1.17958402633667, "learning_rate": 1.446260834193336e-06, "loss": 1.5314, "step": 6615 }, { "epoch": 0.947715227044836, "grad_norm": 1.1938090324401855, "learning_rate": 1.4384089652291543e-06, "loss": 1.5077, "step": 6616 }, { "epoch": 0.9478584729981379, "grad_norm": 1.2746391296386719, "learning_rate": 1.4305783141364992e-06, "loss": 1.5235, "step": 6617 }, { "epoch": 0.9480017189514396, "grad_norm": 1.0698535442352295, "learning_rate": 1.422768882601122e-06, "loss": 1.3878, "step": 6618 }, { "epoch": 0.9481449649047414, "grad_norm": 1.1420445442199707, "learning_rate": 1.4149806723041892e-06, "loss": 1.3192, "step": 6619 }, { "epoch": 0.9482882108580433, "grad_norm": 1.3574674129486084, "learning_rate": 1.407213684922315e-06, "loss": 1.4601, "step": 6620 }, { "epoch": 0.9484314568113451, "grad_norm": 1.064995288848877, "learning_rate": 1.3994679221275509e-06, "loss": 1.4208, "step": 6621 }, { "epoch": 0.948574702764647, "grad_norm": 1.353135108947754, "learning_rate": 1.391743385587363e-06, "loss": 1.3442, "step": 6622 }, { "epoch": 0.9487179487179487, "grad_norm": 1.1409597396850586, "learning_rate": 1.3840400769646322e-06, "loss": 1.2337, "step": 6623 }, { "epoch": 0.9488611946712505, "grad_norm": 1.0321013927459717, "learning_rate": 1.376357997917721e-06, "loss": 1.4298, "step": 6624 }, { "epoch": 0.9490044406245524, "grad_norm": 1.148101806640625, "learning_rate": 1.3686971501003621e-06, "loss": 1.3047, "step": 6625 }, { "epoch": 0.9491476865778542, "grad_norm": 1.3129265308380127, "learning_rate": 1.3610575351617693e-06, "loss": 1.3453, "step": 6626 }, { "epoch": 0.949290932531156, "grad_norm": 1.1807198524475098, "learning_rate": 1.3534391547465608e-06, "loss": 1.4134, "step": 6627 }, { "epoch": 0.9494341784844578, "grad_norm": 1.305296540260315, "learning_rate": 1.34584201049478e-06, "loss": 1.4866, "step": 6628 }, { "epoch": 0.9495774244377596, "grad_norm": 1.1400765180587769, "learning_rate": 1.3382661040418964e-06, "loss": 1.3743, "step": 6629 }, { "epoch": 0.9497206703910615, "grad_norm": 1.130907654762268, "learning_rate": 1.3307114370188057e-06, "loss": 1.5245, "step": 6630 }, { "epoch": 0.9498639163443633, "grad_norm": 0.9900829195976257, "learning_rate": 1.3231780110518844e-06, "loss": 1.2665, "step": 6631 }, { "epoch": 0.9500071622976651, "grad_norm": 1.1155794858932495, "learning_rate": 1.3156658277628463e-06, "loss": 1.3699, "step": 6632 }, { "epoch": 0.9501504082509669, "grad_norm": 1.0596249103546143, "learning_rate": 1.3081748887689094e-06, "loss": 1.284, "step": 6633 }, { "epoch": 0.9502936542042687, "grad_norm": 0.9751294851303101, "learning_rate": 1.3007051956826611e-06, "loss": 1.4007, "step": 6634 }, { "epoch": 0.9504369001575705, "grad_norm": 1.117329478263855, "learning_rate": 1.2932567501121707e-06, "loss": 1.3742, "step": 6635 }, { "epoch": 0.9505801461108724, "grad_norm": 1.0809996128082275, "learning_rate": 1.2858295536608778e-06, "loss": 1.4682, "step": 6636 }, { "epoch": 0.9507233920641742, "grad_norm": 1.1295413970947266, "learning_rate": 1.2784236079276924e-06, "loss": 1.342, "step": 6637 }, { "epoch": 0.950866638017476, "grad_norm": 1.2350488901138306, "learning_rate": 1.271038914506928e-06, "loss": 1.4668, "step": 6638 }, { "epoch": 0.9510098839707778, "grad_norm": 1.1022533178329468, "learning_rate": 1.263675474988324e-06, "loss": 1.4805, "step": 6639 }, { "epoch": 0.9511531299240796, "grad_norm": 1.1076018810272217, "learning_rate": 1.2563332909570346e-06, "loss": 1.4402, "step": 6640 }, { "epoch": 0.9512963758773815, "grad_norm": 1.118715524673462, "learning_rate": 1.2490123639936625e-06, "loss": 1.379, "step": 6641 }, { "epoch": 0.9514396218306833, "grad_norm": 1.0356736183166504, "learning_rate": 1.2417126956742241e-06, "loss": 1.51, "step": 6642 }, { "epoch": 0.9515828677839852, "grad_norm": 1.1951422691345215, "learning_rate": 1.234434287570163e-06, "loss": 1.3708, "step": 6643 }, { "epoch": 0.9517261137372869, "grad_norm": 1.0350828170776367, "learning_rate": 1.2271771412483146e-06, "loss": 1.3823, "step": 6644 }, { "epoch": 0.9518693596905887, "grad_norm": 1.033468246459961, "learning_rate": 1.2199412582709956e-06, "loss": 1.4576, "step": 6645 }, { "epoch": 0.9520126056438906, "grad_norm": 1.060963749885559, "learning_rate": 1.2127266401958826e-06, "loss": 1.4115, "step": 6646 }, { "epoch": 0.9521558515971924, "grad_norm": 1.1100749969482422, "learning_rate": 1.2055332885761327e-06, "loss": 1.3645, "step": 6647 }, { "epoch": 0.9522990975504942, "grad_norm": 1.1430915594100952, "learning_rate": 1.1983612049602744e-06, "loss": 1.3431, "step": 6648 }, { "epoch": 0.952442343503796, "grad_norm": 1.2031878232955933, "learning_rate": 1.1912103908922945e-06, "loss": 1.4835, "step": 6649 }, { "epoch": 0.9525855894570978, "grad_norm": 1.056313395500183, "learning_rate": 1.1840808479115727e-06, "loss": 1.4814, "step": 6650 }, { "epoch": 0.9527288354103997, "grad_norm": 1.128707766532898, "learning_rate": 1.176972577552915e-06, "loss": 1.3838, "step": 6651 }, { "epoch": 0.9528720813637015, "grad_norm": 1.0374027490615845, "learning_rate": 1.169885581346597e-06, "loss": 1.426, "step": 6652 }, { "epoch": 0.9530153273170033, "grad_norm": 1.2404756546020508, "learning_rate": 1.1628198608182429e-06, "loss": 1.2158, "step": 6653 }, { "epoch": 0.9531585732703051, "grad_norm": 1.094307780265808, "learning_rate": 1.1557754174889247e-06, "loss": 1.3653, "step": 6654 }, { "epoch": 0.9533018192236069, "grad_norm": 1.1155354976654053, "learning_rate": 1.1487522528751404e-06, "loss": 1.3684, "step": 6655 }, { "epoch": 0.9534450651769087, "grad_norm": 0.9912089109420776, "learning_rate": 1.1417503684888142e-06, "loss": 1.3197, "step": 6656 }, { "epoch": 0.9535883111302106, "grad_norm": 1.0741705894470215, "learning_rate": 1.1347697658372847e-06, "loss": 1.3644, "step": 6657 }, { "epoch": 0.9537315570835124, "grad_norm": 1.0323222875595093, "learning_rate": 1.127810446423261e-06, "loss": 1.4997, "step": 6658 }, { "epoch": 0.9538748030368142, "grad_norm": 1.1080387830734253, "learning_rate": 1.1208724117449554e-06, "loss": 1.3796, "step": 6659 }, { "epoch": 0.954018048990116, "grad_norm": 1.042419195175171, "learning_rate": 1.1139556632959515e-06, "loss": 1.4402, "step": 6660 }, { "epoch": 0.9541612949434178, "grad_norm": 1.177602767944336, "learning_rate": 1.1070602025652355e-06, "loss": 1.3802, "step": 6661 }, { "epoch": 0.9543045408967197, "grad_norm": 0.9951816201210022, "learning_rate": 1.1001860310372314e-06, "loss": 1.5112, "step": 6662 }, { "epoch": 0.9544477868500215, "grad_norm": 1.0275555849075317, "learning_rate": 1.0933331501917998e-06, "loss": 1.4019, "step": 6663 }, { "epoch": 0.9545910328033234, "grad_norm": 1.1934802532196045, "learning_rate": 1.0865015615041607e-06, "loss": 1.4442, "step": 6664 }, { "epoch": 0.9547342787566251, "grad_norm": 0.9823391437530518, "learning_rate": 1.0796912664450265e-06, "loss": 1.5455, "step": 6665 }, { "epoch": 0.9548775247099269, "grad_norm": 1.2193284034729004, "learning_rate": 1.0729022664804467e-06, "loss": 1.4387, "step": 6666 }, { "epoch": 0.9550207706632288, "grad_norm": 0.960207462310791, "learning_rate": 1.0661345630719522e-06, "loss": 1.3127, "step": 6667 }, { "epoch": 0.9551640166165306, "grad_norm": 1.169193148612976, "learning_rate": 1.0593881576764664e-06, "loss": 1.5507, "step": 6668 }, { "epoch": 0.9553072625698324, "grad_norm": 1.19430673122406, "learning_rate": 1.0526630517462833e-06, "loss": 1.3299, "step": 6669 }, { "epoch": 0.9554505085231342, "grad_norm": 1.0872056484222412, "learning_rate": 1.045959246729189e-06, "loss": 1.3798, "step": 6670 }, { "epoch": 0.955593754476436, "grad_norm": 1.0095791816711426, "learning_rate": 1.0392767440683516e-06, "loss": 1.2655, "step": 6671 }, { "epoch": 0.9557370004297379, "grad_norm": 0.9927314519882202, "learning_rate": 1.03261554520232e-06, "loss": 1.4826, "step": 6672 }, { "epoch": 0.9558802463830397, "grad_norm": 1.2548762559890747, "learning_rate": 1.0259756515651032e-06, "loss": 1.348, "step": 6673 }, { "epoch": 0.9560234923363415, "grad_norm": 1.0086735486984253, "learning_rate": 1.0193570645860905e-06, "loss": 1.3047, "step": 6674 }, { "epoch": 0.9561667382896433, "grad_norm": 1.1755056381225586, "learning_rate": 1.0127597856901205e-06, "loss": 1.3202, "step": 6675 }, { "epoch": 0.9563099842429451, "grad_norm": 1.2449640035629272, "learning_rate": 1.0061838162974235e-06, "loss": 1.2901, "step": 6676 }, { "epoch": 0.956453230196247, "grad_norm": 1.955307960510254, "learning_rate": 9.996291578236228e-07, "loss": 1.6014, "step": 6677 }, { "epoch": 0.9565964761495488, "grad_norm": 1.2495157718658447, "learning_rate": 9.930958116797895e-07, "loss": 1.2821, "step": 6678 }, { "epoch": 0.9567397221028506, "grad_norm": 1.1265114545822144, "learning_rate": 9.865837792723875e-07, "loss": 1.5538, "step": 6679 }, { "epoch": 0.9568829680561524, "grad_norm": 1.1470140218734741, "learning_rate": 9.800930620032843e-07, "loss": 1.3376, "step": 6680 }, { "epoch": 0.9570262140094542, "grad_norm": 1.289620280265808, "learning_rate": 9.736236612697957e-07, "loss": 1.4186, "step": 6681 }, { "epoch": 0.957169459962756, "grad_norm": 1.0517220497131348, "learning_rate": 9.671755784646075e-07, "loss": 1.2637, "step": 6682 }, { "epoch": 0.9573127059160579, "grad_norm": 1.2137281894683838, "learning_rate": 9.607488149758425e-07, "loss": 1.2991, "step": 6683 }, { "epoch": 0.9574559518693597, "grad_norm": 1.036896824836731, "learning_rate": 9.543433721870055e-07, "loss": 1.39, "step": 6684 }, { "epoch": 0.9575991978226616, "grad_norm": 1.029040813446045, "learning_rate": 9.479592514770486e-07, "loss": 1.3968, "step": 6685 }, { "epoch": 0.9577424437759633, "grad_norm": 1.0036942958831787, "learning_rate": 9.415964542203059e-07, "loss": 1.4887, "step": 6686 }, { "epoch": 0.9578856897292651, "grad_norm": 1.2102922201156616, "learning_rate": 9.352549817865263e-07, "loss": 1.391, "step": 6687 }, { "epoch": 0.958028935682567, "grad_norm": 1.216578483581543, "learning_rate": 9.289348355408734e-07, "loss": 1.4705, "step": 6688 }, { "epoch": 0.9581721816358688, "grad_norm": 0.9445058107376099, "learning_rate": 9.226360168439363e-07, "loss": 1.3832, "step": 6689 }, { "epoch": 0.9583154275891707, "grad_norm": 1.1564074754714966, "learning_rate": 9.16358527051675e-07, "loss": 1.3438, "step": 6690 }, { "epoch": 0.9584586735424724, "grad_norm": 1.5355312824249268, "learning_rate": 9.101023675154751e-07, "loss": 1.3527, "step": 6691 }, { "epoch": 0.9586019194957742, "grad_norm": 1.06477689743042, "learning_rate": 9.038675395821594e-07, "loss": 1.3285, "step": 6692 }, { "epoch": 0.9587451654490761, "grad_norm": 1.0707494020462036, "learning_rate": 8.976540445938986e-07, "loss": 1.2351, "step": 6693 }, { "epoch": 0.9588884114023779, "grad_norm": 1.0883839130401611, "learning_rate": 8.914618838883226e-07, "loss": 1.6989, "step": 6694 }, { "epoch": 0.9590316573556797, "grad_norm": 0.9473715424537659, "learning_rate": 8.852910587984542e-07, "loss": 1.4167, "step": 6695 }, { "epoch": 0.9591749033089815, "grad_norm": 1.2203316688537598, "learning_rate": 8.791415706526973e-07, "loss": 1.4274, "step": 6696 }, { "epoch": 0.9593181492622833, "grad_norm": 1.175966501235962, "learning_rate": 8.730134207749152e-07, "loss": 1.2097, "step": 6697 }, { "epoch": 0.9594613952155852, "grad_norm": 1.180691123008728, "learning_rate": 8.669066104843304e-07, "loss": 1.3798, "step": 6698 }, { "epoch": 0.959604641168887, "grad_norm": 1.0557183027267456, "learning_rate": 8.608211410955802e-07, "loss": 1.3455, "step": 6699 }, { "epoch": 0.9597478871221888, "grad_norm": 1.1434526443481445, "learning_rate": 8.547570139187388e-07, "loss": 1.4122, "step": 6700 }, { "epoch": 0.9598911330754906, "grad_norm": 1.1817371845245361, "learning_rate": 8.487142302592288e-07, "loss": 1.5785, "step": 6701 }, { "epoch": 0.9600343790287924, "grad_norm": 1.0768605470657349, "learning_rate": 8.426927914179428e-07, "loss": 1.5854, "step": 6702 }, { "epoch": 0.9601776249820942, "grad_norm": 1.2435342073440552, "learning_rate": 8.366926986911328e-07, "loss": 1.253, "step": 6703 }, { "epoch": 0.9603208709353961, "grad_norm": 1.085289478302002, "learning_rate": 8.307139533704766e-07, "loss": 1.3159, "step": 6704 }, { "epoch": 0.9604641168886979, "grad_norm": 1.0576350688934326, "learning_rate": 8.247565567430559e-07, "loss": 1.3977, "step": 6705 }, { "epoch": 0.9606073628419998, "grad_norm": 1.1113113164901733, "learning_rate": 8.188205100913337e-07, "loss": 1.4506, "step": 6706 }, { "epoch": 0.9607506087953015, "grad_norm": 1.0279850959777832, "learning_rate": 8.12905814693199e-07, "loss": 1.3242, "step": 6707 }, { "epoch": 0.9608938547486033, "grad_norm": 1.0751326084136963, "learning_rate": 8.07012471821944e-07, "loss": 1.4028, "step": 6708 }, { "epoch": 0.9610371007019052, "grad_norm": 0.9569059014320374, "learning_rate": 8.011404827462654e-07, "loss": 1.3947, "step": 6709 }, { "epoch": 0.961180346655207, "grad_norm": 1.1920746564865112, "learning_rate": 7.952898487302518e-07, "loss": 1.4405, "step": 6710 }, { "epoch": 0.9613235926085089, "grad_norm": 1.0895051956176758, "learning_rate": 7.894605710333847e-07, "loss": 1.4034, "step": 6711 }, { "epoch": 0.9614668385618106, "grad_norm": 1.1912773847579956, "learning_rate": 7.836526509105824e-07, "loss": 1.6514, "step": 6712 }, { "epoch": 0.9616100845151124, "grad_norm": 1.1912981271743774, "learning_rate": 7.778660896121448e-07, "loss": 1.3297, "step": 6713 }, { "epoch": 0.9617533304684143, "grad_norm": 1.3121172189712524, "learning_rate": 7.721008883837755e-07, "loss": 1.3504, "step": 6714 }, { "epoch": 0.9618965764217161, "grad_norm": 1.2163114547729492, "learning_rate": 7.663570484665705e-07, "loss": 1.4911, "step": 6715 }, { "epoch": 0.962039822375018, "grad_norm": 1.1484235525131226, "learning_rate": 7.606345710970409e-07, "loss": 1.3089, "step": 6716 }, { "epoch": 0.9621830683283197, "grad_norm": 0.9658839106559753, "learning_rate": 7.549334575070899e-07, "loss": 1.4439, "step": 6717 }, { "epoch": 0.9623263142816215, "grad_norm": 1.2979060411453247, "learning_rate": 7.492537089240471e-07, "loss": 1.7122, "step": 6718 }, { "epoch": 0.9624695602349234, "grad_norm": 1.1551438570022583, "learning_rate": 7.435953265705897e-07, "loss": 1.1985, "step": 6719 }, { "epoch": 0.9626128061882252, "grad_norm": 1.3193870782852173, "learning_rate": 7.379583116648436e-07, "loss": 1.4095, "step": 6720 }, { "epoch": 0.962756052141527, "grad_norm": 1.118631362915039, "learning_rate": 7.32342665420338e-07, "loss": 1.213, "step": 6721 }, { "epoch": 0.9628992980948288, "grad_norm": 1.288866400718689, "learning_rate": 7.267483890459503e-07, "loss": 1.2805, "step": 6722 }, { "epoch": 0.9630425440481306, "grad_norm": 1.2376817464828491, "learning_rate": 7.21175483745995e-07, "loss": 1.3962, "step": 6723 }, { "epoch": 0.9631857900014325, "grad_norm": 0.9884143471717834, "learning_rate": 7.156239507202011e-07, "loss": 1.1978, "step": 6724 }, { "epoch": 0.9633290359547343, "grad_norm": 1.0311580896377563, "learning_rate": 7.100937911636574e-07, "loss": 1.4594, "step": 6725 }, { "epoch": 0.9634722819080361, "grad_norm": 1.1986788511276245, "learning_rate": 7.04585006266878e-07, "loss": 1.3107, "step": 6726 }, { "epoch": 0.963615527861338, "grad_norm": 0.9753996133804321, "learning_rate": 6.990975972157699e-07, "loss": 1.4494, "step": 6727 }, { "epoch": 0.9637587738146397, "grad_norm": 1.0861049890518188, "learning_rate": 6.936315651916325e-07, "loss": 1.4123, "step": 6728 }, { "epoch": 0.9639020197679415, "grad_norm": 0.8932784795761108, "learning_rate": 6.881869113711692e-07, "loss": 1.6073, "step": 6729 }, { "epoch": 0.9640452657212434, "grad_norm": 1.194557785987854, "learning_rate": 6.827636369264645e-07, "loss": 1.5577, "step": 6730 }, { "epoch": 0.9641885116745452, "grad_norm": 1.1425926685333252, "learning_rate": 6.773617430250512e-07, "loss": 1.5552, "step": 6731 }, { "epoch": 0.9643317576278471, "grad_norm": 1.028408408164978, "learning_rate": 6.71981230829788e-07, "loss": 1.5023, "step": 6732 }, { "epoch": 0.9644750035811488, "grad_norm": 1.1662178039550781, "learning_rate": 6.666221014989815e-07, "loss": 1.1701, "step": 6733 }, { "epoch": 0.9646182495344506, "grad_norm": 1.2152079343795776, "learning_rate": 6.612843561863092e-07, "loss": 1.5107, "step": 6734 }, { "epoch": 0.9647614954877525, "grad_norm": 1.0565358400344849, "learning_rate": 6.559679960408738e-07, "loss": 1.4338, "step": 6735 }, { "epoch": 0.9649047414410543, "grad_norm": 1.0265945196151733, "learning_rate": 6.506730222071488e-07, "loss": 1.4233, "step": 6736 }, { "epoch": 0.9650479873943562, "grad_norm": 0.9951639175415039, "learning_rate": 6.453994358249893e-07, "loss": 1.39, "step": 6737 }, { "epoch": 0.9651912333476579, "grad_norm": 0.936131477355957, "learning_rate": 6.401472380297091e-07, "loss": 1.4467, "step": 6738 }, { "epoch": 0.9653344793009597, "grad_norm": 1.1928865909576416, "learning_rate": 6.349164299519483e-07, "loss": 1.3373, "step": 6739 }, { "epoch": 0.9654777252542616, "grad_norm": 0.9654519557952881, "learning_rate": 6.297070127177617e-07, "loss": 1.3537, "step": 6740 }, { "epoch": 0.9656209712075634, "grad_norm": 1.244594931602478, "learning_rate": 6.245189874486301e-07, "loss": 1.3561, "step": 6741 }, { "epoch": 0.9657642171608652, "grad_norm": 1.2127726078033447, "learning_rate": 6.193523552614044e-07, "loss": 1.3085, "step": 6742 }, { "epoch": 0.965907463114167, "grad_norm": 0.9926570057868958, "learning_rate": 6.142071172683284e-07, "loss": 1.1609, "step": 6743 }, { "epoch": 0.9660507090674688, "grad_norm": 1.2157169580459595, "learning_rate": 6.09083274577038e-07, "loss": 1.328, "step": 6744 }, { "epoch": 0.9661939550207707, "grad_norm": 1.092718482017517, "learning_rate": 6.039808282905735e-07, "loss": 1.5967, "step": 6745 }, { "epoch": 0.9663372009740725, "grad_norm": 1.326289415359497, "learning_rate": 5.98899779507367e-07, "loss": 1.1561, "step": 6746 }, { "epoch": 0.9664804469273743, "grad_norm": 1.0344010591506958, "learning_rate": 5.938401293212547e-07, "loss": 1.4568, "step": 6747 }, { "epoch": 0.9666236928806761, "grad_norm": 1.111846685409546, "learning_rate": 5.888018788214322e-07, "loss": 1.3696, "step": 6748 }, { "epoch": 0.9667669388339779, "grad_norm": 1.266535758972168, "learning_rate": 5.837850290925206e-07, "loss": 1.4055, "step": 6749 }, { "epoch": 0.9669101847872797, "grad_norm": 1.1402356624603271, "learning_rate": 5.787895812145227e-07, "loss": 1.4146, "step": 6750 }, { "epoch": 0.9670534307405816, "grad_norm": 1.2713218927383423, "learning_rate": 5.738155362628339e-07, "loss": 1.3243, "step": 6751 }, { "epoch": 0.9671966766938834, "grad_norm": 1.135552167892456, "learning_rate": 5.68862895308242e-07, "loss": 1.5358, "step": 6752 }, { "epoch": 0.9673399226471853, "grad_norm": 1.0311620235443115, "learning_rate": 5.639316594169386e-07, "loss": 1.4568, "step": 6753 }, { "epoch": 0.967483168600487, "grad_norm": 1.197799563407898, "learning_rate": 5.590218296504857e-07, "loss": 1.6264, "step": 6754 }, { "epoch": 0.9676264145537888, "grad_norm": 1.00772225856781, "learning_rate": 5.541334070658488e-07, "loss": 1.3162, "step": 6755 }, { "epoch": 0.9677696605070907, "grad_norm": 1.1088320016860962, "learning_rate": 5.492663927153863e-07, "loss": 1.4627, "step": 6756 }, { "epoch": 0.9679129064603925, "grad_norm": 1.0879466533660889, "learning_rate": 5.444207876468488e-07, "loss": 1.3604, "step": 6757 }, { "epoch": 0.9680561524136944, "grad_norm": 1.2988510131835938, "learning_rate": 5.395965929033686e-07, "loss": 1.475, "step": 6758 }, { "epoch": 0.9681993983669961, "grad_norm": 1.2885271310806274, "learning_rate": 5.347938095234705e-07, "loss": 1.324, "step": 6759 }, { "epoch": 0.9683426443202979, "grad_norm": 1.1041315793991089, "learning_rate": 5.300124385410943e-07, "loss": 1.379, "step": 6760 }, { "epoch": 0.9684858902735998, "grad_norm": 1.2827993631362915, "learning_rate": 5.252524809855386e-07, "loss": 1.4571, "step": 6761 }, { "epoch": 0.9686291362269016, "grad_norm": 0.9214184880256653, "learning_rate": 5.20513937881506e-07, "loss": 1.3211, "step": 6762 }, { "epoch": 0.9687723821802035, "grad_norm": 1.0917593240737915, "learning_rate": 5.157968102490918e-07, "loss": 1.3754, "step": 6763 }, { "epoch": 0.9689156281335052, "grad_norm": 0.882162868976593, "learning_rate": 5.111010991037613e-07, "loss": 1.3528, "step": 6764 }, { "epoch": 0.969058874086807, "grad_norm": 1.0892316102981567, "learning_rate": 5.064268054564059e-07, "loss": 1.3638, "step": 6765 }, { "epoch": 0.9692021200401089, "grad_norm": 1.177470326423645, "learning_rate": 5.017739303132763e-07, "loss": 1.3821, "step": 6766 }, { "epoch": 0.9693453659934107, "grad_norm": 1.250978708267212, "learning_rate": 4.971424746760156e-07, "loss": 1.573, "step": 6767 }, { "epoch": 0.9694886119467125, "grad_norm": 1.1360459327697754, "learning_rate": 4.925324395416709e-07, "loss": 1.3349, "step": 6768 }, { "epoch": 0.9696318579000143, "grad_norm": 1.0928285121917725, "learning_rate": 4.879438259026592e-07, "loss": 1.3824, "step": 6769 }, { "epoch": 0.9697751038533161, "grad_norm": 0.9838641881942749, "learning_rate": 4.833766347468016e-07, "loss": 1.2204, "step": 6770 }, { "epoch": 0.969918349806618, "grad_norm": 1.1207385063171387, "learning_rate": 4.788308670573005e-07, "loss": 1.2899, "step": 6771 }, { "epoch": 0.9700615957599198, "grad_norm": 1.1774346828460693, "learning_rate": 4.743065238127509e-07, "loss": 1.6145, "step": 6772 }, { "epoch": 0.9702048417132216, "grad_norm": 1.0175540447235107, "learning_rate": 4.698036059871291e-07, "loss": 1.4813, "step": 6773 }, { "epoch": 0.9703480876665235, "grad_norm": 0.9025095701217651, "learning_rate": 4.6532211454979324e-07, "loss": 1.3793, "step": 6774 }, { "epoch": 0.9704913336198252, "grad_norm": 1.0594878196716309, "learning_rate": 4.608620504655048e-07, "loss": 1.3312, "step": 6775 }, { "epoch": 0.970634579573127, "grad_norm": 1.0048699378967285, "learning_rate": 4.5642341469441794e-07, "loss": 1.3631, "step": 6776 }, { "epoch": 0.9707778255264289, "grad_norm": 1.0874847173690796, "learning_rate": 4.520062081920351e-07, "loss": 1.4338, "step": 6777 }, { "epoch": 0.9709210714797307, "grad_norm": 1.3135201930999756, "learning_rate": 4.4761043190929553e-07, "loss": 1.5749, "step": 6778 }, { "epoch": 0.9710643174330326, "grad_norm": 1.0692249536514282, "learning_rate": 4.4323608679248676e-07, "loss": 1.4714, "step": 6779 }, { "epoch": 0.9712075633863343, "grad_norm": 1.385565996170044, "learning_rate": 4.388831737832999e-07, "loss": 1.2997, "step": 6780 }, { "epoch": 0.9713508093396361, "grad_norm": 1.105308175086975, "learning_rate": 4.345516938188188e-07, "loss": 1.4659, "step": 6781 }, { "epoch": 0.971494055292938, "grad_norm": 1.1594161987304688, "learning_rate": 4.3024164783148636e-07, "loss": 1.4033, "step": 6782 }, { "epoch": 0.9716373012462398, "grad_norm": 1.1755322217941284, "learning_rate": 4.2595303674916044e-07, "loss": 1.4945, "step": 6783 }, { "epoch": 0.9717805471995417, "grad_norm": 1.024498701095581, "learning_rate": 4.216858614950692e-07, "loss": 1.4365, "step": 6784 }, { "epoch": 0.9719237931528434, "grad_norm": 0.957750141620636, "learning_rate": 4.174401229878333e-07, "loss": 1.4682, "step": 6785 }, { "epoch": 0.9720670391061452, "grad_norm": 1.08503258228302, "learning_rate": 4.1321582214145506e-07, "loss": 1.2786, "step": 6786 }, { "epoch": 0.9722102850594471, "grad_norm": 1.1912667751312256, "learning_rate": 4.090129598653181e-07, "loss": 1.4855, "step": 6787 }, { "epoch": 0.9723535310127489, "grad_norm": 1.0803782939910889, "learning_rate": 4.048315370641986e-07, "loss": 1.413, "step": 6788 }, { "epoch": 0.9724967769660507, "grad_norm": 1.0952485799789429, "learning_rate": 4.006715546382434e-07, "loss": 1.2921, "step": 6789 }, { "epoch": 0.9726400229193525, "grad_norm": 1.1317952871322632, "learning_rate": 3.9653301348301364e-07, "loss": 1.3688, "step": 6790 }, { "epoch": 0.9727832688726543, "grad_norm": 1.057381272315979, "learning_rate": 3.92415914489408e-07, "loss": 1.301, "step": 6791 }, { "epoch": 0.9729265148259562, "grad_norm": 1.0162808895111084, "learning_rate": 3.8832025854376176e-07, "loss": 1.3706, "step": 6792 }, { "epoch": 0.973069760779258, "grad_norm": 1.200617790222168, "learning_rate": 3.842460465277586e-07, "loss": 1.3051, "step": 6793 }, { "epoch": 0.9732130067325598, "grad_norm": 1.3213880062103271, "learning_rate": 3.801932793184748e-07, "loss": 1.286, "step": 6794 }, { "epoch": 0.9733562526858617, "grad_norm": 1.412574291229248, "learning_rate": 3.7616195778836793e-07, "loss": 1.3873, "step": 6795 }, { "epoch": 0.9734994986391634, "grad_norm": 0.9555412530899048, "learning_rate": 3.721520828052771e-07, "loss": 1.3871, "step": 6796 }, { "epoch": 0.9736427445924652, "grad_norm": 1.0073835849761963, "learning_rate": 3.681636552324452e-07, "loss": 1.3203, "step": 6797 }, { "epoch": 0.9737859905457671, "grad_norm": 1.230358600616455, "learning_rate": 3.6419667592847427e-07, "loss": 1.2928, "step": 6798 }, { "epoch": 0.9739292364990689, "grad_norm": 0.9673385620117188, "learning_rate": 3.6025114574734785e-07, "loss": 1.3819, "step": 6799 }, { "epoch": 0.9740724824523708, "grad_norm": 1.0357972383499146, "learning_rate": 3.563270655384532e-07, "loss": 1.4869, "step": 6800 }, { "epoch": 0.9742157284056725, "grad_norm": 1.2780406475067139, "learning_rate": 3.5242443614654784e-07, "loss": 1.5134, "step": 6801 }, { "epoch": 0.9743589743589743, "grad_norm": 1.0285743474960327, "learning_rate": 3.4854325841175985e-07, "loss": 1.2627, "step": 6802 }, { "epoch": 0.9745022203122762, "grad_norm": 1.036131501197815, "learning_rate": 3.446835331696208e-07, "loss": 1.5288, "step": 6803 }, { "epoch": 0.974645466265578, "grad_norm": 1.0035115480422974, "learning_rate": 3.4084526125103267e-07, "loss": 1.4875, "step": 6804 }, { "epoch": 0.9747887122188799, "grad_norm": 1.1059772968292236, "learning_rate": 3.370284434822679e-07, "loss": 1.5246, "step": 6805 }, { "epoch": 0.9749319581721816, "grad_norm": 1.1269580125808716, "learning_rate": 3.332330806850137e-07, "loss": 1.369, "step": 6806 }, { "epoch": 0.9750752041254834, "grad_norm": 1.1165024042129517, "learning_rate": 3.294591736763164e-07, "loss": 1.3555, "step": 6807 }, { "epoch": 0.9752184500787853, "grad_norm": 1.1266822814941406, "learning_rate": 3.2570672326858175e-07, "loss": 1.484, "step": 6808 }, { "epoch": 0.9753616960320871, "grad_norm": 1.0630747079849243, "learning_rate": 3.219757302696302e-07, "loss": 1.2312, "step": 6809 }, { "epoch": 0.975504941985389, "grad_norm": 1.11784827709198, "learning_rate": 3.182661954826638e-07, "loss": 1.3283, "step": 6810 }, { "epoch": 0.9756481879386907, "grad_norm": 1.0629963874816895, "learning_rate": 3.1457811970624364e-07, "loss": 1.2294, "step": 6811 }, { "epoch": 0.9757914338919925, "grad_norm": 1.2173802852630615, "learning_rate": 3.1091150373433465e-07, "loss": 1.3145, "step": 6812 }, { "epoch": 0.9759346798452944, "grad_norm": 1.1941826343536377, "learning_rate": 3.072663483562388e-07, "loss": 1.4413, "step": 6813 }, { "epoch": 0.9760779257985962, "grad_norm": 1.2987266778945923, "learning_rate": 3.0364265435669503e-07, "loss": 1.4398, "step": 6814 }, { "epoch": 0.976221171751898, "grad_norm": 1.211613655090332, "learning_rate": 3.0004042251579047e-07, "loss": 1.3472, "step": 6815 }, { "epoch": 0.9763644177051999, "grad_norm": 0.992396891117096, "learning_rate": 2.9645965360898255e-07, "loss": 1.4871, "step": 6816 }, { "epoch": 0.9765076636585016, "grad_norm": 1.0732048749923706, "learning_rate": 2.9290034840713245e-07, "loss": 1.3893, "step": 6817 }, { "epoch": 0.9766509096118035, "grad_norm": 1.1515613794326782, "learning_rate": 2.8936250767647167e-07, "loss": 1.2511, "step": 6818 }, { "epoch": 0.9767941555651053, "grad_norm": 0.9855867028236389, "learning_rate": 2.8584613217861324e-07, "loss": 1.3056, "step": 6819 }, { "epoch": 0.9769374015184071, "grad_norm": 1.0777101516723633, "learning_rate": 2.8235122267052936e-07, "loss": 1.3074, "step": 6820 }, { "epoch": 0.977080647471709, "grad_norm": 1.0498789548873901, "learning_rate": 2.7887777990460716e-07, "loss": 1.3389, "step": 6821 }, { "epoch": 0.9772238934250107, "grad_norm": 1.4475789070129395, "learning_rate": 2.754258046285818e-07, "loss": 1.3099, "step": 6822 }, { "epoch": 0.9773671393783125, "grad_norm": 0.9817882776260376, "learning_rate": 2.719952975855811e-07, "loss": 1.4246, "step": 6823 }, { "epoch": 0.9775103853316144, "grad_norm": 1.2602123022079468, "learning_rate": 2.685862595141142e-07, "loss": 1.4832, "step": 6824 }, { "epoch": 0.9776536312849162, "grad_norm": 0.9707384705543518, "learning_rate": 2.6519869114804975e-07, "loss": 1.3286, "step": 6825 }, { "epoch": 0.9777968772382181, "grad_norm": 1.1743457317352295, "learning_rate": 2.618325932166488e-07, "loss": 1.4651, "step": 6826 }, { "epoch": 0.9779401231915198, "grad_norm": 1.1900017261505127, "learning_rate": 2.58487966444565e-07, "loss": 1.3218, "step": 6827 }, { "epoch": 0.9780833691448216, "grad_norm": 1.0269451141357422, "learning_rate": 2.5516481155180014e-07, "loss": 1.3631, "step": 6828 }, { "epoch": 0.9782266150981235, "grad_norm": 1.2873187065124512, "learning_rate": 2.518631292537488e-07, "loss": 1.3058, "step": 6829 }, { "epoch": 0.9783698610514253, "grad_norm": 1.0139222145080566, "learning_rate": 2.485829202611756e-07, "loss": 1.3905, "step": 6830 }, { "epoch": 0.9785131070047272, "grad_norm": 1.2667527198791504, "learning_rate": 2.453241852802379e-07, "loss": 1.1982, "step": 6831 }, { "epoch": 0.9786563529580289, "grad_norm": 0.976852297782898, "learning_rate": 2.4208692501246354e-07, "loss": 1.3519, "step": 6832 }, { "epoch": 0.9787995989113307, "grad_norm": 1.156193733215332, "learning_rate": 2.3887114015475056e-07, "loss": 1.738, "step": 6833 }, { "epoch": 0.9789428448646326, "grad_norm": 1.0942962169647217, "learning_rate": 2.3567683139936735e-07, "loss": 1.375, "step": 6834 }, { "epoch": 0.9790860908179344, "grad_norm": 1.25776207447052, "learning_rate": 2.3250399943398614e-07, "loss": 1.5078, "step": 6835 }, { "epoch": 0.9792293367712362, "grad_norm": 0.9700466990470886, "learning_rate": 2.2935264494162724e-07, "loss": 1.4201, "step": 6836 }, { "epoch": 0.979372582724538, "grad_norm": 1.3482433557510376, "learning_rate": 2.2622276860070346e-07, "loss": 1.3969, "step": 6837 }, { "epoch": 0.9795158286778398, "grad_norm": 1.021613597869873, "learning_rate": 2.231143710849981e-07, "loss": 1.4312, "step": 6838 }, { "epoch": 0.9796590746311417, "grad_norm": 1.0583614110946655, "learning_rate": 2.20027453063687e-07, "loss": 1.2664, "step": 6839 }, { "epoch": 0.9798023205844435, "grad_norm": 1.2423112392425537, "learning_rate": 2.1696201520128302e-07, "loss": 1.2598, "step": 6840 }, { "epoch": 0.9799455665377453, "grad_norm": 1.1483627557754517, "learning_rate": 2.1391805815771382e-07, "loss": 1.3431, "step": 6841 }, { "epoch": 0.9800888124910472, "grad_norm": 0.9708666205406189, "learning_rate": 2.1089558258826637e-07, "loss": 1.2652, "step": 6842 }, { "epoch": 0.9802320584443489, "grad_norm": 1.1543922424316406, "learning_rate": 2.0789458914359793e-07, "loss": 1.4087, "step": 6843 }, { "epoch": 0.9803753043976507, "grad_norm": 1.0889285802841187, "learning_rate": 2.0491507846975843e-07, "loss": 1.4211, "step": 6844 }, { "epoch": 0.9805185503509526, "grad_norm": 1.0157713890075684, "learning_rate": 2.019570512081459e-07, "loss": 1.3535, "step": 6845 }, { "epoch": 0.9806617963042544, "grad_norm": 1.0404671430587769, "learning_rate": 1.9902050799557315e-07, "loss": 1.4604, "step": 6846 }, { "epoch": 0.9808050422575563, "grad_norm": 1.1559128761291504, "learning_rate": 1.9610544946420117e-07, "loss": 1.4137, "step": 6847 }, { "epoch": 0.980948288210858, "grad_norm": 1.3948359489440918, "learning_rate": 1.9321187624155024e-07, "loss": 1.4006, "step": 6848 }, { "epoch": 0.9810915341641598, "grad_norm": 1.0703754425048828, "learning_rate": 1.9033978895054429e-07, "loss": 1.3678, "step": 6849 }, { "epoch": 0.9812347801174617, "grad_norm": 1.1582732200622559, "learning_rate": 1.8748918820948868e-07, "loss": 1.3188, "step": 6850 }, { "epoch": 0.9813780260707635, "grad_norm": 1.0585753917694092, "learning_rate": 1.8466007463202596e-07, "loss": 1.4986, "step": 6851 }, { "epoch": 0.9815212720240654, "grad_norm": 1.053676962852478, "learning_rate": 1.8185244882721332e-07, "loss": 1.3169, "step": 6852 }, { "epoch": 0.9816645179773671, "grad_norm": 1.3174238204956055, "learning_rate": 1.7906631139944508e-07, "loss": 1.3322, "step": 6853 }, { "epoch": 0.9818077639306689, "grad_norm": 1.3252174854278564, "learning_rate": 1.7630166294850813e-07, "loss": 1.4177, "step": 6854 }, { "epoch": 0.9819510098839708, "grad_norm": 1.0847564935684204, "learning_rate": 1.7355850406958196e-07, "loss": 1.3225, "step": 6855 }, { "epoch": 0.9820942558372726, "grad_norm": 1.3278394937515259, "learning_rate": 1.7083683535318306e-07, "loss": 1.497, "step": 6856 }, { "epoch": 0.9822375017905745, "grad_norm": 1.1299734115600586, "learning_rate": 1.6813665738523166e-07, "loss": 1.3496, "step": 6857 }, { "epoch": 0.9823807477438762, "grad_norm": 1.724112868309021, "learning_rate": 1.654579707469961e-07, "loss": 1.2548, "step": 6858 }, { "epoch": 0.982523993697178, "grad_norm": 1.173754334449768, "learning_rate": 1.6280077601513734e-07, "loss": 1.201, "step": 6859 }, { "epoch": 0.9826672396504799, "grad_norm": 1.1308485269546509, "learning_rate": 1.6016507376169777e-07, "loss": 1.2899, "step": 6860 }, { "epoch": 0.9828104856037817, "grad_norm": 0.9947154521942139, "learning_rate": 1.5755086455404577e-07, "loss": 1.4218, "step": 6861 }, { "epoch": 0.9829537315570835, "grad_norm": 1.1863192319869995, "learning_rate": 1.5495814895498673e-07, "loss": 1.4509, "step": 6862 }, { "epoch": 0.9830969775103854, "grad_norm": 0.9860344529151917, "learning_rate": 1.5238692752266303e-07, "loss": 1.3699, "step": 6863 }, { "epoch": 0.9832402234636871, "grad_norm": 1.3135398626327515, "learning_rate": 1.498372008105764e-07, "loss": 1.4112, "step": 6864 }, { "epoch": 0.983383469416989, "grad_norm": 1.0519835948944092, "learning_rate": 1.4730896936764327e-07, "loss": 1.3422, "step": 6865 }, { "epoch": 0.9835267153702908, "grad_norm": 1.4048845767974854, "learning_rate": 1.448022337381061e-07, "loss": 1.3949, "step": 6866 }, { "epoch": 0.9836699613235926, "grad_norm": 1.0456485748291016, "learning_rate": 1.4231699446162205e-07, "loss": 1.571, "step": 6867 }, { "epoch": 0.9838132072768945, "grad_norm": 1.1085842847824097, "learning_rate": 1.3985325207319655e-07, "loss": 1.3395, "step": 6868 }, { "epoch": 0.9839564532301962, "grad_norm": 0.9623729586601257, "learning_rate": 1.3741100710321643e-07, "loss": 1.4144, "step": 6869 }, { "epoch": 0.984099699183498, "grad_norm": 0.9823546409606934, "learning_rate": 1.3499026007741665e-07, "loss": 1.3272, "step": 6870 }, { "epoch": 0.9842429451367999, "grad_norm": 1.1687198877334595, "learning_rate": 1.3259101151694708e-07, "loss": 1.3441, "step": 6871 }, { "epoch": 0.9843861910901017, "grad_norm": 1.1303155422210693, "learning_rate": 1.3021326193830564e-07, "loss": 1.4256, "step": 6872 }, { "epoch": 0.9845294370434036, "grad_norm": 1.0595699548721313, "learning_rate": 1.2785701185333844e-07, "loss": 1.2168, "step": 6873 }, { "epoch": 0.9846726829967053, "grad_norm": 0.982572615146637, "learning_rate": 1.2552226176931746e-07, "loss": 1.4156, "step": 6874 }, { "epoch": 0.9848159289500071, "grad_norm": 1.158421516418457, "learning_rate": 1.2320901218884072e-07, "loss": 1.2366, "step": 6875 }, { "epoch": 0.984959174903309, "grad_norm": 1.055950403213501, "learning_rate": 1.2091726360989874e-07, "loss": 1.6418, "step": 6876 }, { "epoch": 0.9851024208566108, "grad_norm": 1.2009154558181763, "learning_rate": 1.1864701652584132e-07, "loss": 1.3808, "step": 6877 }, { "epoch": 0.9852456668099127, "grad_norm": 1.1028707027435303, "learning_rate": 1.1639827142539972e-07, "loss": 1.4239, "step": 6878 }, { "epoch": 0.9853889127632144, "grad_norm": 1.1144933700561523, "learning_rate": 1.1417102879268671e-07, "loss": 1.3437, "step": 6879 }, { "epoch": 0.9855321587165162, "grad_norm": 1.00905442237854, "learning_rate": 1.1196528910715209e-07, "loss": 1.3753, "step": 6880 }, { "epoch": 0.9856754046698181, "grad_norm": 1.179093360900879, "learning_rate": 1.0978105284363826e-07, "loss": 1.5435, "step": 6881 }, { "epoch": 0.9858186506231199, "grad_norm": 1.1365902423858643, "learning_rate": 1.0761832047238019e-07, "loss": 1.2935, "step": 6882 }, { "epoch": 0.9859618965764217, "grad_norm": 1.0896435976028442, "learning_rate": 1.0547709245893877e-07, "loss": 1.3422, "step": 6883 }, { "epoch": 0.9861051425297236, "grad_norm": 1.0512182712554932, "learning_rate": 1.0335736926426754e-07, "loss": 1.4071, "step": 6884 }, { "epoch": 0.9862483884830253, "grad_norm": 1.0128521919250488, "learning_rate": 1.0125915134470143e-07, "loss": 1.2252, "step": 6885 }, { "epoch": 0.9863916344363272, "grad_norm": 1.15249764919281, "learning_rate": 9.918243915193471e-08, "loss": 1.2801, "step": 6886 }, { "epoch": 0.986534880389629, "grad_norm": 1.1768450736999512, "learning_rate": 9.712723313302085e-08, "loss": 1.3113, "step": 6887 }, { "epoch": 0.9866781263429308, "grad_norm": 1.0490126609802246, "learning_rate": 9.509353373040596e-08, "loss": 1.5212, "step": 6888 }, { "epoch": 0.9868213722962327, "grad_norm": 1.1588236093521118, "learning_rate": 9.308134138188429e-08, "loss": 1.4641, "step": 6889 }, { "epoch": 0.9869646182495344, "grad_norm": 1.3969707489013672, "learning_rate": 9.109065652064263e-08, "loss": 1.4427, "step": 6890 }, { "epoch": 0.9871078642028362, "grad_norm": 0.9793325066566467, "learning_rate": 8.9121479575216e-08, "loss": 1.4414, "step": 6891 }, { "epoch": 0.9872511101561381, "grad_norm": 1.071190357208252, "learning_rate": 8.717381096953192e-08, "loss": 1.363, "step": 6892 }, { "epoch": 0.9873943561094399, "grad_norm": 1.1631848812103271, "learning_rate": 8.524765112286614e-08, "loss": 1.4295, "step": 6893 }, { "epoch": 0.9875376020627418, "grad_norm": 1.0809261798858643, "learning_rate": 8.334300044987587e-08, "loss": 1.3067, "step": 6894 }, { "epoch": 0.9876808480160435, "grad_norm": 1.1413036584854126, "learning_rate": 8.145985936057754e-08, "loss": 1.4591, "step": 6895 }, { "epoch": 0.9878240939693453, "grad_norm": 1.2250999212265015, "learning_rate": 7.959822826038022e-08, "loss": 1.4777, "step": 6896 }, { "epoch": 0.9879673399226472, "grad_norm": 0.9954769611358643, "learning_rate": 7.775810755003e-08, "loss": 1.3453, "step": 6897 }, { "epoch": 0.988110585875949, "grad_norm": 1.1877132654190063, "learning_rate": 7.593949762567664e-08, "loss": 1.2446, "step": 6898 }, { "epoch": 0.9882538318292509, "grad_norm": 0.9511668682098389, "learning_rate": 7.414239887880702e-08, "loss": 1.4448, "step": 6899 }, { "epoch": 0.9883970777825526, "grad_norm": 1.1767019033432007, "learning_rate": 7.236681169628945e-08, "loss": 1.2421, "step": 6900 }, { "epoch": 0.9885403237358544, "grad_norm": 0.9420677423477173, "learning_rate": 7.06127364603848e-08, "loss": 1.2912, "step": 6901 }, { "epoch": 0.9886835696891563, "grad_norm": 0.9167191982269287, "learning_rate": 6.888017354869103e-08, "loss": 1.5713, "step": 6902 }, { "epoch": 0.9888268156424581, "grad_norm": 1.0124984979629517, "learning_rate": 6.716912333417646e-08, "loss": 1.2674, "step": 6903 }, { "epoch": 0.98897006159576, "grad_norm": 1.0829814672470093, "learning_rate": 6.5479586185202e-08, "loss": 1.3155, "step": 6904 }, { "epoch": 0.9891133075490618, "grad_norm": 1.4187833070755005, "learning_rate": 6.38115624654656e-08, "loss": 1.6162, "step": 6905 }, { "epoch": 0.9892565535023635, "grad_norm": 1.0550552606582642, "learning_rate": 6.216505253408e-08, "loss": 1.4177, "step": 6906 }, { "epoch": 0.9893997994556654, "grad_norm": 1.097221851348877, "learning_rate": 6.05400567454728e-08, "loss": 1.1926, "step": 6907 }, { "epoch": 0.9895430454089672, "grad_norm": 1.1215218305587769, "learning_rate": 5.893657544947528e-08, "loss": 1.2473, "step": 6908 }, { "epoch": 0.989686291362269, "grad_norm": 1.2341907024383545, "learning_rate": 5.7354608991266876e-08, "loss": 1.3513, "step": 6909 }, { "epoch": 0.9898295373155709, "grad_norm": 1.072882890701294, "learning_rate": 5.5794157711430705e-08, "loss": 1.4923, "step": 6910 }, { "epoch": 0.9899727832688726, "grad_norm": 1.041577696800232, "learning_rate": 5.4255221945864744e-08, "loss": 1.4795, "step": 6911 }, { "epoch": 0.9901160292221745, "grad_norm": 1.1968357563018799, "learning_rate": 5.273780202588174e-08, "loss": 1.3524, "step": 6912 }, { "epoch": 0.9902592751754763, "grad_norm": 1.264856219291687, "learning_rate": 5.124189827813153e-08, "loss": 1.3515, "step": 6913 }, { "epoch": 0.9904025211287781, "grad_norm": 1.1715129613876343, "learning_rate": 4.9767511024656486e-08, "loss": 1.2751, "step": 6914 }, { "epoch": 0.99054576708208, "grad_norm": 1.036310076713562, "learning_rate": 4.8314640582858284e-08, "loss": 1.4745, "step": 6915 }, { "epoch": 0.9906890130353817, "grad_norm": 1.1034997701644897, "learning_rate": 4.6883287265497844e-08, "loss": 1.3974, "step": 6916 }, { "epoch": 0.9908322589886835, "grad_norm": 1.0676900148391724, "learning_rate": 4.5473451380706463e-08, "loss": 1.4661, "step": 6917 }, { "epoch": 0.9909755049419854, "grad_norm": 1.0976307392120361, "learning_rate": 4.408513323198582e-08, "loss": 1.5258, "step": 6918 }, { "epoch": 0.9911187508952872, "grad_norm": 1.234499216079712, "learning_rate": 4.271833311821905e-08, "loss": 1.4089, "step": 6919 }, { "epoch": 0.9912619968485891, "grad_norm": 1.0692311525344849, "learning_rate": 4.137305133362634e-08, "loss": 1.4845, "step": 6920 }, { "epoch": 0.9914052428018908, "grad_norm": 1.1579920053482056, "learning_rate": 4.0049288167842705e-08, "loss": 1.2708, "step": 6921 }, { "epoch": 0.9915484887551926, "grad_norm": 1.1213072538375854, "learning_rate": 3.8747043905806856e-08, "loss": 1.2183, "step": 6922 }, { "epoch": 0.9916917347084945, "grad_norm": 1.0892211198806763, "learning_rate": 3.746631882787233e-08, "loss": 1.4166, "step": 6923 }, { "epoch": 0.9918349806617963, "grad_norm": 1.3943217992782593, "learning_rate": 3.6207113209763e-08, "loss": 1.4145, "step": 6924 }, { "epoch": 0.9919782266150982, "grad_norm": 1.139284372329712, "learning_rate": 3.496942732253983e-08, "loss": 1.2871, "step": 6925 }, { "epoch": 0.9921214725684, "grad_norm": 1.1605875492095947, "learning_rate": 3.375326143264523e-08, "loss": 1.4004, "step": 6926 }, { "epoch": 0.9922647185217017, "grad_norm": 1.0849120616912842, "learning_rate": 3.2558615801892014e-08, "loss": 1.3176, "step": 6927 }, { "epoch": 0.9924079644750036, "grad_norm": 1.1378611326217651, "learning_rate": 3.138549068745222e-08, "loss": 1.434, "step": 6928 }, { "epoch": 0.9925512104283054, "grad_norm": 1.3089238405227661, "learning_rate": 3.0233886341890504e-08, "loss": 1.1527, "step": 6929 }, { "epoch": 0.9926944563816072, "grad_norm": 1.0546976327896118, "learning_rate": 2.9103803013097453e-08, "loss": 1.4313, "step": 6930 }, { "epoch": 0.9928377023349091, "grad_norm": 1.0690783262252808, "learning_rate": 2.799524094436734e-08, "loss": 1.339, "step": 6931 }, { "epoch": 0.9929809482882108, "grad_norm": 1.171849250793457, "learning_rate": 2.6908200374331503e-08, "loss": 1.3341, "step": 6932 }, { "epoch": 0.9931241942415127, "grad_norm": 1.2075364589691162, "learning_rate": 2.584268153701386e-08, "loss": 1.5586, "step": 6933 }, { "epoch": 0.9932674401948145, "grad_norm": 1.188219666481018, "learning_rate": 2.4798684661786476e-08, "loss": 1.3446, "step": 6934 }, { "epoch": 0.9934106861481163, "grad_norm": 1.0553054809570312, "learning_rate": 2.377620997340291e-08, "loss": 1.3598, "step": 6935 }, { "epoch": 0.9935539321014182, "grad_norm": 1.1896674633026123, "learning_rate": 2.2775257691975972e-08, "loss": 1.5701, "step": 6936 }, { "epoch": 0.9936971780547199, "grad_norm": 1.1312730312347412, "learning_rate": 2.179582803297775e-08, "loss": 1.2754, "step": 6937 }, { "epoch": 0.9938404240080217, "grad_norm": 1.0238103866577148, "learning_rate": 2.0837921207272902e-08, "loss": 1.3748, "step": 6938 }, { "epoch": 0.9939836699613236, "grad_norm": 1.1552125215530396, "learning_rate": 1.990153742105205e-08, "loss": 1.4597, "step": 6939 }, { "epoch": 0.9941269159146254, "grad_norm": 1.0944242477416992, "learning_rate": 1.8986676875909494e-08, "loss": 1.4781, "step": 6940 }, { "epoch": 0.9942701618679273, "grad_norm": 1.121283769607544, "learning_rate": 1.8093339768798788e-08, "loss": 1.4078, "step": 6941 }, { "epoch": 0.994413407821229, "grad_norm": 1.1779378652572632, "learning_rate": 1.722152629201057e-08, "loss": 1.2958, "step": 6942 }, { "epoch": 0.9945566537745308, "grad_norm": 1.2198951244354248, "learning_rate": 1.637123663323914e-08, "loss": 1.4568, "step": 6943 }, { "epoch": 0.9946998997278327, "grad_norm": 1.1129685640335083, "learning_rate": 1.554247097553807e-08, "loss": 1.4461, "step": 6944 }, { "epoch": 0.9948431456811345, "grad_norm": 1.0148792266845703, "learning_rate": 1.47352294973091e-08, "loss": 1.676, "step": 6945 }, { "epoch": 0.9949863916344364, "grad_norm": 1.0224493741989136, "learning_rate": 1.3949512372335438e-08, "loss": 1.3029, "step": 6946 }, { "epoch": 0.9951296375877381, "grad_norm": 1.231289267539978, "learning_rate": 1.3185319769759564e-08, "loss": 1.3854, "step": 6947 }, { "epoch": 0.9952728835410399, "grad_norm": 1.1483474969863892, "learning_rate": 1.2442651854094322e-08, "loss": 1.49, "step": 6948 }, { "epoch": 0.9954161294943418, "grad_norm": 1.0172492265701294, "learning_rate": 1.1721508785211832e-08, "loss": 1.2821, "step": 6949 }, { "epoch": 0.9955593754476436, "grad_norm": 1.2487165927886963, "learning_rate": 1.1021890718376781e-08, "loss": 1.4607, "step": 6950 }, { "epoch": 0.9957026214009455, "grad_norm": 1.0066070556640625, "learning_rate": 1.034379780416872e-08, "loss": 1.3081, "step": 6951 }, { "epoch": 0.9958458673542473, "grad_norm": 1.0978567600250244, "learning_rate": 9.687230188593077e-09, "loss": 1.3771, "step": 6952 }, { "epoch": 0.995989113307549, "grad_norm": 1.0569521188735962, "learning_rate": 9.052188012981244e-09, "loss": 1.5151, "step": 6953 }, { "epoch": 0.9961323592608509, "grad_norm": 1.2607922554016113, "learning_rate": 8.438671414034982e-09, "loss": 1.3134, "step": 6954 }, { "epoch": 0.9962756052141527, "grad_norm": 1.0306569337844849, "learning_rate": 7.846680523837524e-09, "loss": 1.36, "step": 6955 }, { "epoch": 0.9964188511674545, "grad_norm": 0.9606354236602783, "learning_rate": 7.276215469831371e-09, "loss": 1.3915, "step": 6956 }, { "epoch": 0.9965620971207564, "grad_norm": 1.139560580253601, "learning_rate": 6.727276374818292e-09, "loss": 1.2957, "step": 6957 }, { "epoch": 0.9967053430740581, "grad_norm": 1.0927730798721313, "learning_rate": 6.1998633569704256e-09, "loss": 1.2775, "step": 6958 }, { "epoch": 0.99684858902736, "grad_norm": 1.2460246086120605, "learning_rate": 5.693976529841383e-09, "loss": 1.374, "step": 6959 }, { "epoch": 0.9969918349806618, "grad_norm": 1.1963108777999878, "learning_rate": 5.209616002310736e-09, "loss": 1.4948, "step": 6960 }, { "epoch": 0.9971350809339636, "grad_norm": 1.2114789485931396, "learning_rate": 4.746781878672835e-09, "loss": 1.4998, "step": 6961 }, { "epoch": 0.9972783268872655, "grad_norm": 1.190155267715454, "learning_rate": 4.305474258547993e-09, "loss": 1.3253, "step": 6962 }, { "epoch": 0.9974215728405672, "grad_norm": 1.1504417657852173, "learning_rate": 3.885693236949095e-09, "loss": 1.5062, "step": 6963 }, { "epoch": 0.997564818793869, "grad_norm": 1.2030202150344849, "learning_rate": 3.487438904237195e-09, "loss": 1.5094, "step": 6964 }, { "epoch": 0.9977080647471709, "grad_norm": 1.1781359910964966, "learning_rate": 3.1107113461437133e-09, "loss": 1.4648, "step": 6965 }, { "epoch": 0.9978513107004727, "grad_norm": 1.0621192455291748, "learning_rate": 2.755510643792647e-09, "loss": 1.3073, "step": 6966 }, { "epoch": 0.9979945566537746, "grad_norm": 1.0532188415527344, "learning_rate": 2.4218368736228512e-09, "loss": 1.4312, "step": 6967 }, { "epoch": 0.9981378026070763, "grad_norm": 1.0184040069580078, "learning_rate": 2.109690107465756e-09, "loss": 1.3667, "step": 6968 }, { "epoch": 0.9982810485603781, "grad_norm": 1.238198161125183, "learning_rate": 1.819070412545365e-09, "loss": 1.5545, "step": 6969 }, { "epoch": 0.99842429451368, "grad_norm": 1.0503977537155151, "learning_rate": 1.5499778514005415e-09, "loss": 1.3609, "step": 6970 }, { "epoch": 0.9985675404669818, "grad_norm": 1.1124836206436157, "learning_rate": 1.3024124819738247e-09, "loss": 1.4109, "step": 6971 }, { "epoch": 0.9987107864202837, "grad_norm": 1.3609510660171509, "learning_rate": 1.0763743575448182e-09, "loss": 1.394, "step": 6972 }, { "epoch": 0.9988540323735855, "grad_norm": 0.9927933812141418, "learning_rate": 8.718635267856989e-10, "loss": 1.4983, "step": 6973 }, { "epoch": 0.9989972783268872, "grad_norm": 1.0794765949249268, "learning_rate": 6.888800337279122e-10, "loss": 1.4374, "step": 6974 }, { "epoch": 0.9991405242801891, "grad_norm": 1.2688621282577515, "learning_rate": 5.274239177510687e-10, "loss": 1.3399, "step": 6975 }, { "epoch": 0.9992837702334909, "grad_norm": 1.036419153213501, "learning_rate": 3.874952136162513e-10, "loss": 1.3858, "step": 6976 }, { "epoch": 0.9994270161867927, "grad_norm": 1.049072027206421, "learning_rate": 2.690939514438107e-10, "loss": 1.3517, "step": 6977 }, { "epoch": 0.9995702621400946, "grad_norm": 1.1221576929092407, "learning_rate": 1.7222015673556968e-10, "loss": 1.4177, "step": 6978 }, { "epoch": 0.9997135080933963, "grad_norm": 1.1188455820083618, "learning_rate": 9.687385033041451e-11, "loss": 1.3175, "step": 6979 }, { "epoch": 0.9998567540466982, "grad_norm": 1.0498183965682983, "learning_rate": 4.305504844870356e-11, "loss": 1.3533, "step": 6980 }, { "epoch": 1.0, "grad_norm": 1.1539193391799927, "learning_rate": 1.0763762692267421e-11, "loss": 1.3093, "step": 6981 }, { "epoch": 1.0, "step": 6981, "total_flos": 3.971758886234358e+17, "train_loss": 1.5259428392626668, "train_runtime": 15192.8101, "train_samples_per_second": 14.703, "train_steps_per_second": 0.459 } ], "logging_steps": 1.0, "max_steps": 6981, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.971758886234358e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }