{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994910941475827, "eval_steps": 500, "global_step": 491, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002035623409669211, "grad_norm": 2244783.3021161165, "learning_rate": 1.0000000000000001e-07, "loss": 13.8236, "step": 1 }, { "epoch": 0.004071246819338422, "grad_norm": 4218192.586319844, "learning_rate": 2.0000000000000002e-07, "loss": 13.7798, "step": 2 }, { "epoch": 0.0061068702290076335, "grad_norm": 4594205.112133389, "learning_rate": 3.0000000000000004e-07, "loss": 13.844, "step": 3 }, { "epoch": 0.008142493638676845, "grad_norm": 11263009.553503217, "learning_rate": 4.0000000000000003e-07, "loss": 13.8135, "step": 4 }, { "epoch": 0.010178117048346057, "grad_norm": 8938655.40470094, "learning_rate": 5.000000000000001e-07, "loss": 13.8361, "step": 5 }, { "epoch": 0.012213740458015267, "grad_norm": 3666969.2787727225, "learning_rate": 6.000000000000001e-07, "loss": 13.7754, "step": 6 }, { "epoch": 0.014249363867684479, "grad_norm": 2213140.2581639104, "learning_rate": 7.000000000000001e-07, "loss": 13.816, "step": 7 }, { "epoch": 0.01628498727735369, "grad_norm": 5204945.168357011, "learning_rate": 8.000000000000001e-07, "loss": 13.7919, "step": 8 }, { "epoch": 0.0183206106870229, "grad_norm": 3161273.4979725075, "learning_rate": 9.000000000000001e-07, "loss": 13.7921, "step": 9 }, { "epoch": 0.020356234096692113, "grad_norm": 4721972.526161844, "learning_rate": 1.0000000000000002e-06, "loss": 13.8129, "step": 10 }, { "epoch": 0.022391857506361322, "grad_norm": 1993836.4706857507, "learning_rate": 1.1e-06, "loss": 13.7899, "step": 11 }, { "epoch": 0.024427480916030534, "grad_norm": 2841663.624406631, "learning_rate": 1.2000000000000002e-06, "loss": 13.8222, "step": 12 }, { "epoch": 0.026463104325699746, "grad_norm": 1851625.2694659713, "learning_rate": 1.3e-06, "loss": 13.8051, "step": 13 }, { "epoch": 0.028498727735368958, "grad_norm": 1799567.0732118108, "learning_rate": 1.4000000000000001e-06, "loss": 13.8232, "step": 14 }, { "epoch": 0.030534351145038167, "grad_norm": 1739244.434987474, "learning_rate": 1.5e-06, "loss": 13.787, "step": 15 }, { "epoch": 0.03256997455470738, "grad_norm": 2049274.0550949178, "learning_rate": 1.6000000000000001e-06, "loss": 13.8272, "step": 16 }, { "epoch": 0.03460559796437659, "grad_norm": 2196819.6130670137, "learning_rate": 1.7000000000000002e-06, "loss": 13.7819, "step": 17 }, { "epoch": 0.0366412213740458, "grad_norm": 3122206.233837503, "learning_rate": 1.8000000000000001e-06, "loss": 13.7931, "step": 18 }, { "epoch": 0.03867684478371501, "grad_norm": 6250105.385689335, "learning_rate": 1.9000000000000002e-06, "loss": 13.8169, "step": 19 }, { "epoch": 0.04071246819338423, "grad_norm": 3333302.4953560205, "learning_rate": 2.0000000000000003e-06, "loss": 13.7969, "step": 20 }, { "epoch": 0.042748091603053436, "grad_norm": 1842864.15838755, "learning_rate": 2.1000000000000002e-06, "loss": 13.7789, "step": 21 }, { "epoch": 0.044783715012722644, "grad_norm": 2814803.976988681, "learning_rate": 2.2e-06, "loss": 13.8063, "step": 22 }, { "epoch": 0.04681933842239186, "grad_norm": 5480609.064869341, "learning_rate": 2.3000000000000004e-06, "loss": 13.8059, "step": 23 }, { "epoch": 0.04885496183206107, "grad_norm": 2330308.564996049, "learning_rate": 2.4000000000000003e-06, "loss": 13.7774, "step": 24 }, { "epoch": 0.05089058524173028, "grad_norm": 1954680.474250264, "learning_rate": 2.5e-06, "loss": 13.8188, "step": 25 }, { "epoch": 0.05292620865139949, "grad_norm": 2348178.4882796686, "learning_rate": 2.6e-06, "loss": 13.7916, "step": 26 }, { "epoch": 0.0549618320610687, "grad_norm": 2063424.030791769, "learning_rate": 2.7000000000000004e-06, "loss": 13.8352, "step": 27 }, { "epoch": 0.056997455470737916, "grad_norm": 3096470.2787967124, "learning_rate": 2.8000000000000003e-06, "loss": 13.814, "step": 28 }, { "epoch": 0.059033078880407125, "grad_norm": 2313554.624060415, "learning_rate": 2.9e-06, "loss": 13.8149, "step": 29 }, { "epoch": 0.061068702290076333, "grad_norm": 2759287.606120093, "learning_rate": 3e-06, "loss": 13.7901, "step": 30 }, { "epoch": 0.06310432569974554, "grad_norm": 20688012.381375346, "learning_rate": 3.1000000000000004e-06, "loss": 13.8016, "step": 31 }, { "epoch": 0.06513994910941476, "grad_norm": 2818955.21640328, "learning_rate": 3.2000000000000003e-06, "loss": 13.8174, "step": 32 }, { "epoch": 0.06717557251908397, "grad_norm": 3642298.4739112426, "learning_rate": 3.3000000000000006e-06, "loss": 13.8439, "step": 33 }, { "epoch": 0.06921119592875317, "grad_norm": 4530146.859043687, "learning_rate": 3.4000000000000005e-06, "loss": 13.798, "step": 34 }, { "epoch": 0.07124681933842239, "grad_norm": 3901948.6988777495, "learning_rate": 3.5e-06, "loss": 13.7871, "step": 35 }, { "epoch": 0.0732824427480916, "grad_norm": 2763745.5952281994, "learning_rate": 3.6000000000000003e-06, "loss": 13.7899, "step": 36 }, { "epoch": 0.07531806615776081, "grad_norm": 6149669.662012621, "learning_rate": 3.7e-06, "loss": 13.7854, "step": 37 }, { "epoch": 0.07735368956743002, "grad_norm": 2493897.7698470936, "learning_rate": 3.8000000000000005e-06, "loss": 13.7955, "step": 38 }, { "epoch": 0.07938931297709924, "grad_norm": 1961343.832258401, "learning_rate": 3.900000000000001e-06, "loss": 13.8098, "step": 39 }, { "epoch": 0.08142493638676845, "grad_norm": 2547368.4155277675, "learning_rate": 4.000000000000001e-06, "loss": 13.8144, "step": 40 }, { "epoch": 0.08346055979643766, "grad_norm": 12326034.534851212, "learning_rate": 4.1e-06, "loss": 13.7863, "step": 41 }, { "epoch": 0.08549618320610687, "grad_norm": 3960181.1514259093, "learning_rate": 4.2000000000000004e-06, "loss": 13.766, "step": 42 }, { "epoch": 0.08753180661577609, "grad_norm": 10011098.88152794, "learning_rate": 4.3e-06, "loss": 13.8034, "step": 43 }, { "epoch": 0.08956743002544529, "grad_norm": 3789423.3303024317, "learning_rate": 4.4e-06, "loss": 13.791, "step": 44 }, { "epoch": 0.0916030534351145, "grad_norm": 3039428.004186018, "learning_rate": 4.5e-06, "loss": 13.7903, "step": 45 }, { "epoch": 0.09363867684478372, "grad_norm": 3356582.7961585973, "learning_rate": 4.600000000000001e-06, "loss": 13.8235, "step": 46 }, { "epoch": 0.09567430025445292, "grad_norm": 2820493.914527673, "learning_rate": 4.7e-06, "loss": 13.8022, "step": 47 }, { "epoch": 0.09770992366412214, "grad_norm": 3791222.908966115, "learning_rate": 4.800000000000001e-06, "loss": 13.7717, "step": 48 }, { "epoch": 0.09974554707379135, "grad_norm": 3283270.2287080307, "learning_rate": 4.9000000000000005e-06, "loss": 13.787, "step": 49 }, { "epoch": 0.10178117048346055, "grad_norm": 2485970.4909266913, "learning_rate": 5e-06, "loss": 13.7976, "step": 50 }, { "epoch": 0.10381679389312977, "grad_norm": 4547104.520705372, "learning_rate": 5.1e-06, "loss": 13.7639, "step": 51 }, { "epoch": 0.10585241730279898, "grad_norm": 2824615.9607255333, "learning_rate": 5.2e-06, "loss": 13.7782, "step": 52 }, { "epoch": 0.1078880407124682, "grad_norm": 2095536.7289427207, "learning_rate": 5.300000000000001e-06, "loss": 13.8148, "step": 53 }, { "epoch": 0.1099236641221374, "grad_norm": 2351584.5173169677, "learning_rate": 5.400000000000001e-06, "loss": 13.7998, "step": 54 }, { "epoch": 0.11195928753180662, "grad_norm": 3228695.507820654, "learning_rate": 5.500000000000001e-06, "loss": 13.8394, "step": 55 }, { "epoch": 0.11399491094147583, "grad_norm": 9313699.37550104, "learning_rate": 5.600000000000001e-06, "loss": 13.7991, "step": 56 }, { "epoch": 0.11603053435114503, "grad_norm": 3293386.5704521113, "learning_rate": 5.7e-06, "loss": 13.7839, "step": 57 }, { "epoch": 0.11806615776081425, "grad_norm": 3718314.8927475032, "learning_rate": 5.8e-06, "loss": 13.811, "step": 58 }, { "epoch": 0.12010178117048347, "grad_norm": 3124256.4145811866, "learning_rate": 5.9e-06, "loss": 13.7796, "step": 59 }, { "epoch": 0.12213740458015267, "grad_norm": 3177179.019257336, "learning_rate": 6e-06, "loss": 13.7927, "step": 60 }, { "epoch": 0.12417302798982188, "grad_norm": 3023297.2648282214, "learning_rate": 6.1e-06, "loss": 13.7867, "step": 61 }, { "epoch": 0.12620865139949108, "grad_norm": 3214697.401730724, "learning_rate": 6.200000000000001e-06, "loss": 13.7771, "step": 62 }, { "epoch": 0.1282442748091603, "grad_norm": 5100755.95841697, "learning_rate": 6.300000000000001e-06, "loss": 13.8624, "step": 63 }, { "epoch": 0.13027989821882952, "grad_norm": 2723163.1725419424, "learning_rate": 6.4000000000000006e-06, "loss": 13.8343, "step": 64 }, { "epoch": 0.13231552162849872, "grad_norm": 3190220.582667358, "learning_rate": 6.5000000000000004e-06, "loss": 13.8278, "step": 65 }, { "epoch": 0.13435114503816795, "grad_norm": 4004364.9481327008, "learning_rate": 6.600000000000001e-06, "loss": 13.8181, "step": 66 }, { "epoch": 0.13638676844783715, "grad_norm": 3042780.596978967, "learning_rate": 6.700000000000001e-06, "loss": 13.8136, "step": 67 }, { "epoch": 0.13842239185750635, "grad_norm": 3848690.0005479343, "learning_rate": 6.800000000000001e-06, "loss": 13.8067, "step": 68 }, { "epoch": 0.14045801526717558, "grad_norm": 3081170.591628097, "learning_rate": 6.9e-06, "loss": 13.802, "step": 69 }, { "epoch": 0.14249363867684478, "grad_norm": 3465407.339307021, "learning_rate": 7e-06, "loss": 13.7681, "step": 70 }, { "epoch": 0.14452926208651398, "grad_norm": 2647645.8680279553, "learning_rate": 7.100000000000001e-06, "loss": 13.8244, "step": 71 }, { "epoch": 0.1465648854961832, "grad_norm": 2676701.9454676, "learning_rate": 7.2000000000000005e-06, "loss": 13.8011, "step": 72 }, { "epoch": 0.1486005089058524, "grad_norm": 2103570.9894912024, "learning_rate": 7.3e-06, "loss": 13.7988, "step": 73 }, { "epoch": 0.15063613231552161, "grad_norm": 2809043.7511407104, "learning_rate": 7.4e-06, "loss": 13.8103, "step": 74 }, { "epoch": 0.15267175572519084, "grad_norm": 4000235.7083044164, "learning_rate": 7.500000000000001e-06, "loss": 13.8168, "step": 75 }, { "epoch": 0.15470737913486005, "grad_norm": 8459155.529969739, "learning_rate": 7.600000000000001e-06, "loss": 13.774, "step": 76 }, { "epoch": 0.15674300254452928, "grad_norm": 16399233.534973303, "learning_rate": 7.7e-06, "loss": 13.8219, "step": 77 }, { "epoch": 0.15877862595419848, "grad_norm": 2696806.469277922, "learning_rate": 7.800000000000002e-06, "loss": 13.7919, "step": 78 }, { "epoch": 0.16081424936386768, "grad_norm": 6832348.586697958, "learning_rate": 7.9e-06, "loss": 13.8085, "step": 79 }, { "epoch": 0.1628498727735369, "grad_norm": 2802377.0728139468, "learning_rate": 8.000000000000001e-06, "loss": 13.7758, "step": 80 }, { "epoch": 0.1648854961832061, "grad_norm": 3693570.1419803873, "learning_rate": 8.1e-06, "loss": 13.8108, "step": 81 }, { "epoch": 0.1669211195928753, "grad_norm": 1899967.9199083971, "learning_rate": 8.2e-06, "loss": 13.7788, "step": 82 }, { "epoch": 0.16895674300254454, "grad_norm": 2632120.314827873, "learning_rate": 8.3e-06, "loss": 13.7979, "step": 83 }, { "epoch": 0.17099236641221374, "grad_norm": 2240799.146098359, "learning_rate": 8.400000000000001e-06, "loss": 13.7656, "step": 84 }, { "epoch": 0.17302798982188294, "grad_norm": 2881127.214705138, "learning_rate": 8.5e-06, "loss": 13.8209, "step": 85 }, { "epoch": 0.17506361323155217, "grad_norm": 4313944.882142538, "learning_rate": 8.6e-06, "loss": 13.8141, "step": 86 }, { "epoch": 0.17709923664122137, "grad_norm": 4205463.004393998, "learning_rate": 8.700000000000001e-06, "loss": 13.7967, "step": 87 }, { "epoch": 0.17913486005089058, "grad_norm": 2478941.649437644, "learning_rate": 8.8e-06, "loss": 13.7916, "step": 88 }, { "epoch": 0.1811704834605598, "grad_norm": 3528590.3048747736, "learning_rate": 8.900000000000001e-06, "loss": 13.7464, "step": 89 }, { "epoch": 0.183206106870229, "grad_norm": 2318484.135702536, "learning_rate": 9e-06, "loss": 13.8293, "step": 90 }, { "epoch": 0.1852417302798982, "grad_norm": 2158526.8137466703, "learning_rate": 9.100000000000001e-06, "loss": 13.7882, "step": 91 }, { "epoch": 0.18727735368956744, "grad_norm": 3658620.230343455, "learning_rate": 9.200000000000002e-06, "loss": 13.7981, "step": 92 }, { "epoch": 0.18931297709923664, "grad_norm": 2456882.2321177353, "learning_rate": 9.3e-06, "loss": 13.8297, "step": 93 }, { "epoch": 0.19134860050890584, "grad_norm": 2990599.371813722, "learning_rate": 9.4e-06, "loss": 13.7921, "step": 94 }, { "epoch": 0.19338422391857507, "grad_norm": 5560006.043017588, "learning_rate": 9.5e-06, "loss": 13.8165, "step": 95 }, { "epoch": 0.19541984732824427, "grad_norm": 1762528.334237519, "learning_rate": 9.600000000000001e-06, "loss": 13.794, "step": 96 }, { "epoch": 0.19745547073791347, "grad_norm": 2238736.0246347915, "learning_rate": 9.7e-06, "loss": 13.8312, "step": 97 }, { "epoch": 0.1994910941475827, "grad_norm": 1991545.6391396692, "learning_rate": 9.800000000000001e-06, "loss": 13.7997, "step": 98 }, { "epoch": 0.2015267175572519, "grad_norm": 2812661.8889751355, "learning_rate": 9.9e-06, "loss": 13.7873, "step": 99 }, { "epoch": 0.2035623409669211, "grad_norm": 2569208.410955407, "learning_rate": 1e-05, "loss": 13.8016, "step": 100 }, { "epoch": 0.20559796437659034, "grad_norm": 5078922.075783424, "learning_rate": 9.999838607294157e-06, "loss": 13.8041, "step": 101 }, { "epoch": 0.20763358778625954, "grad_norm": 2652997.6261025066, "learning_rate": 9.999354439595668e-06, "loss": 13.7995, "step": 102 }, { "epoch": 0.20966921119592874, "grad_norm": 7720533.0329790395, "learning_rate": 9.998547528160987e-06, "loss": 13.7946, "step": 103 }, { "epoch": 0.21170483460559797, "grad_norm": 4996759.547611104, "learning_rate": 9.997417925081963e-06, "loss": 13.8414, "step": 104 }, { "epoch": 0.21374045801526717, "grad_norm": 2594583.6722655715, "learning_rate": 9.995965703282472e-06, "loss": 13.8074, "step": 105 }, { "epoch": 0.2157760814249364, "grad_norm": 2448594.3176417607, "learning_rate": 9.99419095651372e-06, "loss": 13.7975, "step": 106 }, { "epoch": 0.2178117048346056, "grad_norm": 2714056.8451403086, "learning_rate": 9.992093799348182e-06, "loss": 13.8033, "step": 107 }, { "epoch": 0.2198473282442748, "grad_norm": 2961036.7491148347, "learning_rate": 9.9896743671722e-06, "loss": 13.8288, "step": 108 }, { "epoch": 0.22188295165394403, "grad_norm": 4655525.14394145, "learning_rate": 9.986932816177258e-06, "loss": 13.809, "step": 109 }, { "epoch": 0.22391857506361323, "grad_norm": 4477853.58417314, "learning_rate": 9.98386932334989e-06, "loss": 13.7901, "step": 110 }, { "epoch": 0.22595419847328244, "grad_norm": 7786381.525914281, "learning_rate": 9.980484086460258e-06, "loss": 13.7985, "step": 111 }, { "epoch": 0.22798982188295167, "grad_norm": 2476772.377738302, "learning_rate": 9.976777324049374e-06, "loss": 13.8245, "step": 112 }, { "epoch": 0.23002544529262087, "grad_norm": 2424723.3375852606, "learning_rate": 9.972749275415005e-06, "loss": 13.8106, "step": 113 }, { "epoch": 0.23206106870229007, "grad_norm": 3908900.533498587, "learning_rate": 9.96840020059622e-06, "loss": 13.8147, "step": 114 }, { "epoch": 0.2340966921119593, "grad_norm": 6536739.236436032, "learning_rate": 9.963730380356599e-06, "loss": 13.7825, "step": 115 }, { "epoch": 0.2361323155216285, "grad_norm": 2895977.217837065, "learning_rate": 9.958740116166113e-06, "loss": 13.8305, "step": 116 }, { "epoch": 0.2381679389312977, "grad_norm": 3560241.3442100273, "learning_rate": 9.953429730181653e-06, "loss": 13.8031, "step": 117 }, { "epoch": 0.24020356234096693, "grad_norm": 1955074.0369023099, "learning_rate": 9.947799565226253e-06, "loss": 13.8003, "step": 118 }, { "epoch": 0.24223918575063613, "grad_norm": 3497318.257096594, "learning_rate": 9.94184998476693e-06, "loss": 13.8214, "step": 119 }, { "epoch": 0.24427480916030533, "grad_norm": 3346632.116332331, "learning_rate": 9.93558137289124e-06, "loss": 13.7768, "step": 120 }, { "epoch": 0.24631043256997456, "grad_norm": 2439047.5049070125, "learning_rate": 9.928994134282477e-06, "loss": 13.8308, "step": 121 }, { "epoch": 0.24834605597964376, "grad_norm": 2664458.243837202, "learning_rate": 9.922088694193546e-06, "loss": 13.8132, "step": 122 }, { "epoch": 0.250381679389313, "grad_norm": 2619630.1649600505, "learning_rate": 9.91486549841951e-06, "loss": 13.8046, "step": 123 }, { "epoch": 0.25241730279898217, "grad_norm": 6268665.590956273, "learning_rate": 9.907325013268816e-06, "loss": 13.8003, "step": 124 }, { "epoch": 0.2544529262086514, "grad_norm": 3719517.4601419703, "learning_rate": 9.899467725533181e-06, "loss": 13.8048, "step": 125 }, { "epoch": 0.2564885496183206, "grad_norm": 3607937.2215440352, "learning_rate": 9.89129414245618e-06, "loss": 13.7788, "step": 126 }, { "epoch": 0.2585241730279898, "grad_norm": 4184303.763179342, "learning_rate": 9.882804791700488e-06, "loss": 13.7775, "step": 127 }, { "epoch": 0.26055979643765903, "grad_norm": 1838438.1959072966, "learning_rate": 9.87400022131382e-06, "loss": 13.8457, "step": 128 }, { "epoch": 0.26259541984732826, "grad_norm": 2268135.323406917, "learning_rate": 9.864880999693551e-06, "loss": 13.7905, "step": 129 }, { "epoch": 0.26463104325699743, "grad_norm": 6234862.720815243, "learning_rate": 9.855447715550024e-06, "loss": 13.8322, "step": 130 }, { "epoch": 0.26666666666666666, "grad_norm": 2161673.2272078265, "learning_rate": 9.845700977868536e-06, "loss": 13.7802, "step": 131 }, { "epoch": 0.2687022900763359, "grad_norm": 2040946.090751483, "learning_rate": 9.835641415870038e-06, "loss": 13.8012, "step": 132 }, { "epoch": 0.27073791348600507, "grad_norm": 1690704.8881864555, "learning_rate": 9.825269678970502e-06, "loss": 13.8144, "step": 133 }, { "epoch": 0.2727735368956743, "grad_norm": 3302151.7787588844, "learning_rate": 9.814586436738998e-06, "loss": 13.7961, "step": 134 }, { "epoch": 0.2748091603053435, "grad_norm": 2678767.7092372375, "learning_rate": 9.803592378854476e-06, "loss": 13.7924, "step": 135 }, { "epoch": 0.2768447837150127, "grad_norm": 1882847.5546937664, "learning_rate": 9.792288215061237e-06, "loss": 13.7571, "step": 136 }, { "epoch": 0.27888040712468193, "grad_norm": 2535286.1758342357, "learning_rate": 9.780674675123113e-06, "loss": 13.8159, "step": 137 }, { "epoch": 0.28091603053435116, "grad_norm": 2564638.3112862715, "learning_rate": 9.768752508776358e-06, "loss": 13.8283, "step": 138 }, { "epoch": 0.28295165394402033, "grad_norm": 4141816.881587341, "learning_rate": 9.756522485681247e-06, "loss": 13.8205, "step": 139 }, { "epoch": 0.28498727735368956, "grad_norm": 2736144.4377185158, "learning_rate": 9.743985395372387e-06, "loss": 13.8191, "step": 140 }, { "epoch": 0.2870229007633588, "grad_norm": 2915473.6501582544, "learning_rate": 9.73114204720775e-06, "loss": 13.7825, "step": 141 }, { "epoch": 0.28905852417302796, "grad_norm": 4206163.240001201, "learning_rate": 9.717993270316421e-06, "loss": 13.8325, "step": 142 }, { "epoch": 0.2910941475826972, "grad_norm": 5230479.261609765, "learning_rate": 9.704539913545073e-06, "loss": 13.8205, "step": 143 }, { "epoch": 0.2931297709923664, "grad_norm": 1844426.584030888, "learning_rate": 9.690782845403164e-06, "loss": 13.7856, "step": 144 }, { "epoch": 0.2951653944020356, "grad_norm": 2494575.837554648, "learning_rate": 9.676722954006878e-06, "loss": 13.7562, "step": 145 }, { "epoch": 0.2972010178117048, "grad_norm": 4691983.503821501, "learning_rate": 9.66236114702178e-06, "loss": 13.8174, "step": 146 }, { "epoch": 0.29923664122137406, "grad_norm": 11448273.842583835, "learning_rate": 9.647698351604227e-06, "loss": 13.811, "step": 147 }, { "epoch": 0.30127226463104323, "grad_norm": 2421950.649919781, "learning_rate": 9.632735514341508e-06, "loss": 13.798, "step": 148 }, { "epoch": 0.30330788804071246, "grad_norm": 3423539.343520856, "learning_rate": 9.617473601190743e-06, "loss": 13.7918, "step": 149 }, { "epoch": 0.3053435114503817, "grad_norm": 3264738.7097397717, "learning_rate": 9.601913597416513e-06, "loss": 13.8008, "step": 150 }, { "epoch": 0.3073791348600509, "grad_norm": 1943889.953196763, "learning_rate": 9.586056507527266e-06, "loss": 13.7854, "step": 151 }, { "epoch": 0.3094147582697201, "grad_norm": 3137121.9149822216, "learning_rate": 9.569903355210457e-06, "loss": 13.8321, "step": 152 }, { "epoch": 0.3114503816793893, "grad_norm": 2080163.2784456573, "learning_rate": 9.55345518326647e-06, "loss": 13.7755, "step": 153 }, { "epoch": 0.31348600508905855, "grad_norm": 1730160.4186251867, "learning_rate": 9.5367130535413e-06, "loss": 13.8155, "step": 154 }, { "epoch": 0.3155216284987277, "grad_norm": 4116918.3545234683, "learning_rate": 9.519678046857987e-06, "loss": 13.8182, "step": 155 }, { "epoch": 0.31755725190839695, "grad_norm": 3736775.455979484, "learning_rate": 9.502351262946865e-06, "loss": 13.8155, "step": 156 }, { "epoch": 0.3195928753180662, "grad_norm": 4078409.8308530655, "learning_rate": 9.48473382037455e-06, "loss": 13.8245, "step": 157 }, { "epoch": 0.32162849872773536, "grad_norm": 102844859.60289915, "learning_rate": 9.466826856471728e-06, "loss": 13.7664, "step": 158 }, { "epoch": 0.3236641221374046, "grad_norm": 2697351.488139713, "learning_rate": 9.448631527259749e-06, "loss": 13.824, "step": 159 }, { "epoch": 0.3256997455470738, "grad_norm": 2650130.4884775463, "learning_rate": 9.430149007375974e-06, "loss": 13.82, "step": 160 }, { "epoch": 0.327735368956743, "grad_norm": 2848577.7461152556, "learning_rate": 9.411380489997962e-06, "loss": 13.7988, "step": 161 }, { "epoch": 0.3297709923664122, "grad_norm": 2491523.6147146528, "learning_rate": 9.392327186766434e-06, "loss": 13.7801, "step": 162 }, { "epoch": 0.33180661577608145, "grad_norm": 2225342.8572617928, "learning_rate": 9.372990327707057e-06, "loss": 13.8134, "step": 163 }, { "epoch": 0.3338422391857506, "grad_norm": 3593367.5642024544, "learning_rate": 9.353371161151032e-06, "loss": 13.7934, "step": 164 }, { "epoch": 0.33587786259541985, "grad_norm": 3596895.2611031746, "learning_rate": 9.333470953654513e-06, "loss": 13.8181, "step": 165 }, { "epoch": 0.3379134860050891, "grad_norm": 1983403.4312289366, "learning_rate": 9.31329098991683e-06, "loss": 13.8016, "step": 166 }, { "epoch": 0.33994910941475825, "grad_norm": 2840359.066969552, "learning_rate": 9.292832572697566e-06, "loss": 13.7929, "step": 167 }, { "epoch": 0.3419847328244275, "grad_norm": 2440280.3409614386, "learning_rate": 9.272097022732444e-06, "loss": 13.7855, "step": 168 }, { "epoch": 0.3440203562340967, "grad_norm": 3564583.9519162746, "learning_rate": 9.251085678648072e-06, "loss": 13.7962, "step": 169 }, { "epoch": 0.3460559796437659, "grad_norm": 3017839.1906693154, "learning_rate": 9.22979989687552e-06, "loss": 13.7882, "step": 170 }, { "epoch": 0.3480916030534351, "grad_norm": 1926367.5770608934, "learning_rate": 9.208241051562753e-06, "loss": 13.829, "step": 171 }, { "epoch": 0.35012722646310435, "grad_norm": 2347553.991884138, "learning_rate": 9.186410534485924e-06, "loss": 13.7865, "step": 172 }, { "epoch": 0.3521628498727735, "grad_norm": 3954302.4427076643, "learning_rate": 9.164309754959523e-06, "loss": 13.8042, "step": 173 }, { "epoch": 0.35419847328244275, "grad_norm": 3440382.5762445726, "learning_rate": 9.14194013974539e-06, "loss": 13.8065, "step": 174 }, { "epoch": 0.356234096692112, "grad_norm": 3100692.355061626, "learning_rate": 9.11930313296062e-06, "loss": 13.8002, "step": 175 }, { "epoch": 0.35826972010178115, "grad_norm": 3335035.757879338, "learning_rate": 9.096400195984322e-06, "loss": 13.8002, "step": 176 }, { "epoch": 0.3603053435114504, "grad_norm": 2934238.222131686, "learning_rate": 9.073232807363283e-06, "loss": 13.7968, "step": 177 }, { "epoch": 0.3623409669211196, "grad_norm": 3863277.6264942884, "learning_rate": 9.049802462716521e-06, "loss": 13.8059, "step": 178 }, { "epoch": 0.3643765903307888, "grad_norm": 1979824.2277182264, "learning_rate": 9.026110674638722e-06, "loss": 13.7686, "step": 179 }, { "epoch": 0.366412213740458, "grad_norm": 3941584.712432807, "learning_rate": 9.002158972602599e-06, "loss": 13.7605, "step": 180 }, { "epoch": 0.36844783715012724, "grad_norm": 2814992.789501057, "learning_rate": 8.977948902860154e-06, "loss": 13.8302, "step": 181 }, { "epoch": 0.3704834605597964, "grad_norm": 2652186.9612782886, "learning_rate": 8.953482028342853e-06, "loss": 13.7782, "step": 182 }, { "epoch": 0.37251908396946565, "grad_norm": 3385003.9687919975, "learning_rate": 8.92875992856073e-06, "loss": 13.8049, "step": 183 }, { "epoch": 0.3745547073791349, "grad_norm": 5173853.448674343, "learning_rate": 8.903784199500412e-06, "loss": 13.7785, "step": 184 }, { "epoch": 0.37659033078880405, "grad_norm": 4211561.315304709, "learning_rate": 8.8785564535221e-06, "loss": 13.8245, "step": 185 }, { "epoch": 0.3786259541984733, "grad_norm": 2082428.2757180594, "learning_rate": 8.853078319255466e-06, "loss": 13.8218, "step": 186 }, { "epoch": 0.3806615776081425, "grad_norm": 2180497.2165646055, "learning_rate": 8.827351441494525e-06, "loss": 13.7686, "step": 187 }, { "epoch": 0.3826972010178117, "grad_norm": 2403683.8496971005, "learning_rate": 8.80137748109144e-06, "loss": 13.8504, "step": 188 }, { "epoch": 0.3847328244274809, "grad_norm": 2088585.2805628132, "learning_rate": 8.77515811484931e-06, "loss": 13.8108, "step": 189 }, { "epoch": 0.38676844783715014, "grad_norm": 2694457.991947191, "learning_rate": 8.748695035413925e-06, "loss": 13.8309, "step": 190 }, { "epoch": 0.3888040712468193, "grad_norm": 3397579.358486345, "learning_rate": 8.72198995116448e-06, "loss": 13.7726, "step": 191 }, { "epoch": 0.39083969465648855, "grad_norm": 1991789.4895514157, "learning_rate": 8.695044586103297e-06, "loss": 13.7618, "step": 192 }, { "epoch": 0.3928753180661578, "grad_norm": 5556327.041255268, "learning_rate": 8.667860679744529e-06, "loss": 13.7844, "step": 193 }, { "epoch": 0.39491094147582695, "grad_norm": 2177637.385711558, "learning_rate": 8.640439987001855e-06, "loss": 13.7805, "step": 194 }, { "epoch": 0.3969465648854962, "grad_norm": 5370184.851617085, "learning_rate": 8.612784278075195e-06, "loss": 13.7892, "step": 195 }, { "epoch": 0.3989821882951654, "grad_norm": 2473463.532725396, "learning_rate": 8.58489533833643e-06, "loss": 13.7826, "step": 196 }, { "epoch": 0.4010178117048346, "grad_norm": 3439577.606571749, "learning_rate": 8.556774968214134e-06, "loss": 13.8133, "step": 197 }, { "epoch": 0.4030534351145038, "grad_norm": 2463771.55347351, "learning_rate": 8.52842498307736e-06, "loss": 13.8139, "step": 198 }, { "epoch": 0.40508905852417304, "grad_norm": 2610726.9405806856, "learning_rate": 8.499847213118431e-06, "loss": 13.7792, "step": 199 }, { "epoch": 0.4071246819338422, "grad_norm": 5126921.560500939, "learning_rate": 8.471043503234796e-06, "loss": 13.7862, "step": 200 }, { "epoch": 0.40916030534351144, "grad_norm": 2337790.258284094, "learning_rate": 8.442015712909926e-06, "loss": 13.8018, "step": 201 }, { "epoch": 0.4111959287531807, "grad_norm": 2269326.88734255, "learning_rate": 8.412765716093273e-06, "loss": 13.8081, "step": 202 }, { "epoch": 0.41323155216284985, "grad_norm": 2482439.54650581, "learning_rate": 8.383295401079284e-06, "loss": 13.8155, "step": 203 }, { "epoch": 0.4152671755725191, "grad_norm": 2331248.12998744, "learning_rate": 8.353606670385514e-06, "loss": 13.7731, "step": 204 }, { "epoch": 0.4173027989821883, "grad_norm": 2419939.8754564477, "learning_rate": 8.3237014406298e-06, "loss": 13.8313, "step": 205 }, { "epoch": 0.4193384223918575, "grad_norm": 2294086.8182832007, "learning_rate": 8.293581642406517e-06, "loss": 13.836, "step": 206 }, { "epoch": 0.4213740458015267, "grad_norm": 3514387.9792135926, "learning_rate": 8.263249220161957e-06, "loss": 13.8349, "step": 207 }, { "epoch": 0.42340966921119594, "grad_norm": 2691961.5452496265, "learning_rate": 8.232706132068806e-06, "loss": 13.8321, "step": 208 }, { "epoch": 0.42544529262086517, "grad_norm": 2645633.6227109493, "learning_rate": 8.201954349899712e-06, "loss": 13.8001, "step": 209 }, { "epoch": 0.42748091603053434, "grad_norm": 3335230.860278415, "learning_rate": 8.17099585890001e-06, "loss": 13.8217, "step": 210 }, { "epoch": 0.42951653944020357, "grad_norm": 2566670.9506267244, "learning_rate": 8.139832657659557e-06, "loss": 13.7714, "step": 211 }, { "epoch": 0.4315521628498728, "grad_norm": 2370635.7525829547, "learning_rate": 8.108466757983695e-06, "loss": 13.7885, "step": 212 }, { "epoch": 0.433587786259542, "grad_norm": 4441547.469959615, "learning_rate": 8.076900184763394e-06, "loss": 13.7823, "step": 213 }, { "epoch": 0.4356234096692112, "grad_norm": 24430772.164419036, "learning_rate": 8.04513497584452e-06, "loss": 13.846, "step": 214 }, { "epoch": 0.43765903307888043, "grad_norm": 1935048.054024762, "learning_rate": 8.013173181896283e-06, "loss": 13.7878, "step": 215 }, { "epoch": 0.4396946564885496, "grad_norm": 1873118.933736968, "learning_rate": 7.981016866278843e-06, "loss": 13.7934, "step": 216 }, { "epoch": 0.44173027989821884, "grad_norm": 2169804.42270086, "learning_rate": 7.94866810491012e-06, "loss": 13.7965, "step": 217 }, { "epoch": 0.44376590330788807, "grad_norm": 3286778.9186343704, "learning_rate": 7.916128986131761e-06, "loss": 13.8332, "step": 218 }, { "epoch": 0.44580152671755724, "grad_norm": 2593694.969662653, "learning_rate": 7.883401610574338e-06, "loss": 13.7957, "step": 219 }, { "epoch": 0.44783715012722647, "grad_norm": 4975950.65536953, "learning_rate": 7.850488091021726e-06, "loss": 13.8212, "step": 220 }, { "epoch": 0.4498727735368957, "grad_norm": 2795061.731221561, "learning_rate": 7.817390552274721e-06, "loss": 13.7835, "step": 221 }, { "epoch": 0.45190839694656487, "grad_norm": 2386068.803973628, "learning_rate": 7.784111131013858e-06, "loss": 13.8274, "step": 222 }, { "epoch": 0.4539440203562341, "grad_norm": 4657398.876885557, "learning_rate": 7.750651975661471e-06, "loss": 13.7771, "step": 223 }, { "epoch": 0.45597964376590333, "grad_norm": 3655891.188475978, "learning_rate": 7.717015246243012e-06, "loss": 13.8151, "step": 224 }, { "epoch": 0.4580152671755725, "grad_norm": 3667526.462351342, "learning_rate": 7.683203114247587e-06, "loss": 13.7915, "step": 225 }, { "epoch": 0.46005089058524173, "grad_norm": 3809298.1813372867, "learning_rate": 7.649217762487786e-06, "loss": 13.8205, "step": 226 }, { "epoch": 0.46208651399491096, "grad_norm": 3288055.728671065, "learning_rate": 7.615061384958764e-06, "loss": 13.8062, "step": 227 }, { "epoch": 0.46412213740458014, "grad_norm": 5662460.544065125, "learning_rate": 7.580736186696593e-06, "loss": 13.8049, "step": 228 }, { "epoch": 0.46615776081424937, "grad_norm": 4421743.27796514, "learning_rate": 7.546244383635929e-06, "loss": 13.7686, "step": 229 }, { "epoch": 0.4681933842239186, "grad_norm": 2313331.4271743125, "learning_rate": 7.5115882024669375e-06, "loss": 13.8003, "step": 230 }, { "epoch": 0.47022900763358777, "grad_norm": 6343328.905525712, "learning_rate": 7.476769880491561e-06, "loss": 13.7806, "step": 231 }, { "epoch": 0.472264631043257, "grad_norm": 2640558.638662984, "learning_rate": 7.44179166547908e-06, "loss": 13.7999, "step": 232 }, { "epoch": 0.47430025445292623, "grad_norm": 4310404.449472289, "learning_rate": 7.406655815520998e-06, "loss": 13.8118, "step": 233 }, { "epoch": 0.4763358778625954, "grad_norm": 4518592.431753858, "learning_rate": 7.371364598885276e-06, "loss": 13.7864, "step": 234 }, { "epoch": 0.47837150127226463, "grad_norm": 2098819.871080459, "learning_rate": 7.335920293869891e-06, "loss": 13.8016, "step": 235 }, { "epoch": 0.48040712468193386, "grad_norm": 2463516.9812008953, "learning_rate": 7.300325188655762e-06, "loss": 13.7913, "step": 236 }, { "epoch": 0.48244274809160304, "grad_norm": 3102860.0228757737, "learning_rate": 7.264581581159024e-06, "loss": 13.7706, "step": 237 }, { "epoch": 0.48447837150127226, "grad_norm": 1914868.4483156833, "learning_rate": 7.2286917788826926e-06, "loss": 13.8093, "step": 238 }, { "epoch": 0.4865139949109415, "grad_norm": 2539581.4122914425, "learning_rate": 7.192658098767686e-06, "loss": 13.7854, "step": 239 }, { "epoch": 0.48854961832061067, "grad_norm": 3707976.67600207, "learning_rate": 7.1564828670432595e-06, "loss": 13.8176, "step": 240 }, { "epoch": 0.4905852417302799, "grad_norm": 3593935.7875790736, "learning_rate": 7.120168419076825e-06, "loss": 13.767, "step": 241 }, { "epoch": 0.4926208651399491, "grad_norm": 19371415.336839173, "learning_rate": 7.083717099223192e-06, "loss": 13.7774, "step": 242 }, { "epoch": 0.4946564885496183, "grad_norm": 2053853.5565662459, "learning_rate": 7.047131260673214e-06, "loss": 13.8225, "step": 243 }, { "epoch": 0.49669211195928753, "grad_norm": 4077586.3030170733, "learning_rate": 7.010413265301888e-06, "loss": 13.7911, "step": 244 }, { "epoch": 0.49872773536895676, "grad_norm": 2640760.978026206, "learning_rate": 6.97356548351586e-06, "loss": 13.8096, "step": 245 }, { "epoch": 0.500763358778626, "grad_norm": 3012267.667522946, "learning_rate": 6.936590294100414e-06, "loss": 13.7508, "step": 246 }, { "epoch": 0.5027989821882952, "grad_norm": 2736053.147326933, "learning_rate": 6.899490084065897e-06, "loss": 13.8161, "step": 247 }, { "epoch": 0.5048346055979643, "grad_norm": 3879858.467910511, "learning_rate": 6.862267248493624e-06, "loss": 13.8145, "step": 248 }, { "epoch": 0.5068702290076336, "grad_norm": 2410669.072572543, "learning_rate": 6.824924190381257e-06, "loss": 13.7883, "step": 249 }, { "epoch": 0.5089058524173028, "grad_norm": 2792170.4070734098, "learning_rate": 6.7874633204876705e-06, "loss": 13.83, "step": 250 }, { "epoch": 0.510941475826972, "grad_norm": 3054954.4878360187, "learning_rate": 6.7498870571773275e-06, "loss": 13.8138, "step": 251 }, { "epoch": 0.5129770992366413, "grad_norm": 13118018.692801312, "learning_rate": 6.712197826264154e-06, "loss": 13.7877, "step": 252 }, { "epoch": 0.5150127226463105, "grad_norm": 2274862.944795723, "learning_rate": 6.674398060854931e-06, "loss": 13.788, "step": 253 }, { "epoch": 0.5170483460559796, "grad_norm": 6493219.085795618, "learning_rate": 6.636490201192229e-06, "loss": 13.8056, "step": 254 }, { "epoch": 0.5190839694656488, "grad_norm": 2210871.447412882, "learning_rate": 6.5984766944968636e-06, "loss": 13.7964, "step": 255 }, { "epoch": 0.5211195928753181, "grad_norm": 2535457.1929967045, "learning_rate": 6.560359994809916e-06, "loss": 13.8025, "step": 256 }, { "epoch": 0.5231552162849873, "grad_norm": 3487433.3872100264, "learning_rate": 6.522142562834307e-06, "loss": 13.7752, "step": 257 }, { "epoch": 0.5251908396946565, "grad_norm": 2350043.7437156746, "learning_rate": 6.483826865775941e-06, "loss": 13.7891, "step": 258 }, { "epoch": 0.5272264631043257, "grad_norm": 3776676.8394483137, "learning_rate": 6.445415377184427e-06, "loss": 13.8172, "step": 259 }, { "epoch": 0.5292620865139949, "grad_norm": 3888987.2901650295, "learning_rate": 6.4069105767933944e-06, "loss": 13.7623, "step": 260 }, { "epoch": 0.5312977099236641, "grad_norm": 3340457.7579122144, "learning_rate": 6.368314950360416e-06, "loss": 13.8065, "step": 261 }, { "epoch": 0.5333333333333333, "grad_norm": 5982292.313514162, "learning_rate": 6.3296309895065215e-06, "loss": 13.7812, "step": 262 }, { "epoch": 0.5353689567430026, "grad_norm": 3219916.0058936477, "learning_rate": 6.290861191555359e-06, "loss": 13.745, "step": 263 }, { "epoch": 0.5374045801526718, "grad_norm": 11720301.310935492, "learning_rate": 6.252008059371968e-06, "loss": 13.8253, "step": 264 }, { "epoch": 0.539440203562341, "grad_norm": 3118271.2300748057, "learning_rate": 6.213074101201202e-06, "loss": 13.7763, "step": 265 }, { "epoch": 0.5414758269720101, "grad_norm": 1697945.554145853, "learning_rate": 6.174061830505801e-06, "loss": 13.7883, "step": 266 }, { "epoch": 0.5435114503816794, "grad_norm": 2857758.0379691618, "learning_rate": 6.1349737658041385e-06, "loss": 13.7939, "step": 267 }, { "epoch": 0.5455470737913486, "grad_norm": 1961975.7441584785, "learning_rate": 6.095812430507627e-06, "loss": 13.7989, "step": 268 }, { "epoch": 0.5475826972010178, "grad_norm": 4193880.83293653, "learning_rate": 6.056580352757813e-06, "loss": 13.8241, "step": 269 }, { "epoch": 0.549618320610687, "grad_norm": 3312744.2202664735, "learning_rate": 6.0172800652631706e-06, "loss": 13.7733, "step": 270 }, { "epoch": 0.5516539440203563, "grad_norm": 7858171.4628072195, "learning_rate": 5.977914105135594e-06, "loss": 13.8072, "step": 271 }, { "epoch": 0.5536895674300254, "grad_norm": 3941663.415453141, "learning_rate": 5.938485013726612e-06, "loss": 13.8103, "step": 272 }, { "epoch": 0.5557251908396946, "grad_norm": 3610433.10118229, "learning_rate": 5.898995336463326e-06, "loss": 13.807, "step": 273 }, { "epoch": 0.5577608142493639, "grad_norm": 4164601.1328435126, "learning_rate": 5.859447622684084e-06, "loss": 13.815, "step": 274 }, { "epoch": 0.5597964376590331, "grad_norm": 2548966.8285937863, "learning_rate": 5.819844425473899e-06, "loss": 13.7778, "step": 275 }, { "epoch": 0.5618320610687023, "grad_norm": 4078882.101176789, "learning_rate": 5.780188301499636e-06, "loss": 13.8193, "step": 276 }, { "epoch": 0.5638676844783715, "grad_norm": 2517308.8894206337, "learning_rate": 5.740481810844952e-06, "loss": 13.8063, "step": 277 }, { "epoch": 0.5659033078880407, "grad_norm": 2769966.422515691, "learning_rate": 5.700727516845038e-06, "loss": 13.8094, "step": 278 }, { "epoch": 0.5679389312977099, "grad_norm": 3505593.62357941, "learning_rate": 5.660927985921122e-06, "loss": 13.8098, "step": 279 }, { "epoch": 0.5699745547073791, "grad_norm": 2349437.1104983217, "learning_rate": 5.621085787414799e-06, "loss": 13.799, "step": 280 }, { "epoch": 0.5720101781170484, "grad_norm": 3132576.359530331, "learning_rate": 5.581203493422161e-06, "loss": 13.7891, "step": 281 }, { "epoch": 0.5740458015267176, "grad_norm": 4404000.913374095, "learning_rate": 5.541283678627742e-06, "loss": 13.7851, "step": 282 }, { "epoch": 0.5760814249363868, "grad_norm": 3276477.3200832414, "learning_rate": 5.501328920138314e-06, "loss": 13.8194, "step": 283 }, { "epoch": 0.5781170483460559, "grad_norm": 3546838.8556743674, "learning_rate": 5.46134179731651e-06, "loss": 13.8107, "step": 284 }, { "epoch": 0.5801526717557252, "grad_norm": 2115264.4674158324, "learning_rate": 5.421324891614312e-06, "loss": 13.7984, "step": 285 }, { "epoch": 0.5821882951653944, "grad_norm": 2235578.204459906, "learning_rate": 5.3812807864063946e-06, "loss": 13.8009, "step": 286 }, { "epoch": 0.5842239185750636, "grad_norm": 4052228.371342962, "learning_rate": 5.341212066823356e-06, "loss": 13.7885, "step": 287 }, { "epoch": 0.5862595419847328, "grad_norm": 2755225.8421591343, "learning_rate": 5.3011213195848245e-06, "loss": 13.7845, "step": 288 }, { "epoch": 0.5882951653944021, "grad_norm": 3206915.4484688323, "learning_rate": 5.26101113283247e-06, "loss": 13.8352, "step": 289 }, { "epoch": 0.5903307888040712, "grad_norm": 8291421.864476618, "learning_rate": 5.220884095962924e-06, "loss": 13.7882, "step": 290 }, { "epoch": 0.5923664122137404, "grad_norm": 4520092.054349328, "learning_rate": 5.1807427994606065e-06, "loss": 13.7911, "step": 291 }, { "epoch": 0.5944020356234097, "grad_norm": 1988928.2728027685, "learning_rate": 5.140589834730503e-06, "loss": 13.8379, "step": 292 }, { "epoch": 0.5964376590330789, "grad_norm": 4465132.927934143, "learning_rate": 5.100427793930862e-06, "loss": 13.7948, "step": 293 }, { "epoch": 0.5984732824427481, "grad_norm": 2286780.168218436, "learning_rate": 5.06025926980586e-06, "loss": 13.8048, "step": 294 }, { "epoch": 0.6005089058524173, "grad_norm": 2724184.449634601, "learning_rate": 5.0200868555182155e-06, "loss": 13.7999, "step": 295 }, { "epoch": 0.6025445292620865, "grad_norm": 2576691.827404387, "learning_rate": 4.979913144481785e-06, "loss": 13.8064, "step": 296 }, { "epoch": 0.6045801526717557, "grad_norm": 2744547.316751667, "learning_rate": 4.939740730194141e-06, "loss": 13.7882, "step": 297 }, { "epoch": 0.6066157760814249, "grad_norm": 2335607.7276287796, "learning_rate": 4.899572206069138e-06, "loss": 13.8027, "step": 298 }, { "epoch": 0.6086513994910941, "grad_norm": 2599599.7384826983, "learning_rate": 4.8594101652694996e-06, "loss": 13.8156, "step": 299 }, { "epoch": 0.6106870229007634, "grad_norm": 3536926.667490122, "learning_rate": 4.819257200539394e-06, "loss": 13.7784, "step": 300 }, { "epoch": 0.6127226463104326, "grad_norm": 3302834.606287667, "learning_rate": 4.779115904037079e-06, "loss": 13.8263, "step": 301 }, { "epoch": 0.6147582697201018, "grad_norm": 3042014.762667294, "learning_rate": 4.738988867167531e-06, "loss": 13.7901, "step": 302 }, { "epoch": 0.616793893129771, "grad_norm": 2828614.249275787, "learning_rate": 4.698878680415176e-06, "loss": 13.8186, "step": 303 }, { "epoch": 0.6188295165394402, "grad_norm": 3470485.7180142673, "learning_rate": 4.6587879331766465e-06, "loss": 13.8252, "step": 304 }, { "epoch": 0.6208651399491094, "grad_norm": 2142989.454887267, "learning_rate": 4.618719213593605e-06, "loss": 13.8266, "step": 305 }, { "epoch": 0.6229007633587786, "grad_norm": 3599418.075481026, "learning_rate": 4.5786751083856895e-06, "loss": 13.7994, "step": 306 }, { "epoch": 0.6249363867684479, "grad_norm": 2468894.215654323, "learning_rate": 4.53865820268349e-06, "loss": 13.7975, "step": 307 }, { "epoch": 0.6269720101781171, "grad_norm": 3514891.7530831015, "learning_rate": 4.498671079861686e-06, "loss": 13.8089, "step": 308 }, { "epoch": 0.6290076335877862, "grad_norm": 2860617.0108131613, "learning_rate": 4.4587163213722595e-06, "loss": 13.8118, "step": 309 }, { "epoch": 0.6310432569974554, "grad_norm": 3631959.0225475803, "learning_rate": 4.41879650657784e-06, "loss": 13.7789, "step": 310 }, { "epoch": 0.6330788804071247, "grad_norm": 16505260.927290315, "learning_rate": 4.3789142125852015e-06, "loss": 13.7988, "step": 311 }, { "epoch": 0.6351145038167939, "grad_norm": 2464199.3044430655, "learning_rate": 4.339072014078879e-06, "loss": 13.8382, "step": 312 }, { "epoch": 0.6371501272264631, "grad_norm": 2003547.7230704648, "learning_rate": 4.299272483154963e-06, "loss": 13.8229, "step": 313 }, { "epoch": 0.6391857506361324, "grad_norm": 2815342.948838401, "learning_rate": 4.259518189155049e-06, "loss": 13.8061, "step": 314 }, { "epoch": 0.6412213740458015, "grad_norm": 3015740.318214435, "learning_rate": 4.219811698500365e-06, "loss": 13.8036, "step": 315 }, { "epoch": 0.6432569974554707, "grad_norm": 3876924.4779627738, "learning_rate": 4.1801555745261025e-06, "loss": 13.7914, "step": 316 }, { "epoch": 0.6452926208651399, "grad_norm": 3172609.6206479096, "learning_rate": 4.140552377315918e-06, "loss": 13.8449, "step": 317 }, { "epoch": 0.6473282442748092, "grad_norm": 4902114.509930776, "learning_rate": 4.101004663536675e-06, "loss": 13.7976, "step": 318 }, { "epoch": 0.6493638676844784, "grad_norm": 2691295.2268303274, "learning_rate": 4.061514986273391e-06, "loss": 13.8136, "step": 319 }, { "epoch": 0.6513994910941476, "grad_norm": 3460007.077345259, "learning_rate": 4.022085894864408e-06, "loss": 13.7403, "step": 320 }, { "epoch": 0.6534351145038167, "grad_norm": 2367194.347019021, "learning_rate": 3.982719934736832e-06, "loss": 13.7971, "step": 321 }, { "epoch": 0.655470737913486, "grad_norm": 15501881.45680894, "learning_rate": 3.943419647242189e-06, "loss": 13.8015, "step": 322 }, { "epoch": 0.6575063613231552, "grad_norm": 2840321.3616006514, "learning_rate": 3.904187569492373e-06, "loss": 13.8104, "step": 323 }, { "epoch": 0.6595419847328244, "grad_norm": 2030054.0765901625, "learning_rate": 3.865026234195863e-06, "loss": 13.7948, "step": 324 }, { "epoch": 0.6615776081424937, "grad_norm": 2852479.1215876606, "learning_rate": 3.8259381694942e-06, "loss": 13.7901, "step": 325 }, { "epoch": 0.6636132315521629, "grad_norm": 3456741.004275556, "learning_rate": 3.786925898798801e-06, "loss": 13.7857, "step": 326 }, { "epoch": 0.665648854961832, "grad_norm": 2066561.8194525177, "learning_rate": 3.7479919406280334e-06, "loss": 13.8063, "step": 327 }, { "epoch": 0.6676844783715012, "grad_norm": 3433587.7234547967, "learning_rate": 3.709138808444641e-06, "loss": 13.8119, "step": 328 }, { "epoch": 0.6697201017811705, "grad_norm": 3578864.1748913922, "learning_rate": 3.6703690104934806e-06, "loss": 13.7908, "step": 329 }, { "epoch": 0.6717557251908397, "grad_norm": 4610701.366359627, "learning_rate": 3.6316850496395863e-06, "loss": 13.7847, "step": 330 }, { "epoch": 0.6737913486005089, "grad_norm": 2286947.6993929525, "learning_rate": 3.5930894232066072e-06, "loss": 13.8097, "step": 331 }, { "epoch": 0.6758269720101782, "grad_norm": 2505249.1545348107, "learning_rate": 3.5545846228155743e-06, "loss": 13.7801, "step": 332 }, { "epoch": 0.6778625954198473, "grad_norm": 5630898.8033584, "learning_rate": 3.516173134224059e-06, "loss": 13.8135, "step": 333 }, { "epoch": 0.6798982188295165, "grad_norm": 5219141.786739386, "learning_rate": 3.477857437165694e-06, "loss": 13.8255, "step": 334 }, { "epoch": 0.6819338422391857, "grad_norm": 2124438.8087317753, "learning_rate": 3.4396400051900846e-06, "loss": 13.7879, "step": 335 }, { "epoch": 0.683969465648855, "grad_norm": 2105149.4046267755, "learning_rate": 3.401523305503139e-06, "loss": 13.808, "step": 336 }, { "epoch": 0.6860050890585242, "grad_norm": 2809609.693441461, "learning_rate": 3.3635097988077724e-06, "loss": 13.8112, "step": 337 }, { "epoch": 0.6880407124681934, "grad_norm": 2457213.1931723696, "learning_rate": 3.3256019391450696e-06, "loss": 13.8489, "step": 338 }, { "epoch": 0.6900763358778625, "grad_norm": 12558459.530425403, "learning_rate": 3.287802173735848e-06, "loss": 13.8277, "step": 339 }, { "epoch": 0.6921119592875318, "grad_norm": 6967431.723878883, "learning_rate": 3.250112942822673e-06, "loss": 13.8185, "step": 340 }, { "epoch": 0.694147582697201, "grad_norm": 3411907.5422053095, "learning_rate": 3.212536679512332e-06, "loss": 13.787, "step": 341 }, { "epoch": 0.6961832061068702, "grad_norm": 2429502.145664427, "learning_rate": 3.1750758096187446e-06, "loss": 13.8105, "step": 342 }, { "epoch": 0.6982188295165395, "grad_norm": 2327547.2445986923, "learning_rate": 3.137732751506376e-06, "loss": 13.7772, "step": 343 }, { "epoch": 0.7002544529262087, "grad_norm": 3569398.8894632743, "learning_rate": 3.1005099159341044e-06, "loss": 13.7938, "step": 344 }, { "epoch": 0.7022900763358778, "grad_norm": 3211149.457873932, "learning_rate": 3.0634097058995877e-06, "loss": 13.7915, "step": 345 }, { "epoch": 0.704325699745547, "grad_norm": 3830655.2384712566, "learning_rate": 3.0264345164841426e-06, "loss": 13.8191, "step": 346 }, { "epoch": 0.7063613231552163, "grad_norm": 3823712.125917996, "learning_rate": 2.989586734698113e-06, "loss": 13.8023, "step": 347 }, { "epoch": 0.7083969465648855, "grad_norm": 32196514.721627507, "learning_rate": 2.9528687393267865e-06, "loss": 13.7764, "step": 348 }, { "epoch": 0.7104325699745547, "grad_norm": 1882821.5554029653, "learning_rate": 2.9162829007768103e-06, "loss": 13.8351, "step": 349 }, { "epoch": 0.712468193384224, "grad_norm": 3350119.682457626, "learning_rate": 2.879831580923176e-06, "loss": 13.7957, "step": 350 }, { "epoch": 0.7145038167938931, "grad_norm": 3937737.379258324, "learning_rate": 2.843517132956742e-06, "loss": 13.8002, "step": 351 }, { "epoch": 0.7165394402035623, "grad_norm": 3146762.068649268, "learning_rate": 2.8073419012323154e-06, "loss": 13.7632, "step": 352 }, { "epoch": 0.7185750636132315, "grad_norm": 2767069.333419986, "learning_rate": 2.771308221117309e-06, "loss": 13.7933, "step": 353 }, { "epoch": 0.7206106870229008, "grad_norm": 4373321.192999916, "learning_rate": 2.7354184188409773e-06, "loss": 13.798, "step": 354 }, { "epoch": 0.72264631043257, "grad_norm": 3450348.839131381, "learning_rate": 2.6996748113442397e-06, "loss": 13.8177, "step": 355 }, { "epoch": 0.7246819338422392, "grad_norm": 5591120.72637471, "learning_rate": 2.66407970613011e-06, "loss": 13.7984, "step": 356 }, { "epoch": 0.7267175572519083, "grad_norm": 1703861.5304970315, "learning_rate": 2.6286354011147252e-06, "loss": 13.8147, "step": 357 }, { "epoch": 0.7287531806615776, "grad_norm": 3465138.9565325286, "learning_rate": 2.593344184479003e-06, "loss": 13.8088, "step": 358 }, { "epoch": 0.7307888040712468, "grad_norm": 4545416.100109938, "learning_rate": 2.5582083345209217e-06, "loss": 13.8249, "step": 359 }, { "epoch": 0.732824427480916, "grad_norm": 3516332.5584949586, "learning_rate": 2.5232301195084395e-06, "loss": 13.8055, "step": 360 }, { "epoch": 0.7348600508905853, "grad_norm": 2312331.751163826, "learning_rate": 2.488411797533064e-06, "loss": 13.8223, "step": 361 }, { "epoch": 0.7368956743002545, "grad_norm": 2685597.665166646, "learning_rate": 2.4537556163640726e-06, "loss": 13.8293, "step": 362 }, { "epoch": 0.7389312977099237, "grad_norm": 2522967.966999191, "learning_rate": 2.4192638133034074e-06, "loss": 13.7902, "step": 363 }, { "epoch": 0.7409669211195928, "grad_norm": 2195261.840643841, "learning_rate": 2.384938615041238e-06, "loss": 13.782, "step": 364 }, { "epoch": 0.7430025445292621, "grad_norm": 6598560.135727499, "learning_rate": 2.350782237512215e-06, "loss": 13.8163, "step": 365 }, { "epoch": 0.7450381679389313, "grad_norm": 2786370.46056762, "learning_rate": 2.316796885752415e-06, "loss": 13.7821, "step": 366 }, { "epoch": 0.7470737913486005, "grad_norm": 2943271.811966078, "learning_rate": 2.2829847537569904e-06, "loss": 13.7909, "step": 367 }, { "epoch": 0.7491094147582698, "grad_norm": 2462160.442139116, "learning_rate": 2.2493480243385298e-06, "loss": 13.814, "step": 368 }, { "epoch": 0.751145038167939, "grad_norm": 2225343.862719593, "learning_rate": 2.2158888689861434e-06, "loss": 13.8084, "step": 369 }, { "epoch": 0.7531806615776081, "grad_norm": 2960178.644234803, "learning_rate": 2.182609447725279e-06, "loss": 13.8266, "step": 370 }, { "epoch": 0.7552162849872773, "grad_norm": 2914387.1052595368, "learning_rate": 2.149511908978275e-06, "loss": 13.8007, "step": 371 }, { "epoch": 0.7572519083969466, "grad_norm": 3028649.837352563, "learning_rate": 2.1165983894256647e-06, "loss": 13.7907, "step": 372 }, { "epoch": 0.7592875318066158, "grad_norm": 2648884.5001803297, "learning_rate": 2.0838710138682412e-06, "loss": 13.8052, "step": 373 }, { "epoch": 0.761323155216285, "grad_norm": 3498649.6090777554, "learning_rate": 2.051331895089882e-06, "loss": 13.8098, "step": 374 }, { "epoch": 0.7633587786259542, "grad_norm": 23098352.355775494, "learning_rate": 2.0189831337211573e-06, "loss": 13.8393, "step": 375 }, { "epoch": 0.7653944020356234, "grad_norm": 2461084.0552417845, "learning_rate": 1.9868268181037186e-06, "loss": 13.7867, "step": 376 }, { "epoch": 0.7674300254452926, "grad_norm": 3735300.3296117457, "learning_rate": 1.9548650241554812e-06, "loss": 13.804, "step": 377 }, { "epoch": 0.7694656488549618, "grad_norm": 3008756.34059785, "learning_rate": 1.923099815236608e-06, "loss": 13.7861, "step": 378 }, { "epoch": 0.771501272264631, "grad_norm": 7710036.601809266, "learning_rate": 1.8915332420163074e-06, "loss": 13.7991, "step": 379 }, { "epoch": 0.7735368956743003, "grad_norm": 3540790.6907704584, "learning_rate": 1.8601673423404449e-06, "loss": 13.7907, "step": 380 }, { "epoch": 0.7755725190839695, "grad_norm": 2333150.5964701963, "learning_rate": 1.8290041410999893e-06, "loss": 13.8226, "step": 381 }, { "epoch": 0.7776081424936386, "grad_norm": 3965290.2964736833, "learning_rate": 1.798045650100289e-06, "loss": 13.7884, "step": 382 }, { "epoch": 0.7796437659033079, "grad_norm": 2968777.429909849, "learning_rate": 1.7672938679311957e-06, "loss": 13.7859, "step": 383 }, { "epoch": 0.7816793893129771, "grad_norm": 5317126.450461407, "learning_rate": 1.736750779838044e-06, "loss": 13.8526, "step": 384 }, { "epoch": 0.7837150127226463, "grad_norm": 3055487.063269149, "learning_rate": 1.7064183575934856e-06, "loss": 13.8009, "step": 385 }, { "epoch": 0.7857506361323155, "grad_norm": 16905669.825724535, "learning_rate": 1.676298559370202e-06, "loss": 13.7807, "step": 386 }, { "epoch": 0.7877862595419848, "grad_norm": 5137867.698262086, "learning_rate": 1.6463933296144863e-06, "loss": 13.8198, "step": 387 }, { "epoch": 0.7898218829516539, "grad_norm": 3398913.1351794973, "learning_rate": 1.6167045989207185e-06, "loss": 13.7712, "step": 388 }, { "epoch": 0.7918575063613231, "grad_norm": 3925212.2891858686, "learning_rate": 1.5872342839067305e-06, "loss": 13.807, "step": 389 }, { "epoch": 0.7938931297709924, "grad_norm": 2924712.3948877575, "learning_rate": 1.5579842870900746e-06, "loss": 13.8241, "step": 390 }, { "epoch": 0.7959287531806616, "grad_norm": 5390343.102311131, "learning_rate": 1.5289564967652033e-06, "loss": 13.8485, "step": 391 }, { "epoch": 0.7979643765903308, "grad_norm": 3074687.848582143, "learning_rate": 1.5001527868815702e-06, "loss": 13.8125, "step": 392 }, { "epoch": 0.8, "grad_norm": 3184498.416088956, "learning_rate": 1.4715750169226417e-06, "loss": 13.766, "step": 393 }, { "epoch": 0.8020356234096692, "grad_norm": 2441964.563817395, "learning_rate": 1.4432250317858675e-06, "loss": 13.7805, "step": 394 }, { "epoch": 0.8040712468193384, "grad_norm": 2091387.9994893048, "learning_rate": 1.4151046616635727e-06, "loss": 13.7952, "step": 395 }, { "epoch": 0.8061068702290076, "grad_norm": 3508560.4814663967, "learning_rate": 1.3872157219248045e-06, "loss": 13.7889, "step": 396 }, { "epoch": 0.8081424936386769, "grad_norm": 3962771.0203186534, "learning_rate": 1.3595600129981469e-06, "loss": 13.7979, "step": 397 }, { "epoch": 0.8101781170483461, "grad_norm": 2731620.334807845, "learning_rate": 1.3321393202554739e-06, "loss": 13.7976, "step": 398 }, { "epoch": 0.8122137404580153, "grad_norm": 3197496.0139092538, "learning_rate": 1.3049554138967052e-06, "loss": 13.7901, "step": 399 }, { "epoch": 0.8142493638676844, "grad_norm": 7014943.992062835, "learning_rate": 1.278010048835523e-06, "loss": 13.7998, "step": 400 }, { "epoch": 0.8162849872773537, "grad_norm": 3316247.1212317836, "learning_rate": 1.2513049645860759e-06, "loss": 13.765, "step": 401 }, { "epoch": 0.8183206106870229, "grad_norm": 2783094.2613453907, "learning_rate": 1.224841885150691e-06, "loss": 13.8445, "step": 402 }, { "epoch": 0.8203562340966921, "grad_norm": 3355566.32780499, "learning_rate": 1.1986225189085627e-06, "loss": 13.8081, "step": 403 }, { "epoch": 0.8223918575063613, "grad_norm": 2212096.023617605, "learning_rate": 1.172648558505477e-06, "loss": 13.7965, "step": 404 }, { "epoch": 0.8244274809160306, "grad_norm": 1744005.9134531072, "learning_rate": 1.1469216807445348e-06, "loss": 13.827, "step": 405 }, { "epoch": 0.8264631043256997, "grad_norm": 5478757.4739052905, "learning_rate": 1.1214435464779006e-06, "loss": 13.8068, "step": 406 }, { "epoch": 0.8284987277353689, "grad_norm": 10845370.007264948, "learning_rate": 1.0962158004995893e-06, "loss": 13.8192, "step": 407 }, { "epoch": 0.8305343511450382, "grad_norm": 6268218.580873969, "learning_rate": 1.0712400714392723e-06, "loss": 13.7939, "step": 408 }, { "epoch": 0.8325699745547074, "grad_norm": 4049672.137206165, "learning_rate": 1.0465179716571467e-06, "loss": 13.7897, "step": 409 }, { "epoch": 0.8346055979643766, "grad_norm": 5672727.971393141, "learning_rate": 1.0220510971398473e-06, "loss": 13.8234, "step": 410 }, { "epoch": 0.8366412213740458, "grad_norm": 2826950.9583422337, "learning_rate": 9.978410273974015e-07, "loss": 13.7928, "step": 411 }, { "epoch": 0.838676844783715, "grad_norm": 2758814.4659312656, "learning_rate": 9.738893253612808e-07, "loss": 13.802, "step": 412 }, { "epoch": 0.8407124681933842, "grad_norm": 2475383.7033035085, "learning_rate": 9.50197537283481e-07, "loss": 13.7978, "step": 413 }, { "epoch": 0.8427480916030534, "grad_norm": 3247115.9631803534, "learning_rate": 9.267671926367166e-07, "loss": 13.826, "step": 414 }, { "epoch": 0.8447837150127226, "grad_norm": 2633292.367379474, "learning_rate": 9.035998040156801e-07, "loss": 13.7931, "step": 415 }, { "epoch": 0.8468193384223919, "grad_norm": 2515821.989602333, "learning_rate": 8.806968670393801e-07, "loss": 13.7718, "step": 416 }, { "epoch": 0.8488549618320611, "grad_norm": 2652660.850368756, "learning_rate": 8.580598602546109e-07, "loss": 13.8055, "step": 417 }, { "epoch": 0.8508905852417303, "grad_norm": 2999838.4744108086, "learning_rate": 8.356902450404792e-07, "loss": 13.817, "step": 418 }, { "epoch": 0.8529262086513995, "grad_norm": 2359848.6979157086, "learning_rate": 8.135894655140758e-07, "loss": 13.79, "step": 419 }, { "epoch": 0.8549618320610687, "grad_norm": 3156996.0591459703, "learning_rate": 7.91758948437249e-07, "loss": 13.7984, "step": 420 }, { "epoch": 0.8569974554707379, "grad_norm": 3762598.8987431657, "learning_rate": 7.702001031244816e-07, "loss": 13.8018, "step": 421 }, { "epoch": 0.8590330788804071, "grad_norm": 1976772.2357703648, "learning_rate": 7.489143213519301e-07, "loss": 13.8201, "step": 422 }, { "epoch": 0.8610687022900764, "grad_norm": 2785725.350606155, "learning_rate": 7.279029772675572e-07, "loss": 13.7565, "step": 423 }, { "epoch": 0.8631043256997456, "grad_norm": 2620438.7904132027, "learning_rate": 7.071674273024353e-07, "loss": 13.793, "step": 424 }, { "epoch": 0.8651399491094147, "grad_norm": 2646276.309564974, "learning_rate": 6.86709010083172e-07, "loss": 13.8229, "step": 425 }, { "epoch": 0.867175572519084, "grad_norm": 2628696.346363584, "learning_rate": 6.665290463454882e-07, "loss": 13.7809, "step": 426 }, { "epoch": 0.8692111959287532, "grad_norm": 3600670.0350286104, "learning_rate": 6.466288388489689e-07, "loss": 13.8044, "step": 427 }, { "epoch": 0.8712468193384224, "grad_norm": 2947142.074939093, "learning_rate": 6.270096722929442e-07, "loss": 13.8006, "step": 428 }, { "epoch": 0.8732824427480916, "grad_norm": 2029930.0771920187, "learning_rate": 6.076728132335669e-07, "loss": 13.796, "step": 429 }, { "epoch": 0.8753180661577609, "grad_norm": 2143832.4235748462, "learning_rate": 5.886195100020408e-07, "loss": 13.7854, "step": 430 }, { "epoch": 0.87735368956743, "grad_norm": 13943126.109787703, "learning_rate": 5.698509926240275e-07, "loss": 13.7955, "step": 431 }, { "epoch": 0.8793893129770992, "grad_norm": 3462678.627784003, "learning_rate": 5.513684727402529e-07, "loss": 13.8238, "step": 432 }, { "epoch": 0.8814249363867684, "grad_norm": 2958565.3339085556, "learning_rate": 5.331731435282705e-07, "loss": 13.8227, "step": 433 }, { "epoch": 0.8834605597964377, "grad_norm": 2315016.094193514, "learning_rate": 5.152661796254505e-07, "loss": 13.7815, "step": 434 }, { "epoch": 0.8854961832061069, "grad_norm": 3124561.491445178, "learning_rate": 4.976487370531352e-07, "loss": 13.7972, "step": 435 }, { "epoch": 0.8875318066157761, "grad_norm": 2569217.587769061, "learning_rate": 4.803219531420128e-07, "loss": 13.7759, "step": 436 }, { "epoch": 0.8895674300254452, "grad_norm": 5211670.405327593, "learning_rate": 4.6328694645870254e-07, "loss": 13.7982, "step": 437 }, { "epoch": 0.8916030534351145, "grad_norm": 2303871.1225886643, "learning_rate": 4.46544816733529e-07, "loss": 13.7919, "step": 438 }, { "epoch": 0.8936386768447837, "grad_norm": 3417955.106120249, "learning_rate": 4.3009664478954384e-07, "loss": 13.8388, "step": 439 }, { "epoch": 0.8956743002544529, "grad_norm": 3491337.261636375, "learning_rate": 4.139434924727359e-07, "loss": 13.7903, "step": 440 }, { "epoch": 0.8977099236641222, "grad_norm": 2598696.463432398, "learning_rate": 3.9808640258348686e-07, "loss": 13.7935, "step": 441 }, { "epoch": 0.8997455470737914, "grad_norm": 2406731.2012268724, "learning_rate": 3.825263988092587e-07, "loss": 13.8001, "step": 442 }, { "epoch": 0.9017811704834605, "grad_norm": 1850343.550069675, "learning_rate": 3.672644856584928e-07, "loss": 13.8345, "step": 443 }, { "epoch": 0.9038167938931297, "grad_norm": 2877460.3830578206, "learning_rate": 3.523016483957742e-07, "loss": 13.8395, "step": 444 }, { "epoch": 0.905852417302799, "grad_norm": 3386724.5492421445, "learning_rate": 3.3763885297822153e-07, "loss": 13.8028, "step": 445 }, { "epoch": 0.9078880407124682, "grad_norm": 2849485.569450586, "learning_rate": 3.2327704599312283e-07, "loss": 13.8038, "step": 446 }, { "epoch": 0.9099236641221374, "grad_norm": 3175459.533705238, "learning_rate": 3.0921715459683753e-07, "loss": 13.8303, "step": 447 }, { "epoch": 0.9119592875318067, "grad_norm": 22724636.64948581, "learning_rate": 2.95460086454929e-07, "loss": 13.772, "step": 448 }, { "epoch": 0.9139949109414758, "grad_norm": 2467166.6966894856, "learning_rate": 2.820067296835799e-07, "loss": 13.7719, "step": 449 }, { "epoch": 0.916030534351145, "grad_norm": 3412055.676564019, "learning_rate": 2.688579527922514e-07, "loss": 13.7956, "step": 450 }, { "epoch": 0.9180661577608142, "grad_norm": 2620791.7542605917, "learning_rate": 2.560146046276135e-07, "loss": 13.7717, "step": 451 }, { "epoch": 0.9201017811704835, "grad_norm": 2077929.3418459287, "learning_rate": 2.4347751431875453e-07, "loss": 13.7763, "step": 452 }, { "epoch": 0.9221374045801527, "grad_norm": 2370550.2619939703, "learning_rate": 2.3124749122364286e-07, "loss": 13.7964, "step": 453 }, { "epoch": 0.9241730279898219, "grad_norm": 5750923.606982284, "learning_rate": 2.1932532487688784e-07, "loss": 13.791, "step": 454 }, { "epoch": 0.926208651399491, "grad_norm": 3460175.4976370293, "learning_rate": 2.0771178493876387e-07, "loss": 13.79, "step": 455 }, { "epoch": 0.9282442748091603, "grad_norm": 5901311.706134819, "learning_rate": 1.964076211455246e-07, "loss": 13.7797, "step": 456 }, { "epoch": 0.9302798982188295, "grad_norm": 5105079.708966371, "learning_rate": 1.8541356326100436e-07, "loss": 13.7849, "step": 457 }, { "epoch": 0.9323155216284987, "grad_norm": 4198563.584774799, "learning_rate": 1.7473032102949983e-07, "loss": 13.8415, "step": 458 }, { "epoch": 0.934351145038168, "grad_norm": 3844225.7229981595, "learning_rate": 1.6435858412996275e-07, "loss": 13.766, "step": 459 }, { "epoch": 0.9363867684478372, "grad_norm": 3854801.945640343, "learning_rate": 1.542990221314644e-07, "loss": 13.7996, "step": 460 }, { "epoch": 0.9384223918575063, "grad_norm": 4666453.104767151, "learning_rate": 1.445522844499775e-07, "loss": 13.8, "step": 461 }, { "epoch": 0.9404580152671755, "grad_norm": 5472437.376052735, "learning_rate": 1.3511900030644954e-07, "loss": 13.7599, "step": 462 }, { "epoch": 0.9424936386768448, "grad_norm": 4682435.578567764, "learning_rate": 1.2599977868618052e-07, "loss": 13.7934, "step": 463 }, { "epoch": 0.944529262086514, "grad_norm": 9272421.276272465, "learning_rate": 1.1719520829951203e-07, "loss": 13.8146, "step": 464 }, { "epoch": 0.9465648854961832, "grad_norm": 4195416.415355079, "learning_rate": 1.087058575438199e-07, "loss": 13.8253, "step": 465 }, { "epoch": 0.9486005089058525, "grad_norm": 3012392.88106662, "learning_rate": 1.0053227446681912e-07, "loss": 13.8254, "step": 466 }, { "epoch": 0.9506361323155216, "grad_norm": 4716265.36093074, "learning_rate": 9.267498673118547e-08, "loss": 13.8287, "step": 467 }, { "epoch": 0.9526717557251908, "grad_norm": 3046833.346747879, "learning_rate": 8.513450158049109e-08, "loss": 13.7954, "step": 468 }, { "epoch": 0.95470737913486, "grad_norm": 3919156.6955382572, "learning_rate": 7.791130580645623e-08, "loss": 13.7727, "step": 469 }, { "epoch": 0.9567430025445293, "grad_norm": 2642703.3691420523, "learning_rate": 7.100586571752444e-08, "loss": 13.8211, "step": 470 }, { "epoch": 0.9587786259541985, "grad_norm": 2860021.0276084486, "learning_rate": 6.441862710876102e-08, "loss": 13.7861, "step": 471 }, { "epoch": 0.9608142493638677, "grad_norm": 2389117.180384712, "learning_rate": 5.815001523307162e-08, "loss": 13.7998, "step": 472 }, { "epoch": 0.9628498727735368, "grad_norm": 4343380.201960487, "learning_rate": 5.220043477374759e-08, "loss": 13.7891, "step": 473 }, { "epoch": 0.9648854961832061, "grad_norm": 5212705.489252775, "learning_rate": 4.657026981834623e-08, "loss": 13.8108, "step": 474 }, { "epoch": 0.9669211195928753, "grad_norm": 2520458.4461040264, "learning_rate": 4.125988383388957e-08, "loss": 13.8067, "step": 475 }, { "epoch": 0.9689567430025445, "grad_norm": 4182021.7452560714, "learning_rate": 3.626961964340203e-08, "loss": 13.7897, "step": 476 }, { "epoch": 0.9709923664122138, "grad_norm": 10159149.969018588, "learning_rate": 3.159979940378088e-08, "loss": 13.8046, "step": 477 }, { "epoch": 0.973027989821883, "grad_norm": 2510949.593741673, "learning_rate": 2.725072458499567e-08, "loss": 13.8275, "step": 478 }, { "epoch": 0.9750636132315522, "grad_norm": 2900958.891211133, "learning_rate": 2.3222675950627106e-08, "loss": 13.7972, "step": 479 }, { "epoch": 0.9770992366412213, "grad_norm": 3768468.1026937226, "learning_rate": 1.9515913539743247e-08, "loss": 13.8208, "step": 480 }, { "epoch": 0.9791348600508906, "grad_norm": 2563277.62892743, "learning_rate": 1.613067665010959e-08, "loss": 13.8265, "step": 481 }, { "epoch": 0.9811704834605598, "grad_norm": 23583570.860715404, "learning_rate": 1.3067183822742525e-08, "loss": 13.8262, "step": 482 }, { "epoch": 0.983206106870229, "grad_norm": 8371515.456262343, "learning_rate": 1.0325632827801745e-08, "loss": 13.8356, "step": 483 }, { "epoch": 0.9852417302798983, "grad_norm": 4260314.378446997, "learning_rate": 7.906200651819907e-09, "loss": 13.8054, "step": 484 }, { "epoch": 0.9872773536895675, "grad_norm": 2235986.274852917, "learning_rate": 5.809043486279531e-09, "loss": 13.7764, "step": 485 }, { "epoch": 0.9893129770992366, "grad_norm": 2261687.826321509, "learning_rate": 4.034296717527752e-09, "loss": 13.8129, "step": 486 }, { "epoch": 0.9913486005089058, "grad_norm": 5923746.692619356, "learning_rate": 2.5820749180388573e-09, "loss": 13.843, "step": 487 }, { "epoch": 0.9933842239185751, "grad_norm": 2729193.3221547497, "learning_rate": 1.4524718390140913e-09, "loss": 13.8395, "step": 488 }, { "epoch": 0.9954198473282443, "grad_norm": 2703798.169864237, "learning_rate": 6.455604043331676e-10, "loss": 13.8165, "step": 489 }, { "epoch": 0.9974554707379135, "grad_norm": 3350867.6861675973, "learning_rate": 1.6139270584358823e-10, "loss": 13.8103, "step": 490 }, { "epoch": 0.9994910941475827, "grad_norm": 1851326.2112600075, "learning_rate": 0.0, "loss": 13.8163, "step": 491 }, { "epoch": 0.9994910941475827, "eval_loss": 13.805337905883789, "eval_runtime": 382.78, "eval_samples_per_second": 2.283, "eval_steps_per_second": 0.572, "step": 491 } ], "logging_steps": 1, "max_steps": 491, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 100344791040000.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }