{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "eval_steps": 900,
  "global_step": 22500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.10222222222222223, "grad_norm": 6382.4189453125, "learning_rate": 3.4074074074074077e-06, "loss": 306.5843, "step": 23 },
    { "epoch": 0.20444444444444446, "grad_norm": 154.1435546875, "learning_rate": 6.814814814814815e-06, "loss": 62.983, "step": 46 },
    { "epoch": 0.30666666666666664, "grad_norm": 10.364509582519531, "learning_rate": 1.0222222222222223e-05, "loss": 7.6227, "step": 69 },
    { "epoch": 0.4088888888888889, "grad_norm": 13.348139762878418, "learning_rate": 1.362962962962963e-05, "loss": 7.322, "step": 92 },
    { "epoch": 0.5111111111111111, "grad_norm": 1.03432297706604, "learning_rate": 1.7037037037037038e-05, "loss": 7.2767, "step": 115 },
    { "epoch": 0.6133333333333333, "grad_norm": 2.311262369155884, "learning_rate": 2.0444444444444446e-05, "loss": 7.2605, "step": 138 },
    { "epoch": 0.7155555555555555, "grad_norm": 1.2174512147903442, "learning_rate": 2.3851851851851854e-05, "loss": 7.2589, "step": 161 },
    { "epoch": 0.8177777777777778, "grad_norm": 1.1917160749435425, "learning_rate": 2.725925925925926e-05, "loss": 7.2573, "step": 184 },
    { "epoch": 0.92, "grad_norm": 0.802689254283905, "learning_rate": 3.066666666666667e-05, "loss": 7.2555, "step": 207 },
    { "epoch": 1.0222222222222221, "grad_norm": 0.8915572762489319, "learning_rate": 3.4074074074074077e-05, "loss": 7.2539, "step": 230 },
    { "epoch": 1.1244444444444444, "grad_norm": 1.1943933963775635, "learning_rate": 3.7481481481481484e-05, "loss": 7.2509, "step": 253 },
    { "epoch": 1.2266666666666666, "grad_norm": 0.9069448709487915, "learning_rate": 4.088888888888889e-05, "loss": 7.2492, "step": 276 },
    { "epoch": 1.3288888888888888, "grad_norm": 0.7575523853302002, "learning_rate": 4.42962962962963e-05, "loss": 7.2472, "step": 299 },
    { "epoch": 1.431111111111111, "grad_norm": 0.5182924866676331, "learning_rate": 4.770370370370371e-05, "loss": 7.2453, "step": 322 },
    { "epoch": 1.5333333333333332, "grad_norm": 0.5943706631660461, "learning_rate": 5.111111111111111e-05, "loss": 7.2433, "step": 345 },
    { "epoch": 1.6355555555555554, "grad_norm": 0.5987505912780762, "learning_rate": 5.451851851851852e-05, "loss": 7.2415, "step": 368 },
    { "epoch": 1.7377777777777776, "grad_norm": 0.8691719770431519, "learning_rate": 5.792592592592593e-05, "loss": 7.2388, "step": 391 },
    { "epoch": 1.8399999999999999, "grad_norm": 0.9830289483070374, "learning_rate": 6.133333333333334e-05, "loss": 7.2337, "step": 414 },
    { "epoch": 1.942222222222222, "grad_norm": 0.9140754342079163, "learning_rate": 6.474074074074075e-05, "loss": 7.2245, "step": 437 },
    { "epoch": 2.0444444444444443, "grad_norm": 1.7775914669036865, "learning_rate": 6.814814814814815e-05, "loss": 7.2109, "step": 460 },
    { "epoch": 2.1466666666666665, "grad_norm": 1.317034363746643, "learning_rate": 7.155555555555555e-05, "loss": 7.1871, "step": 483 },
    { "epoch": 2.2488888888888887, "grad_norm": 1.6885732412338257, "learning_rate": 7.496296296296297e-05, "loss": 7.1349, "step": 506 },
    { "epoch": 2.351111111111111, "grad_norm": 2.464526653289795, "learning_rate": 7.837037037037037e-05, "loss": 7.0371, "step": 529 },
    { "epoch": 2.453333333333333, "grad_norm": 3.4147374629974365, "learning_rate": 8.177777777777778e-05, "loss": 6.9365, "step": 552 },
    { "epoch": 2.5555555555555554, "grad_norm": 3.6264212131500244, "learning_rate": 8.518518518518518e-05, "loss": 6.8548, "step": 575 },
    { "epoch": 2.6577777777777776, "grad_norm": 2.2979955673217773, "learning_rate": 8.85925925925926e-05, "loss": 6.7637, "step": 598 },
    { "epoch": 2.76, "grad_norm": 2.969346046447754, "learning_rate": 9.200000000000001e-05, "loss": 6.6778, "step": 621 },
    { "epoch": 2.862222222222222, "grad_norm": 4.26610803604126, "learning_rate": 9.540740740740741e-05, "loss": 6.5954, "step": 644 },
    { "epoch": 2.964444444444444, "grad_norm": 1.7684084177017212, "learning_rate": 9.881481481481482e-05, "loss": 6.5164, "step": 667 },
    { "epoch": 3.066666666666667, "grad_norm": 3.720853090286255, "learning_rate": 9.999988344964554e-05, "loss": 6.4356, "step": 690 },
    { "epoch": 3.168888888888889, "grad_norm": 2.5611510276794434, "learning_rate": 9.99992520072995e-05, "loss": 6.3594, "step": 713 },
    { "epoch": 3.2711111111111113, "grad_norm": 5.3843255043029785, "learning_rate": 9.999807252777301e-05, "loss": 6.3057, "step": 736 },
    { "epoch": 3.3733333333333335, "grad_norm": 4.412026882171631, "learning_rate": 9.999634502399426e-05, "loss": 6.25, "step": 759 },
    { "epoch": 3.4755555555555557, "grad_norm": 3.188660144805908, "learning_rate": 9.999406951489825e-05, "loss": 6.1975, "step": 782 },
    { "epoch": 3.5777777777777775, "grad_norm": 4.5765156745910645, "learning_rate": 9.999124602542662e-05, "loss": 6.1516, "step": 805 },
    { "epoch": 3.68, "grad_norm": 5.967836856842041, "learning_rate": 9.998787458652739e-05, "loss": 6.1038, "step": 828 },
    { "epoch": 3.7822222222222224, "grad_norm": 6.038416385650635, "learning_rate": 9.998395523515457e-05, "loss": 6.078, "step": 851 },
    { "epoch": 3.8844444444444446, "grad_norm": 2.577953577041626, "learning_rate": 9.997948801426783e-05, "loss": 6.0297, "step": 874 },
    { "epoch": 3.986666666666667, "grad_norm": 3.8739564418792725, "learning_rate": 9.997447297283196e-05, "loss": 5.9847, "step": 897 },
    { "epoch": 4.088888888888889, "grad_norm": 5.759775161743164, "learning_rate": 9.996891016581633e-05, "loss": 5.9452, "step": 920 },
    { "epoch": 4.191111111111111, "grad_norm": 5.758726596832275, "learning_rate": 9.996279965419441e-05, "loss": 5.9283, "step": 943 },
    { "epoch": 4.293333333333333, "grad_norm": 3.345691204071045, "learning_rate": 9.995614150494293e-05, "loss": 5.8792, "step": 966 },
    { "epoch": 4.395555555555555, "grad_norm": 5.426297664642334, "learning_rate": 9.994893579104123e-05, "loss": 5.8526, "step": 989 },
    { "epoch": 4.497777777777777, "grad_norm": 4.649121284484863, "learning_rate": 9.994118259147049e-05, "loss": 5.8266, "step": 1012 },
    { "epoch": 4.6, "grad_norm": 5.175451278686523, "learning_rate": 9.993288199121283e-05, "loss": 5.8114, "step": 1035 },
    { "epoch": 4.702222222222222, "grad_norm": 4.655645370483398, "learning_rate": 9.992403408125033e-05, "loss": 5.7801, "step": 1058 },
    { "epoch": 4.804444444444444, "grad_norm": 5.830355644226074, "learning_rate": 9.991463895856414e-05, "loss": 5.7576, "step": 1081 },
    { "epoch": 4.906666666666666, "grad_norm": 3.2799057960510254, "learning_rate": 9.990469672613331e-05, "loss": 5.7327, "step": 1104 },
    { "epoch": 5.0088888888888885, "grad_norm": 5.891563415527344, "learning_rate": 9.989420749293375e-05, "loss": 5.7139, "step": 1127 },
    { "epoch": 5.111111111111111, "grad_norm": 6.125003337860107, "learning_rate": 9.988317137393697e-05, "loss": 5.6823, "step": 1150 },
    { "epoch": 5.213333333333333, "grad_norm": 4.9209442138671875, "learning_rate": 9.987158849010885e-05, "loss": 5.6534, "step": 1173 },
    { "epoch": 5.315555555555555, "grad_norm": 3.9249610900878906, "learning_rate": 9.985945896840829e-05, "loss": 5.6601, "step": 1196 },
    { "epoch": 5.417777777777777, "grad_norm": 7.975271701812744, "learning_rate": 9.984678294178589e-05, "loss": 5.6278, "step": 1219 },
    { "epoch": 5.52, "grad_norm": 4.072458267211914, "learning_rate": 9.983356054918238e-05, "loss": 5.6104, "step": 1242 },
    { "epoch": 5.622222222222222, "grad_norm": 5.122928142547607, "learning_rate": 9.981979193552721e-05, "loss": 5.5991, "step": 1265 },
    { "epoch": 5.724444444444444, "grad_norm": 6.029202461242676, "learning_rate": 9.980547725173685e-05, "loss": 5.5761, "step": 1288 },
    { "epoch": 5.826666666666666, "grad_norm": 4.795958042144775, "learning_rate": 9.979061665471326e-05, "loss": 5.5573, "step": 1311 },
    { "epoch": 5.928888888888888, "grad_norm": 3.8007431030273438, "learning_rate": 9.977521030734203e-05, "loss": 5.5274, "step": 1334 },
    { "epoch": 6.0311111111111115, "grad_norm": 5.163888931274414, "learning_rate": 9.975925837849073e-05, "loss": 5.5212, "step": 1357 },
    { "epoch": 6.133333333333334, "grad_norm": 5.857538223266602, "learning_rate": 9.9742761043007e-05, "loss": 5.5039, "step": 1380 },
    { "epoch": 6.235555555555556, "grad_norm": 4.817676067352295, "learning_rate": 9.972571848171657e-05, "loss": 5.4863, "step": 1403 },
    { "epoch": 6.337777777777778, "grad_norm": 4.5216450691223145, "learning_rate": 9.97081308814214e-05, "loss": 5.4866, "step": 1426 },
    { "epoch": 6.44, "grad_norm": 5.7964630126953125, "learning_rate": 9.968999843489754e-05, "loss": 5.4544, "step": 1449 },
    { "epoch": 6.542222222222223, "grad_norm": 7.403745174407959, "learning_rate": 9.967132134089309e-05, "loss": 5.4383, "step": 1472 },
    { "epoch": 6.644444444444445, "grad_norm": 5.906863689422607, "learning_rate": 9.965209980412593e-05, "loss": 5.4435, "step": 1495 },
    { "epoch": 6.746666666666667, "grad_norm": 4.985208511352539, "learning_rate": 9.963233403528154e-05, "loss": 5.4271, "step": 1518 },
    { "epoch": 6.848888888888889, "grad_norm": 5.670632839202881, "learning_rate": 9.96120242510107e-05, "loss": 5.4023, "step": 1541 },
    { "epoch": 6.9511111111111115, "grad_norm": 4.155480861663818, "learning_rate": 9.959117067392709e-05, "loss": 5.3781, "step": 1564 },
    { "epoch": 7.053333333333334, "grad_norm": 6.202167987823486, "learning_rate": 9.95697735326048e-05, "loss": 5.3696, "step": 1587 },
    { "epoch": 7.155555555555556, "grad_norm": 5.649682998657227, "learning_rate": 9.954783306157595e-05, "loss": 5.3255, "step": 1610 },
    { "epoch": 7.257777777777778, "grad_norm": 6.699223518371582, "learning_rate": 9.952534950132802e-05, "loss": 5.3186, "step": 1633 },
    { "epoch": 7.36, "grad_norm": 5.433987140655518, "learning_rate": 9.95023230983012e-05, "loss": 5.3147, "step": 1656 },
    { "epoch": 7.4622222222222225, "grad_norm": 4.822690010070801, "learning_rate": 9.947875410488581e-05, "loss": 5.3022, "step": 1679 },
    { "epoch": 7.564444444444445, "grad_norm": 5.345188617706299, "learning_rate": 9.945464277941939e-05, "loss": 5.2828, "step": 1702 },
    { "epoch": 7.666666666666667, "grad_norm": 4.902531623840332, "learning_rate": 9.942998938618394e-05, "loss": 5.2818, "step": 1725 },
    { "epoch": 7.768888888888889, "grad_norm": 7.8368353843688965, "learning_rate": 9.940479419540304e-05, "loss": 5.2735, "step": 1748 },
    { "epoch": 7.871111111111111, "grad_norm": 5.669989585876465, "learning_rate": 9.937905748323883e-05, "loss": 5.2554, "step": 1771 },
    { "epoch": 7.973333333333334, "grad_norm": 4.463327407836914, "learning_rate": 9.935277953178905e-05, "loss": 5.2421, "step": 1794 },
    { "epoch": 8.075555555555555, "grad_norm": 4.2700629234313965, "learning_rate": 9.93259606290839e-05, "loss": 5.1956, "step": 1817 },
    { "epoch": 8.177777777777777, "grad_norm": 5.543842315673828, "learning_rate": 9.929860106908289e-05, "loss": 5.1719, "step": 1840 },
    { "epoch": 8.28, "grad_norm": 10.465546607971191, "learning_rate": 9.927070115167161e-05, "loss": 5.1691, "step": 1863 },
    { "epoch": 8.382222222222222, "grad_norm": 5.517487525939941, "learning_rate": 9.924226118265849e-05, "loss": 5.1431, "step": 1886 },
    { "epoch": 8.484444444444444, "grad_norm": 6.022068977355957, "learning_rate": 9.921328147377143e-05, "loss": 5.1507, "step": 1909 },
    { "epoch": 8.586666666666666, "grad_norm": 4.770472526550293, "learning_rate": 9.918376234265428e-05, "loss": 5.1385, "step": 1932 },
    { "epoch": 8.688888888888888, "grad_norm": 6.177302360534668, "learning_rate": 9.915370411286356e-05, "loss": 5.1091, "step": 1955 },
    { "epoch": 8.79111111111111, "grad_norm": 6.306371688842773, "learning_rate": 9.912310711386473e-05, "loss": 5.1276, "step": 1978 },
    { "epoch": 8.893333333333333, "grad_norm": 7.086174488067627, "learning_rate": 9.909197168102867e-05, "loss": 5.0997, "step": 2001 },
    { "epoch": 8.995555555555555, "grad_norm": 5.590447902679443, "learning_rate": 9.906029815562797e-05, "loss": 5.0776, "step": 2024 },
    { "epoch": 9.097777777777777, "grad_norm": 4.786597728729248, "learning_rate": 9.902808688483323e-05, "loss": 5.0244, "step": 2047 },
    { "epoch": 9.2, "grad_norm": 7.7961015701293945, "learning_rate": 9.899533822170922e-05, "loss": 5.0232, "step": 2070 },
    { "epoch": 9.302222222222222, "grad_norm": 5.857214450836182, "learning_rate": 9.896205252521099e-05, "loss": 5.0213, "step": 2093 },
    { "epoch": 9.404444444444444, "grad_norm": 6.194970607757568, "learning_rate": 9.892823016017999e-05, "loss": 4.984, "step": 2116 },
    { "epoch": 9.506666666666666, "grad_norm": 7.040445804595947, "learning_rate": 9.889387149734004e-05, "loss": 4.9845, "step": 2139 },
    { "epoch": 9.608888888888888, "grad_norm": 6.245872497558594, "learning_rate": 9.885897691329327e-05, "loss": 4.9771, "step": 2162 },
    { "epoch": 9.71111111111111, "grad_norm": 4.590968608856201, "learning_rate": 9.882354679051598e-05, "loss": 4.9565, "step": 2185 },
    { "epoch": 9.813333333333333, "grad_norm": 5.94847297668457, "learning_rate": 9.87875815173545e-05, "loss": 4.9531, "step": 2208 },
    { "epoch": 9.915555555555555, "grad_norm": 8.10450267791748, "learning_rate": 9.875108148802082e-05, "loss": 4.9557, "step": 2231 },
    { "epoch": 10.017777777777777, "grad_norm": 5.512363910675049, "learning_rate": 9.871404710258841e-05, "loss": 4.9295, "step": 2254 },
    { "epoch": 10.12, "grad_norm": 5.455718517303467, "learning_rate": 9.867647876698775e-05, "loss": 4.8753, "step": 2277 },
    { "epoch": 10.222222222222221, "grad_norm": 5.959130764007568, "learning_rate": 9.86383768930019e-05, "loss": 4.8732, "step": 2300 },
    { "epoch": 10.324444444444444, "grad_norm": 6.239514350891113, "learning_rate": 9.859974189826198e-05, "loss": 4.8707, "step": 2323 },
    { "epoch": 10.426666666666666, "grad_norm": 7.127731800079346, "learning_rate": 9.856057420624259e-05, "loss": 4.846, "step": 2346 },
    { "epoch": 10.528888888888888, "grad_norm": 6.327420234680176, "learning_rate": 9.852087424625717e-05, "loss": 4.8457, "step": 2369 },
    { "epoch": 10.63111111111111, "grad_norm": 6.398340225219727, "learning_rate": 9.848064245345333e-05, "loss": 4.8295, "step": 2392 },
    { "epoch": 10.733333333333333, "grad_norm": 5.890859603881836, "learning_rate": 9.843987926880803e-05, "loss": 4.8091, "step": 2415 },
    { "epoch": 10.835555555555555, "grad_norm": 7.191392421722412, "learning_rate": 9.839858513912276e-05, "loss": 4.8022, "step": 2438 },
    { "epoch": 10.937777777777779, "grad_norm": 6.238222122192383, "learning_rate": 9.835676051701867e-05, "loss": 4.7898, "step": 2461 },
    { "epoch": 11.04, "grad_norm": 6.7246551513671875, "learning_rate": 9.831440586093157e-05, "loss": 4.7692, "step": 2484 },
    { "epoch": 11.142222222222221, "grad_norm": 5.07949161529541, "learning_rate": 9.827152163510693e-05, "loss": 4.7251, "step": 2507 },
    { "epoch": 11.244444444444444, "grad_norm": 7.340390682220459, "learning_rate": 9.82281083095948e-05, "loss": 4.7188, "step": 2530 },
    { "epoch": 11.346666666666668, "grad_norm": 5.695153713226318, "learning_rate": 9.818416636024461e-05, "loss": 4.7111, "step": 2553 },
    { "epoch": 11.448888888888888, "grad_norm": 5.70296573638916, "learning_rate": 9.813969626870002e-05, "loss": 4.7043, "step": 2576 },
    { "epoch": 11.551111111111112, "grad_norm": 5.775058269500732, "learning_rate": 9.809469852239359e-05, "loss": 4.6924, "step": 2599 },
    { "epoch": 11.653333333333332, "grad_norm": 7.319630146026611, "learning_rate": 9.804917361454145e-05, "loss": 4.6848, "step": 2622 },
    { "epoch": 11.755555555555556, "grad_norm": 7.945709705352783, "learning_rate": 9.800312204413793e-05, "loss": 4.6667, "step": 2645 },
    { "epoch": 11.857777777777777, "grad_norm": 7.591863632202148, "learning_rate": 9.795654431595e-05, "loss": 4.6778, "step": 2668 },
    { "epoch": 11.96, "grad_norm": 6.433276653289795, "learning_rate": 9.790944094051187e-05, "loss": 4.6699, "step": 2691 },
    { "epoch": 12.062222222222223, "grad_norm": 6.956933975219727, "learning_rate": 9.786181243411926e-05, "loss": 4.6113, "step": 2714 },
    { "epoch": 12.164444444444445, "grad_norm": 5.551136016845703, "learning_rate": 9.781365931882387e-05, "loss": 4.582, "step": 2737 },
    { "epoch": 12.266666666666667, "grad_norm": 7.214599609375, "learning_rate": 9.776498212242749e-05, "loss": 4.5932, "step": 2760 },
    { "epoch": 12.36888888888889, "grad_norm": 6.5685715675354, "learning_rate": 9.771578137847639e-05, "loss": 4.5896, "step": 2783 },
    { "epoch": 12.471111111111112, "grad_norm": 8.017729759216309, "learning_rate": 9.766605762625541e-05, "loss": 4.5579, "step": 2806 },
    { "epoch": 12.573333333333334, "grad_norm": 6.021265983581543, "learning_rate": 9.761581141078194e-05, "loss": 4.5715, "step": 2829 },
    { "epoch": 12.675555555555556, "grad_norm": 7.427931785583496, "learning_rate": 9.756504328280016e-05, "loss": 4.5681, "step": 2852 },
    { "epoch": 12.777777777777779, "grad_norm": 6.325420379638672, "learning_rate": 9.751375379877481e-05, "loss": 4.5695, "step": 2875 },
    { "epoch": 12.88, "grad_norm": 4.837381839752197, "learning_rate": 9.746194352088518e-05, "loss": 4.5321, "step": 2898 },
    { "epoch": 12.982222222222223, "grad_norm": 6.933470726013184, "learning_rate": 9.740961301701894e-05, "loss": 4.5286, "step": 2921 },
    { "epoch": 13.084444444444445, "grad_norm": 5.810832977294922, "learning_rate": 9.73567628607659e-05, "loss": 4.463, "step": 2944 },
    { "epoch": 13.186666666666667, "grad_norm": 7.62490177154541, "learning_rate": 9.730339363141175e-05, "loss": 4.462, "step": 2967 },
    { "epoch": 13.28888888888889, "grad_norm": 6.67575216293335, "learning_rate": 9.72495059139317e-05, "loss": 4.4402, "step": 2990 },
    { "epoch": 13.391111111111112, "grad_norm": 6.110825538635254, "learning_rate": 9.719510029898398e-05, "loss": 4.443, "step": 3013 },
    { "epoch": 13.493333333333334, "grad_norm": 7.317692279815674, "learning_rate": 9.714017738290358e-05, "loss": 4.4456, "step": 3036 },
    { "epoch": 13.595555555555556, "grad_norm": 6.189058303833008, "learning_rate": 9.708473776769544e-05, "loss": 4.4524, "step": 3059 },
    { "epoch": 13.697777777777778, "grad_norm": 5.6017632484436035, "learning_rate": 9.702878206102811e-05, "loss": 4.4234, "step": 3082 },
    { "epoch": 13.8, "grad_norm": 5.7952189445495605, "learning_rate": 9.697231087622691e-05, "loss": 4.4016, "step": 3105 },
    { "epoch": 13.902222222222223, "grad_norm": 5.7486677169799805, "learning_rate": 9.691532483226723e-05, "loss": 4.4106, "step": 3128 },
    { "epoch": 14.004444444444445, "grad_norm": 6.603976249694824, "learning_rate": 9.68578245537679e-05, "loss": 4.367, "step": 3151 },
    { "epoch": 14.106666666666667, "grad_norm": 6.593631744384766, "learning_rate": 9.679981067098414e-05, "loss": 4.3122, "step": 3174 },
    { "epoch": 14.20888888888889, "grad_norm": 6.519464015960693, "learning_rate": 9.674128381980072e-05, "loss": 4.3038, "step": 3197 },
    { "epoch": 14.311111111111112, "grad_norm": 7.691238880157471, "learning_rate": 9.668224464172508e-05, "loss": 4.3305, "step": 3220 },
    { "epoch": 14.413333333333334, "grad_norm": 5.136379718780518, "learning_rate": 9.66226937838802e-05, "loss": 4.3137, "step": 3243 },
    { "epoch": 14.515555555555556, "grad_norm": 5.727292537689209, "learning_rate": 9.65626318989975e-05, "loss": 4.3126, "step": 3266 },
    { "epoch": 14.617777777777778, "grad_norm": 7.882863998413086, "learning_rate": 9.650205964540978e-05, "loss": 4.2942, "step": 3289 },
    { "epoch": 14.72, "grad_norm": 7.945621013641357, "learning_rate": 9.64409776870439e-05, "loss": 4.3076, "step": 3312 },
    { "epoch": 14.822222222222223, "grad_norm": 6.543049335479736, "learning_rate": 9.637938669341356e-05, "loss": 4.2815, "step": 3335 },
    { "epoch": 14.924444444444445, "grad_norm": 5.685489654541016, "learning_rate": 9.631728733961194e-05, "loss": 4.2873, "step": 3358 },
    { "epoch": 15.026666666666667, "grad_norm": 5.528294563293457, "learning_rate": 9.625468030630432e-05, "loss": 4.2617, "step": 3381 },
    { "epoch": 15.12888888888889, "grad_norm": 7.666279315948486, "learning_rate": 9.619156627972064e-05, "loss": 4.2157, "step": 3404 },
    { "epoch": 15.231111111111112, "grad_norm": 6.189380645751953, "learning_rate": 9.612794595164786e-05, "loss": 4.207, "step": 3427 },
    { "epoch": 15.333333333333334, "grad_norm": 6.782273292541504, "learning_rate": 9.606382001942255e-05, "loss": 4.1977, "step": 3450 },
    { "epoch": 15.435555555555556, "grad_norm": 6.819105625152588, "learning_rate": 9.599918918592313e-05, "loss": 4.2046, "step": 3473 },
    { "epoch": 15.537777777777778, "grad_norm": 6.558395862579346, "learning_rate": 9.593405415956216e-05, "loss": 4.1959, "step": 3496 },
    { "epoch": 15.64, "grad_norm": 7.579700946807861, "learning_rate": 9.58684156542787e-05, "loss": 4.2004, "step": 3519 },
    { "epoch": 15.742222222222223, "grad_norm": 5.998022556304932, "learning_rate": 9.580227438953028e-05, "loss": 4.1972, "step": 3542 },
    { "epoch": 15.844444444444445, "grad_norm": 8.631059646606445, "learning_rate": 9.573563109028523e-05, "loss": 4.1674, "step": 3565 },
    { "epoch": 15.946666666666667, "grad_norm": 6.702101230621338, "learning_rate": 9.566848648701457e-05, "loss": 4.1303, "step": 3588 },
    { "epoch": 16.04888888888889, "grad_norm": 7.247947692871094, "learning_rate": 9.56008413156841e-05, "loss": 4.0834, "step": 3611 },
    { "epoch": 16.15111111111111, "grad_norm": 6.5919575691223145, "learning_rate": 9.553269631774631e-05, "loss": 4.0488, "step": 3634 },
    { "epoch": 16.253333333333334, "grad_norm": 8.66784381866455, "learning_rate": 9.54640522401322e-05, "loss": 4.0754, "step": 3657 },
    { "epoch": 16.355555555555554, "grad_norm": 7.605900764465332, "learning_rate": 9.539490983524316e-05, "loss": 4.0721, "step": 3680 },
    { "epoch": 16.45777777777778, "grad_norm": 7.925562381744385, "learning_rate": 9.532526986094273e-05, "loss": 4.0685, "step": 3703 },
    { "epoch": 16.56, "grad_norm": 7.180625915527344, "learning_rate": 9.525513308054819e-05, "loss": 4.0581, "step": 3726 },
    { "epoch": 16.662222222222223, "grad_norm": 6.261662483215332, "learning_rate": 9.518450026282233e-05, "loss": 4.0405, "step": 3749 },
    { "epoch": 16.764444444444443, "grad_norm": 5.739262580871582, "learning_rate": 9.511337218196494e-05, "loss": 4.0315, "step": 3772 },
    { "epoch": 16.866666666666667, "grad_norm": 6.229343891143799, "learning_rate": 9.504174961760435e-05, "loss": 4.036, "step": 3795 },
    { "epoch": 16.968888888888888, "grad_norm": 7.991888046264648, "learning_rate": 9.496963335478884e-05, "loss": 4.0707, "step": 3818 },
    { "epoch": 17.07111111111111, "grad_norm": 5.881919860839844, "learning_rate": 9.489702418397814e-05, "loss": 3.9782, "step": 3841 },
    { "epoch": 17.173333333333332, "grad_norm": 5.624960899353027, "learning_rate": 9.482392290103462e-05, "loss": 3.9473, "step": 3864 },
    { "epoch": 17.275555555555556, "grad_norm": 5.786345481872559, "learning_rate": 9.475033030721471e-05, "loss": 3.9561, "step": 3887 },
    { "epoch": 17.377777777777776, "grad_norm": 7.602824687957764, "learning_rate": 9.467624720916002e-05, "loss": 3.9605, "step": 3910 },
    { "epoch": 17.48, "grad_norm": 6.39411735534668, "learning_rate": 9.460167441888854e-05, "loss": 3.9324, "step": 3933 },
    { "epoch": 17.58222222222222, "grad_norm": 6.903740882873535, "learning_rate": 9.452661275378576e-05, "loss": 3.9302, "step": 3956 },
    { "epoch": 17.684444444444445, "grad_norm": 7.515189170837402, "learning_rate": 9.445106303659562e-05, "loss": 3.911, "step": 3979 },
    { "epoch": 17.786666666666665, "grad_norm": 6.514119625091553, "learning_rate": 9.43750260954116e-05, "loss": 3.9168, "step": 4002 },
    { "epoch": 17.88888888888889, "grad_norm": 5.5810370445251465, "learning_rate": 9.429850276366758e-05, "loss": 3.9236, "step": 4025 },
    { "epoch": 17.99111111111111, "grad_norm": 6.529542446136475, "learning_rate": 9.422149388012875e-05, "loss": 3.9076, "step": 4048 },
    { "epoch": 18.093333333333334, "grad_norm": 5.167507171630859, "learning_rate": 9.414400028888235e-05, "loss": 3.8211, "step": 4071 },
    { "epoch": 18.195555555555554, "grad_norm": 6.467238426208496, "learning_rate": 9.406602283932845e-05, "loss": 3.8423, "step": 4094 },
    { "epoch": 18.297777777777778, "grad_norm": 7.490845203399658, "learning_rate": 9.398756238617071e-05, "loss": 3.8308, "step": 4117 },
    { "epoch": 18.4, "grad_norm": 5.916659832000732, "learning_rate": 9.390861978940686e-05, "loss": 3.8273, "step": 4140 },
    { "epoch": 18.502222222222223, "grad_norm": 6.601635456085205, "learning_rate": 9.382919591431945e-05, "loss": 3.8316, "step": 4163 },
    { "epoch": 18.604444444444443, "grad_norm": 7.86677885055542, "learning_rate": 9.374929163146621e-05, "loss": 3.8223, "step": 4186 },
    { "epoch": 18.706666666666667, "grad_norm": 6.863983154296875, "learning_rate": 9.36689078166706e-05, "loss": 3.8244, "step": 4209 },
    { "epoch": 18.808888888888887, "grad_norm": 6.3487467765808105, "learning_rate": 9.35880453510122e-05, "loss": 3.7945, "step": 4232 },
    { "epoch": 18.91111111111111, "grad_norm": 7.521273612976074, "learning_rate": 9.350670512081702e-05, "loss": 3.8077, "step": 4255 },
    { "epoch": 19.013333333333332, "grad_norm": 7.834831714630127, "learning_rate": 9.34248880176478e-05, "loss": 3.7712, "step": 4278 },
    { "epoch": 19.115555555555556, "grad_norm": 6.245793342590332, "learning_rate": 9.334259493829423e-05, "loss": 3.6992, "step": 4301 },
    { "epoch": 19.217777777777776, "grad_norm": 7.780862808227539, "learning_rate": 9.325982678476317e-05, "loss": 3.6929, "step": 4324 },
    { "epoch": 19.32, "grad_norm": 7.378338813781738, "learning_rate": 9.317658446426871e-05, "loss": 3.7204, "step": 4347 },
    { "epoch": 19.42222222222222, "grad_norm": 6.953887462615967, "learning_rate": 9.309286888922219e-05, "loss": 3.7305, "step": 4370 },
    { "epoch": 19.524444444444445, "grad_norm": 6.669604301452637, "learning_rate": 9.300868097722235e-05, "loss": 3.7116, "step": 4393 },
    { "epoch": 19.626666666666665, "grad_norm": 6.265507221221924, "learning_rate": 9.292402165104506e-05, "loss": 3.6961, "step": 4416 },
    { "epoch": 19.72888888888889, "grad_norm": 6.823009967803955, "learning_rate": 9.28388918386334e-05, "loss": 3.6913, "step": 4439 },
    { "epoch": 19.83111111111111, "grad_norm": 6.928945064544678, "learning_rate": 9.275329247308737e-05, "loss": 3.7144, "step": 4462 },
    { "epoch": 19.933333333333334, "grad_norm": 7.16089391708374, "learning_rate": 9.26672244926537e-05, "loss": 3.7168, "step": 4485 },
    { "epoch": 20.035555555555554, "grad_norm": 6.4695563316345215, "learning_rate": 9.258068884071559e-05, "loss": 3.655, "step": 4508 },
    { "epoch": 20.137777777777778, "grad_norm": 6.902811527252197, "learning_rate": 9.249368646578227e-05, "loss": 3.5855, "step": 4531 },
    { "epoch": 20.24, "grad_norm": 7.336968898773193, "learning_rate": 9.24062183214788e-05, "loss": 3.5716, "step": 4554 },
    { "epoch": 20.342222222222222, "grad_norm": 6.539813995361328, "learning_rate": 9.231828536653537e-05, "loss": 3.6035, "step": 4577 },
    { "epoch": 20.444444444444443, "grad_norm": 8.689528465270996, "learning_rate": 9.222988856477702e-05, "loss": 3.6179, "step": 4600 },
    { "epoch": 20.546666666666667, "grad_norm": 8.209162712097168, "learning_rate": 9.214102888511287e-05, "loss": 3.6182, "step": 4623 },
    { "epoch": 20.648888888888887, "grad_norm": 7.320056438446045, "learning_rate": 9.20517073015257e-05, "loss": 3.5944, "step": 4646 },
    { "epoch": 20.75111111111111, "grad_norm": 7.204301357269287, "learning_rate": 9.196192479306114e-05, "loss": 3.5922, "step": 4669 },
    { "epoch": 20.85333333333333, "grad_norm": 7.85291051864624, "learning_rate": 9.187168234381692e-05, "loss": 3.5992, "step": 4692 },
    { "epoch": 20.955555555555556, "grad_norm": 6.276856422424316, "learning_rate": 9.178098094293222e-05, "loss": 3.5929, "step": 4715 },
    { "epoch": 21.057777777777776, "grad_norm": 5.835750102996826, "learning_rate": 9.168982158457672e-05, "loss": 3.5289, "step": 4738 },
    { "epoch": 21.16, "grad_norm": 6.322780609130859, "learning_rate": 9.159820526793969e-05, "loss": 3.4881, "step": 4761 },
    { "epoch": 21.26222222222222, "grad_norm": 7.341971397399902, "learning_rate": 9.150613299721916e-05, "loss": 3.4799, "step": 4784 },
    { "epoch": 21.364444444444445, "grad_norm": 6.387499809265137, "learning_rate": 9.14136057816107e-05, "loss": 3.4747, "step": 4807 },
    { "epoch": 21.466666666666665, "grad_norm": 7.271056175231934, "learning_rate": 9.132062463529665e-05, "loss": 3.4783, "step": 4830 },
    { "epoch": 21.56888888888889, "grad_norm": 7.323966026306152, "learning_rate": 9.122719057743473e-05, "loss": 3.4756, "step": 4853 },
    { "epoch": 21.67111111111111, "grad_norm": 7.535403251647949, "learning_rate": 9.113330463214699e-05, "loss": 3.4825, "step": 4876 },
    { "epoch": 21.773333333333333, "grad_norm": 5.771243095397949, "learning_rate": 9.103896782850865e-05, "loss": 3.4737, "step": 4899 },
    { "epoch": 21.875555555555554, "grad_norm": 8.020050048828125, "learning_rate": 9.094418120053667e-05, "loss": 3.4709, "step": 4922 },
    { "epoch": 21.977777777777778, "grad_norm": 6.315218448638916, "learning_rate": 9.08489457871785e-05, "loss": 3.4751, "step": 4945 },
    { "epoch": 22.08, "grad_norm": 7.77646017074585, "learning_rate": 9.075326263230073e-05, "loss": 3.4052, "step": 4968 },
    { "epoch": 22.182222222222222, "grad_norm": 6.549800872802734, "learning_rate": 9.065713278467755e-05, "loss": 3.3815, "step": 4991 },
    { "epoch": 22.284444444444443, "grad_norm": 6.499227523803711, "learning_rate": 9.056055729797938e-05, "loss": 3.3818, "step": 5014 },
    { "epoch": 22.386666666666667, "grad_norm": 7.827967643737793, "learning_rate": 9.046353723076117e-05, "loss": 3.3781, "step": 5037 },
    { "epoch": 22.488888888888887, "grad_norm": 8.61707592010498, "learning_rate": 9.036607364645094e-05, "loss": 3.362, "step": 5060 },
    { "epoch": 22.59111111111111, "grad_norm": 8.287631034851074, "learning_rate": 9.026816761333799e-05, "loss": 3.3951, "step": 5083 },
    { "epoch": 22.693333333333335, "grad_norm": 6.027954578399658, "learning_rate": 9.016982020456133e-05, "loss": 3.3988, "step": 5106 },
    { "epoch": 22.795555555555556, "grad_norm": 5.422713756561279, "learning_rate": 9.00710324980978e-05, "loss": 3.3986, "step": 5129 },
    { "epoch": 22.897777777777776, "grad_norm": 6.52266788482666, "learning_rate": 8.997180557675034e-05, "loss": 3.3685, "step": 5152 },
    { "epoch": 23.0, "grad_norm": 8.5319242477417, "learning_rate": 8.987214052813604e-05, "loss": 3.3852, "step": 5175 },
    { "epoch": 23.102222222222224, "grad_norm": 5.753627300262451, "learning_rate": 8.977203844467432e-05, "loss": 3.2861, "step": 5198 },
    { "epoch": 23.204444444444444, "grad_norm": 6.238333225250244, "learning_rate": 8.967150042357484e-05, "loss": 3.297, "step": 5221 },
    { "epoch": 23.306666666666665, "grad_norm": 7.126039505004883, "learning_rate": 8.957052756682556e-05, "loss": 3.3114, "step": 5244 },
    { "epoch": 23.40888888888889, "grad_norm": 7.4155426025390625, "learning_rate": 8.946912098118066e-05, "loss": 3.3054, "step": 5267 },
    { "epoch": 23.511111111111113, "grad_norm": 6.702388763427734, "learning_rate": 8.93672817781483e-05, "loss": 3.2675, "step": 5290 },
    { "epoch": 23.613333333333333, "grad_norm": 7.878185272216797, "learning_rate": 8.926501107397863e-05, "loss": 3.2968, "step": 5313 },
    { "epoch": 23.715555555555554, "grad_norm": 7.802605152130127, "learning_rate": 8.916230998965134e-05, "loss": 3.2743, "step": 5336 },
    { "epoch": 23.817777777777778, "grad_norm": 6.080660820007324, "learning_rate": 8.905917965086356e-05, "loss": 3.287, "step": 5359 },
    { "epoch": 23.92, "grad_norm": 7.292867660522461, "learning_rate": 8.895562118801738e-05, "loss": 3.2723, "step": 5382 },
    { "epoch": 24.022222222222222, "grad_norm": 6.736908435821533, "learning_rate": 8.885163573620754e-05, "loss": 3.2406, "step": 5405 },
    { "epoch": 24.124444444444446, "grad_norm": 5.8357343673706055, "learning_rate": 8.874722443520899e-05, "loss": 3.1797, "step": 5428 },
    { "epoch": 24.226666666666667, "grad_norm": 6.487481117248535, "learning_rate": 8.864238842946433e-05, "loss": 3.1876, "step": 5451 },
    { "epoch": 24.32888888888889, "grad_norm": 7.854300498962402, "learning_rate": 8.853712886807132e-05, "loss": 3.2056, "step": 5474 },
    { "epoch": 24.43111111111111, "grad_norm": 8.225058555603027, "learning_rate": 8.84314469047703e-05, "loss": 3.2518, "step": 5497 },
    { "epoch": 24.533333333333335, "grad_norm": 7.0223236083984375, "learning_rate": 8.832534369793153e-05, "loss": 3.2102, "step": 5520 },
    { "epoch": 24.635555555555555, "grad_norm": 7.004525661468506, "learning_rate": 8.821882041054239e-05, "loss": 3.1674, "step": 5543 },
    { "epoch": 24.73777777777778, "grad_norm": 7.304614543914795, "learning_rate": 8.811187821019486e-05, "loss": 3.188, "step": 5566 },
    { "epoch": 24.84, "grad_norm": 6.002228736877441, "learning_rate": 8.800451826907245e-05, "loss": 3.1785, "step": 5589 },
    { "epoch": 24.942222222222224, "grad_norm": 6.998710632324219, "learning_rate": 8.789674176393761e-05, "loss": 3.1713, "step": 5612 },
    { "epoch": 25.044444444444444, "grad_norm": 7.029483795166016, "learning_rate": 8.77885498761186e-05, "loss": 3.1521, "step": 5635 },
    { "epoch": 25.14666666666667, "grad_norm": 6.024033069610596, "learning_rate": 8.767994379149675e-05, "loss": 3.0885, "step": 5658 },
    { "epoch": 25.24888888888889, "grad_norm": 7.233892440795898, "learning_rate": 8.757092470049329e-05, "loss": 3.0891, "step": 5681 },
    { "epoch": 25.351111111111113, "grad_norm": 7.917546272277832, "learning_rate": 8.74614937980564e-05, "loss": 3.1085, "step": 5704 },
    { "epoch": 25.453333333333333, "grad_norm": 7.8942437171936035, "learning_rate": 8.735165228364809e-05, "loss": 3.0931, "step": 5727 },
    { "epoch": 25.555555555555557, "grad_norm": 7.184880256652832, "learning_rate": 8.724140136123106e-05, "loss": 3.1079, "step": 5750 },
    { "epoch": 25.657777777777778, "grad_norm": 5.8746137619018555, "learning_rate": 8.713074223925546e-05, "loss": 3.0924, "step": 5773 },
    { "epoch": 25.76, "grad_norm": 6.722870826721191, "learning_rate": 8.701967613064575e-05, "loss": 3.0918, "step": 5796 },
    { "epoch": 25.862222222222222, "grad_norm": 8.777771949768066, "learning_rate": 8.690820425278721e-05, "loss": 3.1046, "step": 5819 },
    { "epoch": 25.964444444444446, "grad_norm": 7.208896636962891, "learning_rate": 8.679632782751283e-05, "loss": 3.1053, "step": 5842 },
    { "epoch": 26.066666666666666, "grad_norm": 12.179716110229492, "learning_rate": 8.668404808108978e-05, "loss": 3.034, "step": 5865 },
    { "epoch": 26.16888888888889, "grad_norm": 6.9270501136779785, "learning_rate": 8.657136624420596e-05, "loss": 2.982, "step": 5888 },
    { "epoch": 26.27111111111111, "grad_norm": 6.495911598205566, "learning_rate": 8.645828355195658e-05, "loss": 2.9953, "step": 5911 },
    { "epoch": 26.373333333333335, "grad_norm": 6.193568229675293, "learning_rate": 8.634480124383057e-05, "loss": 3.0264, "step": 5934 },
    { "epoch": 26.475555555555555, "grad_norm": 7.5366034507751465, "learning_rate": 8.623092056369704e-05, "loss": 3.029, "step": 5957 },
    { "epoch": 26.57777777777778, "grad_norm": 7.380651473999023, "learning_rate": 8.611664275979157e-05, "loss": 3.0148, "step": 5980 },
    { "epoch": 26.68, "grad_norm": 6.579084396362305, "learning_rate": 8.600196908470265e-05, "loss": 3.0019, "step": 6003 },
    { "epoch": 26.782222222222224, "grad_norm": 7.964267253875732, "learning_rate": 8.588690079535779e-05, "loss": 3.0102, "step": 6026 },
    { "epoch": 26.884444444444444, "grad_norm": 7.465826034545898, "learning_rate": 8.577143915300993e-05, "loss": 2.9759, "step": 6049 },
    { "epoch": 26.986666666666668, "grad_norm": 6.584536552429199, "learning_rate": 8.56555854232234e-05, "loss": 2.9609, "step": 6072 },
    { "epoch": 27.08888888888889, "grad_norm": 6.6631550788879395, "learning_rate": 8.553934087586026e-05, "loss": 2.8921, "step": 6095 },
    { "epoch": 27.191111111111113, "grad_norm": 7.030783176422119, "learning_rate": 8.542270678506625e-05, "loss": 2.8946, "step": 6118 },
    { "epoch": 27.293333333333333, "grad_norm": 6.412444114685059, "learning_rate": 8.530568442925684e-05, "loss": 2.9002, "step": 6141 },
    { "epoch": 27.395555555555557, "grad_norm": 8.111526489257812, "learning_rate": 8.518827509110328e-05, "loss": 2.9037, "step": 6164 },
    { "epoch": 27.497777777777777, "grad_norm": 6.402091026306152, "learning_rate": 8.507048005751847e-05, "loss": 2.9006, "step": 6187 },
    { "epoch": 27.6, "grad_norm": 7.210970878601074, "learning_rate": 8.495230061964288e-05, "loss": 2.911, "step": 6210 },
    { "epoch": 27.702222222222222, "grad_norm": 9.301465034484863, "learning_rate": 8.48337380728304e-05, "loss": 2.915, "step": 6233 },
    { "epoch": 27.804444444444446, "grad_norm": 10.22038745880127, "learning_rate": 8.471479371663417e-05, "loss": 2.9234, "step": 6256 },
    { "epoch": 27.906666666666666, "grad_norm": 8.557666778564453, "learning_rate": 8.459546885479226e-05, "loss": 2.9312, "step": 6279 },
    { "epoch": 28.00888888888889, "grad_norm": 8.308337211608887, "learning_rate": 8.447576479521348e-05, "loss": 2.9055, "step": 6302 },
    { "epoch": 28.11111111111111, "grad_norm": 9.826993942260742, "learning_rate": 8.435568284996294e-05, "loss": 2.795, "step": 6325 },
    { "epoch": 28.213333333333335, "grad_norm": 7.39091157913208, "learning_rate": 8.423522433524776e-05, "loss": 2.7985, "step": 6348 },
    { "epoch": 28.315555555555555, "grad_norm": 7.943458557128906, "learning_rate": 8.411439057140257e-05, "loss": 2.804, "step": 6371 },
    { "epoch": 28.41777777777778, "grad_norm": 7.037588119506836, "learning_rate": 8.399318288287512e-05, "loss": 2.8196, "step": 6394 },
    { "epoch": 28.52, "grad_norm": 6.966550350189209, "learning_rate": 8.387160259821166e-05, "loss": 2.8037, "step": 6417 },
    { "epoch": 28.622222222222224, "grad_norm": 6.990281105041504, "learning_rate": 8.374965105004244e-05, "loss": 2.8049, "step": 6440 },
    { "epoch": 28.724444444444444, "grad_norm": 8.029483795166016, "learning_rate": 8.362732957506714e-05, "loss": 2.8056, "step": 6463 },
    { "epoch": 28.826666666666668, "grad_norm": 6.398525714874268, "learning_rate": 8.350463951404012e-05, "loss": 2.8254, "step": 6486 },
    { "epoch": 28.92888888888889, "grad_norm": 9.660991668701172, "learning_rate": 8.338158221175581e-05, "loss": 2.8516, "step": 6509 },
    { "epoch": 29.031111111111112, "grad_norm": 7.429766654968262, "learning_rate": 8.325815901703394e-05, "loss": 2.8115, "step": 6532 },
    { "epoch": 29.133333333333333, "grad_norm": 6.842705726623535, "learning_rate": 8.313437128270469e-05, "loss": 2.7238, "step": 6555 },
    { "epoch": 29.235555555555557, "grad_norm": 9.195459365844727, "learning_rate": 8.301022036559405e-05, "loss": 2.7192, "step": 6578 },
    { "epoch": 29.337777777777777, "grad_norm": 7.685567378997803, "learning_rate": 8.288570762650869e-05, "loss": 2.7009, "step": 6601 },
    { "epoch": 29.44, "grad_norm": 6.384602069854736, "learning_rate": 8.276083443022126e-05, "loss": 2.7286, "step": 6624 },
    { "epoch": 29.54222222222222, "grad_norm": 7.564410209655762, "learning_rate": 8.263560214545532e-05, "loss": 2.7405, "step": 6647 },
    { "epoch": 29.644444444444446, "grad_norm": 6.835319995880127, "learning_rate": 8.251001214487039e-05, "loss": 2.7197, "step": 6670 },
    { "epoch": 29.746666666666666, "grad_norm": 7.009396553039551, "learning_rate": 8.238406580504683e-05, "loss": 2.7322, "step": 6693 },
    { "epoch": 29.84888888888889, "grad_norm": 6.862404823303223, "learning_rate": 8.225776450647082e-05, "loss": 2.7476, "step": 6716 },
    { "epoch": 29.95111111111111, "grad_norm": 6.345396041870117, "learning_rate": 8.213110963351928e-05, "loss": 2.7317, "step": 6739 },
    { "epoch": 30.053333333333335, "grad_norm": 7.607011795043945, "learning_rate": 8.200410257444451e-05, "loss": 2.6859, "step": 6762 },
    { "epoch": 30.155555555555555, "grad_norm": 6.952041149139404, "learning_rate": 8.187674472135915e-05, "loss": 2.6587, "step": 6785 },
    { "epoch": 30.25777777777778, "grad_norm": 6.717074394226074, "learning_rate": 8.17490374702209e-05, "loss": 2.6636, "step": 6808 },
    { "epoch": 30.36, "grad_norm": 7.299156665802002, "learning_rate": 8.162098222081711e-05, "loss": 2.6731, "step": 6831 },
    { "epoch": 30.462222222222223, "grad_norm": 7.86132287979126, "learning_rate": 8.149258037674952e-05, "loss": 2.6568, "step": 6854 },
    { "epoch": 30.564444444444444, "grad_norm": 6.957241535186768, "learning_rate": 8.13638333454189e-05, "loss": 2.621, "step": 6877 },
    { "epoch": 30.666666666666668, "grad_norm": 7.0929741859436035, "learning_rate": 8.123474253800957e-05, "loss": 2.6453, "step": 6900 },
    { "epoch": 30.76888888888889, "grad_norm": 7.3665385246276855, "learning_rate": 8.110530936947392e-05, "loss": 2.6668, "step": 6923 },
    { "epoch": 30.871111111111112, "grad_norm": 8.744823455810547, "learning_rate": 8.097553525851693e-05, "loss": 2.6759, "step": 6946 },
    { "epoch": 30.973333333333333, "grad_norm": 6.603512287139893, "learning_rate": 8.084542162758067e-05, "loss": 2.6677, "step": 6969 },
    { "epoch": 31.075555555555557, "grad_norm": 6.355960369110107, "learning_rate": 8.071496990282861e-05, "loss": 2.6044, "step": 6992 },
    { "epoch": 31.177777777777777, "grad_norm": 6.957365989685059, "learning_rate": 8.058418151413005e-05, "loss": 2.5647, "step": 7015 },
    { "epoch": 31.28, "grad_norm": 7.455416679382324, "learning_rate": 8.045305789504444e-05, "loss": 2.5981, "step": 7038 },
    { "epoch": 31.38222222222222, "grad_norm": 6.41038703918457, "learning_rate": 8.032160048280566e-05, "loss": 2.6026, "step": 7061 },
    { "epoch": 31.484444444444446, "grad_norm": 8.298896789550781, "learning_rate": 8.018981071830622e-05, "loss": 2.5975, "step": 7084 },
    { "epoch": 31.586666666666666, "grad_norm": 9.506787300109863, "learning_rate": 8.005769004608156e-05, "loss": 2.6356, "step": 7107 },
    { "epoch": 31.68888888888889, "grad_norm": 8.870840072631836, "learning_rate": 7.992523991429419e-05, "loss": 2.6015, "step": 7130 },
    { "epoch": 31.79111111111111, "grad_norm": 8.160204887390137, "learning_rate": 7.979246177471773e-05, "loss": 2.593, "step": 7153 },
    { "epoch": 31.893333333333334, "grad_norm": 6.366309642791748, "learning_rate": 7.96593570827211e-05, "loss": 2.5548, "step": 7176 },
    { "epoch": 31.995555555555555, "grad_norm": 6.812814712524414, "learning_rate": 7.952592729725254e-05, "loss": 2.5352, "step": 7199 },
    { "epoch": 32.09777777777778, "grad_norm": 6.476632118225098, "learning_rate": 7.939217388082361e-05, "loss": 2.4694, "step": 7222 },
    { "epoch": 32.2, "grad_norm": 7.325323104858398, "learning_rate": 7.925809829949312e-05, "loss": 2.4581, "step": 7245 },
    { "epoch": 32.30222222222222, "grad_norm": 7.190999984741211, "learning_rate": 7.912370202285113e-05, "loss": 2.4829, "step": 7268 },
    { "epoch": 32.404444444444444, "grad_norm": 7.949245452880859, "learning_rate": 7.898898652400281e-05, "loss": 2.5134, "step": 7291 },
    { "epoch": 32.50666666666667, "grad_norm": 7.711633682250977, "learning_rate": 7.88539532795523e-05, "loss": 2.5374, "step": 7314 },
    { "epoch": 32.60888888888889, "grad_norm": 7.286764621734619, "learning_rate": 7.87186037695865e-05, "loss": 2.4946, "step": 7337 },
    { "epoch": 32.71111111111111, "grad_norm": 7.322375774383545, "learning_rate": 7.858293947765892e-05, "loss": 2.5086, "step": 7360 },
    { "epoch": 32.81333333333333, "grad_norm": 7.134939670562744, "learning_rate": 7.844696189077328e-05, "loss": 2.4963, "step": 7383 },
    { "epoch": 32.91555555555556, "grad_norm": 7.648177623748779, "learning_rate": 7.831067249936734e-05, "loss": 2.4857, "step": 7406 },
    { "epoch": 33.01777777777778, "grad_norm": 6.730453968048096, "learning_rate": 7.817407279729657e-05, "loss": 2.4906, "step": 7429 },
    { "epoch": 33.12, "grad_norm": 6.662753105163574, "learning_rate": 7.803716428181763e-05, "loss": 2.4054, "step": 7452 },
    { "epoch": 33.22222222222222, "grad_norm": 6.583335876464844, "learning_rate": 7.789994845357212e-05, "loss": 2.3762, "step": 7475 },
    { "epoch": 33.324444444444445, "grad_norm": 6.661638259887695, "learning_rate": 7.776242681657006e-05, "loss": 2.4166, "step": 7498 },
    { "epoch": 33.42666666666667, "grad_norm": 6.506235599517822, "learning_rate": 7.762460087817343e-05, "loss": 2.4081, "step": 7521 },
    { "epoch": 33.528888888888886, "grad_norm": 8.114941596984863, "learning_rate": 7.748647214907954e-05, "loss": 2.4189, "step": 7544 },
    { "epoch": 33.63111111111111, "grad_norm": 7.059467315673828, "learning_rate": 7.73480421433047e-05, "loss": 2.4416, "step": 7567 },
    { "epoch": 33.733333333333334, "grad_norm": 9.18146800994873, "learning_rate": 7.720931237816735e-05, "loss": 2.4374, "step": 7590 },
    { "epoch": 33.83555555555556, "grad_norm": 7.458983898162842, "learning_rate": 7.707028437427164e-05, "loss": 2.4392, "step": 7613 },
    { "epoch": 33.937777777777775, "grad_norm": 6.761877536773682, "learning_rate": 7.693095965549069e-05, "loss": 2.4354, "step": 7636 },
    { "epoch": 34.04, "grad_norm": 7.720556735992432, "learning_rate": 7.679133974894983e-05, "loss": 2.3844, "step": 7659 },
    { "epoch": 34.14222222222222, "grad_norm": 6.558327674865723, "learning_rate": 7.665142618501e-05, "loss": 2.3599, "step": 7682 },
    { "epoch": 34.24444444444445, "grad_norm": 6.790546894073486, "learning_rate": 7.651122049725082e-05, "loss": 2.3541, "step": 7705 },
    { "epoch": 34.346666666666664, "grad_norm": 6.559151649475098, "learning_rate": 7.637072422245386e-05, "loss": 2.3684, "step": 7728 },
    { "epoch": 34.44888888888889, "grad_norm": 8.255489349365234, "learning_rate": 7.622993890058582e-05, "loss": 2.3799, "step": 7751 },
    { "epoch": 34.55111111111111, "grad_norm": 8.185545921325684, "learning_rate": 7.60888660747816e-05, "loss": 2.3723, "step": 7774 },
    { "epoch": 34.653333333333336, "grad_norm": 7.4899516105651855, "learning_rate": 7.594750729132743e-05, "loss": 2.3813, "step": 7797 },
    { "epoch": 34.75555555555555, "grad_norm": 6.652093887329102, "learning_rate": 7.580586409964382e-05, "loss": 2.3641, "step": 7820 },
    { "epoch": 34.85777777777778, "grad_norm": 6.916318893432617, "learning_rate": 7.566393805226874e-05, "loss": 2.3689, "step": 7843 },
    { "epoch": 34.96, "grad_norm": 7.0521559715271, "learning_rate": 7.552173070484048e-05, "loss": 2.3528, "step": 7866 },
    { "epoch": 35.062222222222225, "grad_norm": 7.043063163757324, "learning_rate": 7.537924361608062e-05, "loss": 2.2977, "step": 7889 },
    { "epoch": 35.16444444444444, "grad_norm": 6.285613059997559, "learning_rate": 7.523647834777698e-05, "loss": 2.2593, "step": 7912 },
    { "epoch": 35.266666666666666, "grad_norm": 7.13001012802124, "learning_rate": 7.509343646476646e-05, "loss": 2.268, "step": 7935 },
    { "epoch": 35.36888888888889, "grad_norm": 6.38799524307251, "learning_rate": 7.495011953491793e-05, "loss": 2.291, "step": 7958 },
    { "epoch": 35.471111111111114, "grad_norm": 7.488864421844482, "learning_rate": 7.480652912911501e-05, "loss": 2.3234, "step": 7981 },
    { "epoch": 35.57333333333333, "grad_norm": 6.8178558349609375, "learning_rate": 7.466266682123888e-05, "loss": 2.3204, "step": 8004 },
    { "epoch": 35.675555555555555, "grad_norm": 7.1541748046875, "learning_rate": 7.451853418815097e-05, "loss": 2.3137, "step": 8027 },
    { "epoch": 35.77777777777778, "grad_norm": 8.040066719055176, "learning_rate": 7.437413280967578e-05, "loss": 2.3173, "step": 8050 },
    { "epoch": 35.88, "grad_norm": 8.158806800842285, "learning_rate": 7.422946426858345e-05, "loss": 2.2952, "step": 8073 },
    { "epoch": 35.98222222222222, "grad_norm": 7.60796594619751, "learning_rate": 7.408453015057252e-05, "loss": 2.2707, "step": 8096 },
    { "epoch": 36.08444444444444, "grad_norm": 6.903555870056152, "learning_rate": 7.393933204425244e-05, "loss": 2.2153, "step": 8119 },
    { "epoch": 36.18666666666667, "grad_norm": 7.1362624168396, "learning_rate": 7.379387154112625e-05, "loss": 2.2045, "step": 8142 },
    { "epoch": 36.28888888888889, "grad_norm": 7.824875354766846, "learning_rate": 7.364815023557306e-05, "loss": 2.215, "step": 8165 },
    { "epoch": 36.39111111111111, "grad_norm": 10.668073654174805, "learning_rate": 7.350216972483064e-05, "loss": 2.2303, "step": 8188 },
    { "epoch": 36.49333333333333, "grad_norm": 5.577554225921631, "learning_rate": 7.33559316089779e-05, "loss": 2.2175, "step": 8211 },
    { "epoch": 36.595555555555556, "grad_norm": 6.902368545532227, "learning_rate": 7.320943749091728e-05, "loss": 2.2207, "step": 8234 },
    { "epoch": 36.69777777777778, "grad_norm": 6.997749328613281, "learning_rate": 7.30626889763573e-05, "loss": 2.2525, "step": 8257 },
    { "epoch": 36.8, "grad_norm": 7.666829586029053, "learning_rate": 7.291568767379484e-05, "loss": 2.2427, "step": 8280 },
    { "epoch": 36.90222222222222, "grad_norm": 6.811129093170166, "learning_rate": 7.27684351944976e-05, "loss": 2.25, "step": 8303 },
    { "epoch": 37.004444444444445, "grad_norm": 5.935613632202148, "learning_rate": 7.262093315248641e-05, "loss": 2.2459, "step": 8326 },
    { "epoch": 37.10666666666667, "grad_norm": 6.339777946472168, "learning_rate": 7.24731831645175e-05, "loss": 2.167, "step": 8349 },
    { "epoch": 37.208888888888886, "grad_norm": 7.560238361358643, "learning_rate": 7.232518685006485e-05, "loss": 2.1952, "step": 8372 },
    { "epoch": 37.31111111111111, "grad_norm": 6.586178779602051, "learning_rate": 7.21769458313024e-05, "loss": 2.1791, "step": 8395 },
    { "epoch": 37.413333333333334, "grad_norm": 7.019660949707031, "learning_rate": 7.20284617330862e-05, "loss": 2.1754, "step": 8418 },
    { "epoch": 37.51555555555556, "grad_norm": 7.03871488571167, "learning_rate": 7.187973618293678e-05, "loss": 2.1585, "step": 8441 },
    { "epoch": 37.617777777777775, "grad_norm": 6.066256046295166, "learning_rate": 7.173077081102114e-05, "loss": 2.1424, "step": 8464 },
    { "epoch": 37.72, "grad_norm": 6.991265773773193, "learning_rate": 7.158156725013493e-05, "loss": 2.1577, "step": 8487 },
    { "epoch": 37.82222222222222, "grad_norm": 8.248811721801758, "learning_rate": 7.14321271356846e-05, "loss": 2.1603, "step": 8510 },
    { "epoch": 37.92444444444445, "grad_norm": 8.15676212310791, "learning_rate": 7.128245210566947e-05, "loss": 2.1695, "step": 8533 },
    { "epoch": 38.026666666666664, "grad_norm": 7.107559680938721, "learning_rate": 7.113254380066367e-05, "loss": 2.1488, "step": 8556 },
    { "epoch": 38.12888888888889, "grad_norm": 8.755867004394531, "learning_rate": 7.098240386379831e-05, "loss": 2.1009, "step": 8579 },
    { "epoch": 38.23111111111111, "grad_norm": 7.037129878997803, "learning_rate": 7.083203394074334e-05, "loss": 2.0954, "step": 8602 },
    { "epoch": 38.333333333333336,
|
"grad_norm": 6.437880039215088, |
|
"learning_rate": 7.068143567968957e-05, |
|
"loss": 2.085, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 38.43555555555555, |
|
"grad_norm": 10.530925750732422, |
|
"learning_rate": 7.053061073133067e-05, |
|
"loss": 2.1242, |
|
"step": 8648 |
|
}, |
|
{ |
|
"epoch": 38.53777777777778, |
|
"grad_norm": 7.10654878616333, |
|
"learning_rate": 7.037956074884493e-05, |
|
"loss": 2.1354, |
|
"step": 8671 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"grad_norm": 6.740297794342041, |
|
"learning_rate": 7.022828738787724e-05, |
|
"loss": 2.1365, |
|
"step": 8694 |
|
}, |
|
{ |
|
"epoch": 38.742222222222225, |
|
"grad_norm": 7.16520357131958, |
|
"learning_rate": 7.007679230652095e-05, |
|
"loss": 2.1163, |
|
"step": 8717 |
|
}, |
|
{ |
|
"epoch": 38.84444444444444, |
|
"grad_norm": 7.305176258087158, |
|
"learning_rate": 6.992507716529965e-05, |
|
"loss": 2.1429, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 38.946666666666665, |
|
"grad_norm": 5.924234390258789, |
|
"learning_rate": 6.977314362714898e-05, |
|
"loss": 2.1132, |
|
"step": 8763 |
|
}, |
|
{ |
|
"epoch": 39.04888888888889, |
|
"grad_norm": 8.262660026550293, |
|
"learning_rate": 6.962099335739837e-05, |
|
"loss": 2.0614, |
|
"step": 8786 |
|
}, |
|
{ |
|
"epoch": 39.15111111111111, |
|
"grad_norm": 7.352762699127197, |
|
"learning_rate": 6.946862802375292e-05, |
|
"loss": 2.0194, |
|
"step": 8809 |
|
}, |
|
{ |
|
"epoch": 39.25333333333333, |
|
"grad_norm": 6.5161824226379395, |
|
"learning_rate": 6.931604929627495e-05, |
|
"loss": 2.0356, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 39.355555555555554, |
|
"grad_norm": 6.718994140625, |
|
"learning_rate": 6.916325884736576e-05, |
|
"loss": 2.0442, |
|
"step": 8855 |
|
}, |
|
{ |
|
"epoch": 39.45777777777778, |
|
"grad_norm": 6.267631530761719, |
|
"learning_rate": 6.901025835174739e-05, |
|
"loss": 2.0456, |
|
"step": 8878 |
|
}, |
|
{ |
|
"epoch": 39.56, |
|
"grad_norm": 6.105040550231934, |
|
"learning_rate": 6.885704948644411e-05, |
|
"loss": 2.0319, |
|
"step": 8901 |
|
}, |
|
{ |
|
"epoch": 39.66222222222222, |
|
"grad_norm": 6.807146072387695, |
|
"learning_rate": 6.870363393076413e-05, |
|
"loss": 2.051, |
|
"step": 8924 |
|
}, |
|
{ |
|
"epoch": 39.76444444444444, |
|
"grad_norm": 6.0141987800598145, |
|
"learning_rate": 6.855001336628118e-05, |
|
"loss": 2.0376, |
|
"step": 8947 |
|
}, |
|
{ |
|
"epoch": 39.86666666666667, |
|
"grad_norm": 7.84182596206665, |
|
"learning_rate": 6.839618947681609e-05, |
|
"loss": 2.0596, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 39.96888888888889, |
|
"grad_norm": 8.566624641418457, |
|
"learning_rate": 6.824216394841825e-05, |
|
"loss": 2.0607, |
|
"step": 8993 |
|
}, |
|
{ |
|
"epoch": 40.07111111111111, |
|
"grad_norm": 6.4133992195129395, |
|
"learning_rate": 6.808793846934729e-05, |
|
"loss": 1.9994, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 40.17333333333333, |
|
"grad_norm": 10.160492897033691, |
|
"learning_rate": 6.79335147300544e-05, |
|
"loss": 1.9999, |
|
"step": 9039 |
|
}, |
|
{ |
|
"epoch": 40.275555555555556, |
|
"grad_norm": 6.391870021820068, |
|
"learning_rate": 6.777889442316394e-05, |
|
"loss": 1.9972, |
|
"step": 9062 |
|
}, |
|
{ |
|
"epoch": 40.37777777777778, |
|
"grad_norm": 9.107426643371582, |
|
"learning_rate": 6.762407924345479e-05, |
|
"loss": 1.9891, |
|
"step": 9085 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"grad_norm": 6.959272861480713, |
|
"learning_rate": 6.746907088784182e-05, |
|
"loss": 1.9765, |
|
"step": 9108 |
|
}, |
|
{ |
|
"epoch": 40.58222222222222, |
|
"grad_norm": 6.614034175872803, |
|
"learning_rate": 6.73138710553573e-05, |
|
"loss": 1.993, |
|
"step": 9131 |
|
}, |
|
{ |
|
"epoch": 40.684444444444445, |
|
"grad_norm": 7.331613063812256, |
|
"learning_rate": 6.715848144713227e-05, |
|
"loss": 1.9826, |
|
"step": 9154 |
|
}, |
|
{ |
|
"epoch": 40.78666666666667, |
|
"grad_norm": 8.619832992553711, |
|
"learning_rate": 6.700290376637782e-05, |
|
"loss": 2.0247, |
|
"step": 9177 |
|
}, |
|
{ |
|
"epoch": 40.888888888888886, |
|
"grad_norm": 7.282753944396973, |
|
"learning_rate": 6.684713971836656e-05, |
|
"loss": 2.0123, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 40.99111111111111, |
|
"grad_norm": 7.198232173919678, |
|
"learning_rate": 6.669119101041383e-05, |
|
"loss": 2.0095, |
|
"step": 9223 |
|
}, |
|
{ |
|
"epoch": 41.093333333333334, |
|
"grad_norm": 6.148073673248291, |
|
"learning_rate": 6.6535059351859e-05, |
|
"loss": 1.9284, |
|
"step": 9246 |
|
}, |
|
{ |
|
"epoch": 41.19555555555556, |
|
"grad_norm": 7.000942230224609, |
|
"learning_rate": 6.637874645404673e-05, |
|
"loss": 1.9308, |
|
"step": 9269 |
|
}, |
|
{ |
|
"epoch": 41.297777777777775, |
|
"grad_norm": 9.497756004333496, |
|
"learning_rate": 6.622225403030828e-05, |
|
"loss": 1.9316, |
|
"step": 9292 |
|
}, |
|
{ |
|
"epoch": 41.4, |
|
"grad_norm": 6.189666748046875, |
|
"learning_rate": 6.606558379594262e-05, |
|
"loss": 1.9304, |
|
"step": 9315 |
|
}, |
|
{ |
|
"epoch": 41.50222222222222, |
|
"grad_norm": 6.823606014251709, |
|
"learning_rate": 6.590873746819772e-05, |
|
"loss": 1.9582, |
|
"step": 9338 |
|
}, |
|
{ |
|
"epoch": 41.60444444444445, |
|
"grad_norm": 6.261486530303955, |
|
"learning_rate": 6.575171676625169e-05, |
|
"loss": 1.9322, |
|
"step": 9361 |
|
}, |
|
{ |
|
"epoch": 41.70666666666666, |
|
"grad_norm": 6.920318603515625, |
|
"learning_rate": 6.559452341119389e-05, |
|
"loss": 1.9533, |
|
"step": 9384 |
|
}, |
|
{ |
|
"epoch": 41.80888888888889, |
|
"grad_norm": 7.246551513671875, |
|
"learning_rate": 6.543715912600621e-05, |
|
"loss": 1.9548, |
|
"step": 9407 |
|
}, |
|
{ |
|
"epoch": 41.91111111111111, |
|
"grad_norm": 6.377082824707031, |
|
"learning_rate": 6.527962563554402e-05, |
|
"loss": 1.9709, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 42.013333333333335, |
|
"grad_norm": 7.362649440765381, |
|
"learning_rate": 6.512192466651735e-05, |
|
"loss": 1.9402, |
|
"step": 9453 |
|
}, |
|
{ |
|
"epoch": 42.11555555555555, |
|
"grad_norm": 9.08193588256836, |
|
"learning_rate": 6.496405794747193e-05, |
|
"loss": 1.8674, |
|
"step": 9476 |
|
}, |
|
{ |
|
"epoch": 42.217777777777776, |
|
"grad_norm": 6.658238410949707, |
|
"learning_rate": 6.480602720877029e-05, |
|
"loss": 1.8556, |
|
"step": 9499 |
|
}, |
|
{ |
|
"epoch": 42.32, |
|
"grad_norm": 6.951099395751953, |
|
"learning_rate": 6.464783418257277e-05, |
|
"loss": 1.8759, |
|
"step": 9522 |
|
}, |
|
{ |
|
"epoch": 42.422222222222224, |
|
"grad_norm": 8.758234977722168, |
|
"learning_rate": 6.448948060281847e-05, |
|
"loss": 1.8712, |
|
"step": 9545 |
|
}, |
|
{ |
|
"epoch": 42.52444444444444, |
|
"grad_norm": 6.225131988525391, |
|
"learning_rate": 6.433096820520639e-05, |
|
"loss": 1.8857, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 42.626666666666665, |
|
"grad_norm": 7.351943492889404, |
|
"learning_rate": 6.417229872717624e-05, |
|
"loss": 1.8809, |
|
"step": 9591 |
|
}, |
|
{ |
|
"epoch": 42.72888888888889, |
|
"grad_norm": 7.482339859008789, |
|
"learning_rate": 6.401347390788952e-05, |
|
"loss": 1.8694, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 42.83111111111111, |
|
"grad_norm": 6.971664905548096, |
|
"learning_rate": 6.385449548821037e-05, |
|
"loss": 1.8744, |
|
"step": 9637 |
|
}, |
|
{ |
|
"epoch": 42.93333333333333, |
|
"grad_norm": 6.296336650848389, |
|
"learning_rate": 6.36953652106866e-05, |
|
"loss": 1.8966, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 43.035555555555554, |
|
"grad_norm": 6.986079216003418, |
|
"learning_rate": 6.353608481953042e-05, |
|
"loss": 1.8555, |
|
"step": 9683 |
|
}, |
|
{ |
|
"epoch": 43.13777777777778, |
|
"grad_norm": 5.542973041534424, |
|
"learning_rate": 6.337665606059953e-05, |
|
"loss": 1.8185, |
|
"step": 9706 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"grad_norm": 7.133216381072998, |
|
"learning_rate": 6.321708068137779e-05, |
|
"loss": 1.8241, |
|
"step": 9729 |
|
}, |
|
{ |
|
"epoch": 43.34222222222222, |
|
"grad_norm": 6.318929672241211, |
|
"learning_rate": 6.305736043095619e-05, |
|
"loss": 1.8372, |
|
"step": 9752 |
|
}, |
|
{ |
|
"epoch": 43.44444444444444, |
|
"grad_norm": 6.268241882324219, |
|
"learning_rate": 6.289749706001365e-05, |
|
"loss": 1.8602, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 43.54666666666667, |
|
"grad_norm": 5.881213665008545, |
|
"learning_rate": 6.273749232079778e-05, |
|
"loss": 1.8439, |
|
"step": 9798 |
|
}, |
|
{ |
|
"epoch": 43.64888888888889, |
|
"grad_norm": 6.6124186515808105, |
|
"learning_rate": 6.257734796710575e-05, |
|
"loss": 1.8428, |
|
"step": 9821 |
|
}, |
|
{ |
|
"epoch": 43.75111111111111, |
|
"grad_norm": 7.996447563171387, |
|
"learning_rate": 6.241706575426504e-05, |
|
"loss": 1.8354, |
|
"step": 9844 |
|
}, |
|
{ |
|
"epoch": 43.85333333333333, |
|
"grad_norm": 7.1598639488220215, |
|
"learning_rate": 6.225664743911414e-05, |
|
"loss": 1.8185, |
|
"step": 9867 |
|
}, |
|
{ |
|
"epoch": 43.955555555555556, |
|
"grad_norm": 7.8854265213012695, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 1.832, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 44.05777777777778, |
|
"grad_norm": 8.291993141174316, |
|
"learning_rate": 6.193540953667564e-05, |
|
"loss": 1.7871, |
|
"step": 9913 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"grad_norm": 8.600836753845215, |
|
"learning_rate": 6.177459347044703e-05, |
|
"loss": 1.7882, |
|
"step": 9936 |
|
}, |
|
{ |
|
"epoch": 44.26222222222222, |
|
"grad_norm": 8.065147399902344, |
|
"learning_rate": 6.161364834398755e-05, |
|
"loss": 1.7799, |
|
"step": 9959 |
|
}, |
|
{ |
|
"epoch": 44.364444444444445, |
|
"grad_norm": 8.459796905517578, |
|
"learning_rate": 6.145257592140188e-05, |
|
"loss": 1.763, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 44.46666666666667, |
|
"grad_norm": 6.006131649017334, |
|
"learning_rate": 6.129137796818997e-05, |
|
"loss": 1.7885, |
|
"step": 10005 |
|
}, |
|
{ |
|
"epoch": 44.568888888888885, |
|
"grad_norm": 8.034002304077148, |
|
"learning_rate": 6.113005625122767e-05, |
|
"loss": 1.8008, |
|
"step": 10028 |
|
}, |
|
{ |
|
"epoch": 44.67111111111111, |
|
"grad_norm": 6.57339334487915, |
|
"learning_rate": 6.09686125387474e-05, |
|
"loss": 1.786, |
|
"step": 10051 |
|
}, |
|
{ |
|
"epoch": 44.77333333333333, |
|
"grad_norm": 7.233739376068115, |
|
"learning_rate": 6.080704860031879e-05, |
|
"loss": 1.7973, |
|
"step": 10074 |
|
}, |
|
{ |
|
"epoch": 44.87555555555556, |
|
"grad_norm": 7.365921497344971, |
|
"learning_rate": 6.0645366206829244e-05, |
|
"loss": 1.8094, |
|
"step": 10097 |
|
}, |
|
{ |
|
"epoch": 44.977777777777774, |
|
"grad_norm": 7.772608280181885, |
|
"learning_rate": 6.048356713046452e-05, |
|
"loss": 1.7963, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 45.08, |
|
"grad_norm": 6.320626258850098, |
|
"learning_rate": 6.032165314468935e-05, |
|
"loss": 1.7384, |
|
"step": 10143 |
|
}, |
|
{ |
|
"epoch": 45.18222222222222, |
|
"grad_norm": 6.214219093322754, |
|
"learning_rate": 6.015962602422796e-05, |
|
"loss": 1.7253, |
|
"step": 10166 |
|
}, |
|
{ |
|
"epoch": 45.284444444444446, |
|
"grad_norm": 6.484301567077637, |
|
"learning_rate": 5.999748754504465e-05, |
|
"loss": 1.7361, |
|
"step": 10189 |
|
}, |
|
{ |
|
"epoch": 45.38666666666666, |
|
"grad_norm": 8.989522933959961, |
|
"learning_rate": 5.9835239484324304e-05, |
|
"loss": 1.7443, |
|
"step": 10212 |
|
}, |
|
{ |
|
"epoch": 45.48888888888889, |
|
"grad_norm": 10.29185676574707, |
|
"learning_rate": 5.967288362045291e-05, |
|
"loss": 1.7423, |
|
"step": 10235 |
|
}, |
|
{ |
|
"epoch": 45.59111111111111, |
|
"grad_norm": 7.059528350830078, |
|
"learning_rate": 5.951042173299811e-05, |
|
"loss": 1.7292, |
|
"step": 10258 |
|
}, |
|
{ |
|
"epoch": 45.693333333333335, |
|
"grad_norm": 6.192359447479248, |
|
"learning_rate": 5.9347855602689616e-05, |
|
"loss": 1.7204, |
|
"step": 10281 |
|
}, |
|
{ |
|
"epoch": 45.79555555555555, |
|
"grad_norm": 6.398216247558594, |
|
"learning_rate": 5.918518701139978e-05, |
|
"loss": 1.7395, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 45.897777777777776, |
|
"grad_norm": 6.21365213394165, |
|
"learning_rate": 5.902241774212398e-05, |
|
"loss": 1.7343, |
|
"step": 10327 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 6.119551658630371, |
|
"learning_rate": 5.885954957896115e-05, |
|
"loss": 1.7463, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 46.102222222222224, |
|
"grad_norm": 5.506466865539551, |
|
"learning_rate": 5.8696584307094146e-05, |
|
"loss": 1.657, |
|
"step": 10373 |
|
}, |
|
{ |
|
"epoch": 46.20444444444445, |
|
"grad_norm": 6.575307369232178, |
|
"learning_rate": 5.853352371277029e-05, |
|
"loss": 1.6622, |
|
"step": 10396 |
|
}, |
|
{ |
|
"epoch": 46.306666666666665, |
|
"grad_norm": 6.451313018798828, |
|
"learning_rate": 5.8370369583281634e-05, |
|
"loss": 1.6861, |
|
"step": 10419 |
|
}, |
|
{ |
|
"epoch": 46.40888888888889, |
|
"grad_norm": 7.1156816482543945, |
|
"learning_rate": 5.820712370694558e-05, |
|
"loss": 1.6859, |
|
"step": 10442 |
|
}, |
|
{ |
|
"epoch": 46.51111111111111, |
|
"grad_norm": 6.124991416931152, |
|
"learning_rate": 5.8043787873085044e-05, |
|
"loss": 1.6763, |
|
"step": 10465 |
|
}, |
|
{ |
|
"epoch": 46.61333333333333, |
|
"grad_norm": 8.477898597717285, |
|
"learning_rate": 5.7880363872009016e-05, |
|
"loss": 1.6952, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 46.715555555555554, |
|
"grad_norm": 7.237541198730469, |
|
"learning_rate": 5.771685349499288e-05, |
|
"loss": 1.676, |
|
"step": 10511 |
|
}, |
|
{ |
|
"epoch": 46.81777777777778, |
|
"grad_norm": 5.890578269958496, |
|
"learning_rate": 5.7553258534258756e-05, |
|
"loss": 1.6964, |
|
"step": 10534 |
|
}, |
|
{ |
|
"epoch": 46.92, |
|
"grad_norm": 6.47843074798584, |
|
"learning_rate": 5.7389580782955896e-05, |
|
"loss": 1.7098, |
|
"step": 10557 |
|
}, |
|
{ |
|
"epoch": 47.022222222222226, |
|
"grad_norm": 9.489853858947754, |
|
"learning_rate": 5.722582203514099e-05, |
|
"loss": 1.6894, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 47.12444444444444, |
|
"grad_norm": 5.722830295562744, |
|
"learning_rate": 5.7061984085758555e-05, |
|
"loss": 1.6463, |
|
"step": 10603 |
|
}, |
|
{ |
|
"epoch": 47.22666666666667, |
|
"grad_norm": 5.548519134521484, |
|
"learning_rate": 5.689806873062122e-05, |
|
"loss": 1.6358, |
|
"step": 10626 |
|
}, |
|
{ |
|
"epoch": 47.32888888888889, |
|
"grad_norm": 5.543103218078613, |
|
"learning_rate": 5.6734077766390023e-05, |
|
"loss": 1.6249, |
|
"step": 10649 |
|
}, |
|
{ |
|
"epoch": 47.431111111111115, |
|
"grad_norm": 7.334754467010498, |
|
"learning_rate": 5.6570012990554774e-05, |
|
"loss": 1.6144, |
|
"step": 10672 |
|
}, |
|
{ |
|
"epoch": 47.53333333333333, |
|
"grad_norm": 6.74175500869751, |
|
"learning_rate": 5.6405876201414334e-05, |
|
"loss": 1.6413, |
|
"step": 10695 |
|
}, |
|
{ |
|
"epoch": 47.635555555555555, |
|
"grad_norm": 8.000964164733887, |
|
"learning_rate": 5.624166919805686e-05, |
|
"loss": 1.6583, |
|
"step": 10718 |
|
}, |
|
{ |
|
"epoch": 47.73777777777778, |
|
"grad_norm": 6.7785797119140625, |
|
"learning_rate": 5.607739378034015e-05, |
|
"loss": 1.6346, |
|
"step": 10741 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"grad_norm": 8.0484619140625, |
|
"learning_rate": 5.591305174887185e-05, |
|
"loss": 1.6615, |
|
"step": 10764 |
|
}, |
|
{ |
|
"epoch": 47.94222222222222, |
|
"grad_norm": 6.589325428009033, |
|
"learning_rate": 5.574864490498982e-05, |
|
"loss": 1.6556, |
|
"step": 10787 |
|
}, |
|
{ |
|
"epoch": 48.044444444444444, |
|
"grad_norm": 5.7148942947387695, |
|
"learning_rate": 5.558417505074226e-05, |
|
"loss": 1.6129, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 48.14666666666667, |
|
"grad_norm": 6.063688278198242, |
|
"learning_rate": 5.541964398886805e-05, |
|
"loss": 1.5707, |
|
"step": 10833 |
|
}, |
|
{ |
|
"epoch": 48.24888888888889, |
|
"grad_norm": 7.891332626342773, |
|
"learning_rate": 5.525505352277695e-05, |
|
"loss": 1.5966, |
|
"step": 10856 |
|
}, |
|
{ |
|
"epoch": 48.35111111111111, |
|
"grad_norm": 6.462911605834961, |
|
"learning_rate": 5.509040545652984e-05, |
|
"loss": 1.5979, |
|
"step": 10879 |
|
}, |
|
{ |
|
"epoch": 48.45333333333333, |
|
"grad_norm": 6.627693176269531, |
|
"learning_rate": 5.492570159481897e-05, |
|
"loss": 1.5835, |
|
"step": 10902 |
|
}, |
|
{ |
|
"epoch": 48.55555555555556, |
|
"grad_norm": 7.016481399536133, |
|
"learning_rate": 5.4760943742948126e-05, |
|
"loss": 1.6114, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 48.65777777777778, |
|
"grad_norm": 6.203521251678467, |
|
"learning_rate": 5.4596133706812925e-05, |
|
"loss": 1.6261, |
|
"step": 10948 |
|
}, |
|
{ |
|
"epoch": 48.76, |
|
"grad_norm": 8.625542640686035, |
|
"learning_rate": 5.443127329288092e-05, |
|
"loss": 1.6152, |
|
"step": 10971 |
|
}, |
|
{ |
|
"epoch": 48.86222222222222, |
|
"grad_norm": 8.934986114501953, |
|
"learning_rate": 5.426636430817189e-05, |
|
"loss": 1.6155, |
|
"step": 10994 |
|
}, |
|
{ |
|
"epoch": 48.964444444444446, |
|
"grad_norm": 6.330492973327637, |
|
"learning_rate": 5.4101408560237964e-05, |
|
"loss": 1.624, |
|
"step": 11017 |
|
}, |
|
{ |
|
"epoch": 49.06666666666667, |
|
"grad_norm": 7.745333671569824, |
|
"learning_rate": 5.393640785714386e-05, |
|
"loss": 1.5832, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 49.16888888888889, |
|
"grad_norm": 7.9969682693481445, |
|
"learning_rate": 5.377136400744701e-05, |
|
"loss": 1.5664, |
|
"step": 11063 |
|
}, |
|
{ |
|
"epoch": 49.27111111111111, |
|
"grad_norm": 6.262273788452148, |
|
"learning_rate": 5.3606278820177824e-05, |
|
"loss": 1.5464, |
|
"step": 11086 |
|
}, |
|
{ |
|
"epoch": 49.373333333333335, |
|
"grad_norm": 6.109494686126709, |
|
"learning_rate": 5.344115410481977e-05, |
|
"loss": 1.5242, |
|
"step": 11109 |
|
}, |
|
{ |
|
"epoch": 49.47555555555556, |
|
"grad_norm": 6.395167827606201, |
|
"learning_rate": 5.3275991671289594e-05, |
|
"loss": 1.5514, |
|
"step": 11132 |
|
}, |
|
{ |
|
"epoch": 49.577777777777776, |
|
"grad_norm": 8.812541961669922, |
|
"learning_rate": 5.311079332991748e-05, |
|
"loss": 1.527, |
|
"step": 11155 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"grad_norm": 8.040874481201172, |
|
"learning_rate": 5.294556089142716e-05, |
|
"loss": 1.5469, |
|
"step": 11178 |
|
}, |
|
{ |
|
"epoch": 49.782222222222224, |
|
"grad_norm": 6.935076713562012, |
|
"learning_rate": 5.278029616691613e-05, |
|
"loss": 1.566, |
|
"step": 11201 |
|
}, |
|
{ |
|
"epoch": 49.88444444444445, |
|
"grad_norm": 7.0155181884765625, |
|
"learning_rate": 5.261500096783577e-05, |
|
"loss": 1.5642, |
|
"step": 11224 |
|
}, |
|
{ |
|
"epoch": 49.986666666666665, |
|
"grad_norm": 8.399476051330566, |
|
"learning_rate": 5.2449677105971476e-05, |
|
"loss": 1.5664, |
|
"step": 11247 |
|
}, |
|
{ |
|
"epoch": 50.08888888888889, |
|
"grad_norm": 6.229375839233398, |
|
"learning_rate": 5.22843263934228e-05, |
|
"loss": 1.5044, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 50.19111111111111, |
|
"grad_norm": 8.590860366821289, |
|
"learning_rate": 5.211895064258365e-05, |
|
"loss": 1.5104, |
|
"step": 11293 |
|
}, |
|
{ |
|
"epoch": 50.29333333333334, |
|
"grad_norm": 6.563053607940674, |
|
"learning_rate": 5.195355166612234e-05, |
|
"loss": 1.5279, |
|
"step": 11316 |
|
}, |
|
{ |
|
"epoch": 50.39555555555555, |
|
"grad_norm": 6.139184474945068, |
|
"learning_rate": 5.178813127696175e-05, |
|
"loss": 1.5323, |
|
"step": 11339 |
|
}, |
|
{ |
|
"epoch": 50.49777777777778, |
|
"grad_norm": 6.862679958343506, |
|
"learning_rate": 5.162269128825949e-05, |
|
"loss": 1.526, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 50.6, |
|
"grad_norm": 7.023072719573975, |
|
"learning_rate": 5.1457233513387994e-05, |
|
"loss": 1.5244, |
|
"step": 11385 |
|
}, |
|
{ |
|
"epoch": 50.702222222222225, |
|
"grad_norm": 6.219864368438721, |
|
"learning_rate": 5.1291759765914625e-05, |
|
"loss": 1.5333, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 50.80444444444444, |
|
"grad_norm": 6.453531265258789, |
|
"learning_rate": 5.112627185958184e-05, |
|
"loss": 1.5319, |
|
"step": 11431 |
|
}, |
|
{ |
|
"epoch": 50.906666666666666, |
|
"grad_norm": 5.3879876136779785, |
|
"learning_rate": 5.096077160828728e-05, |
|
"loss": 1.5279, |
|
"step": 11454 |
|
}, |
|
{ |
|
"epoch": 51.00888888888889, |
|
"grad_norm": 6.174513339996338, |
|
"learning_rate": 5.079526082606394e-05, |
|
"loss": 1.5157, |
|
"step": 11477 |
|
}, |
|
{ |
|
"epoch": 51.111111111111114, |
|
"grad_norm": 8.612546920776367, |
|
"learning_rate": 5.062974132706016e-05, |
|
"loss": 1.4655, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 51.21333333333333, |
|
"grad_norm": 6.833427429199219, |
|
"learning_rate": 5.046421492551992e-05, |
|
"loss": 1.4723, |
|
"step": 11523 |
|
}, |
|
{ |
|
"epoch": 51.315555555555555, |
|
"grad_norm": 6.863546371459961, |
|
"learning_rate": 5.029868343576276e-05, |
|
"loss": 1.4848, |
|
"step": 11546 |
|
}, |
|
{ |
|
"epoch": 51.41777777777778, |
|
"grad_norm": 7.937037467956543, |
|
"learning_rate": 5.013314867216407e-05, |
|
"loss": 1.4613, |
|
"step": 11569 |
|
}, |
|
{ |
|
"epoch": 51.52, |
|
"grad_norm": 6.1333699226379395, |
|
"learning_rate": 4.996761244913508e-05, |
|
"loss": 1.478, |
|
"step": 11592 |
|
}, |
|
{ |
|
"epoch": 51.62222222222222, |
|
"grad_norm": 9.617277145385742, |
|
"learning_rate": 4.980207658110305e-05, |
|
"loss": 1.4705, |
|
"step": 11615 |
|
}, |
|
{ |
|
"epoch": 51.724444444444444, |
|
"grad_norm": 6.086880207061768, |
|
"learning_rate": 4.963654288249134e-05, |
|
"loss": 1.4673, |
|
"step": 11638 |
|
}, |
|
{ |
|
"epoch": 51.82666666666667, |
|
"grad_norm": 5.924047470092773, |
|
"learning_rate": 4.9471013167699476e-05, |
|
"loss": 1.4855, |
|
"step": 11661 |
|
}, |
|
{ |
|
"epoch": 51.92888888888889, |
|
"grad_norm": 5.790915489196777, |
|
"learning_rate": 4.930548925108342e-05, |
|
"loss": 1.4879, |
|
"step": 11684 |
|
}, |
|
{ |
|
"epoch": 52.03111111111111, |
|
"grad_norm": 10.055533409118652, |
|
"learning_rate": 4.913997294693547e-05, |
|
"loss": 1.4776, |
|
"step": 11707 |
|
}, |
|
{ |
|
"epoch": 52.13333333333333, |
|
"grad_norm": 5.994448661804199, |
|
"learning_rate": 4.8974466069464586e-05, |
|
"loss": 1.4281, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 52.23555555555556, |
|
"grad_norm": 6.34792947769165, |
|
"learning_rate": 4.880897043277632e-05, |
|
"loss": 1.4232, |
|
"step": 11753 |
|
}, |
|
{ |
|
"epoch": 52.33777777777778, |
|
"grad_norm": 6.8388285636901855, |
|
"learning_rate": 4.8643487850853093e-05, |
|
"loss": 1.4415, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 52.44, |
|
"grad_norm": 6.194220542907715, |
|
"learning_rate": 4.847802013753414e-05, |
|
"loss": 1.4363, |
|
"step": 11799 |
|
}, |
|
{ |
|
"epoch": 52.54222222222222, |
|
"grad_norm": 7.254870891571045, |
|
"learning_rate": 4.831256910649582e-05, |
|
"loss": 1.445, |
|
"step": 11822 |
|
}, |
|
{ |
|
"epoch": 52.644444444444446, |
|
"grad_norm": 6.243785858154297, |
|
"learning_rate": 4.814713657123158e-05, |
|
"loss": 1.4399, |
|
"step": 11845 |
|
}, |
|
{ |
|
"epoch": 52.74666666666667, |
|
"grad_norm": 7.5753607749938965, |
|
"learning_rate": 4.798172434503213e-05, |
|
"loss": 1.4521, |
|
"step": 11868 |
|
}, |
|
{ |
|
"epoch": 52.84888888888889, |
|
"grad_norm": 6.7162861824035645, |
|
"learning_rate": 4.781633424096562e-05, |
|
"loss": 1.4446, |
|
"step": 11891 |
|
}, |
|
{ |
|
"epoch": 52.95111111111111, |
|
"grad_norm": 8.405692100524902, |
|
"learning_rate": 4.765096807185767e-05, |
|
"loss": 1.4712, |
|
"step": 11914 |
|
}, |
|
{ |
|
"epoch": 53.053333333333335, |
|
"grad_norm": 5.832555294036865, |
|
"learning_rate": 4.748562765027162e-05, |
|
"loss": 1.4306, |
|
"step": 11937 |
|
}, |
|
{ |
|
"epoch": 53.15555555555556, |
|
"grad_norm": 5.443018436431885, |
|
"learning_rate": 4.7320314788488496e-05, |
|
"loss": 1.3977, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 53.257777777777775, |
|
"grad_norm": 6.506402969360352, |
|
"learning_rate": 4.715503129848733e-05, |
|
"loss": 1.419, |
|
"step": 11983 |
|
}, |
|
{ |
|
"epoch": 53.36, |
|
"grad_norm": 7.063472747802734, |
|
"learning_rate": 4.69897789919252e-05, |
|
"loss": 1.4188, |
|
"step": 12006 |
|
}, |
|
{ |
|
"epoch": 53.46222222222222, |
|
"grad_norm": 6.49618673324585, |
|
"learning_rate": 4.682455968011731e-05, |
|
"loss": 1.421, |
|
"step": 12029 |
|
}, |
|
{ |
|
"epoch": 53.56444444444445, |
|
"grad_norm": 7.384080410003662, |
|
"learning_rate": 4.6659375174017316e-05, |
|
"loss": 1.4157, |
|
"step": 12052 |
|
}, |
|
{ |
|
"epoch": 53.666666666666664, |
|
"grad_norm": 6.499640464782715, |
|
"learning_rate": 4.6494227284197294e-05, |
|
"loss": 1.3914, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 53.76888888888889, |
|
"grad_norm": 8.480474472045898, |
|
"learning_rate": 4.632911782082804e-05, |
|
"loss": 1.387, |
|
"step": 12098 |
|
}, |
|
{ |
|
"epoch": 53.87111111111111, |
|
"grad_norm": 7.255825519561768, |
|
"learning_rate": 4.616404859365907e-05, |
|
"loss": 1.4147, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 53.973333333333336, |
|
"grad_norm": 5.0700249671936035, |
|
"learning_rate": 4.599902141199897e-05, |
|
"loss": 1.389, |
|
"step": 12144 |
|
}, |
|
{ |
|
"epoch": 54.07555555555555, |
|
"grad_norm": 5.912162780761719, |
|
"learning_rate": 4.583403808469542e-05, |
|
"loss": 1.3623, |
|
"step": 12167 |
|
}, |
|
{ |
|
"epoch": 54.17777777777778, |
|
"grad_norm": 5.70848274230957, |
|
"learning_rate": 4.566910042011539e-05, |
|
"loss": 1.3513, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 54.28, |
|
"grad_norm": 8.14360523223877, |
|
"learning_rate": 4.550421022612542e-05, |
|
"loss": 1.3729, |
|
"step": 12213 |
|
}, |
|
{ |
|
"epoch": 54.382222222222225, |
|
"grad_norm": 5.549880027770996, |
|
"learning_rate": 4.5339369310071654e-05, |
|
"loss": 1.3797, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 54.48444444444444, |
|
"grad_norm": 6.507516384124756, |
|
"learning_rate": 4.517457947876018e-05, |
|
"loss": 1.3824, |
|
"step": 12259 |
|
}, |
|
{ |
|
"epoch": 54.586666666666666, |
|
"grad_norm": 6.413192272186279, |
|
"learning_rate": 4.500984253843707e-05, |
|
"loss": 1.3718, |
|
"step": 12282 |
|
}, |
|
{ |
|
"epoch": 54.68888888888889, |
|
"grad_norm": 6.168595790863037, |
|
"learning_rate": 4.484516029476873e-05, |
|
"loss": 1.3726, |
|
"step": 12305 |
|
}, |
|
{ |
|
"epoch": 54.791111111111114, |
|
"grad_norm": 6.176178932189941, |
|
"learning_rate": 4.4680534552821996e-05, |
|
"loss": 1.3776, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 54.89333333333333, |
|
"grad_norm": 6.18988561630249, |
|
"learning_rate": 4.45159671170444e-05, |
|
"loss": 1.3764, |
|
"step": 12351 |
|
}, |
|
{ |
|
"epoch": 54.995555555555555, |
|
"grad_norm": 6.998044490814209, |
|
"learning_rate": 4.4351459791244435e-05, |
|
"loss": 1.375, |
|
"step": 12374 |
|
}, |
|
{ |
|
"epoch": 55.09777777777778, |
|
"grad_norm": 6.069551467895508, |
|
"learning_rate": 4.418701437857166e-05, |
|
"loss": 1.3324, |
|
"step": 12397 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"grad_norm": 6.534727096557617, |
|
"learning_rate": 4.402263268149706e-05, |
|
"loss": 1.3301, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 55.30222222222222, |
|
"grad_norm": 6.363480567932129, |
|
"learning_rate": 4.385831650179322e-05, |
|
"loss": 1.3524, |
|
"step": 12443 |
|
}, |
|
{ |
|
"epoch": 55.404444444444444, |
|
"grad_norm": 6.515593528747559, |
|
"learning_rate": 4.3694067640514614e-05, |
|
"loss": 1.3353, |
|
"step": 12466 |
|
}, |
|
{ |
|
"epoch": 55.50666666666667, |
|
"grad_norm": 6.400863170623779, |
|
"learning_rate": 4.352988789797781e-05, |
|
"loss": 1.3292, |
|
"step": 12489 |
|
}, |
|
{ |
|
"epoch": 55.60888888888889, |
|
"grad_norm": 6.897211074829102, |
|
"learning_rate": 4.336577907374181e-05, |
|
"loss": 1.3591, |
|
"step": 12512 |
|
}, |
|
{ |
|
"epoch": 55.71111111111111, |
|
"grad_norm": 7.05909538269043, |
|
"learning_rate": 4.320174296658827e-05, |
|
"loss": 1.3636, |
|
"step": 12535 |
|
}, |
|
{ |
|
"epoch": 55.81333333333333, |
|
"grad_norm": 5.776651859283447, |
|
"learning_rate": 4.303778137450178e-05, |
|
"loss": 1.3475, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 55.91555555555556, |
|
"grad_norm": 6.0230021476745605, |
|
"learning_rate": 4.287389609465022e-05, |
|
"loss": 1.3681, |
|
"step": 12581 |
|
}, |
|
{ |
|
"epoch": 56.01777777777778, |
|
"grad_norm": 6.32971715927124, |
|
"learning_rate": 4.271008892336497e-05, |
|
"loss": 1.3458, |
|
"step": 12604 |
|
}, |
|
{ |
|
"epoch": 56.12, |
|
"grad_norm": 8.626049041748047, |
|
"learning_rate": 4.2546361656121346e-05, |
|
"loss": 1.2829, |
|
"step": 12627 |
|
}, |
|
{ |
|
"epoch": 56.22222222222222, |
|
"grad_norm": 6.015228748321533, |
|
"learning_rate": 4.238271608751874e-05, |
|
"loss": 1.2816, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 56.324444444444445, |
|
"grad_norm": 5.704399108886719, |
|
"learning_rate": 4.221915401126113e-05, |
|
"loss": 1.3026, |
|
"step": 12673 |
|
}, |
|
{ |
|
"epoch": 56.42666666666667, |
|
"grad_norm": 5.911527156829834, |
|
"learning_rate": 4.205567722013733e-05, |
|
"loss": 1.2857, |
|
"step": 12696 |
|
}, |
|
{ |
|
"epoch": 56.528888888888886, |
|
"grad_norm": 6.171534538269043, |
|
"learning_rate": 4.18922875060013e-05, |
|
"loss": 1.2873, |
|
"step": 12719 |
|
}, |
|
{ |
|
"epoch": 56.63111111111111, |
|
"grad_norm": 7.097690105438232, |
|
"learning_rate": 4.1728986659752636e-05, |
|
"loss": 1.3012, |
|
"step": 12742 |
|
}, |
|
{ |
|
"epoch": 56.733333333333334, |
|
"grad_norm": 5.469725608825684, |
|
"learning_rate": 4.156577647131679e-05, |
|
"loss": 1.2895, |
|
"step": 12765 |
|
}, |
|
{ |
|
"epoch": 56.83555555555556, |
|
"grad_norm": 6.386800765991211, |
|
"learning_rate": 4.1402658729625596e-05, |
|
"loss": 1.3026, |
|
"step": 12788 |
|
}, |
|
{ |
|
"epoch": 56.937777777777775, |
|
"grad_norm": 5.86681604385376, |
|
"learning_rate": 4.1239635222597494e-05, |
|
"loss": 1.3072, |
|
"step": 12811 |
|
}, |
|
{ |
|
"epoch": 57.04, |
|
"grad_norm": 6.062530517578125, |
|
"learning_rate": 4.107670773711812e-05, |
|
"loss": 1.284, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 57.14222222222222, |
|
"grad_norm": 5.922295570373535, |
|
"learning_rate": 4.091387805902058e-05, |
|
"loss": 1.2621, |
|
"step": 12857 |
|
}, |
|
{ |
|
"epoch": 57.24444444444445, |
|
"grad_norm": 5.438425064086914, |
|
"learning_rate": 4.075114797306589e-05, |
|
"loss": 1.264, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 57.346666666666664, |
|
"grad_norm": 7.964729309082031, |
|
"learning_rate": 4.058851926292353e-05, |
|
"loss": 1.2781, |
|
"step": 12903 |
|
}, |
|
{ |
|
"epoch": 57.44888888888889, |
|
"grad_norm": 6.432003498077393, |
|
"learning_rate": 4.042599371115172e-05, |
|
"loss": 1.2787, |
|
"step": 12926 |
|
}, |
|
{ |
|
"epoch": 57.55111111111111, |
|
"grad_norm": 5.485337257385254, |
|
"learning_rate": 4.026357309917806e-05, |
|
"loss": 1.2663, |
|
"step": 12949 |
|
}, |
|
{ |
|
"epoch": 57.653333333333336, |
|
"grad_norm": 6.874802112579346, |
|
"learning_rate": 4.010125920727982e-05, |
|
"loss": 1.2733, |
|
"step": 12972 |
|
}, |
|
{ |
|
"epoch": 57.75555555555555, |
|
"grad_norm": 5.767955303192139, |
|
"learning_rate": 3.993905381456462e-05, |
|
"loss": 1.2763, |
|
"step": 12995 |
|
}, |
|
{ |
|
"epoch": 57.85777777777778, |
|
"grad_norm": 5.2443389892578125, |
|
"learning_rate": 3.977695869895073e-05, |
|
"loss": 1.273, |
|
"step": 13018 |
|
}, |
|
{ |
|
"epoch": 57.96, |
|
"grad_norm": 7.763814926147461, |
|
"learning_rate": 3.961497563714774e-05, |
|
"loss": 1.2851, |
|
"step": 13041 |
|
}, |
|
{ |
|
"epoch": 58.062222222222225, |
|
"grad_norm": 6.231062412261963, |
|
"learning_rate": 3.945310640463705e-05, |
|
"loss": 1.2581, |
|
"step": 13064 |
|
}, |
|
{ |
|
"epoch": 58.16444444444444, |
|
"grad_norm": 5.801052093505859, |
|
"learning_rate": 3.9291352775652325e-05, |
|
"loss": 1.2376, |
|
"step": 13087 |
|
}, |
|
{ |
|
"epoch": 58.266666666666666, |
|
"grad_norm": 8.022377967834473, |
|
"learning_rate": 3.9129716523160165e-05, |
|
"loss": 1.2403, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 58.36888888888889, |
|
"grad_norm": 6.449449062347412, |
|
"learning_rate": 3.8968199418840575e-05, |
|
"loss": 1.2353, |
|
"step": 13133 |
|
}, |
|
{ |
|
"epoch": 58.471111111111114, |
|
"grad_norm": 5.934969902038574, |
|
"learning_rate": 3.880680323306765e-05, |
|
"loss": 1.2575, |
|
"step": 13156 |
|
}, |
|
{ |
|
"epoch": 58.57333333333333, |
|
"grad_norm": 6.265482425689697, |
|
"learning_rate": 3.8645529734890014e-05, |
|
"loss": 1.247, |
|
"step": 13179 |
|
}, |
|
{ |
|
"epoch": 58.675555555555555, |
|
"grad_norm": 5.975387096405029, |
|
"learning_rate": 3.8484380692011605e-05, |
|
"loss": 1.2634, |
|
"step": 13202 |
|
}, |
|
{ |
|
"epoch": 58.77777777777778, |
|
"grad_norm": 6.401468753814697, |
|
"learning_rate": 3.83233578707722e-05, |
|
"loss": 1.244, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 58.88, |
|
"grad_norm": 5.331010341644287, |
|
"learning_rate": 3.816246303612802e-05, |
|
"loss": 1.2459, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 58.98222222222222, |
|
"grad_norm": 5.550204277038574, |
|
"learning_rate": 3.800169795163252e-05, |
|
"loss": 1.2541, |
|
"step": 13271 |
|
}, |
|
{ |
|
"epoch": 59.08444444444444, |
|
"grad_norm": 5.241280555725098, |
|
"learning_rate": 3.7841064379416903e-05, |
|
"loss": 1.2155, |
|
"step": 13294 |
|
}, |
|
{ |
|
"epoch": 59.18666666666667, |
|
"grad_norm": 6.312388896942139, |
|
"learning_rate": 3.768056408017094e-05, |
|
"loss": 1.2055, |
|
"step": 13317 |
|
}, |
|
{ |
|
"epoch": 59.28888888888889, |
|
"grad_norm": 5.525976181030273, |
|
"learning_rate": 3.752019881312354e-05, |
|
"loss": 1.211, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 59.39111111111111, |
|
"grad_norm": 6.092748641967773, |
|
"learning_rate": 3.735997033602361e-05, |
|
"loss": 1.2133, |
|
"step": 13363 |
|
}, |
|
{ |
|
"epoch": 59.49333333333333, |
|
"grad_norm": 5.471757888793945, |
|
"learning_rate": 3.719988040512067e-05, |
|
"loss": 1.2267, |
|
"step": 13386 |
|
}, |
|
{ |
|
"epoch": 59.595555555555556, |
|
"grad_norm": 6.422407150268555, |
|
"learning_rate": 3.703993077514563e-05, |
|
"loss": 1.2223, |
|
"step": 13409 |
|
}, |
|
{ |
|
"epoch": 59.69777777777778, |
|
"grad_norm": 5.488748073577881, |
|
"learning_rate": 3.6880123199291635e-05, |
|
"loss": 1.2217, |
|
"step": 13432 |
|
}, |
|
{ |
|
"epoch": 59.8, |
|
"grad_norm": 5.826624393463135, |
|
"learning_rate": 3.672045942919474e-05, |
|
"loss": 1.2216, |
|
"step": 13455 |
|
}, |
|
{ |
|
"epoch": 59.90222222222222, |
|
"grad_norm": 5.7313008308410645, |
|
"learning_rate": 3.656094121491479e-05, |
|
"loss": 1.2271, |
|
"step": 13478 |
|
}, |
|
{ |
|
"epoch": 60.004444444444445, |
|
"grad_norm": 7.073070526123047, |
|
"learning_rate": 3.6401570304916166e-05, |
|
"loss": 1.222, |
|
"step": 13501 |
|
}, |
|
{ |
|
"epoch": 60.10666666666667, |
|
"grad_norm": 5.049999713897705, |
|
"learning_rate": 3.624234844604869e-05, |
|
"loss": 1.1695, |
|
"step": 13524 |
|
}, |
|
{ |
|
"epoch": 60.208888888888886, |
|
"grad_norm": 5.1560211181640625, |
|
"learning_rate": 3.6083277383528466e-05, |
|
"loss": 1.1792, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 60.31111111111111, |
|
"grad_norm": 5.553138256072998, |
|
"learning_rate": 3.592435886091867e-05, |
|
"loss": 1.1853, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 60.413333333333334, |
|
"grad_norm": 5.489965438842773, |
|
"learning_rate": 3.576559462011057e-05, |
|
"loss": 1.1918, |
|
"step": 13593 |
|
}, |
|
{ |
|
"epoch": 60.51555555555556, |
|
"grad_norm": 6.636351108551025, |
|
"learning_rate": 3.5606986401304324e-05, |
|
"loss": 1.2002, |
|
"step": 13616 |
|
}, |
|
{ |
|
"epoch": 60.617777777777775, |
|
"grad_norm": 8.49821662902832, |
|
"learning_rate": 3.544853594298997e-05, |
|
"loss": 1.2062, |
|
"step": 13639 |
|
}, |
|
{ |
|
"epoch": 60.72, |
|
"grad_norm": 5.866752624511719, |
|
"learning_rate": 3.529024498192832e-05, |
|
"loss": 1.205, |
|
"step": 13662 |
|
}, |
|
{ |
|
"epoch": 60.82222222222222, |
|
"grad_norm": 12.07309627532959, |
|
"learning_rate": 3.5132115253132005e-05, |
|
"loss": 1.2112, |
|
"step": 13685 |
|
}, |
|
{ |
|
"epoch": 60.92444444444445, |
|
"grad_norm": 7.421104431152344, |
|
"learning_rate": 3.4974148489846315e-05, |
|
"loss": 1.2229, |
|
"step": 13708 |
|
}, |
|
{ |
|
"epoch": 61.026666666666664, |
|
"grad_norm": 5.546532154083252, |
|
"learning_rate": 3.4816346423530385e-05, |
|
"loss": 1.1952, |
|
"step": 13731 |
|
}, |
|
{ |
|
"epoch": 61.12888888888889, |
|
"grad_norm": 5.055679798126221, |
|
"learning_rate": 3.465871078383809e-05, |
|
"loss": 1.1628, |
|
"step": 13754 |
|
}, |
|
{ |
|
"epoch": 61.23111111111111, |
|
"grad_norm": 6.14479923248291, |
|
"learning_rate": 3.4501243298599055e-05, |
|
"loss": 1.1767, |
|
"step": 13777 |
|
}, |
|
{ |
|
"epoch": 61.333333333333336, |
|
"grad_norm": 5.632229328155518, |
|
"learning_rate": 3.434394569379988e-05, |
|
"loss": 1.179, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 61.43555555555555, |
|
"grad_norm": 5.1467671394348145, |
|
"learning_rate": 3.4186819693565046e-05, |
|
"loss": 1.1745, |
|
"step": 13823 |
|
}, |
|
{ |
|
"epoch": 61.53777777777778, |
|
"grad_norm": 5.162554740905762, |
|
"learning_rate": 3.4029867020138155e-05, |
|
"loss": 1.1672, |
|
"step": 13846 |
|
}, |
|
{ |
|
"epoch": 61.64, |
|
"grad_norm": 5.325419902801514, |
|
"learning_rate": 3.387308939386291e-05, |
|
"loss": 1.1793, |
|
"step": 13869 |
|
}, |
|
{ |
|
"epoch": 61.742222222222225, |
|
"grad_norm": 5.7772626876831055, |
|
"learning_rate": 3.371648853316442e-05, |
|
"loss": 1.1706, |
|
"step": 13892 |
|
}, |
|
{ |
|
"epoch": 61.84444444444444, |
|
"grad_norm": 7.251054763793945, |
|
"learning_rate": 3.356006615453025e-05, |
|
"loss": 1.1572, |
|
"step": 13915 |
|
}, |
|
{ |
|
"epoch": 61.946666666666665, |
|
"grad_norm": 6.169683933258057, |
|
"learning_rate": 3.340382397249159e-05, |
|
"loss": 1.1553, |
|
"step": 13938 |
|
}, |
|
{ |
|
"epoch": 62.04888888888889, |
|
"grad_norm": 6.773545742034912, |
|
"learning_rate": 3.324776369960461e-05, |
|
"loss": 1.1603, |
|
"step": 13961 |
|
}, |
|
{ |
|
"epoch": 62.15111111111111, |
|
"grad_norm": 6.104127407073975, |
|
"learning_rate": 3.309188704643149e-05, |
|
"loss": 1.1209, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 62.25333333333333, |
|
"grad_norm": 5.433740615844727, |
|
"learning_rate": 3.2936195721521866e-05, |
|
"loss": 1.1373, |
|
"step": 14007 |
|
}, |
|
{ |
|
"epoch": 62.355555555555554, |
|
"grad_norm": 5.472240924835205, |
|
"learning_rate": 3.2780691431393926e-05, |
|
"loss": 1.143, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 62.45777777777778, |
|
"grad_norm": 5.382284164428711, |
|
"learning_rate": 3.2625375880515854e-05, |
|
"loss": 1.1471, |
|
"step": 14053 |
|
}, |
|
{ |
|
"epoch": 62.56, |
|
"grad_norm": 5.667013168334961, |
|
"learning_rate": 3.2470250771287036e-05, |
|
"loss": 1.1391, |
|
"step": 14076 |
|
}, |
|
{ |
|
"epoch": 62.66222222222222, |
|
"grad_norm": 5.519725322723389, |
|
"learning_rate": 3.231531780401943e-05, |
|
"loss": 1.1335, |
|
"step": 14099 |
|
}, |
|
{ |
|
"epoch": 62.76444444444444, |
|
"grad_norm": 5.530640125274658, |
|
"learning_rate": 3.2160578676919016e-05, |
|
"loss": 1.1386, |
|
"step": 14122 |
|
}, |
|
{ |
|
"epoch": 62.86666666666667, |
|
"grad_norm": 6.683435440063477, |
|
"learning_rate": 3.200603508606703e-05, |
|
"loss": 1.1362, |
|
"step": 14145 |
|
}, |
|
{ |
|
"epoch": 62.96888888888889, |
|
"grad_norm": 5.929420471191406, |
|
"learning_rate": 3.185168872540153e-05, |
|
"loss": 1.1455, |
|
"step": 14168 |
|
}, |
|
{ |
|
"epoch": 63.07111111111111, |
|
"grad_norm": 6.305390357971191, |
|
"learning_rate": 3.169754128669866e-05, |
|
"loss": 1.1242, |
|
"step": 14191 |
|
}, |
|
{ |
|
"epoch": 63.17333333333333, |
|
"grad_norm": 6.4048542976379395, |
|
"learning_rate": 3.154359445955429e-05, |
|
"loss": 1.1263, |
|
"step": 14214 |
|
}, |
|
{ |
|
"epoch": 63.275555555555556, |
|
"grad_norm": 5.409482002258301, |
|
"learning_rate": 3.138984993136535e-05, |
|
"loss": 1.1052, |
|
"step": 14237 |
|
}, |
|
{ |
|
"epoch": 63.37777777777778, |
|
"grad_norm": 5.47636079788208, |
|
"learning_rate": 3.12363093873114e-05, |
|
"loss": 1.1196, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 63.48, |
|
"grad_norm": 5.092154026031494, |
|
"learning_rate": 3.108297451033616e-05, |
|
"loss": 1.1193, |
|
"step": 14283 |
|
}, |
|
{ |
|
"epoch": 63.58222222222222, |
|
"grad_norm": 5.453930377960205, |
|
"learning_rate": 3.092984698112904e-05, |
|
"loss": 1.1182, |
|
"step": 14306 |
|
}, |
|
{ |
|
"epoch": 63.684444444444445, |
|
"grad_norm": 6.511165618896484, |
|
"learning_rate": 3.0776928478106754e-05, |
|
"loss": 1.1295, |
|
"step": 14329 |
|
}, |
|
{ |
|
"epoch": 63.78666666666667, |
|
"grad_norm": 5.347112655639648, |
|
"learning_rate": 3.062422067739485e-05, |
|
"loss": 1.1239, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 63.888888888888886, |
|
"grad_norm": 5.500729084014893, |
|
"learning_rate": 3.0471725252809458e-05, |
|
"loss": 1.1227, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 63.99111111111111, |
|
"grad_norm": 5.913949489593506, |
|
"learning_rate": 3.0319443875838794e-05, |
|
"loss": 1.1306, |
|
"step": 14398 |
|
}, |
|
{ |
|
"epoch": 64.09333333333333, |
|
"grad_norm": 5.112490177154541, |
|
"learning_rate": 3.0167378215624974e-05, |
|
"loss": 1.0993, |
|
"step": 14421 |
|
}, |
|
{ |
|
"epoch": 64.19555555555556, |
|
"grad_norm": 5.541341304779053, |
|
"learning_rate": 3.0015529938945668e-05, |
|
"loss": 1.0976, |
|
"step": 14444 |
|
}, |
|
{ |
|
"epoch": 64.29777777777778, |
|
"grad_norm": 5.937663555145264, |
|
"learning_rate": 2.9863900710195758e-05, |
|
"loss": 1.0953, |
|
"step": 14467 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"grad_norm": 5.4565558433532715, |
|
"learning_rate": 2.9712492191369244e-05, |
|
"loss": 1.0998, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 64.50222222222222, |
|
"grad_norm": 6.276011943817139, |
|
"learning_rate": 2.956130604204089e-05, |
|
"loss": 1.1113, |
|
"step": 14513 |
|
}, |
|
{ |
|
"epoch": 64.60444444444444, |
|
"grad_norm": 5.444122791290283, |
|
"learning_rate": 2.9410343919348127e-05, |
|
"loss": 1.108, |
|
"step": 14536 |
|
}, |
|
{ |
|
"epoch": 64.70666666666666, |
|
"grad_norm": 5.791774749755859, |
|
"learning_rate": 2.9259607477972794e-05, |
|
"loss": 1.1149, |
|
"step": 14559 |
|
}, |
|
{ |
|
"epoch": 64.80888888888889, |
|
"grad_norm": 6.028242588043213, |
|
"learning_rate": 2.9109098370123132e-05, |
|
"loss": 1.1236, |
|
"step": 14582 |
|
}, |
|
{ |
|
"epoch": 64.91111111111111, |
|
"grad_norm": 6.835079193115234, |
|
"learning_rate": 2.8958818245515533e-05, |
|
"loss": 1.1148, |
|
"step": 14605 |
|
}, |
|
{ |
|
"epoch": 65.01333333333334, |
|
"grad_norm": 5.5959792137146, |
|
"learning_rate": 2.8808768751356564e-05, |
|
"loss": 1.1054, |
|
"step": 14628 |
|
}, |
|
{ |
|
"epoch": 65.11555555555556, |
|
"grad_norm": 5.705920219421387, |
|
"learning_rate": 2.865895153232489e-05, |
|
"loss": 1.0824, |
|
"step": 14651 |
|
}, |
|
{ |
|
"epoch": 65.21777777777778, |
|
"grad_norm": 4.9849934577941895, |
|
"learning_rate": 2.8509368230553157e-05, |
|
"loss": 1.077, |
|
"step": 14674 |
|
}, |
|
{ |
|
"epoch": 65.32, |
|
"grad_norm": 5.702665328979492, |
|
"learning_rate": 2.8360020485610163e-05, |
|
"loss": 1.0514, |
|
"step": 14697 |
|
}, |
|
{ |
|
"epoch": 65.42222222222222, |
|
"grad_norm": 5.4493207931518555, |
|
"learning_rate": 2.8210909934482678e-05, |
|
"loss": 1.0653, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 65.52444444444444, |
|
"grad_norm": 5.684943199157715, |
|
"learning_rate": 2.8062038211557728e-05, |
|
"loss": 1.0641, |
|
"step": 14743 |
|
}, |
|
{ |
|
"epoch": 65.62666666666667, |
|
"grad_norm": 5.757254123687744, |
|
"learning_rate": 2.791340694860446e-05, |
|
"loss": 1.0754, |
|
"step": 14766 |
|
}, |
|
{ |
|
"epoch": 65.72888888888889, |
|
"grad_norm": 5.588274955749512, |
|
"learning_rate": 2.776501777475644e-05, |
|
"loss": 1.0768, |
|
"step": 14789 |
|
}, |
|
{ |
|
"epoch": 65.83111111111111, |
|
"grad_norm": 5.547431945800781, |
|
"learning_rate": 2.7616872316493708e-05, |
|
"loss": 1.078, |
|
"step": 14812 |
|
}, |
|
{ |
|
"epoch": 65.93333333333334, |
|
"grad_norm": 5.201080322265625, |
|
"learning_rate": 2.7468972197624897e-05, |
|
"loss": 1.0824, |
|
"step": 14835 |
|
}, |
|
{ |
|
"epoch": 66.03555555555556, |
|
"grad_norm": 6.8083271980285645, |
|
"learning_rate": 2.7321319039269576e-05, |
|
"loss": 1.07, |
|
"step": 14858 |
|
}, |
|
{ |
|
"epoch": 66.13777777777777, |
|
"grad_norm": 6.262781620025635, |
|
"learning_rate": 2.7173914459840342e-05, |
|
"loss": 1.0395, |
|
"step": 14881 |
|
}, |
|
{ |
|
"epoch": 66.24, |
|
"grad_norm": 5.109470844268799, |
|
"learning_rate": 2.7026760075025192e-05, |
|
"loss": 1.0467, |
|
"step": 14904 |
|
}, |
|
{ |
|
"epoch": 66.34222222222222, |
|
"grad_norm": 5.397584915161133, |
|
"learning_rate": 2.6879857497769712e-05, |
|
"loss": 1.0531, |
|
"step": 14927 |
|
}, |
|
{ |
|
"epoch": 66.44444444444444, |
|
"grad_norm": 5.602553844451904, |
|
"learning_rate": 2.6733208338259486e-05, |
|
"loss": 1.045, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 66.54666666666667, |
|
"grad_norm": 5.551428318023682, |
|
"learning_rate": 2.6586814203902422e-05, |
|
"loss": 1.042, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 66.64888888888889, |
|
"grad_norm": 5.80933952331543, |
|
"learning_rate": 2.6440676699311062e-05, |
|
"loss": 1.0555, |
|
"step": 14996 |
|
}, |
|
{ |
|
"epoch": 66.75111111111111, |
|
"grad_norm": 5.058752536773682, |
|
"learning_rate": 2.6294797426285112e-05, |
|
"loss": 1.0507, |
|
"step": 15019 |
|
}, |
|
{ |
|
"epoch": 66.85333333333334, |
|
"grad_norm": 7.067930221557617, |
|
"learning_rate": 2.6149177983793783e-05, |
|
"loss": 1.0599, |
|
"step": 15042 |
|
}, |
|
{ |
|
"epoch": 66.95555555555555, |
|
"grad_norm": 5.901451587677002, |
|
"learning_rate": 2.6003819967958344e-05, |
|
"loss": 1.0527, |
|
"step": 15065 |
|
}, |
|
{ |
|
"epoch": 67.05777777777777, |
|
"grad_norm": 5.727104663848877, |
|
"learning_rate": 2.5858724972034555e-05, |
|
"loss": 1.0395, |
|
"step": 15088 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"grad_norm": 7.644411563873291, |
|
"learning_rate": 2.5713894586395283e-05, |
|
"loss": 1.0326, |
|
"step": 15111 |
|
}, |
|
{ |
|
"epoch": 67.26222222222222, |
|
"grad_norm": 4.788581848144531, |
|
"learning_rate": 2.5569330398512957e-05, |
|
"loss": 1.0388, |
|
"step": 15134 |
|
}, |
|
{ |
|
"epoch": 67.36444444444444, |
|
"grad_norm": 4.921880722045898, |
|
"learning_rate": 2.5425033992942316e-05, |
|
"loss": 1.0413, |
|
"step": 15157 |
|
}, |
|
{ |
|
"epoch": 67.46666666666667, |
|
"grad_norm": 5.7385735511779785, |
|
"learning_rate": 2.5281006951302934e-05, |
|
"loss": 1.0328, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 67.56888888888889, |
|
"grad_norm": 5.9198689460754395, |
|
"learning_rate": 2.5137250852261862e-05, |
|
"loss": 1.0416, |
|
"step": 15203 |
|
}, |
|
{ |
|
"epoch": 67.67111111111112, |
|
"grad_norm": 5.01896858215332, |
|
"learning_rate": 2.499376727151646e-05, |
|
"loss": 1.0455, |
|
"step": 15226 |
|
}, |
|
{ |
|
"epoch": 67.77333333333333, |
|
"grad_norm": 5.580973148345947, |
|
"learning_rate": 2.485055778177696e-05, |
|
"loss": 1.0487, |
|
"step": 15249 |
|
}, |
|
{ |
|
"epoch": 67.87555555555555, |
|
"grad_norm": 4.777526378631592, |
|
"learning_rate": 2.470762395274938e-05, |
|
"loss": 1.0434, |
|
"step": 15272 |
|
}, |
|
{ |
|
"epoch": 67.97777777777777, |
|
"grad_norm": 7.526794910430908, |
|
"learning_rate": 2.4564967351118175e-05, |
|
"loss": 1.0477, |
|
"step": 15295 |
|
}, |
|
{ |
|
"epoch": 68.08, |
|
"grad_norm": 6.90614128112793, |
|
"learning_rate": 2.4422589540529185e-05, |
|
"loss": 1.0341, |
|
"step": 15318 |
|
}, |
|
{ |
|
"epoch": 68.18222222222222, |
|
"grad_norm": 6.120336532592773, |
|
"learning_rate": 2.4280492081572455e-05, |
|
"loss": 1.0169, |
|
"step": 15341 |
|
}, |
|
{ |
|
"epoch": 68.28444444444445, |
|
"grad_norm": 5.239770889282227, |
|
"learning_rate": 2.413867653176506e-05, |
|
"loss": 1.0155, |
|
"step": 15364 |
|
}, |
|
{ |
|
"epoch": 68.38666666666667, |
|
"grad_norm": 5.342464923858643, |
|
"learning_rate": 2.3997144445534175e-05, |
|
"loss": 1.0343, |
|
"step": 15387 |
|
}, |
|
{ |
|
"epoch": 68.4888888888889, |
|
"grad_norm": 6.170787811279297, |
|
"learning_rate": 2.3855897374199883e-05, |
|
"loss": 1.0101, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 68.5911111111111, |
|
"grad_norm": 7.313038349151611, |
|
"learning_rate": 2.371493686595831e-05, |
|
"loss": 1.0369, |
|
"step": 15433 |
|
}, |
|
{ |
|
"epoch": 68.69333333333333, |
|
"grad_norm": 5.434996604919434, |
|
"learning_rate": 2.3574264465864527e-05, |
|
"loss": 1.0345, |
|
"step": 15456 |
|
}, |
|
{ |
|
"epoch": 68.79555555555555, |
|
"grad_norm": 6.723358631134033, |
|
"learning_rate": 2.343388171581573e-05, |
|
"loss": 1.0309, |
|
"step": 15479 |
|
}, |
|
{ |
|
"epoch": 68.89777777777778, |
|
"grad_norm": 5.317188262939453, |
|
"learning_rate": 2.3293790154534283e-05, |
|
"loss": 1.0314, |
|
"step": 15502 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"grad_norm": 6.149099349975586, |
|
"learning_rate": 2.315399131755081e-05, |
|
"loss": 1.0313, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 69.10222222222222, |
|
"grad_norm": 5.885578632354736, |
|
"learning_rate": 2.3014486737187475e-05, |
|
"loss": 1.0127, |
|
"step": 15548 |
|
}, |
|
{ |
|
"epoch": 69.20444444444445, |
|
"grad_norm": 5.442347049713135, |
|
"learning_rate": 2.2875277942541057e-05, |
|
"loss": 1.0002, |
|
"step": 15571 |
|
}, |
|
{ |
|
"epoch": 69.30666666666667, |
|
"grad_norm": 5.002798080444336, |
|
"learning_rate": 2.2736366459466326e-05, |
|
"loss": 1.0208, |
|
"step": 15594 |
|
}, |
|
{ |
|
"epoch": 69.4088888888889, |
|
"grad_norm": 4.764693737030029, |
|
"learning_rate": 2.259775381055917e-05, |
|
"loss": 1.0147, |
|
"step": 15617 |
|
}, |
|
{ |
|
"epoch": 69.5111111111111, |
|
"grad_norm": 5.556255340576172, |
|
"learning_rate": 2.2459441515140044e-05, |
|
"loss": 0.9888, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 69.61333333333333, |
|
"grad_norm": 5.241755485534668, |
|
"learning_rate": 2.2321431089237256e-05, |
|
"loss": 0.9846, |
|
"step": 15663 |
|
}, |
|
{ |
|
"epoch": 69.71555555555555, |
|
"grad_norm": 5.701202869415283, |
|
"learning_rate": 2.2183724045570286e-05, |
|
"loss": 0.9872, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 69.81777777777778, |
|
"grad_norm": 8.224358558654785, |
|
"learning_rate": 2.2046321893533362e-05, |
|
"loss": 0.9898, |
|
"step": 15709 |
|
}, |
|
{ |
|
"epoch": 69.92, |
|
"grad_norm": 5.965829849243164, |
|
"learning_rate": 2.1909226139178723e-05, |
|
"loss": 0.9831, |
|
"step": 15732 |
|
}, |
|
{ |
|
"epoch": 70.02222222222223, |
|
"grad_norm": 5.391206741333008, |
|
"learning_rate": 2.1772438285200312e-05, |
|
"loss": 0.9954, |
|
"step": 15755 |
|
}, |
|
{ |
|
"epoch": 70.12444444444445, |
|
"grad_norm": 6.74372673034668, |
|
"learning_rate": 2.1635959830917107e-05, |
|
"loss": 0.9651, |
|
"step": 15778 |
|
}, |
|
{ |
|
"epoch": 70.22666666666667, |
|
"grad_norm": 5.2756123542785645, |
|
"learning_rate": 2.149979227225688e-05, |
|
"loss": 0.9698, |
|
"step": 15801 |
|
}, |
|
{ |
|
"epoch": 70.32888888888888, |
|
"grad_norm": 6.822518825531006, |
|
"learning_rate": 2.1363937101739613e-05, |
|
"loss": 0.9771, |
|
"step": 15824 |
|
}, |
|
{ |
|
"epoch": 70.43111111111111, |
|
"grad_norm": 5.256137847900391, |
|
"learning_rate": 2.1228395808461294e-05, |
|
"loss": 0.9962, |
|
"step": 15847 |
|
}, |
|
{ |
|
"epoch": 70.53333333333333, |
|
"grad_norm": 4.483437538146973, |
|
"learning_rate": 2.1093169878077533e-05, |
|
"loss": 0.9735, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 70.63555555555556, |
|
"grad_norm": 6.114633083343506, |
|
"learning_rate": 2.0958260792787215e-05, |
|
"loss": 0.9839, |
|
"step": 15893 |
|
}, |
|
{ |
|
"epoch": 70.73777777777778, |
|
"grad_norm": 5.309250831604004, |
|
"learning_rate": 2.08236700313164e-05, |
|
"loss": 0.9745, |
|
"step": 15916 |
|
}, |
|
{ |
|
"epoch": 70.84, |
|
"grad_norm": 5.820844650268555, |
|
"learning_rate": 2.068939906890194e-05, |
|
"loss": 0.9786, |
|
"step": 15939 |
|
}, |
|
{ |
|
"epoch": 70.94222222222223, |
|
"grad_norm": 5.038022041320801, |
|
"learning_rate": 2.055544937727549e-05, |
|
"loss": 0.9912, |
|
"step": 15962 |
|
}, |
|
{ |
|
"epoch": 71.04444444444445, |
|
"grad_norm": 5.100025177001953, |
|
"learning_rate": 2.042182242464719e-05, |
|
"loss": 0.9748, |
|
"step": 15985 |
|
}, |
|
{ |
|
"epoch": 71.14666666666666, |
|
"grad_norm": 5.8269829750061035, |
|
"learning_rate": 2.0288519675689755e-05, |
|
"loss": 0.9614, |
|
"step": 16008 |
|
}, |
|
{ |
|
"epoch": 71.24888888888889, |
|
"grad_norm": 5.484350681304932, |
|
"learning_rate": 2.0155542591522303e-05, |
|
"loss": 0.9655, |
|
"step": 16031 |
|
}, |
|
{ |
|
"epoch": 71.35111111111111, |
|
"grad_norm": 5.463179111480713, |
|
"learning_rate": 2.0022892629694335e-05, |
|
"loss": 0.9633, |
|
"step": 16054 |
|
}, |
|
{ |
|
"epoch": 71.45333333333333, |
|
"grad_norm": 6.4749579429626465, |
|
"learning_rate": 1.9890571244169854e-05, |
|
"loss": 0.9643, |
|
"step": 16077 |
|
}, |
|
{ |
|
"epoch": 71.55555555555556, |
|
"grad_norm": 5.12134313583374, |
|
"learning_rate": 1.97585798853113e-05, |
|
"loss": 0.9771, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 71.65777777777778, |
|
"grad_norm": 5.494293212890625, |
|
"learning_rate": 1.9626919999863802e-05, |
|
"loss": 0.9833, |
|
"step": 16123 |
|
}, |
|
{ |
|
"epoch": 71.76, |
|
"grad_norm": 6.645090579986572, |
|
"learning_rate": 1.9495593030939157e-05, |
|
"loss": 0.966, |
|
"step": 16146 |
|
}, |
|
{ |
|
"epoch": 71.86222222222223, |
|
"grad_norm": 5.469064235687256, |
|
"learning_rate": 1.9364600418000156e-05, |
|
"loss": 0.9752, |
|
"step": 16169 |
|
}, |
|
{ |
|
"epoch": 71.96444444444444, |
|
"grad_norm": 7.400743007659912, |
|
"learning_rate": 1.9233943596844734e-05, |
|
"loss": 0.9729, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 72.06666666666666, |
|
"grad_norm": 5.228180408477783, |
|
"learning_rate": 1.9103623999590202e-05, |
|
"loss": 0.9706, |
|
"step": 16215 |
|
}, |
|
{ |
|
"epoch": 72.16888888888889, |
|
"grad_norm": 5.571268081665039, |
|
"learning_rate": 1.897364305465766e-05, |
|
"loss": 0.9544, |
|
"step": 16238 |
|
}, |
|
{ |
|
"epoch": 72.27111111111111, |
|
"grad_norm": 5.692650318145752, |
|
"learning_rate": 1.884400218675619e-05, |
|
"loss": 0.9577, |
|
"step": 16261 |
|
}, |
|
{ |
|
"epoch": 72.37333333333333, |
|
"grad_norm": 5.098461151123047, |
|
"learning_rate": 1.87147028168674e-05, |
|
"loss": 0.952, |
|
"step": 16284 |
|
}, |
|
{ |
|
"epoch": 72.47555555555556, |
|
"grad_norm": 5.3133745193481445, |
|
"learning_rate": 1.8585746362229706e-05, |
|
"loss": 0.9623, |
|
"step": 16307 |
|
}, |
|
{ |
|
"epoch": 72.57777777777778, |
|
"grad_norm": 5.299659729003906, |
|
"learning_rate": 1.8457134236322903e-05, |
|
"loss": 0.9505, |
|
"step": 16330 |
|
}, |
|
{ |
|
"epoch": 72.68, |
|
"grad_norm": 6.57431173324585, |
|
"learning_rate": 1.832886784885263e-05, |
|
"loss": 0.9665, |
|
"step": 16353 |
|
}, |
|
{ |
|
"epoch": 72.78222222222222, |
|
"grad_norm": 5.018616199493408, |
|
"learning_rate": 1.820094860573488e-05, |
|
"loss": 0.9565, |
|
"step": 16376 |
|
}, |
|
{ |
|
"epoch": 72.88444444444444, |
|
"grad_norm": 5.487111568450928, |
|
"learning_rate": 1.8073377909080685e-05, |
|
"loss": 0.9551, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 72.98666666666666, |
|
"grad_norm": 6.0984086990356445, |
|
"learning_rate": 1.7946157157180628e-05, |
|
"loss": 0.9743, |
|
"step": 16422 |
|
}, |
|
{ |
|
"epoch": 73.08888888888889, |
|
"grad_norm": 5.412441730499268, |
|
"learning_rate": 1.7819287744489636e-05, |
|
"loss": 0.9316, |
|
"step": 16445 |
|
}, |
|
{ |
|
"epoch": 73.19111111111111, |
|
"grad_norm": 5.8434929847717285, |
|
"learning_rate": 1.7692771061611603e-05, |
|
"loss": 0.947, |
|
"step": 16468 |
|
}, |
|
{ |
|
"epoch": 73.29333333333334, |
|
"grad_norm": 5.178957462310791, |
|
"learning_rate": 1.756660849528422e-05, |
|
"loss": 0.9455, |
|
"step": 16491 |
|
}, |
|
{ |
|
"epoch": 73.39555555555556, |
|
"grad_norm": 6.5831499099731445, |
|
"learning_rate": 1.7440801428363677e-05, |
|
"loss": 0.9469, |
|
"step": 16514 |
|
}, |
|
{ |
|
"epoch": 73.49777777777778, |
|
"grad_norm": 5.628024101257324, |
|
"learning_rate": 1.731535123980964e-05, |
|
"loss": 0.961, |
|
"step": 16537 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"grad_norm": 4.770416736602783, |
|
"learning_rate": 1.7190259304670038e-05, |
|
"loss": 0.9489, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 73.70222222222222, |
|
"grad_norm": 5.419926166534424, |
|
"learning_rate": 1.7065526994065973e-05, |
|
"loss": 0.9384, |
|
"step": 16583 |
|
}, |
|
{ |
|
"epoch": 73.80444444444444, |
|
"grad_norm": 5.695985794067383, |
|
"learning_rate": 1.6941155675176823e-05, |
|
"loss": 0.9386, |
|
"step": 16606 |
|
}, |
|
{ |
|
"epoch": 73.90666666666667, |
|
"grad_norm": 5.251271724700928, |
|
"learning_rate": 1.6817146711225073e-05, |
|
"loss": 0.9577, |
|
"step": 16629 |
|
}, |
|
{ |
|
"epoch": 74.00888888888889, |
|
"grad_norm": 5.220533847808838, |
|
"learning_rate": 1.669350146146156e-05, |
|
"loss": 0.9513, |
|
"step": 16652 |
|
}, |
|
{ |
|
"epoch": 74.11111111111111, |
|
"grad_norm": 5.326650142669678, |
|
"learning_rate": 1.65702212811504e-05, |
|
"loss": 0.9399, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 74.21333333333334, |
|
"grad_norm": 5.140909194946289, |
|
"learning_rate": 1.6447307521554273e-05, |
|
"loss": 0.9273, |
|
"step": 16698 |
|
}, |
|
{ |
|
"epoch": 74.31555555555556, |
|
"grad_norm": 5.344797611236572, |
|
"learning_rate": 1.6324761529919556e-05, |
|
"loss": 0.942, |
|
"step": 16721 |
|
}, |
|
{ |
|
"epoch": 74.41777777777777, |
|
"grad_norm": 5.0787835121154785, |
|
"learning_rate": 1.6202584649461505e-05, |
|
"loss": 0.9358, |
|
"step": 16744 |
|
}, |
|
{ |
|
"epoch": 74.52, |
|
"grad_norm": 4.678197383880615, |
|
"learning_rate": 1.608077821934965e-05, |
|
"loss": 0.9313, |
|
"step": 16767 |
|
}, |
|
{ |
|
"epoch": 74.62222222222222, |
|
"grad_norm": 5.813838005065918, |
|
"learning_rate": 1.5959343574692982e-05, |
|
"loss": 0.9375, |
|
"step": 16790 |
|
}, |
|
{ |
|
"epoch": 74.72444444444444, |
|
"grad_norm": 7.276843070983887, |
|
"learning_rate": 1.5838282046525444e-05, |
|
"loss": 0.9359, |
|
"step": 16813 |
|
}, |
|
{ |
|
"epoch": 74.82666666666667, |
|
"grad_norm": 5.635644435882568, |
|
"learning_rate": 1.571759496179123e-05, |
|
"loss": 0.9444, |
|
"step": 16836 |
|
}, |
|
{ |
|
"epoch": 74.92888888888889, |
|
"grad_norm": 5.5287556648254395, |
|
"learning_rate": 1.5597283643330347e-05, |
|
"loss": 0.9345, |
|
"step": 16859 |
|
}, |
|
{ |
|
"epoch": 75.03111111111112, |
|
"grad_norm": 5.956721782684326, |
|
"learning_rate": 1.547734940986404e-05, |
|
"loss": 0.9618, |
|
"step": 16882 |
|
}, |
|
{ |
|
"epoch": 75.13333333333334, |
|
"grad_norm": 6.450102806091309, |
|
"learning_rate": 1.535779357598033e-05, |
|
"loss": 0.9266, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 75.23555555555555, |
|
"grad_norm": 5.966337203979492, |
|
"learning_rate": 1.5238617452119697e-05, |
|
"loss": 0.9089, |
|
"step": 16928 |
|
}, |
|
{ |
|
"epoch": 75.33777777777777, |
|
"grad_norm": 5.400455474853516, |
|
"learning_rate": 1.5119822344560591e-05, |
|
"loss": 0.8967, |
|
"step": 16951 |
|
}, |
|
{ |
|
"epoch": 75.44, |
|
"grad_norm": 5.6878180503845215, |
|
"learning_rate": 1.5001409555405238e-05, |
|
"loss": 0.9058, |
|
"step": 16974 |
|
}, |
|
{ |
|
"epoch": 75.54222222222222, |
|
"grad_norm": 5.092850685119629, |
|
"learning_rate": 1.4883380382565244e-05, |
|
"loss": 0.9037, |
|
"step": 16997 |
|
}, |
|
{ |
|
"epoch": 75.64444444444445, |
|
"grad_norm": 7.444413185119629, |
|
"learning_rate": 1.4765736119747475e-05, |
|
"loss": 0.9191, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 75.74666666666667, |
|
"grad_norm": 5.114320755004883, |
|
"learning_rate": 1.4648478056439847e-05, |
|
"loss": 0.9132, |
|
"step": 17043 |
|
}, |
|
{ |
|
"epoch": 75.8488888888889, |
|
"grad_norm": 5.615855693817139, |
|
"learning_rate": 1.453160747789712e-05, |
|
"loss": 0.9064, |
|
"step": 17066 |
|
}, |
|
{ |
|
"epoch": 75.95111111111112, |
|
"grad_norm": 5.120584964752197, |
|
"learning_rate": 1.4415125665126933e-05, |
|
"loss": 0.9149, |
|
"step": 17089 |
|
}, |
|
{ |
|
"epoch": 76.05333333333333, |
|
"grad_norm": 5.242557048797607, |
|
"learning_rate": 1.4299033894875647e-05, |
|
"loss": 0.8938, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 76.15555555555555, |
|
"grad_norm": 5.4338297843933105, |
|
"learning_rate": 1.4183333439614449e-05, |
|
"loss": 0.8982, |
|
"step": 17135 |
|
}, |
|
{ |
|
"epoch": 76.25777777777778, |
|
"grad_norm": 4.58558988571167, |
|
"learning_rate": 1.4068025567525317e-05, |
|
"loss": 0.8992, |
|
"step": 17158 |
|
}, |
|
{ |
|
"epoch": 76.36, |
|
"grad_norm": 5.754461765289307, |
|
"learning_rate": 1.3953111542487202e-05, |
|
"loss": 0.91, |
|
"step": 17181 |
|
}, |
|
{ |
|
"epoch": 76.46222222222222, |
|
"grad_norm": 4.953834533691406, |
|
"learning_rate": 1.383859262406208e-05, |
|
"loss": 0.9014, |
|
"step": 17204 |
|
}, |
|
{ |
|
"epoch": 76.56444444444445, |
|
"grad_norm": 5.375875473022461, |
|
"learning_rate": 1.3724470067481255e-05, |
|
"loss": 0.9027, |
|
"step": 17227 |
|
}, |
|
{ |
|
"epoch": 76.66666666666667, |
|
"grad_norm": 5.019064426422119, |
|
"learning_rate": 1.3610745123631535e-05, |
|
"loss": 0.8902, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 76.7688888888889, |
|
"grad_norm": 7.214736461639404, |
|
"learning_rate": 1.3497419039041488e-05, |
|
"loss": 0.9004, |
|
"step": 17273 |
|
}, |
|
{ |
|
"epoch": 76.8711111111111, |
|
"grad_norm": 5.181694507598877, |
|
"learning_rate": 1.3384493055867885e-05, |
|
"loss": 0.8949, |
|
"step": 17296 |
|
}, |
|
{ |
|
"epoch": 76.97333333333333, |
|
"grad_norm": 5.116537094116211, |
|
"learning_rate": 1.3271968411881963e-05, |
|
"loss": 0.8958, |
|
"step": 17319 |
|
}, |
|
{ |
|
"epoch": 77.07555555555555, |
|
"grad_norm": 4.765411853790283, |
|
"learning_rate": 1.3159846340455967e-05, |
|
"loss": 0.8901, |
|
"step": 17342 |
|
}, |
|
{ |
|
"epoch": 77.17777777777778, |
|
"grad_norm": 4.765920639038086, |
|
"learning_rate": 1.3048128070549543e-05, |
|
"loss": 0.8875, |
|
"step": 17365 |
|
}, |
|
{ |
|
"epoch": 77.28, |
|
"grad_norm": 4.69777250289917, |
|
"learning_rate": 1.2936814826696324e-05, |
|
"loss": 0.881, |
|
"step": 17388 |
|
}, |
|
{ |
|
"epoch": 77.38222222222223, |
|
"grad_norm": 4.7684550285339355, |
|
"learning_rate": 1.2825907828990518e-05, |
|
"loss": 0.8835, |
|
"step": 17411 |
|
}, |
|
{ |
|
"epoch": 77.48444444444445, |
|
"grad_norm": 4.776817321777344, |
|
"learning_rate": 1.271540829307344e-05, |
|
"loss": 0.8896, |
|
"step": 17434 |
|
}, |
|
{ |
|
"epoch": 77.58666666666667, |
|
"grad_norm": 4.983736038208008, |
|
"learning_rate": 1.2605317430120311e-05, |
|
"loss": 0.8845, |
|
"step": 17457 |
|
}, |
|
{ |
|
"epoch": 77.68888888888888, |
|
"grad_norm": 5.313802719116211, |
|
"learning_rate": 1.2495636446826891e-05, |
|
"loss": 0.8922, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 77.7911111111111, |
|
"grad_norm": 4.997971534729004, |
|
"learning_rate": 1.2386366545396328e-05, |
|
"loss": 0.8856, |
|
"step": 17503 |
|
}, |
|
{ |
|
"epoch": 77.89333333333333, |
|
"grad_norm": 5.876720905303955, |
|
"learning_rate": 1.2277508923525876e-05, |
|
"loss": 0.8838, |
|
"step": 17526 |
|
}, |
|
{ |
|
"epoch": 77.99555555555555, |
|
"grad_norm": 4.762071132659912, |
|
"learning_rate": 1.216906477439389e-05, |
|
"loss": 0.8814, |
|
"step": 17549 |
|
}, |
|
{ |
|
"epoch": 78.09777777777778, |
|
"grad_norm": 4.621342658996582, |
|
"learning_rate": 1.2061035286646677e-05, |
|
"loss": 0.8764, |
|
"step": 17572 |
|
}, |
|
{ |
|
"epoch": 78.2, |
|
"grad_norm": 5.084928035736084, |
|
"learning_rate": 1.1953421644385443e-05, |
|
"loss": 0.8747, |
|
"step": 17595 |
|
}, |
|
{ |
|
"epoch": 78.30222222222223, |
|
"grad_norm": 4.952382564544678, |
|
"learning_rate": 1.1846225027153401e-05, |
|
"loss": 0.8886, |
|
"step": 17618 |
|
}, |
|
{ |
|
"epoch": 78.40444444444445, |
|
"grad_norm": 4.579256534576416, |
|
"learning_rate": 1.1739446609922739e-05, |
|
"loss": 0.8729, |
|
"step": 17641 |
|
}, |
|
{ |
|
"epoch": 78.50666666666666, |
|
"grad_norm": 5.518742561340332, |
|
"learning_rate": 1.1633087563081847e-05, |
|
"loss": 0.8863, |
|
"step": 17664 |
|
}, |
|
{ |
|
"epoch": 78.60888888888888, |
|
"grad_norm": 4.966059684753418, |
|
"learning_rate": 1.1527149052422382e-05, |
|
"loss": 0.8839, |
|
"step": 17687 |
|
}, |
|
{ |
|
"epoch": 78.71111111111111, |
|
"grad_norm": 5.001364707946777, |
|
"learning_rate": 1.1421632239126578e-05, |
|
"loss": 0.8893, |
|
"step": 17710 |
|
}, |
|
{ |
|
"epoch": 78.81333333333333, |
|
"grad_norm": 4.7873854637146, |
|
"learning_rate": 1.131653827975449e-05, |
|
"loss": 0.8695, |
|
"step": 17733 |
|
}, |
|
{ |
|
"epoch": 78.91555555555556, |
|
"grad_norm": 5.2424516677856445, |
|
"learning_rate": 1.1211868326231273e-05, |
|
"loss": 0.8857, |
|
"step": 17756 |
|
}, |
|
{ |
|
"epoch": 79.01777777777778, |
|
"grad_norm": 4.72099494934082, |
|
"learning_rate": 1.1107623525834631e-05, |
|
"loss": 0.8844, |
|
"step": 17779 |
|
}, |
|
{ |
|
"epoch": 79.12, |
|
"grad_norm": 5.387650489807129, |
|
"learning_rate": 1.1003805021182168e-05, |
|
"loss": 0.8672, |
|
"step": 17802 |
|
}, |
|
{ |
|
"epoch": 79.22222222222223, |
|
"grad_norm": 6.549093246459961, |
|
"learning_rate": 1.0900413950218947e-05, |
|
"loss": 0.8639, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 79.32444444444444, |
|
"grad_norm": 5.805511951446533, |
|
"learning_rate": 1.0797451446204904e-05, |
|
"loss": 0.8738, |
|
"step": 17848 |
|
}, |
|
{ |
|
"epoch": 79.42666666666666, |
|
"grad_norm": 5.417078018188477, |
|
"learning_rate": 1.0694918637702562e-05, |
|
"loss": 0.8815, |
|
"step": 17871 |
|
}, |
|
{ |
|
"epoch": 79.52888888888889, |
|
"grad_norm": 4.696217060089111, |
|
"learning_rate": 1.0592816648564535e-05, |
|
"loss": 0.8824, |
|
"step": 17894 |
|
}, |
|
{ |
|
"epoch": 79.63111111111111, |
|
"grad_norm": 4.98297119140625, |
|
"learning_rate": 1.0491146597921309e-05, |
|
"loss": 0.8617, |
|
"step": 17917 |
|
}, |
|
{ |
|
"epoch": 79.73333333333333, |
|
"grad_norm": 4.85457181930542, |
|
"learning_rate": 1.0389909600168911e-05, |
|
"loss": 0.8715, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 79.83555555555556, |
|
"grad_norm": 5.266817092895508, |
|
"learning_rate": 1.0289106764956702e-05, |
|
"loss": 0.8754, |
|
"step": 17963 |
|
}, |
|
{ |
|
"epoch": 79.93777777777778, |
|
"grad_norm": 5.948962688446045, |
|
"learning_rate": 1.0188739197175268e-05, |
|
"loss": 0.8806, |
|
"step": 17986 |
|
}, |
|
{ |
|
"epoch": 80.04, |
|
"grad_norm": 6.155448913574219, |
|
"learning_rate": 1.0088807996944211e-05, |
|
"loss": 0.8767, |
|
"step": 18009 |
|
}, |
|
{ |
|
"epoch": 80.14222222222222, |
|
"grad_norm": 4.785376071929932, |
|
"learning_rate": 9.989314259600219e-06, |
|
"loss": 0.8719, |
|
"step": 18032 |
|
}, |
|
{ |
|
"epoch": 80.24444444444444, |
|
"grad_norm": 4.980493545532227, |
|
"learning_rate": 9.890259075684915e-06, |
|
"loss": 0.866, |
|
"step": 18055 |
|
}, |
|
{ |
|
"epoch": 80.34666666666666, |
|
"grad_norm": 6.7485032081604, |
|
"learning_rate": 9.791643530933032e-06, |
|
"loss": 0.8639, |
|
"step": 18078 |
|
}, |
|
{ |
|
"epoch": 80.44888888888889, |
|
"grad_norm": 5.030679225921631, |
|
"learning_rate": 9.693468706260456e-06, |
|
"loss": 0.8707, |
|
"step": 18101 |
|
}, |
|
{ |
|
"epoch": 80.55111111111111, |
|
"grad_norm": 5.043888568878174, |
|
"learning_rate": 9.595735677752343e-06, |
|
"loss": 0.8603, |
|
"step": 18124 |
|
}, |
|
{ |
|
"epoch": 80.65333333333334, |
|
"grad_norm": 5.022198677062988, |
|
"learning_rate": 9.49844551665141e-06, |
|
"loss": 0.8598, |
|
"step": 18147 |
|
}, |
|
{ |
|
"epoch": 80.75555555555556, |
|
"grad_norm": 6.346147060394287, |
|
"learning_rate": 9.401599289346091e-06, |
|
"loss": 0.8663, |
|
"step": 18170 |
|
}, |
|
{ |
|
"epoch": 80.85777777777778, |
|
"grad_norm": 5.1296610832214355, |
|
"learning_rate": 9.305198057358972e-06, |
|
"loss": 0.8703, |
|
"step": 18193 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"grad_norm": 5.117784023284912, |
|
"learning_rate": 9.209242877335005e-06, |
|
"loss": 0.8624, |
|
"step": 18216 |
|
}, |
|
{ |
|
"epoch": 81.06222222222222, |
|
"grad_norm": 4.949360370635986, |
|
"learning_rate": 9.113734801030076e-06, |
|
"loss": 0.8559, |
|
"step": 18239 |
|
}, |
|
{ |
|
"epoch": 81.16444444444444, |
|
"grad_norm": 4.507094860076904, |
|
"learning_rate": 9.018674875299393e-06, |
|
"loss": 0.861, |
|
"step": 18262 |
|
}, |
|
{ |
|
"epoch": 81.26666666666667, |
|
"grad_norm": 5.280154705047607, |
|
"learning_rate": 8.924064142085985e-06, |
|
"loss": 0.8558, |
|
"step": 18285 |
|
}, |
|
{ |
|
"epoch": 81.36888888888889, |
|
"grad_norm": 4.777374267578125, |
|
"learning_rate": 8.829903638409388e-06, |
|
"loss": 0.8598, |
|
"step": 18308 |
|
}, |
|
{ |
|
"epoch": 81.47111111111111, |
|
"grad_norm": 5.726168632507324, |
|
"learning_rate": 8.736194396354153e-06, |
|
"loss": 0.8649, |
|
"step": 18331 |
|
}, |
|
{ |
|
"epoch": 81.57333333333334, |
|
"grad_norm": 5.1066484451293945, |
|
"learning_rate": 8.642937443058646e-06, |
|
"loss": 0.8558, |
|
"step": 18354 |
|
}, |
|
{ |
|
"epoch": 81.67555555555556, |
|
"grad_norm": 5.291098117828369, |
|
"learning_rate": 8.550133800703686e-06, |
|
"loss": 0.8572, |
|
"step": 18377 |
|
}, |
|
{ |
|
"epoch": 81.77777777777777, |
|
"grad_norm": 4.3951334953308105, |
|
"learning_rate": 8.457784486501452e-06, |
|
"loss": 0.8713, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 81.88, |
|
"grad_norm": 4.807311058044434, |
|
"learning_rate": 8.36589051268421e-06, |
|
"loss": 0.8704, |
|
"step": 18423 |
|
}, |
|
{ |
|
"epoch": 81.98222222222222, |
|
"grad_norm": 6.832765579223633, |
|
"learning_rate": 8.274452886493333e-06, |
|
"loss": 0.862, |
|
"step": 18446 |
|
}, |
|
{ |
|
"epoch": 82.08444444444444, |
|
"grad_norm": 4.566845417022705, |
|
"learning_rate": 8.183472610168197e-06, |
|
"loss": 0.8604, |
|
"step": 18469 |
|
}, |
|
{ |
|
"epoch": 82.18666666666667, |
|
"grad_norm": 4.8708648681640625, |
|
"learning_rate": 8.092950680935185e-06, |
|
"loss": 0.8589, |
|
"step": 18492 |
|
}, |
|
{ |
|
"epoch": 82.28888888888889, |
|
"grad_norm": 5.396876335144043, |
|
"learning_rate": 8.002888090996814e-06, |
|
"loss": 0.8608, |
|
"step": 18515 |
|
}, |
|
{ |
|
"epoch": 82.39111111111112, |
|
"grad_norm": 4.885883808135986, |
|
"learning_rate": 7.913285827520794e-06, |
|
"loss": 0.8484, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 82.49333333333334, |
|
"grad_norm": 4.598787307739258, |
|
"learning_rate": 7.824144872629269e-06, |
|
"loss": 0.8576, |
|
"step": 18561 |
|
}, |
|
{ |
|
"epoch": 82.59555555555555, |
|
"grad_norm": 4.590323448181152, |
|
"learning_rate": 7.735466203387992e-06, |
|
"loss": 0.8554, |
|
"step": 18584 |
|
}, |
|
{ |
|
"epoch": 82.69777777777777, |
|
"grad_norm": 5.497690200805664, |
|
"learning_rate": 7.647250791795668e-06, |
|
"loss": 0.855, |
|
"step": 18607 |
|
}, |
|
{ |
|
"epoch": 82.8, |
|
"grad_norm": 4.905009746551514, |
|
"learning_rate": 7.559499604773279e-06, |
|
"loss": 0.8563, |
|
"step": 18630 |
|
}, |
|
{ |
|
"epoch": 82.90222222222222, |
|
"grad_norm": 4.675111770629883, |
|
"learning_rate": 7.47221360415346e-06, |
|
"loss": 0.8597, |
|
"step": 18653 |
|
}, |
|
{ |
|
"epoch": 83.00444444444445, |
|
"grad_norm": 5.6808576583862305, |
|
"learning_rate": 7.385393746670022e-06, |
|
"loss": 0.8566, |
|
"step": 18676 |
|
}, |
|
{ |
|
"epoch": 83.10666666666667, |
|
"grad_norm": 6.699379920959473, |
|
"learning_rate": 7.299040983947369e-06, |
|
"loss": 0.856, |
|
"step": 18699 |
|
}, |
|
{ |
|
"epoch": 83.2088888888889, |
|
"grad_norm": 5.053982257843018, |
|
"learning_rate": 7.213156262490173e-06, |
|
"loss": 0.8481, |
|
"step": 18722 |
|
}, |
|
{ |
|
"epoch": 83.31111111111112, |
|
"grad_norm": 5.297053337097168, |
|
"learning_rate": 7.127740523672915e-06, |
|
"loss": 0.85, |
|
"step": 18745 |
|
}, |
|
{ |
|
"epoch": 83.41333333333333, |
|
"grad_norm": 5.744291305541992, |
|
"learning_rate": 7.042794703729622e-06, |
|
"loss": 0.8618, |
|
"step": 18768 |
|
}, |
|
{ |
|
"epoch": 83.51555555555555, |
|
"grad_norm": 4.679412364959717, |
|
"learning_rate": 6.95831973374359e-06, |
|
"loss": 0.8403, |
|
"step": 18791 |
|
}, |
|
{ |
|
"epoch": 83.61777777777777, |
|
"grad_norm": 4.38852596282959, |
|
"learning_rate": 6.874316539637127e-06, |
|
"loss": 0.8464, |
|
"step": 18814 |
|
}, |
|
{ |
|
"epoch": 83.72, |
|
"grad_norm": 4.899384021759033, |
|
"learning_rate": 6.7907860421615066e-06, |
|
"loss": 0.8523, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 83.82222222222222, |
|
"grad_norm": 5.16193962097168, |
|
"learning_rate": 6.707729156886777e-06, |
|
"loss": 0.8502, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 83.92444444444445, |
|
"grad_norm": 4.833446979522705, |
|
"learning_rate": 6.625146794191794e-06, |
|
"loss": 0.8551, |
|
"step": 18883 |
|
}, |
|
{ |
|
"epoch": 84.02666666666667, |
|
"grad_norm": 4.920324325561523, |
|
"learning_rate": 6.543039859254185e-06, |
|
"loss": 0.8525, |
|
"step": 18906 |
|
}, |
|
{ |
|
"epoch": 84.1288888888889, |
|
"grad_norm": 5.322509765625, |
|
"learning_rate": 6.4614092520404905e-06, |
|
"loss": 0.8534, |
|
"step": 18929 |
|
}, |
|
{ |
|
"epoch": 84.2311111111111, |
|
"grad_norm": 5.062963485717773, |
|
"learning_rate": 6.380255867296253e-06, |
|
"loss": 0.8519, |
|
"step": 18952 |
|
}, |
|
{ |
|
"epoch": 84.33333333333333, |
|
"grad_norm": 5.186446666717529, |
|
"learning_rate": 6.299580594536214e-06, |
|
"loss": 0.8445, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 84.43555555555555, |
|
"grad_norm": 5.609063148498535, |
|
"learning_rate": 6.219384318034588e-06, |
|
"loss": 0.8432, |
|
"step": 18998 |
|
}, |
|
{ |
|
"epoch": 84.53777777777778, |
|
"grad_norm": 4.684319972991943, |
|
"learning_rate": 6.1396679168153445e-06, |
|
"loss": 0.8434, |
|
"step": 19021 |
|
}, |
|
{ |
|
"epoch": 84.64, |
|
"grad_norm": 4.717188835144043, |
|
"learning_rate": 6.060432264642601e-06, |
|
"loss": 0.8451, |
|
"step": 19044 |
|
}, |
|
{ |
|
"epoch": 84.74222222222222, |
|
"grad_norm": 6.810020446777344, |
|
"learning_rate": 5.981678230011006e-06, |
|
"loss": 0.8425, |
|
"step": 19067 |
|
}, |
|
{ |
|
"epoch": 84.84444444444445, |
|
"grad_norm": 4.562713146209717, |
|
"learning_rate": 5.903406676136264e-06, |
|
"loss": 0.8468, |
|
"step": 19090 |
|
}, |
|
{ |
|
"epoch": 84.94666666666667, |
|
"grad_norm": 5.388665199279785, |
|
"learning_rate": 5.825618460945636e-06, |
|
"loss": 0.8418, |
|
"step": 19113 |
|
}, |
|
{ |
|
"epoch": 85.04888888888888, |
|
"grad_norm": 5.054759979248047, |
|
"learning_rate": 5.748314437068558e-06, |
|
"loss": 0.8417, |
|
"step": 19136 |
|
}, |
|
{ |
|
"epoch": 85.1511111111111, |
|
"grad_norm": 4.943572521209717, |
|
"learning_rate": 5.671495451827308e-06, |
|
"loss": 0.8444, |
|
"step": 19159 |
|
}, |
|
{ |
|
"epoch": 85.25333333333333, |
|
"grad_norm": 4.801841735839844, |
|
"learning_rate": 5.595162347227661e-06, |
|
"loss": 0.8407, |
|
"step": 19182 |
|
}, |
|
{ |
|
"epoch": 85.35555555555555, |
|
"grad_norm": 4.94541072845459, |
|
"learning_rate": 5.519315959949745e-06, |
|
"loss": 0.8413, |
|
"step": 19205 |
|
}, |
|
{ |
|
"epoch": 85.45777777777778, |
|
"grad_norm": 5.529304027557373, |
|
"learning_rate": 5.443957121338777e-06, |
|
"loss": 0.8462, |
|
"step": 19228 |
|
}, |
|
{ |
|
"epoch": 85.56, |
|
"grad_norm": 4.735396385192871, |
|
"learning_rate": 5.36908665739605e-06, |
|
"loss": 0.8491, |
|
"step": 19251 |
|
}, |
|
{ |
|
"epoch": 85.66222222222223, |
|
"grad_norm": 5.091115474700928, |
|
"learning_rate": 5.294705388769772e-06, |
|
"loss": 0.8444, |
|
"step": 19274 |
|
}, |
|
{ |
|
"epoch": 85.76444444444445, |
|
"grad_norm": 4.820996284484863, |
|
"learning_rate": 5.220814130746165e-06, |
|
"loss": 0.8509, |
|
"step": 19297 |
|
}, |
|
{ |
|
"epoch": 85.86666666666666, |
|
"grad_norm": 4.448352336883545, |
|
"learning_rate": 5.1474136932404935e-06, |
|
"loss": 0.8339, |
|
"step": 19320 |
|
}, |
|
{ |
|
"epoch": 85.96888888888888, |
|
"grad_norm": 4.6064019203186035, |
|
"learning_rate": 5.07450488078815e-06, |
|
"loss": 0.8115, |
|
"step": 19343 |
|
}, |
|
{ |
|
"epoch": 86.07111111111111, |
|
"grad_norm": 6.598939895629883, |
|
"learning_rate": 5.002088492535906e-06, |
|
"loss": 0.818, |
|
"step": 19366 |
|
}, |
|
{ |
|
"epoch": 86.17333333333333, |
|
"grad_norm": 4.426856994628906, |
|
"learning_rate": 4.930165322233082e-06, |
|
"loss": 0.8147, |
|
"step": 19389 |
|
}, |
|
{ |
|
"epoch": 86.27555555555556, |
|
"grad_norm": 4.873010635375977, |
|
"learning_rate": 4.858736158222921e-06, |
|
"loss": 0.8146, |
|
"step": 19412 |
|
}, |
|
{ |
|
"epoch": 86.37777777777778, |
|
"grad_norm": 4.8856520652771, |
|
"learning_rate": 4.787801783433871e-06, |
|
"loss": 0.8158, |
|
"step": 19435 |
|
}, |
|
{ |
|
"epoch": 86.48, |
|
"grad_norm": 5.177906513214111, |
|
"learning_rate": 4.717362975371059e-06, |
|
"loss": 0.8187, |
|
"step": 19458 |
|
}, |
|
{ |
|
"epoch": 86.58222222222223, |
|
"grad_norm": 4.954709529876709, |
|
"learning_rate": 4.647420506107775e-06, |
|
"loss": 0.8131, |
|
"step": 19481 |
|
}, |
|
{ |
|
"epoch": 86.68444444444444, |
|
"grad_norm": 4.427014350891113, |
|
"learning_rate": 4.577975142276925e-06, |
|
"loss": 0.8263, |
|
"step": 19504 |
|
}, |
|
{ |
|
"epoch": 86.78666666666666, |
|
"grad_norm": 5.581162929534912, |
|
"learning_rate": 4.509027645062758e-06, |
|
"loss": 0.8201, |
|
"step": 19527 |
|
}, |
|
{ |
|
"epoch": 86.88888888888889, |
|
"grad_norm": 4.889328479766846, |
|
"learning_rate": 4.4405787701923885e-06, |
|
"loss": 0.8239, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 86.99111111111111, |
|
"grad_norm": 4.658565998077393, |
|
"learning_rate": 4.3726292679276305e-06, |
|
"loss": 0.8211, |
|
"step": 19573 |
|
}, |
|
{ |
|
"epoch": 87.09333333333333, |
|
"grad_norm": 5.102555751800537, |
|
"learning_rate": 4.305179883056687e-06, |
|
"loss": 0.8154, |
|
"step": 19596 |
|
}, |
|
{ |
|
"epoch": 87.19555555555556, |
|
"grad_norm": 4.951329231262207, |
|
"learning_rate": 4.23823135488603e-06, |
|
"loss": 0.8182, |
|
"step": 19619 |
|
}, |
|
{ |
|
"epoch": 87.29777777777778, |
|
"grad_norm": 5.642242908477783, |
|
"learning_rate": 4.171784417232305e-06, |
|
"loss": 0.8076, |
|
"step": 19642 |
|
}, |
|
{ |
|
"epoch": 87.4, |
|
"grad_norm": 5.003154277801514, |
|
"learning_rate": 4.10583979841424e-06, |
|
"loss": 0.8129, |
|
"step": 19665 |
|
}, |
|
{ |
|
"epoch": 87.50222222222222, |
|
"grad_norm": 5.778168678283691, |
|
"learning_rate": 4.040398221244718e-06, |
|
"loss": 0.8123, |
|
"step": 19688 |
|
}, |
|
{ |
|
"epoch": 87.60444444444444, |
|
"grad_norm": 5.08914852142334, |
|
"learning_rate": 3.975460403022801e-06, |
|
"loss": 0.8149, |
|
"step": 19711 |
|
}, |
|
{ |
|
"epoch": 87.70666666666666, |
|
"grad_norm": 4.585403919219971, |
|
"learning_rate": 3.9110270555259345e-06, |
|
"loss": 0.8197, |
|
"step": 19734 |
|
}, |
|
{ |
|
"epoch": 87.80888888888889, |
|
"grad_norm": 4.91745138168335, |
|
"learning_rate": 3.84709888500207e-06, |
|
"loss": 0.8175, |
|
"step": 19757 |
|
}, |
|
{ |
|
"epoch": 87.91111111111111, |
|
"grad_norm": 5.540400981903076, |
|
"learning_rate": 3.7836765921619888e-06, |
|
"loss": 0.8115, |
|
"step": 19780 |
|
}, |
|
{ |
|
"epoch": 88.01333333333334, |
|
"grad_norm": 4.485517501831055, |
|
"learning_rate": 3.720760872171569e-06, |
|
"loss": 0.8122, |
|
"step": 19803 |
|
}, |
|
{ |
|
"epoch": 88.11555555555556, |
|
"grad_norm": 4.355061054229736, |
|
"learning_rate": 3.658352414644206e-06, |
|
"loss": 0.8105, |
|
"step": 19826 |
|
}, |
|
{ |
|
"epoch": 88.21777777777778, |
|
"grad_norm": 5.2161784172058105, |
|
"learning_rate": 3.596451903633247e-06, |
|
"loss": 0.8115, |
|
"step": 19849 |
|
}, |
|
{ |
|
"epoch": 88.32, |
|
"grad_norm": 4.382901191711426, |
|
"learning_rate": 3.535060017624453e-06, |
|
"loss": 0.8118, |
|
"step": 19872 |
|
}, |
|
{ |
|
"epoch": 88.42222222222222, |
|
"grad_norm": 5.805255889892578, |
|
"learning_rate": 3.47417742952863e-06, |
|
"loss": 0.8046, |
|
"step": 19895 |
|
}, |
|
{ |
|
"epoch": 88.52444444444444, |
|
"grad_norm": 4.063962936401367, |
|
"learning_rate": 3.4138048066741867e-06, |
|
"loss": 0.8136, |
|
"step": 19918 |
|
}, |
|
{ |
|
"epoch": 88.62666666666667, |
|
"grad_norm": 5.049718379974365, |
|
"learning_rate": 3.3539428107998814e-06, |
|
"loss": 0.8071, |
|
"step": 19941 |
|
}, |
|
{ |
|
"epoch": 88.72888888888889, |
|
"grad_norm": 4.287143230438232, |
|
"learning_rate": 3.294592098047494e-06, |
|
"loss": 0.8064, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 88.83111111111111, |
|
"grad_norm": 5.841145992279053, |
|
"learning_rate": 3.2357533189547098e-06, |
|
"loss": 0.8188, |
|
"step": 19987 |
|
}, |
|
{ |
|
"epoch": 88.93333333333334, |
|
"grad_norm": 6.014995098114014, |
|
"learning_rate": 3.1774271184479675e-06, |
|
"loss": 0.8114, |
|
"step": 20010 |
|
}, |
|
{ |
|
"epoch": 89.03555555555556, |
|
"grad_norm": 4.5376386642456055, |
|
"learning_rate": 3.1196141358353357e-06, |
|
"loss": 0.8135, |
|
"step": 20033 |
|
}, |
|
{ |
|
"epoch": 89.13777777777777, |
|
"grad_norm": 4.438096523284912, |
|
"learning_rate": 3.0623150047995873e-06, |
|
"loss": 0.8091, |
|
"step": 20056 |
|
}, |
|
{ |
|
"epoch": 89.24, |
|
"grad_norm": 4.940515518188477, |
|
"learning_rate": 3.005530353391195e-06, |
|
"loss": 0.812, |
|
"step": 20079 |
|
}, |
|
{ |
|
"epoch": 89.34222222222222, |
|
"grad_norm": 4.826828479766846, |
|
"learning_rate": 2.9492608040214862e-06, |
|
"loss": 0.8123, |
|
"step": 20102 |
|
}, |
|
{ |
|
"epoch": 89.44444444444444, |
|
"grad_norm": 4.983479976654053, |
|
"learning_rate": 2.893506973455773e-06, |
|
"loss": 0.8081, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 89.54666666666667, |
|
"grad_norm": 6.005835056304932, |
|
"learning_rate": 2.838269472806654e-06, |
|
"loss": 0.8095, |
|
"step": 20148 |
|
}, |
|
{ |
|
"epoch": 89.64888888888889, |
|
"grad_norm": 4.9561662673950195, |
|
"learning_rate": 2.7835489075272727e-06, |
|
"loss": 0.8061, |
|
"step": 20171 |
|
}, |
|
{ |
|
"epoch": 89.75111111111111, |
|
"grad_norm": 5.078367233276367, |
|
"learning_rate": 2.729345877404671e-06, |
|
"loss": 0.7997, |
|
"step": 20194 |
|
}, |
|
{ |
|
"epoch": 89.85333333333334, |
|
"grad_norm": 4.345983505249023, |
|
"learning_rate": 2.675660976553268e-06, |
|
"loss": 0.8101, |
|
"step": 20217 |
|
}, |
|
{ |
|
"epoch": 89.95555555555555, |
|
"grad_norm": 4.390908241271973, |
|
"learning_rate": 2.6224947934082923e-06, |
|
"loss": 0.8016, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 90.05777777777777, |
|
"grad_norm": 4.5562028884887695, |
|
"learning_rate": 2.5698479107193697e-06, |
|
"loss": 0.8039, |
|
"step": 20263 |
|
}, |
|
{ |
|
"epoch": 90.16, |
|
"grad_norm": 4.685390472412109, |
|
"learning_rate": 2.517720905544102e-06, |
|
"loss": 0.7952, |
|
"step": 20286 |
|
}, |
|
{ |
|
"epoch": 90.26222222222222, |
|
"grad_norm": 4.973295211791992, |
|
"learning_rate": 2.466114349241794e-06, |
|
"loss": 0.809, |
|
"step": 20309 |
|
}, |
|
{ |
|
"epoch": 90.36444444444444, |
|
"grad_norm": 5.430562496185303, |
|
"learning_rate": 2.4150288074671346e-06, |
|
"loss": 0.8088, |
|
"step": 20332 |
|
}, |
|
{ |
|
"epoch": 90.46666666666667, |
|
"grad_norm": 4.49529504776001, |
|
"learning_rate": 2.3644648401640156e-06, |
|
"loss": 0.8057, |
|
"step": 20355 |
|
}, |
|
{ |
|
"epoch": 90.56888888888889, |
|
"grad_norm": 5.173520565032959, |
|
"learning_rate": 2.314423001559424e-06, |
|
"loss": 0.8205, |
|
"step": 20378 |
|
}, |
|
{ |
|
"epoch": 90.67111111111112, |
|
"grad_norm": 5.084122657775879, |
|
"learning_rate": 2.264903840157312e-06, |
|
"loss": 0.8096, |
|
"step": 20401 |
|
}, |
|
{ |
|
"epoch": 90.77333333333333, |
|
"grad_norm": 4.675368309020996, |
|
"learning_rate": 2.2159078987326554e-06, |
|
"loss": 0.8109, |
|
"step": 20424 |
|
}, |
|
{ |
|
"epoch": 90.87555555555555, |
|
"grad_norm": 4.598373889923096, |
|
"learning_rate": 2.167435714325411e-06, |
|
"loss": 0.7989, |
|
"step": 20447 |
|
}, |
|
{ |
|
"epoch": 90.97777777777777, |
|
"grad_norm": 4.149188995361328, |
|
"learning_rate": 2.1194878182347334e-06, |
|
"loss": 0.8142, |
|
"step": 20470 |
|
}, |
|
{ |
|
"epoch": 91.08, |
|
"grad_norm": 5.164962291717529, |
|
"learning_rate": 2.0720647360130685e-06, |
|
"loss": 0.8096, |
|
"step": 20493 |
|
}, |
|
{ |
|
"epoch": 91.18222222222222, |
|
"grad_norm": 5.351869106292725, |
|
"learning_rate": 2.0251669874604474e-06, |
|
"loss": 0.8036, |
|
"step": 20516 |
|
}, |
|
{ |
|
"epoch": 91.28444444444445, |
|
"grad_norm": 5.2852935791015625, |
|
"learning_rate": 1.9787950866187565e-06, |
|
"loss": 0.8057, |
|
"step": 20539 |
|
}, |
|
{ |
|
"epoch": 91.38666666666667, |
|
"grad_norm": 6.784205436706543, |
|
"learning_rate": 1.9329495417661046e-06, |
|
"loss": 0.8031, |
|
"step": 20562 |
|
}, |
|
{ |
|
"epoch": 91.4888888888889, |
|
"grad_norm": 4.940450668334961, |
|
"learning_rate": 1.887630855411282e-06, |
|
"loss": 0.8066, |
|
"step": 20585 |
|
}, |
|
{ |
|
"epoch": 91.5911111111111, |
|
"grad_norm": 4.77994441986084, |
|
"learning_rate": 1.84283952428822e-06, |
|
"loss": 0.8038, |
|
"step": 20608 |
|
}, |
|
{ |
|
"epoch": 91.69333333333333, |
|
"grad_norm": 4.902866840362549, |
|
"learning_rate": 1.798576039350558e-06, |
|
"loss": 0.8043, |
|
"step": 20631 |
|
}, |
|
{ |
|
"epoch": 91.79555555555555, |
|
"grad_norm": 5.100454330444336, |
|
"learning_rate": 1.7548408857662623e-06, |
|
"loss": 0.8008, |
|
"step": 20654 |
|
}, |
|
{ |
|
"epoch": 91.89777777777778, |
|
"grad_norm": 4.9377264976501465, |
|
"learning_rate": 1.7116345429123104e-06, |
|
"loss": 0.8098, |
|
"step": 20677 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"grad_norm": 5.0082292556762695, |
|
"learning_rate": 1.6689574843694433e-06, |
|
"loss": 0.7992, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 92.10222222222222, |
|
"grad_norm": 4.688179016113281, |
|
"learning_rate": 1.6268101779169375e-06, |
|
"loss": 0.7928, |
|
"step": 20723 |
|
}, |
|
{ |
|
"epoch": 92.20444444444445, |
|
"grad_norm": 4.243449687957764, |
|
"learning_rate": 1.5851930855275365e-06, |
|
"loss": 0.7957, |
|
"step": 20746 |
|
}, |
|
{ |
|
"epoch": 92.30666666666667, |
|
"grad_norm": 4.956583499908447, |
|
"learning_rate": 1.544106663362338e-06, |
|
"loss": 0.8073, |
|
"step": 20769 |
|
}, |
|
{ |
|
"epoch": 92.4088888888889, |
|
"grad_norm": 4.556548118591309, |
|
"learning_rate": 1.503551361765826e-06, |
|
"loss": 0.8019, |
|
"step": 20792 |
|
}, |
|
{ |
|
"epoch": 92.5111111111111, |
|
"grad_norm": 6.762635707855225, |
|
"learning_rate": 1.4635276252608965e-06, |
|
"loss": 0.8084, |
|
"step": 20815 |
|
}, |
|
{ |
|
"epoch": 92.61333333333333, |
|
"grad_norm": 5.724966049194336, |
|
"learning_rate": 1.4240358925440457e-06, |
|
"loss": 0.8008, |
|
"step": 20838 |
|
}, |
|
{ |
|
"epoch": 92.71555555555555, |
|
"grad_norm": 5.445995330810547, |
|
"learning_rate": 1.3850765964805e-06, |
|
"loss": 0.802, |
|
"step": 20861 |
|
}, |
|
{ |
|
"epoch": 92.81777777777778, |
|
"grad_norm": 4.807301044464111, |
|
"learning_rate": 1.3466501640994944e-06, |
|
"loss": 0.8038, |
|
"step": 20884 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"grad_norm": 5.612717151641846, |
|
"learning_rate": 1.308757016589618e-06, |
|
"loss": 0.7996, |
|
"step": 20907 |
|
}, |
|
{ |
|
"epoch": 93.02222222222223, |
|
"grad_norm": 4.5359296798706055, |
|
"learning_rate": 1.2713975692941415e-06, |
|
"loss": 0.801, |
|
"step": 20930 |
|
}, |
|
{ |
|
"epoch": 93.12444444444445, |
|
"grad_norm": 4.222482681274414, |
|
"learning_rate": 1.2345722317065267e-06, |
|
"loss": 0.7996, |
|
"step": 20953 |
|
}, |
|
{ |
|
"epoch": 93.22666666666667, |
|
"grad_norm": 4.250333786010742, |
|
"learning_rate": 1.19828140746589e-06, |
|
"loss": 0.8072, |
|
"step": 20976 |
|
}, |
|
{ |
|
"epoch": 93.32888888888888, |
|
"grad_norm": 4.197777271270752, |
|
"learning_rate": 1.1625254943526065e-06, |
|
"loss": 0.795, |
|
"step": 20999 |
|
}, |
|
{ |
|
"epoch": 93.43111111111111, |
|
"grad_norm": 5.79392671585083, |
|
"learning_rate": 1.1273048842839307e-06, |
|
"loss": 0.8076, |
|
"step": 21022 |
|
}, |
|
{ |
|
"epoch": 93.53333333333333, |
|
"grad_norm": 4.919564723968506, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 0.802, |
|
"step": 21045 |
|
}, |
|
{ |
|
"epoch": 93.63555555555556, |
|
"grad_norm": 5.422025203704834, |
|
"learning_rate": 1.0584711116081837e-06, |
|
"loss": 0.8141, |
|
"step": 21068 |
|
}, |
|
{ |
|
"epoch": 93.73777777777778, |
|
"grad_norm": 4.949449062347412, |
|
"learning_rate": 1.0248587034817237e-06, |
|
"loss": 0.8001, |
|
"step": 21091 |
|
}, |
|
{ |
|
"epoch": 93.84, |
|
"grad_norm": 4.578461647033691, |
|
"learning_rate": 9.917831073528504e-07, |
|
"loss": 0.7959, |
|
"step": 21114 |
|
}, |
|
{ |
|
"epoch": 93.94222222222223, |
|
"grad_norm": 4.7736592292785645, |
|
"learning_rate": 9.59244685760108e-07, |
|
"loss": 0.8007, |
|
"step": 21137 |
|
}, |
|
{ |
|
"epoch": 94.04444444444445, |
|
"grad_norm": 4.64253044128418, |
|
"learning_rate": 9.27243795354138e-07, |
|
"loss": 0.8042, |
|
"step": 21160 |
|
}, |
|
{ |
|
"epoch": 94.14666666666666, |
|
"grad_norm": 5.671309471130371, |
|
"learning_rate": 8.957807868937296e-07, |
|
"loss": 0.7971, |
|
"step": 21183 |
|
}, |
|
{ |
|
"epoch": 94.24888888888889, |
|
"grad_norm": 4.637156963348389, |
|
"learning_rate": 8.648560052420151e-07, |
|
"loss": 0.8008, |
|
"step": 21206 |
|
}, |
|
{ |
|
"epoch": 94.35111111111111, |
|
"grad_norm": 4.140064239501953, |
|
"learning_rate": 8.344697893626741e-07, |
|
"loss": 0.7955, |
|
"step": 21229 |
|
}, |
|
{ |
|
"epoch": 94.45333333333333, |
|
"grad_norm": 4.615813732147217, |
|
"learning_rate": 8.046224723162077e-07, |
|
"loss": 0.7998, |
|
"step": 21252 |
|
}, |
|
{ |
|
"epoch": 94.55555555555556, |
|
"grad_norm": 5.006037712097168, |
|
"learning_rate": 7.75314381256298e-07, |
|
"loss": 0.7944, |
|
"step": 21275 |
|
}, |
|
{ |
|
"epoch": 94.65777777777778, |
|
"grad_norm": 4.940041542053223, |
|
"learning_rate": 7.465458374262213e-07, |
|
"loss": 0.7944, |
|
"step": 21298 |
|
}, |
|
{ |
|
"epoch": 94.76, |
|
"grad_norm": 4.452148914337158, |
|
"learning_rate": 7.183171561553348e-07, |
|
"loss": 0.8021, |
|
"step": 21321 |
|
}, |
|
{ |
|
"epoch": 94.86222222222223, |
|
"grad_norm": 4.3342509269714355, |
|
"learning_rate": 6.906286468555955e-07, |
|
"loss": 0.8016, |
|
"step": 21344 |
|
}, |
|
{ |
|
"epoch": 94.96444444444444, |
|
"grad_norm": 5.098360538482666, |
|
"learning_rate": 6.634806130182025e-07, |
|
"loss": 0.7997, |
|
"step": 21367 |
|
}, |
|
{ |
|
"epoch": 95.06666666666666, |
|
"grad_norm": 4.704761028289795, |
|
"learning_rate": 6.368733522102432e-07, |
|
"loss": 0.8007, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 95.16888888888889, |
|
"grad_norm": 4.529531002044678, |
|
"learning_rate": 6.108071560714413e-07, |
|
"loss": 0.7976, |
|
"step": 21413 |
|
}, |
|
{ |
|
"epoch": 95.27111111111111, |
|
"grad_norm": 4.470498561859131, |
|
"learning_rate": 5.852823103109639e-07, |
|
"loss": 0.7871, |
|
"step": 21436 |
|
}, |
|
{ |
|
"epoch": 95.37333333333333, |
|
"grad_norm": 4.434628486633301, |
|
"learning_rate": 5.602990947042919e-07, |
|
"loss": 0.8027, |
|
"step": 21459 |
|
}, |
|
{ |
|
"epoch": 95.47555555555556, |
|
"grad_norm": 4.518807411193848, |
|
"learning_rate": 5.358577830901435e-07, |
|
"loss": 0.7986, |
|
"step": 21482 |
|
}, |
|
{ |
|
"epoch": 95.57777777777778, |
|
"grad_norm": 4.176888942718506, |
|
"learning_rate": 5.119586433674661e-07, |
|
"loss": 0.7951, |
|
"step": 21505 |
|
}, |
|
{ |
|
"epoch": 95.68, |
|
"grad_norm": 4.806949138641357, |
|
"learning_rate": 4.886019374925333e-07, |
|
"loss": 0.7995, |
|
"step": 21528 |
|
}, |
|
{ |
|
"epoch": 95.78222222222222, |
|
"grad_norm": 4.371096611022949, |
|
"learning_rate": 4.657879214760297e-07, |
|
"loss": 0.7991, |
|
"step": 21551 |
|
}, |
|
{ |
|
"epoch": 95.88444444444444, |
|
"grad_norm": 4.214781761169434, |
|
"learning_rate": 4.435168453802874e-07, |
|
"loss": 0.7912, |
|
"step": 21574 |
|
}, |
|
{ |
|
"epoch": 95.98666666666666, |
|
"grad_norm": 4.71865177154541, |
|
"learning_rate": 4.2178895331650427e-07, |
|
"loss": 0.804, |
|
"step": 21597 |
|
}, |
|
{ |
|
"epoch": 96.08888888888889, |
|
"grad_norm": 4.573912143707275, |
|
"learning_rate": 4.0060448344209634e-07, |
|
"loss": 0.7969, |
|
"step": 21620 |
|
}, |
|
{ |
|
"epoch": 96.19111111111111, |
|
"grad_norm": 5.047268390655518, |
|
"learning_rate": 3.799636679580887e-07, |
|
"loss": 0.7964, |
|
"step": 21643 |
|
}, |
|
{ |
|
"epoch": 96.29333333333334, |
|
"grad_norm": 4.307917594909668, |
|
"learning_rate": 3.598667331065397e-07, |
|
"loss": 0.7957, |
|
"step": 21666 |
|
}, |
|
{ |
|
"epoch": 96.39555555555556, |
|
"grad_norm": 4.763662815093994, |
|
"learning_rate": 3.403138991681043e-07, |
|
"loss": 0.7958, |
|
"step": 21689 |
|
}, |
|
{ |
|
"epoch": 96.49777777777778, |
|
"grad_norm": 4.808367729187012, |
|
"learning_rate": 3.213053804595911e-07, |
|
"loss": 0.809, |
|
"step": 21712 |
|
}, |
|
{ |
|
"epoch": 96.6, |
|
"grad_norm": 5.026544570922852, |
|
"learning_rate": 3.0284138533160924e-07, |
|
"loss": 0.8024, |
|
"step": 21735 |
|
}, |
|
{ |
|
"epoch": 96.70222222222222, |
|
"grad_norm": 6.12026834487915, |
|
"learning_rate": 2.849221161663085e-07, |
|
"loss": 0.8041, |
|
"step": 21758 |
|
}, |
|
{ |
|
"epoch": 96.80444444444444, |
|
"grad_norm": 4.895252227783203, |
|
"learning_rate": 2.6754776937513717e-07, |
|
"loss": 0.7966, |
|
"step": 21781 |
|
}, |
|
{ |
|
"epoch": 96.90666666666667, |
|
"grad_norm": 4.611559867858887, |
|
"learning_rate": 2.507185353967101e-07, |
|
"loss": 0.8041, |
|
"step": 21804 |
|
}, |
|
{ |
|
"epoch": 97.00888888888889, |
|
"grad_norm": 4.198352813720703, |
|
"learning_rate": 2.344345986946994e-07, |
|
"loss": 0.8013, |
|
"step": 21827 |
|
}, |
|
{ |
|
"epoch": 97.11111111111111, |
|
"grad_norm": 4.63875675201416, |
|
"learning_rate": 2.186961377558361e-07, |
|
"loss": 0.8015, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 97.21333333333334, |
|
"grad_norm": 4.243088245391846, |
|
"learning_rate": 2.0350332508793367e-07, |
|
"loss": 0.7829, |
|
"step": 21873 |
|
}, |
|
{ |
|
"epoch": 97.31555555555556, |
|
"grad_norm": 4.228803634643555, |
|
"learning_rate": 1.8885632721800106e-07, |
|
"loss": 0.7999, |
|
"step": 21896 |
|
}, |
|
{ |
|
"epoch": 97.41777777777777, |
|
"grad_norm": 5.103250980377197, |
|
"learning_rate": 1.7475530469044376e-07, |
|
"loss": 0.7979, |
|
"step": 21919 |
|
}, |
|
{ |
|
"epoch": 97.52, |
|
"grad_norm": 4.691418170928955, |
|
"learning_rate": 1.6120041206524883e-07, |
|
"loss": 0.7972, |
|
"step": 21942 |
|
}, |
|
{ |
|
"epoch": 97.62222222222222, |
|
"grad_norm": 4.644149303436279, |
|
"learning_rate": 1.481917979163583e-07, |
|
"loss": 0.7897, |
|
"step": 21965 |
|
}, |
|
{ |
|
"epoch": 97.72444444444444, |
|
"grad_norm": 4.451114654541016, |
|
"learning_rate": 1.357296048299761e-07, |
|
"loss": 0.8001, |
|
"step": 21988 |
|
}, |
|
{ |
|
"epoch": 97.82666666666667, |
|
"grad_norm": 4.836966037750244, |
|
"learning_rate": 1.2381396940305824e-07, |
|
"loss": 0.7994, |
|
"step": 22011 |
|
}, |
|
{ |
|
"epoch": 97.92888888888889, |
|
"grad_norm": 4.453198432922363, |
|
"learning_rate": 1.12445022241775e-07, |
|
"loss": 0.7969, |
|
"step": 22034 |
|
}, |
|
{ |
|
"epoch": 98.03111111111112, |
|
"grad_norm": 5.4233903884887695, |
|
"learning_rate": 1.0162288796011221e-07, |
|
"loss": 0.8006, |
|
"step": 22057 |
|
}, |
|
{ |
|
"epoch": 98.13333333333334, |
|
"grad_norm": 4.528837203979492, |
|
"learning_rate": 9.134768517848336e-08, |
|
"loss": 0.8031, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 98.23555555555555, |
|
"grad_norm": 5.245551586151123, |
|
"learning_rate": 8.161952652243621e-08, |
|
"loss": 0.8005, |
|
"step": 22103 |
|
}, |
|
{ |
|
"epoch": 98.33777777777777, |
|
"grad_norm": 4.625002861022949, |
|
"learning_rate": 7.243851862141492e-08, |
|
"loss": 0.8075, |
|
"step": 22126 |
|
}, |
|
{ |
|
"epoch": 98.44, |
|
"grad_norm": 4.824587345123291, |
|
"learning_rate": 6.38047621075999e-08, |
|
"loss": 0.7925, |
|
"step": 22149 |
|
}, |
|
{ |
|
"epoch": 98.54222222222222, |
|
"grad_norm": 4.704883098602295, |
|
"learning_rate": 5.5718351614797437e-08, |
|
"loss": 0.7953, |
|
"step": 22172 |
|
}, |
|
{ |
|
"epoch": 98.64444444444445, |
|
"grad_norm": 4.561920642852783, |
|
"learning_rate": 4.817937577741294e-08, |
|
"loss": 0.7976, |
|
"step": 22195 |
|
}, |
|
{ |
|
"epoch": 98.74666666666667, |
|
"grad_norm": 4.796523094177246, |
|
"learning_rate": 4.118791722945159e-08, |
|
"loss": 0.8026, |
|
"step": 22218 |
|
}, |
|
{ |
|
"epoch": 98.8488888888889, |
|
"grad_norm": 4.576013565063477, |
|
"learning_rate": 3.474405260365798e-08, |
|
"loss": 0.794, |
|
"step": 22241 |
|
}, |
|
{ |
|
"epoch": 98.95111111111112, |
|
"grad_norm": 5.13820743560791, |
|
"learning_rate": 2.8847852530622387e-08, |
|
"loss": 0.7895, |
|
"step": 22264 |
|
}, |
|
{ |
|
"epoch": 99.05333333333333, |
|
"grad_norm": 4.2987060546875, |
|
"learning_rate": 2.3499381638064645e-08, |
|
"loss": 0.7919, |
|
"step": 22287 |
|
}, |
|
{ |
|
"epoch": 99.15555555555555, |
|
"grad_norm": 4.3480305671691895, |
|
"learning_rate": 1.8698698550068117e-08, |
|
"loss": 0.798, |
|
"step": 22310 |
|
}, |
|
{ |
|
"epoch": 99.25777777777778, |
|
"grad_norm": 5.037069797515869, |
|
"learning_rate": 1.4445855886480176e-08, |
|
"loss": 0.8026, |
|
"step": 22333 |
|
}, |
|
{ |
|
"epoch": 99.36, |
|
"grad_norm": 4.374788284301758, |
|
"learning_rate": 1.074090026231267e-08, |
|
"loss": 0.7926, |
|
"step": 22356 |
|
}, |
|
{ |
|
"epoch": 99.46222222222222, |
|
"grad_norm": 4.93529748916626, |
|
"learning_rate": 7.583872287253436e-09, |
|
"loss": 0.8044, |
|
"step": 22379 |
|
}, |
|
{ |
|
"epoch": 99.56444444444445, |
|
"grad_norm": 4.404996395111084, |
|
"learning_rate": 4.974806565177792e-09, |
|
"loss": 0.802, |
|
"step": 22402 |
|
}, |
|
{ |
|
"epoch": 99.66666666666667, |
|
"grad_norm": 4.556636333465576, |
|
"learning_rate": 2.9137316938265825e-09, |
|
"loss": 0.793, |
|
"step": 22425 |
|
}, |
|
{ |
|
"epoch": 99.7688888888889, |
|
"grad_norm": 4.4638190269470215, |
|
"learning_rate": 1.4006702644453474e-09, |
|
"loss": 0.7999, |
|
"step": 22448 |
|
}, |
|
{ |
|
"epoch": 99.8711111111111, |
|
"grad_norm": 4.293120861053467, |
|
"learning_rate": 4.3563886156228196e-10, |
|
"loss": 0.8048, |
|
"step": 22471 |
|
}, |
|
{ |
|
"epoch": 99.97333333333333, |
|
"grad_norm": 4.100605010986328, |
|
"learning_rate": 1.8648062799497822e-11, |
|
"loss": 0.7996, |
|
"step": 22494 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 22500, |
|
"total_flos": 2.1925440120390943e+18, |
|
"train_loss": 2.6133422136730617, |
|
"train_runtime": 133573.7106, |
|
"train_samples_per_second": 86.157, |
|
"train_steps_per_second": 0.168 |
|
} |
|
], |
|
"logging_steps": 23, |
|
"max_steps": 22500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1925440120390943e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|